Mega Code Archive

Splits a String into tokens by character type

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.util.ArrayList;
import java.util.List;

public class Main {

    /**
     * <p>Splits a String by Character type as returned by
     * <code>java.lang.Character.getType(char)</code>. Groups of contiguous
     * characters of the same type are returned as complete tokens.</p>
     *
     * <p>Characters are classified one UTF-16 <code>char</code> at a time, so a
     * supplementary character is seen as its two surrogate code units rather
     * than as a single code point.</p>
     *
     * <pre>
     * splitByCharacterType(null)         = null
     * splitByCharacterType("")           = []
     * splitByCharacterType("ab de fg")   = ["ab", " ", "de", " ", "fg"]
     * splitByCharacterType("ab   de fg") = ["ab", "   ", "de", " ", "fg"]
     * splitByCharacterType("ab:cd:ef")   = ["ab", ":", "cd", ":", "ef"]
     * splitByCharacterType("number5")    = ["number", "5"]
     * splitByCharacterType("fooBar")     = ["foo", "B", "ar"]
     * splitByCharacterType("foo200Bar")  = ["foo", "200", "B", "ar"]
     * splitByCharacterType("ASFRules")   = ["ASFR", "ules"]
     * </pre>
     *
     * @param str the String to split, may be <code>null</code>
     * @return an array of parsed Strings, <code>null</code> if null String input
     * @since 2.4
     */
    public static String[] splitByCharacterType(String str) {
        return splitByCharacterType(str, false);
    }

    /**
     * <p>Splits a String by Character type as returned by
     * <code>java.lang.Character.getType(char)</code>, treating an upper-case
     * letter that immediately precedes a run of lower-case letters as the
     * first character of that run ("camel-case" splitting).</p>
     *
     * <pre>
     * splitByCharacterTypeCamelCase(null)        = null
     * splitByCharacterTypeCamelCase("")          = []
     * splitByCharacterTypeCamelCase("ab de fg")  = ["ab", " ", "de", " ", "fg"]
     * splitByCharacterTypeCamelCase("number5")   = ["number", "5"]
     * splitByCharacterTypeCamelCase("fooBar")    = ["foo", "Bar"]
     * splitByCharacterTypeCamelCase("foo200Bar") = ["foo", "200", "Bar"]
     * splitByCharacterTypeCamelCase("ASFRules")  = ["ASF", "Rules"]
     * </pre>
     *
     * @param str the String to split, may be <code>null</code>
     * @return an array of parsed Strings, <code>null</code> if null String input
     */
    public static String[] splitByCharacterTypeCamelCase(String str) {
        return splitByCharacterType(str, true);
    }

    /**
     * <p>Splits a String by Character type as returned by
     * <code>java.lang.Character.getType(char)</code>. Groups of contiguous
     * characters of the same type are returned as complete tokens, with the
     * following exception: if <code>camelCase</code> is <code>true</code>,
     * the character of type <code>Character.UPPERCASE_LETTER</code>, if any,
     * immediately preceding a token of type
     * <code>Character.LOWERCASE_LETTER</code> will belong to the following
     * token rather than to the preceding, if any,
     * <code>Character.UPPERCASE_LETTER</code> token.</p>
     *
     * @param str the String to split, may be <code>null</code>
     * @param camelCase whether to use so-called "camel-case" for letter types
     * @return an array of parsed Strings, <code>null</code> if null String
     *         input, an empty array if the input is the empty String
     * @since 2.4
     */
    private static String[] splitByCharacterType(String str, boolean camelCase) {
        if (str == null) {
            return null;
        }
        if (str.length() == 0) {
            return new String[0];
        }
        char[] c = str.toCharArray();
        List<String> list = new ArrayList<String>();
        int tokenStart = 0;
        int currentType = Character.getType(c[tokenStart]);
        for (int pos = tokenStart + 1; pos < c.length; pos++) {
            int type = Character.getType(c[pos]);
            if (type == currentType) {
                continue;
            }
            if (camelCase
                    && type == Character.LOWERCASE_LETTER
                    && currentType == Character.UPPERCASE_LETTER) {
                // Upper -> lower transition: the last upper-case char starts the
                // new token, so only the upper-case chars before it (if any)
                // are emitted as a token of their own.
                int newTokenStart = pos - 1;
                if (newTokenStart != tokenStart) {
                    list.add(new String(c, tokenStart, newTokenStart - tokenStart));
                    tokenStart = newTokenStart;
                }
            } else {
                // Ordinary type change: close the current token at pos.
                list.add(new String(c, tokenStart, pos - tokenStart));
                tokenStart = pos;
            }
            currentType = type;
        }
        // Emit the trailing token; the input is non-empty so one always exists.
        list.add(new String(c, tokenStart, c.length - tokenStart));
        return list.toArray(new String[list.size()]);
    }
}