Mega Code Archive

Splits the provided text into a list, based on a given separator

import java.util.StringTokenizer;

/*
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 2002 The Apache Software Foundation.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution, if
 *    any, must include the following acknowlegement:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowlegement may appear in the software itself,
 *    if and wherever such third-party acknowlegements normally appear.
 *
 * 4. The names "The Jakarta Project", "Commons", and "Apache Software
 *    Foundation" must not be used to endorse or promote products derived
 *    from this software without prior written permission. For written
 *    permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache"
 *    nor may "Apache" appear in their names without prior written
 *    permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */

/**
 * max: Removed methods that depend on anything other than common.StringUtils.
 *
 * Common <code>String</code> manipulation routines.
 *
 * Originally from
 * <a href="http://jakarta.apache.org/turbine/">Turbine</a> and the
 * GenerationJavaCore library.
 *
 * @author <a href="mailto:jon@latchkey.com">Jon S. Stevens</a>
 * @author <a href="mailto:dlr@finemaltcoding.com">Daniel Rall</a>
 * @author <a href="mailto:gcoladonato@yahoo.com">Greg Coladonato</a>
 * @author <a href="mailto:bayard@generationjava.com">Henri Yandell</a>
 * @author <a href="mailto:ed@apache.org">Ed Korthof</a>
 * @author <a href="mailto:rand_mcneely@yahoo.com">Rand McNeely</a>
 * @author <a href="mailto:scolebourne@joda.org">Stephen Colebourne</a>
 * @author <a href="mailto:fredrik@westermarck.com">Fredrik Westermarck</a>
 * @version $Id$
 */
public class StringUtil {

  /**
   * Splits the provided text into an array, based on a given separator.
   * The separator is not included in the returned String array.
   * The maximum number of splits to perform can be controlled.
   * A null separator will cause parsing to be on whitespace.
   *
   * <p>This is useful for quickly splitting a string directly into
   * an array of tokens, instead of an enumeration of tokens (as
   * <code>StringTokenizer</code> does).
   *
   * <p>Tokenizing is delegated to <code>StringTokenizer</code>, so every
   * character of <code>separator</code> is a delimiter in its own right,
   * runs of adjacent delimiters count as a single split point, and empty
   * tokens are never returned.
   *
   * <p>When <code>max</code> is positive and the text holds more than
   * <code>max</code> tokens, the last element of the result receives all
   * of the remaining text verbatim, starting at the <code>max</code>-th
   * token (embedded and trailing delimiters included). When the text holds
   * <code>max</code> tokens or fewer, every element is a plain token.
   *
   * @param str The string to parse.
   * @param separator Characters used as the delimiters. If
   *     <code>null</code>, splits on whitespace.
   * @param max The maximum number of elements to include in the
   *     list. A zero or negative value implies no limit.
   * @return an array of parsed Strings
   * @throws NullPointerException if <code>str</code> is <code>null</code>
   */
  public static String[] split(String str, String separator, int max) {
    // A null separator selects StringTokenizer's default delimiter set,
    // which comprises the whitespace characters.
    StringTokenizer tok = (separator == null)
        ? new StringTokenizer(str)
        : new StringTokenizer(str, separator);

    int tokenCount = tok.countTokens();

    // Only when the input really holds more tokens than allowed does the
    // final element swallow the rest of the text. Without this check a
    // generous max would leak trailing delimiters into the last token.
    boolean truncated = max > 0 && tokenCount > max;
    int listSize = truncated ? max : tokenCount;

    String[] list = new String[listSize];
    // Offset in str just past the previously consumed token; searching from
    // here locates the current token even when its text occurs earlier.
    int searchFrom = 0;
    for (int i = 0; i < listSize; i++) {
      String token = tok.nextToken();
      int tokenBegin = str.indexOf(token, searchFrom);
      boolean lastSlot = (i == listSize - 1);
      list[i] = (truncated && lastSlot) ? str.substring(tokenBegin) : token;
      searchFrom = tokenBegin + token.length();
    }
    return list;
  }
}