Mega Code Archive

A collection of File, URL and filename utility methods

/*****************************************************************************
 * Copyright (C) The Apache Software Foundation. All rights reserved.        *
 * ------------------------------------------------------------------------- *
 * This software is published under the terms of the Apache Software License *
 * version 1.1, a copy of which has been included with this distribution in  *
 * the LICENSE file.                                                         *
 *****************************************************************************/

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.BitSet;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.Map;
import java.util.NoSuchElementException;

/**
 * A collection of <code>File</code>, <code>URL</code> and filename
 * utility methods
 *
 * @author <a href="mailto:stefano@apache.org">Stefano Mazzocchi</a>
 * @version CVS $Revision: 1.1 $ $Date: 2002/03/17 13:37:13 $
 */
public class NetUtils {

    /**
     * Set of characters that {@link #encodePath(String)} copies through
     * unescaped (the safe character set as defined by RFC 1738 plus the
     * path separators common to http:, file: and ftp: URLs).
     */
    private static final BitSet safeCharacters;

    /** Upper-case hexadecimal digits used to build <code>%XX</code> escapes. */
    private static final char[] hexadecimal =
        {'0', '1', '2', '3', '4', '5', '6', '7',
         '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};

    static {
        safeCharacters = new BitSet(256);
        int i;
        // 'lowalpha' rule
        for (i = 'a'; i <= 'z'; i++) {
            safeCharacters.set(i);
        }
        // 'hialpha' rule
        for (i = 'A'; i <= 'Z'; i++) {
            safeCharacters.set(i);
        }
        // 'digit' rule
        for (i = '0'; i <= '9'; i++) {
            safeCharacters.set(i);
        }

        // 'safe' rule
        safeCharacters.set('$');
        safeCharacters.set('-');
        safeCharacters.set('_');
        safeCharacters.set('.');
        safeCharacters.set('+');

        // 'extra' rule
        safeCharacters.set('!');
        safeCharacters.set('*');
        safeCharacters.set('\'');
        safeCharacters.set('(');
        safeCharacters.set(')');
        safeCharacters.set(',');

        // special characters common to http: file: and ftp: URLs
        // ('fsegment' and 'hsegment' rules)
        safeCharacters.set('/');
        safeCharacters.set(':');
        safeCharacters.set('@');
        safeCharacters.set('&');
        safeCharacters.set('=');
    }

    /**
     * Decode a path.
     *
     * <p>Escapes are decoded as UTF-8 so that the result is the inverse of
     * {@link #encodePath(String)}, which encodes as UTF-8. Note that decoding
     * is delegated to <code>java.net.URLDecoder</code>, which also turns a
     * literal <code>'+'</code> into a space.
     *
     * @param path the path to decode
     * @return the decoded path
     * @throws Exception if the decoder rejects the input or the encoding
     */
    public static String decodePath(String path) throws Exception {
        return java.net.URLDecoder.decode(path, "UTF-8");
    }

    /**
     * Encode a path as required by the URL specification
     * (<a href="http://www.ietf.org/rfc/rfc1738.txt">RFC 1738</a>). This
     * differs from <code>java.net.URLEncoder.encode()</code> which encodes
     * according to the <code>x-www-form-urlencoded</code> MIME format.
     *
     * @param path the path to encode
     * @return the encoded path
     */
    public static String encodePath(String path) {
        // stolen from org.apache.catalina.servlets.DefaultServlet ;)

        // Note: This code portion is very similar to URLEncoder.encode.
        // Unfortunately, there is no way to specify to the URLEncoder which
        // characters should be encoded. Here, ' ' should be encoded as "%20"
        // and '/' shouldn't be encoded.

        int maxBytesPerChar = 10;
        StringBuffer rewrittenPath = new StringBuffer(path.length());
        ByteArrayOutputStream buf = new ByteArrayOutputStream(maxBytesPerChar);
        OutputStreamWriter writer = null;
        try {
            writer = new OutputStreamWriter(buf, "UTF8");
        } catch (Exception e) {
            // fall back to the platform default encoding
            e.printStackTrace();
            writer = new OutputStreamWriter(buf);
        }

        for (int i = 0; i < path.length(); i++) {
            int c = (int) path.charAt(i);
            if (safeCharacters.get(c)) {
                rewrittenPath.append((char) c);
            } else {
                // convert to external encoding before hex conversion
                try {
                    writer.write(c);
                    writer.flush();
                } catch (IOException e) {
                    // character could not be converted: drop it
                    buf.reset();
                    continue;
                }
                byte[] ba = buf.toByteArray();
                for (int j = 0; j < ba.length; j++) {
                    // Converting each byte in the buffer
                    byte toEncode = ba[j];
                    rewrittenPath.append('%');
                    int low = (int) (toEncode & 0x0f);
                    int high = (int) ((toEncode & 0xf0) >> 4);
                    rewrittenPath.append(hexadecimal[high]);
                    rewrittenPath.append(hexadecimal[low]);
                }
                buf.reset();
            }
        }
        return rewrittenPath.toString();
    }

    /**
     * Returns the path of the given resource.
     *
     * <p>The path is everything before the last <code>'/'</code>. When the
     * uri contains no slash, everything after the first <code>':'</code> is
     * returned; when it contains neither, the empty string is returned.
     *
     * @param uri the resource
     * @return the resource path
     */
    public static String getPath(String uri) {
        int i = uri.lastIndexOf('/');
        if (i > -1) {
            return uri.substring(0, i);
        }
        i = uri.indexOf(':');
        return (i > -1) ? uri.substring(i + 1, uri.length()) : "";
    }

    /**
     * Remove path and file information from a filename returning only its
     * extension component.
     *
     * <p>Any fragment (<code>#...</code>) and query (<code>?...</code>) part
     * is removed before the extension is located, so dots or slashes inside
     * them do not influence the result.
     *
     * @param uri The filename
     * @return The filename extension (with starting dot!), or
     *         <code>null</code> if the last path segment has no extension
     */
    public static String getExtension(String uri) {
        int end = uri.length();
        int sharp = uri.indexOf('#');
        if (sharp > -1) {
            end = sharp;
        }
        int mark = uri.indexOf('?');
        if (mark > -1 && mark < end) {
            end = mark;
        }
        String path = uri.substring(0, end);

        int dot = path.lastIndexOf('.');
        if (dot == -1) {
            return null;
        }
        // a slash after the last dot means the dot belongs to a directory
        if (path.indexOf('/', dot) > -1) {
            return null;
        }
        return path.substring(dot);
    }

    /**
     * Absolutize a relative resource on the given absolute path.
     *
     * @param path the absolute path; when <code>null</code> or empty the
     *        resource is returned unchanged
     * @param relativeResource the relative resource; a resource starting
     *        with <code>'/'</code> is considered absolute already
     * @return the absolutized resource
     */
    public static String absolutize(String path, String relativeResource) {
        if (("".equals(path)) || (path == null)) {
            return relativeResource;
        }

        // an empty resource is treated as relative instead of failing on charAt(0)
        boolean relative = relativeResource.length() == 0
            || relativeResource.charAt(0) != '/';
        if (!relative) {
            // resource is already absolute
            return relativeResource;
        }

        boolean slashPresent = (path.charAt(path.length() - 1) == '/');
        StringBuffer b = new StringBuffer();
        b.append(path);
        if (!slashPresent) {
            b.append('/');
        }
        b.append(relativeResource);
        return b.toString();
    }

    /**
     * Relativize an absolute resource on a given absolute path.
     *
     * @param path the absolute path; when <code>null</code> or empty the
     *        resource is returned unchanged
     * @param absoluteResource the absolute resource
     * @return the resource relative to the given path
     */
    public static String relativize(String path, String absoluteResource) {
        if (("".equals(path)) || (path == null)) {
            return absoluteResource;
        }

        // Compare against the slash-terminated path so that "/a/b" is not
        // mistaken for a parent of "/a/bc/d".
        boolean slashPresent = path.charAt(path.length() - 1) == '/';
        String base = slashPresent ? path : path + "/";

        if (absoluteResource.startsWith(base)) {
            // resource is direct descendant
            return absoluteResource.substring(base.length());
        }

        // resource is not direct descendant
        int index = StringUtils.matchStrings(base, absoluteResource);
        if (index > 0 && base.charAt(index - 1) != '/') {
            // the common prefix ends inside a segment: back up to its start
            index = base.substring(0, index).lastIndexOf('/');
            index++;
        }

        String pathDiff = base.substring(index);
        String resource = absoluteResource.substring(index);
        int levels = StringUtils.count(pathDiff, '/');

        StringBuffer b = new StringBuffer();
        for (int i = 0; i < levels; i++) {
            b.append("../");
        }
        b.append(resource);
        return b.toString();
    }

    /**
     * Normalize a uri containing ../ and ./ paths.
     *
     * @param uri The uri path to normalize
     * @return The normalized uri
     */
    public static String normalize(String uri) {
        String[] dirty = StringUtils.split(uri, "/");
        int length = dirty.length;
        String[] clean = new String[length];

        boolean path;
        boolean finished;
        // Each pass drops "." segments and every "segment/.." pair; passes
        // repeat until no ".." is left after a regular segment.
        while (true) {
            path = false;
            finished = true;
            // a null entry marks the end of the used part of the array
            for (int i = 0, j = 0; (i < length) && (dirty[i] != null); i++) {
                if (".".equals(dirty[i])) {
                    // ignore
                } else if ("..".equals(dirty[i])) {
                    clean[j++] = dirty[i];
                    if (path) {
                        finished = false;
                    }
                } else {
                    if ((i + 1 < length) && ("..".equals(dirty[i + 1]))) {
                        // skip this segment together with the following ".."
                        i++;
                    } else {
                        clean[j++] = dirty[i];
                        path = true;
                    }
                }
            }
            if (finished) {
                break;
            } else {
                dirty = clean;
                clean = new String[length];
            }
        }

        StringBuffer b = new StringBuffer(uri.length());
        for (int i = 0; (i < length) && (clean[i] != null); i++) {
            b.append(clean[i]);
            if ((i + 1 < length) && (clean[i + 1] != null)) {
                b.append("/");
            }
        }
        return b.toString();
    }

    /**
     * Remove parameters from a uri.
     *
     * <p>Everything after the last <code>'?'</code> is split on
     * <code>'&amp;'</code>; each <code>name=value</code> pair is stored in
     * the given map. Entries without an <code>'='</code> are skipped.
     *
     * @param uri The uri path to deparameterize.
     * @param parameters The map that collects parameters.
     * @return The cleaned uri
     */
    public static String deparameterize(String uri, Map parameters) {
        int i = uri.lastIndexOf('?');
        if (i == -1) {
            return uri;
        }
        String[] params = StringUtils.split(uri.substring(i + 1), "&");
        for (int j = 0; j < params.length; j++) {
            String p = params[j];
            if (p == null) {
                // the token array may carry unused trailing slots
                continue;
            }
            int k = p.indexOf('=');
            if (k == -1) {
                // value-less entry: skip it but keep the following parameters
                continue;
            }
            String name = p.substring(0, k);
            String value = p.substring(k + 1);
            parameters.put(name, value);
        }
        return uri.substring(0, i);
    }

    /**
     * Append the given parameters to a uri as a query string.
     *
     * <p>Keys and values are appended as they are, without any escaping.
     *
     * @param uri The uri to append the parameters to.
     * @param parameters The parameters to append.
     * @return the uri followed by <code>?name=value&amp;...</code>, or the
     *         uri unchanged if the map is empty
     */
    public static String parameterize(String uri, Map parameters) {
        if (parameters.size() == 0) {
            return uri;
        }

        StringBuffer buffer = new StringBuffer(uri);
        buffer.append('?');
        for (Iterator i = parameters.entrySet().iterator(); i.hasNext();) {
            Map.Entry entry = (Map.Entry) i.next();
            buffer.append(entry.getKey());
            buffer.append('=');
            buffer.append(entry.getValue());
            if (i.hasNext()) {
                buffer.append('&');
            }
        }
        return buffer.toString();
    }
}

/*****************************************************************************
 * Copyright (C) The Apache Software Foundation. All rights reserved.        *
 * ------------------------------------------------------------------------- *
 * This software is published under the terms of the Apache Software License *
 * version 1.1, a copy of which has been included with this distribution in  *
 * the LICENSE file.                                                         *
 *****************************************************************************/

/**
 * A collection of <code>String</code> handling utility methods.
 *
 * @author <a href="mailto:ricardo@apache.org">Ricardo Rocha</a>
 * @author <a href="mailto:stefano@apache.org">Stefano Mazzocchi</a>
 * @version CVS $Revision: 1.1 $ $Date: 2002/03/17 13:37:13 $
 */
class StringUtils {

    /**
     * Split a string as an array using whitespace as separator
     *
     * @param line The string to be split
     * @return An array of whitespace-separated tokens
     */
    public static String[] split(String line) {
        return split(line, " \t\n\r");
    }

    /**
     * Split a string as an array using a given set of separators
     *
     * @param line The string to be split
     * @param delimiter A string containing token separators
     * @return An array of token
     */
    public static String[] split(String line, String delimiter) {
        return Tokenizer.tokenize(line, delimiter, false);
    }

    /**
     * Tests whether a given character is alphabetic, numeric or
     * underscore
     *
     * @param c The character to be tested
     * @return whether the given character is alphameric or not
     */
    public static boolean isAlphaNumeric(char c) {
        return c == '_'
            || (c >= 'a' && c <= 'z')
            || (c >= 'A' && c <= 'Z')
            || (c >= '0' && c <= '9');
    }

    /**
     * Counts the occurrence of the given char in the string.
     *
     * @param str The string to be tested
     * @param c the char to be counted
     * @return the occurrence of the character in the string.
     */
    public static int count(String str, char c) {
        int occurrences = 0;
        for (int i = 0; i < str.length(); i++) {
            if (str.charAt(i) == c) {
                occurrences++;
            }
        }
        return occurrences;
    }

    /**
     * Matches two strings.
     *
     * @param a The first string
     * @param b The second string
     * @return the index where the two strings stop matching starting from 0
     */
    public static int matchStrings(String a, String b) {
        int len = (a.length() < b.length()) ? a.length() : b.length();
        int i;
        for (i = 0; i < len; i++) {
            if (a.charAt(i) != b.charAt(i)) {
                break;
            }
        }
        return i;
    }

    /**
     * Replaces the first <code>${name}</code> token in the input with the
     * value of <code>System.getProperty(name)</code>.
     *
     * <p>An undefined property is rendered as the text <code>null</code>.
     *
     * @param s The string containing the token
     * @return the string with the first token replaced, or the input
     *         unchanged if it contains no complete <code>${...}</code> token
     */
    public static String replaceToken(String s) {
        int startToken = s.indexOf("${");
        if (startToken == -1) {
            return s;
        }
        int endToken = s.indexOf("}", startToken);
        if (endToken == -1) {
            return s;
        }
        String token = s.substring(startToken + 2, endToken);

        StringBuffer value = new StringBuffer();
        value.append(s.substring(0, startToken));
        value.append(System.getProperty(token));
        value.append(s.substring(endToken + 1));
        return value.toString();
    }
}

/*****************************************************************************
 * Copyright (C) The Apache Software Foundation. All rights reserved.        *
 * ------------------------------------------------------------------------- *
 * This software is published under the terms of the Apache Software License *
 * version 1.1, a copy of which has been included with this distribution in  *
 * the LICENSE file.                                                         *
 *****************************************************************************/

/**
 * Replacement for StringTokenizer in java.util, because of bug in the
 * Sun's implementation.
 *
 * @author <A HREF="mailto:moravek@pobox.sk">Peter Moravek</A>
 */
class Tokenizer implements Enumeration {

    /**
     * Default delimiters " \t\n\r\f":
     * the space character, the tab character, the newline character,
     * the carriage-return character, and the form-feed character.
     */
    public static final String DEFAULT_DELIMITERS = " \t\n\r\f";

    /**
     * String to tokenize.
     */
    private String str = null;

    /**
     * Delimiters.
     */
    private String delim = null;

    /**
     * Flag indicating whether to return the delimiters as tokens.
     */
    private boolean returnTokens = false;

    /**
     * Previous token start.
     */
    private int previous = -1;

    /**
     * Current position in str string.
     */
    private int current = 0;

    /**
     * Maximal position in str string.
     */
    private int max = 0;

    /**
     * Constructs a string tokenizer for the specified string. All characters
     * in the delim argument are the delimiters for separating tokens.
     * If the returnTokens flag is true, then the delimiter characters are
     * also returned as tokens. Each delimiter is returned as a string of
     * length one. If the flag is false, the delimiter characters are skipped
     * and only serve as separators between tokens.
     *
     * @param str           a string to be parsed
     * @param delim         the delimiters
     * @param returnTokens  flag indicating whether to return the delimiters
     *                      as tokens
     */
    public Tokenizer(String str, String delim, boolean returnTokens) {
        this.str = str;
        this.delim = delim;
        this.returnTokens = returnTokens;
        max = str.length();
    }

    /**
     * Constructs a string tokenizer for the specified string. The characters
     * in the delim argument are the delimiters for separating tokens.
     * Delimiter characters themselves will not be treated as tokens.
     *
     * @param str    a string to be parsed
     * @param delim  the delimiters
     */
    public Tokenizer(String str, String delim) {
        this(str, delim, false);
    }

    /**
     * Constructs a string tokenizer for the specified string. The character
     * in the delim argument is the delimiter for separating tokens.
     * Delimiter character themselves will not be treated as token.
     *
     * @param str    a string to be parsed
     * @param delim  the delimiter
     */
    public Tokenizer(String str, char delim) {
        this(str, String.valueOf(delim), false);
    }

    /**
     * Constructs a string tokenizer for the specified string. The tokenizer
     * uses the default delimiter set, which is " \t\n\r\f": the space
     * character, the tab character, the newline character, the carriage-return
     * character, and the form-feed character. Delimiter characters themselves
     * will not be treated as tokens.
     *
     * @param str  a string to be parsed
     */
    public Tokenizer(String str) {
        this(str, DEFAULT_DELIMITERS, false);
    }

    /**
     * Tells whether the position sits exactly at the end of the string and
     * one more empty token is still due: either the string itself is empty,
     * or delimiters are returned as tokens and the previous token started
     * on a delimiter.
     */
    private boolean atTrailingEmptyToken() {
        return current == max
            && (max == 0
                || (returnTokens && delim.indexOf(str.charAt(previous)) >= 0));
    }

    /**
     * Tests if there are more tokens available from this tokenizer's string.
     * If this method returns true, then a subsequent call to nextToken with
     * no argument will successfully return a token.
     *
     * @return true if and only if there is at least one token in the string
     *         after the current position; false otherwise.
     */
    public boolean hasMoreTokens() {
        return current < max || atTrailingEmptyToken();
    }

    /**
     * Returns the next token from this string tokenizer.
     *
     * @return the next token from this string tokenizer
     *
     * @exception NoSuchElementException  if there are no more tokens in this
     *                                    tokenizer's string
     */
    public String nextToken() throws NoSuchElementException {
        if (atTrailingEmptyToken()) {
            // step past the end so the empty token is delivered only once
            current++;
            return "";
        }

        if (current >= max) {
            throw new NoSuchElementException();
        }

        int start = current;
        String result = null;

        if (delim.indexOf(str.charAt(start)) >= 0) {
            if (previous == -1
                    || (returnTokens && previous != current
                        && delim.indexOf(str.charAt(previous)) >= 0)) {
                // delimiter at the very start, or directly after another
                // returned delimiter: an empty token lies in between
                result = "";
            } else if (returnTokens) {
                result = str.substring(start, ++current);
            }

            if (!returnTokens) {
                // delimiters are not reported: just step over this one
                current++;
            }
        }

        previous = start;
        start = current;

        if (result == null) {
            while (current < max && delim.indexOf(str.charAt(current)) < 0) {
                current++;
            }
        }

        return result == null ? str.substring(start, current) : result;
    }

    /**
     * Returns the next token in this string tokenizer's string. First, the
     * set of characters considered to be delimiters by this Tokenizer
     * object is changed to be the characters in the string delim.
     * Then the next token in the string after the current position is
     * returned. The current position is advanced beyond the recognized token.
     * The new delimiter set remains the default after this call.
     *
     * @param delim  the new delimiters
     *
     * @return the next token, after switching to the new delimiter set
     *
     * @exception NoSuchElementException  if there are no more tokens in this
     *                                    tokenizer's string.
     */
    public String nextToken(String delim) throws NoSuchElementException {
        this.delim = delim;
        return nextToken();
    }

    /**
     * Returns the same value as the hasMoreTokens method. It exists so that
     * this class can implement the Enumeration interface.
     *
     * @return true if there are more tokens; false otherwise.
     */
    public boolean hasMoreElements() {
        return hasMoreTokens();
    }

    /**
     * Returns the same value as the nextToken method, except that its
     * declared return value is Object rather than String. It exists so that
     * this class can implement the Enumeration interface.
     *
     * @return the next token in the string
     *
     * @exception NoSuchElementException  if there are no more tokens in this
     *                                    tokenizer's string
     */
    public Object nextElement() {
        return nextToken();
    }

    /**
     * Calculates an upper estimate of the number of times that this
     * tokenizer's nextToken method can be called before it generates an
     * exception: one more than the number of remaining delimiter characters,
     * plus one per delimiter when delimiters are returned as tokens. The
     * current position is not advanced.
     *
     * @return the number of tokens remaining in the string using the
     *         current delimiter set
     */
    public int countTokens() {
        int count = 0;
        for (int i = current; i < max; i++) {
            if (delim.indexOf(str.charAt(i)) >= 0) {
                count++;
            }
        }
        return count + (returnTokens ? count : 0) + 1;
    }

    /**
     * Resets this tokenizer's state so the tokenizing starts from the begin.
     */
    public void reset() {
        previous = -1;
        current = 0;
    }

    /**
     * Constructs a string tokenizer for the specified string. All characters
     * in the delim argument are the delimiters for separating tokens.
     * If the returnTokens flag is true, then the delimiter characters are
     * also returned as tokens. Each delimiter is returned as a string of
     * length one. If the flag is false, the delimiter characters are skipped
     * and only serve as separators between tokens. Then tokenizes the str
     * and return an String[] array with tokens.
     *
     * <p>The array is sized with {@link #countTokens()} before tokenizing,
     * so it can be longer than the number of tokens actually produced (for
     * example when the input is a single delimiter character); unused
     * trailing slots are <code>null</code>.
     *
     * @param str           a string to be parsed
     * @param delim         the delimiters
     * @param returnTokens  flag indicating whether to return the delimiters
     *                      as tokens
     *
     * @return array with tokens
     */
    public static String[] tokenize(String str, String delim,
                                    boolean returnTokens) {
        Tokenizer tokenizer = new Tokenizer(str, delim, returnTokens);
        String[] tokens = new String[tokenizer.countTokens()];

        int i = 0;
        while (tokenizer.hasMoreTokens()) {
            tokens[i] = tokenizer.nextToken();
            i++;
        }

        return tokens;
    }
}