Mega Code Archive

Categories / Java / Collections Data Structure

Useful for string set lookups and command completion stuff

//package com.ryanm.util.text; import java.util.ArrayList; import java.util.Arrays; import java.util.LinkedList; import java.util.List; /** * Useful for string set lookups and command completion stuff * * @author ryanm */ public class RadixTree { private Node root = new Node( "" ); private final boolean caseSensitive; /** * @param caseSensitive * <code>true</code> if case matters. Note that a * case-insensitive {@link RadixTree} will convert all * strings passed to it for insertion or query to lower * case. */ public RadixTree( boolean caseSensitive ) { this.caseSensitive = caseSensitive; root.isString = false; } /** * Adds string to the set * * @param string */ public void add( CharSequence string ) { if( !caseSensitive ) { string = string.toString().toLowerCase(); } root.addString( string ); } /** * Removes a string from the set * * @param string */ public void remove( CharSequence string ) { if( !caseSensitive ) { string = string.toString().toLowerCase(); } root.removeString( string ); } /** * Tests if the string is contained in the set * * @param string * @return <code>true</code> if the entire string is contained in * the tree */ public boolean contains( CharSequence string ) { if( !caseSensitive ) { string = string.toString().toLowerCase(); } return findPredecessor( string ).length() == string.length(); } /** * Finds the substring of the string that is in the set * * @param string * @return The substring that belongs */ public String findPredecessor( CharSequence string ) { if( !caseSensitive ) { string = string.toString().toLowerCase(); } StringBuilder buff = new StringBuilder(); root.findPredecessor( string, buff ); return buff.toString(); } /** * Finds possible completions that fit in the set * * @param string * @param depth * How deeply to search the tree, the maximum number of * decisions that need to be made to type any one * completion * @return A list of possible completions */ public List<String> findSuccessors( CharSequence string, int depth ) { if( !caseSensitive ) { string = string.toString().toLowerCase(); } List<String> completions = new LinkedList<String>(); root.findSuccessors( string, depth, completions ); return completions; } @Override public String toString() { StringBuilder buff = new StringBuilder(); root.buildString( buff, -1 ); return buff.toString(); } private class Node implements Comparable<Node> { private CharSequence value; private Node[] children = new Node[ 0 ]; /** * Indicates that the string ending at this node is a string in * the set */ private boolean isString = true; private Node( CharSequence string ) { value = string; } private void findSuccessors( CharSequence string, int depth, List<String> completions ) { int d = findDivergenceIndex( string ); if( d < value.length() || d == string.length() ) { StringBuilder prefix = new StringBuilder( value.subSequence( d, value.length() ) ); if( isString ) { completions.add( prefix.toString() ); } if( depth > 0 ) { for( int i = 0; i < children.length; i++ ) { children[ i ].getCompletions( prefix, depth - 1, completions ); } } } else { Node c = findChild( string.charAt( d ) ); if( c != null ) { c.findSuccessors( string.subSequence( d, string.length() ), depth, completions ); } } } private void getCompletions( StringBuilder prefix, int depth, List<String> completions ) { int pl = prefix.length(); prefix.append( value ); if( isString ) { completions.add( prefix.toString() ); } if( depth > 0 ) { for( int i = 0; i < children.length; i++ ) { children[ i ].getCompletions( prefix, depth - 1, completions ); } } prefix.delete( pl, prefix.length() ); } private void addString( CharSequence string ) { int d = findDivergenceIndex( string ); if( d < value.length() ) { // need to split this node Node child = new Node( value.subSequence( d, value.length() ) ); child.children = children; child.isString = isString; value = value.subSequence( 0, d ); children = new Node[] { child }; isString = false; } if( d == string.length() && d > 0 ) { isString = true; } else { Node c = findChild( string.charAt( d ) ); if( c != null ) { c.addString( string.subSequence( d, string.length() ) ); } else { insertNode( new Node( string.subSequence( d, string.length() ) ) ); } } } private void removeString( CharSequence string ) { int d = findDivergenceIndex( string ); if( d == value.length() && d == string.length() ) { isString = false; if( children.length == 1 ) { // unify nodes StringBuilder buff = new StringBuilder( value ); buff.append( children[ 0 ].value ); value = buff; isString = children[ 0 ].isString; children = children[ 0 ].children; } } else { if( d == value.length() ) { // check children Node c = findChild( string.charAt( d ) ); if( c != null ) { c.removeString( string.subSequence( d, string.length() ) ); } } } } private void findPredecessor( CharSequence string, StringBuilder buff ) { int d = findDivergenceIndex( string ); if( d == value.length() && d <= string.length() ) { // this entire node was in the tree and there still some // to go buff.append( value.subSequence( 0, d ) ); // check children if( d < string.length() ) { CharSequence c = string.subSequence( d, string.length() ); Node child = findChild( c.charAt( 0 ) ); child.findPredecessor( c, buff ); } } } private Node findChild( char c ) { for( int i = 0; i < children.length; i++ ) { if( c == children[ i ].value.charAt( 0 ) ) { return children[ i ]; } } return null; } private int findDivergenceIndex( CharSequence string ) { int d = 0; while( d < value.length() && d < string.length() && value.charAt( d ) == string.charAt( d ) ) { d++; } return d; } private void insertNode( Node child ) { int i = Arrays.binarySearch( children, child ); assert i < 0; i += 1; i = -i; Node[] nc = new Node[ children.length + 1 ]; System.arraycopy( children, 0, nc, 0, i ); nc[ i ] = child; if( i < nc.length ) { System.arraycopy( children, i, nc, i + 1, children.length - i ); } children = nc; } @Override public int compareTo( Node o ) { return TextUtils.compareTo( value, o.value ); } private void buildString( StringBuilder buff, int indent ) { for( int i = 0; i < indent; i++ ) { buff.append( " " ); } if( isString ) { buff.append( "\"" ); } buff.append( value ); if( isString ) { buff.append( "\"" ); } indent++; for( int i = 0; i < children.length; i++ ) { buff.append( "\n" ); children[ i ].buildString( buff, indent ); } } } } /** * Utility methods for dealing with text * * @author ryanm */ class TextUtils { /** * Tests if s starts with t, ignoring the case of the characters * * @param s * @param t * @return <code>true</code> if s.toLowerCase().equals( * t.toLowerCase() ), but more efficiently */ public static boolean startsWithIgnoreCase( CharSequence s, CharSequence t ) { if( s.length() < t.length() ) { return false; } for( int i = 0; i < t.length(); i++ ) { char slc = Character.toLowerCase( s.charAt( i ) ); char tlc = Character.toLowerCase( t.charAt( i ) ); if( slc != tlc ) { return false; } } return true; } /** * See {@link String#compareToIgnoreCase(String)} * * @param s * @param t * @return See {@link String#compareToIgnoreCase(String)} */ public static int compareToIgnoreCase( CharSequence s, CharSequence t ) { int i = 0; while( i < s.length() && i < t.length() ) { char a = Character.toLowerCase( s.charAt( i ) ); char b = Character.toLowerCase( t.charAt( i ) ); int diff = a - b; if( diff != 0 ) { return diff; } i++; } return s.length() - t.length(); } /** * See {@link String#compareTo(String)} * * @param s * @param t * @return See {@link String#compareTo(String)} */ public static int compareTo( CharSequence s, CharSequence t ) { int i = 0; while( i < s.length() && i < t.length() ) { char a = s.charAt( i ); char b = t.charAt( i ); int diff = a - b; if( diff != 0 ) { return diff; } i++; } return s.length() - t.length(); } /** * Splits a string * * @param composite * The composite string * @param leftBracket * the opening parenthesis character * @param rightBracket * the closing parenthesis character * @param separator * The character that separates tokens. Separators that * lie between at least one pair of parenthesis are * ignored * @return An array of individual tokens */ public static String[] split( String composite, char leftBracket, char rightBracket, char separator ) { List<String> c = new ArrayList<String>(); int start = 0; int i; int lbcount = 0; for( i = 0; i < composite.length(); i++ ) { if( composite.charAt( i ) == leftBracket ) { lbcount++; } else if( composite.charAt( i ) == rightBracket ) { lbcount--; } else if( composite.charAt( i ) == separator && lbcount == 0 ) { c.add( composite.substring( start, i ).trim() ); start = i + 1; } } c.add( composite.substring( start, i ).trim() ); return c.toArray( new String[ c.size() ] ); } /** * Wraps the input string in {@code <html></html>} and breaks it up * into lines with {@code <br>} elements. Useful for making * multi-line tootips and the like. * * @param s * The input String * @param lineLength * The desired length of the output lines. * @return The HTMLised string */ public static String HTMLiseString( String s, int lineLength ) { if( s != null ) { StringBuilder buff = new StringBuilder( s ); int lineStart = 0; while( lineStart + lineLength < s.length() ) { // find the first whitespace after the linelength int firstSpaceIndex = buff.indexOf( " ", lineStart + lineLength ); // replace it with a <br> if( firstSpaceIndex != -1 ) { buff.deleteCharAt( firstSpaceIndex ); buff.insert( firstSpaceIndex, "<br>" ); lineStart = firstSpaceIndex + 4; } else { lineStart = s.length(); } } buff.insert( 0, "<html>" ); buff.append( "</html>" ); return buff.toString(); } return null; } }