Mega Code Archive

 
Categories / Java / Collections Data Structure
 

Concurrent Hopscotch HashMap

//////////////////////////////////////////////////////////////////////////////// // ConcurrentHopscotchHashMap Class // //////////////////////////////////////////////////////////////////////////////// //TERMS OF USAGE //---------------------------------------------------------------------- // //  Permission to use, copy, modify and distribute this software and //  its documentation for any purpose is hereby granted without fee, //  provided that due acknowledgments to the authors are provided and //  this permission notice appears in all copies of the software. //  The software is provided "as is". There is no warranty of any kind. // //Authors: //  Maurice Herlihy //  Brown University //  and //  Nir Shavit //  Tel-Aviv University //  and //  Moran Tzafrir //  Tel-Aviv University // //  Date: Dec 2, 2008. // //////////////////////////////////////////////////////////////////////////////// // Programmer : Moran Tzafrir (MoranTza@gmail.com) // //////////////////////////////////////////////////////////////////////////////// //package xbird.util.concurrent.map; import java.util.concurrent.locks.ReentrantLock; public class ConcurrentHopscotchHashMap<K, V> {     //constants -----------------------------------     static final short _NULL_DELTA_SHORT = Short.MIN_VALUE;     static final int _NULL_DELTA_INT = (int) (Short.MIN_VALUE);     static final long _NULL_DELTA_FIRST_LONG = (long) (Short.MIN_VALUE & 0xFFFFL);     static final long _NULL_DELTA_NEXT_LONG = (long) ((Short.MIN_VALUE & 0xFFFFL) << 16);     static final long _NULL_HASH_DELTA = 0x0000000080008000L;     static final int _NULL_HASH = 0;     static final int _SEGMENT_SHIFT = 0; //choosed by empirical tests     static final long _FIRST_MASK = 0x000000000000FFFFL;     static final long _NEXT_MASK = 0x00000000FFFF0000L;     static final long _HASH_MASK = 0xFFFFFFFF00000000L;     static final long _NOT_NEXT_MASK = ~(_NEXT_MASK);     static final long _NOT_FIRST_MASK = ~(_FIRST_MASK);     static final long _NOT_HASH_MASK = ~(_HASH_MASK);     static final int _NEXT_SHIFT = 16;     static final int _HASH_SHIFT = 32;     static final int _RETRIES_BEFORE_LOCK = 2;     static final int _MAX_DELTA_BUCKET = Short.MAX_VALUE;     static final int _CACHE_MASK = 4 - 1;     static final int _NOT_CACHE_MASK = ~_CACHE_MASK;     static final int _NULL_INDX = -1;     //inner classes -------------------------------     static final class Segment<K, V> extends ReentrantLock {         volatile int _timestamp;         int _bucketk_mask;         long[] _table_hash_delta;         Object[] _table_key_value;         //AtomicInteger _lock;         int _count;         public Segment(final int initialCapacity) {             init(initialCapacity);         }         private void init(final int initialCapacity) {             //_lock = new AtomicInteger();             //_lock.set(0);             _timestamp = 0;             _bucketk_mask = initialCapacity - 1;             _count = 0;             _table_hash_delta = new long[initialCapacity];             _table_key_value = new Object[initialCapacity << 1];             //create the blocks of buckets             for(int iData = 0; iData < initialCapacity; ++iData) {                 _table_hash_delta[iData] = _NULL_HASH_DELTA;             }         }         /*         void lock() {             while(!_lock.compareAndSet(0, 0xFFFF)) { }         }         void unlock() {             _lock.set(0);         }         */         @SuppressWarnings("unchecked")         static final <K, V> Segment<K, V>[] newArray(final int numSegments) {             return new Segment[numSegments];         }         boolean containsKey(final K key, final int hash) {             //go over the list and look for key             int start_timestamp = _timestamp;             int iBucket = hash & _bucketk_mask;             long data = _table_hash_delta[iBucket];             final int first_delta = (short) data;             if(0 != first_delta) {                 if(_NULL_DELTA_SHORT == first_delta)                     return false;                 iBucket += first_delta;                 data = _table_hash_delta[iBucket];             }             do {                 if(hash == (data >> _HASH_SHIFT) && key.equals(_table_key_value[iBucket << 1]))                     return true;                 final int nextDelta = (int) data >> _NEXT_SHIFT;                 if(_NULL_DELTA_INT != nextDelta) {                     iBucket += nextDelta;                     data = _table_hash_delta[iBucket];                     continue;                 } else {                     final int curr_timestamp = _timestamp;                     if(curr_timestamp == start_timestamp)                         return false;                     start_timestamp = curr_timestamp;                     iBucket = hash & _bucketk_mask;                     data = _table_hash_delta[iBucket];                     final int first_delta2 = (short) data;                     if(0 != first_delta2) {                         if(_NULL_DELTA_SHORT == first_delta2)                             return false;                         iBucket += first_delta2;                         data = _table_hash_delta[iBucket];                     }                     continue;                 }             } while(true);         }         V get(final K key, final int hash) {             //go over the list and look for key             int start_timestamp = 0;             int iBucket = 0;             long data = 0;             boolean is_need_init = true;             do {                 if(is_need_init) {                     is_need_init = false;                     start_timestamp = _timestamp;                     iBucket = hash & _bucketk_mask;                     data = _table_hash_delta[iBucket];                     final int first_delta = (short) data;                     if(0 != first_delta) {                         if(_NULL_DELTA_SHORT == first_delta)                             return null;                         iBucket += first_delta;                         data = _table_hash_delta[iBucket];                     }                 }                 final int iRef;                 if(hash == (data >> _HASH_SHIFT)                         && key.equals(_table_key_value[iRef = iBucket << 1])) {                     final V value = (V) _table_key_value[iRef + 1];                     if(_timestamp == start_timestamp)                         return value;                     is_need_init = true;                     continue;                 }                 final int nextDelta = (int) data >> _NEXT_SHIFT;                 if(_NULL_DELTA_INT != nextDelta) {                     iBucket += nextDelta;                     data = _table_hash_delta[iBucket];                     continue;                 } else {                     if(_timestamp == start_timestamp)                         return null;                     is_need_init = true;                     continue;                 }             } while(true);         }         V put(final K key, final int hash, final V value) {             lock();             try {                 //look for key in hash-map .....................................                 final int i_start_bucket = hash & _bucketk_mask;                 int iBucket = i_start_bucket;                 long data = _table_hash_delta[i_start_bucket];                 final short first_delta = (short) data;                 if(_NULL_DELTA_SHORT != first_delta) {                     if(0 != first_delta) {                         iBucket += first_delta;                         data = _table_hash_delta[iBucket];                     }                     do {                         final int iRef;                         if(hash == (data >> _HASH_SHIFT)                                 && key.equals(_table_key_value[iRef = (iBucket << 1)]))                             return (V) _table_key_value[iRef + 1];                         final int next_delta = (int) data >> _NEXT_SHIFT;                         if(_NULL_DELTA_INT == next_delta)                             break;                         else {                             iBucket += next_delta;                             data = _table_hash_delta[iBucket];                         }                     } while(true);                 }                 //try to place the key in the same cache-line ..................                 final int i_start_cacheline = i_start_bucket & _NOT_CACHE_MASK;                 final int i_end_cacheline = i_start_cacheline + _CACHE_MASK;                 int i_free_bucket = i_start_bucket;                 do {                     long free_data = _table_hash_delta[i_free_bucket];                     if(_NULL_HASH == (free_data >> _HASH_SHIFT)) {                         //we found a free bucket at the cahce-line, so                         //add the new bucket to the begining of the list                         int i_ref_bucket = i_free_bucket << 1;                         _table_key_value[i_ref_bucket] = key;                         _table_key_value[++i_ref_bucket] = value;                         free_data &= _NOT_HASH_MASK;                         free_data |= ((long) hash << _HASH_SHIFT);                         if(0 == first_delta) {                             final long start_data = _table_hash_delta[i_start_bucket];                             final int start_next = (int) start_data >> _NEXT_SHIFT;                             if(_NULL_DELTA_INT != start_next) {                                 final long new_free_next = i_start_bucket + start_next                                         - i_free_bucket;                                 _table_hash_delta[i_free_bucket] = (free_data & _NOT_NEXT_MASK)                                         | ((new_free_next << _NEXT_SHIFT) & _NEXT_MASK);                             } else                                 _table_hash_delta[i_free_bucket] = free_data;                             final long new_start_next = i_free_bucket - i_start_bucket;                             _table_hash_delta[i_start_bucket] = (start_data & _NOT_NEXT_MASK)                                     | ((new_start_next << _NEXT_SHIFT) & _NEXT_MASK);                         } else {//0 != first_delta                             if(_NULL_DELTA_SHORT != first_delta) {                                 final long new_free_next = i_start_bucket + first_delta                                         - i_free_bucket;                                 free_data &= _NOT_NEXT_MASK;                                 free_data |= ((new_free_next << _NEXT_SHIFT) & _NEXT_MASK);                             }                             final long start_data;                             if(i_free_bucket != i_start_bucket) {                                 start_data = _table_hash_delta[i_start_bucket];                                 _table_hash_delta[i_free_bucket] = free_data;                             } else                                 start_data = free_data;                             final long new_start_first = i_free_bucket - i_start_bucket;                             _table_hash_delta[i_start_bucket] = (start_data & _NOT_FIRST_MASK)                                     | (new_start_first & _FIRST_MASK);                         }                         ++_count;                         ++_timestamp;                         return null;                     }                     ++i_free_bucket;                     if(i_free_bucket > i_end_cacheline)                         i_free_bucket = i_start_cacheline;                 } while(i_start_bucket != i_free_bucket);                 //place key in arbitrary free forward bucket ...................                 int i_max_bucket = i_start_bucket + _MAX_DELTA_BUCKET;                 if(i_max_bucket > _bucketk_mask)                     i_max_bucket = _bucketk_mask;                 i_free_bucket = i_end_cacheline + 1;                 while(i_free_bucket <= i_max_bucket) {                     long free_data = _table_hash_delta[i_free_bucket];                     if(_NULL_HASH == (free_data >> _HASH_SHIFT)) {                         //we found a free bucket outside of the cahce-line, so                         //add the new bucket to the end of the list                         int i_ref_bucket = i_free_bucket << 1;                         _table_key_value[i_ref_bucket] = key;                         _table_key_value[++i_ref_bucket] = value;                         free_data &= _NOT_HASH_MASK;                         free_data |= ((long) hash << _HASH_SHIFT);                         _table_hash_delta[i_free_bucket] = free_data;                         if(_NULL_DELTA_SHORT == first_delta) {                             long new_start_first = (i_free_bucket - i_start_bucket) & _FIRST_MASK;                             long start_data = (_table_hash_delta[i_start_bucket] & _NOT_FIRST_MASK)                                     | new_start_first;                             _table_hash_delta[i_start_bucket] = start_data;                         } else {                             long new_last_next = ((i_free_bucket - iBucket) << _NEXT_SHIFT)                                     & _NEXT_MASK;                             long last_data = (_table_hash_delta[iBucket] & _NOT_NEXT_MASK)                                     | new_last_next;                             _table_hash_delta[iBucket] = last_data;                         }                         ++_count;                         ++_timestamp;                         return null;                     }                     i_free_bucket += 2;                 }                 //place key in arbitrary free backward bucket ...................                 int i_min_bucket = i_start_bucket - _MAX_DELTA_BUCKET;                 if(i_min_bucket < 0)                     i_min_bucket = 0;                 i_free_bucket = i_start_cacheline - 1;                 while(i_free_bucket >= i_min_bucket) {                     long free_data = _table_hash_delta[i_free_bucket];                     if(_NULL_HASH == (free_data >> _HASH_SHIFT)) {                         //we found a free bucket outside of the cahce-line, so                         //add the new bucket to the end of the list                         int i_ref_bucket = i_free_bucket << 1;                         _table_key_value[i_ref_bucket] = key;                         _table_key_value[++i_ref_bucket] = value;                         free_data &= _NOT_HASH_MASK;                         free_data |= ((long) hash << _HASH_SHIFT);                         _table_hash_delta[i_free_bucket] = free_data;                         if(_NULL_DELTA_SHORT == first_delta) {                             long new_start_first = (i_free_bucket - i_start_bucket) & _FIRST_MASK;                             long start_data = (_table_hash_delta[i_start_bucket] & _NOT_FIRST_MASK)                                     | new_start_first;                             _table_hash_delta[i_start_bucket] = start_data;                         } else {                             long new_last_next = ((i_free_bucket - iBucket) << _NEXT_SHIFT)                                     & _NEXT_MASK;                             long last_data = (_table_hash_delta[iBucket] & _NOT_NEXT_MASK)                                     | new_last_next;                             _table_hash_delta[iBucket] = last_data;                         }                         ++_count;                         ++_timestamp;                         return null;                     }                     i_free_bucket -= 2;                 }             } finally {                 unlock();             }             return null;         }         private void optimize_cacheline_use(final int i_free_bucket) {             final int i_start_cacheline = i_free_bucket & _NOT_CACHE_MASK;             final int i_end_cacheline = i_start_cacheline + _CACHE_MASK;             //go over the buckets that reside in the cacheline of the free bucket             for(int i_cacheline = i_start_cacheline; i_cacheline <= i_end_cacheline; ++i_cacheline) {                 //check if current bucket has keys                 final long data = _table_hash_delta[i_cacheline];                 final short first_delta = (short) data;                 if(_NULL_DELTA_INT != first_delta) {                     int last_i_relocate = _NULL_INDX;                     int i_relocate = i_cacheline + first_delta;                     int curr_delta = first_delta;                     //go over the keys in the bucket-list                     do {                         //if the key reside outside the cahceline                         if(curr_delta < 0 || curr_delta > _CACHE_MASK) {                             //copy key, value, & hash to the free bucket                             final int i_key_value = i_free_bucket << 1;                             final int i_rel_key_value = i_relocate << 1;                             _table_key_value[i_key_value] = _table_key_value[i_rel_key_value];                             _table_key_value[i_key_value + 1] = _table_key_value[i_rel_key_value + 1];                             long relocate_data = _table_hash_delta[i_relocate];                             long free_data = _table_hash_delta[i_free_bucket];                             free_data &= _NOT_HASH_MASK;                             free_data |= (relocate_data & _HASH_MASK);                             //update the next-field of the free-bucket                             free_data &= _NOT_NEXT_MASK;                             final int relocate_next_delta = (int) relocate_data >> _NEXT_SHIFT;                             if(_NULL_DELTA_INT == relocate_next_delta) {                                 free_data |= _NULL_DELTA_NEXT_LONG;                             } else {                                 final long new_next = (((i_relocate + relocate_next_delta) - i_free_bucket) & 0xFFFFL) << 16;                                 free_data |= new_next;                             }                             _table_hash_delta[i_free_bucket] = free_data;                             //update the "first" or "next" field of the last                             if(_NULL_INDX == last_i_relocate) {                                 long start_data = _table_hash_delta[i_cacheline] & _NOT_FIRST_MASK;                                 start_data |= ((i_free_bucket - i_cacheline) & 0xFFFFL);                                 _table_hash_delta[i_cacheline] = start_data;                             } else {                                 long last_data = _table_hash_delta[last_i_relocate]                                         & _NOT_NEXT_MASK;                                 last_data |= (((i_free_bucket - last_i_relocate) & 0xFFFFL) << 16);                                 _table_hash_delta[last_i_relocate] = last_data;                             }                             //                             ++_timestamp;                             relocate_data &= _NOT_HASH_MASK;//hash=null                             relocate_data &= _NOT_NEXT_MASK;                             relocate_data |= _NULL_DELTA_NEXT_LONG;//next = null                             _table_hash_delta[i_relocate] = relocate_data;                             _table_key_value[i_rel_key_value] = null;//key=null                             _table_key_value[i_rel_key_value + 1] = null;//value=null                             return;                         }                         final long relocate_data = _table_hash_delta[i_relocate];                         final int next_delta = (int) relocate_data >> _NEXT_SHIFT;                         if(_NULL_DELTA_INT == next_delta)                             break;                         last_i_relocate = i_relocate;                         curr_delta += next_delta;                         i_relocate += next_delta;                     } while(true);//for on list                 }//if list exists             }//for on list         }         V remove(final K key, final int hash) {             lock();             try {                 //go over the list and look for key                 final int i_start_bucket = hash & _bucketk_mask;                 int iBucket = i_start_bucket;                 long data = _table_hash_delta[iBucket];                 final short first_delta = (short) data;                 if(0 != first_delta) {                     if(_NULL_DELTA_SHORT == first_delta)                         return null;                     iBucket += first_delta;                     data = _table_hash_delta[iBucket];                 }                 int i_last_bucket = -1;                 do {                     final int iRef;                     if(hash == (data >> _HASH_SHIFT)                             && key.equals(_table_key_value[iRef = (iBucket << 1)])) {                         data &= _NOT_HASH_MASK;                         final int next_delta = (int) data >> _NEXT_SHIFT;                         _table_hash_delta[iBucket] = data; //hash = null                         _table_key_value[iRef] = null; //key = null;                         final int iRef2 = iRef + 1;                         final V key_value = (V) _table_key_value[iRef2];                         _table_key_value[iRef2] = null; //value = null;                         if(-1 == i_last_bucket) {                             long start_data = _table_hash_delta[i_start_bucket] & _NOT_FIRST_MASK;                             if(_NULL_DELTA_INT == next_delta) {                                 start_data |= _NULL_DELTA_FIRST_LONG;                             } else {                                 final long new_first = (first_delta + next_delta) & 0xFFFFL;                                 start_data |= new_first;                             }                             if(i_start_bucket == iBucket) {                                 start_data &= _NOT_NEXT_MASK;                                 start_data |= _NULL_DELTA_NEXT_LONG;                                 --_count;                                 ++_timestamp;                                 _table_hash_delta[i_start_bucket] = start_data;                                 //return key_value;                             } else                                 _table_hash_delta[i_start_bucket] = start_data;                         } else {                             long last_data = _table_hash_delta[i_last_bucket];                             final int last_next_delta = (int) last_data >> _NEXT_SHIFT;                             last_data &= _NOT_NEXT_MASK;                             if(_NULL_DELTA_INT == next_delta) {                                 last_data |= _NULL_DELTA_NEXT_LONG;                             } else {                                 final long new_next = ((last_next_delta + next_delta) & 0xFFFFL) << 16;                                 last_data |= new_next;                             }                             _table_hash_delta[i_last_bucket] = last_data;                         }                         if(i_start_bucket != iBucket) {                             --_count;                             ++_timestamp;                             data &= _NOT_NEXT_MASK;                             data |= _NULL_DELTA_NEXT_LONG;                             _table_hash_delta[iBucket] = data; //next = null                         }                         optimize_cacheline_use(iBucket);                         return key_value;                     }                     final int nextDelta = (int) data >> _NEXT_SHIFT;                     if(_NULL_DELTA_INT != nextDelta) {                         i_last_bucket = iBucket;                         iBucket += nextDelta;                         data = _table_hash_delta[iBucket];                         continue;                     } else                         return null;                 } while(true);             } finally {                 unlock();             }         }         void clear() {}     }     //fields --------------------------------------     final int _segment_shift;     final int _segment_mask;     final Segment<K, V>[] _segments;     //small utilities -----------------------------     private static int nearestPowerOfTwo(int value) {         int rc = 1;         while(rc < value) {             rc <<= 1;         }         return rc;     }     private static final int hash(int h) {         // Spread bits to regularize both segment and index locations,         // using variant of single-word Wang/Jenkins hash.         h += (h << 15) ^ 0xffffcd7d;         h ^= (h >>> 10);         h += (h << 3);         h ^= (h >>> 6);         h += (h << 2) + (h << 14);         return h ^ (h >>> 16);     }     private final Segment<K, V> segmentFor(int hash) {         return _segments[(hash >>> _segment_shift) & _segment_mask];         //return _segments[(hash >>> 8) & _segment_mask];         //return _segments[hash & _segment_mask];     }     //public operations ---------------------------     public ConcurrentHopscotchHashMap(final int initialCapacity, final int concurrencyLevel) {         //check for the validity of the algorithems         if(initialCapacity < 0 || concurrencyLevel <= 0 /*|| machineCachelineSize <= 0*/)             throw new IllegalArgumentException();         //set the user preference, should we force cache-line alignment         //_is_cacheline_alignment = isCachelineAlignment;         //calculate cache-line mask         //final int bucketSize = Math.max(8, 2*machinePointerSize);         //_cache_mask = ( (machineCachelineSize / bucketSize) - 1 );         //allocate the segments array         final int numSegments = nearestPowerOfTwo(concurrencyLevel);         _segment_mask = (numSegments - 1);         _segments = Segment.<K, V> newArray(numSegments);         // Find power-of-two sizes best matching arguments         int sshift = 0;         int ssize = 1;         while(ssize < numSegments) {             ++sshift;             ssize <<= 1;         }         _segment_shift = 32 - sshift;         //initialize the segmens         final int initCapacity = nearestPowerOfTwo(initialCapacity);         final int segmentCapacity = initCapacity / numSegments;         for(int iSeg = 0; iSeg < numSegments; ++iSeg) {             _segments[iSeg] = new Segment<K, V>(segmentCapacity);         }     }     public boolean isEmpty() {         final Segment<K, V>[] segments = this._segments;         /*          * We keep track of per-segment "timestamp" to avoid ABA          * problems in which an element in one segment was added and          * in another removed during traversal, in which case the          * table was never actually empty at any point. Note the          * similar use of "timestamp" in the size() and containsValue()          * methods, which are the only other methods also susceptible          * to ABA problems.          */         int[] mc = new int[segments.length];         int mcsum = 0;         for(int i = 0; i < segments.length; ++i) {             if(0 != segments[i]._count)                 return false;             else                 mcsum += mc[i] = segments[i]._timestamp;         }         // If mcsum happens to be zero, then we know we got a snapshot         // before any modifications at all were made.  This is         // probably common enough to bother tracking.         if(mcsum != 0) {             for(int i = 0; i < segments.length; ++i) {                 if(0 != segments[i]._count || mc[i] != segments[i]._timestamp)                     return false;             }         }         return true;     }     public int size() {         final Segment<K, V>[] segments = this._segments;         long sum = 0;         long check = 0;         int[] mc = new int[segments.length];         // Try a few times to get accurate count. On failure due to         // continuous async changes in table, resort to locking.         for(int iTry = 0; iTry < _RETRIES_BEFORE_LOCK; ++iTry) {             check = 0;             sum = 0;             int mcsum = 0;             for(int i = 0; i < segments.length; ++i) {                 sum += segments[i]._count;                 mcsum += mc[i] = segments[i]._timestamp;             }             if(mcsum != 0) {                 for(int i = 0; i < segments.length; ++i) {                     check += segments[i]._count;                     if(mc[i] != segments[i]._timestamp) {                         check = -1; // force retry                         break;                     }                 }             }             if(check == sum)                 break;         }         if(check != sum) { // Resort to locking all segments             sum = 0;             for(int i = 0; i < segments.length; ++i)                 segments[i].lock();             for(int i = 0; i < segments.length; ++i)                 sum += segments[i]._count;             for(int i = 0; i < segments.length; ++i)                 segments[i].unlock();         }         if(sum > Integer.MAX_VALUE)             return Integer.MAX_VALUE;         else             return (int) sum;     }     //contains     public boolean containsKey(final K key) {         final int hash = hash(key.hashCode());         return segmentFor(hash).containsKey(key, hash);     }     public V get(final K key) {         final int hash = hash(key.hashCode());         return segmentFor(hash).get(key, hash);     }     //add     public V put(K key, V value) {         if(value == null)             throw new NullPointerException();         final int hash = hash(key.hashCode());         return segmentFor(hash).put(key, hash, value);     }     //remove     public V remove(final K key) {         final int hash = hash(key.hashCode());         return segmentFor(hash).remove(key, hash);     }     //general     public void clear() {} }