Skip to content

Commit 2b8e6fd

Browse files
committed
Modified utility hash maps to gracefully handle very large number of entries.
1 parent 3593987 commit 2b8e6fd

File tree

4 files changed

+465
-58
lines changed

4 files changed

+465
-58
lines changed

src/com/esotericsoftware/kryo/util/IdentityMap.java

Lines changed: 117 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -20,18 +20,20 @@
2020
import java.util.Iterator;
2121
import java.util.NoSuchElementException;
2222

23-
/** An unordered map that uses identity comparison for keys. This implementation is a cuckoo hash map using 3 hashes, random
24-
* walking, and a small stash for problematic keys. Null keys are not allowed. Null values are allowed. No allocation is done
25-
* except when growing the table size. <br>
23+
/** An unordered map that uses identity comparison for keys. This implementation is a cuckoo hash map using 3 hashes
24+
* (if table size is less than 2^16) or 4 hashes (if table size is greater than or equal to 2^16), random walking, and
25+
* a small stash for problematic keys. Null keys are not allowed. Null values are allowed. No allocation is done except when
26+
* growing the table size. <br>
2627
* <br>
2728
* This map performs very fast get, containsKey, and remove (typically O(1), worst case O(log(n))). Put may be a bit slower,
2829
* depending on hash collisions. Load factors greater than 0.91 greatly increase the chances the map will have to rehash to the
2930
* next higher POT size.
3031
* @author Nathan Sweet */
3132
public class IdentityMap<K, V> {
32-
private static final int PRIME1 = 0xbe1f14b1;
33-
private static final int PRIME2 = 0xb4b82e39;
34-
private static final int PRIME3 = 0xced1c241;
33+
// primes for hash functions 2, 3, and 4
34+
private static final int PRIME2 = 0xbe1f14b1;
35+
private static final int PRIME3 = 0xb4b82e39;
36+
private static final int PRIME4 = 0xced1c241;
3537

3638
public int size;
3739

@@ -43,6 +45,7 @@ public class IdentityMap<K, V> {
4345
private int hashShift, mask, threshold;
4446
private int stashCapacity;
4547
private int pushIterations;
48+
private boolean isBigTable;
4649

4750
private Entries entries;
4851
private Values values;
@@ -70,6 +73,9 @@ public IdentityMap (int initialCapacity, float loadFactor) {
7073
if (loadFactor <= 0) throw new IllegalArgumentException("loadFactor must be > 0: " + loadFactor);
7174
this.loadFactor = loadFactor;
7275

76+
// big table is when capacity >= 2^16
77+
isBigTable = (capacity >>> 16) != 0 ? true : false;
78+
7379
threshold = (int)(capacity * loadFactor);
7480
mask = capacity - 1;
7581
hashShift = 31 - Integer.numberOfTrailingZeros(capacity);
@@ -82,7 +88,10 @@ public IdentityMap (int initialCapacity, float loadFactor) {
8288

8389
public V put (K key, V value) {
8490
if (key == null) throw new IllegalArgumentException("key cannot be null.");
91+
// avoid getfield opcode
8592
K[] keyTable = this.keyTable;
93+
int mask = this.mask;
94+
boolean isBigTable = this.isBigTable;
8695

8796
// Check for existing keys.
8897
int hashCode = System.identityHashCode(key);
@@ -110,6 +119,18 @@ public V put (K key, V value) {
110119
return oldValue;
111120
}
112121

122+
int index4 = -1;
123+
K key4 = null;
124+
if (isBigTable) {
125+
index4 = hash4(hashCode);
126+
key4 = keyTable[index4];
127+
if (key4 == key) {
128+
V oldValue = valueTable[index4];
129+
valueTable[index4] = value;
130+
return oldValue;
131+
}
132+
}
133+
113134
// Update key in the stash.
114135
for (int i = capacity, n = i + stashSize; i < n; i++) {
115136
if (keyTable[i] == key) {
@@ -141,7 +162,14 @@ public V put (K key, V value) {
141162
return null;
142163
}
143164

144-
push(key, value, index1, key1, index2, key2, index3, key3);
165+
if (isBigTable && key4 == null) {
166+
keyTable[index4] = key;
167+
valueTable[index4] = value;
168+
if (size++ >= threshold) resize(capacity << 1);
169+
return null;
170+
}
171+
172+
push(key, value, index1, key1, index2, key2, index3, key3, index4, key4);
145173
return null;
146174
}
147175

@@ -176,21 +204,37 @@ private void putResize (K key, V value) {
176204
return;
177205
}
178206

179-
push(key, value, index1, key1, index2, key2, index3, key3);
207+
int index4 = -1;
208+
K key4 = null;
209+
if (isBigTable) {
210+
index4 = hash4(hashCode);
211+
key4 = keyTable[index4];
212+
if (key4 == null) {
213+
keyTable[index4] = key;
214+
valueTable[index4] = value;
215+
if (size++ >= threshold) resize(capacity << 1);
216+
return;
217+
}
218+
}
219+
220+
push(key, value, index1, key1, index2, key2, index3, key3, index4, key4);
180221
}
181222

182-
private void push (K insertKey, V insertValue, int index1, K key1, int index2, K key2, int index3, K key3) {
223+
private void push (K insertKey, V insertValue, int index1, K key1, int index2, K key2, int index3, K key3, int index4, K key4) {
224+
// avoid getfield opcode
183225
K[] keyTable = this.keyTable;
184226
V[] valueTable = this.valueTable;
185227
int mask = this.mask;
228+
boolean isBigTable = this.isBigTable;
186229

187230
// Push keys until an empty bucket is found.
188231
K evictedKey;
189232
V evictedValue;
190233
int i = 0, pushIterations = this.pushIterations;
234+
int n = isBigTable ? 4 : 3;
191235
do {
192236
// Replace the key and value for one of the hashes.
193-
switch (ObjectMap.random.nextInt(3)) {
237+
switch (ObjectMap.random.nextInt(n)) {
194238
case 0:
195239
evictedKey = key1;
196240
evictedValue = valueTable[index1];
@@ -203,12 +247,18 @@ private void push (K insertKey, V insertValue, int index1, K key1, int index2, K
203247
keyTable[index2] = insertKey;
204248
valueTable[index2] = insertValue;
205249
break;
206-
default:
250+
case 2:
207251
evictedKey = key3;
208252
evictedValue = valueTable[index3];
209253
keyTable[index3] = insertKey;
210254
valueTable[index3] = insertValue;
211255
break;
256+
default:
257+
evictedKey = key4;
258+
evictedValue = valueTable[index4];
259+
keyTable[index4] = insertKey;
260+
valueTable[index4] = insertValue;
261+
break;
212262
}
213263

214264
// If the evicted key hashes to an empty bucket, put it there and stop.
@@ -240,6 +290,17 @@ private void push (K insertKey, V insertValue, int index1, K key1, int index2, K
240290
return;
241291
}
242292

293+
if (isBigTable) {
294+
index4 = hash4(hashCode);
295+
key4 = keyTable[index4];
296+
if (key4 == null) {
297+
keyTable[index4] = evictedKey;
298+
valueTable[index4] = evictedValue;
299+
if (size++ >= threshold) resize(capacity << 1);
300+
return;
301+
}
302+
}
303+
243304
if (++i == pushIterations) break;
244305

245306
insertKey = evictedKey;
@@ -271,7 +332,15 @@ public V get (K key) {
271332
index = hash2(hashCode);
272333
if (key != keyTable[index]) {
273334
index = hash3(hashCode);
274-
if (key != keyTable[index]) return getStash(key, null);
335+
if (key != keyTable[index]) {
336+
if (isBigTable) {
337+
index = hash4(hashCode);
338+
if (key != keyTable[index]) return getStash(key, null);
339+
}
340+
else {
341+
return getStash(key, null);
342+
}
343+
}
275344
}
276345
}
277346
return valueTable[index];
@@ -284,7 +353,15 @@ public V get (K key, V defaultValue) {
284353
index = hash2(hashCode);
285354
if (key != keyTable[index]) {
286355
index = hash3(hashCode);
287-
if (key != keyTable[index]) return getStash(key, defaultValue);
356+
if (key != keyTable[index]) {
357+
if (isBigTable) {
358+
index = hash4(hashCode);
359+
if (key != keyTable[index]) return getStash(key, defaultValue);
360+
}
361+
else {
362+
return getStash(key, defaultValue);
363+
}
364+
}
288365
}
289366
}
290367
return valueTable[index];
@@ -326,6 +403,17 @@ public V remove (K key) {
326403
return oldValue;
327404
}
328405

406+
if (isBigTable) {
407+
index = hash4(hashCode);
408+
if (keyTable[index] == key) {
409+
keyTable[index] = null;
410+
V oldValue = valueTable[index];
411+
valueTable[index] = null;
412+
size--;
413+
return oldValue;
414+
}
415+
}
416+
329417
return removeStash(key);
330418
}
331419

@@ -412,7 +500,14 @@ public boolean containsKey (K key) {
412500
index = hash2(hashCode);
413501
if (key != keyTable[index]) {
414502
index = hash3(hashCode);
415-
if (key != keyTable[index]) return containsKeyStash(key);
503+
if (key != keyTable[index]) {
504+
if (isBigTable) {
505+
index = hash4(hashCode);
506+
if (key != keyTable[index]) return containsKeyStash(key);
507+
} else {
508+
return containsKeyStash(key);
509+
}
510+
}
416511
}
417512
}
418513
return true;
@@ -462,6 +557,9 @@ private void resize (int newSize) {
462557
stashCapacity = Math.max(3, (int)Math.ceil(Math.log(newSize)) * 2);
463558
pushIterations = Math.max(Math.min(newSize, 8), (int)Math.sqrt(newSize) / 8);
464559

560+
// big table is when capacity >= 2^16
561+
isBigTable = (capacity >>> 16) != 0 ? true : false;
562+
465563
K[] oldKeyTable = keyTable;
466564
V[] oldValueTable = valueTable;
467565

@@ -489,6 +587,11 @@ private int hash3 (int h) {
489587
return (h ^ h >>> hashShift) & mask;
490588
}
491589

590+
// Fourth hash function: maps an identity hash code to a key table index.
// Every call visible in this diff is guarded by isBigTable.
// Multiplies h by PRIME4 (int overflow wraps), XORs the product with itself
// unsigned-shifted right by hashShift (>>> binds tighter than ^), then
// masks with mask so the result lies in [0, mask].
private int hash4 (int h) {
591+
h *= PRIME4;
592+
return (h ^ h >>> hashShift) & mask;
593+
}
594+
492595
public String toString () {
493596
if (size == 0) return "[]";
494597
StringBuilder buffer = new StringBuilder(32);

0 commit comments

Comments
 (0)