From f6884cc6ddb6b938443512d029c7beacfc7a3ffc Mon Sep 17 00:00:00 2001 From: Colin Patrick McCabe Date: Thu, 18 Mar 2021 09:58:49 -0700 Subject: [PATCH] MINOR: Fix BaseHashTable sizing (#10334) The array backing BaseHashTable is intended to be sized as a power of two. Due to a bug, the initial array size was calculated incorrectly in some cases. Also make the maximum array size the largest possible 31-bit power of two. Previously it was a smaller size but this was due to a typo. Reviewers: Ismael Juma , Jose Sancio , Chia-Ping Tsai --- .../apache/kafka/timeline/BaseHashTable.java | 30 ++++++++++++++----- .../kafka/timeline/BaseHashTableTest.java | 20 +++++++++++++ 2 files changed, 43 insertions(+), 7 deletions(-) diff --git a/metadata/src/main/java/org/apache/kafka/timeline/BaseHashTable.java b/metadata/src/main/java/org/apache/kafka/timeline/BaseHashTable.java index 7183528173c..05315466519 100644 --- a/metadata/src/main/java/org/apache/kafka/timeline/BaseHashTable.java +++ b/metadata/src/main/java/org/apache/kafka/timeline/BaseHashTable.java @@ -40,14 +40,14 @@ class BaseHashTable { private final static double MAX_LOAD_FACTOR = 0.75f; /** - * The natural log of 2 + * The minimum number of slots we can have in the hash table. */ - private final static double LN_2 = Math.log(2); + final static int MIN_CAPACITY = 2; /** * The maximum number of slots we can have in the hash table. */ - private final static int MAX_CAPACITY = 0x4000000; + final static int MAX_CAPACITY = 1 << 30; private Object[] elements; private int size = 0; @@ -56,12 +56,28 @@ class BaseHashTable { this.elements = new Object[expectedSizeToCapacity(expectedSize)]; } + /** + * Calculate the capacity we should provision, given the expected size. + * + * Our capacity must always be a power of 2, and never less than 2 or more + * than MAX_CAPACITY. We use 64-bit numbers here to avoid overflow + * concerns. + */ static int expectedSizeToCapacity(int expectedSize) { - if (expectedSize <= 1) { - return 2; + long minCapacity = (long) Math.ceil((float) expectedSize / MAX_LOAD_FACTOR); + return Math.max(MIN_CAPACITY, + (int) Math.min(MAX_CAPACITY, roundUpToPowerOfTwo(minCapacity))); + } + + private static long roundUpToPowerOfTwo(long i) { + if (i <= 0) { + return 0; + } else if (i > (1L << 62)) { + throw new ArithmeticException("There are no 63-bit powers of 2 higher than " + + "or equal to " + i); + } else { + return 1L << -Long.numberOfLeadingZeros(i - 1); } - double sizeToFit = expectedSize / MAX_LOAD_FACTOR; - return (int) Math.min(MAX_CAPACITY, Math.ceil(Math.log(sizeToFit) / LN_2)); } final int baseSize() { diff --git a/metadata/src/test/java/org/apache/kafka/timeline/BaseHashTableTest.java b/metadata/src/test/java/org/apache/kafka/timeline/BaseHashTableTest.java index 11f774cab7b..a73357c5234 100644 --- a/metadata/src/test/java/org/apache/kafka/timeline/BaseHashTableTest.java +++ b/metadata/src/test/java/org/apache/kafka/timeline/BaseHashTableTest.java @@ -121,4 +121,24 @@ public class BaseHashTableTest { assertEquals(Integer.valueOf(i), table.baseRemove(Integer.valueOf(i))); } } + + @Test + public void testExpectedSizeToCapacity() { + assertEquals(2, BaseHashTable.expectedSizeToCapacity(Integer.MIN_VALUE)); + assertEquals(2, BaseHashTable.expectedSizeToCapacity(-123)); + assertEquals(2, BaseHashTable.expectedSizeToCapacity(0)); + assertEquals(2, BaseHashTable.expectedSizeToCapacity(1)); + assertEquals(4, BaseHashTable.expectedSizeToCapacity(2)); + assertEquals(4, BaseHashTable.expectedSizeToCapacity(3)); + assertEquals(8, BaseHashTable.expectedSizeToCapacity(4)); + assertEquals(16, BaseHashTable.expectedSizeToCapacity(12)); + assertEquals(32, BaseHashTable.expectedSizeToCapacity(13)); + assertEquals(0x2000000, BaseHashTable.expectedSizeToCapacity(0x1010400)); + assertEquals(0x4000000, BaseHashTable.expectedSizeToCapacity(0x2000000)); + assertEquals(0x4000000, BaseHashTable.expectedSizeToCapacity(0x2000001)); + assertEquals(BaseHashTable.MAX_CAPACITY, BaseHashTable.expectedSizeToCapacity(BaseHashTable.MAX_CAPACITY)); + assertEquals(BaseHashTable.MAX_CAPACITY, BaseHashTable.expectedSizeToCapacity(BaseHashTable.MAX_CAPACITY + 1)); + assertEquals(BaseHashTable.MAX_CAPACITY, BaseHashTable.expectedSizeToCapacity(Integer.MAX_VALUE - 1)); + assertEquals(BaseHashTable.MAX_CAPACITY, BaseHashTable.expectedSizeToCapacity(Integer.MAX_VALUE)); + } }