Optimize hash table modulus operation using big switch.

Integer division is really, really slow. The integer hash table benchmark spends most of its time in modulus operations! This change shaves 32% off the integer hash table benchmark runtime, and 8% off the string hash table benchmark runtime.

Optimize hash table modulus operation using big switch.
Integer division is really, really slow. The integer hash table benchmark spends most of its time in modulus operations! This change shaves 32% off the integer hash table benchmark runtime, and 8% off the string hash table benchmark runtime.
25a25704 · Kenton Varda · 255194de · 25a25704 · 25a25704
Commit 25a25704 authored Jun 17, 2018 by Kenton Varda
Hide whitespace changes
Inline Side-by-side

Showing with 55 additions and 4 deletions

table.c++ c++/src/kj/table.c++ +49 -0

table.h c++/src/kj/table.h +6 -4

No files found.
--- a/c++/src/kj/table.c++
+++ b/c++/src/kj/table.c++
@@ -90,6 +90,55 @@ static const size_t PRIMES[] = {
  1610612741,  // 2^30 = 1073741824
 };
+uint chooseBucket(uint hash, uint count) {
+  // Returns hash % count. Integer modulus by a runtime value is really slow, but the compiler can
+  // generate much faster code when the denominator is a constant. Since the cases below cover a
+  // fixed set of expected denominators, a big switch() wins; other counts use the default case.
+  // TODO(perf): Consider using power-of-two bucket sizes. We can safely do so as long as we demand
+  //   high-quality hash functions -- kj::hashCode() needs good diffusion even for integers, can't
+  //   just be a cast. Also be sure to implement Robin Hood hashing to avoid extremely bad negative
+  //   lookup time when elements have sequential hashes (otherwise, it could be necessary to scan
+  //   the entire list to determine that an element isn't present).
+  switch (count) {
+#define HANDLE(i) case i##u: return hash % i##u
+    HANDLE(         1);
+    HANDLE(         3);
+    HANDLE(         5);
+    HANDLE(        11);
+    HANDLE(        23);
+    HANDLE(        53);
+    HANDLE(        97);
+    HANDLE(       193);
+    HANDLE(       389);
+    HANDLE(       769);
+    HANDLE(      1543);
+    HANDLE(      3079);
+    HANDLE(      6151);
+    HANDLE(     12289);
+    HANDLE(     24593);
+    HANDLE(     49157);
+    HANDLE(     98317);
+    HANDLE(    196613);
+    HANDLE(    393241);
+    HANDLE(    786433);
+    HANDLE(   1572869);
+    HANDLE(   3145739);
+    HANDLE(   6291469);
+    HANDLE(  12582917);
+    HANDLE(  25165843);
+    HANDLE(  50331653);
+    HANDLE( 100663319);
+    HANDLE( 201326611);
+    HANDLE( 402653189);
+    HANDLE( 805306457);
+    HANDLE(1610612741);
+#undef HANDLE
+    default: return hash % count;  // count not listed above: fall back to a runtime modulus.
+  }
+}
 size_t chooseHashTableSize(uint size) {
  if (size == 0) return 0;

--- a/c++/src/kj/table.h
+++ b/c++/src/kj/table.h
@@ -756,6 +756,8 @@ inline size_t probeHash(const kj::Array<HashBucket>& buckets, size_t i) {
 kj::Array<HashBucket> rehash(kj::ArrayPtr<const HashBucket> oldBuckets, size_t targetSize);
+uint chooseBucket(uint hash, uint count);
 }  // namespace _ (private)
 template <typename Callbacks>
@@ -785,7 +787,7 @@ public:
    uint hashCode = cb.hashCode(table[pos]);
    Maybe<_::HashBucket&> erasedSlot;
-    for (uint i = hashCode % buckets.size();; i = _::probeHash(buckets, i)) {
+    for (uint i = _::chooseBucket(hashCode, buckets.size());; i = _::probeHash(buckets, i)) {
      auto& bucket = buckets[i];
      if (bucket.isEmpty()) {
        // no duplicates found
@@ -813,7 +815,7 @@ public:
  template <typename Row>
  void erase(kj::ArrayPtr<Row> table, size_t pos) {
    uint hashCode = cb.hashCode(table[pos]);
-    for (uint i = hashCode % buckets.size();; i = _::probeHash(buckets, i)) {
+    for (uint i = _::chooseBucket(hashCode, buckets.size());; i = _::probeHash(buckets, i)) {
      auto& bucket = buckets[i];
      if (bucket.isPos(pos)) {
        // found it
@@ -831,7 +833,7 @@ public:
  template <typename Row>
  void move(kj::ArrayPtr<Row> table, size_t oldPos, size_t newPos) {
    uint hashCode = cb.hashCode(table[oldPos]);
-    for (uint i = hashCode % buckets.size();; i = _::probeHash(buckets, i)) {
+    for (uint i = _::chooseBucket(hashCode, buckets.size());; i = _::probeHash(buckets, i)) {
      auto& bucket = buckets[i];
      if (bucket.isPos(oldPos)) {
        // found it
@@ -850,7 +852,7 @@ public:
    if (buckets.size() == 0) return nullptr;
    uint hashCode = cb.hashCode(params...);
-    for (uint i = hashCode % buckets.size();; i = _::probeHash(buckets, i)) {
+    for (uint i = _::chooseBucket(hashCode, buckets.size());; i = _::probeHash(buckets, i)) {
      auto& bucket = buckets[i];
      if (bucket.isEmpty()) {
        // not found.