Commit 3bd1f6fc authored by Kenton Varda

Add hashing framework and HashMap and TreeMap implementations.

parent 8707e731
// Copyright (c) 2018 Kenton Varda and contributors
// Licensed under the MIT License:
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include "hash.h"
namespace kj {
namespace _ { // private
uint HashCoder::operator*(ArrayPtr<const byte> s) const {
  // Hashes a raw byte sequence to a `uint` using murmur2, adapted from libc++ source code.
  //
  // TODO(perf): Use CityHash or FarmHash on 64-bit machines? They seem optimized for x86-64; what
  //   about ARM? Ask Vlad for advice.

  constexpr uint m = 0x5bd1e995;
  constexpr uint r = 24;

  const byte* cursor = s.begin();
  uint remaining = s.size();

  // The hash state is seeded with the input length.
  uint hash = remaining;

  // Consume the input one 4-byte word at a time.
  while (remaining >= 4) {
    // memcpy() rather than a pointer cast, so that the read does not depend on `cursor` being
    // suitably aligned for a `uint`.
    uint word;
    memcpy(&word, cursor, sizeof(word));

    word *= m;
    word ^= word >> r;
    word *= m;

    hash = (hash * m) ^ word;

    cursor += 4;
    remaining -= 4;
  }

  // Fold in the 0-3 trailing bytes, highest-indexed byte first.
  if (remaining == 3) {
    hash ^= cursor[2] << 16;
  }
  if (remaining >= 2) {
    hash ^= cursor[1] << 8;
  }
  if (remaining >= 1) {
    hash ^= cursor[0];
    hash *= m;
  }

  // Final avalanche.
  hash ^= hash >> 13;
  hash *= m;
  hash ^= hash >> 15;
  return hash;
}
} // namespace _ (private)
} // namespace kj
// Copyright (c) 2018 Kenton Varda and contributors
// Licensed under the MIT License:
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#pragma once
#if defined(__GNUC__) && !KJ_HEADER_WARNINGS
#pragma GCC system_header
#endif
#include "string.h"
namespace kj {
namespace _ { // private
struct HashCoder {
// This is a dummy type with only one instance: HASHCODER (below). To make an arbitrary type
// hashable, define `operator*(HashCoder, T)` to return any other type that is already hashable.
// Be sure to declare the operator in the same namespace as `T` **or** in the global scope.
// You can use the KJ_HASHCODE() macro as syntax sugar for this.
//
// A more usual way to accomplish what we're doing here would be to require that you define
// a function like `hashCode(T)` and then rely on argument-dependent lookup. However, this has
// the problem that it pollutes other people's namespaces and even the global namespace. For
// example, some other project may already have functions called `hashCode` which do something
// different. Declaring `operator*` with `HashCoder` as the left operand cannot conflict with
// anything.

// Byte and character sequences. Every overload in this group forwards to the
// ArrayPtr<const byte> overload, which is only declared here and defined out-of-line.
uint operator*(ArrayPtr<const byte> s) const;
inline uint operator*(ArrayPtr<byte> s) const { return operator*(s.asConst()); }
inline uint operator*(ArrayPtr<const char> s) const { return operator*(s.asBytes()); }
inline uint operator*(ArrayPtr<char> s) const { return operator*(s.asBytes()); }
inline uint operator*(const Array<const char>& s) const { return operator*(s.asBytes()); }
inline uint operator*(const Array<char>& s) const { return operator*(s.asBytes()); }
inline uint operator*(const String& s) const { return operator*(s.asBytes()); }
inline uint operator*(const StringPtr& s) const { return operator*(s.asBytes()); }

// nullptr always hashes to zero.
inline uint operator*(decltype(nullptr)) const { return 0; }

// bool, character types, and integers up to `int`: the hash is simply the value converted to
// `uint`.
inline uint operator*(bool b) const { return b; }
inline uint operator*(char i) const { return i; }
inline uint operator*(signed char i) const { return i; }
inline uint operator*(unsigned char i) const { return i; }
inline uint operator*(signed short i) const { return i; }
inline uint operator*(unsigned short i) const { return i; }
inline uint operator*(signed int i) const { return i; }
inline uint operator*(unsigned int i) const { return i; }

// `long`: dispatch on its size, treating it either like `uint` or like `unsigned long long`.
inline uint operator*(signed long i) const {
if (sizeof(i) == sizeof(uint)) {
return operator*(static_cast<uint>(i));
} else {
return operator*(static_cast<unsigned long long>(i));
}
}
inline uint operator*(unsigned long i) const {
if (sizeof(i) == sizeof(uint)) {
return operator*(static_cast<uint>(i));
} else {
return operator*(static_cast<unsigned long long>(i));
}
}

// `long long`: the signed variant is hashed as its unsigned counterpart.
inline uint operator*(signed long long i) const {
return operator*(static_cast<unsigned long long>(i));
}
inline uint operator*(unsigned long long i) const {
// Mix 64 bits to 32 bits in such a way that if our input values differ primarily in the upper
// 32 bits, we still get good diffusion. (I.e. we cannot just truncate!)
//
// 49123 is an arbitrarily-chosen prime that is vaguely close to 2^16.
//
// TODO(perf): I just made this up. Is it OK?
return static_cast<uint>(i) + static_cast<uint>(i >> 32) * 49123;
}

// Pointers are hashed by address (the pointee is never inspected). If a pointer is the same size
// as `uint`, the address itself is the hash; otherwise it is reduced through the
// `unsigned long long` overload above.
template <typename T>
uint operator*(T* ptr) const {
if (sizeof(ptr) == sizeof(uint)) {
// TODO(cleanup): In C++17, make the if() above be `if constexpr ()`, then change this to
// reinterpret_cast<uint>(ptr).
return reinterpret_cast<unsigned long long>(ptr);
} else {
return operator*(reinterpret_cast<unsigned long long>(ptr));
}
}

// Arrays of arbitrary elements. The defaulted template parameter removes these overloads from
// consideration unless `HashCoder * const T&` is itself a valid expression. The definitions
// appear later in this header, after kj::hashCode() has been declared.
template <typename T, typename = decltype(instance<const HashCoder&>() * instance<const T&>())>
uint operator*(ArrayPtr<T> arr) const;
template <typename T, typename = decltype(instance<const HashCoder&>() * instance<const T&>())>
uint operator*(const Array<T>& arr) const;

// Any type with a `hashCode()` method: returns whatever that method returns. This overload only
// participates when the call `value.hashCode()` is well-formed.
template <typename T, typename Result = decltype(instance<T>().hashCode())>
inline Result operator*(T&& value) const { return kj::fwd<T>(value).hashCode(); }
};
static KJ_CONSTEXPR(const) HashCoder HASHCODER = HashCoder();
// The HashCoder instance. kj::hashCode() uses it as the left operand of `operator*` in order to
// select the hashing overload that applies to a given value.
} // namespace _ (private)
#define KJ_HASHCODE(...) operator*(::kj::_::HashCoder, __VA_ARGS__)
// Defines a hash function for a custom type. Example:
//
// class Foo {...};
// inline uint KJ_HASHCODE(const Foo& foo) { return kj::hashCode(foo.x, foo.y); }
//
// This allows Foo to be passed to hashCode().
//
// The function should be declared either in the same namespace as the target type or in the global
// namespace. It can return any type which is itself hashable -- that value will be hashed in turn
// until a `uint` comes out.
inline uint hashCode(uint value) {
  // Base case: a `uint` already is a hash code, so it passes through unchanged.
  return value;
}

template <typename T>
inline uint hashCode(T&& value) {
  // Let HashCoder turn the value into something "more hashable", then hash that result in turn.
  // This repeats until a plain `uint` comes out.
  return hashCode(_::HASHCODER * kj::fwd<T>(value));
}

template <typename... T>
inline uint hashCode(T&&... values) {
  // Hash each argument on its own, then hash the raw bytes of the resulting list of hashes.
  uint perValue[] = { hashCode(kj::fwd<T>(values))... };
  kj::ArrayPtr<uint> hashList(perValue);
  return hashCode(hashList.asBytes());
}
// kj::hashCode() is a universal hashing function, like kj::str() is a universal stringification
// function. Throw stuff in, get a hash code.
//
// Hash codes may differ between different processes, even running exactly the same code.
//
// NOT SUITABLE FOR CRYPTOGRAPHY. This is for hash tables, not crypto.
// =======================================================================================
// inline implementation details
namespace _ { // private
template <typename T, typename>
inline uint HashCoder::operator*(ArrayPtr<T> arr) const {
  // Hash each array element to create a string of hashes, then murmur2 over those.
  //
  // TODO(perf): Choose a more-modern hash. (See hash.c++.)

  constexpr uint m = 0x5bd1e995;
  constexpr uint r = 24;

  // Seed with the byte length that the sequence of per-element hashes would occupy.
  uint hash = arr.size() * sizeof(uint);

  for (auto iter = arr.begin(); iter != arr.end(); ++iter) {
    // Each element contributes exactly one 32-bit word: its own hash code.
    uint word = kj::hashCode(*iter) * m;
    word ^= word >> r;
    word *= m;

    hash = (hash * m) ^ word;
  }

  // Final avalanche.
  hash ^= hash >> 13;
  hash *= m;
  hash ^= hash >> 15;
  return hash;
}
template <typename T, typename>
inline uint HashCoder::operator*(const Array<T>& arr) const {
  // An owned array hashes exactly like a pointer to its contents.
  return this->operator*(arr.asPtr());
}
} // namespace _ (private)
} // namespace kj
// Copyright (c) 2018 Kenton Varda and contributors
// Licensed under the MIT License:
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include "map.h"
#include <kj/test.h>
namespace kj {
namespace _ {
KJ_TEST("HashMap") {
// Exercises insert, find, upsert, erase, size and iteration on a HashMap keyed by String.
HashMap<String, int> map;
map.insert(kj::str("foo"), 123);
map.insert(kj::str("bar"), 456);
// Lookups use string literals rather than String objects as the search key.
KJ_EXPECT(KJ_ASSERT_NONNULL(map.find("foo"_kj)) == 123);
KJ_EXPECT(KJ_ASSERT_NONNULL(map.find("bar"_kj)) == 456);
KJ_EXPECT(map.find("baz"_kj) == nullptr);
// "foo" is already present, so the callback is expected to run with the stored value and the
// newly supplied one; it then overwrites the stored value in place.
// NOTE(review): the callback takes the new value as `uint` although the map's value type is
// `int`; this relies on an implicit conversion -- confirm whether that is intended.
map.upsert(kj::str("foo"), 789, [](int& old, uint newValue) {
KJ_EXPECT(old == 123);
KJ_EXPECT(newValue == 789);
old = 321;
});
KJ_EXPECT(KJ_ASSERT_NONNULL(map.find("foo"_kj)) == 321);
// erase() by key is expected to report whether a matching entry existed.
KJ_EXPECT(map.erase("bar"_kj));
KJ_EXPECT(!map.erase("baz"_kj));
KJ_EXPECT(KJ_ASSERT_NONNULL(map.find("foo"_kj)) == 321);
// Only "foo" should remain, so iteration visits exactly one entry.
KJ_EXPECT(map.size() == 1);
KJ_EXPECT(map.begin()->key == "foo");
auto iter = map.begin();
++iter;
KJ_EXPECT(iter == map.end());
// Erase the last entry by passing the entry itself instead of a key.
map.erase(*map.begin());
KJ_EXPECT(map.size() == 0);
}
KJ_TEST("TreeMap") {
// Runs the same insert / find / upsert / erase / iteration sequence as the HashMap test, this
// time against TreeMap.
TreeMap<String, int> map;
map.insert(kj::str("foo"), 123);
map.insert(kj::str("bar"), 456);
// Lookups use string literals rather than String objects as the search key.
KJ_EXPECT(KJ_ASSERT_NONNULL(map.find("foo"_kj)) == 123);
KJ_EXPECT(KJ_ASSERT_NONNULL(map.find("bar"_kj)) == 456);
KJ_EXPECT(map.find("baz"_kj) == nullptr);
// "foo" is already present, so the callback is expected to run with the stored value and the
// newly supplied one; it then overwrites the stored value in place.
// NOTE(review): the callback takes the new value as `uint` although the map's value type is
// `int`; this relies on an implicit conversion -- confirm whether that is intended.
map.upsert(kj::str("foo"), 789, [](int& old, uint newValue) {
KJ_EXPECT(old == 123);
KJ_EXPECT(newValue == 789);
old = 321;
});
KJ_EXPECT(KJ_ASSERT_NONNULL(map.find("foo"_kj)) == 321);
// erase() by key is expected to report whether a matching entry existed.
KJ_EXPECT(map.erase("bar"_kj));
KJ_EXPECT(!map.erase("baz"_kj));
KJ_EXPECT(KJ_ASSERT_NONNULL(map.find("foo"_kj)) == 321);
// Only "foo" should remain, so iteration visits exactly one entry.
KJ_EXPECT(map.size() == 1);
KJ_EXPECT(map.begin()->key == "foo");
auto iter = map.begin();
++iter;
KJ_EXPECT(iter == map.end());
// Erase the last entry by passing the entry itself instead of a key.
map.erase(*map.begin());
KJ_EXPECT(map.size() == 0);
}
KJ_TEST("TreeMap range") {
// Checks ordered iteration over a TreeMap as well as range() and eraseRange().
TreeMap<String, int> map;
// Keys are deliberately inserted out of sorted order.
map.insert(kj::str("foo"), 1);
map.insert(kj::str("bar"), 2);
map.insert(kj::str("baz"), 3);
map.insert(kj::str("qux"), 4);
map.insert(kj::str("corge"), 5);
{
// Collect the keys in iteration order; they are expected to come out sorted.
auto ordered = KJ_MAP(e, map) -> kj::StringPtr { return e.key; };
KJ_ASSERT(ordered.size() == 5);
KJ_EXPECT(ordered[0] == "bar");
KJ_EXPECT(ordered[1] == "baz");
KJ_EXPECT(ordered[2] == "corge");
KJ_EXPECT(ordered[3] == "foo");
KJ_EXPECT(ordered[4] == "qux");
}
{
// range("baz", "foo") is expected to yield "baz" and "corge" only: the lower bound is
// included, the upper bound is not.
auto range = map.range("baz", "foo");
auto iter = range.begin();
KJ_EXPECT(iter->key == "baz");
++iter;
KJ_EXPECT(iter->key == "corge");
++iter;
KJ_EXPECT(iter == range.end());
}
// eraseRange() with the same bounds is expected to remove exactly "baz" and "corge".
map.eraseRange("baz", "foo");
{
auto ordered = KJ_MAP(e, map) -> kj::StringPtr { return e.key; };
KJ_ASSERT(ordered.size() == 3);
KJ_EXPECT(ordered[0] == "bar");
KJ_EXPECT(ordered[1] == "foo");
KJ_EXPECT(ordered[2] == "qux");
}
}
} // namespace _
} // namespace kj
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment