hash_tables.h 9.03 KB
Newer Older
gejun's avatar
gejun committed
1 2 3 4 5 6 7
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//

//
// Deal with the differences between Microsoft and GNU implementations
// of hash_map. Allows all platforms to use |butil::hash_map| and
// |butil::hash_set|.
//  eg:
//   butil::hash_map<int, std::string> my_map;
//   butil::hash_set<int> my_set;
gejun's avatar
gejun committed
13 14 15 16 17 18 19 20
//
// NOTE: It is an explicit non-goal of this class to provide a generic hash
// function for pointers.  If you want to hash pointers to a particular class,
// please define the template specialization elsewhere (for example, in its
// header file) and keep it specific to just pointers to that class.  This is
// because identity hashes are not desirable for all types that might show up
// in containers as pointers.

21 22
#ifndef BUTIL_CONTAINERS_HASH_TABLES_H_
#define BUTIL_CONTAINERS_HASH_TABLES_H_
gejun's avatar
gejun committed
23 24 25

#include <utility>

26 27 28
#include "butil/basictypes.h"
#include "butil/strings/string16.h"
#include "butil/build_config.h"
gejun's avatar
gejun committed
29 30 31 32 33

#if defined(COMPILER_MSVC)
#include <hash_map>
#include <hash_set>

34
#define BUTIL_HASH_NAMESPACE stdext
gejun's avatar
gejun committed
35 36 37

#elif defined(COMPILER_GCC)
#if defined(OS_ANDROID)
38
#define BUTIL_HASH_NAMESPACE std
gejun's avatar
gejun committed
39
#else
40
#define BUTIL_HASH_NAMESPACE __gnu_cxx
gejun's avatar
gejun committed
41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65
#endif

// This is a hack to disable the gcc 4.4 warning about hash_map and hash_set
// being deprecated.  We can get rid of this when we upgrade to VS2008 and we
// can use <tr1/unordered_map> and <tr1/unordered_set>.
#ifdef __DEPRECATED
#define CHROME_OLD__DEPRECATED __DEPRECATED
#undef __DEPRECATED
#endif

#if defined(OS_ANDROID)
#include <hash_map>
#include <hash_set>
#else
#include <ext/hash_map>
#include <ext/hash_set>
#endif

#include <string>

#ifdef CHROME_OLD__DEPRECATED
#define __DEPRECATED CHROME_OLD__DEPRECATED
#undef CHROME_OLD__DEPRECATED
#endif

66
namespace BUTIL_HASH_NAMESPACE {
gejun's avatar
gejun committed
67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105

#if !defined(OS_ANDROID)
// The GNU C++ library provides identity hash functions for many integral types,
// but not for |long long|.  This hash function will truncate if |size_t| is
// narrower than |long long|.  This is probably good enough for what we will
// use it for.

// Expands to a full specialization of |hash| for |int_type| whose call
// operator returns its argument converted to std::size_t.
#define DEFINE_TRIVIAL_HASH(int_type) \
    template<> \
    struct hash<int_type> { \
      std::size_t operator()(int_type v) const { \
        const std::size_t as_size = static_cast<std::size_t>(v); \
        return as_size; \
      } \
    }

DEFINE_TRIVIAL_HASH(long long);
DEFINE_TRIVIAL_HASH(unsigned long long);

// The helper macro is only needed for the two specializations above.
#undef DEFINE_TRIVIAL_HASH
#endif  // !defined(OS_ANDROID)

// Implement string hash functions so that strings of various flavors can
// be used as keys in STL maps and sets.  The hash algorithm comes from the
// GNU C++ library, in <tr1/functional>.  It is duplicated here because GCC
// versions prior to 4.3.2 are unable to compile <tr1/functional> when RTTI
// is disabled, as it is in our build.

#define DEFINE_STRING_HASH(string_type) \
    template<> \
    struct hash<string_type> { \
      std::size_t operator()(const string_type& s) const { \
        std::size_t result = 0; \
        for (string_type::const_iterator i = s.begin(); i != s.end(); ++i) \
          result = (result * 131) + *i; \
        return result; \
      } \
    }

DEFINE_STRING_HASH(std::string);
106
DEFINE_STRING_HASH(butil::string16);
gejun's avatar
gejun committed
107 108 109

#undef DEFINE_STRING_HASH

110
}  // namespace BUTIL_HASH_NAMESPACE
gejun's avatar
gejun committed
111 112

#else  // COMPILER
113
#error define BUTIL_HASH_NAMESPACE for your compiler
gejun's avatar
gejun committed
114 115
#endif  // COMPILER

116
namespace butil {
117 118 119 120
using BUTIL_HASH_NAMESPACE::hash_map;
using BUTIL_HASH_NAMESPACE::hash_multimap;
using BUTIL_HASH_NAMESPACE::hash_multiset;
using BUTIL_HASH_NAMESPACE::hash_set;
gejun's avatar
gejun committed
121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232

// Hashes a pair of 32-bit unsigned integers.
//
// The two inputs are first packed into one 64-bit value (|value1| in the high
// half, |value2| in the low half).  If size_t can hold 64 bits, that packed
// value is the hash.  Otherwise it is reduced with multiply-add hashing, as
// described in Theorem 4.3.3 of the thesis "Über die Komplexität der
// Multiplikation in eingeschränkten Branchingprogrammmodellen" by Woelfel:
//
//   h32(x32, y32) = (h64(x32, y32) * rand_odd64 + rand16 * 2^16) % 2^64 / 2^32
//
// Contact danakj@chromium.org for any questions.
inline std::size_t HashInts32(uint32_t value1, uint32_t value2) {
  const uint64_t packed = (static_cast<uint64_t>(value1) << 32) | value2;

  // Wide size_t: the packed value fits as-is, no reduction required.
  if (sizeof(std::size_t) >= sizeof(uint64_t)) {
    return static_cast<std::size_t>(packed);
  }

  // Fixed constants of the multiply-add scheme: an odd 64-bit multiplier and
  // a 16-bit value shifted into bits 16..31.
  const uint64_t multiplier = 481046412LL << 32 | 1025306955LL;
  const uint32_t addend = 10121U << 16;
  const uint64_t mixed = packed * multiplier + addend;

  // Keep only the topmost sizeof(size_t) bytes of the mixed value.
  return static_cast<std::size_t>(
      mixed >> (8 * (sizeof(uint64_t) - sizeof(std::size_t))));
}

// Hashes a pair of 64-bit unsigned integers.
//
// Uses the compound integer hash method: each input is split into its low and
// high 32-bit halves, every half is multiplied by its own fixed 32-bit
// constant, and the four 64-bit products are summed (wrapping modulo 2^64):
// http://opendatastructures.org/versions/edition-0.1d/ods-java/node33.html#SECTION00832000000000000000
// If size_t is narrower than 64 bits the sum is then reduced with the same
// multiply-add step that HashInts32 uses, with different constants.
inline std::size_t HashInts64(uint64_t value1, uint64_t value2) {
  // One multiplier per 32-bit half, in the order:
  // low(value1), high(value1), low(value2), high(value2).
  const uint32_t multipliers[4] = {
    842304669U, 619063811U, 937041849U, 3309708029U
  };
  const uint32_t halves[4] = {
    static_cast<uint32_t>(value1 & 0xffffffff),
    static_cast<uint32_t>((value1 >> 32) & 0xffffffff),
    static_cast<uint32_t>(value2 & 0xffffffff),
    static_cast<uint32_t>((value2 >> 32) & 0xffffffff)
  };

  uint64_t sum = 0;
  for (int idx = 0; idx < 4; ++idx) {
    sum += static_cast<uint64_t>(halves[idx]) * multipliers[idx];
  }

  // Wide size_t: the 64-bit sum is the hash.
  if (sizeof(std::size_t) >= sizeof(uint64_t)) {
    return static_cast<std::size_t>(sum);
  }

  // Narrow size_t: multiply-add, then keep the topmost sizeof(size_t) bytes.
  const uint64_t multiplier = 1578233944LL << 32 | 194370989LL;
  const uint32_t addend = 20591U << 16;
  const uint64_t mixed = sum * multiplier + addend;
  return static_cast<std::size_t>(
      mixed >> (8 * (sizeof(uint64_t) - sizeof(std::size_t))));
}

#define DEFINE_32BIT_PAIR_HASH(Type1, Type2) \
inline std::size_t HashPair(Type1 value1, Type2 value2) { \
  return HashInts32(value1, value2); \
}

DEFINE_32BIT_PAIR_HASH(int16_t, int16_t);
DEFINE_32BIT_PAIR_HASH(int16_t, uint16_t);
DEFINE_32BIT_PAIR_HASH(int16_t, int32_t);
DEFINE_32BIT_PAIR_HASH(int16_t, uint32_t);
DEFINE_32BIT_PAIR_HASH(uint16_t, int16_t);
DEFINE_32BIT_PAIR_HASH(uint16_t, uint16_t);
DEFINE_32BIT_PAIR_HASH(uint16_t, int32_t);
DEFINE_32BIT_PAIR_HASH(uint16_t, uint32_t);
DEFINE_32BIT_PAIR_HASH(int32_t, int16_t);
DEFINE_32BIT_PAIR_HASH(int32_t, uint16_t);
DEFINE_32BIT_PAIR_HASH(int32_t, int32_t);
DEFINE_32BIT_PAIR_HASH(int32_t, uint32_t);
DEFINE_32BIT_PAIR_HASH(uint32_t, int16_t);
DEFINE_32BIT_PAIR_HASH(uint32_t, uint16_t);
DEFINE_32BIT_PAIR_HASH(uint32_t, int32_t);
DEFINE_32BIT_PAIR_HASH(uint32_t, uint32_t);

#undef DEFINE_32BIT_PAIR_HASH

// Generates one HashPair() overload for (LhsType, RhsType) that forwards both
// arguments to HashInts64(); they are implicitly converted to uint64_t there.
#define DEFINE_64BIT_PAIR_HASH(LhsType, RhsType) \
inline std::size_t HashPair(LhsType value1, RhsType value2) { \
  const std::size_t combined = HashInts64(value1, value2); \
  return combined; \
}

// For a 16- or 32-bit first argument: pair it with both 64-bit types.
#define DEFINE_64BIT_PAIR_HASH_NARROW_ROW(LhsType) \
  DEFINE_64BIT_PAIR_HASH(LhsType, int64_t) \
  DEFINE_64BIT_PAIR_HASH(LhsType, uint64_t)

// For a 64-bit first argument: pair it with every 16-, 32- and 64-bit type.
#define DEFINE_64BIT_PAIR_HASH_WIDE_ROW(LhsType) \
  DEFINE_64BIT_PAIR_HASH(LhsType, int16_t) \
  DEFINE_64BIT_PAIR_HASH(LhsType, uint16_t) \
  DEFINE_64BIT_PAIR_HASH(LhsType, int32_t) \
  DEFINE_64BIT_PAIR_HASH(LhsType, uint32_t) \
  DEFINE_64BIT_PAIR_HASH(LhsType, int64_t) \
  DEFINE_64BIT_PAIR_HASH(LhsType, uint64_t)

// Every combination in which at least one argument is a 64-bit integer.
DEFINE_64BIT_PAIR_HASH_NARROW_ROW(int16_t)
DEFINE_64BIT_PAIR_HASH_NARROW_ROW(uint16_t)
DEFINE_64BIT_PAIR_HASH_NARROW_ROW(int32_t)
DEFINE_64BIT_PAIR_HASH_NARROW_ROW(uint32_t)
DEFINE_64BIT_PAIR_HASH_WIDE_ROW(int64_t)
DEFINE_64BIT_PAIR_HASH_WIDE_ROW(uint64_t)

#undef DEFINE_64BIT_PAIR_HASH_WIDE_ROW
#undef DEFINE_64BIT_PAIR_HASH_NARROW_ROW
#undef DEFINE_64BIT_PAIR_HASH
233
}  // namespace butil
gejun's avatar
gejun committed
234

235
namespace BUTIL_HASH_NAMESPACE {
gejun's avatar
gejun committed
236 237 238 239 240 241 242 243 244 245 246

// Implement methods for hashing a pair of integers, so they can be used as
// keys in STL containers.

// NOTE(gejun): Also specialize the hash for raw pointers, which are expected
// to work as container keys by default.

#if defined(COMPILER_MSVC)

template<typename Type1, typename Type2>
inline std::size_t hash_value(const std::pair<Type1, Type2>& value) {
247
  return butil::HashPair(value.first, value.second);
gejun's avatar
gejun committed
248 249 250 251 252 253 254 255 256 257
}
// Hash for raw pointers (MSVC flavor): the hash is the pointer's address
// converted to an integer.
template<typename Type>
inline std::size_t hash_value(Type* ptr) {
  const uintptr_t address = reinterpret_cast<uintptr_t>(ptr);
  return address;
}

#elif defined(COMPILER_GCC)
template<typename Type1, typename Type2>
struct hash<std::pair<Type1, Type2> > {
  std::size_t operator()(std::pair<Type1, Type2> value) const {
258
    return butil::HashPair(value.first, value.second);
gejun's avatar
gejun committed
259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276
  }
};
// Hash for raw pointers (GCC flavor): the hash is the pointer's address
// converted to an integer.
template<typename Type>
struct hash<Type*> {
  std::size_t operator()(Type* ptr) const {
    const uintptr_t address = reinterpret_cast<uintptr_t>(ptr);
    return address;
  }
};

#else
#error define hash<std::pair<Type1, Type2> > for your compiler
#endif  // COMPILER

}

// NOTE(review): neither macro below is defined anywhere in the visible part
// of this header, so these #undefs look like leftovers from an earlier
// revision -- confirm before removing.
#undef DEFINE_PAIR_HASH_FUNCTION_START
#undef DEFINE_PAIR_HASH_FUNCTION_END

277
#endif  // BUTIL_CONTAINERS_HASH_TABLES_H_