hash_tables.h 8.37 KB
Newer Older
gejun's avatar
gejun committed
1 2 3 4 5 6 7
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//

//
// Deal with the differences between Microsoft and GNU implementations
// of hash_map. Allows all platforms to use |butil::hash_map| and
// |butil::hash_set|.
//  eg:
//   butil::hash_map<int, std::string> my_map;
//   butil::hash_set<int> my_set;
gejun's avatar
gejun committed
13 14 15 16 17 18 19 20
//
// NOTE: It is an explicit non-goal of this class to provide a generic hash
// function for pointers.  If you want to hash pointers to a particular class,
// please define the template specialization elsewhere (for example, in its
// header file) and keep it specific to just pointers to that class.  This is
// because identity hashes are not desirable for all types that might show up
// in containers as pointers.
// (Despite the above, the end of this file does define an address-based hash
// for arbitrary |Type*|.)

21 22
#ifndef BUTIL_CONTAINERS_HASH_TABLES_H_
#define BUTIL_CONTAINERS_HASH_TABLES_H_
gejun's avatar
gejun committed
23 24 25

#include <utility>

26 27 28
#include "butil/basictypes.h"
#include "butil/strings/string16.h"
#include "butil/build_config.h"
29
#include "butil/third_party/murmurhash3/murmurhash3.h"   // fmix64
gejun's avatar
gejun committed
30 31 32 33 34

#if defined(COMPILER_MSVC)
#include <hash_map>
#include <hash_set>

35
#define BUTIL_HASH_NAMESPACE stdext
gejun's avatar
gejun committed
36 37 38

#elif defined(COMPILER_GCC)
#if defined(OS_ANDROID)
39
#define BUTIL_HASH_NAMESPACE std
gejun's avatar
gejun committed
40
#else
41
#define BUTIL_HASH_NAMESPACE __gnu_cxx
gejun's avatar
gejun committed
42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
#endif

// This is a hack to disable the gcc 4.4 warning about hash_map and hash_set
// being deprecated.  We can get rid of this when we upgrade to VS2008 and we
// can use <tr1/unordered_map> and <tr1/unordered_set>.
#ifdef __DEPRECATED
#define CHROME_OLD__DEPRECATED __DEPRECATED
#undef __DEPRECATED
#endif

#if defined(OS_ANDROID)
#include <hash_map>
#include <hash_set>
#else
#include <ext/hash_map>
#include <ext/hash_set>
#endif

#include <string>

#ifdef CHROME_OLD__DEPRECATED
#define __DEPRECATED CHROME_OLD__DEPRECATED
#undef CHROME_OLD__DEPRECATED
#endif

67
namespace BUTIL_HASH_NAMESPACE {
gejun's avatar
gejun committed
68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106

#if !defined(OS_ANDROID)
// The GNU C++ library ships identity hash specializations for most integral
// types, but leaves out |long long| and |unsigned long long|.  Supply both
// here.  When |size_t| is narrower than |long long| the cast below drops the
// high bits, which is probably good enough for what we will use it for.

// Identity hash for |long long|.
template<>
struct hash<long long> {
  std::size_t operator()(long long key) const {
    return static_cast<std::size_t>(key);
  }
};

// Identity hash for |unsigned long long|.
template<>
struct hash<unsigned long long> {
  std::size_t operator()(unsigned long long key) const {
    return static_cast<std::size_t>(key);
  }
};
#endif  // !defined(OS_ANDROID)

// Implement string hash functions so that strings of various flavors can
// be used as keys in STL maps and sets.  The hash algorithm comes from the
// GNU C++ library, in <tr1/functional>.  It is duplicated here because GCC
// versions prior to 4.3.2 are unable to compile <tr1/functional> when RTTI
// is disabled, as it is in our build.

#define DEFINE_STRING_HASH(string_type) \
    template<> \
    struct hash<string_type> { \
      std::size_t operator()(const string_type& s) const { \
        std::size_t result = 0; \
        for (string_type::const_iterator i = s.begin(); i != s.end(); ++i) \
          result = (result * 131) + *i; \
        return result; \
      } \
    }

DEFINE_STRING_HASH(std::string);
107
DEFINE_STRING_HASH(butil::string16);
gejun's avatar
gejun committed
108 109 110

#undef DEFINE_STRING_HASH

111
}  // namespace BUTIL_HASH_NAMESPACE
gejun's avatar
gejun committed
112 113

#else  // COMPILER
114
#error define BUTIL_HASH_NAMESPACE for your compiler
gejun's avatar
gejun committed
115 116
#endif  // COMPILER

117
namespace butil {
118 119 120 121
using BUTIL_HASH_NAMESPACE::hash_map;
using BUTIL_HASH_NAMESPACE::hash_multimap;
using BUTIL_HASH_NAMESPACE::hash_multiset;
using BUTIL_HASH_NAMESPACE::hash_set;
gejun's avatar
gejun committed
122 123 124 125 126

// Implement hashing for pairs of at-most 32 bit integer values.
inline std::size_t HashInts32(uint32_t value1, uint32_t value2) {
  uint64_t value1_64 = value1;
  uint64_t hash64 = (value1_64 << 32) | value2;
127
  return static_cast<size_t>(fmix64(hash64));
gejun's avatar
gejun committed
128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215
}

// Hashes a pair of integer values that are each up to 64 bits wide.
//
// Uses the compound integer hash method described at
// http://opendatastructures.org/versions/edition-0.1d/ods-java/node33.html#SECTION00832000000000000000
// Each input is split into its low and high 32-bit words, every word is
// multiplied by its own fixed 32-bit constant, and the four 64-bit products
// are added together (wrapping modulo 2^64).  When std::size_t is at least
// 64 bits wide that sum is the result.  Otherwise the sum is scrambled once
// more with an odd 64-bit multiplier plus an additive constant, and only the
// most significant sizeof(std::size_t) bytes are returned.
inline std::size_t HashInts64(uint64_t value1, uint64_t value2) {
  // One fixed multiplier per 32-bit word, in the order
  // low(value1), high(value1), low(value2), high(value2).
  const uint64_t kMulLow1 = 842304669U;
  const uint64_t kMulHigh1 = 619063811U;
  const uint64_t kMulLow2 = 937041849U;
  const uint64_t kMulHigh2 = 3309708029U;
  const uint64_t kWordMask = 0xffffffffU;

  // Each operand is below 2^32, so no individual product can overflow.
  uint64_t sum = (value1 & kWordMask) * kMulLow1;
  sum += (value1 >> 32) * kMulHigh1;
  sum += (value2 & kWordMask) * kMulLow2;
  sum += (value2 >> 32) * kMulHigh2;

  if (sizeof(std::size_t) < sizeof(uint64_t)) {
    // Narrow std::size_t: multiply-add, then keep the top bits.
    const uint64_t kOddMultiplier =
        (static_cast<uint64_t>(1578233944U) << 32) | 194370989U;
    const uint64_t kAddend = 20591U << 16;
    const std::size_t kDropBits =
        8 * (sizeof(uint64_t) - sizeof(std::size_t));
    return static_cast<std::size_t>(
        (sum * kOddMultiplier + kAddend) >> kDropBits);
  }

  return static_cast<std::size_t>(sum);
}

// HashPair overloads for every ordered combination of the 16- and 32-bit
// signed and unsigned integer types.  Each overload simply forwards to
// HashInts32; the arguments are implicitly converted to uint32_t at that
// call.

// Emits one HashPair(FirstT, SecondT) overload.
#define DEFINE_32BIT_PAIR_HASH(FirstT, SecondT) \
inline std::size_t HashPair(FirstT first, SecondT second) { \
  return HashInts32(first, second); \
}

// Emits the four overloads whose first argument has type FirstT.
#define DEFINE_32BIT_PAIR_HASH_ROW(FirstT) \
  DEFINE_32BIT_PAIR_HASH(FirstT, int16_t) \
  DEFINE_32BIT_PAIR_HASH(FirstT, uint16_t) \
  DEFINE_32BIT_PAIR_HASH(FirstT, int32_t) \
  DEFINE_32BIT_PAIR_HASH(FirstT, uint32_t)

DEFINE_32BIT_PAIR_HASH_ROW(int16_t)
DEFINE_32BIT_PAIR_HASH_ROW(uint16_t)
DEFINE_32BIT_PAIR_HASH_ROW(int32_t)
DEFINE_32BIT_PAIR_HASH_ROW(uint32_t)

#undef DEFINE_32BIT_PAIR_HASH_ROW
#undef DEFINE_32BIT_PAIR_HASH

// HashPair overloads for every ordered pair of integer types in which at
// least one side is 64 bits wide.  Each overload forwards to HashInts64; the
// arguments are implicitly converted to uint64_t at that call.

// Emits one HashPair(FirstT, SecondT) overload.
#define DEFINE_64BIT_PAIR_HASH(FirstT, SecondT) \
inline std::size_t HashPair(FirstT first, SecondT second) { \
  return HashInts64(first, second); \
}

// First argument narrower than 64 bits: the second must be a 64-bit type.
#define DEFINE_64BIT_PAIR_HASH_NARROW_FIRST(FirstT) \
  DEFINE_64BIT_PAIR_HASH(FirstT, int64_t) \
  DEFINE_64BIT_PAIR_HASH(FirstT, uint64_t)

// First argument is 64 bits wide: the second may be any supported width.
#define DEFINE_64BIT_PAIR_HASH_WIDE_FIRST(FirstT) \
  DEFINE_64BIT_PAIR_HASH(FirstT, int16_t) \
  DEFINE_64BIT_PAIR_HASH(FirstT, uint16_t) \
  DEFINE_64BIT_PAIR_HASH(FirstT, int32_t) \
  DEFINE_64BIT_PAIR_HASH(FirstT, uint32_t) \
  DEFINE_64BIT_PAIR_HASH(FirstT, int64_t) \
  DEFINE_64BIT_PAIR_HASH(FirstT, uint64_t)

DEFINE_64BIT_PAIR_HASH_NARROW_FIRST(int16_t)
DEFINE_64BIT_PAIR_HASH_NARROW_FIRST(uint16_t)
DEFINE_64BIT_PAIR_HASH_NARROW_FIRST(int32_t)
DEFINE_64BIT_PAIR_HASH_NARROW_FIRST(uint32_t)
DEFINE_64BIT_PAIR_HASH_WIDE_FIRST(int64_t)
DEFINE_64BIT_PAIR_HASH_WIDE_FIRST(uint64_t)

#undef DEFINE_64BIT_PAIR_HASH_WIDE_FIRST
#undef DEFINE_64BIT_PAIR_HASH_NARROW_FIRST
#undef DEFINE_64BIT_PAIR_HASH
216
}  // namespace butil
gejun's avatar
gejun committed
217

218
namespace BUTIL_HASH_NAMESPACE {
gejun's avatar
gejun committed
219 220 221 222 223 224 225 226 227 228 229

// Implement methods for hashing a pair of integers, so they can be used as
// keys in STL containers.

// NOTE(gejun): Specialize ptr as well which is supposed to work with 
// containers by default

#if defined(COMPILER_MSVC)

template<typename Type1, typename Type2>
inline std::size_t hash_value(const std::pair<Type1, Type2>& value) {
230
  return butil::HashPair(value.first, value.second);
gejun's avatar
gejun committed
231 232 233 234 235 236 237 238 239 240
}
// Hash hook for raw pointers under MSVC: the hash is the pointer's address
// converted to an integer.  The pointee is never dereferenced.
template<typename Type>
inline std::size_t hash_value(Type* ptr) {
  const uintptr_t address = reinterpret_cast<uintptr_t>(ptr);
  return static_cast<std::size_t>(address);
}

#elif defined(COMPILER_GCC)
template<typename Type1, typename Type2>
struct hash<std::pair<Type1, Type2> > {
  std::size_t operator()(std::pair<Type1, Type2> value) const {
241
    return butil::HashPair(value.first, value.second);
gejun's avatar
gejun committed
242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259
  }
};
// hash<> specialization for raw pointers under GCC: the hash is the pointer's
// address converted to an integer.  The pointee is never dereferenced.
template<typename Type>
struct hash<Type*> {
  std::size_t operator()(Type* ptr) const {
    const uintptr_t address = reinterpret_cast<uintptr_t>(ptr);
    return static_cast<std::size_t>(address);
  }
};

#else
#error define hash<std::pair<Type1, Type2> > for your compiler
#endif  // COMPILER

}

#undef DEFINE_PAIR_HASH_FUNCTION_START
#undef DEFINE_PAIR_HASH_FUNCTION_END

260
#endif  // BUTIL_CONTAINERS_HASH_TABLES_H_