// Copyright (c) 2013 Baidu, Inc.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Author: Ge,Jun (gejun@baidu.com)
// Date: Wed Nov 27 12:59:20 CST 2013

// This closed addressing hash-map puts first linked node in bucket array
// directly to save an extra memory indirection. As a result, this map yields
// close performance to raw array on nearly all operations, probably being the
// fastest hashmap for small-sized key/value ever.
//
// Performance comparisons between several maps:
//  [ value = 8 bytes ]
//  Sequentially inserting 100 into FlatMap/std::map/base::PooledMap/base::hash_map takes 11/108/55/58
//  Sequentially erasing 100 from FlatMap/std::map/base::PooledMap/base::hash_map takes 7/123/55/37
//  Sequentially inserting 1000 into FlatMap/std::map/base::PooledMap/base::hash_map takes 10/92/55/54
//  Sequentially erasing 1000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 6/67/51/35
//  Sequentially inserting 10000 into FlatMap/std::map/base::PooledMap/base::hash_map takes 10/100/66/54
//  Sequentially erasing 10000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 6/72/55/35
//  [ value = 32 bytes ]
//  Sequentially inserting 100 into FlatMap/std::map/base::PooledMap/base::hash_map takes 14/108/56/56
//  Sequentially erasing 100 from FlatMap/std::map/base::PooledMap/base::hash_map takes 6/77/53/38
//  Sequentially inserting 1000 into FlatMap/std::map/base::PooledMap/base::hash_map takes 14/94/54/53
//  Sequentially erasing 1000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 4/66/49/36
//  Sequentially inserting 10000 into FlatMap/std::map/base::PooledMap/base::hash_map takes 13/106/62/54
//  Sequentially erasing 10000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 4/69/53/36
//  [ value = 128 bytes ]
//  Sequentially inserting 100 into FlatMap/std::map/base::PooledMap/base::hash_map takes 31/182/96/96
//  Sequentially erasing 100 from FlatMap/std::map/base::PooledMap/base::hash_map takes 8/117/51/44
//  Sequentially inserting 1000 into FlatMap/std::map/base::PooledMap/base::hash_map takes 29/191/100/97
//  Sequentially erasing 1000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 6/100/49/44
//  Sequentially inserting 10000 into FlatMap/std::map/base::PooledMap/base::hash_map takes 30/184/113/114
//  Sequentially erasing 10000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 6/99/52/43
//  [ value = 8 bytes ]
//  Randomly inserting 100 into FlatMap/std::map/base::PooledMap/base::hash_map takes 11/171/108/60
//  Randomly erasing 100 from FlatMap/std::map/base::PooledMap/base::hash_map takes 8/158/126/37
//  Randomly inserting 1000 into FlatMap/std::map/base::PooledMap/base::hash_map takes 10/159/117/54
//  Randomly erasing 1000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 6/153/135/36
//  Randomly inserting 10000 into FlatMap/std::map/base::PooledMap/base::hash_map takes 12/223/180/55
//  Randomly erasing 10000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 7/237/210/48
//  [ value = 32 bytes ]
//  Randomly inserting 100 into FlatMap/std::map/base::PooledMap/base::hash_map takes 16/179/108/57
//  Randomly erasing 100 from FlatMap/std::map/base::PooledMap/base::hash_map takes 5/157/120/38
//  Randomly inserting 1000 into FlatMap/std::map/base::PooledMap/base::hash_map takes 15/168/127/54
//  Randomly erasing 1000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 5/164/135/39
//  Randomly inserting 10000 into FlatMap/std::map/base::PooledMap/base::hash_map takes 19/241/201/56
//  Randomly erasing 10000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 5/235/218/54
//  [ value = 128 bytes ]
//  Randomly inserting 100 into FlatMap/std::map/base::PooledMap/base::hash_map takes 35/242/154/97
//  Randomly erasing 100 from FlatMap/std::map/base::PooledMap/base::hash_map takes 7/185/119/56
//  Randomly inserting 1000 into FlatMap/std::map/base::PooledMap/base::hash_map takes 35/262/182/99
//  Randomly erasing 1000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 6/215/157/66
//  Randomly inserting 10000 into FlatMap/std::map/base::PooledMap/base::hash_map takes 44/330/278/114
//  Randomly erasing 10000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 6/307/242/90
//  [ value = 8 bytes ]
//  Seeking 100 from FlatMap/std::map/base::PooledMap/base::hash_map takes 6/51/52/13
//  Seeking 1000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 4/98/82/14
//  Seeking 10000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 4/175/170/14
//  [ value = 32 bytes ]
//  Seeking 100 from FlatMap/std::map/base::PooledMap/base::hash_map takes 3/52/52/14
//  Seeking 1000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 3/84/82/13
//  Seeking 10000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 3/164/156/14
//  [ value = 128 bytes ]
//  Seeking 100 from FlatMap/std::map/base::PooledMap/base::hash_map takes 3/54/53/14
//  Seeking 1000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 4/88/90/13
//  Seeking 10000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 4/178/185/14
//  [ value = 8 bytes ]
//  Seeking 100 from FlatMap/std::map/base::PooledMap/base::hash_map takes 5/51/49/14
//  Seeking 1000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 4/86/94/14
//  Seeking 10000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 4/177/171/14
//  [ value = 32 bytes ]
//  Seeking 100 from FlatMap/std::map/base::PooledMap/base::hash_map takes 3/51/53/14
//  Seeking 1000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 3/98/82/13
//  Seeking 10000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 3/163/156/14
//  [ value = 128 bytes ]
//  Seeking 100 from FlatMap/std::map/base::PooledMap/base::hash_map takes 3/55/53/14
//  Seeking 1000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 4/88/89/13
//  Seeking 10000 from FlatMap/std::map/base::PooledMap/base::hash_map takes 4/177/185/14

#ifndef BASE_FLAT_MAP_H
#define BASE_FLAT_MAP_H

#include <stdint.h>
#include <functional>
#include <iostream>                               // std::ostream
#include "base/type_traits.h"
#include "base/logging.h"
#include "base/find_cstr.h"
#include "base/single_threaded_pool.h"            // SingleThreadedPool
#include "base/containers/hash_tables.h"          // hash<>
#include "base/bit_array.h"                       // bit_array_*
#include "base/strings/string_piece.h"            // StringPiece

namespace base {

template <typename _Map, typename _Element> class FlatMapIterator;
template <typename _Map, typename _Element> class SparseFlatMapIterator;
template <typename K, typename T> class FlatMapElement;
struct FlatMapVoid {}; // Replace void which is not constructible.
template <typename K> struct DefaultHasher;
template <typename K> struct DefaultEqualTo;

struct BucketInfo {
    size_t longest_length;
    double average_length;
};

// NOTE: Objects stored in FlatMap MUST be copyable.
template <typename _K, typename _T,
          // Compute hash code from key.
          // Use public/murmurhash3 to make better distributions.
          typename _Hash = DefaultHasher<_K>,
          // Test equivalence between stored-key and passed-key.
          // stored-key is always on LHS, passed-key is always on RHS.
          typename _Equal = DefaultEqualTo<_K>,
          bool _Sparse = false>
class FlatMap {
public:
    typedef _K key_type;
    typedef _T mapped_type;
    typedef FlatMapElement<_K, _T> Element;
    typedef typename Element::value_type value_type;
    typedef typename conditional<
        _Sparse, SparseFlatMapIterator<FlatMap, value_type>,
        FlatMapIterator<FlatMap, value_type> >::type iterator;
    typedef typename conditional<
        _Sparse, SparseFlatMapIterator<FlatMap, const value_type>, 
        FlatMapIterator<FlatMap, const value_type> >::type const_iterator;
    typedef _Hash hasher;
    typedef _Equal key_equal;
    struct PositionHint {
        size_t nbucket;
        size_t offset;
        bool at_entry;
        key_type key;
    };
    
    FlatMap(const hasher& hashfn = hasher(), const key_equal& eql = key_equal());
    ~FlatMap();
    FlatMap(const FlatMap& rhs);    
    void operator=(const FlatMap& rhs);
    void swap(FlatMap & rhs);

    // Must be called to initialize this map, otherwise insert/operator[]
    // crashes, and seek/erase fails.
    // `nbucket' is the initial number of buckets. `load_factor' is the 
    // maximum value of size()*100/nbucket, if the value is reached, nbucket
    // will be doubled and all items stored will be rehashed which is costly.
    // Choosing proper values for these 2 parameters reduces costs.
    int init(size_t nbucket, u_int load_factor = 80);
    
    // Insert a pair of |key| and |value|. If size()*100/bucket_count() is 
    // more than load_factor(), a resize() will be done.
    // Returns address of the inserted value, NULL on error.
    mapped_type* insert(const key_type& key, const mapped_type& value);

    // Remove |key| and the associated value
    // Returns: 1 on erased, 0 otherwise.
    template <typename K2> size_t erase(const K2& key);
    
    // Remove all items. Allocated spaces are NOT returned by system.
    void clear();

    // Remove all items and return all allocated spaces to system.
    void clear_and_reset_pool();
        
    // Search for the value associated with |key|
    // Returns: address of the value
    template <typename K2> mapped_type* seek(const K2& key) const;

    // Get the value associated with |key|. If |key| does not exist,
    // insert with a default-constructed value. If size()*100/bucket_count()
    // is more than load_factor, a resize will be done.
    // Returns reference of the value
    mapped_type& operator[](const key_type& key);

    // Resize this map. This is optional because resizing will be triggered by
    // insert() or operator[] if there're too many items.
    // Returns successful or not.
    bool resize(size_t nbucket);
    
    // Iterators
    iterator begin();
    iterator end();
    const_iterator begin() const;
    const_iterator end() const;

    // Iterate FlatMap inconsistently in more-than-one passes. This is used
    // in multi-threaded environment to divide the critical sections of
    // iterating big maps into smaller ones. "inconsistently" means that:
    //  * elements added during iteration may be missed.
    //  * some elements may be iterated more than once.
    //  * iteration is restarted at beginning when the map is resized.
    // Example: (copying all keys in multi-threaded environment)
    //   LOCK;
    //   size_t n = 0;
    //   for (Map::const_iterator it = map.begin(); it != map.end(); ++it) {
    //     if (++n >= 256/*max iterated one pass*/) {
    //       Map::PositionHint hint;
    //       map.save_iterator(it, &hint);
    //       n = 0;
    //       UNLOCK;
    //       LOCK;
    //       it = map.restore_iterator(hint);
    //       if (it == map.begin()) { // resized
    //         keys->clear();
    //       }
    //       if (it == map.end()) {
    //         break;
    //       }
    //     }
    //     keys->push_back(it->first);
    //   }
    //   UNLOCK;
    void save_iterator(const const_iterator&, PositionHint*) const;
    const_iterator restore_iterator(const PositionHint&) const;

    // True if init() was successfully called.
    bool initialized() const { return _buckets != NULL; }

    bool empty() const { return _size == 0; }
    size_t size() const { return _size; }
    size_t bucket_count() const { return _nbucket; }
    u_int load_factor () const { return _load_factor; }

    // Returns #nodes of longest bucket in this map. This scans all buckets.
    BucketInfo bucket_info() const;

    struct Bucket {
        explicit Bucket(const _K& k) : next(NULL)
        { new (element_spaces) Element(k); }
        Bucket(const Bucket& other) : next(NULL)
        { new (element_spaces) Element(other.element()); }
        bool is_valid() const { return next != (const Bucket*)-1UL; }
        void set_invalid() { next = (Bucket*)-1UL; }
        // NOTE: Only be called when in_valid() is true.
        Element& element() {
            void* spaces = element_spaces; // Suppress strict-aliasing
            return *reinterpret_cast<Element*>(spaces);
        }
        const Element& element() const {
            const void* spaces = element_spaces;
            return *reinterpret_cast<const Element*>(spaces);
        }
        Bucket* next;
        char element_spaces[sizeof(Element)];
    };

private:
template <typename _Map, typename _Element> friend class FlatMapIterator;
template <typename _Map, typename _Element> friend class FlatMapSparseIterator;
    // True if buckets need to be resized before holding `size' elements.
    inline bool is_too_crowded(size_t size) const
    { return size * 100 >= _nbucket * _load_factor; }
        
    size_t _size;
    size_t _nbucket;
    Bucket* _buckets;
    uint64_t* _thumbnail;
    u_int _load_factor;
    hasher _hashfn;
    key_equal _eql;
    SingleThreadedPool<sizeof(Bucket), 1024, 16> _pool;
};

template <typename _K,
          typename _Hash = DefaultHasher<_K>,
          typename _Equal = DefaultEqualTo<_K>,
          bool _Sparse = false>
class FlatSet {
public:
    typedef FlatMap<_K, FlatMapVoid, _Hash, _Equal, _Sparse> Map;
    typedef typename Map::key_type key_type;
    typedef typename Map::value_type value_type;
    typedef typename Map::Bucket Bucket;
    typedef typename Map::iterator iterator;
    typedef typename Map::const_iterator const_iterator;
    typedef typename Map::hasher hasher;
    typedef typename Map::key_equal key_equal;
    
    FlatSet(const hasher& hashfn = hasher(), const key_equal& eql = key_equal())
        : _map(hashfn, eql) {}
    void swap(FlatSet & rhs) { _map.swap(rhs._map); }

    int init(size_t nbucket, u_int load_factor = 80)
    { return _map.init(nbucket, load_factor); }

    const void* insert(const key_type& key)
    { return _map.insert(key, FlatMapVoid()); }

    template <typename K2>
    size_t erase(const K2& key) { return _map.erase(key); }

    void clear() { return _map.clear(); }
    void clear_and_reset_pool() { return _map.clear_and_reset_pool(); }

    template <typename K2>
    const void* seek(const K2& key) const { return _map.seek(key); }

    bool resize(size_t nbucket) { return _map.resize(nbucket); }
    
    iterator begin() { return _map.begin(); }
    iterator end() { return _map.end(); }
    const_iterator begin() const { return _map.begin(); }
    const_iterator end() const { return _map.end(); }

    bool initialized() const { return _map.initialized(); }
    bool empty() const { return _map.empty(); }
    size_t size() const { return _map.size(); }
    size_t bucket_count() const { return _map.bucket_count(); }
    u_int load_factor () const { return _map.load_factor(); }
    BucketInfo bucket_info() const { return _map.bucket_info(); }

private:
    Map _map;
};

template <typename _K, typename _T,
          typename _Hash = DefaultHasher<_K>,
          typename _Equal = DefaultEqualTo<_K> >
class SparseFlatMap : public FlatMap<_K, _T, _Hash, _Equal, true> {
};

template <typename _K,
          typename _Hash = DefaultHasher<_K>,
          typename _Equal = DefaultEqualTo<_K> >
class SparseFlatSet : public FlatSet<_K, _Hash, _Equal, true> {
};

// Implement FlatMapElement
template <typename K, typename T>
class FlatMapElement {
public:
    typedef std::pair<const K, T> value_type;
    // NOTE: Have to initialize _value in this way which is treated by GCC
    // specially that _value is zeroized(POD) or constructed(non-POD). Other
    // methods do not work. For example, if we put _value into the std::pair
    // and do initialization by calling _pair(k, T()), _value will be copy
    // constructed from the defaultly constructed instance(not zeroized for
    // POD) which is wrong generally.
    explicit FlatMapElement(const K& k) : _key(k), _value(T()) {}
    //                                             ^^^^^^^^^^^
    const K& first_ref() const { return _key; }
    T& second_ref() { return _value; }
    value_type& value_ref() { return *reinterpret_cast<value_type*>(this); }
    inline static const K& first_ref_from_value(const value_type& v)
    { return v.first; }
    inline static const T& second_ref_from_value(const value_type& v)
    { return v.second; }
private:
    const K _key;
    T _value;
};

template <typename K>
class FlatMapElement<K, FlatMapVoid> {
public:
    typedef const K value_type;
    explicit FlatMapElement(const K& k) : _key(k) {}
    const K& first_ref() const { return _key; }
    FlatMapVoid& second_ref() { return second_ref_from_value(_key); }
    value_type& value_ref() { return _key; }
    inline static const K& first_ref_from_value(value_type& v) { return v; }
    inline static FlatMapVoid& second_ref_from_value(value_type&) {
        static FlatMapVoid dummy;
        return dummy;
    }
private:
    K _key;
};

// Implement DefaultHasher and DefaultEqualTo
template <typename K>
struct DefaultHasher : public BASE_HASH_NAMESPACE::hash<K> {
};

template <>
struct DefaultHasher<std::string> {
    std::size_t operator()(const base::StringPiece& s) const {
        std::size_t result = 0;
        for (base::StringPiece::const_iterator
                 i = s.begin(); i != s.end(); ++i) {
            result = result * 101 + *i;
        }
        return result;
    }
    std::size_t operator()(const char* s) const {
        std::size_t result = 0;
        for (; *s; ++s) {
            result = result * 101 + *s;
        }
        return result;
    }
    std::size_t operator()(const std::string& s) const {
        std::size_t result = 0;
        for (std::string::const_iterator i = s.begin(); i != s.end(); ++i) {
            result = result * 101 + *i;
        }
        return result;        
    }
};

template <typename K>
struct DefaultEqualTo : public std::equal_to<K> {
};

template <>
struct DefaultEqualTo<std::string> {
    bool operator()(const std::string& s1, const std::string& s2) const
    { return s1 == s2; }
    bool operator()(const std::string& s1, const base::StringPiece& s2) const
    { return s1 == s2; }
    bool operator()(const std::string& s1, const char* s2) const
    { return s1 == s2; }
};

// find_cstr and find_lowered_cstr
template <typename _T, typename _Hash, typename _Equal, bool _Sparse>
const _T* find_cstr(const FlatMap<std::string, _T, _Hash, _Equal, _Sparse>& m,
                    const char* key) {
    return m.seek(key);
}

template <typename _T, typename _Hash, typename _Equal, bool _Sparse>
_T* find_cstr(FlatMap<std::string, _T, _Hash, _Equal, _Sparse>& m,
              const char* key) {
    return m.seek(key);
}

template <typename _T, typename _Hash, typename _Equal, bool _Sparse>
const _T* find_cstr(const FlatMap<std::string, _T, _Hash, _Equal, _Sparse>& m,
                    const char* key, size_t length) {
    return m.seek(base::StringPiece(key, length));
}

template <typename _T, typename _Hash, typename _Equal, bool _Sparse>
_T* find_cstr(FlatMap<std::string, _T, _Hash, _Equal, _Sparse>& m,
              const char* key, size_t length) {
    return m.seek(base::StringPiece(key, length));
}

template <typename _T, typename _Hash, typename _Equal, bool _Sparse>
const _T* find_lowered_cstr(
    const FlatMap<std::string, _T, _Hash, _Equal, _Sparse>& m,
    const char* key) {
    return m.seek(*tls_stringmap_temp.get_lowered_string(key));
}

template <typename _T, typename _Hash, typename _Equal, bool _Sparse>
_T* find_lowered_cstr(FlatMap<std::string, _T, _Hash, _Equal, _Sparse>& m,
                      const char* key) {
    return m.seek(*tls_stringmap_temp.get_lowered_string(key));
}

template <typename _T, typename _Hash, typename _Equal, bool _Sparse>
const _T* find_lowered_cstr(
    const FlatMap<std::string, _T, _Hash, _Equal, _Sparse>& m,
    const char* key, size_t length) {
    return m.seek(*tls_stringmap_temp.get_lowered_string(key, length));
}

template <typename _T, typename _Hash, typename _Equal, bool _Sparse>
_T* find_lowered_cstr(FlatMap<std::string, _T, _Hash, _Equal, _Sparse>& m,
                      const char* key, size_t length) {
    return m.seek(*tls_stringmap_temp.get_lowered_string(key, length));
}

}  // namespace base

#include "base/containers/flat_map_inl.h"

#endif  //BASE_FLAT_MAP_H