// Copyright 2011 Google Inc. All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Various stubs for the open-source version of Snappy.

#ifndef BUTIL_THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_
#define BUTIL_THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_

#include <string>
#include <assert.h>
#include <stdint.h>  // uint16_t / uint32_t / uint64_t, used throughout
#include <stdlib.h>
#include <string.h>
#include "butil/compiler_specific.h"
#include "butil/basictypes.h"
#include "butil/sys_byteorder.h"

#define SNAPPY_MAJOR 1
#define SNAPPY_MINOR 1
#define SNAPPY_PATCHLEVEL 3
#define SNAPPY_VERSION                                                  \
    ((SNAPPY_MAJOR << 16) | (SNAPPY_MINOR << 8) | SNAPPY_PATCHLEVEL)
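// For example, version 1.1.3 packs to 0x010103 (65795).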

#if defined(__x86_64__)

// Enable 64-bit optimized versions of some routines.
#define ARCH_K8 1

#endif

// Needed by OS X, among others.
#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif

// This is only used for recomputing the tag byte table used during
// decompression; for simplicity we just remove it from the open-source
// version (anyone who wants to regenerate it can just do the call
// themselves within main()).
#define DEFINE_bool(flag_name, default_value, description)      \
    bool FLAGS_ ## flag_name = default_value
#define DECLARE_bool(flag_name)                 \
    extern bool FLAGS_ ## flag_name
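
// Illustrative usage of the stubs above (the flag name below is
// hypothetical):
//     DEFINE_bool(snappy_dump_decompression_table, false,
//                 "If true, regenerate the tag byte table.");
//     DECLARE_bool(snappy_dump_decompression_table);
//     ...
//     if (FLAGS_snappy_dump_decompression_table) { /* recompute table */ }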

namespace butil {
namespace snappy {

// x86 and PowerPC can simply do these loads and stores native.

#if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__)

#define UNALIGNED_LOAD16(_p) (*reinterpret_cast<const uint16_t *>(_p))
#define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32_t *>(_p))
#define UNALIGNED_LOAD64(_p) (*reinterpret_cast<const uint64_t *>(_p))

#define UNALIGNED_STORE16(_p, _val) (*reinterpret_cast<uint16_t *>(_p) = (_val))
#define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast<uint32_t *>(_p) = (_val))
#define UNALIGNED_STORE64(_p, _val) (*reinterpret_cast<uint64_t *>(_p) = (_val))

// ARMv7 and newer support native unaligned accesses, but only of 16-bit
// and 32-bit values (not 64-bit); older versions either raise a fatal signal,
// do an unaligned read and rotate the words around a bit, or do the reads very
// slowly (trip through kernel mode). There's no simple #define that says just
// “ARMv7 or higher”, so we have to filter away all ARMv5 and ARMv6
// sub-architectures.
//
// This is a mess, but there's not much we can do about it.

#elif defined(__arm__) &&                       \
    !defined(__ARM_ARCH_4__) &&                 \
    !defined(__ARM_ARCH_4T__) &&                \
    !defined(__ARM_ARCH_5__) &&                 \
    !defined(__ARM_ARCH_5T__) &&                \
    !defined(__ARM_ARCH_5TE__) &&               \
    !defined(__ARM_ARCH_5TEJ__) &&              \
    !defined(__ARM_ARCH_6__) &&                 \
    !defined(__ARM_ARCH_6J__) &&                \
    !defined(__ARM_ARCH_6K__) &&                \
    !defined(__ARM_ARCH_6Z__) &&                \
    !defined(__ARM_ARCH_6ZK__) &&               \
    !defined(__ARM_ARCH_6T2__)

#define UNALIGNED_LOAD16(_p) (*reinterpret_cast<const uint16_t *>(_p))
#define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32_t *>(_p))

#define UNALIGNED_STORE16(_p, _val) (*reinterpret_cast<uint16_t *>(_p) = (_val))
#define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast<uint32_t *>(_p) = (_val))

// TODO(user): NEON supports unaligned 64-bit loads and stores.
// See if that would be more efficient on platforms supporting it,
// at least for copies.

inline uint64_t UNALIGNED_LOAD64(const void *p) {
    uint64_t t;
    memcpy(&t, p, sizeof t);
    return t;
}

inline void UNALIGNED_STORE64(void *p, uint64_t v) {
    memcpy(p, &v, sizeof v);
}

#else

// These functions are provided for architectures that don't support
// unaligned loads and stores.

inline uint16_t UNALIGNED_LOAD16(const void *p) {
    uint16_t t;
    memcpy(&t, p, sizeof t);
    return t;
}

inline uint32_t UNALIGNED_LOAD32(const void *p) {
    uint32_t t;
    memcpy(&t, p, sizeof t);
    return t;
}

inline uint64_t UNALIGNED_LOAD64(const void *p) {
    uint64_t t;
    memcpy(&t, p, sizeof t);
    return t;
}

inline void UNALIGNED_STORE16(void *p, uint16_t v) {
    memcpy(p, &v, sizeof v);
}

inline void UNALIGNED_STORE32(void *p, uint32_t v) {
    memcpy(p, &v, sizeof v);
}

inline void UNALIGNED_STORE64(void *p, uint64_t v) {
    memcpy(p, &v, sizeof v);
}

#endif

// This can be more efficient than UNALIGNED_LOAD64 + UNALIGNED_STORE64
// on some platforms, in particular ARM.
inline void UnalignedCopy64(const void *src, void *dst) {
    if (sizeof(void *) == 8) {
        UNALIGNED_STORE64(dst, UNALIGNED_LOAD64(src));
    } else {
        const char *src_char = reinterpret_cast<const char *>(src);
        char *dst_char = reinterpret_cast<char *>(dst);

        UNALIGNED_STORE32(dst_char, UNALIGNED_LOAD32(src_char));
        UNALIGNED_STORE32(dst_char + 4, UNALIGNED_LOAD32(src_char + 4));
    }
}
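
// Example: copying 8 bytes between buffers at arbitrary (possibly odd)
// addresses; the offsets below are illustrative:
//     UnalignedCopy64(src + 1, dst + 3);  // safe regardless of alignment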

// Conversions to/from little-endian storage (the opposite of network order).
// Convert x from host to little endian:  x = LittleEndian::FromHost16(x);
// Convert x from little endian to host:  x = LittleEndian::ToHost16(x);
//
//  Store values into unaligned memory, converting to little-endian order:
//    LittleEndian::Store16(p, x);
//
//  Load unaligned little-endian values, converting to host order:
//    x = LittleEndian::Load16(p);
class LittleEndian {
public:
    // Conversion functions.
#if defined(ARCH_CPU_LITTLE_ENDIAN)
    static uint16_t FromHost16(uint16_t x) { return x; }
    static uint16_t ToHost16(uint16_t x) { return x; }

    static uint32_t FromHost32(uint32_t x) { return x; }
    static uint32_t ToHost32(uint32_t x) { return x; }

    static bool IsLittleEndian() { return true; }

#else  // !defined(ARCH_CPU_LITTLE_ENDIAN)
    static uint16_t FromHost16(uint16_t x) { return ByteSwap(x); }
    static uint16_t ToHost16(uint16_t x) { return ByteSwap(x); }

    static uint32_t FromHost32(uint32_t x) { return ByteSwap(x); }
    static uint32_t ToHost32(uint32_t x) { return ByteSwap(x); }

    static bool IsLittleEndian() { return false; }
#endif  // !defined(ARCH_CPU_LITTLE_ENDIAN)

    // Functions to do unaligned loads and stores in little-endian order.
    static uint16_t Load16(const void *p) {
        return ToHost16(UNALIGNED_LOAD16(p));
    }

    static void Store16(void *p, uint16_t v) {
        UNALIGNED_STORE16(p, FromHost16(v));
    }

    static uint32_t Load32(const void *p) {
        return ToHost32(UNALIGNED_LOAD32(p));
    }

    static void Store32(void *p, uint32_t v) {
        UNALIGNED_STORE32(p, FromHost32(v));
    }
};
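
// Example: LittleEndian::Store32(p, 0x0A0B0C0D) writes the bytes
// 0D 0C 0B 0A at p on any host; LittleEndian::Load32(p) reads them
// back as 0x0A0B0C0D.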

// Some bit-manipulation functions.
class Bits {
public:
    // Return floor(log2(n)) for positive integer n.  Returns -1 iff n == 0.
    static int Log2Floor(uint32_t n);

    // Return the index of the least significant set bit, 0-indexed.  The
    // result is undefined if n == 0.  FindLSBSetNonZero() is similar to
    // ffs() except that it's 0-indexed.
    static int FindLSBSetNonZero(uint32_t n);
    static int FindLSBSetNonZero64(uint64_t n);

private:
    DISALLOW_COPY_AND_ASSIGN(Bits);
};
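
// Examples: Bits::Log2Floor(1) == 0, Bits::Log2Floor(4096) == 12, and
// Bits::FindLSBSetNonZero(0x18) == 3 (0x18 == binary 11000).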

#if defined(COMPILER_GCC)

inline int Bits::Log2Floor(uint32_t n) {
    return n == 0 ? -1 : 31 ^ __builtin_clz(n);
}

inline int Bits::FindLSBSetNonZero(uint32_t n) {
    return __builtin_ctz(n);
}

inline int Bits::FindLSBSetNonZero64(uint64_t n) {
    return __builtin_ctzll(n);
}

#else  // Portable versions.

inline int Bits::Log2Floor(uint32_t n) {
    if (n == 0)
        return -1;
    int log = 0;
    uint32_t value = n;
    for (int i = 4; i >= 0; --i) {
        int shift = (1 << i);
        uint32_t x = value >> shift;
        if (x != 0) {
            value = x;
            log += shift;
        }
    }
    assert(value == 1);
    return log;
}

// Binary search for the lowest set bit: if (n << shift) is nonzero, the
// lowest set bit of n lies in its low (32 - shift) bits, so shift it
// upward and subtract `shift` from the candidate index.
inline int Bits::FindLSBSetNonZero(uint32_t n) {
    int rc = 31;
    for (int i = 4, shift = 1 << 4; i >= 0; --i) {
        const uint32_t x = n << shift;
        if (x != 0) {
            n = x;
            rc -= shift;
        }
        shift >>= 1;
    }
    return rc;
}

// FindLSBSetNonZero64() is defined in terms of FindLSBSetNonZero().
inline int Bits::FindLSBSetNonZero64(uint64_t n) {
    const uint32_t bottombits = static_cast<uint32_t>(n);
    if (bottombits == 0) {
        // Bottom bits are zero, so scan in top bits
        return 32 + FindLSBSetNonZero(static_cast<uint32_t>(n >> 32));
    } else {
        return FindLSBSetNonZero(bottombits);
    }
}

#endif  // End portable versions.

// Variable-length integer encoding.
class Varint {
public:
    // Maximum length of the varint encoding of a uint32.
    static const int kMax32 = 5;

    // Attempts to parse a varint32 from a prefix of the bytes in [ptr,limit-1].
    // Never reads a character at or beyond limit.  If a valid/terminated varint32
    // was found in the range, stores it in *OUTPUT and returns a pointer just
    // past the last byte of the varint32. Else returns NULL.  On success,
    // "result <= limit".
    static const char* Parse32WithLimit(const char* ptr, const char* limit,
                                        uint32_t* OUTPUT);

    // REQUIRES   "ptr" points to a buffer of length sufficient to hold "v".
    // EFFECTS    Encodes "v" into "ptr" and returns a pointer to the
    //            byte just past the last encoded byte.
    static char* Encode32(char* ptr, uint32_t v);

    // EFFECTS    Appends the varint representation of "value" to "*s".
    static void Append32(std::string* s, uint32_t value);
};

inline const char* Varint::Parse32WithLimit(const char* p,
                                            const char* l,
                                            uint32_t* OUTPUT) {
    const unsigned char* ptr = reinterpret_cast<const unsigned char*>(p);
    const unsigned char* limit = reinterpret_cast<const unsigned char*>(l);
    uint32_t b, result;
    if (ptr >= limit) return NULL;
    b = *(ptr++); result = b & 127;          if (b < 128) goto done;
    if (ptr >= limit) return NULL;
    b = *(ptr++); result |= (b & 127) <<  7; if (b < 128) goto done;
    if (ptr >= limit) return NULL;
    b = *(ptr++); result |= (b & 127) << 14; if (b < 128) goto done;
    if (ptr >= limit) return NULL;
    b = *(ptr++); result |= (b & 127) << 21; if (b < 128) goto done;
    if (ptr >= limit) return NULL;
    b = *(ptr++); result |= (b & 127) << 28; if (b < 16) goto done;
    return NULL;       // Value is too long to be a varint32
done:
    *OUTPUT = result;
    return reinterpret_cast<const char*>(ptr);
}

inline char* Varint::Encode32(char* sptr, uint32_t v) {
    // Operate on characters as unsigneds
    unsigned char* ptr = reinterpret_cast<unsigned char*>(sptr);
    static const int B = 128;
    if (v < (1<<7)) {
        *(ptr++) = v;
    } else if (v < (1<<14)) {
        *(ptr++) = v | B;
        *(ptr++) = v>>7;
    } else if (v < (1<<21)) {
        *(ptr++) = v | B;
        *(ptr++) = (v>>7) | B;
        *(ptr++) = v>>14;
    } else if (v < (1<<28)) {
        *(ptr++) = v | B;
        *(ptr++) = (v>>7) | B;
        *(ptr++) = (v>>14) | B;
        *(ptr++) = v>>21;
    } else {
        *(ptr++) = v | B;
        *(ptr++) = (v>>7) | B;
        *(ptr++) = (v>>14) | B;
        *(ptr++) = (v>>21) | B;
        *(ptr++) = v>>28;
    }
    return reinterpret_cast<char*>(ptr);
}
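
// Worked example: 300 == binary 10 0101100, so Encode32 emits 0xAC (the
// low seven bits 0x2C plus the continuation bit) followed by 0x02:
//     char buf[Varint::kMax32];
//     char* end = Varint::Encode32(buf, 300);     // end - buf == 2
//     uint32_t v;
//     Varint::Parse32WithLimit(buf, end, &v);     // v == 300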

// If you know the internal layout of the std::string in use, you can
// replace this function with one that resizes the string without
// filling the new space with zeros (if applicable) --
// it will be non-portable but faster.
inline void STLStringResizeUninitialized(std::string* s, size_t new_size) {
    s->resize(new_size);
}

// Return a mutable char* pointing to a string's internal buffer,
// which may not be null-terminated. Writing through this pointer will
// modify the string.
//
// string_as_array(&str)[i] is valid for 0 <= i < str.size() until the
// next call to a string method that invalidates iterators.
//
// As of 2006-04, there was no standard-blessed way of getting a
// mutable reference to a string's internal buffer. However, issue 530
// (http://www.open-std.org/JTC1/SC22/WG21/docs/lwg-defects.html#530)
// proposed this method, and C++11 adopted it by guaranteeing that
// std::string uses contiguous storage. It works on all current
// implementations.
inline char* string_as_array(std::string* str) {
    return str->empty() ? NULL : &*str->begin();
}
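
// Illustrative usage, filling a string's buffer without zero-initializing
// it first (source_bytes is hypothetical):
//     std::string s;
//     STLStringResizeUninitialized(&s, 16);
//     memcpy(string_as_array(&s), source_bytes, 16);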

}  // namespace snappy
}  // namespace butil

#endif  // BUTIL_THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_