/*-----------------------------------------------------------------------------
* MurmurHash3 was written by Austin Appleby, and is placed in the public
* domain.
*
* This implementation was written by Shane Day, and is also public domain.
*
* This is a portable ANSI C implementation of MurmurHash3_x86_32 (Murmur3A)
* with support for progressive processing.
*/

/* ------------------------------------------------------------------------- */
/* Determine what native type to use for uint32_t */

/* We can't use the name 'uint32_t' here because it will conflict with
* any version provided by the system headers or application. */

/* First look for special cases */
#if defined(_MSC_VER)
#define MH_UINT32 unsigned long
#endif

/* If the compiler says it's C99 then take its word for it */
#if !defined(MH_UINT32) && ( \
 defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L )
#include <stdint.h>
#define MH_UINT32 uint32_t
#endif

/* Otherwise try testing against max value macros from limit.h */
#if !defined(MH_UINT32)
#include  <limits.h>
#if   (USHRT_MAX == 0xffffffffUL)
#define MH_UINT32 unsigned short
#elif (UINT_MAX == 0xffffffffUL)
#define MH_UINT32 unsigned int
#elif (ULONG_MAX == 0xffffffffUL)
#define MH_UINT32 unsigned long
#endif
#endif

#if !defined(MH_UINT32)
#error Unable to determine type name for unsigned 32-bit int
#endif

/* I'm yet to work on a platform where 'unsigned char' is not 8 bits */
#define MH_UINT8  unsigned char

void PMurHash32_Process(MH_UINT32 *ph1, MH_UINT32 *pcarry, const void *key, int len);
MH_UINT32 PMurHash32_Result(MH_UINT32 h1, MH_UINT32 carry, MH_UINT32 total_length);
MH_UINT32 PMurHash32(MH_UINT32 seed, const void *key, int len);
void hashMurmurx86 ( const void * key, const int len, const unsigned int seed, void * out );

/* I used ugly type names in the header to avoid potential conflicts with
* application or system typedefs & defines. Since I'm not including any more
* headers below here I can rename these so that the code reads like C99 */
#undef uint32_t
#define uint32_t MH_UINT32
#undef uint8_t
#define uint8_t  MH_UINT8

/* MSVC warnings we choose to ignore */
#if defined(_MSC_VER)
#pragma warning(disable: 4127) /* conditional expression is constant */
#endif

/*-----------------------------------------------------------------------------
* Endianess, misalignment capabilities and util macros
*
* The following 3 macros are defined in this section. The other macros defined
* are only needed to help derive these 3.
*
* READ_UINT32(x)   Read a little endian unsigned 32-bit int
* UNALIGNED_SAFE   Defined if READ_UINT32 works on non-word boundaries
* ROTL32(x,r)      Rotate x left by r bits
*/

/* Convention is to define __BYTE_ORDER == to one of these values */
#if !defined(__BIG_ENDIAN)
#define __BIG_ENDIAN 4321
#endif
#if !defined(__LITTLE_ENDIAN)
#define __LITTLE_ENDIAN 1234
#endif

/* I386 */
#if defined(_M_IX86) || defined(__i386__) || defined(__i386) || defined(i386)
#define __BYTE_ORDER __LITTLE_ENDIAN
#define UNALIGNED_SAFE
#endif

/* gcc 'may' define __LITTLE_ENDIAN__ or __BIG_ENDIAN__ to 1 (Note the trailing __),
* or even _LITTLE_ENDIAN or _BIG_ENDIAN (Note the single _ prefix) */
#if !defined(__BYTE_ORDER)
#if defined(__LITTLE_ENDIAN__) && __LITTLE_ENDIAN__==1 || defined(_LITTLE_ENDIAN) && _LITTLE_ENDIAN==1
#define __BYTE_ORDER __LITTLE_ENDIAN
#elif defined(__BIG_ENDIAN__) && __BIG_ENDIAN__==1 || defined(_BIG_ENDIAN) && _BIG_ENDIAN==1
#define __BYTE_ORDER __BIG_ENDIAN
#endif
#endif

/* gcc (usually) defines xEL/EB macros for ARM and MIPS endianess */
#if !defined(__BYTE_ORDER)
#if defined(__ARMEL__) || defined(__MIPSEL__)
#define __BYTE_ORDER __LITTLE_ENDIAN
#endif
#if defined(__ARMEB__) || defined(__MIPSEB__)
#define __BYTE_ORDER __BIG_ENDIAN
#endif
#endif

/* Now find best way we can to READ_UINT32 */
#if __BYTE_ORDER==__LITTLE_ENDIAN
/* CPU endian matches murmurhash algorithm, so read 32-bit word directly */
#define READ_UINT32(ptr)   (*((uint32_t*)(ptr)))
#elif __BYTE_ORDER==__BIG_ENDIAN
/* TODO: Add additional cases below where a compiler provided bswap32 is available */
#if defined(__GNUC__) && (__GNUC__>4 || (__GNUC__==4 && __GNUC_MINOR__>=3))
#define READ_UINT32(ptr)   (__builtin_bswap32(*((uint32_t*)(ptr))))
#else
/* Without a known fast bswap32 we're just as well off doing this */
#define READ_UINT32(ptr)   (ptr[0]|ptr[1]<<8|ptr[2]<<16|ptr[3]<<24)
#define UNALIGNED_SAFE
#endif
#else
/* Unknown endianess so last resort is to read individual bytes */
#define READ_UINT32(ptr)   (ptr[0]|ptr[1]<<8|ptr[2]<<16|ptr[3]<<24)

/* Since we're not doing word-reads we can skip the messing about with realignment */
#define UNALIGNED_SAFE
#endif

/*-----------------------------------------------------------------------------
* Core murmurhash algorithm macros */

#define C1  (0xcc9e2d51)
#define C2  (0x1b873593)

/* This is the main processing body of the algorithm. It operates
* on each full 32-bits of input. */
#define DOBLOCK(h1, k1) do{ \
 k1 *= C1; \
      k1 = ROTL32(k1,15); \
           k1 *= C2; \
                 \
                 h1 ^= k1; \
                       h1 = ROTL32(h1,13); \
                            h1 = h1*5+0xe6546b64; \
                                 }while (0)


/* Append unaligned bytes to carry, forcing hash churn if we have 4 bytes */
/* cnt=bytes to process, h1=name of h1 var, c=carry, n=bytes in c, ptr/len=payload */
#define DOBYTES(cnt, h1, c, n, ptr, len) do{ \
                                  int _i = cnt; \
                                                                           while (_i--){    \
        c = c>>8 | *ptr++<<24;    \
        n++;    len--;    \
        if (n==4)        {            \
                        DOBLOCK(h1, c);            \
                        n = 0;            \
                    } \
            }}while (0)

/*---------------------------------------------------------------------------*/

/* Main hashing function. Initialise carry to 0 and h1 to 0 or an initial seed
* if wanted. Both ph1 and pcarry are required arguments. */
void PMurHash32_Process(uint32_t *ph1, uint32_t *pcarry, const void *key, int len)
{
    uint32_t h1 = *ph1;
    uint32_t c = *pcarry;
    
    const uint8_t *ptr = (uint8_t*)                         key;
    const uint8_t *end;
    
    /* Extract carry count from low 2 bits of c value */
    int n = c & 3;
    
#if defined(UNALIGNED_SAFE)
    /* This CPU handles unaligned word access */
    
    /* Consume any carry bytes */
    int i = (4-n) & 3;
    if (i && i <= len)
    {
        DOBYTES(i, h1, c, n, ptr, len);
    }
    
    /* Process 32-bit chunks */
    end = ptr + len/4*4;
    for ( ; ptr < end ; ptr+=4)
    {
        uint32_t k1 = READ_UINT32(ptr);
        DOBLOCK(h1, k1);
    }
    
#else /*UNALIGNED_SAFE*/
    /* This CPU does not handle unaligned word access */
    
    /* Consume enough so that the next data byte is word aligned */
    int i = -(long)ptr & 3;
    if (i && i <= len)
    {
        DOBYTES(i, h1, c, n, ptr, len);
    }
    
    /* We're now aligned. Process in aligned blocks. Specialise for each possible carry count */
    end = ptr + len/4*4;
    switch (n)
    {
            /* how many bytes in c */
        case 0: /* c=[----]  w=[3210]  b=[3210]=w            c'=[----] */
            for ( ; ptr < end ; ptr+=4)
            {
                uint32_t k1 = READ_UINT32(ptr);
                DOBLOCK(h1, k1);
            }
            break;
        case 1: /* c=[0---]  w=[4321]  b=[3210]=c>>24|w<<8   c'=[4---] */
            for ( ; ptr < end ; ptr+=4)
            {
                uint32_t k1 = c>>24;
                c = READ_UINT32(ptr);
                k1 |= c<<8;
                DOBLOCK(h1, k1);
            }
            break;
        case 2: /* c=[10--]  w=[5432]  b=[3210]=c>>16|w<<16  c'=[54--] */
            for ( ; ptr < end ; ptr+=4)
            {
                uint32_t k1 = c>>16;
                c = READ_UINT32(ptr);
                k1 |= c<<16;
                DOBLOCK(h1, k1);
            }
            break;
        case 3: /* c=[210-]  w=[6543]  b=[3210]=c>>8|w<<24   c'=[654-] */
            for ( ; ptr < end ; ptr+=4)
            {
                uint32_t k1 = c>>8;
                c = READ_UINT32(ptr);
                k1 |= c<<24;
                DOBLOCK(h1, k1);
            }
    }
#endif /*UNALIGNED_SAFE*/
    
    /* Advance over whole 32-bit chunks, possibly leaving 1..3 bytes */
    len -= len/4*4;
    
    /* Append any remaining bytes into carry */
    DOBYTES(len, h1, c, n, ptr, len);
    
    /* Copy out new running hash and carry */
    *ph1 = h1;
    *pcarry = (c & ~0xff) | n;
}

/*---------------------------------------------------------------------------*/

/* Finalize a hash. To match the original Murmur3A the total_length must be provided */
uint32_t PMurHash32_Result(uint32_t h, uint32_t carry, uint32_t total_length)
{
    uint32_t k1;
    int n = carry & 3;
    if (n)
    {
        k1 = carry >> (4-n)*8;
        k1 *= C1;
        k1 = ROTL32(k1,15);
        k1 *= C2;
        h ^= k1;
    }
    h ^= total_length;
    
    /* fmix */
    h ^= h >> 16;
    h *= 0x85ebca6b;
    h ^= h >> 13;
    h *= 0xc2b2ae35;
    h ^= h >> 16;
    
    return h;
}

/*---------------------------------------------------------------------------*/

/* Murmur3A compatable all-at-once */
uint32_t PMurHash32(uint32_t seed, const void *key, int len)
{
    uint32_t h1=seed, carry=0;
    PMurHash32_Process(&h1, &carry, key, len);
    return PMurHash32_Result(h1, carry, len);
}

void hashMurmurx86 ( const void * key, const int len, const unsigned int seed, void * out )
{
    *(unsigned int*)out = PMurHash32 (seed, key, len);
}