bit_reader.h 11 KB
Newer Older
AoD314's avatar
AoD314 committed
1 2
// Copyright 2010 Google Inc. All Rights Reserved.
//
3 4 5 6 7
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
AoD314's avatar
AoD314 committed
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
// -----------------------------------------------------------------------------
//
// Boolean decoder
//
// Author: Skal (pascal.massimino@gmail.com)
//         Vikas Arora (vikaas.arora@gmail.com)

#ifndef WEBP_UTILS_BIT_READER_H_
#define WEBP_UTILS_BIT_READER_H_

#include <assert.h>
#ifdef _MSC_VER
#include <stdlib.h>  // _byteswap_ulong
#endif
#include <string.h>  // For memcpy
#include "../webp/types.h"

#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif

AoD314's avatar
AoD314 committed
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102
// The Boolean decoder needs to maintain infinite precision on the value_ field.
// However, since range_ is only 8bit, we only need an active window of 8 bits
// for value_. Left bits (MSB) gets zeroed and shifted away when value_ falls
// below 128, range_ is updated, and fresh bits read from the bitstream are
// brought in as LSB.
// To avoid reading the fresh bits one by one (slow), we cache a few of them
// ahead (actually, we cache BITS of them ahead. See below). There's two
// strategies regarding how to shift these looked-ahead fresh bits into the
// 8bit window of value_: either we shift them in, while keeping the position of
// the window fixed. Or we slide the window to the right while keeping the cache
// bits at a fixed, right-justified, position.
//
//  Example, for BITS=16: here is the content of value_ for both strategies:
//
//          !USE_RIGHT_JUSTIFY            ||        USE_RIGHT_JUSTIFY
//                                        ||
//   <- 8b -><- 8b -><- BITS bits  ->     ||  <- 8b+3b -><- 8b -><- 13 bits ->
//   [unused][value_][cached bits][0]     ||  [unused...][value_][cached bits]
//  [........00vvvvvvBBBBBBBBBBBBB000]LSB || [...........00vvvvvvBBBBBBBBBBBBB]
//                                        ||
// After calling VP8Shift(), where we need to shift away two zeros:
//  [........vvvvvvvvBBBBBBBBBBB00000]LSB || [.............vvvvvvvvBBBBBBBBBBB]
//                                        ||
// Just before we need to call VP8LoadNewBytes(), the situation is:
//  [........vvvvvv000000000000000000]LSB || [..........................vvvvvv]
//                                        ||
// And just after calling VP8LoadNewBytes():
//  [........vvvvvvvvBBBBBBBBBBBBBBBB]LSB || [........vvvvvvvvBBBBBBBBBBBBBBBB]
//
// -> we're back to height active 'value_' bits (marked 'v') and BITS cached
// bits (marked 'B')
//
// The right-justify strategy tends to use less shifts and is often faster.

//------------------------------------------------------------------------------
// BITS can be any multiple of 8 from 8 to 56 (inclusive).
// Pick values that fit natural register size.

#if !defined(WEBP_REFERENCE_IMPLEMENTATION)

#define USE_RIGHT_JUSTIFY

#if defined(__i386__) || defined(_M_IX86)      // x86 32bit
#define BITS 16
#elif defined(__x86_64__) || defined(_M_X64)   // x86 64bit
#define BITS 56
#elif defined(__arm__) || defined(_M_ARM)      // ARM
#define BITS 24
#else                      // reasonable default
#define BITS 24
#endif

#else     // reference choices

#define USE_RIGHT_JUSTIFY
#define BITS 8

#endif

//------------------------------------------------------------------------------
// Derived types and constants

// bit_t = natural register type
// lbit_t = natural type for memory I/O

#if (BITS > 32)
typedef uint64_t bit_t;
typedef uint64_t lbit_t;
#elif (BITS == 32)
typedef uint64_t bit_t;
typedef uint32_t lbit_t;
#elif (BITS == 24)
typedef uint32_t bit_t;
typedef uint32_t lbit_t;
AoD314's avatar
AoD314 committed
103 104 105 106 107 108 109 110
#elif (BITS == 16)
typedef uint32_t bit_t;
typedef uint16_t lbit_t;
#else
typedef uint32_t bit_t;
typedef uint8_t lbit_t;
#endif

AoD314's avatar
AoD314 committed
111 112 113 114 115 116 117
#ifndef USE_RIGHT_JUSTIFY
typedef bit_t range_t;     // type for storing range_
#define MASK ((((bit_t)1) << (BITS)) - 1)
#else
typedef uint32_t range_t;  // range_ only uses 8bits here. No need for bit_t.
#endif

AoD314's avatar
AoD314 committed
118
//------------------------------------------------------------------------------
AoD314's avatar
AoD314 committed
119
// Bitreader
AoD314's avatar
AoD314 committed
120 121 122 123 124 125 126 127

typedef struct VP8BitReader VP8BitReader;
struct VP8BitReader {
  const uint8_t* buf_;        // next byte to be read
  const uint8_t* buf_end_;    // end of read buffer
  int eof_;                   // true if input is exhausted

  // boolean decoder
AoD314's avatar
AoD314 committed
128 129 130
  range_t range_;            // current range minus 1. In [127, 254] interval.
  bit_t value_;              // current value
  int bits_;                 // number of valid bits left
AoD314's avatar
AoD314 committed
131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147
};

// Initialize the bit reader and the boolean decoder.
void VP8InitBitReader(VP8BitReader* const br,
                      const uint8_t* const start, const uint8_t* const end);

// return the next value made of 'num_bits' bits
uint32_t VP8GetValue(VP8BitReader* const br, int num_bits);
static WEBP_INLINE uint32_t VP8Get(VP8BitReader* const br) {
  return VP8GetValue(br, 1);
}

// return the next value with sign-extension.
int32_t VP8GetSignedValue(VP8BitReader* const br, int num_bits);

// Read a bit with proba 'prob'. Speed-critical function!
extern const uint8_t kVP8Log2Range[128];
AoD314's avatar
AoD314 committed
148
extern const range_t kVP8NewRange[128];
AoD314's avatar
AoD314 committed
149 150 151 152

void VP8LoadFinalBytes(VP8BitReader* const br);    // special case for the tail

static WEBP_INLINE void VP8LoadNewBytes(VP8BitReader* const br) {
AoD314's avatar
AoD314 committed
153
  assert(br != NULL && br->buf_ != NULL);
AoD314's avatar
AoD314 committed
154 155 156 157 158 159 160
  // Read 'BITS' bits at a time if possible.
  if (br->buf_ + sizeof(lbit_t) <= br->buf_end_) {
    // convert memory type to register type (with some zero'ing!)
    bit_t bits;
    lbit_t in_bits = *(lbit_t*)br->buf_;
    br->buf_ += (BITS) >> 3;
#if !defined(__BIG_ENDIAN__)
AoD314's avatar
AoD314 committed
161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
#if (BITS > 32)
// gcc 4.3 has builtin functions for swap32/swap64
#if defined(__GNUC__) && \
           (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
    bits = (bit_t)__builtin_bswap64(in_bits);
#elif defined(_MSC_VER)
    bits = (bit_t)_byteswap_uint64(in_bits);
#elif defined(__x86_64__)
    __asm__ volatile("bswapq %0" : "=r"(bits) : "0"(in_bits));
#else  // generic code for swapping 64-bit values (suggested by bdb@)
    bits = (bit_t)in_bits;
    bits = ((bits & 0xffffffff00000000ull) >> 32) |
           ((bits & 0x00000000ffffffffull) << 32);
    bits = ((bits & 0xffff0000ffff0000ull) >> 16) |
           ((bits & 0x0000ffff0000ffffull) << 16);
    bits = ((bits & 0xff00ff00ff00ff00ull) >> 8) |
           ((bits & 0x00ff00ff00ff00ffull) << 8);
#endif
    bits >>= 64 - BITS;
#elif (BITS >= 24)
AoD314's avatar
AoD314 committed
181 182
#if defined(__i386__) || defined(__x86_64__)
    __asm__ volatile("bswap %k0" : "=r"(in_bits) : "0"(in_bits));
AoD314's avatar
AoD314 committed
183
    bits = (bit_t)in_bits;   // 24b/32b -> 32b/64b zero-extension
AoD314's avatar
AoD314 committed
184
#elif defined(_MSC_VER)
AoD314's avatar
AoD314 committed
185
    bits = (bit_t)_byteswap_ulong(in_bits);
AoD314's avatar
AoD314 committed
186 187 188 189
#else
    bits = (bit_t)(in_bits >> 24) | ((in_bits >> 8) & 0xff00)
         | ((in_bits << 8) & 0xff0000)  | (in_bits << 24);
#endif  // x86
AoD314's avatar
AoD314 committed
190
    bits >>= (32 - BITS);
AoD314's avatar
AoD314 committed
191 192 193 194 195 196
#elif (BITS == 16)
    // gcc will recognize a 'rorw $8, ...' here:
    bits = (bit_t)(in_bits >> 8) | ((in_bits & 0xff) << 8);
#else   // BITS == 8
    bits = (bit_t)in_bits;
#endif
AoD314's avatar
AoD314 committed
197
#else    // BIG_ENDIAN
AoD314's avatar
AoD314 committed
198
    bits = (bit_t)in_bits;
199
    if (BITS != 8 * sizeof(bit_t)) bits >>= (8 * sizeof(bit_t) - BITS);
AoD314's avatar
AoD314 committed
200
#endif
AoD314's avatar
AoD314 committed
201 202 203 204 205 206
#ifndef USE_RIGHT_JUSTIFY
    br->value_ |= bits << (-br->bits_);
#else
    br->value_ = bits | (br->value_ << (BITS));
#endif
    br->bits_ += (BITS);
AoD314's avatar
AoD314 committed
207 208 209 210 211
  } else {
    VP8LoadFinalBytes(br);    // no need to be inlined
  }
}

AoD314's avatar
AoD314 committed
212 213
static WEBP_INLINE int VP8BitUpdate(VP8BitReader* const br, range_t split) {
  if (br->bits_ < 0) {  // Make sure we have a least BITS bits in 'value_'
AoD314's avatar
AoD314 committed
214 215
    VP8LoadNewBytes(br);
  }
AoD314's avatar
AoD314 committed
216 217 218 219 220
#ifndef USE_RIGHT_JUSTIFY
  split |= (MASK);
  if (br->value_ > split) {
    br->range_ -= split + 1;
    br->value_ -= split + 1;
AoD314's avatar
AoD314 committed
221 222
    return 1;
  } else {
AoD314's avatar
AoD314 committed
223
    br->range_ = split;
AoD314's avatar
AoD314 committed
224 225
    return 0;
  }
AoD314's avatar
AoD314 committed
226 227 228 229 230 231 232 233 234 235 236 237 238 239
#else
  {
    const int pos = br->bits_;
    const range_t value = (range_t)(br->value_ >> pos);
    if (value > split) {
      br->range_ -= split + 1;
      br->value_ -= (bit_t)(split + 1) << pos;
      return 1;
    } else {
      br->range_ = split;
      return 0;
    }
  }
#endif
AoD314's avatar
AoD314 committed
240 241 242
}

static WEBP_INLINE void VP8Shift(VP8BitReader* const br) {
AoD314's avatar
AoD314 committed
243
#ifndef USE_RIGHT_JUSTIFY
AoD314's avatar
AoD314 committed
244
  // range_ is in [0..127] interval here.
AoD314's avatar
AoD314 committed
245
  const bit_t idx = br->range_ >> (BITS);
AoD314's avatar
AoD314 committed
246 247 248
  const int shift = kVP8Log2Range[idx];
  br->range_ = kVP8NewRange[idx];
  br->value_ <<= shift;
AoD314's avatar
AoD314 committed
249 250 251 252 253 254 255
  br->bits_ -= shift;
#else
  const int shift = kVP8Log2Range[br->range_];
  assert(br->range_ < (range_t)128);
  br->range_ = kVP8NewRange[br->range_];
  br->bits_ -= shift;
#endif
AoD314's avatar
AoD314 committed
256 257
}
static WEBP_INLINE int VP8GetBit(VP8BitReader* const br, int prob) {
AoD314's avatar
AoD314 committed
258
#ifndef USE_RIGHT_JUSTIFY
AoD314's avatar
AoD314 committed
259 260
  // It's important to avoid generating a 64bit x 64bit multiply here.
  // We just need an 8b x 8b after all.
AoD314's avatar
AoD314 committed
261 262 263 264 265 266 267 268 269
  const range_t split =
      (range_t)((uint32_t)(br->range_ >> (BITS)) * prob) << ((BITS) - 8);
  const int bit = VP8BitUpdate(br, split);
  if (br->range_ <= (((range_t)0x7e << (BITS)) | (MASK))) {
    VP8Shift(br);
  }
  return bit;
#else
  const range_t split = (br->range_ * prob) >> 8;
AoD314's avatar
AoD314 committed
270
  const int bit = VP8BitUpdate(br, split);
AoD314's avatar
AoD314 committed
271
  if (br->range_ <= (range_t)0x7e) {
AoD314's avatar
AoD314 committed
272 273 274
    VP8Shift(br);
  }
  return bit;
AoD314's avatar
AoD314 committed
275
#endif
AoD314's avatar
AoD314 committed
276 277 278
}

static WEBP_INLINE int VP8GetSigned(VP8BitReader* const br, int v) {
AoD314's avatar
AoD314 committed
279
  const range_t split = (br->range_ >> 1);
AoD314's avatar
AoD314 committed
280 281 282 283 284 285 286
  const int bit = VP8BitUpdate(br, split);
  VP8Shift(br);
  return bit ? -v : v;
}


// -----------------------------------------------------------------------------
AoD314's avatar
AoD314 committed
287 288 289
// Bitreader for lossless format

typedef uint64_t vp8l_val_t;  // right now, this bit-reader can only use 64bit.
AoD314's avatar
AoD314 committed
290 291

typedef struct {
AoD314's avatar
AoD314 committed
292 293 294 295 296 297 298
  vp8l_val_t     val_;        // pre-fetched bits
  const uint8_t* buf_;        // input byte buffer
  size_t         len_;        // buffer length
  size_t         pos_;        // byte position in buf_
  int            bit_pos_;    // current bit-reading position in val_
  int            eos_;        // bitstream is finished
  int            error_;      // an error occurred (buffer overflow attempt...)
AoD314's avatar
AoD314 committed
299 300 301 302 303 304 305 306 307 308 309 310 311 312 313
} VP8LBitReader;

void VP8LInitBitReader(VP8LBitReader* const br,
                       const uint8_t* const start,
                       size_t length);

//  Sets a new data buffer.
void VP8LBitReaderSetBuffer(VP8LBitReader* const br,
                            const uint8_t* const buffer, size_t length);

// Reads the specified number of bits from Read Buffer.
// Flags an error in case end_of_stream or n_bits is more than allowed limit.
// Flags eos if this read attempt is going to cross the read buffer.
uint32_t VP8LReadBits(VP8LBitReader* const br, int n_bits);

AoD314's avatar
AoD314 committed
314 315 316 317 318 319 320 321
// Return the prefetched bits, so they can be looked up.
static WEBP_INLINE uint32_t VP8LPrefetchBits(VP8LBitReader* const br) {
  return (uint32_t)(br->val_ >> br->bit_pos_);
}

// Discard 'num_bits' bits from the cache.
static WEBP_INLINE void VP8LDiscardBits(VP8LBitReader* const br, int num_bits) {
  br->bit_pos_ += num_bits;
AoD314's avatar
AoD314 committed
322 323 324 325 326 327 328 329 330 331
}

// Advances the Read buffer by 4 bytes to make room for reading next 32 bits.
void VP8LFillBitWindow(VP8LBitReader* const br);

#if defined(__cplusplus) || defined(c_plusplus)
}    // extern "C"
#endif

#endif  /* WEBP_UTILS_BIT_READER_H_ */