Commit 740941c8 authored by AoD314's avatar AoD314

update libwebp up to 0.3.0

parent db45e04d
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
#include "./vp8i.h" #include "./vp8i.h"
#include "./vp8li.h" #include "./vp8li.h"
#include "../utils/filters.h" #include "../utils/filters.h"
#include "../utils/quant_levels.h" #include "../utils/quant_levels_dec.h"
#include "../webp/format_constants.h" #include "../webp/format_constants.h"
#if defined(__cplusplus) || defined(c_plusplus) #if defined(__cplusplus) || defined(c_plusplus)
...@@ -44,7 +44,6 @@ static int DecodeAlpha(const uint8_t* data, size_t data_size, ...@@ -44,7 +44,6 @@ static int DecodeAlpha(const uint8_t* data, size_t data_size,
int width, int height, int stride, uint8_t* output) { int width, int height, int stride, uint8_t* output) {
uint8_t* decoded_data = NULL; uint8_t* decoded_data = NULL;
const size_t decoded_size = height * width; const size_t decoded_size = height * width;
uint8_t* unfiltered_data = NULL;
WEBP_FILTER_TYPE filter; WEBP_FILTER_TYPE filter;
int pre_processing; int pre_processing;
int rsrv; int rsrv;
...@@ -83,29 +82,19 @@ static int DecodeAlpha(const uint8_t* data, size_t data_size, ...@@ -83,29 +82,19 @@ static int DecodeAlpha(const uint8_t* data, size_t data_size,
} }
if (ok) { if (ok) {
WebPFilterFunc unfilter_func = WebPUnfilters[filter]; WebPUnfilterFunc unfilter_func = WebPUnfilters[filter];
if (unfilter_func != NULL) { if (unfilter_func != NULL) {
unfiltered_data = (uint8_t*)malloc(decoded_size);
if (unfiltered_data == NULL) {
ok = 0;
goto Error;
}
// TODO(vikas): Implement on-the-fly decoding & filter mechanism to decode // TODO(vikas): Implement on-the-fly decoding & filter mechanism to decode
// and apply filter per image-row. // and apply filter per image-row.
unfilter_func(decoded_data, width, height, 1, width, unfiltered_data); unfilter_func(width, height, width, decoded_data);
// Construct raw_data (height x stride) from alpha data (height x width).
CopyPlane(unfiltered_data, width, output, stride, width, height);
free(unfiltered_data);
} else {
// Construct raw_data (height x stride) from alpha data (height x width).
CopyPlane(decoded_data, width, output, stride, width, height);
} }
// Construct raw_data (height x stride) from alpha data (height x width).
CopyPlane(decoded_data, width, output, stride, width, height);
if (pre_processing == ALPHA_PREPROCESSED_LEVELS) { if (pre_processing == ALPHA_PREPROCESSED_LEVELS) {
ok = DequantizeLevels(decoded_data, width, height); ok = DequantizeLevels(decoded_data, width, height);
} }
} }
Error:
if (method != ALPHA_NO_COMPRESSION) { if (method != ALPHA_NO_COMPRESSION) {
free(decoded_data); free(decoded_data);
} }
......
...@@ -97,54 +97,51 @@ static void FilterRow(const VP8Decoder* const dec) { ...@@ -97,54 +97,51 @@ static void FilterRow(const VP8Decoder* const dec) {
} }
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Precompute the filtering strength for each segment and each i4x4/i16x16 mode.
void VP8StoreBlock(VP8Decoder* const dec) { static void PrecomputeFilterStrengths(VP8Decoder* const dec) {
if (dec->filter_type_ > 0) { if (dec->filter_type_ > 0) {
VP8FInfo* const info = dec->f_info_ + dec->mb_x_; int s;
const int skip = dec->mb_info_[dec->mb_x_].skip_; const VP8FilterHeader* const hdr = &dec->filter_hdr_;
int level = dec->filter_levels_[dec->segment_]; for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
if (dec->filter_hdr_.use_lf_delta_) { int i4x4;
// TODO(skal): only CURRENT is handled for now. // First, compute the initial level
level += dec->filter_hdr_.ref_lf_delta_[0]; int base_level;
if (dec->is_i4x4_) { if (dec->segment_hdr_.use_segment_) {
level += dec->filter_hdr_.mode_lf_delta_[0]; base_level = dec->segment_hdr_.filter_strength_[s];
} if (!dec->segment_hdr_.absolute_delta_) {
} base_level += hdr->level_;
level = (level < 0) ? 0 : (level > 63) ? 63 : level; }
info->f_level_ = level;
if (dec->filter_hdr_.sharpness_ > 0) {
if (dec->filter_hdr_.sharpness_ > 4) {
level >>= 2;
} else { } else {
level >>= 1; base_level = hdr->level_;
} }
if (level > 9 - dec->filter_hdr_.sharpness_) { for (i4x4 = 0; i4x4 <= 1; ++i4x4) {
level = 9 - dec->filter_hdr_.sharpness_; VP8FInfo* const info = &dec->fstrengths_[s][i4x4];
int level = base_level;
if (hdr->use_lf_delta_) {
// TODO(skal): only CURRENT is handled for now.
level += hdr->ref_lf_delta_[0];
if (i4x4) {
level += hdr->mode_lf_delta_[0];
}
}
level = (level < 0) ? 0 : (level > 63) ? 63 : level;
info->f_level_ = level;
if (hdr->sharpness_ > 0) {
if (hdr->sharpness_ > 4) {
level >>= 2;
} else {
level >>= 1;
}
if (level > 9 - hdr->sharpness_) {
level = 9 - hdr->sharpness_;
}
}
info->f_ilevel_ = (level < 1) ? 1 : level;
info->f_inner_ = 0;
} }
} }
info->f_ilevel_ = (level < 1) ? 1 : level;
info->f_inner_ = (!skip || dec->is_i4x4_);
}
{
// Transfer samples to row cache
int y;
const int y_offset = dec->cache_id_ * 16 * dec->cache_y_stride_;
const int uv_offset = dec->cache_id_ * 8 * dec->cache_uv_stride_;
uint8_t* const ydst = dec->cache_y_ + dec->mb_x_ * 16 + y_offset;
uint8_t* const udst = dec->cache_u_ + dec->mb_x_ * 8 + uv_offset;
uint8_t* const vdst = dec->cache_v_ + dec->mb_x_ * 8 + uv_offset;
for (y = 0; y < 16; ++y) {
memcpy(ydst + y * dec->cache_y_stride_,
dec->yuv_b_ + Y_OFF + y * BPS, 16);
}
for (y = 0; y < 8; ++y) {
memcpy(udst + y * dec->cache_uv_stride_,
dec->yuv_b_ + U_OFF + y * BPS, 8);
memcpy(vdst + y * dec->cache_uv_stride_,
dec->yuv_b_ + V_OFF + y * BPS, 8);
}
} }
} }
...@@ -339,6 +336,7 @@ VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io) { ...@@ -339,6 +336,7 @@ VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io) {
dec->br_mb_y_ = dec->mb_h_; dec->br_mb_y_ = dec->mb_h_;
} }
} }
PrecomputeFilterStrengths(dec);
return VP8_STATUS_OK; return VP8_STATUS_OK;
} }
...@@ -496,6 +494,7 @@ static int AllocateMemory(VP8Decoder* const dec) { ...@@ -496,6 +494,7 @@ static int AllocateMemory(VP8Decoder* const dec) {
// alpha plane // alpha plane
dec->alpha_plane_ = alpha_size ? (uint8_t*)mem : NULL; dec->alpha_plane_ = alpha_size ? (uint8_t*)mem : NULL;
mem += alpha_size; mem += alpha_size;
assert(mem <= (uint8_t*)dec->mem_ + dec->mem_size_);
// note: left-info is initialized once for all. // note: left-info is initialized once for all.
memset(dec->mb_info_ - 1, 0, mb_info_size); memset(dec->mb_info_ - 1, 0, mb_info_size);
...@@ -551,6 +550,7 @@ static WEBP_INLINE void Copy32b(uint8_t* dst, uint8_t* src) { ...@@ -551,6 +550,7 @@ static WEBP_INLINE void Copy32b(uint8_t* dst, uint8_t* src) {
} }
void VP8ReconstructBlock(VP8Decoder* const dec) { void VP8ReconstructBlock(VP8Decoder* const dec) {
int j;
uint8_t* const y_dst = dec->yuv_b_ + Y_OFF; uint8_t* const y_dst = dec->yuv_b_ + Y_OFF;
uint8_t* const u_dst = dec->yuv_b_ + U_OFF; uint8_t* const u_dst = dec->yuv_b_ + U_OFF;
uint8_t* const v_dst = dec->yuv_b_ + V_OFF; uint8_t* const v_dst = dec->yuv_b_ + V_OFF;
...@@ -558,7 +558,6 @@ void VP8ReconstructBlock(VP8Decoder* const dec) { ...@@ -558,7 +558,6 @@ void VP8ReconstructBlock(VP8Decoder* const dec) {
// Rotate in the left samples from previously decoded block. We move four // Rotate in the left samples from previously decoded block. We move four
// pixels at a time for alignment reason, and because of in-loop filter. // pixels at a time for alignment reason, and because of in-loop filter.
if (dec->mb_x_ > 0) { if (dec->mb_x_ > 0) {
int j;
for (j = -1; j < 16; ++j) { for (j = -1; j < 16; ++j) {
Copy32b(&y_dst[j * BPS - 4], &y_dst[j * BPS + 12]); Copy32b(&y_dst[j * BPS - 4], &y_dst[j * BPS + 12]);
} }
...@@ -567,7 +566,6 @@ void VP8ReconstructBlock(VP8Decoder* const dec) { ...@@ -567,7 +566,6 @@ void VP8ReconstructBlock(VP8Decoder* const dec) {
Copy32b(&v_dst[j * BPS - 4], &v_dst[j * BPS + 4]); Copy32b(&v_dst[j * BPS - 4], &v_dst[j * BPS + 4]);
} }
} else { } else {
int j;
for (j = 0; j < 16; ++j) { for (j = 0; j < 16; ++j) {
y_dst[j * BPS - 1] = 129; y_dst[j * BPS - 1] = 129;
} }
...@@ -670,6 +668,21 @@ void VP8ReconstructBlock(VP8Decoder* const dec) { ...@@ -670,6 +668,21 @@ void VP8ReconstructBlock(VP8Decoder* const dec) {
} }
} }
} }
// Transfer reconstructed samples from yuv_b_ cache to final destination.
{
const int y_offset = dec->cache_id_ * 16 * dec->cache_y_stride_;
const int uv_offset = dec->cache_id_ * 8 * dec->cache_uv_stride_;
uint8_t* const y_out = dec->cache_y_ + dec->mb_x_ * 16 + y_offset;
uint8_t* const u_out = dec->cache_u_ + dec->mb_x_ * 8 + uv_offset;
uint8_t* const v_out = dec->cache_v_ + dec->mb_x_ * 8 + uv_offset;
for (j = 0; j < 16; ++j) {
memcpy(y_out + j * dec->cache_y_stride_, y_dst + j * BPS, 16);
}
for (j = 0; j < 8; ++j) {
memcpy(u_out + j * dec->cache_uv_stride_, u_dst + j * BPS, 8);
memcpy(v_out + j * dec->cache_uv_stride_, v_dst + j * BPS, 8);
}
}
} }
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
......
...@@ -425,9 +425,8 @@ static VP8StatusCode DecodeRemaining(WebPIDecoder* const idec) { ...@@ -425,9 +425,8 @@ static VP8StatusCode DecodeRemaining(WebPIDecoder* const idec) {
} }
return VP8_STATUS_SUSPENDED; return VP8_STATUS_SUSPENDED;
} }
// Reconstruct and emit samples.
VP8ReconstructBlock(dec); VP8ReconstructBlock(dec);
// Store data and save block's filtering params
VP8StoreBlock(dec);
// Release buffer only if there is only one partition // Release buffer only if there is only one partition
if (dec->num_parts_ == 1) { if (dec->num_parts_ == 1) {
...@@ -596,12 +595,22 @@ void WebPIDelete(WebPIDecoder* idec) { ...@@ -596,12 +595,22 @@ void WebPIDelete(WebPIDecoder* idec) {
WebPIDecoder* WebPINewRGB(WEBP_CSP_MODE mode, uint8_t* output_buffer, WebPIDecoder* WebPINewRGB(WEBP_CSP_MODE mode, uint8_t* output_buffer,
size_t output_buffer_size, int output_stride) { size_t output_buffer_size, int output_stride) {
const int is_external_memory = (output_buffer != NULL);
WebPIDecoder* idec; WebPIDecoder* idec;
if (mode >= MODE_YUV) return NULL; if (mode >= MODE_YUV) return NULL;
if (!is_external_memory) { // Overwrite parameters to sane values.
output_buffer_size = 0;
output_stride = 0;
} else { // A buffer was passed. Validate the other params.
if (output_stride == 0 || output_buffer_size == 0) {
return NULL; // invalid parameter.
}
}
idec = WebPINewDecoder(NULL); idec = WebPINewDecoder(NULL);
if (idec == NULL) return NULL; if (idec == NULL) return NULL;
idec->output_.colorspace = mode; idec->output_.colorspace = mode;
idec->output_.is_external_memory = 1; idec->output_.is_external_memory = is_external_memory;
idec->output_.u.RGBA.rgba = output_buffer; idec->output_.u.RGBA.rgba = output_buffer;
idec->output_.u.RGBA.stride = output_stride; idec->output_.u.RGBA.stride = output_stride;
idec->output_.u.RGBA.size = output_buffer_size; idec->output_.u.RGBA.size = output_buffer_size;
...@@ -612,10 +621,30 @@ WebPIDecoder* WebPINewYUVA(uint8_t* luma, size_t luma_size, int luma_stride, ...@@ -612,10 +621,30 @@ WebPIDecoder* WebPINewYUVA(uint8_t* luma, size_t luma_size, int luma_stride,
uint8_t* u, size_t u_size, int u_stride, uint8_t* u, size_t u_size, int u_stride,
uint8_t* v, size_t v_size, int v_stride, uint8_t* v, size_t v_size, int v_stride,
uint8_t* a, size_t a_size, int a_stride) { uint8_t* a, size_t a_size, int a_stride) {
WebPIDecoder* const idec = WebPINewDecoder(NULL); const int is_external_memory = (luma != NULL);
WebPIDecoder* idec;
WEBP_CSP_MODE colorspace;
if (!is_external_memory) { // Overwrite parameters to sane values.
luma_size = u_size = v_size = a_size = 0;
luma_stride = u_stride = v_stride = a_stride = 0;
u = v = a = NULL;
colorspace = MODE_YUVA;
} else { // A luma buffer was passed. Validate the other parameters.
if (u == NULL || v == NULL) return NULL;
if (luma_size == 0 || u_size == 0 || v_size == 0) return NULL;
if (luma_stride == 0 || u_stride == 0 || v_stride == 0) return NULL;
if (a != NULL) {
if (a_size == 0 || a_stride == 0) return NULL;
}
colorspace = (a == NULL) ? MODE_YUV : MODE_YUVA;
}
idec = WebPINewDecoder(NULL);
if (idec == NULL) return NULL; if (idec == NULL) return NULL;
idec->output_.colorspace = (a == NULL) ? MODE_YUV : MODE_YUVA;
idec->output_.is_external_memory = 1; idec->output_.colorspace = colorspace;
idec->output_.is_external_memory = is_external_memory;
idec->output_.u.YUVA.y = luma; idec->output_.u.YUVA.y = luma;
idec->output_.u.YUVA.y_stride = luma_stride; idec->output_.u.YUVA.y_stride = luma_stride;
idec->output_.u.YUVA.y_size = luma_size; idec->output_.u.YUVA.y_size = luma_size;
......
...@@ -236,20 +236,6 @@ static int ParseFilterHeader(VP8BitReader* br, VP8Decoder* const dec) { ...@@ -236,20 +236,6 @@ static int ParseFilterHeader(VP8BitReader* br, VP8Decoder* const dec) {
} }
} }
dec->filter_type_ = (hdr->level_ == 0) ? 0 : hdr->simple_ ? 1 : 2; dec->filter_type_ = (hdr->level_ == 0) ? 0 : hdr->simple_ ? 1 : 2;
if (dec->filter_type_ > 0) { // precompute filter levels per segment
if (dec->segment_hdr_.use_segment_) {
int s;
for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
int strength = dec->segment_hdr_.filter_strength_[s];
if (!dec->segment_hdr_.absolute_delta_) {
strength += hdr->level_;
}
dec->filter_levels_[s] = strength;
}
} else {
dec->filter_levels_[0] = hdr->level_;
}
}
return !br->eof_; return !br->eof_;
} }
...@@ -458,7 +444,7 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) { ...@@ -458,7 +444,7 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Residual decoding (Paragraph 13.2 / 13.3) // Residual decoding (Paragraph 13.2 / 13.3)
static const uint8_t kBands[16 + 1] = { static const int kBands[16 + 1] = {
0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, 0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7,
0 // extra entry as sentinel 0 // extra entry as sentinel
}; };
...@@ -474,6 +460,39 @@ static const uint8_t kZigzag[16] = { ...@@ -474,6 +460,39 @@ static const uint8_t kZigzag[16] = {
}; };
typedef const uint8_t (*ProbaArray)[NUM_CTX][NUM_PROBAS]; // for const-casting typedef const uint8_t (*ProbaArray)[NUM_CTX][NUM_PROBAS]; // for const-casting
typedef const uint8_t (*ProbaCtxArray)[NUM_PROBAS];
// See section 13-2: http://tools.ietf.org/html/rfc6386#section-13.2
// Decodes the magnitude of a coefficient whose value is at least 2.
// GetCoeffs() calls this only after the bit coded with p[2] was read as set;
// the sign is applied afterwards by the caller through VP8GetSigned().
//   br: bit reader the bits are pulled from. Every VP8GetBit() call consumes
//       input, so the order of the calls below is part of the format.
//   p:  probability array for the current band/context; entries p[3]..p[10]
//       are read here.
// Returns the decoded magnitude (>= 2).
static int GetLargeValue(VP8BitReader* const br, const uint8_t* const p) {
int v;
if (!VP8GetBit(br, p[3])) {
// Small values: 2, or 3..4 with one extra bit.
if (!VP8GetBit(br, p[4])) {
v = 2;
} else {
v = 3 + VP8GetBit(br, p[5]);
}
} else {
if (!VP8GetBit(br, p[6])) {
// Medium values; the extra bits use fixed probabilities (159, 165, 145)
// rather than entries of 'p'.
if (!VP8GetBit(br, p[7])) {
// 5..6
v = 5 + VP8GetBit(br, 159);
} else {
// 7..10: two extra bits, most significant first.
v = 7 + 2 * VP8GetBit(br, 165);
v += VP8GetBit(br, 145);
}
} else {
// Large values: two bits select a category, then the extra bits listed in
// kCat3456[cat] (defined elsewhere in the file) are read.
const uint8_t* tab;
const int bit1 = VP8GetBit(br, p[8]);
// The second selector bit uses p[9] or p[10], depending on bit1.
const int bit0 = VP8GetBit(br, p[9 + bit1]);
// cat is 0..3, assuming VP8GetBit() returns 0 or 1 (not visible here).
const int cat = 2 * bit1 + bit0;
v = 0;
// The table is walked until a zero entry; each entry is the probability
// used for one extra bit.
for (tab = kCat3456[cat]; *tab; ++tab) {
// Equivalent to v = 2 * v + bit: accumulate the bits MSB-first.
v += v + VP8GetBit(br, *tab);
}
// Add the category's base value: 11, 19, 35 or 67 for cat = 0..3.
v += 3 + (8 << cat);
}
}
return v;
}
// Returns the position of the last non-zero coeff plus one // Returns the position of the last non-zero coeff plus one
// (and 0 if there's no coeff at all) // (and 0 if there's no coeff at all)
...@@ -484,54 +503,26 @@ static int GetCoeffs(VP8BitReader* const br, ProbaArray prob, ...@@ -484,54 +503,26 @@ static int GetCoeffs(VP8BitReader* const br, ProbaArray prob,
if (!VP8GetBit(br, p[0])) { // first EOB is more a 'CBP' bit. if (!VP8GetBit(br, p[0])) { // first EOB is more a 'CBP' bit.
return 0; return 0;
} }
while (1) { for (; n < 16; ++n) {
++n; const ProbaCtxArray p_ctx = prob[kBands[n + 1]];
if (!VP8GetBit(br, p[1])) { if (!VP8GetBit(br, p[1])) {
p = prob[kBands[n]][0]; p = p_ctx[0];
} else { // non zero coeff } else { // non zero coeff
int v, j; int v;
if (!VP8GetBit(br, p[2])) { if (!VP8GetBit(br, p[2])) {
p = prob[kBands[n]][1];
v = 1; v = 1;
p = p_ctx[1];
} else { } else {
if (!VP8GetBit(br, p[3])) { v = GetLargeValue(br, p);
if (!VP8GetBit(br, p[4])) { p = p_ctx[2];
v = 2;
} else {
v = 3 + VP8GetBit(br, p[5]);
}
} else {
if (!VP8GetBit(br, p[6])) {
if (!VP8GetBit(br, p[7])) {
v = 5 + VP8GetBit(br, 159);
} else {
v = 7 + 2 * VP8GetBit(br, 165);
v += VP8GetBit(br, 145);
}
} else {
const uint8_t* tab;
const int bit1 = VP8GetBit(br, p[8]);
const int bit0 = VP8GetBit(br, p[9 + bit1]);
const int cat = 2 * bit1 + bit0;
v = 0;
for (tab = kCat3456[cat]; *tab; ++tab) {
v += v + VP8GetBit(br, *tab);
}
v += 3 + (8 << cat);
}
}
p = prob[kBands[n]][2];
} }
j = kZigzag[n - 1]; out[kZigzag[n]] = VP8GetSigned(br, v) * dq[n > 0];
out[j] = VP8GetSigned(br, v) * dq[j > 0]; if (n < 15 && !VP8GetBit(br, p[0])) { // EOB
if (n == 16 || !VP8GetBit(br, p[0])) { // EOB return n + 1;
return n;
} }
} }
if (n == 16) {
return 16;
}
} }
return 16;
} }
// Alias-safe way of converting 4bytes to 32bits. // Alias-safe way of converting 4bytes to 32bits.
...@@ -670,6 +661,12 @@ int VP8DecodeMB(VP8Decoder* const dec, VP8BitReader* const token_br) { ...@@ -670,6 +661,12 @@ int VP8DecodeMB(VP8Decoder* const dec, VP8BitReader* const token_br) {
dec->non_zero_ac_ = 0; dec->non_zero_ac_ = 0;
} }
if (dec->filter_type_ > 0) { // store filter info
VP8FInfo* const finfo = dec->f_info_ + dec->mb_x_;
*finfo = dec->fstrengths_[dec->segment_][dec->is_i4x4_];
finfo->f_inner_ = (!info->skip_ || dec->is_i4x4_);
}
return (!token_br->eof_); return (!token_br->eof_);
} }
...@@ -693,10 +690,8 @@ static int ParseFrame(VP8Decoder* const dec, VP8Io* io) { ...@@ -693,10 +690,8 @@ static int ParseFrame(VP8Decoder* const dec, VP8Io* io) {
return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA, return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
"Premature end-of-file encountered."); "Premature end-of-file encountered.");
} }
// Reconstruct and emit samples.
VP8ReconstructBlock(dec); VP8ReconstructBlock(dec);
// Store data and save block's filtering params
VP8StoreBlock(dec);
} }
if (!VP8ProcessRow(dec, io)) { if (!VP8ProcessRow(dec, io)) {
return VP8SetError(dec, VP8_STATUS_USER_ABORT, "Output aborted."); return VP8SetError(dec, VP8_STATUS_USER_ABORT, "Output aborted.");
......
...@@ -27,8 +27,8 @@ extern "C" { ...@@ -27,8 +27,8 @@ extern "C" {
// version numbers // version numbers
#define DEC_MAJ_VERSION 0 #define DEC_MAJ_VERSION 0
#define DEC_MIN_VERSION 2 #define DEC_MIN_VERSION 3
#define DEC_REV_VERSION 1 #define DEC_REV_VERSION 0
#define ONLY_KEYFRAME_CODE // to remove any code related to P-Frames #define ONLY_KEYFRAME_CODE // to remove any code related to P-Frames
...@@ -157,7 +157,7 @@ typedef struct { // filter specs ...@@ -157,7 +157,7 @@ typedef struct { // filter specs
} VP8FInfo; } VP8FInfo;
typedef struct { // used for syntax-parsing typedef struct { // used for syntax-parsing
unsigned int nz_; // non-zero AC/DC coeffs unsigned int nz_:24; // non-zero AC/DC coeffs (24bit)
unsigned int dc_nz_:1; // non-zero DC coeffs unsigned int dc_nz_:1; // non-zero DC coeffs
unsigned int skip_:1; // block type unsigned int skip_:1; // block type
} VP8MB; } VP8MB;
...@@ -269,9 +269,9 @@ struct VP8Decoder { ...@@ -269,9 +269,9 @@ struct VP8Decoder {
uint32_t non_zero_ac_; uint32_t non_zero_ac_;
// Filtering side-info // Filtering side-info
int filter_type_; // 0=off, 1=simple, 2=complex int filter_type_; // 0=off, 1=simple, 2=complex
int filter_row_; // per-row flag int filter_row_; // per-row flag
uint8_t filter_levels_[NUM_MB_SEGMENTS]; // precalculated per-segment VP8FInfo fstrengths_[NUM_MB_SEGMENTS][2]; // precalculated per-segment/type
// extensions // extensions
const uint8_t* alpha_data_; // compressed alpha data (if present) const uint8_t* alpha_data_; // compressed alpha data (if present)
...@@ -312,8 +312,6 @@ VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io); ...@@ -312,8 +312,6 @@ VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io);
int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io); int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io);
// Process the last decoded row (filtering + output) // Process the last decoded row (filtering + output)
int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io); int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io);
// Store a block, along with filtering params
void VP8StoreBlock(VP8Decoder* const dec);
// To be called at the start of a new scanline, to initialize predictors. // To be called at the start of a new scanline, to initialize predictors.
void VP8InitScanline(VP8Decoder* const dec); void VP8InitScanline(VP8Decoder* const dec);
// Decode one macroblock. Returns false if there is not enough data. // Decode one macroblock. Returns false if there is not enough data.
......
...@@ -58,18 +58,18 @@ static const uint8_t kCodeLengthCodeOrder[NUM_CODE_LENGTH_CODES] = { ...@@ -58,18 +58,18 @@ static const uint8_t kCodeLengthCodeOrder[NUM_CODE_LENGTH_CODES] = {
#define CODE_TO_PLANE_CODES 120 #define CODE_TO_PLANE_CODES 120
static const uint8_t code_to_plane_lut[CODE_TO_PLANE_CODES] = { static const uint8_t code_to_plane_lut[CODE_TO_PLANE_CODES] = {
0x18, 0x07, 0x17, 0x19, 0x28, 0x06, 0x27, 0x29, 0x16, 0x1a, 0x18, 0x07, 0x17, 0x19, 0x28, 0x06, 0x27, 0x29, 0x16, 0x1a,
0x26, 0x2a, 0x38, 0x05, 0x37, 0x39, 0x15, 0x1b, 0x36, 0x3a, 0x26, 0x2a, 0x38, 0x05, 0x37, 0x39, 0x15, 0x1b, 0x36, 0x3a,
0x25, 0x2b, 0x48, 0x04, 0x47, 0x49, 0x14, 0x1c, 0x35, 0x3b, 0x25, 0x2b, 0x48, 0x04, 0x47, 0x49, 0x14, 0x1c, 0x35, 0x3b,
0x46, 0x4a, 0x24, 0x2c, 0x58, 0x45, 0x4b, 0x34, 0x3c, 0x03, 0x46, 0x4a, 0x24, 0x2c, 0x58, 0x45, 0x4b, 0x34, 0x3c, 0x03,
0x57, 0x59, 0x13, 0x1d, 0x56, 0x5a, 0x23, 0x2d, 0x44, 0x4c, 0x57, 0x59, 0x13, 0x1d, 0x56, 0x5a, 0x23, 0x2d, 0x44, 0x4c,
0x55, 0x5b, 0x33, 0x3d, 0x68, 0x02, 0x67, 0x69, 0x12, 0x1e, 0x55, 0x5b, 0x33, 0x3d, 0x68, 0x02, 0x67, 0x69, 0x12, 0x1e,
0x66, 0x6a, 0x22, 0x2e, 0x54, 0x5c, 0x43, 0x4d, 0x65, 0x6b, 0x66, 0x6a, 0x22, 0x2e, 0x54, 0x5c, 0x43, 0x4d, 0x65, 0x6b,
0x32, 0x3e, 0x78, 0x01, 0x77, 0x79, 0x53, 0x5d, 0x11, 0x1f, 0x32, 0x3e, 0x78, 0x01, 0x77, 0x79, 0x53, 0x5d, 0x11, 0x1f,
0x64, 0x6c, 0x42, 0x4e, 0x76, 0x7a, 0x21, 0x2f, 0x75, 0x7b, 0x64, 0x6c, 0x42, 0x4e, 0x76, 0x7a, 0x21, 0x2f, 0x75, 0x7b,
0x31, 0x3f, 0x63, 0x6d, 0x52, 0x5e, 0x00, 0x74, 0x7c, 0x41, 0x31, 0x3f, 0x63, 0x6d, 0x52, 0x5e, 0x00, 0x74, 0x7c, 0x41,
0x4f, 0x10, 0x20, 0x62, 0x6e, 0x30, 0x73, 0x7d, 0x51, 0x5f, 0x4f, 0x10, 0x20, 0x62, 0x6e, 0x30, 0x73, 0x7d, 0x51, 0x5f,
0x40, 0x72, 0x7e, 0x61, 0x6f, 0x50, 0x71, 0x7f, 0x60, 0x70 0x40, 0x72, 0x7e, 0x61, 0x6f, 0x50, 0x71, 0x7f, 0x60, 0x70
}; };
static int DecodeImageStream(int xsize, int ysize, static int DecodeImageStream(int xsize, int ysize,
...@@ -149,31 +149,22 @@ static WEBP_INLINE int PlaneCodeToDistance(int xsize, int plane_code) { ...@@ -149,31 +149,22 @@ static WEBP_INLINE int PlaneCodeToDistance(int xsize, int plane_code) {
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Decodes the next Huffman code from bit-stream. // Decodes the next Huffman code from bit-stream.
// FillBitWindow(br) needs to be called at minimum every second call // FillBitWindow(br) needs to be called at minimum every second call
// to ReadSymbolUnsafe. // to ReadSymbol, in order to pre-fetch enough bits.
static int ReadSymbolUnsafe(const HuffmanTree* tree, VP8LBitReader* const br) { static WEBP_INLINE int ReadSymbol(const HuffmanTree* tree,
VP8LBitReader* const br) {
const HuffmanTreeNode* node = tree->root_; const HuffmanTreeNode* node = tree->root_;
int num_bits = 0;
uint32_t bits = VP8LPrefetchBits(br);
assert(node != NULL); assert(node != NULL);
while (!HuffmanTreeNodeIsLeaf(node)) { while (!HuffmanTreeNodeIsLeaf(node)) {
node = HuffmanTreeNextNode(node, VP8LReadOneBitUnsafe(br)); node = HuffmanTreeNextNode(node, bits & 1);
bits >>= 1;
++num_bits;
} }
VP8LDiscardBits(br, num_bits);
return node->symbol_; return node->symbol_;
} }
static WEBP_INLINE int ReadSymbol(const HuffmanTree* tree,
VP8LBitReader* const br) {
const int read_safe = (br->pos_ + 8 > br->len_);
if (!read_safe) {
return ReadSymbolUnsafe(tree, br);
} else {
const HuffmanTreeNode* node = tree->root_;
assert(node != NULL);
while (!HuffmanTreeNodeIsLeaf(node)) {
node = HuffmanTreeNextNode(node, VP8LReadOneBit(br));
}
return node->symbol_;
}
}
static int ReadHuffmanCodeLengths( static int ReadHuffmanCodeLengths(
VP8LDecoder* const dec, const int* const code_length_code_lengths, VP8LDecoder* const dec, const int* const code_length_code_lengths,
int num_symbols, int* const code_lengths) { int num_symbols, int* const code_lengths) {
...@@ -327,10 +318,10 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize, ...@@ -327,10 +318,10 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
hdr->huffman_subsample_bits_ = huffman_precision; hdr->huffman_subsample_bits_ = huffman_precision;
for (i = 0; i < huffman_pixs; ++i) { for (i = 0; i < huffman_pixs; ++i) {
// The huffman data is stored in red and green bytes. // The huffman data is stored in red and green bytes.
const int index = (huffman_image[i] >> 8) & 0xffff; const int group = (huffman_image[i] >> 8) & 0xffff;
huffman_image[i] = index; huffman_image[i] = group;
if (index >= num_htree_groups) { if (group >= num_htree_groups) {
num_htree_groups = index + 1; num_htree_groups = group + 1;
} }
} }
} }
...@@ -1146,9 +1137,9 @@ int VP8LDecodeHeader(VP8LDecoder* const dec, VP8Io* const io) { ...@@ -1146,9 +1137,9 @@ int VP8LDecodeHeader(VP8LDecoder* const dec, VP8Io* const io) {
return 1; return 1;
Error: Error:
VP8LClear(dec); VP8LClear(dec);
assert(dec->status_ != VP8_STATUS_OK); assert(dec->status_ != VP8_STATUS_OK);
return 0; return 0;
} }
int VP8LDecodeImage(VP8LDecoder* const dec) { int VP8LDecodeImage(VP8LDecoder* const dec) {
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
#include "./vp8i.h" #include "./vp8i.h"
#include "./vp8li.h" #include "./vp8li.h"
#include "./webpi.h" #include "./webpi.h"
#include "../webp/format_constants.h" #include "../webp/mux_types.h" // ALPHA_FLAG
#if defined(__cplusplus) || defined(c_plusplus) #if defined(__cplusplus) || defined(c_plusplus)
extern "C" { extern "C" {
...@@ -40,8 +40,8 @@ extern "C" { ...@@ -40,8 +40,8 @@ extern "C" {
// 20..23 VP8X flags bit-map corresponding to the chunk-types present. // 20..23 VP8X flags bit-map corresponding to the chunk-types present.
// 24..26 Width of the Canvas Image. // 24..26 Width of the Canvas Image.
// 27..29 Height of the Canvas Image. // 27..29 Height of the Canvas Image.
// There can be extra chunks after the "VP8X" chunk (ICCP, TILE, FRM, VP8, // There can be extra chunks after the "VP8X" chunk (ICCP, FRGM, ANMF, VP8,
// META ...) // VP8L, XMP, EXIF ...)
// All sizes are in little-endian order. // All sizes are in little-endian order.
// Note: chunk data size must be padded to multiple of 2 when written. // Note: chunk data size must be padded to multiple of 2 when written.
...@@ -276,6 +276,7 @@ static VP8StatusCode ParseHeadersInternal(const uint8_t* data, ...@@ -276,6 +276,7 @@ static VP8StatusCode ParseHeadersInternal(const uint8_t* data,
int* const width, int* const width,
int* const height, int* const height,
int* const has_alpha, int* const has_alpha,
int* const has_animation,
WebPHeaderStructure* const headers) { WebPHeaderStructure* const headers) {
int found_riff = 0; int found_riff = 0;
int found_vp8x = 0; int found_vp8x = 0;
...@@ -308,7 +309,8 @@ static VP8StatusCode ParseHeadersInternal(const uint8_t* data, ...@@ -308,7 +309,8 @@ static VP8StatusCode ParseHeadersInternal(const uint8_t* data,
// necessary to send VP8X chunk to the decoder. // necessary to send VP8X chunk to the decoder.
return VP8_STATUS_BITSTREAM_ERROR; return VP8_STATUS_BITSTREAM_ERROR;
} }
if (has_alpha != NULL) *has_alpha = !!(flags & ALPHA_FLAG_BIT); if (has_alpha != NULL) *has_alpha = !!(flags & ALPHA_FLAG);
if (has_animation != NULL) *has_animation = !!(flags & ANIMATION_FLAG);
if (found_vp8x && headers == NULL) { if (found_vp8x && headers == NULL) {
return VP8_STATUS_OK; // Return features from VP8X header. return VP8_STATUS_OK; // Return features from VP8X header.
} }
...@@ -370,10 +372,19 @@ static VP8StatusCode ParseHeadersInternal(const uint8_t* data, ...@@ -370,10 +372,19 @@ static VP8StatusCode ParseHeadersInternal(const uint8_t* data,
} }
VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers) { VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers) {
VP8StatusCode status;
int has_animation = 0;
assert(headers != NULL); assert(headers != NULL);
// fill out headers, ignore width/height/has_alpha. // fill out headers, ignore width/height/has_alpha.
return ParseHeadersInternal(headers->data, headers->data_size, status = ParseHeadersInternal(headers->data, headers->data_size,
NULL, NULL, NULL, headers); NULL, NULL, NULL, &has_animation, headers);
if (status == VP8_STATUS_OK || status == VP8_STATUS_NOT_ENOUGH_DATA) {
// TODO(jzern): full support of animation frames will require API additions.
if (has_animation) {
status = VP8_STATUS_UNSUPPORTED_FEATURE;
}
}
return status;
} }
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
...@@ -625,10 +636,11 @@ static VP8StatusCode GetFeatures(const uint8_t* const data, size_t data_size, ...@@ -625,10 +636,11 @@ static VP8StatusCode GetFeatures(const uint8_t* const data, size_t data_size,
} }
DefaultFeatures(features); DefaultFeatures(features);
// Only parse enough of the data to retrieve width/height/has_alpha. // Only parse enough of the data to retrieve the features.
return ParseHeadersInternal(data, data_size, return ParseHeadersInternal(data, data_size,
&features->width, &features->height, &features->width, &features->height,
&features->has_alpha, NULL); &features->has_alpha, &features->has_animation,
NULL);
} }
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
...@@ -672,19 +684,13 @@ int WebPInitDecoderConfigInternal(WebPDecoderConfig* config, ...@@ -672,19 +684,13 @@ int WebPInitDecoderConfigInternal(WebPDecoderConfig* config,
VP8StatusCode WebPGetFeaturesInternal(const uint8_t* data, size_t data_size, VP8StatusCode WebPGetFeaturesInternal(const uint8_t* data, size_t data_size,
WebPBitstreamFeatures* features, WebPBitstreamFeatures* features,
int version) { int version) {
VP8StatusCode status;
if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_DECODER_ABI_VERSION)) { if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_DECODER_ABI_VERSION)) {
return VP8_STATUS_INVALID_PARAM; // version mismatch return VP8_STATUS_INVALID_PARAM; // version mismatch
} }
if (features == NULL) { if (features == NULL) {
return VP8_STATUS_INVALID_PARAM; return VP8_STATUS_INVALID_PARAM;
} }
return GetFeatures(data, data_size, features);
status = GetFeatures(data, data_size, features);
if (status == VP8_STATUS_NOT_ENOUGH_DATA) {
return VP8_STATUS_BITSTREAM_ERROR; // Not-enough-data treated as error.
}
return status;
} }
VP8StatusCode WebPDecode(const uint8_t* data, size_t data_size, VP8StatusCode WebPDecode(const uint8_t* data, size_t data_size,
......
...@@ -61,10 +61,10 @@ typedef struct { ...@@ -61,10 +61,10 @@ typedef struct {
} WebPHeaderStructure; } WebPHeaderStructure;
// Skips over all valid chunks prior to the first VP8/VP8L frame header. // Skips over all valid chunks prior to the first VP8/VP8L frame header.
// Returns VP8_STATUS_OK on success, // Returns: VP8_STATUS_OK, VP8_STATUS_BITSTREAM_ERROR (invalid header/chunk),
// VP8_STATUS_BITSTREAM_ERROR if an invalid header/chunk is found, and // VP8_STATUS_NOT_ENOUGH_DATA (partial input) or VP8_STATUS_UNSUPPORTED_FEATURE
// VP8_STATUS_NOT_ENOUGH_DATA if case of insufficient data. // in the case of non-decodable features (animation for instance).
// In 'headers', compressed_size, offset, alpha_data, alpha_size and lossless // In 'headers', compressed_size, offset, alpha_data, alpha_size, and lossless
// fields are updated appropriately upon success. // fields are updated appropriately upon success.
VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers); VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers);
......
...@@ -426,11 +426,16 @@ static void HE8uv(uint8_t *dst) { // horizontal ...@@ -426,11 +426,16 @@ static void HE8uv(uint8_t *dst) { // horizontal
} }
// helper for chroma-DC predictions // helper for chroma-DC predictions
static WEBP_INLINE void Put8x8uv(uint64_t v, uint8_t* dst) { static WEBP_INLINE void Put8x8uv(uint8_t value, uint8_t* dst) {
int j; int j;
#ifndef WEBP_REFERENCE_IMPLEMENTATION
const uint64_t v = (uint64_t)value * 0x0101010101010101ULL;
for (j = 0; j < 8; ++j) { for (j = 0; j < 8; ++j) {
*(uint64_t*)(dst + j * BPS) = v; *(uint64_t*)(dst + j * BPS) = v;
} }
#else
for (j = 0; j < 8; ++j) memset(dst + j * BPS, value, 8);
#endif
} }
static void DC8uv(uint8_t *dst) { // DC static void DC8uv(uint8_t *dst) { // DC
...@@ -439,7 +444,7 @@ static void DC8uv(uint8_t *dst) { // DC ...@@ -439,7 +444,7 @@ static void DC8uv(uint8_t *dst) { // DC
for (i = 0; i < 8; ++i) { for (i = 0; i < 8; ++i) {
dc0 += dst[i - BPS] + dst[-1 + i * BPS]; dc0 += dst[i - BPS] + dst[-1 + i * BPS];
} }
Put8x8uv((uint64_t)((dc0 >> 4) * 0x0101010101010101ULL), dst); Put8x8uv(dc0 >> 4, dst);
} }
static void DC8uvNoLeft(uint8_t *dst) { // DC with no left samples static void DC8uvNoLeft(uint8_t *dst) { // DC with no left samples
...@@ -448,7 +453,7 @@ static void DC8uvNoLeft(uint8_t *dst) { // DC with no left samples ...@@ -448,7 +453,7 @@ static void DC8uvNoLeft(uint8_t *dst) { // DC with no left samples
for (i = 0; i < 8; ++i) { for (i = 0; i < 8; ++i) {
dc0 += dst[i - BPS]; dc0 += dst[i - BPS];
} }
Put8x8uv((uint64_t)((dc0 >> 3) * 0x0101010101010101ULL), dst); Put8x8uv(dc0 >> 3, dst);
} }
static void DC8uvNoTop(uint8_t *dst) { // DC with no top samples static void DC8uvNoTop(uint8_t *dst) { // DC with no top samples
...@@ -457,11 +462,11 @@ static void DC8uvNoTop(uint8_t *dst) { // DC with no top samples ...@@ -457,11 +462,11 @@ static void DC8uvNoTop(uint8_t *dst) { // DC with no top samples
for (i = 0; i < 8; ++i) { for (i = 0; i < 8; ++i) {
dc0 += dst[-1 + i * BPS]; dc0 += dst[-1 + i * BPS];
} }
Put8x8uv((uint64_t)((dc0 >> 3) * 0x0101010101010101ULL), dst); Put8x8uv(dc0 >> 3, dst);
} }
static void DC8uvNoTopLeft(uint8_t *dst) { // DC with nothing static void DC8uvNoTopLeft(uint8_t *dst) { // DC with nothing
Put8x8uv(0x8080808080808080ULL, dst); Put8x8uv(0x80, dst);
} }
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
......
...@@ -79,7 +79,7 @@ extern "C" { ...@@ -79,7 +79,7 @@ extern "C" {
"vld4.8 {" #c1"[6], " #c2"[6], " #c3"[6], " #c4"[6]}," #b1 "," #stride"\n" \ "vld4.8 {" #c1"[6], " #c2"[6], " #c3"[6], " #c4"[6]}," #b1 "," #stride"\n" \
"vld4.8 {" #c1"[7], " #c2"[7], " #c3"[7], " #c4"[7]}," #b2 "," #stride"\n" "vld4.8 {" #c1"[7], " #c2"[7], " #c3"[7], " #c4"[7]}," #b2 "," #stride"\n"
#define STORE8x2(c1, c2, p,stride) \ #define STORE8x2(c1, c2, p, stride) \
"vst2.8 {" #c1"[0], " #c2"[0]}," #p "," #stride " \n" \ "vst2.8 {" #c1"[0], " #c2"[0]}," #p "," #stride " \n" \
"vst2.8 {" #c1"[1], " #c2"[1]}," #p "," #stride " \n" \ "vst2.8 {" #c1"[1], " #c2"[1]}," #p "," #stride " \n" \
"vst2.8 {" #c1"[2], " #c2"[2]}," #p "," #stride " \n" \ "vst2.8 {" #c1"[2], " #c2"[2]}," #p "," #stride " \n" \
...@@ -155,6 +155,9 @@ static void SimpleHFilter16iNEON(uint8_t* p, int stride, int thresh) { ...@@ -155,6 +155,9 @@ static void SimpleHFilter16iNEON(uint8_t* p, int stride, int thresh) {
} }
} }
//-----------------------------------------------------------------------------
// Inverse transforms (Paragraph 14.4)
static void TransformOneNEON(const int16_t *in, uint8_t *dst) { static void TransformOneNEON(const int16_t *in, uint8_t *dst) {
const int kBPS = BPS; const int kBPS = BPS;
const int16_t constants[] = {20091, 17734, 0, 0}; const int16_t constants[] = {20091, 17734, 0, 0};
...@@ -311,6 +314,73 @@ static void TransformTwoNEON(const int16_t* in, uint8_t* dst, int do_two) { ...@@ -311,6 +314,73 @@ static void TransformTwoNEON(const int16_t* in, uint8_t* dst, int do_two) {
} }
} }
// Inverse Walsh-Hadamard transform, NEON inline-assembly version.
// Reads the 16 int16_t coefficients at 'in' with a single load into q0/q1,
// runs an add/subtract butterfly pass, transposes the 4x4 intermediate,
// runs a second butterfly pass with a +3 bias on the first term, then shifts
// right by 3 while narrowing back to 16 bits.
// Each of the 16 results is written as one 16-bit lane, and 'out' is advanced
// by kStep (32) bytes after every store, i.e. results land 16 int16_t apart.
// NOTE(review): presumably these are the DC slots of 16 consecutive
// coefficient blocks -- confirm against the C reference implementation.
static void TransformWHT(const int16_t* in, int16_t* out) {
const int kStep = 32; // The store is only incrementing the pointer as if we
// had stored a single byte.
// (kStep is used below as the post-increment applied to 'out' by each store.)
__asm__ volatile (
// part 1
// load data into q0, q1
"vld1.16 {q0, q1}, [%[in]] \n"
// First butterfly pass; the widening add/sub produce 32-bit intermediates.
"vaddl.s16 q2, d0, d3 \n" // a0 = in[0] + in[12]
"vaddl.s16 q3, d1, d2 \n" // a1 = in[4] + in[8]
"vsubl.s16 q4, d1, d2 \n" // a2 = in[4] - in[8]
"vsubl.s16 q5, d0, d3 \n" // a3 = in[0] - in[12]
"vadd.s32 q0, q2, q3 \n" // tmp[0] = a0 + a1
"vsub.s32 q2, q2, q3 \n" // tmp[8] = a0 - a1
"vadd.s32 q1, q5, q4 \n" // tmp[4] = a3 + a2
"vsub.s32 q3, q5, q4 \n" // tmp[12] = a3 - a2
// Transpose
// q0 = tmp[0, 4, 8, 12], q1 = tmp[2, 6, 10, 14]
// q2 = tmp[1, 5, 9, 13], q3 = tmp[3, 7, 11, 15]
"vswp d1, d4 \n" // vtrn.64 q0, q2
"vswp d3, d6 \n" // vtrn.64 q1, q3
"vtrn.32 q0, q1 \n"
"vtrn.32 q2, q3 \n"
// Second butterfly pass on the transposed data. The bias of 3 is added once
// to the first term, so every output carries it before the >> 3 below.
"vmov.s32 q4, #3 \n" // dc = 3
"vadd.s32 q0, q0, q4 \n" // dc = tmp[0] + 3
"vadd.s32 q6, q0, q3 \n" // a0 = dc + tmp[3]
"vadd.s32 q7, q1, q2 \n" // a1 = tmp[1] + tmp[2]
"vsub.s32 q8, q1, q2 \n" // a2 = tmp[1] - tmp[2]
"vsub.s32 q9, q0, q3 \n" // a3 = dc - tmp[3]
// Combine, shift right by 3 and narrow each 32-bit lane to 16 bits (d0..d3).
"vadd.s32 q0, q6, q7 \n"
"vshrn.s32 d0, q0, #3 \n" // (a0 + a1) >> 3
"vadd.s32 q1, q9, q8 \n"
"vshrn.s32 d1, q1, #3 \n" // (a3 + a2) >> 3
"vsub.s32 q2, q6, q7 \n"
"vshrn.s32 d2, q2, #3 \n" // (a0 - a1) >> 3
"vsub.s32 q3, q9, q8 \n"
"vshrn.s32 d3, q3, #3 \n" // (a3 - a2) >> 3
// set the results to output
// Lanes are emitted in the order d0..d3 for lane 0, then lane 1, 2 and 3;
// 'out' is bumped by kStep after each single-lane store.
"vst1.16 d0[0], [%[out]], %[kStep] \n"
"vst1.16 d1[0], [%[out]], %[kStep] \n"
"vst1.16 d2[0], [%[out]], %[kStep] \n"
"vst1.16 d3[0], [%[out]], %[kStep] \n"
"vst1.16 d0[1], [%[out]], %[kStep] \n"
"vst1.16 d1[1], [%[out]], %[kStep] \n"
"vst1.16 d2[1], [%[out]], %[kStep] \n"
"vst1.16 d3[1], [%[out]], %[kStep] \n"
"vst1.16 d0[2], [%[out]], %[kStep] \n"
"vst1.16 d1[2], [%[out]], %[kStep] \n"
"vst1.16 d2[2], [%[out]], %[kStep] \n"
"vst1.16 d3[2], [%[out]], %[kStep] \n"
"vst1.16 d0[3], [%[out]], %[kStep] \n"
"vst1.16 d1[3], [%[out]], %[kStep] \n"
"vst1.16 d2[3], [%[out]], %[kStep] \n"
"vst1.16 d3[3], [%[out]], %[kStep] \n"
// 'out' is a read-write operand because the stores post-increment it.
: [out] "+r"(out) // modified registers
: [in] "r"(in), [kStep] "r"(kStep) // constants
: "memory", "q0", "q1", "q2", "q3", "q4",
"q5", "q6", "q7", "q8", "q9" // clobbered
);
}
#endif // WEBP_USE_NEON #endif // WEBP_USE_NEON
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
...@@ -321,6 +391,7 @@ extern void VP8DspInitNEON(void); ...@@ -321,6 +391,7 @@ extern void VP8DspInitNEON(void);
void VP8DspInitNEON(void) { void VP8DspInitNEON(void) {
#if defined(WEBP_USE_NEON) #if defined(WEBP_USE_NEON)
VP8Transform = TransformTwoNEON; VP8Transform = TransformTwoNEON;
VP8TransformWHT = TransformWHT;
VP8SimpleVFilter16 = SimpleVFilter16NEON; VP8SimpleVFilter16 = SimpleVFilter16NEON;
VP8SimpleHFilter16 = SimpleHFilter16NEON; VP8SimpleHFilter16 = SimpleHFilter16NEON;
......
...@@ -194,7 +194,7 @@ static void TransformSSE2(const int16_t* in, uint8_t* dst, int do_two) { ...@@ -194,7 +194,7 @@ static void TransformSSE2(const int16_t* in, uint8_t* dst, int do_two) {
// Add inverse transform to 'dst' and store. // Add inverse transform to 'dst' and store.
{ {
const __m128i zero = _mm_set1_epi16(0); const __m128i zero = _mm_setzero_si128();
// Load the reference(s). // Load the reference(s).
__m128i dst0, dst1, dst2, dst3; __m128i dst0, dst1, dst2, dst3;
if (do_two) { if (do_two) {
...@@ -278,14 +278,14 @@ static void TransformSSE2(const int16_t* in, uint8_t* dst, int do_two) { ...@@ -278,14 +278,14 @@ static void TransformSSE2(const int16_t* in, uint8_t* dst, int do_two) {
#define GET_NOTHEV(p1, p0, q0, q1, hev_thresh, not_hev) { \ #define GET_NOTHEV(p1, p0, q0, q1, hev_thresh, not_hev) { \
const __m128i zero = _mm_setzero_si128(); \ const __m128i zero = _mm_setzero_si128(); \
const __m128i t1 = MM_ABS(p1, p0); \ const __m128i t_1 = MM_ABS(p1, p0); \
const __m128i t2 = MM_ABS(q1, q0); \ const __m128i t_2 = MM_ABS(q1, q0); \
\ \
const __m128i h = _mm_set1_epi8(hev_thresh); \ const __m128i h = _mm_set1_epi8(hev_thresh); \
const __m128i t3 = _mm_subs_epu8(t1, h); /* abs(p1 - p0) - hev_tresh */ \ const __m128i t_3 = _mm_subs_epu8(t_1, h); /* abs(p1 - p0) - hev_tresh */ \
const __m128i t4 = _mm_subs_epu8(t2, h); /* abs(q1 - q0) - hev_tresh */ \ const __m128i t_4 = _mm_subs_epu8(t_2, h); /* abs(q1 - q0) - hev_tresh */ \
\ \
not_hev = _mm_or_si128(t3, t4); \ not_hev = _mm_or_si128(t_3, t_4); \
not_hev = _mm_cmpeq_epi8(not_hev, zero); /* not_hev <= t1 && not_hev <= t2 */\ not_hev = _mm_cmpeq_epi8(not_hev, zero); /* not_hev <= t1 && not_hev <= t2 */\
} }
...@@ -314,13 +314,13 @@ static void TransformSSE2(const int16_t* in, uint8_t* dst, int do_two) { ...@@ -314,13 +314,13 @@ static void TransformSSE2(const int16_t* in, uint8_t* dst, int do_two) {
// Updates values of 2 pixels at MB edge during complex filtering. // Updates values of 2 pixels at MB edge during complex filtering.
// Update operations: // Update operations:
// q = q - a and p = p + a; where a = [(a_hi >> 7), (a_lo >> 7)] // q = q - delta and p = p + delta; where delta = [(a_hi >> 7), (a_lo >> 7)]
#define UPDATE_2PIXELS(pi, qi, a_lo, a_hi) { \ #define UPDATE_2PIXELS(pi, qi, a_lo, a_hi) { \
const __m128i a_lo7 = _mm_srai_epi16(a_lo, 7); \ const __m128i a_lo7 = _mm_srai_epi16(a_lo, 7); \
const __m128i a_hi7 = _mm_srai_epi16(a_hi, 7); \ const __m128i a_hi7 = _mm_srai_epi16(a_hi, 7); \
const __m128i a = _mm_packs_epi16(a_lo7, a_hi7); \ const __m128i delta = _mm_packs_epi16(a_lo7, a_hi7); \
pi = _mm_adds_epi8(pi, a); \ pi = _mm_adds_epi8(pi, delta); \
qi = _mm_subs_epi8(qi, a); \ qi = _mm_subs_epi8(qi, delta); \
} }
static void NeedsFilter(const __m128i* p1, const __m128i* p0, const __m128i* q0, static void NeedsFilter(const __m128i* p1, const __m128i* p0, const __m128i* q0,
......
...@@ -49,8 +49,6 @@ extern VP8CPUInfo VP8GetCPUInfo; ...@@ -49,8 +49,6 @@ extern VP8CPUInfo VP8GetCPUInfo;
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Encoding // Encoding
int VP8GetAlpha(const int histo[]);
// Transforms // Transforms
// VP8Idct: Does one of two inverse transforms. If do_two is set, the transforms // VP8Idct: Does one of two inverse transforms. If do_two is set, the transforms
// will be done for (ref, in, dst) and (ref + 4, in + 16, dst + 4). // will be done for (ref, in, dst) and (ref + 4, in + 16, dst + 4).
...@@ -85,10 +83,11 @@ typedef int (*VP8QuantizeBlock)(int16_t in[16], int16_t out[16], ...@@ -85,10 +83,11 @@ typedef int (*VP8QuantizeBlock)(int16_t in[16], int16_t out[16],
int n, const struct VP8Matrix* const mtx); int n, const struct VP8Matrix* const mtx);
extern VP8QuantizeBlock VP8EncQuantizeBlock; extern VP8QuantizeBlock VP8EncQuantizeBlock;
// Compute susceptibility based on DCT-coeff histograms: // Collect histogram for susceptibility calculation and accumulate in histo[].
// the higher, the "easier" the macroblock is to compress. struct VP8Histogram;
typedef int (*VP8CHisto)(const uint8_t* ref, const uint8_t* pred, typedef void (*VP8CHisto)(const uint8_t* ref, const uint8_t* pred,
int start_block, int end_block); int start_block, int end_block,
struct VP8Histogram* const histo);
extern const int VP8DspScan[16 + 4 + 4]; extern const int VP8DspScan[16 + 4 + 4];
extern VP8CHisto VP8CollectHistogram; extern VP8CHisto VP8CollectHistogram;
...@@ -104,7 +103,7 @@ extern VP8DecIdct2 VP8Transform; ...@@ -104,7 +103,7 @@ extern VP8DecIdct2 VP8Transform;
extern VP8DecIdct VP8TransformUV; extern VP8DecIdct VP8TransformUV;
extern VP8DecIdct VP8TransformDC; extern VP8DecIdct VP8TransformDC;
extern VP8DecIdct VP8TransformDCUV; extern VP8DecIdct VP8TransformDCUV;
extern void (*VP8TransformWHT)(const int16_t* in, int16_t* out); extern VP8WHT VP8TransformWHT;
// *dst is the destination block, with stride BPS. Boundary samples are // *dst is the destination block, with stride BPS. Boundary samples are
// assumed accessible when needed. // assumed accessible when needed.
...@@ -159,6 +158,9 @@ extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */]; ...@@ -159,6 +158,9 @@ extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */];
// Initializes SSE2 version of the fancy upsamplers. // Initializes SSE2 version of the fancy upsamplers.
void WebPInitUpsamplersSSE2(void); void WebPInitUpsamplersSSE2(void);
// NEON version
void WebPInitUpsamplersNEON(void);
#endif // FANCY_UPSAMPLING #endif // FANCY_UPSAMPLING
// Point-sampling methods. // Point-sampling methods.
...@@ -200,6 +202,7 @@ extern void (*WebPApplyAlphaMultiply4444)( ...@@ -200,6 +202,7 @@ extern void (*WebPApplyAlphaMultiply4444)(
void WebPInitPremultiply(void); void WebPInitPremultiply(void);
void WebPInitPremultiplySSE2(void); // should not be called directly. void WebPInitPremultiplySSE2(void); // should not be called directly.
void WebPInitPremultiplyNEON(void);
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
......
...@@ -17,31 +17,18 @@ ...@@ -17,31 +17,18 @@
extern "C" { extern "C" {
#endif #endif
//------------------------------------------------------------------------------ static WEBP_INLINE uint8_t clip_8b(int v) {
// Compute susceptibility based on DCT-coeff histograms: return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255;
// the higher, the "easier" the macroblock is to compress.
static int ClipAlpha(int alpha) {
return alpha < 0 ? 0 : alpha > 255 ? 255 : alpha;
} }
int VP8GetAlpha(const int histo[MAX_COEFF_THRESH + 1]) { static WEBP_INLINE int clip_max(int v, int max) {
int num = 0, den = 0, val = 0; return (v > max) ? max : v;
int k;
int alpha;
// note: changing this loop to avoid the numerous "k + 1" slows things down.
for (k = 0; k < MAX_COEFF_THRESH; ++k) {
if (histo[k + 1]) {
val += histo[k + 1];
num += val * (k + 1);
den += (k + 1) * (k + 1);
}
}
// we scale the value to a usable [0..255] range
alpha = den ? 10 * num / den - 5 : 0;
return ClipAlpha(alpha);
} }
//------------------------------------------------------------------------------
// Compute susceptibility based on DCT-coeff histograms:
// the higher, the "easier" the macroblock is to compress.
const int VP8DspScan[16 + 4 + 4] = { const int VP8DspScan[16 + 4 + 4] = {
// Luma // Luma
0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS, 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS,
...@@ -53,27 +40,23 @@ const int VP8DspScan[16 + 4 + 4] = { ...@@ -53,27 +40,23 @@ const int VP8DspScan[16 + 4 + 4] = {
8 + 0 * BPS, 12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS // V 8 + 0 * BPS, 12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS // V
}; };
static int CollectHistogram(const uint8_t* ref, const uint8_t* pred, static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
int start_block, int end_block) { int start_block, int end_block,
int histo[MAX_COEFF_THRESH + 1] = { 0 }; VP8Histogram* const histo) {
int16_t out[16]; int j;
int j, k;
for (j = start_block; j < end_block; ++j) { for (j = start_block; j < end_block; ++j) {
VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out); int k;
int16_t out[16];
// Convert coefficients to bin (within out[]). VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
for (k = 0; k < 16; ++k) {
const int v = abs(out[k]) >> 2;
out[k] = (v > MAX_COEFF_THRESH) ? MAX_COEFF_THRESH : v;
}
// Use bin to update histogram. // Convert coefficients to bin.
for (k = 0; k < 16; ++k) { for (k = 0; k < 16; ++k) {
histo[out[k]]++; const int v = abs(out[k]) >> 3; // TODO(skal): add rounding?
const int clipped_value = clip_max(v, MAX_COEFF_THRESH);
histo->distribution[clipped_value]++;
} }
} }
return VP8GetAlpha(histo);
} }
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
...@@ -89,15 +72,12 @@ static void InitTables(void) { ...@@ -89,15 +72,12 @@ static void InitTables(void) {
if (!tables_ok) { if (!tables_ok) {
int i; int i;
for (i = -255; i <= 255 + 255; ++i) { for (i = -255; i <= 255 + 255; ++i) {
clip1[255 + i] = (i < 0) ? 0 : (i > 255) ? 255 : i; clip1[255 + i] = clip_8b(i);
} }
tables_ok = 1; tables_ok = 1;
} }
} }
static WEBP_INLINE uint8_t clip_8b(int v) {
return (!(v & ~0xff)) ? v : v < 0 ? 0 : 255;
}
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Transforms (Paragraph 14.4) // Transforms (Paragraph 14.4)
...@@ -154,25 +134,25 @@ static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) { ...@@ -154,25 +134,25 @@ static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
int i; int i;
int tmp[16]; int tmp[16];
for (i = 0; i < 4; ++i, src += BPS, ref += BPS) { for (i = 0; i < 4; ++i, src += BPS, ref += BPS) {
const int d0 = src[0] - ref[0]; const int d0 = src[0] - ref[0]; // 9bit dynamic range ([-255,255])
const int d1 = src[1] - ref[1]; const int d1 = src[1] - ref[1];
const int d2 = src[2] - ref[2]; const int d2 = src[2] - ref[2];
const int d3 = src[3] - ref[3]; const int d3 = src[3] - ref[3];
const int a0 = (d0 + d3) << 3; const int a0 = (d0 + d3); // 10b [-510,510]
const int a1 = (d1 + d2) << 3; const int a1 = (d1 + d2);
const int a2 = (d1 - d2) << 3; const int a2 = (d1 - d2);
const int a3 = (d0 - d3) << 3; const int a3 = (d0 - d3);
tmp[0 + i * 4] = (a0 + a1); tmp[0 + i * 4] = (a0 + a1) << 3; // 14b [-8160,8160]
tmp[1 + i * 4] = (a2 * 2217 + a3 * 5352 + 14500) >> 12; tmp[1 + i * 4] = (a2 * 2217 + a3 * 5352 + 1812) >> 9; // [-7536,7542]
tmp[2 + i * 4] = (a0 - a1); tmp[2 + i * 4] = (a0 - a1) << 3;
tmp[3 + i * 4] = (a3 * 2217 - a2 * 5352 + 7500) >> 12; tmp[3 + i * 4] = (a3 * 2217 - a2 * 5352 + 937) >> 9;
} }
for (i = 0; i < 4; ++i) { for (i = 0; i < 4; ++i) {
const int a0 = (tmp[0 + i] + tmp[12 + i]); const int a0 = (tmp[0 + i] + tmp[12 + i]); // 15b
const int a1 = (tmp[4 + i] + tmp[ 8 + i]); const int a1 = (tmp[4 + i] + tmp[ 8 + i]);
const int a2 = (tmp[4 + i] - tmp[ 8 + i]); const int a2 = (tmp[4 + i] - tmp[ 8 + i]);
const int a3 = (tmp[0 + i] - tmp[12 + i]); const int a3 = (tmp[0 + i] - tmp[12 + i]);
out[0 + i] = (a0 + a1 + 7) >> 4; out[0 + i] = (a0 + a1 + 7) >> 4; // 12b
out[4 + i] = ((a2 * 2217 + a3 * 5352 + 12000) >> 16) + (a3 != 0); out[4 + i] = ((a2 * 2217 + a3 * 5352 + 12000) >> 16) + (a3 != 0);
out[8 + i] = (a0 - a1 + 7) >> 4; out[8 + i] = (a0 - a1 + 7) >> 4;
out[12+ i] = ((a3 * 2217 - a2 * 5352 + 51000) >> 16); out[12+ i] = ((a3 * 2217 - a2 * 5352 + 51000) >> 16);
...@@ -589,30 +569,30 @@ static int TTransform(const uint8_t* in, const uint16_t* w) { ...@@ -589,30 +569,30 @@ static int TTransform(const uint8_t* in, const uint16_t* w) {
int i; int i;
// horizontal pass // horizontal pass
for (i = 0; i < 4; ++i, in += BPS) { for (i = 0; i < 4; ++i, in += BPS) {
const int a0 = (in[0] + in[2]) << 2; const int a0 = in[0] + in[2];
const int a1 = (in[1] + in[3]) << 2; const int a1 = in[1] + in[3];
const int a2 = (in[1] - in[3]) << 2; const int a2 = in[1] - in[3];
const int a3 = (in[0] - in[2]) << 2; const int a3 = in[0] - in[2];
tmp[0 + i * 4] = a0 + a1 + (a0 != 0); tmp[0 + i * 4] = a0 + a1;
tmp[1 + i * 4] = a3 + a2; tmp[1 + i * 4] = a3 + a2;
tmp[2 + i * 4] = a3 - a2; tmp[2 + i * 4] = a3 - a2;
tmp[3 + i * 4] = a0 - a1; tmp[3 + i * 4] = a0 - a1;
} }
// vertical pass // vertical pass
for (i = 0; i < 4; ++i, ++w) { for (i = 0; i < 4; ++i, ++w) {
const int a0 = (tmp[0 + i] + tmp[8 + i]); const int a0 = tmp[0 + i] + tmp[8 + i];
const int a1 = (tmp[4 + i] + tmp[12+ i]); const int a1 = tmp[4 + i] + tmp[12+ i];
const int a2 = (tmp[4 + i] - tmp[12+ i]); const int a2 = tmp[4 + i] - tmp[12+ i];
const int a3 = (tmp[0 + i] - tmp[8 + i]); const int a3 = tmp[0 + i] - tmp[8 + i];
const int b0 = a0 + a1; const int b0 = a0 + a1;
const int b1 = a3 + a2; const int b1 = a3 + a2;
const int b2 = a3 - a2; const int b2 = a3 - a2;
const int b3 = a0 - a1; const int b3 = a0 - a1;
// abs((b + (b<0) + 3) >> 3) = (abs(b) + 3) >> 3
sum += w[ 0] * ((abs(b0) + 3) >> 3); sum += w[ 0] * abs(b0);
sum += w[ 4] * ((abs(b1) + 3) >> 3); sum += w[ 4] * abs(b1);
sum += w[ 8] * ((abs(b2) + 3) >> 3); sum += w[ 8] * abs(b2);
sum += w[12] * ((abs(b3) + 3) >> 3); sum += w[12] * abs(b3);
} }
return sum; return sum;
} }
...@@ -621,7 +601,7 @@ static int Disto4x4(const uint8_t* const a, const uint8_t* const b, ...@@ -621,7 +601,7 @@ static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) { const uint16_t* const w) {
const int sum1 = TTransform(a, w); const int sum1 = TTransform(a, w);
const int sum2 = TTransform(b, w); const int sum2 = TTransform(b, w);
return (abs(sum2 - sum1) + 8) >> 4; return abs(sum2 - sum1) >> 5;
} }
static int Disto16x16(const uint8_t* const a, const uint8_t* const b, static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
...@@ -651,13 +631,13 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16], ...@@ -651,13 +631,13 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16],
for (; n < 16; ++n) { for (; n < 16; ++n) {
const int j = kZigzag[n]; const int j = kZigzag[n];
const int sign = (in[j] < 0); const int sign = (in[j] < 0);
int coeff = (sign ? -in[j] : in[j]) + mtx->sharpen_[j]; const int coeff = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];
if (coeff > 2047) coeff = 2047;
if (coeff > mtx->zthresh_[j]) { if (coeff > mtx->zthresh_[j]) {
const int Q = mtx->q_[j]; const int Q = mtx->q_[j];
const int iQ = mtx->iq_[j]; const int iQ = mtx->iq_[j];
const int B = mtx->bias_[j]; const int B = mtx->bias_[j];
out[n] = QUANTDIV(coeff, iQ, B); out[n] = QUANTDIV(coeff, iQ, B);
if (out[n] > MAX_LEVEL) out[n] = MAX_LEVEL;
if (sign) out[n] = -out[n]; if (sign) out[n] = -out[n];
in[j] = out[n] * Q; in[j] = out[n] * Q;
if (out[n]) last = n; if (out[n]) last = n;
...@@ -706,6 +686,7 @@ VP8QuantizeBlock VP8EncQuantizeBlock; ...@@ -706,6 +686,7 @@ VP8QuantizeBlock VP8EncQuantizeBlock;
VP8BlockCopy VP8Copy4x4; VP8BlockCopy VP8Copy4x4;
extern void VP8EncDspInitSSE2(void); extern void VP8EncDspInitSSE2(void);
extern void VP8EncDspInitNEON(void);
void VP8EncDspInit(void) { void VP8EncDspInit(void) {
InitTables(); InitTables();
...@@ -734,6 +715,10 @@ void VP8EncDspInit(void) { ...@@ -734,6 +715,10 @@ void VP8EncDspInit(void) {
if (VP8GetCPUInfo(kSSE2)) { if (VP8GetCPUInfo(kSSE2)) {
VP8EncDspInitSSE2(); VP8EncDspInitSSE2();
} }
#elif defined(WEBP_USE_NEON)
if (VP8GetCPUInfo(kNEON)) {
VP8EncDspInitNEON();
}
#endif #endif
} }
} }
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
...@@ -59,10 +59,20 @@ static WEBP_INLINE uint32_t VP8LSubSampleSize(uint32_t size, ...@@ -59,10 +59,20 @@ static WEBP_INLINE uint32_t VP8LSubSampleSize(uint32_t size,
return (size + (1 << sampling_bits) - 1) >> sampling_bits; return (size + (1 << sampling_bits) - 1) >> sampling_bits;
} }
// Faster logarithm for integers, with the property of log2(0) == 0. // Faster logarithm for integers. Small values use a look-up table.
float VP8LFastLog2(int v); #define LOG_LOOKUP_IDX_MAX 256
extern const float kLog2Table[LOG_LOOKUP_IDX_MAX];
extern const float kSLog2Table[LOG_LOOKUP_IDX_MAX];
extern float VP8LFastLog2Slow(int v);
extern float VP8LFastSLog2Slow(int v);
static WEBP_INLINE float VP8LFastLog2(int v) {
  // Values outside the look-up table range are delegated to the slow path;
  // everything else is read straight from kLog2Table.
  if (v >= LOG_LOOKUP_IDX_MAX) {
    return VP8LFastLog2Slow(v);
  }
  return kLog2Table[v];
}
// Fast calculation of v * log2(v) for integer input. // Fast calculation of v * log2(v) for integer input.
static WEBP_INLINE float VP8LFastSLog2(int v) { return VP8LFastLog2(v) * v; } static WEBP_INLINE float VP8LFastSLog2(int v) {
return (v < LOG_LOOKUP_IDX_MAX) ? kSLog2Table[v] : VP8LFastSLog2Slow(v);
}
// In-place difference of each component with mod 256. // In-place difference of each component with mod 256.
static WEBP_INLINE uint32_t VP8LSubPixels(uint32_t a, uint32_t b) { static WEBP_INLINE uint32_t VP8LSubPixels(uint32_t a, uint32_t b) {
......
...@@ -32,7 +32,7 @@ WebPUpsampleLinePairFunc WebPUpsamplers[MODE_LAST]; ...@@ -32,7 +32,7 @@ WebPUpsampleLinePairFunc WebPUpsamplers[MODE_LAST];
// ([3*a + b + 9*c + 3*d a + 3*b + 3*c + 9*d] [8 8]) / 16 // ([3*a + b + 9*c + 3*d a + 3*b + 3*c + 9*d] [8 8]) / 16
// We process u and v together stashed into 32bit (16bit each). // We process u and v together stashed into 32bit (16bit each).
#define LOAD_UV(u,v) ((u) | ((v) << 16)) #define LOAD_UV(u, v) ((u) | ((v) << 16))
#define UPSAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP) \ #define UPSAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP) \
static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
...@@ -327,6 +327,11 @@ void WebPInitUpsamplers(void) { ...@@ -327,6 +327,11 @@ void WebPInitUpsamplers(void) {
if (VP8GetCPUInfo(kSSE2)) { if (VP8GetCPUInfo(kSSE2)) {
WebPInitUpsamplersSSE2(); WebPInitUpsamplersSSE2();
} }
#endif
#if defined(WEBP_USE_NEON)
if (VP8GetCPUInfo(kNEON)) {
WebPInitUpsamplersNEON();
}
#endif #endif
} }
#endif // FANCY_UPSAMPLING #endif // FANCY_UPSAMPLING
...@@ -347,6 +352,11 @@ void WebPInitPremultiply(void) { ...@@ -347,6 +352,11 @@ void WebPInitPremultiply(void) {
if (VP8GetCPUInfo(kSSE2)) { if (VP8GetCPUInfo(kSSE2)) {
WebPInitPremultiplySSE2(); WebPInitPremultiplySSE2();
} }
#endif
#if defined(WEBP_USE_NEON)
if (VP8GetCPUInfo(kNEON)) {
WebPInitPremultiplyNEON();
}
#endif #endif
} }
#endif // FANCY_UPSAMPLING #endif // FANCY_UPSAMPLING
......
This diff is collapsed.
...@@ -51,12 +51,12 @@ extern "C" { ...@@ -51,12 +51,12 @@ extern "C" {
// pack and store two alternating pixel rows // pack and store two alternating pixel rows
#define PACK_AND_STORE(a, b, da, db, out) do { \ #define PACK_AND_STORE(a, b, da, db, out) do { \
const __m128i ta = _mm_avg_epu8(a, da); /* (9a + 3b + 3c + d + 8) / 16 */ \ const __m128i t_a = _mm_avg_epu8(a, da); /* (9a + 3b + 3c + d + 8) / 16 */ \
const __m128i tb = _mm_avg_epu8(b, db); /* (3a + 9b + c + 3d + 8) / 16 */ \ const __m128i t_b = _mm_avg_epu8(b, db); /* (3a + 9b + c + 3d + 8) / 16 */ \
const __m128i t1 = _mm_unpacklo_epi8(ta, tb); \ const __m128i t_1 = _mm_unpacklo_epi8(t_a, t_b); \
const __m128i t2 = _mm_unpackhi_epi8(ta, tb); \ const __m128i t_2 = _mm_unpackhi_epi8(t_a, t_b); \
_mm_store_si128(((__m128i*)(out)) + 0, t1); \ _mm_store_si128(((__m128i*)(out)) + 0, t_1); \
_mm_store_si128(((__m128i*)(out)) + 1, t2); \ _mm_store_si128(((__m128i*)(out)) + 1, t_2); \
} while (0) } while (0)
// Loads 17 pixels each from rows r1 and r2 and generates 32 pixels. // Loads 17 pixels each from rows r1 and r2 and generates 32 pixels.
...@@ -128,7 +128,7 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \ ...@@ -128,7 +128,7 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
const uint8_t* top_u, const uint8_t* top_v, \ const uint8_t* top_u, const uint8_t* top_v, \
const uint8_t* cur_u, const uint8_t* cur_v, \ const uint8_t* cur_u, const uint8_t* cur_v, \
uint8_t* top_dst, uint8_t* bottom_dst, int len) { \ uint8_t* top_dst, uint8_t* bottom_dst, int len) { \
int b; \ int block; \
/* 16 byte aligned array to cache reconstructed u and v */ \ /* 16 byte aligned array to cache reconstructed u and v */ \
uint8_t uv_buf[4 * 32 + 15]; \ uint8_t uv_buf[4 * 32 + 15]; \
uint8_t* const r_uv = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15); \ uint8_t* const r_uv = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15); \
...@@ -154,11 +154,11 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \ ...@@ -154,11 +154,11 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
FUNC(bottom_y[0], u0, v0, bottom_dst); \ FUNC(bottom_y[0], u0, v0, bottom_dst); \
} \ } \
\ \
for (b = 0; b < num_blocks; ++b) { \ for (block = 0; block < num_blocks; ++block) { \
UPSAMPLE_32PIXELS(top_u, cur_u, r_uv + 0 * 32); \ UPSAMPLE_32PIXELS(top_u, cur_u, r_uv + 0 * 32); \
UPSAMPLE_32PIXELS(top_v, cur_v, r_uv + 1 * 32); \ UPSAMPLE_32PIXELS(top_v, cur_v, r_uv + 1 * 32); \
CONVERT2RGB(FUNC, XSTEP, top_y, bottom_y, r_uv, top_dst, bottom_dst, \ CONVERT2RGB(FUNC, XSTEP, top_y, bottom_y, r_uv, top_dst, bottom_dst, \
32 * b + 1, 32) \ 32 * block + 1, 32) \
top_u += 16; \ top_u += 16; \
cur_u += 16; \ cur_u += 16; \
top_v += 16; \ top_v += 16; \
...@@ -211,3 +211,5 @@ void WebPInitPremultiplySSE2(void) { ...@@ -211,3 +211,5 @@ void WebPInitPremultiplySSE2(void) {
#if defined(__cplusplus) || defined(c_plusplus) #if defined(__cplusplus) || defined(c_plusplus)
} // extern "C" } // extern "C"
#endif #endif
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
extern "C" { extern "C" {
#endif #endif
enum { YUV_HALF = 1 << (YUV_FIX - 1) }; #ifdef WEBP_YUV_USE_TABLE
int16_t VP8kVToR[256], VP8kUToB[256]; int16_t VP8kVToR[256], VP8kUToB[256];
int32_t VP8kVToG[256], VP8kUToG[256]; int32_t VP8kVToG[256], VP8kUToG[256];
...@@ -33,6 +33,7 @@ void VP8YUVInit(void) { ...@@ -33,6 +33,7 @@ void VP8YUVInit(void) {
if (done) { if (done) {
return; return;
} }
#ifndef USE_YUVj
for (i = 0; i < 256; ++i) { for (i = 0; i < 256; ++i) {
VP8kVToR[i] = (89858 * (i - 128) + YUV_HALF) >> YUV_FIX; VP8kVToR[i] = (89858 * (i - 128) + YUV_HALF) >> YUV_FIX;
VP8kUToG[i] = -22014 * (i - 128) + YUV_HALF; VP8kUToG[i] = -22014 * (i - 128) + YUV_HALF;
...@@ -44,9 +45,29 @@ void VP8YUVInit(void) { ...@@ -44,9 +45,29 @@ void VP8YUVInit(void) {
VP8kClip[i - YUV_RANGE_MIN] = clip(k, 255); VP8kClip[i - YUV_RANGE_MIN] = clip(k, 255);
VP8kClip4Bits[i - YUV_RANGE_MIN] = clip((k + 8) >> 4, 15); VP8kClip4Bits[i - YUV_RANGE_MIN] = clip((k + 8) >> 4, 15);
} }
#else
for (i = 0; i < 256; ++i) {
VP8kVToR[i] = (91881 * (i - 128) + YUV_HALF) >> YUV_FIX;
VP8kUToG[i] = -22554 * (i - 128) + YUV_HALF;
VP8kVToG[i] = -46802 * (i - 128);
VP8kUToB[i] = (116130 * (i - 128) + YUV_HALF) >> YUV_FIX;
}
for (i = YUV_RANGE_MIN; i < YUV_RANGE_MAX; ++i) {
const int k = i;
VP8kClip[i - YUV_RANGE_MIN] = clip(k, 255);
VP8kClip4Bits[i - YUV_RANGE_MIN] = clip((k + 8) >> 4, 15);
}
#endif
done = 1; done = 1;
} }
#else
void VP8YUVInit(void) {}
#endif // WEBP_YUV_USE_TABLE
#if defined(__cplusplus) || defined(c_plusplus) #if defined(__cplusplus) || defined(c_plusplus)
} // extern "C" } // extern "C"
#endif #endif
This diff is collapsed.
...@@ -79,18 +79,17 @@ static int EncodeLossless(const uint8_t* const data, int width, int height, ...@@ -79,18 +79,17 @@ static int EncodeLossless(const uint8_t* const data, int width, int height,
WebPConfigInit(&config); WebPConfigInit(&config);
config.lossless = 1; config.lossless = 1;
config.method = effort_level; // impact is very small config.method = effort_level; // impact is very small
// Set moderate default quality setting for alpha. Higher qualities (80 and // Set a moderate default quality setting for alpha.
// above) could be very slow. config.quality = 5.f * effort_level;
config.quality = 10.f + 15.f * effort_level; assert(config.quality >= 0 && config.quality <= 100.f);
if (config.quality > 100.f) config.quality = 100.f;
ok = VP8LBitWriterInit(&tmp_bw, (width * height) >> 3); ok = VP8LBitWriterInit(&tmp_bw, (width * height) >> 3);
ok = ok && (VP8LEncodeStream(&config, &picture, &tmp_bw) == VP8_ENC_OK); ok = ok && (VP8LEncodeStream(&config, &picture, &tmp_bw) == VP8_ENC_OK);
WebPPictureFree(&picture); WebPPictureFree(&picture);
if (ok) { if (ok) {
const uint8_t* const data = VP8LBitWriterFinish(&tmp_bw); const uint8_t* const buffer = VP8LBitWriterFinish(&tmp_bw);
const size_t data_size = VP8LBitWriterNumBytes(&tmp_bw); const size_t buffer_size = VP8LBitWriterNumBytes(&tmp_bw);
VP8BitWriterAppend(bw, data, data_size); VP8BitWriterAppend(bw, buffer, buffer_size);
} }
VP8LBitWriterDestroy(&tmp_bw); VP8LBitWriterDestroy(&tmp_bw);
return ok && !bw->error_; return ok && !bw->error_;
...@@ -128,8 +127,8 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height, ...@@ -128,8 +127,8 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height,
VP8BitWriterAppend(bw, &header, ALPHA_HEADER_LEN); VP8BitWriterAppend(bw, &header, ALPHA_HEADER_LEN);
filter_func = WebPFilters[filter]; filter_func = WebPFilters[filter];
if (filter_func) { if (filter_func != NULL) {
filter_func(data, width, height, 1, width, tmp_alpha); filter_func(data, width, height, width, tmp_alpha);
alpha_src = tmp_alpha; alpha_src = tmp_alpha;
} else { } else {
alpha_src = data; alpha_src = data;
...@@ -287,42 +286,80 @@ static int EncodeAlpha(VP8Encoder* const enc, ...@@ -287,42 +286,80 @@ static int EncodeAlpha(VP8Encoder* const enc,
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Main calls // Main calls
// Compresses the alpha plane for 'enc' by calling EncodeAlpha() with the
// settings found in enc->config_, then publishes the resulting buffer and its
// size in enc->alpha_data_ / enc->alpha_data_size_.
// 'dummy' is ignored; it is only there so that the function can be installed
// as a worker hook. Returns 1 on success and 0 on failure (in which case the
// encoder's alpha fields are left untouched).
static int CompressAlphaJob(VP8Encoder* const enc, void* dummy) {
  const WebPConfig* cfg = enc->config_;
  const int effort = cfg->method;  // maps to [0..6]
  uint8_t* output = NULL;
  size_t output_size = 0;
  WEBP_FILTER_TYPE filter_type;

  (void)dummy;

  // Translate the alpha_filtering setting into a filter type:
  // 0 -> none, 1 -> fast, anything else -> best.
  if (cfg->alpha_filtering == 0) {
    filter_type = WEBP_FILTER_NONE;
  } else if (cfg->alpha_filtering == 1) {
    filter_type = WEBP_FILTER_FAST;
  } else {
    filter_type = WEBP_FILTER_BEST;
  }

  if (!EncodeAlpha(enc, cfg->alpha_quality, cfg->alpha_compression,
                   filter_type, effort, &output, &output_size)) {
    return 0;
  }

  // Sanity check: the size is stored after a cast to uint32_t, so refuse any
  // value that would not survive that cast, and release the buffer.
  if (output_size != (uint32_t)output_size) {
    free(output);
    return 0;
  }

  enc->alpha_data_ = output;
  enc->alpha_data_size_ = (uint32_t)output_size;
  return 1;
}
void VP8EncInitAlpha(VP8Encoder* const enc) { void VP8EncInitAlpha(VP8Encoder* const enc) {
enc->has_alpha_ = WebPPictureHasTransparency(enc->pic_); enc->has_alpha_ = WebPPictureHasTransparency(enc->pic_);
enc->alpha_data_ = NULL; enc->alpha_data_ = NULL;
enc->alpha_data_size_ = 0; enc->alpha_data_size_ = 0;
if (enc->thread_level_ > 0) {
WebPWorker* const worker = &enc->alpha_worker_;
WebPWorkerInit(worker);
worker->data1 = enc;
worker->data2 = NULL;
worker->hook = (WebPWorkerHook)CompressAlphaJob;
}
} }
int VP8EncFinishAlpha(VP8Encoder* const enc) { int VP8EncStartAlpha(VP8Encoder* const enc) {
if (enc->has_alpha_) { if (enc->has_alpha_) {
const WebPConfig* config = enc->config_; if (enc->thread_level_ > 0) {
uint8_t* tmp_data = NULL; WebPWorker* const worker = &enc->alpha_worker_;
size_t tmp_size = 0; if (!WebPWorkerReset(worker)) { // Makes sure worker is good to go.
const int effort_level = config->method; // maps to [0..6] return 0;
const WEBP_FILTER_TYPE filter = }
(config->alpha_filtering == 0) ? WEBP_FILTER_NONE : WebPWorkerLaunch(worker);
(config->alpha_filtering == 1) ? WEBP_FILTER_FAST : return 1;
WEBP_FILTER_BEST; } else {
return CompressAlphaJob(enc, NULL); // just do the job right away
if (!EncodeAlpha(enc, config->alpha_quality, config->alpha_compression,
filter, effort_level, &tmp_data, &tmp_size)) {
return 0;
} }
if (tmp_size != (uint32_t)tmp_size) { // Sanity check. }
free(tmp_data); return 1;
return 0; }
int VP8EncFinishAlpha(VP8Encoder* const enc) {
if (enc->has_alpha_) {
if (enc->thread_level_ > 0) {
WebPWorker* const worker = &enc->alpha_worker_;
if (!WebPWorkerSync(worker)) return 0; // error
} }
enc->alpha_data_size_ = (uint32_t)tmp_size;
enc->alpha_data_ = tmp_data;
} }
return WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_); return WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_);
} }
void VP8EncDeleteAlpha(VP8Encoder* const enc) { int VP8EncDeleteAlpha(VP8Encoder* const enc) {
int ok = 1;
if (enc->thread_level_ > 0) {
WebPWorker* const worker = &enc->alpha_worker_;
ok = WebPWorkerSync(worker); // finish anything left in flight
WebPWorkerEnd(worker); // still need to end the worker, even if !ok
}
free(enc->alpha_data_); free(enc->alpha_data_);
enc->alpha_data_ = NULL; enc->alpha_data_ = NULL;
enc->alpha_data_size_ = 0; enc->alpha_data_size_ = 0;
enc->has_alpha_ = 0; enc->has_alpha_ = 0;
return ok;
} }
#if defined(__cplusplus) || defined(c_plusplus) #if defined(__cplusplus) || defined(c_plusplus)
......
This diff is collapsed.
This diff is collapsed.
...@@ -35,7 +35,8 @@ extern "C" { ...@@ -35,7 +35,8 @@ extern "C" {
#if defined(__GNUC__) && \ #if defined(__GNUC__) && \
((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4) ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4)
static WEBP_INLINE int BitsLog2Floor(uint32_t n) { static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
return n == 0 ? -1 : 31 ^ __builtin_clz(n); assert(n != 0);
return 31 ^ __builtin_clz(n);
} }
#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) #elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
#include <intrin.h> #include <intrin.h>
...@@ -43,15 +44,18 @@ static WEBP_INLINE int BitsLog2Floor(uint32_t n) { ...@@ -43,15 +44,18 @@ static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
static WEBP_INLINE int BitsLog2Floor(uint32_t n) { static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
unsigned long first_set_bit; unsigned long first_set_bit;
return _BitScanReverse(&first_set_bit, n) ? first_set_bit : -1; assert(n != 0);
_BitScanReverse(&first_set_bit, n);
return first_set_bit;
} }
#else #else
// Returns (int)floor(log2(n)). n must be > 0.
static WEBP_INLINE int BitsLog2Floor(uint32_t n) { static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
int log = 0; int log = 0;
uint32_t value = n; uint32_t value = n;
int i; int i;
if (value == 0) return -1; assert(n != 0);
for (i = 4; i >= 0; --i) { for (i = 4; i >= 0; --i) {
const int shift = (1 << i); const int shift = (1 << i);
const uint32_t x = value >> shift; const uint32_t x = value >> shift;
...@@ -65,11 +69,11 @@ static WEBP_INLINE int BitsLog2Floor(uint32_t n) { ...@@ -65,11 +69,11 @@ static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
#endif #endif
static WEBP_INLINE int VP8LBitsLog2Ceiling(uint32_t n) { static WEBP_INLINE int VP8LBitsLog2Ceiling(uint32_t n) {
const int floor = BitsLog2Floor(n); const int log_floor = BitsLog2Floor(n);
if (n == (n & ~(n - 1))) // zero or a power of two. if (n == (n & ~(n - 1))) // zero or a power of two.
return floor; return log_floor;
else else
return floor + 1; return log_floor + 1;
} }
// Splitting of distance and length codes into prefixes and // Splitting of distance and length codes into prefixes and
...@@ -78,16 +82,17 @@ static WEBP_INLINE int VP8LBitsLog2Ceiling(uint32_t n) { ...@@ -78,16 +82,17 @@ static WEBP_INLINE int VP8LBitsLog2Ceiling(uint32_t n) {
static WEBP_INLINE void PrefixEncode(int distance, int* const code, static WEBP_INLINE void PrefixEncode(int distance, int* const code,
int* const extra_bits_count, int* const extra_bits_count,
int* const extra_bits_value) { int* const extra_bits_value) {
// Collect the two most significant bits where the highest bit is 1. if (distance > 2) { // Collect the two most significant bits.
const int highest_bit = BitsLog2Floor(--distance); const int highest_bit = BitsLog2Floor(--distance);
// & 0x3f is to make behavior well defined when highest_bit const int second_highest_bit = (distance >> (highest_bit - 1)) & 1;
// does not exist or is the least significant bit. *extra_bits_count = highest_bit - 1;
const int second_highest_bit = *extra_bits_value = distance & ((1 << *extra_bits_count) - 1);
(distance >> ((highest_bit - 1) & 0x3f)) & 1; *code = 2 * highest_bit + second_highest_bit;
*extra_bits_count = (highest_bit > 0) ? (highest_bit - 1) : 0; } else {
*extra_bits_value = distance & ((1 << *extra_bits_count) - 1); *extra_bits_count = 0;
*code = (highest_bit > 0) ? (2 * highest_bit + second_highest_bit) *extra_bits_value = 0;
: (highest_bit == 0) ? 1 : 0; *code = (distance == 2) ? 1 : 0;
}
} }
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
......
...@@ -31,9 +31,9 @@ int WebPConfigInitInternal(WebPConfig* config, ...@@ -31,9 +31,9 @@ int WebPConfigInitInternal(WebPConfig* config,
config->target_PSNR = 0.; config->target_PSNR = 0.;
config->method = 4; config->method = 4;
config->sns_strength = 50; config->sns_strength = 50;
config->filter_strength = 20; // default: light filtering config->filter_strength = 60; // rather high filtering, helps w/ gradients.
config->filter_sharpness = 0; config->filter_sharpness = 0;
config->filter_type = 0; // default: simple config->filter_type = 1; // default: strong (so U/V is filtered too)
config->partitions = 0; config->partitions = 0;
config->segments = 4; config->segments = 4;
config->pass = 1; config->pass = 1;
...@@ -46,6 +46,9 @@ int WebPConfigInitInternal(WebPConfig* config, ...@@ -46,6 +46,9 @@ int WebPConfigInitInternal(WebPConfig* config,
config->alpha_quality = 100; config->alpha_quality = 100;
config->lossless = 0; config->lossless = 0;
config->image_hint = WEBP_HINT_DEFAULT; config->image_hint = WEBP_HINT_DEFAULT;
config->emulate_jpeg_size = 0;
config->thread_level = 0;
config->low_memory = 0;
// TODO(skal): tune. // TODO(skal): tune.
switch (preset) { switch (preset) {
...@@ -122,6 +125,12 @@ int WebPValidateConfig(const WebPConfig* config) { ...@@ -122,6 +125,12 @@ int WebPValidateConfig(const WebPConfig* config) {
return 0; return 0;
if (config->image_hint >= WEBP_HINT_LAST) if (config->image_hint >= WEBP_HINT_LAST)
return 0; return 0;
if (config->emulate_jpeg_size < 0 || config->emulate_jpeg_size > 1)
return 0;
if (config->thread_level < 0 || config->thread_level > 1)
return 0;
if (config->low_memory < 0 || config->low_memory > 1)
return 0;
return 1; return 1;
} }
......
...@@ -75,7 +75,7 @@ const uint16_t VP8LevelCodes[MAX_VARIABLE_LEVEL][2] = { ...@@ -75,7 +75,7 @@ const uint16_t VP8LevelCodes[MAX_VARIABLE_LEVEL][2] = {
// fixed costs for coding levels, deduce from the coding tree. // fixed costs for coding levels, deduce from the coding tree.
// This is only the part that doesn't depend on the probability state. // This is only the part that doesn't depend on the probability state.
const uint16_t VP8LevelFixedCosts[2048] = { const uint16_t VP8LevelFixedCosts[MAX_LEVEL + 1] = {
0, 256, 256, 256, 256, 432, 618, 630, 0, 256, 256, 256, 256, 432, 618, 630,
731, 640, 640, 828, 901, 948, 1021, 1101, 731, 640, 640, 828, 901, 948, 1021, 1101,
1174, 1221, 1294, 1042, 1085, 1115, 1158, 1202, 1174, 1221, 1294, 1042, 1085, 1115, 1158, 1202,
...@@ -359,7 +359,7 @@ void VP8CalculateLevelCosts(VP8Proba* const proba) { ...@@ -359,7 +359,7 @@ void VP8CalculateLevelCosts(VP8Proba* const proba) {
for (ctype = 0; ctype < NUM_TYPES; ++ctype) { for (ctype = 0; ctype < NUM_TYPES; ++ctype) {
for (band = 0; band < NUM_BANDS; ++band) { for (band = 0; band < NUM_BANDS; ++band) {
for(ctx = 0; ctx < NUM_CTX; ++ctx) { for (ctx = 0; ctx < NUM_CTX; ++ctx) {
const uint8_t* const p = proba->coeffs_[ctype][band][ctx]; const uint8_t* const p = proba->coeffs_[ctype][band][ctx];
uint16_t* const table = proba->level_cost_[ctype][band][ctx]; uint16_t* const table = proba->level_cost_[ctype][band][ctx];
const int cost_base = VP8BitCost(1, p[1]); const int cost_base = VP8BitCost(1, p[1]);
......
...@@ -18,7 +18,8 @@ ...@@ -18,7 +18,8 @@
extern "C" { extern "C" {
#endif #endif
extern const uint16_t VP8LevelFixedCosts[2048]; // approximate cost per level // approximate cost per level:
extern const uint16_t VP8LevelFixedCosts[MAX_LEVEL + 1];
extern const uint16_t VP8EntropyCost[256]; // 8bit fixed-point log(p) extern const uint16_t VP8EntropyCost[256]; // 8bit fixed-point log(p)
// Cost of coding one event with probability 'proba'. // Cost of coding one event with probability 'proba'.
......
This diff is collapsed.
This diff is collapsed.
...@@ -80,22 +80,6 @@ double VP8LHistogramEstimateBits(const VP8LHistogram* const p); ...@@ -80,22 +80,6 @@ double VP8LHistogramEstimateBits(const VP8LHistogram* const p);
// represent the entropy code itself. // represent the entropy code itself.
double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p); double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p);
// Accumulates histogram 'a' into histogram 'p': every bin of 'a' is added to
// the bin of 'p' at the same index. 'a' is not modified.
static WEBP_INLINE void VP8LHistogramAdd(VP8LHistogram* const p,
                                         const VP8LHistogram* const a) {
  int bin;

  // Literal and distance tables each have their own length.
  for (bin = 0; bin < PIX_OR_COPY_CODES_MAX; ++bin) {
    p->literal_[bin] += a->literal_[bin];
  }
  for (bin = 0; bin < NUM_DISTANCE_CODES; ++bin) {
    p->distance_[bin] += a->distance_[bin];
  }

  // The red, blue and alpha tables are each summed over 256 bins.
  for (bin = 0; bin < 256; ++bin) {
    p->red_[bin] += a->red_[bin];
  }
  for (bin = 0; bin < 256; ++bin) {
    p->blue_[bin] += a->blue_[bin];
  }
  for (bin = 0; bin < 256; ++bin) {
    p->alpha_[bin] += a->alpha_[bin];
  }
}
static WEBP_INLINE int VP8LHistogramNumCodes(const VP8LHistogram* const p) { static WEBP_INLINE int VP8LHistogramNumCodes(const VP8LHistogram* const p) {
return 256 + NUM_LENGTH_CODES + return 256 + NUM_LENGTH_CODES +
((p->palette_code_bits_ > 0) ? (1 << p->palette_code_bits_) : 0); ((p->palette_code_bits_ > 0) ? (1 << p->palette_code_bits_) : 0);
......
This diff is collapsed.
This diff is collapsed.
...@@ -11,7 +11,9 @@ ...@@ -11,7 +11,9 @@
#include <assert.h> #include <assert.h>
#include "../webp/format_constants.h" #include "../utils/utils.h"
#include "../webp/format_constants.h" // RIFF constants
#include "../webp/mux_types.h" // ALPHA_FLAG
#include "./vp8enci.h" #include "./vp8enci.h"
#if defined(__cplusplus) || defined(c_plusplus) #if defined(__cplusplus) || defined(c_plusplus)
...@@ -21,25 +23,12 @@ extern "C" { ...@@ -21,25 +23,12 @@ extern "C" {
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Helper functions // Helper functions
// TODO(later): Move to webp/format_constants.h?
// Writes the low 24 bits of 'val' to data[0..2] in little-endian order
// (least significant byte first). Bits 24..31 of 'val' are ignored and
// data[3] is not touched.
static void PutLE24(uint8_t* const data, uint32_t val) {
  int byte_idx;
  for (byte_idx = 0; byte_idx < 3; ++byte_idx) {
    data[byte_idx] = (uint8_t)((val >> (8 * byte_idx)) & 0xff);
  }
}
// Writes the 32 bits of 'val' to data[0..3] in little-endian order.
static void PutLE32(uint8_t* const data, uint32_t val) {
  // Top byte first; PutLE24() then fills data[0..2] from the low 24 bits.
  data[3] = (uint8_t)((val >> 24) & 0xff);
  PutLE24(data, val);
}
static int IsVP8XNeeded(const VP8Encoder* const enc) { static int IsVP8XNeeded(const VP8Encoder* const enc) {
return !!enc->has_alpha_; // Currently the only case when VP8X is needed. return !!enc->has_alpha_; // Currently the only case when VP8X is needed.
// This could change in the future. // This could change in the future.
} }
static int PutPaddingByte(const WebPPicture* const pic) { static int PutPaddingByte(const WebPPicture* const pic) {
const uint8_t pad_byte[1] = { 0 }; const uint8_t pad_byte[1] = { 0 };
return !!pic->writer(pad_byte, 1, pic); return !!pic->writer(pad_byte, 1, pic);
} }
...@@ -73,14 +62,14 @@ static WebPEncodingError PutVP8XHeader(const VP8Encoder* const enc) { ...@@ -73,14 +62,14 @@ static WebPEncodingError PutVP8XHeader(const VP8Encoder* const enc) {
assert(pic->width <= MAX_CANVAS_SIZE && pic->height <= MAX_CANVAS_SIZE); assert(pic->width <= MAX_CANVAS_SIZE && pic->height <= MAX_CANVAS_SIZE);
if (enc->has_alpha_) { if (enc->has_alpha_) {
flags |= ALPHA_FLAG_BIT; flags |= ALPHA_FLAG;
} }
PutLE32(vp8x + TAG_SIZE, VP8X_CHUNK_SIZE); PutLE32(vp8x + TAG_SIZE, VP8X_CHUNK_SIZE);
PutLE32(vp8x + CHUNK_HEADER_SIZE, flags); PutLE32(vp8x + CHUNK_HEADER_SIZE, flags);
PutLE24(vp8x + CHUNK_HEADER_SIZE + 4, pic->width - 1); PutLE24(vp8x + CHUNK_HEADER_SIZE + 4, pic->width - 1);
PutLE24(vp8x + CHUNK_HEADER_SIZE + 7, pic->height - 1); PutLE24(vp8x + CHUNK_HEADER_SIZE + 7, pic->height - 1);
if(!pic->writer(vp8x, sizeof(vp8x), pic)) { if (!pic->writer(vp8x, sizeof(vp8x), pic)) {
return VP8_ENC_ERROR_BAD_WRITE; return VP8_ENC_ERROR_BAD_WRITE;
} }
return VP8_ENC_OK; return VP8_ENC_OK;
...@@ -327,7 +316,9 @@ static size_t GeneratePartition0(VP8Encoder* const enc) { ...@@ -327,7 +316,9 @@ static size_t GeneratePartition0(VP8Encoder* const enc) {
PutSegmentHeader(bw, enc); PutSegmentHeader(bw, enc);
PutFilterHeader(bw, &enc->filter_hdr_); PutFilterHeader(bw, &enc->filter_hdr_);
VP8PutValue(bw, enc->config_->partitions, 2); VP8PutValue(bw, enc->num_parts_ == 8 ? 3 :
enc->num_parts_ == 4 ? 2 :
enc->num_parts_ == 2 ? 1 : 0, 2);
PutQuant(bw, enc); PutQuant(bw, enc);
VP8PutBitUniform(bw, 0); // no proba update VP8PutBitUniform(bw, 0); // no proba update
VP8WriteProbas(bw, &enc->proba_); VP8WriteProbas(bw, &enc->proba_);
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
...@@ -30,18 +30,19 @@ typedef enum { ...@@ -30,18 +30,19 @@ typedef enum {
} WEBP_FILTER_TYPE; } WEBP_FILTER_TYPE;
typedef void (*WebPFilterFunc)(const uint8_t* in, int width, int height, typedef void (*WebPFilterFunc)(const uint8_t* in, int width, int height,
int bpp, int stride, uint8_t* out); int stride, uint8_t* out);
typedef void (*WebPUnfilterFunc)(int width, int height, int stride,
uint8_t* data);
// Filter the given data using the given predictor. // Filter the given data using the given predictor.
// 'in' corresponds to a 2-dimensional pixel array of size (stride * height) // 'in' corresponds to a 2-dimensional pixel array of size (stride * height)
// in raster order. // in raster order.
// 'bpp' is number of bytes per pixel, and
// 'stride' is number of bytes per scan line (with possible padding). // 'stride' is number of bytes per scan line (with possible padding).
// 'out' should be pre-allocated. // 'out' should be pre-allocated.
extern const WebPFilterFunc WebPFilters[WEBP_FILTER_LAST]; extern const WebPFilterFunc WebPFilters[WEBP_FILTER_LAST];
// Reconstruct the original data from the given filtered data. // In-place reconstruct the original data from the given filtered data.
extern const WebPFilterFunc WebPUnfilters[WEBP_FILTER_LAST]; extern const WebPUnfilterFunc WebPUnfilters[WEBP_FILTER_LAST];
// Fast estimate of a potentially good filter. // Fast estimate of a potentially good filter.
extern WEBP_FILTER_TYPE EstimateBestFilter(const uint8_t* data, extern WEBP_FILTER_TYPE EstimateBestFilter(const uint8_t* data,
......
...@@ -138,13 +138,8 @@ static int CompareHuffmanTrees(const void* ptr1, const void* ptr2) { ...@@ -138,13 +138,8 @@ static int CompareHuffmanTrees(const void* ptr1, const void* ptr2) {
} else if (t1->total_count_ < t2->total_count_) { } else if (t1->total_count_ < t2->total_count_) {
return 1; return 1;
} else { } else {
if (t1->value_ < t2->value_) { assert(t1->value_ != t2->value_);
return -1; return (t1->value_ < t2->value_) ? -1 : 1;
}
if (t1->value_ > t2->value_) {
return 1;
}
return 0;
} }
} }
...@@ -193,6 +188,10 @@ static int GenerateOptimalTree(const int* const histogram, int histogram_size, ...@@ -193,6 +188,10 @@ static int GenerateOptimalTree(const int* const histogram, int histogram_size,
} }
} }
if (tree_size_orig == 0) { // pretty optimal already!
return 1;
}
// 3 * tree_size is enough to cover all the nodes representing a // 3 * tree_size is enough to cover all the nodes representing a
// population and all the inserted nodes combining two existing nodes. // population and all the inserted nodes combining two existing nodes.
// The tree pool needs 2 * (tree_size_orig - 1) entities, and the // The tree pool needs 2 * (tree_size_orig - 1) entities, and the
...@@ -234,7 +233,7 @@ static int GenerateOptimalTree(const int* const histogram, int histogram_size, ...@@ -234,7 +233,7 @@ static int GenerateOptimalTree(const int* const histogram, int histogram_size,
tree_pool[tree_pool_size++] = tree[tree_size - 1]; tree_pool[tree_pool_size++] = tree[tree_size - 1];
tree_pool[tree_pool_size++] = tree[tree_size - 2]; tree_pool[tree_pool_size++] = tree[tree_size - 2];
count = tree_pool[tree_pool_size - 1].total_count_ + count = tree_pool[tree_pool_size - 1].total_count_ +
tree_pool[tree_pool_size - 2].total_count_; tree_pool[tree_pool_size - 2].total_count_;
tree_size -= 2; tree_size -= 2;
{ {
// Search for the insertion point. // Search for the insertion point.
......
...@@ -140,15 +140,6 @@ int QuantizeLevels(uint8_t* const data, int width, int height, ...@@ -140,15 +140,6 @@ int QuantizeLevels(uint8_t* const data, int width, int height,
return 1; return 1;
} }
// Placeholder for post-processing of level-quantized data: it only validates
// its arguments and never reads or writes the buffer.
// Returns 0 if 'data' is NULL or if 'width' or 'height' is not strictly
// positive, and 1 otherwise.
int DequantizeLevels(uint8_t* const data, int width, int height) {
  // TODO(skal): implement gradient smoothing.
  const int has_buffer = (data != NULL);
  const int has_area = (width > 0) && (height > 0);
  return has_buffer && has_area;
}
#if defined(__cplusplus) || defined(c_plusplus) #if defined(__cplusplus) || defined(c_plusplus)
} // extern "C" } // extern "C"
#endif #endif
...@@ -27,11 +27,6 @@ extern "C" { ...@@ -27,11 +27,6 @@ extern "C" {
int QuantizeLevels(uint8_t* const data, int width, int height, int num_levels, int QuantizeLevels(uint8_t* const data, int width, int height, int num_levels,
uint64_t* const sse); uint64_t* const sse);
// Apply post-processing to input 'data' of size 'width'x'height' assuming
// that the source was quantized to a reduced number of levels.
// Returns false in case of error (data is NULL, invalid parameters, ...).
int DequantizeLevels(uint8_t* const data, int width, int height);
#if defined(__cplusplus) || defined(c_plusplus) #if defined(__cplusplus) || defined(c_plusplus)
} // extern "C" } // extern "C"
#endif #endif
......
This diff is collapsed.
This diff is collapsed.
...@@ -20,7 +20,7 @@ extern "C" { ...@@ -20,7 +20,7 @@ extern "C" {
#endif #endif
#define RFIX 30 #define RFIX 30
#define MULT_FIX(x,y) (((int64_t)(x) * (y) + (1 << (RFIX - 1))) >> RFIX) #define MULT_FIX(x, y) (((int64_t)(x) * (y) + (1 << (RFIX - 1))) >> RFIX)
void WebPRescalerInit(WebPRescaler* const wrk, int src_width, int src_height, void WebPRescalerInit(WebPRescaler* const wrk, int src_width, int src_height,
uint8_t* const dst, int dst_width, int dst_height, uint8_t* const dst, int dst_width, int dst_height,
......
...@@ -9,10 +9,6 @@ ...@@ -9,10 +9,6 @@
// //
// Author: Skal (pascal.massimino@gmail.com) // Author: Skal (pascal.massimino@gmail.com)
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <assert.h> #include <assert.h>
#include <string.h> // for memset() #include <string.h> // for memset()
#include "./thread.h" #include "./thread.h"
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment