Update libwebp to 0.3.0

740941c8 · AoD314 · db45e04d · 740941c8 · 740941c8 · 740941c8
Commit 740941c8 authored Apr 02, 2013 by AoD314
64 changed files
--- a/3rdparty/libwebp/dec/alpha.c
+++ b/3rdparty/libwebp/dec/alpha.c
@@ -13,7 +13,7 @@
 #include "./vp8i.h"
 #include "./vp8li.h"
 #include "../utils/filters.h"
-#include "../utils/quant_levels.h"
+#include "../utils/quant_levels_dec.h"
 #include "../webp/format_constants.h"
 #if defined(__cplusplus) || defined(c_plusplus)
@@ -44,7 +44,6 @@ static int DecodeAlpha(const uint8_t* data, size_t data_size,
                       int width, int height, int stride, uint8_t* output) {
  uint8_t* decoded_data = NULL;
  const size_t decoded_size = height * width;
-  uint8_t* unfiltered_data = NULL;
  WEBP_FILTER_TYPE filter;
  int pre_processing;
  int rsrv;
@@ -83,29 +82,19 @@ static int DecodeAlpha(const uint8_t* data, size_t data_size,
  }
  if (ok) {
-    WebPFilterFunc unfilter_func = WebPUnfilters[filter];
+    WebPUnfilterFunc unfilter_func = WebPUnfilters[filter];
    if (unfilter_func != NULL) {
-      unfiltered_data = (uint8_t*)malloc(decoded_size);
-      if (unfiltered_data == NULL) {
-        ok = 0;
-        goto Error;
-      }
      // TODO(vikas): Implement on-the-fly decoding & filter mechanism to decode
      // and apply filter per image-row.
-      unfilter_func(decoded_data, width, height, 1, width, unfiltered_data);
+      unfilter_func(width, height, width, decoded_data);
-      // Construct raw_data (height x stride) from alpha data (height x width).
-      CopyPlane(unfiltered_data, width, output, stride, width, height);
-      free(unfiltered_data);
-    } else {
-      // Construct raw_data (height x stride) from alpha data (height x width).
-      CopyPlane(decoded_data, width, output, stride, width, height);
    }
+    // Construct raw_data (height x stride) from alpha data (height x width).
+    CopyPlane(decoded_data, width, output, stride, width, height);
    if (pre_processing == ALPHA_PREPROCESSED_LEVELS) {
      ok = DequantizeLevels(decoded_data, width, height);
    }
  }
- Error:
  if (method != ALPHA_NO_COMPRESSION) {
    free(decoded_data);
  }

--- a/3rdparty/libwebp/dec/frame.c
+++ b/3rdparty/libwebp/dec/frame.c
@@ -97,54 +97,51 @@ static void FilterRow(const VP8Decoder* const dec) {
 }
 //------------------------------------------------------------------------------
+// Precompute the filtering strength for each segment and each i4x4/i16x16 mode.
-void VP8StoreBlock(VP8Decoder* const dec) {
+static void PrecomputeFilterStrengths(VP8Decoder* const dec) {
  if (dec->filter_type_ > 0) {
-    VP8FInfo* const info = dec->f_info_ + dec->mb_x_;
+    int s;
-    const int skip = dec->mb_info_[dec->mb_x_].skip_;
+    const VP8FilterHeader* const hdr = &dec->filter_hdr_;
-    int level = dec->filter_levels_[dec->segment_];
+    for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
-    if (dec->filter_hdr_.use_lf_delta_) {
+      int i4x4;
-      // TODO(skal): only CURRENT is handled for now.
+      // First, compute the initial level
-      level += dec->filter_hdr_.ref_lf_delta_[0];
+      int base_level;
-      if (dec->is_i4x4_) {
+      if (dec->segment_hdr_.use_segment_) {
-        level += dec->filter_hdr_.mode_lf_delta_[0];
+        base_level = dec->segment_hdr_.filter_strength_[s];
-      }
+        if (!dec->segment_hdr_.absolute_delta_) {
-    }
+          base_level += hdr->level_;
-    level = (level < 0) ? 0 : (level > 63) ? 63 : level;
+        }
-    info->f_level_ = level;
-    if (dec->filter_hdr_.sharpness_ > 0) {
-      if (dec->filter_hdr_.sharpness_ > 4) {
-        level >>= 2;
      } else {
-        level >>= 1;
+        base_level = hdr->level_;
      }
-      if (level > 9 - dec->filter_hdr_.sharpness_) {
+      for (i4x4 = 0; i4x4 <= 1; ++i4x4) {
-        level = 9 - dec->filter_hdr_.sharpness_;
+        VP8FInfo* const info = &dec->fstrengths_[s][i4x4];
+        int level = base_level;
+        if (hdr->use_lf_delta_) {
+          // TODO(skal): only CURRENT is handled for now.
+          level += hdr->ref_lf_delta_[0];
+          if (i4x4) {
+            level += hdr->mode_lf_delta_[0];
+          }
+        }
+        level = (level < 0) ? 0 : (level > 63) ? 63 : level;
+        info->f_level_ = level;
+        if (hdr->sharpness_ > 0) {
+          if (hdr->sharpness_ > 4) {
+            level >>= 2;
+          } else {
+            level >>= 1;
+          }
+          if (level > 9 - hdr->sharpness_) {
+            level = 9 - hdr->sharpness_;
+          }
+        }
+        info->f_ilevel_ = (level < 1) ? 1 : level;
+        info->f_inner_ = 0;
      }
    }
-    info->f_ilevel_ = (level < 1) ? 1 : level;
-    info->f_inner_ = (!skip || dec->is_i4x4_);
-  }
-  {
-    // Transfer samples to row cache
-    int y;
-    const int y_offset = dec->cache_id_ * 16 * dec->cache_y_stride_;
-    const int uv_offset = dec->cache_id_ * 8 * dec->cache_uv_stride_;
-    uint8_t* const ydst = dec->cache_y_ + dec->mb_x_ * 16 + y_offset;
-    uint8_t* const udst = dec->cache_u_ + dec->mb_x_ * 8 + uv_offset;
-    uint8_t* const vdst = dec->cache_v_ + dec->mb_x_ * 8 + uv_offset;
-    for (y = 0; y < 16; ++y) {
-      memcpy(ydst + y * dec->cache_y_stride_,
-             dec->yuv_b_ + Y_OFF + y * BPS, 16);
-    }
-    for (y = 0; y < 8; ++y) {
-      memcpy(udst + y * dec->cache_uv_stride_,
-           dec->yuv_b_ + U_OFF + y * BPS, 8);
-      memcpy(vdst + y * dec->cache_uv_stride_,
-           dec->yuv_b_ + V_OFF + y * BPS, 8);
-    }
  }
 }
@@ -339,6 +336,7 @@ VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io) {
      dec->br_mb_y_ = dec->mb_h_;
    }
  }
+  PrecomputeFilterStrengths(dec);
  return VP8_STATUS_OK;
 }
@@ -496,6 +494,7 @@ static int AllocateMemory(VP8Decoder* const dec) {
  // alpha plane
  dec->alpha_plane_ = alpha_size ? (uint8_t*)mem : NULL;
  mem += alpha_size;
+  assert(mem <= (uint8_t*)dec->mem_ + dec->mem_size_);
  // note: left-info is initialized once for all.
  memset(dec->mb_info_ - 1, 0, mb_info_size);
@@ -551,6 +550,7 @@ static WEBP_INLINE void Copy32b(uint8_t* dst, uint8_t* src) {
 }
 void VP8ReconstructBlock(VP8Decoder* const dec) {
+  int j;
  uint8_t* const y_dst = dec->yuv_b_ + Y_OFF;
  uint8_t* const u_dst = dec->yuv_b_ + U_OFF;
  uint8_t* const v_dst = dec->yuv_b_ + V_OFF;
@@ -558,7 +558,6 @@ void VP8ReconstructBlock(VP8Decoder* const dec) {
  // Rotate in the left samples from previously decoded block. We move four
  // pixels at a time for alignment reason, and because of in-loop filter.
  if (dec->mb_x_ > 0) {
-    int j;
    for (j = -1; j < 16; ++j) {
      Copy32b(&y_dst[j * BPS - 4], &y_dst[j * BPS + 12]);
    }
@@ -567,7 +566,6 @@ void VP8ReconstructBlock(VP8Decoder* const dec) {
      Copy32b(&v_dst[j * BPS - 4], &v_dst[j * BPS + 4]);
    }
  } else {
-    int j;
    for (j = 0; j < 16; ++j) {
      y_dst[j * BPS - 1] = 129;
    }
@@ -670,6 +668,21 @@ void VP8ReconstructBlock(VP8Decoder* const dec) {
      }
    }
  }
+  // Transfer reconstructed samples from yuv_b_ cache to final destination.
+  {
+    const int y_offset = dec->cache_id_ * 16 * dec->cache_y_stride_;
+    const int uv_offset = dec->cache_id_ * 8 * dec->cache_uv_stride_;
+    uint8_t* const y_out = dec->cache_y_ + dec->mb_x_ * 16 + y_offset;
+    uint8_t* const u_out = dec->cache_u_ + dec->mb_x_ * 8 + uv_offset;
+    uint8_t* const v_out = dec->cache_v_ + dec->mb_x_ * 8 + uv_offset;
+    for (j = 0; j < 16; ++j) {
+      memcpy(y_out + j * dec->cache_y_stride_, y_dst + j * BPS, 16);
+    }
+    for (j = 0; j < 8; ++j) {
+      memcpy(u_out + j * dec->cache_uv_stride_, u_dst + j * BPS, 8);
+      memcpy(v_out + j * dec->cache_uv_stride_, v_dst + j * BPS, 8);
+    }
+  }
 }
 //------------------------------------------------------------------------------

--- a/3rdparty/libwebp/dec/idec.c
+++ b/3rdparty/libwebp/dec/idec.c
@@ -425,9 +425,8 @@ static VP8StatusCode DecodeRemaining(WebPIDecoder* const idec) {
        }
        return VP8_STATUS_SUSPENDED;
      }
+      // Reconstruct and emit samples.
      VP8ReconstructBlock(dec);
-      // Store data and save block's filtering params
-      VP8StoreBlock(dec);
      // Release buffer only if there is only one partition
      if (dec->num_parts_ == 1) {
@@ -596,12 +595,22 @@ void WebPIDelete(WebPIDecoder* idec) {
 WebPIDecoder* WebPINewRGB(WEBP_CSP_MODE mode, uint8_t* output_buffer,
                          size_t output_buffer_size, int output_stride) {
+  const int is_external_memory = (output_buffer != NULL);
  WebPIDecoder* idec;
  if (mode >= MODE_YUV) return NULL;
+  if (!is_external_memory) {    // Overwrite parameters to sane values.
+    output_buffer_size = 0;
+    output_stride = 0;
+  } else {  // A buffer was passed. Validate the other params.
+    if (output_stride == 0 || output_buffer_size == 0) {
+      return NULL;   // invalid parameter.
+    }
+  }
  idec = WebPINewDecoder(NULL);
  if (idec == NULL) return NULL;
  idec->output_.colorspace = mode;
-  idec->output_.is_external_memory = 1;
+  idec->output_.is_external_memory = is_external_memory;
  idec->output_.u.RGBA.rgba = output_buffer;
  idec->output_.u.RGBA.stride = output_stride;
  idec->output_.u.RGBA.size = output_buffer_size;
@@ -612,10 +621,30 @@ WebPIDecoder* WebPINewYUVA(uint8_t* luma, size_t luma_size, int luma_stride,
                           uint8_t* u, size_t u_size, int u_stride,
                           uint8_t* v, size_t v_size, int v_stride,
                           uint8_t* a, size_t a_size, int a_stride) {
-  WebPIDecoder* const idec = WebPINewDecoder(NULL);
+  const int is_external_memory = (luma != NULL);
+  WebPIDecoder* idec;
+  WEBP_CSP_MODE colorspace;
+  if (!is_external_memory) {    // Overwrite parameters to sane values.
+    luma_size = u_size = v_size = a_size = 0;
+    luma_stride = u_stride = v_stride = a_stride = 0;
+    u = v = a = NULL;
+    colorspace = MODE_YUVA;
+  } else {  // A luma buffer was passed. Validate the other parameters.
+    if (u == NULL || v == NULL) return NULL;
+    if (luma_size == 0 || u_size == 0 || v_size == 0) return NULL;
+    if (luma_stride == 0 || u_stride == 0 || v_stride == 0) return NULL;
+    if (a != NULL) {
+      if (a_size == 0 || a_stride == 0) return NULL;
+    }
+    colorspace = (a == NULL) ? MODE_YUV : MODE_YUVA;
+  }
+  idec = WebPINewDecoder(NULL);
  if (idec == NULL) return NULL;
-  idec->output_.colorspace = (a == NULL) ? MODE_YUV : MODE_YUVA;
-  idec->output_.is_external_memory = 1;
+  idec->output_.colorspace = colorspace;
+  idec->output_.is_external_memory = is_external_memory;
  idec->output_.u.YUVA.y = luma;
  idec->output_.u.YUVA.y_stride = luma_stride;
  idec->output_.u.YUVA.y_size = luma_size;

--- a/3rdparty/libwebp/dec/vp8.c
+++ b/3rdparty/libwebp/dec/vp8.c
@@ -236,20 +236,6 @@ static int ParseFilterHeader(VP8BitReader* br, VP8Decoder* const dec) {
    }
  }
  dec->filter_type_ = (hdr->level_ == 0) ? 0 : hdr->simple_ ? 1 : 2;
-  if (dec->filter_type_ > 0) {    // precompute filter levels per segment
-    if (dec->segment_hdr_.use_segment_) {
-      int s;
-      for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
-        int strength = dec->segment_hdr_.filter_strength_[s];
-        if (!dec->segment_hdr_.absolute_delta_) {
-          strength += hdr->level_;
-        }
-        dec->filter_levels_[s] = strength;
-      }
-    } else {
-      dec->filter_levels_[0] = hdr->level_;
-    }
-  }
  return !br->eof_;
 }
@@ -458,7 +444,7 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
 //------------------------------------------------------------------------------
 // Residual decoding (Paragraph 13.2 / 13.3)
-static const uint8_t kBands[16 + 1] = {
+static const int kBands[16 + 1] = {
  0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7,
  0  // extra entry as sentinel
 };
@@ -474,6 +460,39 @@ static const uint8_t kZigzag[16] = {
 };
 typedef const uint8_t (*ProbaArray)[NUM_CTX][NUM_PROBAS];  // for const-casting
+typedef const uint8_t (*ProbaCtxArray)[NUM_PROBAS];
+// Reads a coeff magnitude >= 2: http://tools.ietf.org/html/rfc6386#section-13.2
+static int GetLargeValue(VP8BitReader* const br, const uint8_t* const p) {
+  int v;  // result; GetCoeffs() handles the magnitudes 0 and 1 itself
+  if (!VP8GetBit(br, p[3])) {  // small values: 2, 3 or 4
+    if (!VP8GetBit(br, p[4])) {
+      v = 2;
+    } else {
+      v = 3 + VP8GetBit(br, p[5]);  // 3 or 4 (VP8GetBit() presumably is 0/1)
+    }
+  } else {
+    if (!VP8GetBit(br, p[6])) {  // 5..10: extra bits use hard-coded probas
+      if (!VP8GetBit(br, p[7])) {
+        v = 5 + VP8GetBit(br, 159);  // 5 or 6
+      } else {
+        v = 7 + 2 * VP8GetBit(br, 165);  // 7..10, from two extra bits
+        v += VP8GetBit(br, 145);
+      }
+    } else {  // larger values: two bits pick a category, then extra bits
+      const uint8_t* tab;
+      const int bit1 = VP8GetBit(br, p[8]);
+      const int bit0 = VP8GetBit(br, p[9 + bit1]);  // proba depends on bit1
+      const int cat = 2 * bit1 + bit0;  // row index into kCat3456
+      v = 0;
+      for (tab = kCat3456[cat]; *tab; ++tab) {  // a zero entry ends the row
+        v += v + VP8GetBit(br, *tab);  // v = 2 * v + bit, i.e. MSB first
+      }
+      v += 3 + (8 << cat);  // category base: 11, 19, 35 or 67
+    }
+  }
+  return v;
+}
 // Returns the position of the last non-zero coeff plus one
 // (and 0 if there's no coeff at all)
@@ -484,54 +503,26 @@ static int GetCoeffs(VP8BitReader* const br, ProbaArray prob,
  if (!VP8GetBit(br, p[0])) {   // first EOB is more a 'CBP' bit.
    return 0;
  }
-  while (1) {
+  for (; n < 16; ++n) {
-    ++n;
+    const ProbaCtxArray p_ctx = prob[kBands[n + 1]];
    if (!VP8GetBit(br, p[1])) {
-      p = prob[kBands[n]][0];
+      p = p_ctx[0];
    } else {  // non zero coeff
-      int v, j;
+      int v;
      if (!VP8GetBit(br, p[2])) {
-        p = prob[kBands[n]][1];
        v = 1;
+        p = p_ctx[1];
      } else {
-        if (!VP8GetBit(br, p[3])) {
+        v = GetLargeValue(br, p);
-          if (!VP8GetBit(br, p[4])) {
+        p = p_ctx[2];
-            v = 2;
-          } else {
-            v = 3 + VP8GetBit(br, p[5]);
-          }
-        } else {
-          if (!VP8GetBit(br, p[6])) {
-            if (!VP8GetBit(br, p[7])) {
-              v = 5 + VP8GetBit(br, 159);
-            } else {
-              v = 7 + 2 * VP8GetBit(br, 165);
-              v += VP8GetBit(br, 145);
-            }
-          } else {
-            const uint8_t* tab;
-            const int bit1 = VP8GetBit(br, p[8]);
-            const int bit0 = VP8GetBit(br, p[9 + bit1]);
-            const int cat = 2 * bit1 + bit0;
-            v = 0;
-            for (tab = kCat3456[cat]; *tab; ++tab) {
-              v += v + VP8GetBit(br, *tab);
-            }
-            v += 3 + (8 << cat);
-          }
-        }
-        p = prob[kBands[n]][2];
      }
-      j = kZigzag[n - 1];
+      out[kZigzag[n]] = VP8GetSigned(br, v) * dq[n > 0];
-      out[j] = VP8GetSigned(br, v) * dq[j > 0];
+      if (n < 15 && !VP8GetBit(br, p[0])) {   // EOB
-      if (n == 16 || !VP8GetBit(br, p[0])) {   // EOB
+        return n + 1;
-        return n;
      }
    }
-    if (n == 16) {
-      return 16;
-    }
  }
+  return 16;
 }
 // Alias-safe way of converting 4bytes to 32bits.
@@ -670,6 +661,12 @@ int VP8DecodeMB(VP8Decoder* const dec, VP8BitReader* const token_br) {
    dec->non_zero_ac_ = 0;
  }
+  if (dec->filter_type_ > 0) {  // store filter info
+    VP8FInfo* const finfo = dec->f_info_ + dec->mb_x_;
+    *finfo = dec->fstrengths_[dec->segment_][dec->is_i4x4_];
+    finfo->f_inner_ = (!info->skip_ || dec->is_i4x4_);
+  }
  return (!token_br->eof_);
 }
@@ -693,10 +690,8 @@ static int ParseFrame(VP8Decoder* const dec, VP8Io* io) {
        return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
                           "Premature end-of-file encountered.");
      }
+      // Reconstruct and emit samples.
      VP8ReconstructBlock(dec);
-      // Store data and save block's filtering params
-      VP8StoreBlock(dec);
    }
    if (!VP8ProcessRow(dec, io)) {
      return VP8SetError(dec, VP8_STATUS_USER_ABORT, "Output aborted.");

--- a/3rdparty/libwebp/dec/vp8i.h
+++ b/3rdparty/libwebp/dec/vp8i.h
@@ -27,8 +27,8 @@ extern "C" {
 // version numbers
 #define DEC_MAJ_VERSION 0
-#define DEC_MIN_VERSION 2
+#define DEC_MIN_VERSION 3
-#define DEC_REV_VERSION 1
+#define DEC_REV_VERSION 0
 #define ONLY_KEYFRAME_CODE      // to remove any code related to P-Frames
@@ -157,7 +157,7 @@ typedef struct {  // filter specs
 } VP8FInfo;
 typedef struct {  // used for syntax-parsing
-  unsigned int nz_;          // non-zero AC/DC coeffs
+  unsigned int nz_:24;       // non-zero AC/DC coeffs (24bit)
  unsigned int dc_nz_:1;     // non-zero DC coeffs
  unsigned int skip_:1;      // block type
 } VP8MB;
@@ -269,9 +269,9 @@ struct VP8Decoder {
  uint32_t non_zero_ac_;
  // Filtering side-info
-  int filter_type_;                         // 0=off, 1=simple, 2=complex
+  int filter_type_;                          // 0=off, 1=simple, 2=complex
-  int filter_row_;                          // per-row flag
+  int filter_row_;                           // per-row flag
-  uint8_t filter_levels_[NUM_MB_SEGMENTS];  // precalculated per-segment
+  VP8FInfo fstrengths_[NUM_MB_SEGMENTS][2];  // precalculated per-segment/type
  // extensions
  const uint8_t* alpha_data_;   // compressed alpha data (if present)
@@ -312,8 +312,6 @@ VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io);
 int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io);
 // Process the last decoded row (filtering + output)
 int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io);
-// Store a block, along with filtering params
-void VP8StoreBlock(VP8Decoder* const dec);
 // To be called at the start of a new scanline, to initialize predictors.
 void VP8InitScanline(VP8Decoder* const dec);
 // Decode one macroblock. Returns false if there is not enough data.

--- a/3rdparty/libwebp/dec/vp8l.c
+++ b/3rdparty/libwebp/dec/vp8l.c
@@ -58,18 +58,18 @@ static const uint8_t kCodeLengthCodeOrder[NUM_CODE_LENGTH_CODES] = {
 #define CODE_TO_PLANE_CODES        120
 static const uint8_t code_to_plane_lut[CODE_TO_PLANE_CODES] = {
-   0x18, 0x07, 0x17, 0x19, 0x28, 0x06, 0x27, 0x29, 0x16, 0x1a,
+  0x18, 0x07, 0x17, 0x19, 0x28, 0x06, 0x27, 0x29, 0x16, 0x1a,
-   0x26, 0x2a, 0x38, 0x05, 0x37, 0x39, 0x15, 0x1b, 0x36, 0x3a,
+  0x26, 0x2a, 0x38, 0x05, 0x37, 0x39, 0x15, 0x1b, 0x36, 0x3a,
-   0x25, 0x2b, 0x48, 0x04, 0x47, 0x49, 0x14, 0x1c, 0x35, 0x3b,
+  0x25, 0x2b, 0x48, 0x04, 0x47, 0x49, 0x14, 0x1c, 0x35, 0x3b,
-   0x46, 0x4a, 0x24, 0x2c, 0x58, 0x45, 0x4b, 0x34, 0x3c, 0x03,
+  0x46, 0x4a, 0x24, 0x2c, 0x58, 0x45, 0x4b, 0x34, 0x3c, 0x03,
-   0x57, 0x59, 0x13, 0x1d, 0x56, 0x5a, 0x23, 0x2d, 0x44, 0x4c,
+  0x57, 0x59, 0x13, 0x1d, 0x56, 0x5a, 0x23, 0x2d, 0x44, 0x4c,
-   0x55, 0x5b, 0x33, 0x3d, 0x68, 0x02, 0x67, 0x69, 0x12, 0x1e,
+  0x55, 0x5b, 0x33, 0x3d, 0x68, 0x02, 0x67, 0x69, 0x12, 0x1e,
-   0x66, 0x6a, 0x22, 0x2e, 0x54, 0x5c, 0x43, 0x4d, 0x65, 0x6b,
+  0x66, 0x6a, 0x22, 0x2e, 0x54, 0x5c, 0x43, 0x4d, 0x65, 0x6b,
-   0x32, 0x3e, 0x78, 0x01, 0x77, 0x79, 0x53, 0x5d, 0x11, 0x1f,
+  0x32, 0x3e, 0x78, 0x01, 0x77, 0x79, 0x53, 0x5d, 0x11, 0x1f,
-   0x64, 0x6c, 0x42, 0x4e, 0x76, 0x7a, 0x21, 0x2f, 0x75, 0x7b,
+  0x64, 0x6c, 0x42, 0x4e, 0x76, 0x7a, 0x21, 0x2f, 0x75, 0x7b,
-   0x31, 0x3f, 0x63, 0x6d, 0x52, 0x5e, 0x00, 0x74, 0x7c, 0x41,
+  0x31, 0x3f, 0x63, 0x6d, 0x52, 0x5e, 0x00, 0x74, 0x7c, 0x41,
-   0x4f, 0x10, 0x20, 0x62, 0x6e, 0x30, 0x73, 0x7d, 0x51, 0x5f,
+  0x4f, 0x10, 0x20, 0x62, 0x6e, 0x30, 0x73, 0x7d, 0x51, 0x5f,
-   0x40, 0x72, 0x7e, 0x61, 0x6f, 0x50, 0x71, 0x7f, 0x60, 0x70
+  0x40, 0x72, 0x7e, 0x61, 0x6f, 0x50, 0x71, 0x7f, 0x60, 0x70
 };
 static int DecodeImageStream(int xsize, int ysize,
@@ -149,31 +149,22 @@ static WEBP_INLINE int PlaneCodeToDistance(int xsize, int plane_code) {
 //------------------------------------------------------------------------------
 // Decodes the next Huffman code from bit-stream.
 // FillBitWindow(br) needs to be called at minimum every second call
-// to ReadSymbolUnsafe.
+// to ReadSymbol, in order to pre-fetch enough bits.
-static int ReadSymbolUnsafe(const HuffmanTree* tree, VP8LBitReader* const br) {
+static WEBP_INLINE int ReadSymbol(const HuffmanTree* tree,
+                                  VP8LBitReader* const br) {
  const HuffmanTreeNode* node = tree->root_;
+  int num_bits = 0;
+  uint32_t bits = VP8LPrefetchBits(br);
  assert(node != NULL);
  while (!HuffmanTreeNodeIsLeaf(node)) {
-    node = HuffmanTreeNextNode(node, VP8LReadOneBitUnsafe(br));
+    node = HuffmanTreeNextNode(node, bits & 1);
+    bits >>= 1;
+    ++num_bits;
  }
+  VP8LDiscardBits(br, num_bits);
  return node->symbol_;
 }
-static WEBP_INLINE int ReadSymbol(const HuffmanTree* tree,
-                                  VP8LBitReader* const br) {
-  const int read_safe = (br->pos_ + 8 > br->len_);
-  if (!read_safe) {
-    return ReadSymbolUnsafe(tree, br);
-  } else {
-    const HuffmanTreeNode* node = tree->root_;
-    assert(node != NULL);
-    while (!HuffmanTreeNodeIsLeaf(node)) {
-      node = HuffmanTreeNextNode(node, VP8LReadOneBit(br));
-    }
-    return node->symbol_;
-  }
-}
 static int ReadHuffmanCodeLengths(
    VP8LDecoder* const dec, const int* const code_length_code_lengths,
    int num_symbols, int* const code_lengths) {
@@ -327,10 +318,10 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
    hdr->huffman_subsample_bits_ = huffman_precision;
    for (i = 0; i < huffman_pixs; ++i) {
      // The huffman data is stored in red and green bytes.
-      const int index = (huffman_image[i] >> 8) & 0xffff;
+      const int group = (huffman_image[i] >> 8) & 0xffff;
-      huffman_image[i] = index;
+      huffman_image[i] = group;
-      if (index >= num_htree_groups) {
+      if (group >= num_htree_groups) {
-        num_htree_groups = index + 1;
+        num_htree_groups = group + 1;
      }
    }
  }
@@ -1146,9 +1137,9 @@ int VP8LDecodeHeader(VP8LDecoder* const dec, VP8Io* const io) {
  return 1;
 Error:
-   VP8LClear(dec);
+  VP8LClear(dec);
-   assert(dec->status_ != VP8_STATUS_OK);
+  assert(dec->status_ != VP8_STATUS_OK);
-   return 0;
+  return 0;
 }
 int VP8LDecodeImage(VP8LDecoder* const dec) {

--- a/3rdparty/libwebp/dec/webp.c
+++ b/3rdparty/libwebp/dec/webp.c
@@ -14,7 +14,7 @@
 #include "./vp8i.h"
 #include "./vp8li.h"
 #include "./webpi.h"
-#include "../webp/format_constants.h"
+#include "../webp/mux_types.h"  // ALPHA_FLAG
 #if defined(__cplusplus) || defined(c_plusplus)
 extern "C" {
@@ -40,8 +40,8 @@ extern "C" {
 //   20..23  VP8X flags bit-map corresponding to the chunk-types present.
 //   24..26  Width of the Canvas Image.
 //   27..29  Height of the Canvas Image.
-// There can be extra chunks after the "VP8X" chunk (ICCP, TILE, FRM, VP8,
+// There can be extra chunks after the "VP8X" chunk (ICCP, FRGM, ANMF, VP8,
-// META  ...)
+// VP8L, XMP, EXIF  ...)
 // All sizes are in little-endian order.
 // Note: chunk data size must be padded to multiple of 2 when written.
@@ -276,6 +276,7 @@ static VP8StatusCode ParseHeadersInternal(const uint8_t* data,
                                          int* const width,
                                          int* const height,
                                          int* const has_alpha,
+                                          int* const has_animation,
                                          WebPHeaderStructure* const headers) {
  int found_riff = 0;
  int found_vp8x = 0;
@@ -308,7 +309,8 @@ static VP8StatusCode ParseHeadersInternal(const uint8_t* data,
      // necessary to send VP8X chunk to the decoder.
      return VP8_STATUS_BITSTREAM_ERROR;
    }
-    if (has_alpha != NULL) *has_alpha = !!(flags & ALPHA_FLAG_BIT);
+    if (has_alpha != NULL) *has_alpha = !!(flags & ALPHA_FLAG);
+    if (has_animation != NULL) *has_animation = !!(flags & ANIMATION_FLAG);
    if (found_vp8x && headers == NULL) {
      return VP8_STATUS_OK;  // Return features from VP8X header.
    }
@@ -370,10 +372,19 @@ static VP8StatusCode ParseHeadersInternal(const uint8_t* data,
 }
 VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers) {
+  VP8StatusCode status;
+  int has_animation = 0;
  assert(headers != NULL);
  // fill out headers, ignore width/height/has_alpha.
-  return ParseHeadersInternal(headers->data, headers->data_size,
+  status = ParseHeadersInternal(headers->data, headers->data_size,
-                              NULL, NULL, NULL, headers);
+                                NULL, NULL, NULL, &has_animation, headers);
+  if (status == VP8_STATUS_OK || status == VP8_STATUS_NOT_ENOUGH_DATA) {
+    // TODO(jzern): full support of animation frames will require API additions.
+    if (has_animation) {
+      status = VP8_STATUS_UNSUPPORTED_FEATURE;
+    }
+  }
+  return status;
 }
 //------------------------------------------------------------------------------
@@ -625,10 +636,11 @@ static VP8StatusCode GetFeatures(const uint8_t* const data, size_t data_size,
  }
  DefaultFeatures(features);
-  // Only parse enough of the data to retrieve width/height/has_alpha.
+  // Only parse enough of the data to retrieve the features.
  return ParseHeadersInternal(data, data_size,
                              &features->width, &features->height,
-                              &features->has_alpha, NULL);
+                              &features->has_alpha, &features->has_animation,
+                              NULL);
 }
 //------------------------------------------------------------------------------
@@ -672,19 +684,13 @@ int WebPInitDecoderConfigInternal(WebPDecoderConfig* config,
 VP8StatusCode WebPGetFeaturesInternal(const uint8_t* data, size_t data_size,
                                      WebPBitstreamFeatures* features,
                                      int version) {
-  VP8StatusCode status;
  if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_DECODER_ABI_VERSION)) {
    return VP8_STATUS_INVALID_PARAM;   // version mismatch
  }
  if (features == NULL) {
    return VP8_STATUS_INVALID_PARAM;
  }
+  return GetFeatures(data, data_size, features);
-  status = GetFeatures(data, data_size, features);
-  if (status == VP8_STATUS_NOT_ENOUGH_DATA) {
-    return VP8_STATUS_BITSTREAM_ERROR;  // Not-enough-data treated as error.
-  }
-  return status;
 }
 VP8StatusCode WebPDecode(const uint8_t* data, size_t data_size,

--- a/3rdparty/libwebp/dec/webpi.h
+++ b/3rdparty/libwebp/dec/webpi.h
@@ -61,10 +61,10 @@ typedef struct {
 } WebPHeaderStructure;
 // Skips over all valid chunks prior to the first VP8/VP8L frame header.
-// Returns VP8_STATUS_OK on success,
+// Returns: VP8_STATUS_OK, VP8_STATUS_BITSTREAM_ERROR (invalid header/chunk),
-//         VP8_STATUS_BITSTREAM_ERROR if an invalid header/chunk is found, and
+// VP8_STATUS_NOT_ENOUGH_DATA (partial input) or VP8_STATUS_UNSUPPORTED_FEATURE
-//         VP8_STATUS_NOT_ENOUGH_DATA if case of insufficient data.
+// in the case of non-decodable features (animation for instance).
-// In 'headers', compressed_size, offset, alpha_data, alpha_size and lossless
+// In 'headers', compressed_size, offset, alpha_data, alpha_size, and lossless
 // fields are updated appropriately upon success.
 VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers);

--- a/3rdparty/libwebp/mux/demux.c
+++ b/3rdparty/libwebp/mux/demux.c
--- a/3rdparty/libwebp/dsp/dec.c
+++ b/3rdparty/libwebp/dsp/dec.c
@@ -426,11 +426,16 @@ static void HE8uv(uint8_t *dst) {    // horizontal
 }
 // helper for chroma-DC predictions
-static WEBP_INLINE void Put8x8uv(uint64_t v, uint8_t* dst) {
+static WEBP_INLINE void Put8x8uv(uint8_t value, uint8_t* dst) {
  int j;
+#ifndef WEBP_REFERENCE_IMPLEMENTATION  // fast path: one 64-bit store per row
+  const uint64_t v = (uint64_t)value * 0x0101010101010101ULL;  // 8 x 'value'
  for (j = 0; j < 8; ++j) {
    *(uint64_t*)(dst + j * BPS) = v;
  }
+#else  // portable path: byte-wise fill, no uint64_t* cast of 'dst' needed
+  for (j = 0; j < 8; ++j) memset(dst + j * BPS, value, 8);
+#endif  // WEBP_REFERENCE_IMPLEMENTATION
 }
 static void DC8uv(uint8_t *dst) {     // DC
@@ -439,7 +444,7 @@ static void DC8uv(uint8_t *dst) {     // DC
  for (i = 0; i < 8; ++i) {
    dc0 += dst[i - BPS] + dst[-1 + i * BPS];
  }
-  Put8x8uv((uint64_t)((dc0 >> 4) * 0x0101010101010101ULL), dst);
+  Put8x8uv(dc0 >> 4, dst);
 }
 static void DC8uvNoLeft(uint8_t *dst) {   // DC with no left samples
@@ -448,7 +453,7 @@ static void DC8uvNoLeft(uint8_t *dst) {   // DC with no left samples
  for (i = 0; i < 8; ++i) {
    dc0 += dst[i - BPS];
  }
-  Put8x8uv((uint64_t)((dc0 >> 3) * 0x0101010101010101ULL), dst);
+  Put8x8uv(dc0 >> 3, dst);
 }
 static void DC8uvNoTop(uint8_t *dst) {  // DC with no top samples
@@ -457,11 +462,11 @@ static void DC8uvNoTop(uint8_t *dst) {  // DC with no top samples
  for (i = 0; i < 8; ++i) {
    dc0 += dst[-1 + i * BPS];
  }
-  Put8x8uv((uint64_t)((dc0 >> 3) * 0x0101010101010101ULL), dst);
+  Put8x8uv(dc0 >> 3, dst);
 }
 static void DC8uvNoTopLeft(uint8_t *dst) {    // DC with nothing
-  Put8x8uv(0x8080808080808080ULL, dst);
+  Put8x8uv(0x80, dst);
 }
 //------------------------------------------------------------------------------

--- a/3rdparty/libwebp/dsp/dec_neon.c
+++ b/3rdparty/libwebp/dsp/dec_neon.c
@@ -79,7 +79,7 @@ extern "C" {
  "vld4.8   {" #c1"[6], " #c2"[6], " #c3"[6], " #c4"[6]}," #b1 "," #stride"\n" \
  "vld4.8   {" #c1"[7], " #c2"[7], " #c3"[7], " #c4"[7]}," #b2 "," #stride"\n"
-#define STORE8x2(c1, c2, p,stride)                                             \
+#define STORE8x2(c1, c2, p, stride)                                            \
  "vst2.8   {" #c1"[0], " #c2"[0]}," #p "," #stride " \n"                      \
  "vst2.8   {" #c1"[1], " #c2"[1]}," #p "," #stride " \n"                      \
  "vst2.8   {" #c1"[2], " #c2"[2]}," #p "," #stride " \n"                      \
@@ -155,6 +155,9 @@ static void SimpleHFilter16iNEON(uint8_t* p, int stride, int thresh) {
  }
 }
+//-----------------------------------------------------------------------------
+// Inverse transforms (Paragraph 14.4)
 static void TransformOneNEON(const int16_t *in, uint8_t *dst) {
  const int kBPS = BPS;
  const int16_t constants[] = {20091, 17734, 0, 0};
@@ -311,6 +314,73 @@ static void TransformTwoNEON(const int16_t* in, uint8_t* dst, int do_two) {
  }
 }
+static void TransformWHT(const int16_t* in, int16_t* out) {  // NEON inverse WHT
+  const int kStep = 32;  // Post-increment of 'out' after each store, in bytes
+                         // (i.e. 16 int16_t): the asm does no pointer scaling.
+  __asm__ volatile (
+    // part 1: first pass over in[i], in[i + 4], in[i + 8], in[i + 12] (i = 0..3)
+    // load the 16 input coefficients into q0, q1 (d0..d3)
+    "vld1.16         {q0, q1}, [%[in]]           \n"
+    "vaddl.s16       q2, d0, d3                  \n" // a0 = in[0] + in[12]
+    "vaddl.s16       q3, d1, d2                  \n" // a1 = in[4] + in[8]
+    "vsubl.s16       q4, d1, d2                  \n" // a2 = in[4] - in[8]
+    "vsubl.s16       q5, d0, d3                  \n" // a3 = in[0] - in[12]
+    "vadd.s32        q0, q2, q3                  \n" // tmp[0] = a0 + a1
+    "vsub.s32        q2, q2, q3                  \n" // tmp[8] = a0 - a1
+    "vadd.s32        q1, q5, q4                  \n" // tmp[4] = a3 + a2
+    "vsub.s32        q3, q5, q4                  \n" // tmp[12] = a3 - a2
+    // Transpose the 4x4 block of 32-bit intermediates
+    // q0 = tmp[0, 4, 8, 12], q1 = tmp[2, 6, 10, 14]
+    // q2 = tmp[1, 5, 9, 13], q3 = tmp[3, 7, 11, 15]
+    "vswp            d1, d4                      \n" // vtrn.64 q0, q2
+    "vswp            d3, d6                      \n" // vtrn.64 q1, q3
+    "vtrn.32         q0, q1                      \n"
+    "vtrn.32         q2, q3                      \n"
+    "vmov.s32        q4, #3                      \n" // part 2: dc = 3 (bias for >> 3)
+    "vadd.s32        q0, q0, q4                  \n" // dc = tmp[0] + 3
+    "vadd.s32        q6, q0, q3                  \n" // a0 = dc + tmp[3]
+    "vadd.s32        q7, q1, q2                  \n" // a1 = tmp[1] + tmp[2]
+    "vsub.s32        q8, q1, q2                  \n" // a2 = tmp[1] - tmp[2]
+    "vsub.s32        q9, q0, q3                  \n" // a3 = dc - tmp[3]
+    "vadd.s32        q0, q6, q7                  \n"
+    "vshrn.s32       d0, q0, #3                  \n" // (a0 + a1) >> 3
+    "vadd.s32        q1, q9, q8                  \n"
+    "vshrn.s32       d1, q1, #3                  \n" // (a3 + a2) >> 3
+    "vsub.s32        q2, q6, q7                  \n"
+    "vshrn.s32       d2, q2, #3                  \n" // (a0 - a1) >> 3
+    "vsub.s32        q3, q9, q8                  \n"
+    "vshrn.s32       d3, q3, #3                  \n" // (a3 - a2) >> 3
+    // store the 16 results one by one, advancing 'out' by kStep bytes each time
+    "vst1.16         d0[0], [%[out]], %[kStep]   \n"
+    "vst1.16         d1[0], [%[out]], %[kStep]   \n"
+    "vst1.16         d2[0], [%[out]], %[kStep]   \n"
+    "vst1.16         d3[0], [%[out]], %[kStep]   \n"
+    "vst1.16         d0[1], [%[out]], %[kStep]   \n"
+    "vst1.16         d1[1], [%[out]], %[kStep]   \n"
+    "vst1.16         d2[1], [%[out]], %[kStep]   \n"
+    "vst1.16         d3[1], [%[out]], %[kStep]   \n"
+    "vst1.16         d0[2], [%[out]], %[kStep]   \n"
+    "vst1.16         d1[2], [%[out]], %[kStep]   \n"
+    "vst1.16         d2[2], [%[out]], %[kStep]   \n"
+    "vst1.16         d3[2], [%[out]], %[kStep]   \n"
+    "vst1.16         d0[3], [%[out]], %[kStep]   \n"
+    "vst1.16         d1[3], [%[out]], %[kStep]   \n"
+    "vst1.16         d2[3], [%[out]], %[kStep]   \n"
+    "vst1.16         d3[3], [%[out]], %[kStep]   \n"
+    : [out] "+r"(out)  // updated by the post-incremented stores
+    : [in] "r"(in), [kStep] "r"(kStep)  // read-only inputs
+    : "memory", "q0", "q1", "q2", "q3", "q4",
+      "q5", "q6", "q7", "q8", "q9"  // clobbered
+  );
+}
 #endif   // WEBP_USE_NEON
 //------------------------------------------------------------------------------
@@ -321,6 +391,7 @@ extern void VP8DspInitNEON(void);
 void VP8DspInitNEON(void) {
 #if defined(WEBP_USE_NEON)
  VP8Transform = TransformTwoNEON;
+  VP8TransformWHT = TransformWHT;
  VP8SimpleVFilter16 = SimpleVFilter16NEON;
  VP8SimpleHFilter16 = SimpleHFilter16NEON;

--- a/3rdparty/libwebp/dsp/dec_sse2.c
+++ b/3rdparty/libwebp/dsp/dec_sse2.c
@@ -194,7 +194,7 @@ static void TransformSSE2(const int16_t* in, uint8_t* dst, int do_two) {
  // Add inverse transform to 'dst' and store.
  {
-    const __m128i zero = _mm_set1_epi16(0);
+    const __m128i zero = _mm_setzero_si128();
    // Load the reference(s).
    __m128i dst0, dst1, dst2, dst3;
    if (do_two) {
@@ -278,14 +278,14 @@ static void TransformSSE2(const int16_t* in, uint8_t* dst, int do_two) {
 #define GET_NOTHEV(p1, p0, q0, q1, hev_thresh, not_hev) {                      \
  const __m128i zero = _mm_setzero_si128();                                    \
-  const __m128i t1 = MM_ABS(p1, p0);                                           \
+  const __m128i t_1 = MM_ABS(p1, p0);                                          \
-  const __m128i t2 = MM_ABS(q1, q0);                                           \
+  const __m128i t_2 = MM_ABS(q1, q0);                                          \
                                                                               \
  const __m128i h = _mm_set1_epi8(hev_thresh);                                 \
-  const __m128i t3 = _mm_subs_epu8(t1, h);  /* abs(p1 - p0) - hev_tresh */     \
+  const __m128i t_3 = _mm_subs_epu8(t_1, h);  /* abs(p1 - p0) - hev_thresh */  \
-  const __m128i t4 = _mm_subs_epu8(t2, h);  /* abs(q1 - q0) - hev_tresh */     \
+  const __m128i t_4 = _mm_subs_epu8(t_2, h);  /* abs(q1 - q0) - hev_thresh */  \
                                                                               \
-  not_hev = _mm_or_si128(t3, t4);                                              \
+  not_hev = _mm_or_si128(t_3, t_4);                                            \
  not_hev = _mm_cmpeq_epi8(not_hev, zero); /* t_1 <= h && t_2 <= h */          \
 }
@@ -314,13 +314,13 @@ static void TransformSSE2(const int16_t* in, uint8_t* dst, int do_two) {
 // Updates values of 2 pixels at MB edge during complex filtering.
 // Update operations:
-// q = q - a and p = p + a; where a = [(a_hi >> 7), (a_lo >> 7)]
+// q = q - delta and p = p + delta; where delta = [(a_hi >> 7), (a_lo >> 7)]
 #define UPDATE_2PIXELS(pi, qi, a_lo, a_hi) {                                   \
  const __m128i a_lo7 = _mm_srai_epi16(a_lo, 7);                               \
  const __m128i a_hi7 = _mm_srai_epi16(a_hi, 7);                               \
-  const __m128i a = _mm_packs_epi16(a_lo7, a_hi7);                             \
+  const __m128i delta = _mm_packs_epi16(a_lo7, a_hi7);                         \
-  pi = _mm_adds_epi8(pi, a);                                                   \
+  pi = _mm_adds_epi8(pi, delta);                                               \
-  qi = _mm_subs_epi8(qi, a);                                                   \
+  qi = _mm_subs_epi8(qi, delta);                                               \
 }
 static void NeedsFilter(const __m128i* p1, const __m128i* p0, const __m128i* q0,

--- a/3rdparty/libwebp/dsp/dsp.h
+++ b/3rdparty/libwebp/dsp/dsp.h
@@ -49,8 +49,6 @@ extern VP8CPUInfo VP8GetCPUInfo;
 //------------------------------------------------------------------------------
 // Encoding
-int VP8GetAlpha(const int histo[]);
 // Transforms
 // VP8Idct: Does one of two inverse transforms. If do_two is set, the transforms
 //          will be done for (ref, in, dst) and (ref + 4, in + 16, dst + 4).
@@ -85,10 +83,11 @@ typedef int (*VP8QuantizeBlock)(int16_t in[16], int16_t out[16],
                                int n, const struct VP8Matrix* const mtx);
 extern VP8QuantizeBlock VP8EncQuantizeBlock;
-// Compute susceptibility based on DCT-coeff histograms:
+// Collect histogram for susceptibility calculation and accumulate in histo[].
-// the higher, the "easier" the macroblock is to compress.
+struct VP8Histogram;
-typedef int (*VP8CHisto)(const uint8_t* ref, const uint8_t* pred,
+typedef void (*VP8CHisto)(const uint8_t* ref, const uint8_t* pred,
-                         int start_block, int end_block);
+                          int start_block, int end_block,
+                          struct VP8Histogram* const histo);
 extern const int VP8DspScan[16 + 4 + 4];
 extern VP8CHisto VP8CollectHistogram;
@@ -104,7 +103,7 @@ extern VP8DecIdct2 VP8Transform;
 extern VP8DecIdct VP8TransformUV;
 extern VP8DecIdct VP8TransformDC;
 extern VP8DecIdct VP8TransformDCUV;
-extern void (*VP8TransformWHT)(const int16_t* in, int16_t* out);
+extern VP8WHT VP8TransformWHT;
 // *dst is the destination block, with stride BPS. Boundary samples are
 // assumed accessible when needed.
@@ -159,6 +158,9 @@ extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */];
 // Initializes SSE2 version of the fancy upsamplers.
 void WebPInitUpsamplersSSE2(void);
+// NEON version
+void WebPInitUpsamplersNEON(void);
 #endif    // FANCY_UPSAMPLING
 // Point-sampling methods.
@@ -200,6 +202,7 @@ extern void (*WebPApplyAlphaMultiply4444)(
 void WebPInitPremultiply(void);
 void WebPInitPremultiplySSE2(void);   // should not be called directly.
+void WebPInitPremultiplyNEON(void);
 //------------------------------------------------------------------------------

--- a/3rdparty/libwebp/dsp/enc.c
+++ b/3rdparty/libwebp/dsp/enc.c
@@ -17,31 +17,18 @@
 extern "C" {
 #endif
-//------------------------------------------------------------------------------
+static WEBP_INLINE uint8_t clip_8b(int v) {
-// Compute susceptibility based on DCT-coeff histograms:
+  return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255;
-// the higher, the "easier" the macroblock is to compress.
-static int ClipAlpha(int alpha) {
-  return alpha < 0 ? 0 : alpha > 255 ? 255 : alpha;
 }
-int VP8GetAlpha(const int histo[MAX_COEFF_THRESH + 1]) {
+static WEBP_INLINE int clip_max(int v, int max) {
-  int num = 0, den = 0, val = 0;
+  return (v > max) ? max : v;
-  int k;
-  int alpha;
-  // note: changing this loop to avoid the numerous "k + 1" slows things down.
-  for (k = 0; k < MAX_COEFF_THRESH; ++k) {
-    if (histo[k + 1]) {
-      val += histo[k + 1];
-      num += val * (k + 1);
-      den += (k + 1) * (k + 1);
-    }
-  }
-  // we scale the value to a usable [0..255] range
-  alpha = den ? 10 * num / den - 5 : 0;
-  return ClipAlpha(alpha);
 }
+//------------------------------------------------------------------------------
+// Compute susceptibility based on DCT-coeff histograms:
+// the higher, the "easier" the macroblock is to compress.
 const int VP8DspScan[16 + 4 + 4] = {
  // Luma
  0 +  0 * BPS,  4 +  0 * BPS, 8 +  0 * BPS, 12 +  0 * BPS,
@@ -53,27 +40,23 @@ const int VP8DspScan[16 + 4 + 4] = {
  8 + 0 * BPS,  12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS     // V
 };
-static int CollectHistogram(const uint8_t* ref, const uint8_t* pred,
+static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
-                            int start_block, int end_block) {
+                             int start_block, int end_block,
-  int histo[MAX_COEFF_THRESH + 1] = { 0 };
+                             VP8Histogram* const histo) {
-  int16_t out[16];
+  int j;
-  int j, k;
  for (j = start_block; j < end_block; ++j) {
-    VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
+    int k;
+    int16_t out[16];
-    // Convert coefficients to bin (within out[]).
+    VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
-    for (k = 0; k < 16; ++k) {
-      const int v = abs(out[k]) >> 2;
-      out[k] = (v > MAX_COEFF_THRESH) ? MAX_COEFF_THRESH : v;
-    }
-    // Use bin to update histogram.
+    // Convert coefficients to bin.
    for (k = 0; k < 16; ++k) {
-      histo[out[k]]++;
+      const int v = abs(out[k]) >> 3;  // TODO(skal): add rounding?
+      const int clipped_value = clip_max(v, MAX_COEFF_THRESH);
+      histo->distribution[clipped_value]++;
    }
  }
-  return VP8GetAlpha(histo);
 }
 //------------------------------------------------------------------------------
@@ -89,15 +72,12 @@ static void InitTables(void) {
  if (!tables_ok) {
    int i;
    for (i = -255; i <= 255 + 255; ++i) {
-      clip1[255 + i] = (i < 0) ? 0 : (i > 255) ? 255 : i;
+      clip1[255 + i] = clip_8b(i);
    }
    tables_ok = 1;
  }
 }
-static WEBP_INLINE uint8_t clip_8b(int v) {
-  return (!(v & ~0xff)) ? v : v < 0 ? 0 : 255;
-}
 //------------------------------------------------------------------------------
 // Transforms (Paragraph 14.4)
@@ -154,25 +134,25 @@ static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
  int i;
  int tmp[16];
  for (i = 0; i < 4; ++i, src += BPS, ref += BPS) {
-    const int d0 = src[0] - ref[0];
+    const int d0 = src[0] - ref[0];   // 9bit dynamic range ([-255,255])
    const int d1 = src[1] - ref[1];
    const int d2 = src[2] - ref[2];
    const int d3 = src[3] - ref[3];
-    const int a0 = (d0 + d3) << 3;
+    const int a0 = (d0 + d3);         // 10b                      [-510,510]
-    const int a1 = (d1 + d2) << 3;
+    const int a1 = (d1 + d2);
-    const int a2 = (d1 - d2) << 3;
+    const int a2 = (d1 - d2);
-    const int a3 = (d0 - d3) << 3;
+    const int a3 = (d0 - d3);
-    tmp[0 + i * 4] = (a0 + a1);
+    tmp[0 + i * 4] = (a0 + a1) << 3;  // 14b                      [-8160,8160]
-    tmp[1 + i * 4] = (a2 * 2217 + a3 * 5352 + 14500) >> 12;
+    tmp[1 + i * 4] = (a2 * 2217 + a3 * 5352 + 1812) >> 9;      // [-7536,7542]
-    tmp[2 + i * 4] = (a0 - a1);
+    tmp[2 + i * 4] = (a0 - a1) << 3;
-    tmp[3 + i * 4] = (a3 * 2217 - a2 * 5352 +  7500) >> 12;
+    tmp[3 + i * 4] = (a3 * 2217 - a2 * 5352 +  937) >> 9;
  }
  for (i = 0; i < 4; ++i) {
-    const int a0 = (tmp[0 + i] + tmp[12 + i]);
+    const int a0 = (tmp[0 + i] + tmp[12 + i]);  // 15b
    const int a1 = (tmp[4 + i] + tmp[ 8 + i]);
    const int a2 = (tmp[4 + i] - tmp[ 8 + i]);
    const int a3 = (tmp[0 + i] - tmp[12 + i]);
-    out[0 + i] = (a0 + a1 + 7) >> 4;
+    out[0 + i] = (a0 + a1 + 7) >> 4;            // 12b
    out[4 + i] = ((a2 * 2217 + a3 * 5352 + 12000) >> 16) + (a3 != 0);
    out[8 + i] = (a0 - a1 + 7) >> 4;
    out[12+ i] = ((a3 * 2217 - a2 * 5352 + 51000) >> 16);
@@ -589,30 +569,30 @@ static int TTransform(const uint8_t* in, const uint16_t* w) {
  int i;
  // horizontal pass
  for (i = 0; i < 4; ++i, in += BPS) {
-    const int a0 = (in[0] + in[2]) << 2;
+    const int a0 = in[0] + in[2];
-    const int a1 = (in[1] + in[3]) << 2;
+    const int a1 = in[1] + in[3];
-    const int a2 = (in[1] - in[3]) << 2;
+    const int a2 = in[1] - in[3];
-    const int a3 = (in[0] - in[2]) << 2;
+    const int a3 = in[0] - in[2];
-    tmp[0 + i * 4] = a0 + a1 + (a0 != 0);
+    tmp[0 + i * 4] = a0 + a1;
    tmp[1 + i * 4] = a3 + a2;
    tmp[2 + i * 4] = a3 - a2;
    tmp[3 + i * 4] = a0 - a1;
  }
  // vertical pass
  for (i = 0; i < 4; ++i, ++w) {
-    const int a0 = (tmp[0 + i] + tmp[8 + i]);
+    const int a0 = tmp[0 + i] + tmp[8 + i];
-    const int a1 = (tmp[4 + i] + tmp[12+ i]);
+    const int a1 = tmp[4 + i] + tmp[12+ i];
-    const int a2 = (tmp[4 + i] - tmp[12+ i]);
+    const int a2 = tmp[4 + i] - tmp[12+ i];
-    const int a3 = (tmp[0 + i] - tmp[8 + i]);
+    const int a3 = tmp[0 + i] - tmp[8 + i];
    const int b0 = a0 + a1;
    const int b1 = a3 + a2;
    const int b2 = a3 - a2;
    const int b3 = a0 - a1;
-    // abs((b + (b<0) + 3) >> 3) = (abs(b) + 3) >> 3
-    sum += w[ 0] * ((abs(b0) + 3) >> 3);
+    sum += w[ 0] * abs(b0);
-    sum += w[ 4] * ((abs(b1) + 3) >> 3);
+    sum += w[ 4] * abs(b1);
-    sum += w[ 8] * ((abs(b2) + 3) >> 3);
+    sum += w[ 8] * abs(b2);
-    sum += w[12] * ((abs(b3) + 3) >> 3);
+    sum += w[12] * abs(b3);
  }
  return sum;
 }
@@ -621,7 +601,7 @@ static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
                    const uint16_t* const w) {
  const int sum1 = TTransform(a, w);
  const int sum2 = TTransform(b, w);
-  return (abs(sum2 - sum1) + 8) >> 4;
+  return abs(sum2 - sum1) >> 5;
 }
 static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
@@ -651,13 +631,13 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16],
  for (; n < 16; ++n) {
    const int j = kZigzag[n];
    const int sign = (in[j] < 0);
-    int coeff = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];
+    const int coeff = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];
-    if (coeff > 2047) coeff = 2047;
    if (coeff > mtx->zthresh_[j]) {
      const int Q = mtx->q_[j];
      const int iQ = mtx->iq_[j];
      const int B = mtx->bias_[j];
      out[n] = QUANTDIV(coeff, iQ, B);
+      if (out[n] > MAX_LEVEL) out[n] = MAX_LEVEL;
      if (sign) out[n] = -out[n];
      in[j] = out[n] * Q;
      if (out[n]) last = n;
@@ -706,6 +686,7 @@ VP8QuantizeBlock VP8EncQuantizeBlock;
 VP8BlockCopy VP8Copy4x4;
 extern void VP8EncDspInitSSE2(void);
+extern void VP8EncDspInitNEON(void);
 void VP8EncDspInit(void) {
  InitTables();
@@ -734,6 +715,10 @@ void VP8EncDspInit(void) {
    if (VP8GetCPUInfo(kSSE2)) {
      VP8EncDspInitSSE2();
    }
+#elif defined(WEBP_USE_NEON)
+    if (VP8GetCPUInfo(kNEON)) {
+      VP8EncDspInitNEON();
+    }
 #endif
  }
 }

--- a/3rdparty/libwebp/dsp/enc_neon.c
+++ b/3rdparty/libwebp/dsp/enc_neon.c
--- a/3rdparty/libwebp/dsp/enc_sse2.c
+++ b/3rdparty/libwebp/dsp/enc_sse2.c
--- a/3rdparty/libwebp/dsp/lossless.c
+++ b/3rdparty/libwebp/dsp/lossless.c
--- a/3rdparty/libwebp/dsp/lossless.h
+++ b/3rdparty/libwebp/dsp/lossless.h
@@ -59,10 +59,20 @@ static WEBP_INLINE uint32_t VP8LSubSampleSize(uint32_t size,
  return (size + (1 << sampling_bits) - 1) >> sampling_bits;
 }
-// Faster logarithm for integers, with the property of log2(0) == 0.
+// Faster logarithm for integers. Small values use a look-up table.
-float VP8LFastLog2(int v);
+#define LOG_LOOKUP_IDX_MAX 256
+extern const float kLog2Table[LOG_LOOKUP_IDX_MAX];
+extern const float kSLog2Table[LOG_LOOKUP_IDX_MAX];
+extern float VP8LFastLog2Slow(int v);
+extern float VP8LFastSLog2Slow(int v);
+static WEBP_INLINE float VP8LFastLog2(int v) {
+  return (v < LOG_LOOKUP_IDX_MAX) ? kLog2Table[v] : VP8LFastLog2Slow(v);
+}
 // Fast calculation of v * log2(v) for integer input.
-static WEBP_INLINE float VP8LFastSLog2(int v) { return VP8LFastLog2(v) * v; }
+static WEBP_INLINE float VP8LFastSLog2(int v) {
+  return (v < LOG_LOOKUP_IDX_MAX) ? kSLog2Table[v] : VP8LFastSLog2Slow(v);
+}
 // In-place difference of each component with mod 256.
 static WEBP_INLINE uint32_t VP8LSubPixels(uint32_t a, uint32_t b) {

--- a/3rdparty/libwebp/dsp/upsampling.c
+++ b/3rdparty/libwebp/dsp/upsampling.c
@@ -32,7 +32,7 @@ WebPUpsampleLinePairFunc WebPUpsamplers[MODE_LAST];
 //  ([3*a +   b + 9*c + 3*d      a + 3*b + 3*c + 9*d]   [8 8]) / 16
 // We process u and v together stashed into 32bit (16bit each).
-#define LOAD_UV(u,v) ((u) | ((v) << 16))
+#define LOAD_UV(u, v) ((u) | ((v) << 16))
 #define UPSAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP)                                  \
 static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y,           \
@@ -327,6 +327,11 @@ void WebPInitUpsamplers(void) {
    if (VP8GetCPUInfo(kSSE2)) {
      WebPInitUpsamplersSSE2();
    }
+#endif
+#if defined(WEBP_USE_NEON)
+    if (VP8GetCPUInfo(kNEON)) {
+      WebPInitUpsamplersNEON();
+    }
 #endif
  }
 #endif  // FANCY_UPSAMPLING
@@ -347,6 +352,11 @@ void WebPInitPremultiply(void) {
    if (VP8GetCPUInfo(kSSE2)) {
      WebPInitPremultiplySSE2();
    }
+#endif
+#if defined(WEBP_USE_NEON)
+    if (VP8GetCPUInfo(kNEON)) {
+      WebPInitPremultiplyNEON();
+    }
 #endif
  }
 #endif  // FANCY_UPSAMPLING

--- a/3rdparty/libwebp/dsp/upsampling_neon.c
+++ b/3rdparty/libwebp/dsp/upsampling_neon.c
--- a/3rdparty/libwebp/dsp/upsampling_sse2.c
+++ b/3rdparty/libwebp/dsp/upsampling_sse2.c
@@ -51,12 +51,12 @@ extern "C" {
 // pack and store two alternating pixel rows
 #define PACK_AND_STORE(a, b, da, db, out) do {                                 \
-  const __m128i ta = _mm_avg_epu8(a, da);  /* (9a + 3b + 3c +  d + 8) / 16 */  \
+  const __m128i t_a = _mm_avg_epu8(a, da);  /* (9a + 3b + 3c +  d + 8) / 16 */ \
-  const __m128i tb = _mm_avg_epu8(b, db);  /* (3a + 9b +  c + 3d + 8) / 16 */  \
+  const __m128i t_b = _mm_avg_epu8(b, db);  /* (3a + 9b +  c + 3d + 8) / 16 */ \
-  const __m128i t1 = _mm_unpacklo_epi8(ta, tb);                                \
+  const __m128i t_1 = _mm_unpacklo_epi8(t_a, t_b);                             \
-  const __m128i t2 = _mm_unpackhi_epi8(ta, tb);                                \
+  const __m128i t_2 = _mm_unpackhi_epi8(t_a, t_b);                             \
-  _mm_store_si128(((__m128i*)(out)) + 0, t1);                                  \
+  _mm_store_si128(((__m128i*)(out)) + 0, t_1);                                 \
-  _mm_store_si128(((__m128i*)(out)) + 1, t2);                                  \
+  _mm_store_si128(((__m128i*)(out)) + 1, t_2);                                 \
 } while (0)
 // Loads 17 pixels each from rows r1 and r2 and generates 32 pixels.
@@ -128,7 +128,7 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y,           \
                      const uint8_t* top_u, const uint8_t* top_v,              \
                      const uint8_t* cur_u, const uint8_t* cur_v,              \
                      uint8_t* top_dst, uint8_t* bottom_dst, int len) {        \
-  int b;                                                                       \
+  int block;                                                                   \
  /* 16 byte aligned array to cache reconstructed u and v */                   \
  uint8_t uv_buf[4 * 32 + 15];                                                 \
  uint8_t* const r_uv = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15);            \
@@ -154,11 +154,11 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y,           \
    FUNC(bottom_y[0], u0, v0, bottom_dst);                                     \
  }                                                                            \
                                                                               \
-  for (b = 0; b < num_blocks; ++b) {                                           \
+  for (block = 0; block < num_blocks; ++block) {                               \
    UPSAMPLE_32PIXELS(top_u, cur_u, r_uv + 0 * 32);                            \
    UPSAMPLE_32PIXELS(top_v, cur_v, r_uv + 1 * 32);                            \
    CONVERT2RGB(FUNC, XSTEP, top_y, bottom_y, r_uv, top_dst, bottom_dst,       \
-                32 * b + 1, 32)                                                \
+                32 * block + 1, 32)                                            \
    top_u += 16;                                                               \
    cur_u += 16;                                                               \
    top_v += 16;                                                               \
@@ -211,3 +211,5 @@ void WebPInitPremultiplySSE2(void) {
 #if defined(__cplusplus) || defined(c_plusplus)
 }    // extern "C"
 #endif
--- a/3rdparty/libwebp/dsp/yuv.c
+++ b/3rdparty/libwebp/dsp/yuv.c
@@ -15,7 +15,7 @@
 extern "C" {
 #endif
-enum { YUV_HALF = 1 << (YUV_FIX - 1) };
+#ifdef WEBP_YUV_USE_TABLE
 int16_t VP8kVToR[256], VP8kUToB[256];
 int32_t VP8kVToG[256], VP8kUToG[256];
@@ -33,6 +33,7 @@ void VP8YUVInit(void) {
  if (done) {
    return;
  }
+#ifndef USE_YUVj
  for (i = 0; i < 256; ++i) {
    VP8kVToR[i] = (89858 * (i - 128) + YUV_HALF) >> YUV_FIX;
    VP8kUToG[i] = -22014 * (i - 128) + YUV_HALF;
@@ -44,9 +45,29 @@ void VP8YUVInit(void) {
    VP8kClip[i - YUV_RANGE_MIN] = clip(k, 255);
    VP8kClip4Bits[i - YUV_RANGE_MIN] = clip((k + 8) >> 4, 15);
  }
+#else
+  for (i = 0; i < 256; ++i) {
+    VP8kVToR[i] = (91881 * (i - 128) + YUV_HALF) >> YUV_FIX;
+    VP8kUToG[i] = -22554 * (i - 128) + YUV_HALF;
+    VP8kVToG[i] = -46802 * (i - 128);
+    VP8kUToB[i] = (116130 * (i - 128) + YUV_HALF) >> YUV_FIX;
+  }
+  for (i = YUV_RANGE_MIN; i < YUV_RANGE_MAX; ++i) {
+    const int k = i;
+    VP8kClip[i - YUV_RANGE_MIN] = clip(k, 255);
+    VP8kClip4Bits[i - YUV_RANGE_MIN] = clip((k + 8) >> 4, 15);
+  }
+#endif
  done = 1;
 }
+#else
+void VP8YUVInit(void) {}
+#endif  // WEBP_YUV_USE_TABLE
 #if defined(__cplusplus) || defined(c_plusplus)
 }    // extern "C"
 #endif
--- a/3rdparty/libwebp/dsp/yuv.h
+++ b/3rdparty/libwebp/dsp/yuv.h
--- a/3rdparty/libwebp/enc/alpha.c
+++ b/3rdparty/libwebp/enc/alpha.c
@@ -79,18 +79,17 @@ static int EncodeLossless(const uint8_t* const data, int width, int height,
  WebPConfigInit(&config);
  config.lossless = 1;
  config.method = effort_level;  // impact is very small
-  // Set moderate default quality setting for alpha. Higher qualities (80 and
+  // Set a moderate default quality setting for alpha.
-  // above) could be very slow.
+  config.quality = 5.f * effort_level;
-  config.quality = 10.f + 15.f * effort_level;
+  assert(config.quality >= 0 && config.quality <= 100.f);
-  if (config.quality > 100.f) config.quality = 100.f;
  ok = VP8LBitWriterInit(&tmp_bw, (width * height) >> 3);
  ok = ok && (VP8LEncodeStream(&config, &picture, &tmp_bw) == VP8_ENC_OK);
  WebPPictureFree(&picture);
  if (ok) {
-    const uint8_t* const data = VP8LBitWriterFinish(&tmp_bw);
+    const uint8_t* const buffer = VP8LBitWriterFinish(&tmp_bw);
-    const size_t data_size = VP8LBitWriterNumBytes(&tmp_bw);
+    const size_t buffer_size = VP8LBitWriterNumBytes(&tmp_bw);
-    VP8BitWriterAppend(bw, data, data_size);
+    VP8BitWriterAppend(bw, buffer, buffer_size);
  }
  VP8LBitWriterDestroy(&tmp_bw);
  return ok && !bw->error_;
@@ -128,8 +127,8 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height,
  VP8BitWriterAppend(bw, &header, ALPHA_HEADER_LEN);
  filter_func = WebPFilters[filter];
-  if (filter_func) {
+  if (filter_func != NULL) {
-    filter_func(data, width, height, 1, width, tmp_alpha);
+    filter_func(data, width, height, width, tmp_alpha);
    alpha_src = tmp_alpha;
  }  else {
    alpha_src = data;
@@ -287,42 +286,80 @@ static int EncodeAlpha(VP8Encoder* const enc,
 //------------------------------------------------------------------------------
 // Main calls
+static int CompressAlphaJob(VP8Encoder* const enc, void* dummy) {
+  const WebPConfig* config = enc->config_;
+  uint8_t* alpha_data = NULL;
+  size_t alpha_size = 0;
+  const int effort_level = config->method;  // maps to [0..6]
+  const WEBP_FILTER_TYPE filter =
+      (config->alpha_filtering == 0) ? WEBP_FILTER_NONE :
+      (config->alpha_filtering == 1) ? WEBP_FILTER_FAST :
+                                       WEBP_FILTER_BEST;
+  if (!EncodeAlpha(enc, config->alpha_quality, config->alpha_compression,
+                   filter, effort_level, &alpha_data, &alpha_size)) {
+    return 0;
+  }
+  if (alpha_size != (uint32_t)alpha_size) {  // Sanity check.
+    free(alpha_data);
+    return 0;
+  }
+  enc->alpha_data_size_ = (uint32_t)alpha_size;
+  enc->alpha_data_ = alpha_data;
+  (void)dummy;
+  return 1;
+}
 void VP8EncInitAlpha(VP8Encoder* const enc) {
  enc->has_alpha_ = WebPPictureHasTransparency(enc->pic_);
  enc->alpha_data_ = NULL;
  enc->alpha_data_size_ = 0;
+  if (enc->thread_level_ > 0) {
+    WebPWorker* const worker = &enc->alpha_worker_;
+    WebPWorkerInit(worker);
+    worker->data1 = enc;
+    worker->data2 = NULL;
+    worker->hook = (WebPWorkerHook)CompressAlphaJob;
+  }
 }
-int VP8EncFinishAlpha(VP8Encoder* const enc) {
+int VP8EncStartAlpha(VP8Encoder* const enc) {
  if (enc->has_alpha_) {
-    const WebPConfig* config = enc->config_;
+    if (enc->thread_level_ > 0) {
-    uint8_t* tmp_data = NULL;
+      WebPWorker* const worker = &enc->alpha_worker_;
-    size_t tmp_size = 0;
+      if (!WebPWorkerReset(worker)) {    // Makes sure worker is good to go.
-    const int effort_level = config->method;  // maps to [0..6]
+        return 0;
-    const WEBP_FILTER_TYPE filter =
+      }
-        (config->alpha_filtering == 0) ? WEBP_FILTER_NONE :
+      WebPWorkerLaunch(worker);
-        (config->alpha_filtering == 1) ? WEBP_FILTER_FAST :
+      return 1;
-                                         WEBP_FILTER_BEST;
+    } else {
+      return CompressAlphaJob(enc, NULL);   // just do the job right away
-    if (!EncodeAlpha(enc, config->alpha_quality, config->alpha_compression,
-                     filter, effort_level, &tmp_data, &tmp_size)) {
-      return 0;
    }
-    if (tmp_size != (uint32_t)tmp_size) {  // Sanity check.
+  }
-      free(tmp_data);
+  return 1;
-      return 0;
+}
+int VP8EncFinishAlpha(VP8Encoder* const enc) {
+  if (enc->has_alpha_) {
+    if (enc->thread_level_ > 0) {
+      WebPWorker* const worker = &enc->alpha_worker_;
+      if (!WebPWorkerSync(worker)) return 0;  // error
    }
-    enc->alpha_data_size_ = (uint32_t)tmp_size;
-    enc->alpha_data_ = tmp_data;
  }
  return WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_);
 }
-void VP8EncDeleteAlpha(VP8Encoder* const enc) {
+int VP8EncDeleteAlpha(VP8Encoder* const enc) {
+  int ok = 1;
+  if (enc->thread_level_ > 0) {
+    WebPWorker* const worker = &enc->alpha_worker_;
+    ok = WebPWorkerSync(worker);  // finish anything left in flight
+    WebPWorkerEnd(worker);  // still need to end the worker, even if !ok
+  }
  free(enc->alpha_data_);
  enc->alpha_data_ = NULL;
  enc->alpha_data_size_ = 0;
  enc->has_alpha_ = 0;
+  return ok;
 }
 #if defined(__cplusplus) || defined(c_plusplus)

--- a/3rdparty/libwebp/enc/analysis.c
+++ b/3rdparty/libwebp/enc/analysis.c
--- a/3rdparty/libwebp/enc/backward_references.c
+++ b/3rdparty/libwebp/enc/backward_references.c
--- a/3rdparty/libwebp/enc/backward_references.h
+++ b/3rdparty/libwebp/enc/backward_references.h
@@ -35,7 +35,8 @@ extern "C" {
 #if defined(__GNUC__) && \
    ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4)
 static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
-  return n == 0 ? -1 : 31 ^ __builtin_clz(n);
+  assert(n != 0);
+  return 31 ^ __builtin_clz(n);
 }
 #elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
 #include <intrin.h>
@@ -43,15 +44,18 @@ static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
 static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
  unsigned long first_set_bit;
-  return _BitScanReverse(&first_set_bit, n) ? first_set_bit : -1;
+  assert(n != 0);
+  _BitScanReverse(&first_set_bit, n);
+  return first_set_bit;
 }
 #else
+// Returns (int)floor(log2(n)). n must be > 0.
 static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
  int log = 0;
  uint32_t value = n;
  int i;
-  if (value == 0) return -1;
+  assert(n != 0);
  for (i = 4; i >= 0; --i) {
    const int shift = (1 << i);
    const uint32_t x = value >> shift;
@@ -65,11 +69,11 @@ static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
 #endif
 static WEBP_INLINE int VP8LBitsLog2Ceiling(uint32_t n) {
-  const int floor = BitsLog2Floor(n);
+  const int log_floor = BitsLog2Floor(n);
  if (n == (n & ~(n - 1)))  // zero or a power of two.
-    return floor;
+    return log_floor;
  else
-    return floor + 1;
+    return log_floor + 1;
 }
 // Splitting of distance and length codes into prefixes and
@@ -78,16 +82,17 @@ static WEBP_INLINE int VP8LBitsLog2Ceiling(uint32_t n) {
 static WEBP_INLINE void PrefixEncode(int distance, int* const code,
                                     int* const extra_bits_count,
                                     int* const extra_bits_value) {
-  // Collect the two most significant bits where the highest bit is 1.
+  if (distance > 2) {  // Collect the two most significant bits.
-  const int highest_bit = BitsLog2Floor(--distance);
+    const int highest_bit = BitsLog2Floor(--distance);
-  // & 0x3f is to make behavior well defined when highest_bit
+    const int second_highest_bit = (distance >> (highest_bit - 1)) & 1;
-  // does not exist or is the least significant bit.
+    *extra_bits_count = highest_bit - 1;
-  const int second_highest_bit =
+    *extra_bits_value = distance & ((1 << *extra_bits_count) - 1);
-      (distance >> ((highest_bit - 1) & 0x3f)) & 1;
+    *code = 2 * highest_bit + second_highest_bit;
-  *extra_bits_count = (highest_bit > 0) ? (highest_bit - 1) : 0;
+  } else {
-  *extra_bits_value = distance & ((1 << *extra_bits_count) - 1);
+    *extra_bits_count = 0;
-  *code = (highest_bit > 0) ? (2 * highest_bit + second_highest_bit)
+    *extra_bits_value = 0;
-                            : (highest_bit == 0) ? 1 : 0;
+    *code = (distance == 2) ? 1 : 0;
+  }
 }
 // -----------------------------------------------------------------------------

--- a/3rdparty/libwebp/enc/config.c
+++ b/3rdparty/libwebp/enc/config.c
@@ -31,9 +31,9 @@ int WebPConfigInitInternal(WebPConfig* config,
  config->target_PSNR = 0.;
  config->method = 4;
  config->sns_strength = 50;
-  config->filter_strength = 20;   // default: light filtering
+  config->filter_strength = 60;   // rather high filtering, helps w/ gradients.
  config->filter_sharpness = 0;
-  config->filter_type = 0;        // default: simple
+  config->filter_type = 1;        // default: strong (so U/V is filtered too)
  config->partitions = 0;
  config->segments = 4;
  config->pass = 1;
@@ -46,6 +46,9 @@ int WebPConfigInitInternal(WebPConfig* config,
  config->alpha_quality = 100;
  config->lossless = 0;
  config->image_hint = WEBP_HINT_DEFAULT;
+  config->emulate_jpeg_size = 0;
+  config->thread_level = 0;
+  config->low_memory = 0;
  // TODO(skal): tune.
  switch (preset) {
@@ -122,6 +125,12 @@ int WebPValidateConfig(const WebPConfig* config) {
    return 0;
  if (config->image_hint >= WEBP_HINT_LAST)
    return 0;
+  if (config->emulate_jpeg_size < 0 || config->emulate_jpeg_size > 1)
+    return 0;
+  if (config->thread_level < 0 || config->thread_level > 1)
+    return 0;
+  if (config->low_memory < 0 || config->low_memory > 1)
+    return 0;
  return 1;
 }

--- a/3rdparty/libwebp/enc/cost.c
+++ b/3rdparty/libwebp/enc/cost.c
@@ -75,7 +75,7 @@ const uint16_t VP8LevelCodes[MAX_VARIABLE_LEVEL][2] = {
 // fixed costs for coding levels, deduced from the coding tree.
 // This is only the part that doesn't depend on the probability state.
-const uint16_t VP8LevelFixedCosts[2048] = {
+const uint16_t VP8LevelFixedCosts[MAX_LEVEL + 1] = {
     0,  256,  256,  256,  256,  432,  618,  630,
   731,  640,  640,  828,  901,  948, 1021, 1101,
  1174, 1221, 1294, 1042, 1085, 1115, 1158, 1202,
@@ -359,7 +359,7 @@ void VP8CalculateLevelCosts(VP8Proba* const proba) {
  for (ctype = 0; ctype < NUM_TYPES; ++ctype) {
    for (band = 0; band < NUM_BANDS; ++band) {
-      for(ctx = 0; ctx < NUM_CTX; ++ctx) {
+      for (ctx = 0; ctx < NUM_CTX; ++ctx) {
        const uint8_t* const p = proba->coeffs_[ctype][band][ctx];
        uint16_t* const table = proba->level_cost_[ctype][band][ctx];
        const int cost_base = VP8BitCost(1, p[1]);

--- a/3rdparty/libwebp/enc/cost.h
+++ b/3rdparty/libwebp/enc/cost.h
@@ -18,7 +18,8 @@
 extern "C" {
 #endif
-extern const uint16_t VP8LevelFixedCosts[2048];   // approximate cost per level
+// approximate cost per level:
+extern const uint16_t VP8LevelFixedCosts[MAX_LEVEL + 1];
 extern const uint16_t VP8EntropyCost[256];        // 8bit fixed-point log(p)
 // Cost of coding one event with probability 'proba'.

--- a/3rdparty/libwebp/enc/frame.c
+++ b/3rdparty/libwebp/enc/frame.c
--- a/3rdparty/libwebp/enc/histogram.c
+++ b/3rdparty/libwebp/enc/histogram.c
--- a/3rdparty/libwebp/enc/histogram.h
+++ b/3rdparty/libwebp/enc/histogram.h
@@ -80,22 +80,6 @@ double VP8LHistogramEstimateBits(const VP8LHistogram* const p);
 // represent the entropy code itself.
 double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p);
-static WEBP_INLINE void VP8LHistogramAdd(VP8LHistogram* const p,
-                                         const VP8LHistogram* const a) {
-  int i;
-  for (i = 0; i < PIX_OR_COPY_CODES_MAX; ++i) {
-    p->literal_[i] += a->literal_[i];
-  }
-  for (i = 0; i < NUM_DISTANCE_CODES; ++i) {
-    p->distance_[i] += a->distance_[i];
-  }
-  for (i = 0; i < 256; ++i) {
-    p->red_[i] += a->red_[i];
-    p->blue_[i] += a->blue_[i];
-    p->alpha_[i] += a->alpha_[i];
-  }
-}
 static WEBP_INLINE int VP8LHistogramNumCodes(const VP8LHistogram* const p) {
  return 256 + NUM_LENGTH_CODES +
      ((p->palette_code_bits_ > 0) ? (1 << p->palette_code_bits_) : 0);

--- a/3rdparty/libwebp/enc/picture.c
+++ b/3rdparty/libwebp/enc/picture.c
--- a/3rdparty/libwebp/enc/quant.c
+++ b/3rdparty/libwebp/enc/quant.c
--- a/3rdparty/libwebp/enc/syntax.c
+++ b/3rdparty/libwebp/enc/syntax.c
@@ -11,7 +11,9 @@
 #include <assert.h>
-#include "../webp/format_constants.h"
+#include "../utils/utils.h"
+#include "../webp/format_constants.h"  // RIFF constants
+#include "../webp/mux_types.h"         // ALPHA_FLAG
 #include "./vp8enci.h"
 #if defined(__cplusplus) || defined(c_plusplus)
@@ -21,25 +23,12 @@ extern "C" {
 //------------------------------------------------------------------------------
 // Helper functions
-// TODO(later): Move to webp/format_constants.h?
-static void PutLE24(uint8_t* const data, uint32_t val) {
-  data[0] = (val >>  0) & 0xff;
-  data[1] = (val >>  8) & 0xff;
-  data[2] = (val >> 16) & 0xff;
-}
-static void PutLE32(uint8_t* const data, uint32_t val) {
-  PutLE24(data, val);
-  data[3] = (val >> 24) & 0xff;
-}
 // Tells whether VP8X is needed for the stream produced by 'enc'.
 // Returns 1 when enc->has_alpha_ is non-zero and 0 otherwise; the '!!'
 // normalizes the field to a strict 0/1 value.
 static int IsVP8XNeeded(const VP8Encoder* const enc) {
   return !!enc->has_alpha_;  // Currently the only case when VP8X is needed.
                              // This could change in the future.
 }
 // Emits a single zero byte through the picture's writer callback.
 // Returns 1 if pic->writer() returned a non-zero value, 0 otherwise.
 // NOTE(review): non-zero from the writer presumably means success -- confirm
 // against the writer callback's contract.
 static int PutPaddingByte(const WebPPicture* const pic) {
   const uint8_t pad_byte[1] = { 0 };  // the one-byte payload (value 0)
   return !!pic->writer(pad_byte, 1, pic);  // '!!' normalizes the result to 0/1
 }
@@ -73,14 +62,14 @@ static WebPEncodingError PutVP8XHeader(const VP8Encoder* const enc) {
  assert(pic->width <= MAX_CANVAS_SIZE && pic->height <= MAX_CANVAS_SIZE);
  if (enc->has_alpha_) {
-    flags |= ALPHA_FLAG_BIT;
+    flags |= ALPHA_FLAG;
  }
  PutLE32(vp8x + TAG_SIZE,              VP8X_CHUNK_SIZE);
  PutLE32(vp8x + CHUNK_HEADER_SIZE,     flags);
  PutLE24(vp8x + CHUNK_HEADER_SIZE + 4, pic->width - 1);
  PutLE24(vp8x + CHUNK_HEADER_SIZE + 7, pic->height - 1);
-  if(!pic->writer(vp8x, sizeof(vp8x), pic)) {
+  if (!pic->writer(vp8x, sizeof(vp8x), pic)) {
    return VP8_ENC_ERROR_BAD_WRITE;
  }
  return VP8_ENC_OK;
@@ -327,7 +316,9 @@ static size_t GeneratePartition0(VP8Encoder* const enc) {
  PutSegmentHeader(bw, enc);
  PutFilterHeader(bw, &enc->filter_hdr_);
-  VP8PutValue(bw, enc->config_->partitions, 2);
+  VP8PutValue(bw, enc->num_parts_ == 8 ? 3 :
+                  enc->num_parts_ == 4 ? 2 :
+                  enc->num_parts_ == 2 ? 1 : 0, 2);
  PutQuant(bw, enc);
  VP8PutBitUniform(bw, 0);   // no proba update
  VP8WriteProbas(bw, &enc->proba_);

--- a/3rdparty/libwebp/enc/token.c
+++ b/3rdparty/libwebp/enc/token.c
--- a/3rdparty/libwebp/enc/vp8enci.h
+++ b/3rdparty/libwebp/enc/vp8enci.h
--- a/3rdparty/libwebp/enc/vp8l.c
+++ b/3rdparty/libwebp/enc/vp8l.c
--- a/3rdparty/libwebp/enc/webpenc.c
+++ b/3rdparty/libwebp/enc/webpenc.c
--- a/3rdparty/libwebp/mux/muxedit.c
+++ b/3rdparty/libwebp/mux/muxedit.c
--- a/3rdparty/libwebp/mux/muxi.h
+++ b/3rdparty/libwebp/mux/muxi.h
--- a/3rdparty/libwebp/mux/muxinternal.c
+++ b/3rdparty/libwebp/mux/muxinternal.c
--- a/3rdparty/libwebp/mux/muxread.c
+++ b/3rdparty/libwebp/mux/muxread.c
--- a/3rdparty/libwebp/utils/bit_reader.c
+++ b/3rdparty/libwebp/utils/bit_reader.c
--- a/3rdparty/libwebp/utils/bit_reader.h
+++ b/3rdparty/libwebp/utils/bit_reader.h
--- a/3rdparty/libwebp/utils/filters.c
+++ b/3rdparty/libwebp/utils/filters.c
--- a/3rdparty/libwebp/utils/filters.h
+++ b/3rdparty/libwebp/utils/filters.h
@@ -30,18 +30,19 @@ typedef enum {
 } WEBP_FILTER_TYPE;
 typedef void (*WebPFilterFunc)(const uint8_t* in, int width, int height,
-                               int bpp, int stride, uint8_t* out);
+                               int stride, uint8_t* out);
+typedef void (*WebPUnfilterFunc)(int width, int height, int stride,
+                                 uint8_t* data);
 // Filter the given data using the given predictor.
 // 'in' corresponds to a 2-dimensional pixel array of size (stride * height)
 // in raster order.
-// 'bpp' is number of bytes per pixel, and
 // 'stride' is number of bytes per scan line (with possible padding).
 // 'out' should be pre-allocated.
 extern const WebPFilterFunc WebPFilters[WEBP_FILTER_LAST];
-// Reconstruct the original data from the given filtered data.
+// In-place reconstruct the original data from the given filtered data.
-extern const WebPFilterFunc WebPUnfilters[WEBP_FILTER_LAST];
+extern const WebPUnfilterFunc WebPUnfilters[WEBP_FILTER_LAST];
 // Fast estimate of a potentially good filter.
 extern WEBP_FILTER_TYPE EstimateBestFilter(const uint8_t* data,

--- a/3rdparty/libwebp/utils/huffman_encode.c
+++ b/3rdparty/libwebp/utils/huffman_encode.c
@@ -138,13 +138,8 @@ static int CompareHuffmanTrees(const void* ptr1, const void* ptr2) {
  } else if (t1->total_count_ < t2->total_count_) {
    return 1;
  } else {
-    if (t1->value_ < t2->value_) {
+    assert(t1->value_ != t2->value_);
-      return -1;
+    return (t1->value_ < t2->value_) ? -1 : 1;
-    }
-    if (t1->value_ > t2->value_) {
-      return 1;
-    }
-    return 0;
  }
 }
@@ -193,6 +188,10 @@ static int GenerateOptimalTree(const int* const histogram, int histogram_size,
    }
  }
+  if (tree_size_orig == 0) {   // pretty optimal already!
+    return 1;
+  }
  // 3 * tree_size is enough to cover all the nodes representing a
  // population and all the inserted nodes combining two existing nodes.
  // The tree pool needs 2 * (tree_size_orig - 1) entities, and the
@@ -234,7 +233,7 @@ static int GenerateOptimalTree(const int* const histogram, int histogram_size,
        tree_pool[tree_pool_size++] = tree[tree_size - 1];
        tree_pool[tree_pool_size++] = tree[tree_size - 2];
        count = tree_pool[tree_pool_size - 1].total_count_ +
-            tree_pool[tree_pool_size - 2].total_count_;
+                tree_pool[tree_pool_size - 2].total_count_;
        tree_size -= 2;
        {
          // Search for the insertion point.

--- a/3rdparty/libwebp/utils/quant_levels.c
+++ b/3rdparty/libwebp/utils/quant_levels.c
@@ -140,15 +140,6 @@ int QuantizeLevels(uint8_t* const data, int width, int height,
  return 1;
 }
-int DequantizeLevels(uint8_t* const data, int width, int height) {
-  if (data == NULL || width <= 0 || height <= 0) return 0;
-  // TODO(skal): implement gradient smoothing.
-  (void)data;
-  (void)width;
-  (void)height;
-  return 1;
-}
 #if defined(__cplusplus) || defined(c_plusplus)
 }    // extern "C"
 #endif
--- a/3rdparty/libwebp/utils/quant_levels.h
+++ b/3rdparty/libwebp/utils/quant_levels.h
@@ -27,11 +27,6 @@ extern "C" {
 int QuantizeLevels(uint8_t* const data, int width, int height, int num_levels,
                   uint64_t* const sse);
-// Apply post-processing to input 'data' of size 'width'x'height' assuming
-// that the source was quantized to a reduced number of levels.
-// Returns false in case of error (data is NULL, invalid parameters, ...).
-int DequantizeLevels(uint8_t* const data, int width, int height);
 #if defined(__cplusplus) || defined(c_plusplus)
 }    // extern "C"
 #endif

--- a/3rdparty/libwebp/utils/quant_levels_dec.c
+++ b/3rdparty/libwebp/utils/quant_levels_dec.c
--- a/3rdparty/libwebp/utils/quant_levels_dec.h
+++ b/3rdparty/libwebp/utils/quant_levels_dec.h
--- a/3rdparty/libwebp/utils/rescaler.c
+++ b/3rdparty/libwebp/utils/rescaler.c
@@ -20,7 +20,7 @@ extern "C" {
 #endif
 #define RFIX 30
-#define MULT_FIX(x,y) (((int64_t)(x) * (y) + (1 << (RFIX - 1))) >> RFIX)
+#define MULT_FIX(x, y) (((int64_t)(x) * (y) + (1 << (RFIX - 1))) >> RFIX)
 void WebPRescalerInit(WebPRescaler* const wrk, int src_width, int src_height,
                      uint8_t* const dst, int dst_width, int dst_height,

--- a/3rdparty/libwebp/utils/thread.c
+++ b/3rdparty/libwebp/utils/thread.c
@@ -9,10 +9,6 @@
 //
 // Author: Skal (pascal.massimino@gmail.com)
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
 #include <assert.h>
 #include <string.h>   // for memset()
 #include "./thread.h"

--- a/3rdparty/libwebp/utils/thread.h
+++ b/3rdparty/libwebp/utils/thread.h
--- a/3rdparty/libwebp/utils/utils.c
+++ b/3rdparty/libwebp/utils/utils.c
--- a/3rdparty/libwebp/utils/utils.h
+++ b/3rdparty/libwebp/utils/utils.h
--- a/3rdparty/libwebp/webp/decode.h
+++ b/3rdparty/libwebp/webp/decode.h
--- a/3rdparty/libwebp/webp/demux.h
+++ b/3rdparty/libwebp/webp/demux.h
--- a/3rdparty/libwebp/webp/encode.h
+++ b/3rdparty/libwebp/webp/encode.h
--- a/3rdparty/libwebp/webp/format_constants.h
+++ b/3rdparty/libwebp/webp/format_constants.h
--- a/3rdparty/libwebp/webp/mux.h
+++ b/3rdparty/libwebp/webp/mux.h
--- a/3rdparty/libwebp/webp/mux_types.h
+++ b/3rdparty/libwebp/webp/mux_types.h