Commit 9bcc9a25 authored by fbarchard@google.com

ARGBInterpolateRow_SSSE3 for motion blur. Used to use bilinear row filter,…

ARGBInterpolateRow_SSSE3 for motion blur.  Used to use bilinear row filter, which extrudes edges.  This branches off the code so the extrude can be removed for Interpolate.
BUG=none
TEST=build\release\libyuv_unittest.exe --gtest_catch_exceptions=0 --gtest_filter=*
Review URL: https://webrtc-codereview.appspot.com/786007

git-svn-id: http://libyuv.googlecode.com/svn/trunk@354 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent a2cc341b
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 353 Version: 354
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -65,6 +65,10 @@ typedef signed char int8; ...@@ -65,6 +65,10 @@ typedef signed char int8;
defined(__i386__) || defined(_M_IX86) defined(__i386__) || defined(_M_IX86)
#define CPU_X86 1 #define CPU_X86 1
#endif #endif
// Detect compiler is for arm.
#if defined(__arm__) || defined(_M_ARM)
#define CPU_ARM 1
#endif
#define ALIGNP(p, t) \ #define ALIGNP(p, t) \
(reinterpret_cast<uint8*>(((reinterpret_cast<uintptr_t>(p) + \ (reinterpret_cast<uint8*>(((reinterpret_cast<uintptr_t>(p) + \
......
...@@ -21,7 +21,7 @@ extern "C" { ...@@ -21,7 +21,7 @@ extern "C" {
// Compute a hash for specified memory. Seed of 5381 recommended. // Compute a hash for specified memory. Seed of 5381 recommended.
uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed); uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed);
// Sum Square Error - used to compute Mean Square Error or PSNR // Sum Square Error - used to compute Mean Square Error or PSNR.
uint64 ComputeSumSquareError(const uint8* src_a, uint64 ComputeSumSquareError(const uint8* src_a,
const uint8* src_b, int count); const uint8* src_b, int count);
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
#define INCLUDE_LIBYUV_CONVERT_H_ #define INCLUDE_LIBYUV_CONVERT_H_
#include "libyuv/basic_types.h" #include "libyuv/basic_types.h"
// TODO(fbarchard): Remove the following headers includes // TODO(fbarchard): Remove the following headers includes.
#include "libyuv/convert_from.h" #include "libyuv/convert_from.h"
#include "libyuv/planar_functions.h" #include "libyuv/planar_functions.h"
#include "libyuv/rotate.h" #include "libyuv/rotate.h"
...@@ -22,7 +22,7 @@ namespace libyuv { ...@@ -22,7 +22,7 @@ namespace libyuv {
extern "C" { extern "C" {
#endif #endif
// Alias // Alias.
#define I420ToI420 I420Copy #define I420ToI420 I420Copy
// Copy I420 to I420. // Copy I420 to I420.
...@@ -112,56 +112,63 @@ int V210ToI420(const uint8* src_uyvy, int src_stride_uyvy, ...@@ -112,56 +112,63 @@ int V210ToI420(const uint8* src_uyvy, int src_stride_uyvy,
uint8* dst_v, int dst_stride_v, uint8* dst_v, int dst_stride_v,
int width, int height); int width, int height);
// ARGB little endian (bgra in memory) to I420 // ARGB little endian (bgra in memory) to I420.
int ARGBToI420(const uint8* src_frame, int src_stride_frame, int ARGBToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y, uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u, uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v, uint8* dst_v, int dst_stride_v,
int width, int height); int width, int height);
// BGRA little endian (argb in memory) to I420 // BGRA little endian (argb in memory) to I420.
int BGRAToI420(const uint8* src_frame, int src_stride_frame, int BGRAToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y, uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u, uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v, uint8* dst_v, int dst_stride_v,
int width, int height); int width, int height);
// ABGR little endian (rgba in memory) to I420 // ABGR little endian (rgba in memory) to I420.
int ABGRToI420(const uint8* src_frame, int src_stride_frame, int ABGRToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y, uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u, uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v, uint8* dst_v, int dst_stride_v,
int width, int height); int width, int height);
// RGB little endian (bgr in memory) to I420 // RGBA little endian (rgba in memory) to I420.
int RGBAToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// RGB little endian (bgr in memory) to I420.
int RGB24ToI420(const uint8* src_frame, int src_stride_frame, int RGB24ToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y, uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u, uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v, uint8* dst_v, int dst_stride_v,
int width, int height); int width, int height);
// RGB big endian (rgb in memory) to I420 // RGB big endian (rgb in memory) to I420.
int RAWToI420(const uint8* src_frame, int src_stride_frame, int RAWToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y, uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u, uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v, uint8* dst_v, int dst_stride_v,
int width, int height); int width, int height);
// RGB16 (RGBP fourcc) little endian to I420 // RGB16 (RGBP fourcc) little endian to I420.
int RGB565ToI420(const uint8* src_frame, int src_stride_frame, int RGB565ToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y, uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u, uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v, uint8* dst_v, int dst_stride_v,
int width, int height); int width, int height);
// RGB15 (RGBO fourcc) little endian to I420 // RGB15 (RGBO fourcc) little endian to I420.
int ARGB1555ToI420(const uint8* src_frame, int src_stride_frame, int ARGB1555ToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y, uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u, uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v, uint8* dst_v, int dst_stride_v,
int width, int height); int width, int height);
// RGB12 (R444 fourcc) little endian to I420 // RGB12 (R444 fourcc) little endian to I420.
int ARGB4444ToI420(const uint8* src_frame, int src_stride_frame, int ARGB4444ToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y, uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u, uint8* dst_u, int dst_stride_u,
...@@ -169,7 +176,7 @@ int ARGB4444ToI420(const uint8* src_frame, int src_stride_frame, ...@@ -169,7 +176,7 @@ int ARGB4444ToI420(const uint8* src_frame, int src_stride_frame,
int width, int height); int width, int height);
#ifdef HAVE_JPEG #ifdef HAVE_JPEG
// src_width/height provided by capture // src_width/height provided by capture.
// dst_width/height for clipping determine final size. // dst_width/height for clipping determine final size.
int MJPGToI420(const uint8* sample, size_t sample_size, int MJPGToI420(const uint8* sample, size_t sample_size,
uint8* dst_y, int dst_stride_y, uint8* dst_y, int dst_stride_y,
......
...@@ -28,7 +28,7 @@ namespace libyuv { ...@@ -28,7 +28,7 @@ namespace libyuv {
extern "C" { extern "C" {
#endif #endif
// Alias // Alias.
#define ARGBToARGB ARGBCopy #define ARGBToARGB ARGBCopy
// Copy ARGB to ARGB. // Copy ARGB to ARGB.
...@@ -112,17 +112,17 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy, ...@@ -112,17 +112,17 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
// uint8* dst_argb, int dst_stride_argb, // uint8* dst_argb, int dst_stride_argb,
// int width, int height); // int width, int height);
// BGRA little endian (argb in memory) to ARGB // BGRA little endian (argb in memory) to ARGB.
int BGRAToARGB(const uint8* src_frame, int src_stride_frame, int BGRAToARGB(const uint8* src_frame, int src_stride_frame,
uint8* dst_argb, int dst_stride_argb, uint8* dst_argb, int dst_stride_argb,
int width, int height); int width, int height);
// ABGR little endian (rgba in memory) to ARGB // ABGR little endian (rgba in memory) to ARGB.
int ABGRToARGB(const uint8* src_frame, int src_stride_frame, int ABGRToARGB(const uint8* src_frame, int src_stride_frame,
uint8* dst_argb, int dst_stride_argb, uint8* dst_argb, int dst_stride_argb,
int width, int height); int width, int height);
// RGBA little endian (abgr in memory) to ARGB // RGBA little endian (abgr in memory) to ARGB.
int RGBAToARGB(const uint8* src_frame, int src_stride_frame, int RGBAToARGB(const uint8* src_frame, int src_stride_frame,
uint8* dst_argb, int dst_stride_argb, uint8* dst_argb, int dst_stride_argb,
int width, int height); int width, int height);
...@@ -130,27 +130,27 @@ int RGBAToARGB(const uint8* src_frame, int src_stride_frame, ...@@ -130,27 +130,27 @@ int RGBAToARGB(const uint8* src_frame, int src_stride_frame,
// Deprecated function name. // Deprecated function name.
#define BG24ToARGB RGB24ToARGB #define BG24ToARGB RGB24ToARGB
// RGB little endian (bgr in memory) to ARGB // RGB little endian (bgr in memory) to ARGB.
int RGB24ToARGB(const uint8* src_frame, int src_stride_frame, int RGB24ToARGB(const uint8* src_frame, int src_stride_frame,
uint8* dst_argb, int dst_stride_argb, uint8* dst_argb, int dst_stride_argb,
int width, int height); int width, int height);
// RGB big endian (rgb in memory) to ARGB // RGB big endian (rgb in memory) to ARGB.
int RAWToARGB(const uint8* src_frame, int src_stride_frame, int RAWToARGB(const uint8* src_frame, int src_stride_frame,
uint8* dst_argb, int dst_stride_argb, uint8* dst_argb, int dst_stride_argb,
int width, int height); int width, int height);
// RGB16 (RGBP fourcc) little endian to ARGB // RGB16 (RGBP fourcc) little endian to ARGB.
int RGB565ToARGB(const uint8* src_frame, int src_stride_frame, int RGB565ToARGB(const uint8* src_frame, int src_stride_frame,
uint8* dst_argb, int dst_stride_argb, uint8* dst_argb, int dst_stride_argb,
int width, int height); int width, int height);
// RGB15 (RGBO fourcc) little endian to ARGB // RGB15 (RGBO fourcc) little endian to ARGB.
int ARGB1555ToARGB(const uint8* src_frame, int src_stride_frame, int ARGB1555ToARGB(const uint8* src_frame, int src_stride_frame,
uint8* dst_argb, int dst_stride_argb, uint8* dst_argb, int dst_stride_argb,
int width, int height); int width, int height);
// RGB12 (R444 fourcc) little endian to ARGB // RGB12 (R444 fourcc) little endian to ARGB.
int ARGB4444ToARGB(const uint8* src_frame, int src_stride_frame, int ARGB4444ToARGB(const uint8* src_frame, int src_stride_frame,
uint8* dst_argb, int dst_stride_argb, uint8* dst_argb, int dst_stride_argb,
int width, int height); int width, int height);
...@@ -164,7 +164,7 @@ int MJPGToARGB(const uint8* sample, size_t sample_size, ...@@ -164,7 +164,7 @@ int MJPGToARGB(const uint8* sample, size_t sample_size,
int dst_width, int dst_height); int dst_width, int dst_height);
#endif #endif
// Note Bayer formats (BGGR) to ARGB are in format_conversion.h // Note Bayer formats (BGGR) to ARGB are in format_conversion.h.
// Convert camera sample to ARGB with cropping, rotation and vertical flip. // Convert camera sample to ARGB with cropping, rotation and vertical flip.
// "src_size" is needed to parse MJPG. // "src_size" is needed to parse MJPG.
......
...@@ -19,9 +19,9 @@ namespace libyuv { ...@@ -19,9 +19,9 @@ namespace libyuv {
extern "C" { extern "C" {
#endif #endif
// See Also convert.h for conversions from formats to I420 // See Also convert.h for conversions from formats to I420.
// I420Copy in convert to I420ToI420 // I420Copy in convert to I420ToI420.
int I420ToI422(const uint8* src_y, int src_stride_y, int I420ToI422(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u, const uint8* src_u, int src_stride_u,
...@@ -47,7 +47,7 @@ int I420ToI411(const uint8* src_y, int src_stride_y, ...@@ -47,7 +47,7 @@ int I420ToI411(const uint8* src_y, int src_stride_y,
uint8* dst_v, int dst_stride_v, uint8* dst_v, int dst_stride_v,
int width, int height); int width, int height);
// Copy to I400. Source can be I420,422,444,400,NV12,NV21 // Copy to I400. Source can be I420, I422, I444, I400, NV12 or NV21.
int I400Copy(const uint8* src_y, int src_stride_y, int I400Copy(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y, uint8* dst_y, int dst_stride_y,
int width, int height); int width, int height);
...@@ -92,6 +92,12 @@ int I420ToABGR(const uint8* src_y, int src_stride_y, ...@@ -92,6 +92,12 @@ int I420ToABGR(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb, uint8* dst_argb, int dst_stride_argb,
int width, int height); int width, int height);
int I420ToRGBA(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_rgba, int dst_stride_rgba,
int width, int height);
int I420ToRGB24(const uint8* src_y, int src_stride_y, int I420ToRGB24(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u, const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v, const uint8* src_v, int src_stride_v,
...@@ -122,7 +128,7 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y, ...@@ -122,7 +128,7 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y,
uint8* dst_frame, int dst_stride_frame, uint8* dst_frame, int dst_stride_frame,
int width, int height); int width, int height);
// Note Bayer formats (BGGR) To I420 are in format_conversion.h // Note Bayer formats (BGGR) To I420 are in format_conversion.h.
// Convert I420 to specified format. // Convert I420 to specified format.
// "dst_sample_stride" is bytes in a row for the destination. Pass 0 if the // "dst_sample_stride" is bytes in a row for the destination. Pass 0 if the
......
...@@ -43,7 +43,7 @@ int BayerRGGBToI420(const uint8* src_bayer, int src_stride_bayer, ...@@ -43,7 +43,7 @@ int BayerRGGBToI420(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_v, int dst_stride_v, uint8* dst_v, int dst_stride_v,
int width, int height); int width, int height);
// Temporary API mapper // Temporary API mapper.
#define BayerRGBToI420(b, bs, f, y, ys, u, us, v, vs, w, h) \ #define BayerRGBToI420(b, bs, f, y, ys, u, us, v, vs, w, h) \
BayerToI420(b, bs, y, ys, u, us, v, vs, w, h, f) BayerToI420(b, bs, y, ys, u, us, v, vs, w, h, f)
...@@ -79,7 +79,7 @@ int I420ToBayerRGGB(const uint8* src_y, int src_stride_y, ...@@ -79,7 +79,7 @@ int I420ToBayerRGGB(const uint8* src_y, int src_stride_y,
uint8* dst_frame, int dst_stride_frame, uint8* dst_frame, int dst_stride_frame,
int width, int height); int width, int height);
// Temporary API mapper // Temporary API mapper.
#define I420ToBayerRGB(y, ys, u, us, v, vs, b, bs, f, w, h) \ #define I420ToBayerRGB(y, ys, u, us, v, vs, b, bs, f, w, h) \
I420ToBayer(y, ys, u, us, v, vs, b, bs, w, h, f) I420ToBayer(y, ys, u, us, v, vs, b, bs, w, h, f)
...@@ -107,7 +107,7 @@ int BayerRGGBToARGB(const uint8* src_bayer, int src_stride_bayer, ...@@ -107,7 +107,7 @@ int BayerRGGBToARGB(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_argb, int dst_stride_argb, uint8* dst_argb, int dst_stride_argb,
int width, int height); int width, int height);
// Temporary API mapper // Temporary API mapper.
#define BayerRGBToARGB(b, bs, f, a, as, w, h) BayerToARGB(b, bs, a, as, w, h, f) #define BayerRGBToARGB(b, bs, f, a, as, w, h) BayerToARGB(b, bs, a, as, w, h, f)
int BayerToARGB(const uint8* src_bayer, int src_stride_bayer, int BayerToARGB(const uint8* src_bayer, int src_stride_bayer,
...@@ -132,7 +132,7 @@ int ARGBToBayerRGGB(const uint8* src_argb, int src_stride_argb, ...@@ -132,7 +132,7 @@ int ARGBToBayerRGGB(const uint8* src_argb, int src_stride_argb,
uint8* dst_bayer, int dst_stride_bayer, uint8* dst_bayer, int dst_stride_bayer,
int width, int height); int width, int height);
// Temporary API mapper // Temporary API mapper.
#define ARGBToBayerRGB(a, as, b, bs, f, w, h) ARGBToBayer(b, bs, a, as, w, h, f) #define ARGBToBayerRGB(a, as, b, bs, f, w, h) ARGBToBayer(b, bs, a, as, w, h, f)
int ARGBToBayer(const uint8* src_argb, int src_stride_argb, int ARGBToBayer(const uint8* src_argb, int src_stride_argb,
......
...@@ -13,6 +13,8 @@ ...@@ -13,6 +13,8 @@
#include "libyuv/basic_types.h" #include "libyuv/basic_types.h"
// NOTE: For a simplified public API use convert.h MJPGToI420().
struct jpeg_common_struct; struct jpeg_common_struct;
struct jpeg_decompress_struct; struct jpeg_decompress_struct;
struct jpeg_source_mgr; struct jpeg_source_mgr;
...@@ -85,10 +87,10 @@ class MJpegDecoder { ...@@ -85,10 +87,10 @@ class MJpegDecoder {
int GetVertSubSampFactor(int component); int GetVertSubSampFactor(int component);
// Public for testability // Public for testability.
int GetImageScanlinesPerImcuRow(); int GetImageScanlinesPerImcuRow();
// Public for testability // Public for testability.
int GetComponentScanlinesPerImcuRow(int component); int GetComponentScanlinesPerImcuRow(int component);
// Width of a component in bytes. // Width of a component in bytes.
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
#include "libyuv/basic_types.h" #include "libyuv/basic_types.h"
// TODO(fbarchard): Remove the following headers includes // TODO(fbarchard): Remove the following headers includes.
#include "libyuv/convert.h" #include "libyuv/convert.h"
#include "libyuv/convert_argb.h" #include "libyuv/convert_argb.h"
...@@ -31,7 +31,7 @@ void CopyPlane(const uint8* src_y, int src_stride_y, ...@@ -31,7 +31,7 @@ void CopyPlane(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y, uint8* dst_y, int dst_stride_y,
int width, int height); int width, int height);
// Convert I420 to I400. (calls CopyPlane ignoring u/v) // Convert I420 to I400. (calls CopyPlane ignoring u/v).
int I420ToI400(const uint8* src_y, int src_stride_y, int I420ToI400(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y, uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u, uint8* dst_u, int dst_stride_u,
...@@ -103,7 +103,7 @@ int ARGBToI400(const uint8* src_argb, int src_stride_argb, ...@@ -103,7 +103,7 @@ int ARGBToI400(const uint8* src_argb, int src_stride_argb,
uint8* dst_y, int dst_stride_y, uint8* dst_y, int dst_stride_y,
int width, int height); int width, int height);
// ARGB little endian (bgra in memory) to I422 // ARGB little endian (bgra in memory) to I422.
int ARGBToI422(const uint8* src_frame, int src_stride_frame, int ARGBToI422(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y, uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u, uint8* dst_u, int dst_stride_u,
......
...@@ -31,7 +31,7 @@ enum RotationMode { ...@@ -31,7 +31,7 @@ enum RotationMode {
kRotateCounterClockwise = 270, kRotateCounterClockwise = 270,
}; };
// Rotate I420 frame // Rotate I420 frame.
int I420Rotate(const uint8* src_y, int src_stride_y, int I420Rotate(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u, const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v, const uint8* src_v, int src_stride_v,
...@@ -40,7 +40,7 @@ int I420Rotate(const uint8* src_y, int src_stride_y, ...@@ -40,7 +40,7 @@ int I420Rotate(const uint8* src_y, int src_stride_y,
uint8* dst_v, int dst_stride_v, uint8* dst_v, int dst_stride_v,
int src_width, int src_height, RotationMode mode); int src_width, int src_height, RotationMode mode);
// Rotate NV12 input and store in I420 // Rotate NV12 input and store in I420.
int NV12ToI420Rotate(const uint8* src_y, int src_stride_y, int NV12ToI420Rotate(const uint8* src_y, int src_stride_y,
const uint8* src_uv, int src_stride_uv, const uint8* src_uv, int src_stride_uv,
uint8* dst_y, int dst_stride_y, uint8* dst_y, int dst_stride_y,
......
...@@ -20,9 +20,9 @@ extern "C" { ...@@ -20,9 +20,9 @@ extern "C" {
// Supported filtering // Supported filtering
enum FilterMode { enum FilterMode {
kFilterNone = 0, // Point sample; Fastest kFilterNone = 0, // Point sample; Fastest.
kFilterBilinear = 1, // Faster than box, but lower quality scaling down. kFilterBilinear = 1, // Faster than box, but lower quality scaling down.
kFilterBox = 2 // Highest quality kFilterBox = 2 // Highest quality.
}; };
// Scale a YUV plane. // Scale a YUV plane.
...@@ -52,7 +52,7 @@ int I420Scale(const uint8* src_y, int src_stride_y, ...@@ -52,7 +52,7 @@ int I420Scale(const uint8* src_y, int src_stride_y,
int dst_width, int dst_height, int dst_width, int dst_height,
FilterMode filtering); FilterMode filtering);
// Legacy API. Deprecated // Legacy API. Deprecated.
int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v, int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v,
int src_stride_y, int src_stride_u, int src_stride_v, int src_stride_y, int src_stride_u, int src_stride_v,
int src_width, int src_height, int src_width, int src_height,
...@@ -61,12 +61,12 @@ int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v, ...@@ -61,12 +61,12 @@ int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v,
int dst_width, int dst_height, int dst_width, int dst_height,
bool interpolate); bool interpolate);
// Legacy API. Deprecated // Legacy API. Deprecated.
int ScaleOffset(const uint8* src, int src_width, int src_height, int ScaleOffset(const uint8* src, int src_width, int src_height,
uint8* dst, int dst_width, int dst_height, int dst_yoffset, uint8* dst, int dst_width, int dst_height, int dst_yoffset,
bool interpolate); bool interpolate);
// For testing, allow disabling of optimizations. // For testing, allow disabling of specialized scalers.
void SetUseReferenceImpl(bool use); void SetUseReferenceImpl(bool use);
#ifdef __cplusplus #ifdef __cplusplus
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 353 #define LIBYUV_VERSION 354
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* be found in the AUTHORS file in the root of the source tree. * be found in the AUTHORS file in the root of the source tree.
*/ */
// Common definitions for video, including fourcc and VideoFormat // Common definitions for video, including fourcc and VideoFormat.
#ifndef INCLUDE_LIBYUV_VIDEO_COMMON_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VIDEO_COMMON_H_ // NOLINT
#define INCLUDE_LIBYUV_VIDEO_COMMON_H_ #define INCLUDE_LIBYUV_VIDEO_COMMON_H_
...@@ -107,7 +107,7 @@ enum FourCCBpp { ...@@ -107,7 +107,7 @@ enum FourCCBpp {
FOURCC_BPP_UYVY = 16, FOURCC_BPP_UYVY = 16,
FOURCC_BPP_M420 = 12, FOURCC_BPP_M420 = 12,
FOURCC_BPP_Q420 = 12, FOURCC_BPP_Q420 = 12,
FOURCC_BPP_V210 = 22, // 22.5 actually FOURCC_BPP_V210 = 22, // 128 / 6 actually.
FOURCC_BPP_24BG = 24, FOURCC_BPP_24BG = 24,
FOURCC_BPP_ARGB = 32, FOURCC_BPP_ARGB = 32,
FOURCC_BPP_BGRA = 32, FOURCC_BPP_BGRA = 32,
......
...@@ -598,7 +598,7 @@ int NV21ToRGB565(const uint8* src_y, int src_stride_y, ...@@ -598,7 +598,7 @@ int NV21ToRGB565(const uint8* src_y, int src_stride_y,
#if !defined(YUV_DISABLE_ASM) && defined(__ARM_NEON__) #if !defined(YUV_DISABLE_ASM) && defined(__ARM_NEON__)
#define HAS_SETROW_NEON #define HAS_SETROW_NEON
static void SetRow8_NEON(uint8* dst, uint32 v32, int count) { static void SetRow8_NEON(uint8* dst, uint32 v32, int count) {
asm volatile ( asm volatile ( // NOLINT
"vdup.u32 q0, %2 \n" // duplicate 4 ints "vdup.u32 q0, %2 \n" // duplicate 4 ints
"1: \n" "1: \n"
"subs %1, %1, #16 \n" // 16 bytes per loop "subs %1, %1, #16 \n" // 16 bytes per loop
...@@ -669,7 +669,7 @@ static void SetRows32_X86(uint8* dst, uint32 v32, int width, ...@@ -669,7 +669,7 @@ static void SetRows32_X86(uint8* dst, uint32 v32, int width,
#define HAS_SETROW_X86 #define HAS_SETROW_X86
static void SetRow8_X86(uint8* dst, uint32 v32, int width) { static void SetRow8_X86(uint8* dst, uint32 v32, int width) {
size_t width_tmp = static_cast<size_t>(width); size_t width_tmp = static_cast<size_t>(width);
asm volatile ( asm volatile ( // NOLINT
"shr $0x2,%1 \n" "shr $0x2,%1 \n"
"rep stosl \n" "rep stosl \n"
: "+D"(dst), // %0 : "+D"(dst), // %0
...@@ -683,7 +683,7 @@ static void SetRows32_X86(uint8* dst, uint32 v32, int width, ...@@ -683,7 +683,7 @@ static void SetRows32_X86(uint8* dst, uint32 v32, int width,
for (int y = 0; y < height; ++y) { for (int y = 0; y < height; ++y) {
size_t width_tmp = static_cast<size_t>(width); size_t width_tmp = static_cast<size_t>(width);
uint32* d = reinterpret_cast<uint32*>(dst); uint32* d = reinterpret_cast<uint32*>(dst);
asm volatile ( asm volatile ( // NOLINT
"rep stosl \n" "rep stosl \n"
: "+D"(d), // %0 : "+D"(d), // %0
"+c"(width_tmp) // %1 "+c"(width_tmp) // %1
...@@ -1176,17 +1176,6 @@ int ARGBShade(const uint8* src_argb, int src_stride_argb, ...@@ -1176,17 +1176,6 @@ int ARGBShade(const uint8* src_argb, int src_stride_argb,
return 0; return 0;
} }
#if !defined(YUV_DISABLE_ASM) && (defined(_M_IX86) || \
(defined(__x86_64__) || defined(__i386__)))
#define HAS_SCALEARGBFILTERROWS_SSSE3
#endif
void ScaleARGBFilterRows_C(uint8* dst_ptr,
const uint8* src_ptr, ptrdiff_t src_stride,
int dst_width, int source_y_fraction);
void ScaleARGBFilterRows_SSSE3(uint8* dst_ptr,
const uint8* src_ptr, ptrdiff_t src_stride,
int dst_width, int source_y_fraction);
// Interpolate 2 ARGB images by specified amount (0 to 255). // Interpolate 2 ARGB images by specified amount (0 to 255).
int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0, int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
const uint8* src_argb1, int src_stride_argb1, const uint8* src_argb1, int src_stride_argb1,
...@@ -1201,24 +1190,20 @@ int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0, ...@@ -1201,24 +1190,20 @@ int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb; dst_stride_argb = -dst_stride_argb;
} }
void (*ScaleARGBFilterRows)(uint8* dst_ptr, const uint8* src_ptr, void (*ARGBInterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride, int dst_width, ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = ScaleARGBFilterRows_C; int source_y_fraction) = ARGBInterpolateRow_C;
#if defined(HAS_SCALEARGBFILTERROWS_SSSE3) #if defined(HAS_ARGBINTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) && IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) &&
IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) && IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
ScaleARGBFilterRows = ScaleARGBFilterRows_SSSE3; ARGBInterpolateRow = ARGBInterpolateRow_SSSE3;
} }
#endif #endif
uint8 last16[16];
for (int y = 0; y < height; ++y) { for (int y = 0; y < height; ++y) {
// Filter extrudes edge for its scaling purpose. ARGBInterpolateRow(dst_argb, src_argb0, src_argb1 - src_argb0,
memcpy(last16, dst_argb + width * 4, 16); // Save last 16 beyond end.
ScaleARGBFilterRows(dst_argb, src_argb0, src_argb1 - src_argb0,
width, interpolation); width, interpolation);
memcpy(dst_argb + width * 4, last16, 16); // Restore last 16 beyond end.
src_argb0 += src_stride_argb0; src_argb0 += src_stride_argb0;
src_argb1 += src_stride_argb1; src_argb1 += src_stride_argb1;
dst_argb += dst_stride_argb; dst_argb += dst_stride_argb;
......
...@@ -83,9 +83,7 @@ extern "C" { ...@@ -83,9 +83,7 @@ extern "C" {
#define HAS_CUMULATIVESUMTOAVERAGE_SSE2 #define HAS_CUMULATIVESUMTOAVERAGE_SSE2
#define HAS_ARGBSHADE_SSE2 #define HAS_ARGBSHADE_SSE2
#define HAS_ARGBAFFINEROW_SSE2 #define HAS_ARGBAFFINEROW_SSE2
// HAS_ARGBBLENDROW_SSE2 may be faster than SSSE3 version on some CPUs, so #define HAS_ARGBINTERPOLATEROW_SSSE3
// enable it here instead of LIBYUV_SSSE3_ONLY section.
#define HAS_ARGBBLENDROW_SSE2
#endif #endif
// The following are Windows only: // The following are Windows only:
...@@ -102,6 +100,7 @@ extern "C" { ...@@ -102,6 +100,7 @@ extern "C" {
!defined(LIBYUV_SSSE3_ONLY) !defined(LIBYUV_SSSE3_ONLY)
#define HAS_MIRRORROW_SSE2 #define HAS_MIRRORROW_SSE2
#define HAS_ARGBATTENUATE_SSE2 #define HAS_ARGBATTENUATE_SSE2
#define HAS_ARGBBLENDROW_SSE2
#endif #endif
// The following are available on Neon platforms // The following are available on Neon platforms
...@@ -553,6 +552,13 @@ void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride, ...@@ -553,6 +552,13 @@ void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
uint8* dst_argb, const float* uv_dudv, int width); uint8* dst_argb, const float* uv_dudv, int width);
void ARGBInterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride,
int dst_width, int source_y_fraction);
void ARGBInterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction);
#ifdef __cplusplus #ifdef __cplusplus
} // extern "C" } // extern "C"
} // namespace libyuv } // namespace libyuv
......
...@@ -1081,6 +1081,29 @@ void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride, ...@@ -1081,6 +1081,29 @@ void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
} }
} }
// C version: blends two ARGB rows into one destination row.
//
//   dst_ptr            Destination row; dst_width * 4 bytes are written.
//   src_ptr            First source row.
//   src_stride         Byte offset from src_ptr to the second source row.
//   dst_width          Row width in ARGB pixels (4 bytes per pixel).
//   source_y_fraction  Weight, out of 256, given to the second row; the first
//                      row receives 256 - source_y_fraction.
//
// Every byte (all four channels alike) is computed as
//   (row0 * (256 - fraction) + row1 * fraction) >> 8.
//
// Exactly dst_width pixels are processed.  The earlier form handled two pixels
// per pass in a do/while loop, so an odd width wrote one pixel beyond the end
// of the destination row (and a zero width still wrote two pixels).
void ARGBInterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr,
                          ptrdiff_t src_stride,
                          int dst_width, int source_y_fraction) {
  const int y1_fraction = source_y_fraction;
  const int y0_fraction = 256 - y1_fraction;
  const uint8* src_ptr1 = src_ptr + src_stride;
  // Widen before multiplying so the byte count is computed in pointer width.
  const ptrdiff_t byte_count = static_cast<ptrdiff_t>(dst_width) * 4;
  for (ptrdiff_t i = 0; i < byte_count; ++i) {
    dst_ptr[i] = (src_ptr[i] * y0_fraction + src_ptr1[i] * y1_fraction) >> 8;
  }
}
#ifdef __cplusplus #ifdef __cplusplus
} // extern "C" } // extern "C"
} // namespace libyuv } // namespace libyuv
......
...@@ -3560,6 +3560,71 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, ...@@ -3560,6 +3560,71 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
} }
#endif // HAS_ARGBAFFINEROW_SSE2 #endif // HAS_ARGBAFFINEROW_SSE2
// Bilinear row filtering combines 4x2 -> 4x1. SSSE3 version.
// Blends the row at src_ptr with the row at src_ptr + src_stride and stores
// the result at dst_ptr, 16 bytes (4 ARGB pixels) per loop iteration.
//   dst_ptr            destination row.
//   src_ptr            first source row.
//   src_stride         byte offset from src_ptr to the second source row.
//   dst_width          width in pixels; each loop subtracts 4 and repeats while
//                      the remainder is > 0, so at least one 16-byte group is
//                      always written and widths are rounded up to 4 pixels.
//   source_y_fraction  weight of the second row; it is halved before use.
// All loads and stores use movdqa, which requires 16-byte-aligned addresses.
// Three paths are selected on the halved fraction f:
//   f == 0  (label 2): copy the first row unchanged.
//   f == 64 (label 3): pavgb rounded average of the two rows.
//   otherwise (label 1): (row0 * (128 - f) + row1 * f) >> 7 via pmaddubsw.
void ARGBInterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) {
asm volatile (
"sub %1,%0 \n"  // dst_ptr -= src_ptr; dst is addressed as (%1,%0,1) below.
"shr %3 \n"  // f = source_y_fraction >> 1.
"cmp $0x0,%3 \n"
"je 2f \n"  // f == 0: plain copy path.
"cmp $0x40,%3 \n"
"je 3f \n"  // f == 64: average path.
"movd %3,%%xmm0 \n"  // xmm0 = f (weight for the second row).
"neg %3 \n"
"add $0x80,%3 \n"  // %3 = 128 - f (weight for the first row).
"movd %3,%%xmm5 \n"
"punpcklbw %%xmm0,%%xmm5 \n"  // Low byte pair of xmm5 = (128 - f, f).
"punpcklwd %%xmm5,%%xmm5 \n"
"pshufd $0x0,%%xmm5,%%xmm5 \n"  // Broadcast the weight pair to every word.
".p2align 4 \n"
// General blend loop.
"1: \n"
"movdqa (%1),%%xmm0 \n"  // 16 bytes of the first row.
"movdqa (%1,%4,1),%%xmm2 \n"  // 16 bytes of the second row.
"movdqa %%xmm0,%%xmm1 \n"
"punpcklbw %%xmm2,%%xmm0 \n"  // Interleave low 8 bytes: row0, row1, ...
"punpckhbw %%xmm2,%%xmm1 \n"  // Interleave high 8 bytes likewise.
"pmaddubsw %%xmm5,%%xmm0 \n"  // Each word = row0 * (128 - f) + row1 * f.
"pmaddubsw %%xmm5,%%xmm1 \n"
"psrlw $0x7,%%xmm0 \n"  // Divide by 128.
"psrlw $0x7,%%xmm1 \n"
"packuswb %%xmm1,%%xmm0 \n"  // Pack the 16 words back into 16 bytes.
"sub $0x4,%2 \n"  // 4 pixels done; sets the flags tested by jg below.
"movdqa %%xmm0,(%1,%0,1) \n"  // Store to dst; movdqa/lea leave flags intact.
"lea 0x10(%1),%1 \n"  // Advance the source by 16 bytes.
"jg 1b \n"
"jmp 4f \n"
".p2align 4 \n"
// Copy loop: f == 0, so the output is the first row.
"2: \n"
"movdqa (%1),%%xmm0 \n"
"sub $0x4,%2 \n"
"movdqa %%xmm0,(%1,%0,1) \n"
"lea 0x10(%1),%1 \n"
"jg 2b \n"
"jmp 4f \n"
".p2align 4 \n"
// Average loop: f == 64, equal weight for both rows.
"3: \n"
"movdqa (%1),%%xmm0 \n"
"pavgb (%1,%4,1),%%xmm0 \n"  // Rounded byte-wise average with the second row.
"sub $0x4,%2 \n"
"movdqa %%xmm0,(%1,%0,1) \n"
"lea 0x10(%1),%1 \n"
"jg 3b \n"
// Common exit for all three paths.
"4: \n"
".p2align 4 \n"
: "+r"(dst_ptr), // %0
"+r"(src_ptr), // %1
"+r"(dst_width), // %2
"+r"(source_y_fraction) // %3
: "r"(static_cast<intptr_t>(src_stride)) // %4
: "memory", "cc"
// The xmm registers are only listed as clobbers when SSE2 is enabled.
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm5"
#endif
);
}
#endif // defined(__x86_64__) || defined(__i386__) #endif // defined(__x86_64__) || defined(__i386__)
#ifdef __cplusplus #ifdef __cplusplus
......
...@@ -3664,6 +3664,81 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, ...@@ -3664,6 +3664,81 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
} }
#endif // HAS_ARGBAFFINEROW_SSE2 #endif // HAS_ARGBAFFINEROW_SSE2
// Blends two rows into one, 16 bytes (4x2 -> 4x1) per iteration. SSSE3 version.
// Row 0 is read from src_ptr and row 1 from src_ptr + src_stride; the result
// is written to dst_ptr. source_y_fraction is halved to a 7 bit weight f and
// each output byte is (row0 * (128 - f) + row1 * f) >> 7.
// Two shortcuts skip the multiply: f == 0 copies row 0 unchanged (xloop1),
// and f == 64 uses pavgb, a rounded average of the two rows (xloop2).
// All memory accesses are movdqa or a pavgb memory operand, so src_ptr,
// src_ptr + src_stride and dst_ptr must be 16 byte aligned.
// Each loop tests the count after its body, so at least 16 bytes are written
// and dst_width is effectively rounded up to a multiple of 4.
// The function is naked: arguments are read directly off the stack, and every
// exit path restores esi/edi and returns on its own.
__declspec(naked) __declspec(align(16))
void ARGBInterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) {
__asm {
// Two registers are pushed, hence the extra 8 in the argument offsets.
push esi
push edi
mov edi, [esp + 8 + 4] // dst_ptr
mov esi, [esp + 8 + 8] // src_ptr
mov edx, [esp + 8 + 12] // src_stride
mov ecx, [esp + 8 + 16] // dst_width
mov eax, [esp + 8 + 20] // source_y_fraction (0..255)
sub edi, esi // edi = dst_ptr - src_ptr, so [esi + edi] addresses dst
shr eax, 1 // f = source_y_fraction >> 1
cmp eax, 0
je xloop1 // f == 0: plain copy of row 0
cmp eax, 64
je xloop2 // f == 64: average of both rows
movd xmm0, eax // f, the weight applied to row 1
neg eax
add eax, 128
movd xmm5, eax // 128 - f, the weight applied to row 0
punpcklbw xmm5, xmm0 // low word = byte pair (128 - f, f)
punpcklwd xmm5, xmm5 // duplicate that word into the low dword
pshufd xmm5, xmm5, 0 // broadcast the weight pair to all lanes
align 16
// General blend loop.
xloop:
movdqa xmm0, [esi] // 16 bytes of row 0
movdqa xmm2, [esi + edx] // 16 bytes of row 1
movdqa xmm1, xmm0
punpcklbw xmm0, xmm2 // interleave row0/row1, low 8 bytes
punpckhbw xmm1, xmm2 // interleave row0/row1, high 8 bytes
pmaddubsw xmm0, xmm5 // words of row0 * (128 - f) + row1 * f
pmaddubsw xmm1, xmm5
psrlw xmm0, 7 // divide by 128
psrlw xmm1, 7
packuswb xmm0, xmm1 // pack the 16 words back into 16 bytes
sub ecx, 4 // dst_width -= 4; sets the flags tested by jg below
movdqa [esi + edi], xmm0 // store to dst; does not modify flags
lea esi, [esi + 16] // advance source; lea leaves the flags intact
jg xloop // loop while the remaining width is > 0
pop edi
pop esi
ret
align 16
// Copy loop, taken when f == 0.
xloop1:
movdqa xmm0, [esi]
sub ecx, 4
movdqa [esi + edi], xmm0
lea esi, [esi + 16]
jg xloop1
pop edi
pop esi
ret
align 16
// Averaging loop, taken when f == 64.
xloop2:
movdqa xmm0, [esi]
pavgb xmm0, [esi + edx] // rounded byte average of row 0 and row 1
sub ecx, 4
movdqa [esi + edi], xmm0
lea esi, [esi + 16]
jg xloop2
pop edi
pop esi
ret
}
}
#endif // _M_IX86 #endif // _M_IX86
#ifdef __cplusplus #ifdef __cplusplus
......
...@@ -80,7 +80,7 @@ TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N##_OptVsC) { \ ...@@ -80,7 +80,7 @@ TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N##_OptVsC) { \
} }
#define TESTPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B) \ #define TESTPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B) \
TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ,) \ TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, , +) \
TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, Invert, -) TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, Invert, -)
TESTPLANARTOB(I420, 2, 2, ARGB, 4) TESTPLANARTOB(I420, 2, 2, ARGB, 4)
...@@ -151,7 +151,7 @@ TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N##_OptVsC) { \ ...@@ -151,7 +151,7 @@ TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N##_OptVsC) { \
} }
#define TESTBIPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B) \ #define TESTBIPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B) \
TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ,) \ TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, , +) \
TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, Invert, -) TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, Invert, -)
TESTBIPLANARTOB(NV12, 2, 2, ARGB, 4) TESTBIPLANARTOB(NV12, 2, 2, ARGB, 4)
...@@ -233,7 +233,7 @@ TEST_F(libyuvTest, FMT_A##To##FMT_PLANAR##N##_OptVsC) { \ ...@@ -233,7 +233,7 @@ TEST_F(libyuvTest, FMT_A##To##FMT_PLANAR##N##_OptVsC) { \
} }
#define TESTATOPLANAR(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ #define TESTATOPLANAR(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
TESTATOPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, ,) \ TESTATOPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, , +) \
TESTATOPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, Invert, -) TESTATOPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, Invert, -)
TESTATOPLANAR(ARGB, 4, I420, 2, 2) TESTATOPLANAR(ARGB, 4, I420, 2, 2)
...@@ -293,7 +293,7 @@ TEST_F(libyuvTest, FMT_A##To##FMT_B##N##_OptVsC) { \ ...@@ -293,7 +293,7 @@ TEST_F(libyuvTest, FMT_A##To##FMT_B##N##_OptVsC) { \
free_aligned_buffer_16(dst_argb_opt) \ free_aligned_buffer_16(dst_argb_opt) \
} }
#define TESTATOB(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B) \ #define TESTATOB(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B) \
TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, ,) \ TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, , +) \
TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, Invert, -) TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, Invert, -)
TESTATOB(ARGB, 4, 4, ARGB, 4) TESTATOB(ARGB, 4, 4, ARGB, 4)
...@@ -853,14 +853,9 @@ TEST_F(libyuvTest, TestShade) { ...@@ -853,14 +853,9 @@ TEST_F(libyuvTest, TestShade) {
} }
TEST_F(libyuvTest, TestInterpolate) { TEST_F(libyuvTest, TestInterpolate) {
// Interpolate internally used bilinear filtering, which duplicates the last
// value, but the interpolate saves and restores it. The buffer must be
// padded by 16 extra bytes. TODO(fbarchard): Reimplement interpolate with
// code that does not duplicate the last value and remove kPad.
const int kPad = 16;
SIMD_ALIGNED(uint8 orig_pixels_0[256][4]); SIMD_ALIGNED(uint8 orig_pixels_0[256][4]);
SIMD_ALIGNED(uint8 orig_pixels_1[256][4]); SIMD_ALIGNED(uint8 orig_pixels_1[256][4]);
SIMD_ALIGNED(uint8 interpolate_pixels[256 + kPad][4]); SIMD_ALIGNED(uint8 interpolate_pixels[256][4]);
orig_pixels_0[0][0] = 16u; orig_pixels_0[0][0] = 16u;
orig_pixels_0[0][1] = 32u; orig_pixels_0[0][1] = 32u;
...@@ -930,7 +925,7 @@ TEST_F(libyuvTest, TestInterpolate) { ...@@ -930,7 +925,7 @@ TEST_F(libyuvTest, TestInterpolate) {
EXPECT_EQ(16u, interpolate_pixels[0][2]); EXPECT_EQ(16u, interpolate_pixels[0][2]);
EXPECT_EQ(32u, interpolate_pixels[0][3]); EXPECT_EQ(32u, interpolate_pixels[0][3]);
for (int i = 0; i < benchmark_iterations_ * 1280 * 720 / 256; ++i) { for (int i = 0; i < benchmark_iterations_ * (1280 * 720 / 256); ++i) {
ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0, ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0,
&interpolate_pixels[0][0], 0, 256, 1, 128); &interpolate_pixels[0][0], 0, 256, 1, 128);
} }
......
...@@ -25,7 +25,9 @@ TEST_F(libyuvTest, TestVersion) { ...@@ -25,7 +25,9 @@ TEST_F(libyuvTest, TestVersion) {
printf("LIBYUV_VERSION %d\n", LIBYUV_VERSION); printf("LIBYUV_VERSION %d\n", LIBYUV_VERSION);
#ifdef LIBYUV_SVNREVISION #ifdef LIBYUV_SVNREVISION
const char *ver = strchr(LIBYUV_SVNREVISION, ':'); const char *ver = strchr(LIBYUV_SVNREVISION, ':');
if (!ver) { if (ver) {
++ver;
} else {
ver = LIBYUV_SVNREVISION; ver = LIBYUV_SVNREVISION;
} }
int svn_revision = atoi(ver); int svn_revision = atoi(ver);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment