Commit cda9d38a authored by Frank Barchard

xmmword cast for clang

clang-cl now uses compare_win.cc for 32 bit, allowing fallback to the C row functions and enabling the AVX2 code for clang.
Move the HAS_ defines and row-function prototypes into the new header compare_row.h.
Fix odd widths in the ARGBCopyAlpha functions by copying the destination remainder to a temp buffer, doing the alpha copy there, then copying the result back to the destination.

R=harryjin@google.com
TBR=harryjin@google.com
BUG=libyuv:484

Review URL: https://webrtc-codereview.appspot.com/59379004.
parent baf6a3c1
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 1465
+Version: 1466
 License: BSD
 License File: LICENSE
...
/*
 * Copyright 2013 The LibYuv Project Authors. All rights reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef INCLUDE_LIBYUV_COMPARE_ROW_H_  // NOLINT
#define INCLUDE_LIBYUV_COMPARE_ROW_H_

#include "libyuv/basic_types.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

#if defined(__pnacl__) || defined(__CLR_VER) || \
    (defined(__i386__) && !defined(__SSE2__))
#define LIBYUV_DISABLE_X86
#endif

// Visual C 2012 required for AVX2.
#if defined(_M_IX86) && !defined(__clang__) && \
    defined(_MSC_VER) && _MSC_VER >= 1700
#define VISUALC_HAS_AVX2 1
#endif  // VisualStudio >= 2012

// clang >= 3.4.0 required for AVX2.
#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
#if (__clang_major__ > 3) || (__clang_major__ == 3 && (__clang_minor__ >= 4))
#define CLANG_HAS_AVX2 1
#endif  // clang >= 3.4
#endif  // __clang__

#if defined(_M_IX86) && (defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2))
#define HAS_HASHDJB2_AVX2
#endif

// The following are available for Visual C and GCC:
#if !defined(LIBYUV_DISABLE_X86) && \
    (defined(__x86_64__) || (defined(__i386__) || defined(_M_IX86)))
#define HAS_HASHDJB2_SSE41
#define HAS_SUMSQUAREERROR_SSE2
#endif

// The following are available for Visual C and clangcl 32 bit:
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \
    (defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2))
#define HAS_HASHDJB2_AVX2
#define HAS_SUMSQUAREERROR_AVX2
#endif

// The following are available for Neon:
#if !defined(LIBYUV_DISABLE_NEON) && \
    (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
#define HAS_SUMSQUAREERROR_NEON
#endif

uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count);
uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count);
uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count);
uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count);

uint32 HashDjb2_C(const uint8* src, int count, uint32 seed);
uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed);
uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed);

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif

#endif  // INCLUDE_LIBYUV_COMPARE_ROW_H_  NOLINT
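
With the prototypes and HAS_ defines centralized in compare_row.h, callers select a row function at runtime via TestCpuFlag, the same pattern HashDjb2 uses in compare.cc below. A minimal sketch of that dispatch pattern; ChooseSumSquareErrorRow is a hypothetical helper, not part of this change:

    #include "libyuv/compare_row.h"
    #include "libyuv/cpu_id.h"

    typedef uint32 (*SumSquareErrorRow)(const uint8* src_a, const uint8* src_b,
                                        int count);

    // Pick the fastest SumSquareError row the CPU supports; fall back to C.
    static SumSquareErrorRow ChooseSumSquareErrorRow() {
      SumSquareErrorRow row = libyuv::SumSquareError_C;
    #if defined(HAS_SUMSQUAREERROR_NEON)
      if (libyuv::TestCpuFlag(libyuv::kCpuHasNEON)) {
        row = libyuv::SumSquareError_NEON;
      }
    #endif
    #if defined(HAS_SUMSQUAREERROR_SSE2)
      if (libyuv::TestCpuFlag(libyuv::kCpuHasSSE2)) {
        row = libyuv::SumSquareError_SSE2;
      }
    #endif
    #if defined(HAS_SUMSQUAREERROR_AVX2)
      if (libyuv::TestCpuFlag(libyuv::kCpuHasAVX2)) {
        row = libyuv::SumSquareError_AVX2;  // checked last so AVX2 wins
      }
    #endif
      return row;
    }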
@@ -365,7 +365,7 @@ extern "C" {
 #endif
 #endif
-#if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__)
+#if defined(_MSC_VER) && !defined(__CLR_VER)
 #define SIMD_ALIGNED(var) __declspec(align(16)) var
 #define SIMD_ALIGNED32(var) __declspec(align(64)) var
 typedef __declspec(align(16)) int16 vec16[8];
@@ -380,7 +380,7 @@ typedef __declspec(align(32)) int8 lvec8[32];
 typedef __declspec(align(32)) uint16 ulvec16[16];
 typedef __declspec(align(32)) uint32 ulvec32[8];
 typedef __declspec(align(32)) uint8 ulvec8[32];
-#elif defined(__GNUC__) || defined(__clang__)
+#elif defined(__GNUC__)
 // Caveat GCC 4.2 to 4.7 have a known issue using vectors with const.
 #define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
 #define SIMD_ALIGNED32(var) var __attribute__((aligned(64)))
...
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_  // NOLINT
 #define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 1465
+#define LIBYUV_VERSION 1466
 #endif  // INCLUDE_LIBYUV_VERSION_H_  NOLINT
@@ -17,6 +17,7 @@
 #endif

 #include "libyuv/basic_types.h"
+#include "libyuv/compare_row.h"
 #include "libyuv/cpu_id.h"
 #include "libyuv/row.h"
 #include "libyuv/video_common.h"
@@ -26,30 +27,13 @@ namespace libyuv {
 extern "C" {
 #endif

-// hash seed of 5381 recommended.
-// Internal C version of HashDjb2 with int sized count for efficiency.
-uint32 HashDjb2_C(const uint8* src, int count, uint32 seed);
-
-// This module is for Visual C x86
-#if !defined(LIBYUV_DISABLE_X86) && \
-    (defined(_M_IX86) || \
-    (defined(__x86_64__) || (defined(__i386__) && !defined(__pic__))))
-#define HAS_HASHDJB2_SSE41
-uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed);
-#if defined(_M_IX86) && (defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2))
-#define HAS_HASHDJB2_AVX2
-uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed);
-#endif
-#endif  // HAS_HASHDJB2_SSE41
-
 // hash seed of 5381 recommended.
 LIBYUV_API
 uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
   const int kBlockSize = 1 << 15;  // 32768;
   int remainder;
-  uint32 (*HashDjb2_SSE)(const uint8* src, int count, uint32 seed) = HashDjb2_C;
+  uint32 (*HashDjb2_SSE)(const uint8* src, int count, uint32 seed) =
+      HashDjb2_C;
 #if defined(HAS_HASHDJB2_SSE41)
   if (TestCpuFlag(kCpuHasSSE41)) {
     HashDjb2_SSE = HashDjb2_SSE41;
@@ -127,23 +111,6 @@ uint32 ARGBDetect(const uint8* argb, int stride_argb, int width, int height) {
   return fourcc;
 }

-uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count);
-#if !defined(LIBYUV_DISABLE_NEON) && \
-    (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
-#define HAS_SUMSQUAREERROR_NEON
-uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count);
-#endif
-#if !defined(LIBYUV_DISABLE_X86) && \
-    (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
-#define HAS_SUMSQUAREERROR_SSE2
-uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count);
-#endif
-#ifdef VISUALC_HAS_AVX2
-#define HAS_SUMSQUAREERROR_AVX2
-uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count);
-#endif
-
 // TODO(fbarchard): Refactor into row function.
 LIBYUV_API
 uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b,
...
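
The public HashDjb2 entry point above is unchanged by this CL; for reference, a caller hashes a buffer with the recommended seed of 5381 like this (HashFrame is a hypothetical wrapper):

    #include "libyuv/compare.h"

    // Hash a whole frame with the recommended djb2 seed of 5381.
    uint32 HashFrame(const uint8* frame, uint64 size) {
      return libyuv::HashDjb2(frame, size, 5381);
    }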
@@ -10,6 +10,8 @@
 #include "libyuv/basic_types.h"
+
+#include "libyuv/compare_row.h"
 #ifdef __cplusplus
 namespace libyuv {
 extern "C" {
...
@@ -9,6 +9,8 @@
  */
 #include "libyuv/basic_types.h"
+
+#include "libyuv/compare_row.h"
 #include "libyuv/row.h"
 #ifdef __cplusplus
...
@@ -9,6 +9,8 @@
  */
 #include "libyuv/basic_types.h"
+
+#include "libyuv/compare_row.h"
 #include "libyuv/row.h"
 #ifdef __cplusplus
...
@@ -9,6 +9,8 @@
  */
 #include "libyuv/basic_types.h"
+
+#include "libyuv/compare_row.h"
 #include "libyuv/row.h"
 #ifdef __cplusplus
...
@@ -9,6 +9,8 @@
  */
 #include "libyuv/basic_types.h"
+
+#include "libyuv/compare_row.h"
 #include "libyuv/row.h"
 #ifdef __cplusplus
@@ -133,28 +135,28 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
     movd       xmm0, [esp + 12]  // seed
     pxor       xmm7, xmm7        // constant 0 for unpck
-    movdqa     xmm6, kHash16x33
+    movdqa     xmm6, xmmword ptr kHash16x33

   wloop:
     movdqu     xmm1, [eax]       // src[0-15]
     lea        eax, [eax + 16]
     pmulld     xmm0, xmm6        // hash *= 33 ^ 16
-    movdqa     xmm5, kHashMul0
+    movdqa     xmm5, xmmword ptr kHashMul0
     movdqa     xmm2, xmm1
     punpcklbw  xmm2, xmm7        // src[0-7]
     movdqa     xmm3, xmm2
     punpcklwd  xmm3, xmm7        // src[0-3]
     pmulld     xmm3, xmm5
-    movdqa     xmm5, kHashMul1
+    movdqa     xmm5, xmmword ptr kHashMul1
     movdqa     xmm4, xmm2
     punpckhwd  xmm4, xmm7        // src[4-7]
     pmulld     xmm4, xmm5
-    movdqa     xmm5, kHashMul2
+    movdqa     xmm5, xmmword ptr kHashMul2
     punpckhbw  xmm1, xmm7        // src[8-15]
     movdqa     xmm2, xmm1
     punpcklwd  xmm2, xmm7        // src[8-11]
     pmulld     xmm2, xmm5
-    movdqa     xmm5, kHashMul3
+    movdqa     xmm5, xmmword ptr kHashMul3
     punpckhwd  xmm1, xmm7        // src[12-15]
     pmulld     xmm1, xmm5
     paddd      xmm3, xmm4        // add 16 results
@@ -181,32 +183,32 @@ uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) {
   __asm {
     mov        eax, [esp + 4]    // src
     mov        ecx, [esp + 8]    // count
-    movd       xmm0, [esp + 12]  // seed
-    movdqa     xmm6, kHash16x33
+    vmovd      xmm0, [esp + 12]  // seed

   wloop:
     vpmovzxbd  xmm3, [eax]       // src[0-3]
-    pmulld     xmm0, xmm6        // hash *= 33 ^ 16
+    vpmulld    xmm0, xmm0, xmmword ptr kHash16x33  // hash *= 33 ^ 16
     vpmovzxbd  xmm4, [eax + 4]   // src[4-7]
-    pmulld     xmm3, kHashMul0
+    vpmulld    xmm3, xmm3, xmmword ptr kHashMul0
     vpmovzxbd  xmm2, [eax + 8]   // src[8-11]
-    pmulld     xmm4, kHashMul1
+    vpmulld    xmm4, xmm4, xmmword ptr kHashMul1
     vpmovzxbd  xmm1, [eax + 12]  // src[12-15]
-    pmulld     xmm2, kHashMul2
+    vpmulld    xmm2, xmm2, xmmword ptr kHashMul2
     lea        eax, [eax + 16]
-    pmulld     xmm1, kHashMul3
+    vpmulld    xmm1, xmm1, xmmword ptr kHashMul3
-    paddd      xmm3, xmm4        // add 16 results
-    paddd      xmm1, xmm2
-    paddd      xmm1, xmm3
-    pshufd     xmm2, xmm1, 0x0e  // upper 2 dwords
-    paddd      xmm1, xmm2
-    pshufd     xmm2, xmm1, 0x01
-    paddd      xmm1, xmm2
-    paddd      xmm0, xmm1
+    vpaddd     xmm3, xmm3, xmm4  // add 16 results
+    vpaddd     xmm1, xmm1, xmm2
+    vpaddd     xmm1, xmm1, xmm3
+    vpshufd    xmm2, xmm1, 0x0e  // upper 2 dwords
+    vpaddd     xmm1, xmm1, xmm2
+    vpshufd    xmm2, xmm1, 0x01
+    vpaddd     xmm1, xmm1, xmm2
+    vpaddd     xmm0, xmm0, xmm1
     sub        ecx, 16
     jg         wloop
-    movd       eax, xmm0         // return hash
+    vmovd      eax, xmm0         // return hash
+    vzeroupper
     ret
   }
 }
...
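
The xmmword casts above are the heart of this CL: MASM infers the operand size of a named memory operand from its declaration, but clang-cl's integrated assembler requires the size to be explicit, so "movdqa xmm6, kHash16x33" fails to assemble until it is written with "xmmword ptr". A minimal stand-alone illustration of the same pattern, assuming 32-bit MSVC or clang-cl (the constant and function names here are hypothetical):

    #include "libyuv/basic_types.h"

    // 16-byte aligned 128-bit constant, analogous to kHash16x33 above.
    __declspec(align(16)) static const uint32 kFours[4] = {4, 4, 4, 4};

    // Adds 4 to each of four dwords. The explicit "xmmword ptr" size
    // qualifier is what lets both MASM and clang-cl assemble the paddd.
    __declspec(naked) void AddFours_SSE2(uint32* v) {
      __asm {
        mov      eax, [esp + 4]            // v
        movdqu   xmm0, [eax]
        paddd    xmm0, xmmword ptr kFours  // explicit 128-bit operand size
        movdqu   [eax], xmm0
        ret
      }
    }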
@@ -245,18 +245,6 @@ ANY11(CopyRow_Any_SSE2, CopyRow_SSE2, 0, 1, 1, 31)
 #ifdef HAS_COPYROW_NEON
 ANY11(CopyRow_Any_NEON, CopyRow_NEON, 0, 1, 1, 31)
 #endif
-#ifdef HAS_ARGBCOPYALPHAROW_AVX2
-ANY11(ARGBCopyAlphaRow_Any_AVX2, ARGBCopyAlphaRow_AVX2, 0, 1, 4, 15)
-#endif
-#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2
-ANY11(ARGBCopyAlphaRow_Any_SSE2, ARGBCopyAlphaRow_SSE2, 0, 1, 4, 7)
-#endif
-#ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2
-ANY11(ARGBCopyYToAlphaRow_Any_AVX2, ARGBCopyYToAlphaRow_AVX2, 0, 1, 4, 15)
-#endif
-#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2
-ANY11(ARGBCopyYToAlphaRow_Any_SSE2, ARGBCopyYToAlphaRow_SSE2, 0, 1, 4, 7)
-#endif
 #if defined(HAS_ARGBTORGB24ROW_SSSE3)
 ANY11(ARGBToRGB24Row_Any_SSSE3, ARGBToRGB24Row_SSSE3, 0, 4, 3, 15)
 ANY11(ARGBToRAWRow_Any_SSSE3, ARGBToRAWRow_SSSE3, 0, 4, 3, 15)
@@ -410,6 +398,36 @@ ANY11(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, 0, 4, 4, 7)
 #endif
 #undef ANY11

+// Any 1 to 1 blended.
+#define ANY11B(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
+    void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \
+      SIMD_ALIGNED(uint8 temp[128 * 2]); \
+      memset(temp, 0, 128 * 2); /* for YUY2 and msan */ \
+      int r = width & MASK; \
+      int n = width & ~MASK; \
+      if (n > 0) { \
+        ANY_SIMD(src_ptr, dst_ptr, n); \
+      } \
+      memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
+      memcpy(temp + 128, dst_ptr + n * BPP, r * BPP); \
+      ANY_SIMD(temp, temp + 128, MASK + 1); \
+      memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
+    }
+
+#ifdef HAS_ARGBCOPYALPHAROW_AVX2
+ANY11B(ARGBCopyAlphaRow_Any_AVX2, ARGBCopyAlphaRow_AVX2, 0, 4, 4, 15)
+#endif
+#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2
+ANY11B(ARGBCopyAlphaRow_Any_SSE2, ARGBCopyAlphaRow_SSE2, 0, 4, 4, 7)
+#endif
+#ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2
+ANY11B(ARGBCopyYToAlphaRow_Any_AVX2, ARGBCopyYToAlphaRow_AVX2, 0, 1, 4, 15)
+#endif
+#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2
+ANY11B(ARGBCopyYToAlphaRow_Any_SSE2, ARGBCopyYToAlphaRow_SSE2, 0, 1, 4, 7)
+#endif
+#undef ANY11B
+
 // Any 1 to 1 with parameter.
 #define ANY11P(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK) \
     void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, \
...
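
ANY11B above is the odd-width fix from the commit message. Unlike ANY11, these rows blend with the destination (ARGBCopyAlphaRow keeps B, G and R and overwrites only A), so the wrapper must copy the destination remainder into the temp buffer as well, run the SIMD row there, and copy the result back. A stand-alone sketch of the same remainder logic, with a plain C row standing in for the SIMD kernel (the names here are hypothetical):

    #include <string.h>
    #include "libyuv/row.h"  // for uint8 and SIMD_ALIGNED

    // Plain C stand-in for ARGBCopyAlphaRow_SSE2/AVX2: copy only A.
    static void CopyAlphaRow_C(const uint8* src, uint8* dst, int width) {
      for (int i = 0; i < width; ++i) {
        dst[i * 4 + 3] = src[i * 4 + 3];  // overwrite A, preserve B, G, R
      }
    }

    // Mirrors ANY11B with UVSHIFT = 0, SBPP = BPP = 4, MASK = 15. For
    // width = 21: n = 16 pixels take the direct pass, and the r = 5
    // leftover pixels round-trip through temp, so the kernel always sees
    // MASK + 1 pixels and never touches memory past the real buffers.
    void CopyAlphaRow_Any(const uint8* src_ptr, uint8* dst_ptr, int width) {
      SIMD_ALIGNED(uint8 temp[128 * 2]);
      memset(temp, 0, 128 * 2);  // keep msan happy
      int r = width & 15;
      int n = width & ~15;
      if (n > 0) {
        CopyAlphaRow_C(src_ptr, dst_ptr, n);
      }
      memcpy(temp, src_ptr + n * 4, r * 4);        // source remainder
      memcpy(temp + 128, dst_ptr + n * 4, r * 4);  // destination remainder too
      CopyAlphaRow_C(temp, temp + 128, 16);        // full MASK + 1 pixels
      memcpy(dst_ptr + n * 4, temp + 128, r * 4);  // write back only r pixels
    }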
@@ -517,7 +517,7 @@ TESTPLANARTOB(J420, 2, 2, J400, 1, 1, 1, 0, ARGB, 4)
 #define TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
-                        YALIGN, W1280, DIFF, N, NEG, OFF, FMT_C, BPP_C) \
+                        YALIGN, W1280, DIFF, N, NEG, OFF) \
 TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N) { \
   const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
   const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
@@ -558,21 +558,10 @@ TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N) { \
                          kWidth, NEG kHeight); \
   } \
   int max_diff = 0; \
-  /* Convert to ARGB so 565 is expanded to bytes that can be compared. */ \
-  align_buffer_64(dst_argb32_c, kWidth * BPP_C * kHeight); \
-  align_buffer_64(dst_argb32_opt, kWidth * BPP_C * kHeight); \
-  memset(dst_argb32_c, 2, kWidth * BPP_C * kHeight); \
-  memset(dst_argb32_opt, 102, kWidth * BPP_C * kHeight); \
-  FMT_B##To##FMT_C(dst_argb_c + OFF, kStrideB, \
-                   dst_argb32_c, kWidth * BPP_C, \
-                   kWidth, kHeight); \
-  FMT_B##To##FMT_C(dst_argb_opt + OFF, kStrideB, \
-                   dst_argb32_opt, kWidth * BPP_C, \
-                   kWidth, kHeight); \
-  for (int i = 0; i < kWidth * BPP_C * kHeight; ++i) { \
+  for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \
     int abs_diff = \
-        abs(static_cast<int>(dst_argb32_c[i]) - \
-            static_cast<int>(dst_argb32_opt[i])); \
+        abs(static_cast<int>(dst_argb_c[i + OFF]) - \
+            static_cast<int>(dst_argb_opt[i + OFF])); \
     if (abs_diff > max_diff) { \
       max_diff = abs_diff; \
     } \
@@ -584,22 +573,20 @@ TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N) { \
   free_aligned_buffer_64(src_a); \
   free_aligned_buffer_64(dst_argb_c); \
   free_aligned_buffer_64(dst_argb_opt); \
-  free_aligned_buffer_64(dst_argb32_c); \
-  free_aligned_buffer_64(dst_argb32_opt); \
 }

 #define TESTQPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
-                       YALIGN, DIFF, FMT_C, BPP_C) \
+                       YALIGN, DIFF) \
     TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
-                    YALIGN, benchmark_width_ - 4, DIFF, _Any, +, 0, FMT_C, BPP_C) \
+                    YALIGN, benchmark_width_ - 4, DIFF, _Any, +, 0) \
     TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
-                    YALIGN, benchmark_width_, DIFF, _Unaligned, +, 1, FMT_C, BPP_C) \
+                    YALIGN, benchmark_width_, DIFF, _Unaligned, +, 1) \
    TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
-                    YALIGN, benchmark_width_, DIFF, _Invert, -, 0, FMT_C, BPP_C) \
+                    YALIGN, benchmark_width_, DIFF, _Invert, -, 0) \
    TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
-                    YALIGN, benchmark_width_, DIFF, _Opt, +, 0, FMT_C, BPP_C)
+                    YALIGN, benchmark_width_, DIFF, _Opt, +, 0)

-TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1, 2, ARGB, 4)
+TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1, 2)

 #define TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
                          W1280, DIFF, N, NEG, OFF) \
...