Commit 3c7bb050 authored by fbarchard@google.com's avatar fbarchard@google.com

Unattenuate AVX2

BUG=190
TEST=planar_test
Review URL: https://webrtc-codereview.appspot.com/1112004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@577 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent d5ee3dc9
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 576 Version: 577
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -140,6 +140,7 @@ extern "C" { ...@@ -140,6 +140,7 @@ extern "C" {
// Effects // Effects
#define HAS_ARGBATTENUATEROW_AVX2 #define HAS_ARGBATTENUATEROW_AVX2
#define HAS_ARGBUNATTENUATEROW_AVX2
#endif #endif
#endif #endif
...@@ -1324,6 +1325,7 @@ void ARGBAttenuateRow_Any_NEON(const uint8* src_argb, uint8* dst_argb, ...@@ -1324,6 +1325,7 @@ void ARGBAttenuateRow_Any_NEON(const uint8* src_argb, uint8* dst_argb,
extern uint32 fixed_invtbl8[256]; extern uint32 fixed_invtbl8[256];
void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width); void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width);
void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width); void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width); void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width);
void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width); void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width);
......
...@@ -145,11 +145,9 @@ LIBYUV_API ...@@ -145,11 +145,9 @@ LIBYUV_API
uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a, uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
const uint8* src_b, int stride_b, const uint8* src_b, int stride_b,
int width, int height) { int width, int height) {
if (stride_a == width && stride_b == width) { if (stride_a == width && stride_b == width) {
return ComputeSumSquareError(src_a, src_b, width * height); return ComputeSumSquareError(src_a, src_b, width * height);
} }
uint32 (*SumSquareError)(const uint8* src_a, const uint8* src_b, int count) = uint32 (*SumSquareError)(const uint8* src_a, const uint8* src_b, int count) =
SumSquareError_C; SumSquareError_C;
#if defined(HAS_SUMSQUAREERROR_NEON) #if defined(HAS_SUMSQUAREERROR_NEON)
......
...@@ -1085,6 +1085,14 @@ int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb, ...@@ -1085,6 +1085,14 @@ int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
ARGBUnattenuateRow = ARGBUnattenuateRow_SSE2; ARGBUnattenuateRow = ARGBUnattenuateRow_SSE2;
} }
#endif #endif
#if defined(HAS_ARGBUNATTENUATEROW_AVX2)
  // AVX2 row handler processes 8 pixels per iteration, so only select it
  // when width is a multiple of 8.  (Removed the dead `bool clear` locals:
  // the inner declaration shadowed the outer one and neither was ever read.)
  if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 8)) {
    ARGBUnattenuateRow = ARGBUnattenuateRow_AVX2;
  }
#endif
  // TODO(fbarchard): Neon version.
for (int y = 0; y < height; ++y) { for (int y = 0; y < height; ++y) {
ARGBUnattenuateRow(src_argb, dst_argb, width); ARGBUnattenuateRow(src_argb, dst_argb, width);
......
...@@ -1528,7 +1528,7 @@ void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) { ...@@ -1528,7 +1528,7 @@ void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
// 8.16 fixed point inverse table // 8.16 fixed point inverse table
#define T(a) 0x10000 / a #define T(a) 0x10000 / a
uint32 fixed_invtbl8[256] = { uint32 fixed_invtbl8[256] = {
0x0100, T(0x01), T(0x02), T(0x03), T(0x04), T(0x05), T(0x06), T(0x07), 0xffff, 0xffff, T(0x02), T(0x03), T(0x04), T(0x05), T(0x06), T(0x07),
T(0x08), T(0x09), T(0x0a), T(0x0b), T(0x0c), T(0x0d), T(0x0e), T(0x0f), T(0x08), T(0x09), T(0x0a), T(0x0b), T(0x0c), T(0x0d), T(0x0e), T(0x0f),
T(0x10), T(0x11), T(0x12), T(0x13), T(0x14), T(0x15), T(0x16), T(0x17), T(0x10), T(0x11), T(0x12), T(0x13), T(0x14), T(0x15), T(0x16), T(0x17),
T(0x18), T(0x19), T(0x1a), T(0x1b), T(0x1c), T(0x1d), T(0x1e), T(0x1f), T(0x18), T(0x19), T(0x1a), T(0x1b), T(0x1c), T(0x1d), T(0x1e), T(0x1f),
......
...@@ -4462,6 +4462,53 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, ...@@ -4462,6 +4462,53 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
} }
#endif // HAS_ARGBUNATTENUATEROW_SSE2 #endif // HAS_ARGBUNATTENUATEROW_SSE2
#ifdef HAS_ARGBUNATTENUATEROW_AVX2
// Shuffle table duplicating alpha.
// Replicates each pixel's 16-bit inverse-alpha word across its B, G and R
// word lanes; 128 (high bit set) makes vpshufb write zero into the alpha
// word lane.
static const ulvec8 kUnattenShuffleAlpha_AVX2 = {
  0u, 1u, 0u, 1u, 0u, 1u, 128u, 128u,
  8u, 9u, 8u, 9u, 8u, 9u, 128u, 128u,
  0u, 1u, 0u, 1u, 0u, 1u, 128u, 128u,
  8u, 9u, 8u, 9u, 8u, 9u, 128u, 128u,
};
// Un-premultiplies 8 ARGB pixels per loop iteration using the 8.16
// fixed-point reciprocal table fixed_invtbl8, fetched with vpgatherdd.
// The original alpha byte is preserved in the output (vpand/vpor below).
// Caller must pass width as a multiple of 8 — there is no remainder loop.
// Unaligned src/dst are fine (vmovdqu loads/stores).
// NOTE(review): no vzeroupper before ret — may incur AVX->SSE transition
// penalties in SSE-using callers; confirm against the project's policy.
__declspec(naked) __declspec(align(16))
void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb,
                             int width) {
  __asm {
    mov        eax, [esp + 4]   // src_argb0
    mov        edx, [esp + 8]   // dst_argb
    mov        ecx, [esp + 12]  // width
    sub        edx, eax         // edx = dst - src; one incremented pointer serves both
    vmovdqa    ymm4, kUnattenShuffleAlpha_AVX2
    vpcmpeqb   ymm5, ymm5, ymm5 // generate mask 0xff000000
    vpslld     ymm5, ymm5, 24
    align      16
 convertloop:
    vmovdqu    ymm6, [eax]      // read 8 pixels.
    // vpgatherdd consumes (zeroes) its mask register, so ymm7 must be
    // regenerated to all-ones on every iteration.
    vpcmpeqb   ymm7, ymm7, ymm7 // generate mask 0xffffffff for gather.
    vpsrld     ymm2, ymm6, 24   // alpha in low 8 bits.
    vpunpcklbw ymm0, ymm6, ymm6 // low 4 pixels. mutated.
    vpunpckhbw ymm1, ymm6, ymm6 // high 4 pixels. mutated.
    vpgatherdd ymm3, [ymm2 * 4 + fixed_invtbl8], ymm7  // ymm7 cleared.
    vpunpcklwd ymm2, ymm3, ymm7 // low 4 inverted alphas. mutated. (ymm7 is zero here)
    vpunpckhwd ymm3, ymm3, ymm7 // high 4 inverted alphas. mutated.
    vpshufb    ymm2, ymm2, ymm4 // replicate low 4 alphas
    vpshufb    ymm3, ymm3, ymm4 // replicate high 4 alphas
    vpmulhuw   ymm0, ymm0, ymm2 // rgb * ia
    vpmulhuw   ymm1, ymm1, ymm3 // rgb * ia
    vpand      ymm6, ymm6, ymm5 // isolate alpha
    vpackuswb  ymm0, ymm0, ymm1 // unmutated.
    vpor       ymm0, ymm0, ymm6 // copy original alpha
    sub        ecx, 8
    vmovdqu    [eax + edx], ymm0
    lea        eax, [eax + 32]
    jg         convertloop
    ret
  }
}
#endif  // HAS_ARGBUNATTENUATEROW_AVX2
#ifdef HAS_ARGBGRAYROW_SSSE3 #ifdef HAS_ARGBGRAYROW_SSSE3
// Constant for ARGB color to gray scale: 0.11 * B + 0.59 * G + 0.30 * R // Constant for ARGB color to gray scale: 0.11 * B + 0.59 * G + 0.30 * R
static const vec8 kARGBToGray = { static const vec8 kARGBToGray = {
......
...@@ -42,7 +42,7 @@ cglobal %1ToYRow%3, 3, 3, 3, src_yuy2, dst_y, pix ...@@ -42,7 +42,7 @@ cglobal %1ToYRow%3, 3, 3, 3, src_yuy2, dst_y, pix
%endif %endif
packuswb m0, m0, m1 packuswb m0, m0, m1
%if cpuflag(AVX2) %if cpuflag(AVX2)
vpermq m0, m0, 0xd8 vpermq m0, m0, 0xd8
%endif %endif
sub pixd, mmsize sub pixd, mmsize
mov%2 [dst_yq], m0 mov%2 [dst_yq], m0
...@@ -86,8 +86,8 @@ cglobal SplitUVRow%2, 4, 4, 5, src_uv, dst_u, dst_v, pix ...@@ -86,8 +86,8 @@ cglobal SplitUVRow%2, 4, 4, 5, src_uv, dst_u, dst_v, pix
packuswb m0, m0, m1 packuswb m0, m0, m1
packuswb m2, m2, m3 packuswb m2, m2, m3
%if cpuflag(AVX2) %if cpuflag(AVX2)
vpermq m0, m0, 0xd8 vpermq m0, m0, 0xd8
vpermq m2, m2, 0xd8 vpermq m2, m2, 0xd8
%endif %endif
mov%1 [dst_uq], m0 mov%1 [dst_uq], m0
mov%1 [dst_uq + dst_vq], m2 mov%1 [dst_uq + dst_vq], m2
......
...@@ -42,13 +42,7 @@ void SetUseReferenceImpl(bool use) { ...@@ -42,13 +42,7 @@ void SetUseReferenceImpl(bool use) {
} }
// ScaleRowDown2Int also used by planar functions // ScaleRowDown2Int also used by planar functions
// NEON downscalers with interpolation.
/**
* NEON downscalers with interpolation.
*
* Provided by Fritz Koenig
*
*/
#if !defined(YUV_DISABLE_ASM) && (defined(__ARM_NEON__) || defined(LIBYUV_NEON)) #if !defined(YUV_DISABLE_ASM) && (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
#define HAS_SCALEROWDOWN2_NEON #define HAS_SCALEROWDOWN2_NEON
...@@ -98,13 +92,7 @@ void ScaleFilterRows_NEON(uint8* dst_ptr, ...@@ -98,13 +92,7 @@ void ScaleFilterRows_NEON(uint8* dst_ptr,
const uint8* src_ptr, ptrdiff_t src_stride, const uint8* src_ptr, ptrdiff_t src_stride,
int dst_width, int source_y_fraction); int dst_width, int source_y_fraction);
/** // SSE2 downscalers with interpolation.
* SSE2 downscalers with interpolation.
*
* Provided by Frank Barchard (fbarchard@google.com)
*
*/
// Constants for SSSE3 code // Constants for SSSE3 code
#elif !defined(YUV_DISABLE_ASM) && \ #elif !defined(YUV_DISABLE_ASM) && \
(defined(_M_IX86) || defined(__i386__) || defined(__x86_64__)) (defined(_M_IX86) || defined(__i386__) || defined(__x86_64__))
...@@ -2630,13 +2618,10 @@ void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride, ...@@ -2630,13 +2618,10 @@ void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride,
} }
} }
/** // Scale plane, 1/2
* Scale plane, 1/2 // This is an optimized version for scaling down a plane to 1/2 of
* // its original size.
* This is an optimized version for scaling down a plane to 1/2 of
* its original size.
*
*/
static void ScalePlaneDown2(int /* src_width */, int /* src_height */, static void ScalePlaneDown2(int /* src_width */, int /* src_height */,
int dst_width, int dst_height, int dst_width, int dst_height,
int src_stride, int dst_stride, int src_stride, int dst_stride,
...@@ -2676,12 +2661,10 @@ static void ScalePlaneDown2(int /* src_width */, int /* src_height */, ...@@ -2676,12 +2661,10 @@ static void ScalePlaneDown2(int /* src_width */, int /* src_height */,
} }
} }
/** // Scale plane, 1/4
* Scale plane, 1/4 // This is an optimized version for scaling down a plane to 1/4 of
* // its original size.
* This is an optimized version for scaling down a plane to 1/4 of
* its original size.
*/
static void ScalePlaneDown4(int /* src_width */, int /* src_height */, static void ScalePlaneDown4(int /* src_width */, int /* src_height */,
int dst_width, int dst_height, int dst_width, int dst_height,
int src_stride, int dst_stride, int src_stride, int dst_stride,
...@@ -2717,13 +2700,10 @@ static void ScalePlaneDown4(int /* src_width */, int /* src_height */, ...@@ -2717,13 +2700,10 @@ static void ScalePlaneDown4(int /* src_width */, int /* src_height */,
} }
} }
/** // Scale plane, 1/8
* Scale plane, 1/8 // This is an optimized version for scaling down a plane to 1/8
* // of its original size.
* This is an optimized version for scaling down a plane to 1/8
* of its original size.
*
*/
static void ScalePlaneDown8(int /* src_width */, int /* src_height */, static void ScalePlaneDown8(int /* src_width */, int /* src_height */,
int dst_width, int dst_height, int dst_width, int dst_height,
int src_stride, int dst_stride, int src_stride, int dst_stride,
...@@ -2748,12 +2728,8 @@ static void ScalePlaneDown8(int /* src_width */, int /* src_height */, ...@@ -2748,12 +2728,8 @@ static void ScalePlaneDown8(int /* src_width */, int /* src_height */,
} }
} }
/** // Scale plane down, 3/4
* Scale plane down, 3/4
*
* Provided by Frank Barchard (fbarchard@google.com)
*
*/
static void ScalePlaneDown34(int /* src_width */, int /* src_height */, static void ScalePlaneDown34(int /* src_width */, int /* src_height */,
int dst_width, int dst_height, int dst_width, int dst_height,
int src_stride, int dst_stride, int src_stride, int dst_stride,
...@@ -2839,23 +2815,22 @@ static void ScalePlaneDown34(int /* src_width */, int /* src_height */, ...@@ -2839,23 +2815,22 @@ static void ScalePlaneDown34(int /* src_width */, int /* src_height */,
} }
} }
/**
* Scale plane, 3/8 // Scale plane, 3/8
* // This is an optimized version for scaling down a plane to 3/8
* This is an optimized version for scaling down a plane to 3/8 // of its original size.
* of its original size. //
* // Uses box filter arranges like this
* Uses box filter arranges like this // aaabbbcc -> abc
* aaabbbcc -> abc // aaabbbcc def
* aaabbbcc def // aaabbbcc ghi
* aaabbbcc ghi // dddeeeff
* dddeeeff // dddeeeff
* dddeeeff // dddeeeff
* dddeeeff // ggghhhii
* ggghhhii // ggghhhii
* ggghhhii // Boxes are 3x3, 2x3, 3x2 and 2x2
* Boxes are 3x3, 2x3, 3x2 and 2x2
*/
static void ScalePlaneDown38(int /* src_width */, int /* src_height */, static void ScalePlaneDown38(int /* src_width */, int /* src_height */,
int dst_width, int dst_height, int dst_width, int dst_height,
int src_stride, int dst_stride, int src_stride, int dst_stride,
...@@ -2991,15 +2966,14 @@ static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx, ...@@ -2991,15 +2966,14 @@ static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx,
} }
} }
/** // Scale plane down to any dimensions, with interpolation.
* Scale plane down to any dimensions, with interpolation. // (boxfilter).
* (boxfilter). //
* // Same method as SimpleScale, which is fixed point, outputting
* Same method as SimpleScale, which is fixed point, outputting // one pixel of destination using fixed point (16.16) to step
* one pixel of destination using fixed point (16.16) to step // through source, sampling a box of pixel with simple
* through source, sampling a box of pixel with simple // averaging.
* averaging.
*/
static void ScalePlaneBox(int src_width, int src_height, static void ScalePlaneBox(int src_width, int src_height,
int dst_width, int dst_height, int dst_width, int dst_height,
int src_stride, int dst_stride, int src_stride, int dst_stride,
...@@ -3008,8 +2982,6 @@ static void ScalePlaneBox(int src_width, int src_height, ...@@ -3008,8 +2982,6 @@ static void ScalePlaneBox(int src_width, int src_height,
assert(dst_height > 0); assert(dst_height > 0);
int dx = (src_width << 16) / dst_width; int dx = (src_width << 16) / dst_width;
int dy = (src_height << 16) / dst_height; int dy = (src_height << 16) / dst_height;
// int x = (dx >= 65536) ? ((dx >> 1) - 32768) : (dx >> 1);
// int y = (dy >= 65536) ? ((dy >> 1) - 32768) : (dy >> 1);
int x = 0; int x = 0;
int y = 0; int y = 0;
int maxy = (src_height << 16); int maxy = (src_height << 16);
...@@ -3063,9 +3035,8 @@ static void ScalePlaneBox(int src_width, int src_height, ...@@ -3063,9 +3035,8 @@ static void ScalePlaneBox(int src_width, int src_height,
} }
} }
/** // Scale plane to/from any dimensions, with interpolation.
* Scale plane to/from any dimensions, with interpolation.
*/
static void ScalePlaneBilinearSimple(int src_width, int src_height, static void ScalePlaneBilinearSimple(int src_width, int src_height,
int dst_width, int dst_height, int dst_width, int dst_height,
int src_stride, int dst_stride, int src_stride, int dst_stride,
...@@ -3104,10 +3075,9 @@ static void ScalePlaneBilinearSimple(int src_width, int src_height, ...@@ -3104,10 +3075,9 @@ static void ScalePlaneBilinearSimple(int src_width, int src_height,
} }
} }
/**
* Scale plane to/from any dimensions, with bilinear // Scale plane to/from any dimensions, with bilinear interpolation.
* interpolation.
*/
void ScalePlaneBilinear(int src_width, int src_height, void ScalePlaneBilinear(int src_width, int src_height,
int dst_width, int dst_height, int dst_width, int dst_height,
int src_stride, int dst_stride, int src_stride, int dst_stride,
...@@ -3170,12 +3140,11 @@ void ScalePlaneBilinear(int src_width, int src_height, ...@@ -3170,12 +3140,11 @@ void ScalePlaneBilinear(int src_width, int src_height,
} }
} }
/** // Scale plane to/from any dimensions, without interpolation.
* Scale plane to/from any dimensions, without interpolation. // Fixed point math is used for performance: The upper 16 bits
* Fixed point math is used for performance: The upper 16 bits // of x and dx is the integer part of the source position and
* of x and dx is the integer part of the source position and // the lower 16 bits are the fixed decimal part.
* the lower 16 bits are the fixed decimal part.
*/
static void ScalePlaneSimple(int src_width, int src_height, static void ScalePlaneSimple(int src_width, int src_height,
int dst_width, int dst_height, int dst_width, int dst_height,
int src_stride, int dst_stride, int src_stride, int dst_stride,
...@@ -3197,9 +3166,8 @@ static void ScalePlaneSimple(int src_width, int src_height, ...@@ -3197,9 +3166,8 @@ static void ScalePlaneSimple(int src_width, int src_height,
} }
} }
/** // Scale plane to/from any dimensions.
* Scale plane to/from any dimensions.
*/
static void ScalePlaneAnySize(int src_width, int src_height, static void ScalePlaneAnySize(int src_width, int src_height,
int dst_width, int dst_height, int dst_width, int dst_height,
int src_stride, int dst_stride, int src_stride, int dst_stride,
...@@ -3215,14 +3183,12 @@ static void ScalePlaneAnySize(int src_width, int src_height, ...@@ -3215,14 +3183,12 @@ static void ScalePlaneAnySize(int src_width, int src_height,
} }
} }
/** // Scale plane down, any size
* Scale plane down, any size //
* // This is an optimized version for scaling down a plane to any size.
* This is an optimized version for scaling down a plane to any size. // The current implementation is ~10 times faster compared to the
* The current implementation is ~10 times faster compared to the // reference implementation for e.g. XGA->LowResPAL
* reference implementation for e.g. XGA->LowResPAL
*
*/
static void ScalePlaneDown(int src_width, int src_height, static void ScalePlaneDown(int src_width, int src_height,
int dst_width, int dst_height, int dst_width, int dst_height,
int src_stride, int dst_stride, int src_stride, int dst_stride,
......
...@@ -44,10 +44,6 @@ void ScaleARGBFilterRows_NEON(uint8* dst_ptr, ...@@ -44,10 +44,6 @@ void ScaleARGBFilterRows_NEON(uint8* dst_ptr,
int dst_width, int source_y_fraction); int dst_width, int source_y_fraction);
#endif #endif
/**
* SSE2 downscalers with bilinear interpolation.
*/
#if !defined(YUV_DISABLE_ASM) && defined(_M_IX86) #if !defined(YUV_DISABLE_ASM) && defined(_M_IX86)
#define HAS_SCALEARGBROWDOWN2_SSE2 #define HAS_SCALEARGBROWDOWN2_SSE2
...@@ -880,13 +876,10 @@ void ScaleARGBFilterRows_C(uint8* dst_argb, const uint8* src_argb, ...@@ -880,13 +876,10 @@ void ScaleARGBFilterRows_C(uint8* dst_argb, const uint8* src_argb,
dst_argb[3] = dst_argb[-1]; dst_argb[3] = dst_argb[-1];
} }
/** // ScaleARGB ARGB, 1/2
* ScaleARGB ARGB, 1/2 // This is an optimized version for scaling down a ARGB to 1/2 of
* // its original size.
* This is an optimized version for scaling down a ARGB to 1/2 of
* its original size.
*
*/
static void ScaleARGBDown2(int /* src_width */, int /* src_height */, static void ScaleARGBDown2(int /* src_width */, int /* src_height */,
int dst_width, int dst_height, int dst_width, int dst_height,
int src_stride, int dst_stride, int src_stride, int dst_stride,
...@@ -918,13 +911,10 @@ static void ScaleARGBDown2(int /* src_width */, int /* src_height */, ...@@ -918,13 +911,10 @@ static void ScaleARGBDown2(int /* src_width */, int /* src_height */,
} }
} }
/** // ScaleARGB ARGB Even
* ScaleARGB ARGB Even // This is an optimized version for scaling down a ARGB to even
* // multiple of its original size.
* This is an optimized version for scaling down a ARGB to even
* multiple of its original size.
*
*/
static void ScaleARGBDownEven(int src_width, int src_height, static void ScaleARGBDownEven(int src_width, int src_height,
int dst_width, int dst_height, int dst_width, int dst_height,
int src_stride, int dst_stride, int src_stride, int dst_stride,
...@@ -959,10 +949,9 @@ static void ScaleARGBDownEven(int src_width, int src_height, ...@@ -959,10 +949,9 @@ static void ScaleARGBDownEven(int src_width, int src_height,
dst_argb += dst_stride; dst_argb += dst_stride;
} }
} }
/**
* ScaleARGB ARGB to/from any dimensions, with bilinear // ScaleARGB ARGB to/from any dimensions, with bilinear
* interpolation. // interpolation.
*/
// Maximum width handled by 2 pass Bilinear. // Maximum width handled by 2 pass Bilinear.
static const int kMaxInputWidth = 2560; static const int kMaxInputWidth = 2560;
...@@ -1033,12 +1022,11 @@ static void ScaleARGBCols(uint8* dst_argb, const uint8* src_argb, ...@@ -1033,12 +1022,11 @@ static void ScaleARGBCols(uint8* dst_argb, const uint8* src_argb,
} }
} }
/**
* ScaleARGB ARGB to/from any dimensions, without interpolation. // ScaleARGB ARGB to/from any dimensions, without interpolation.
* Fixed point math is used for performance: The upper 16 bits // Fixed point math is used for performance: The upper 16 bits
* of x and dx is the integer part of the source position and // of x and dx is the integer part of the source position and
* the lower 16 bits are the fixed decimal part. // the lower 16 bits are the fixed decimal part.
*/
static void ScaleARGBSimple(int src_width, int src_height, static void ScaleARGBSimple(int src_width, int src_height,
int dst_width, int dst_height, int dst_width, int dst_height,
...@@ -1056,9 +1044,8 @@ static void ScaleARGBSimple(int src_width, int src_height, ...@@ -1056,9 +1044,8 @@ static void ScaleARGBSimple(int src_width, int src_height,
} }
} }
/** // ScaleARGB ARGB to/from any dimensions.
* ScaleARGB ARGB to/from any dimensions.
*/
static void ScaleARGBAnySize(int src_width, int src_height, static void ScaleARGBAnySize(int src_width, int src_height,
int dst_width, int dst_height, int dst_width, int dst_height,
int src_stride, int dst_stride, int src_stride, int dst_stride,
......
...@@ -167,6 +167,72 @@ TEST_F(libyuvTest, ARGBAttenuate_Opt) { ...@@ -167,6 +167,72 @@ TEST_F(libyuvTest, ARGBAttenuate_Opt) {
EXPECT_LE(max_diff, 2); EXPECT_LE(max_diff, 2);
} }
// Compares the portable C implementation of ARGBUnattenuate against the
// CPU-optimized path on randomized premultiplied input.
// Returns the maximum absolute per-byte difference between the two outputs.
// invert: +1 for normal orientation, -1 passes a negative height to
//         exercise the vertical-flip path.
// off: byte offset applied to the source pointer to exercise unaligned reads.
static int TestUnattenuateI(int width, int height, int benchmark_iterations,
                            int invert, int off) {
  const int kBpp = 4;  // ARGB is 4 bytes per pixel.
  const int kStride = (width * kBpp + 15) & ~15;  // round row up to 16 bytes.
  align_buffer_64(src_argb, kStride * height + off);
  align_buffer_64(dst_argb_c, kStride * height);
  align_buffer_64(dst_argb_opt, kStride * height);
  // NOTE(review): seeding with time(NULL) makes any failure hard to
  // reproduce; a fixed seed would be preferable for a regression test.
  srandom(time(NULL));
  for (int i = 0; i < kStride * height; ++i) {
    src_argb[i + off] = (random() & 0xff);
  }
  // Attenuate in place first so the source is valid premultiplied ARGB
  // before it is unattenuated.
  ARGBAttenuate(src_argb + off, kStride,
                src_argb + off, kStride,
                width, height);
  memset(dst_argb_c, 0, kStride * height);
  memset(dst_argb_opt, 0, kStride * height);
  // Disable all CPU feature flags to force the reference C implementation.
  MaskCpuFlags(0);
  ARGBUnattenuate(src_argb + off, kStride,
                  dst_argb_c, kStride,
                  width, invert * height);
  // Re-enable CPU feature detection; repeated iterations double as a
  // benchmark of the optimized path.
  MaskCpuFlags(-1);
  for (int i = 0; i < benchmark_iterations; ++i) {
    ARGBUnattenuate(src_argb + off, kStride,
                    dst_argb_opt, kStride,
                    width, invert * height);
  }
  // Find the worst per-byte divergence between C and optimized outputs.
  int max_diff = 0;
  for (int i = 0; i < kStride * height; ++i) {
    int abs_diff =
        abs(static_cast<int>(dst_argb_c[i]) -
            static_cast<int>(dst_argb_opt[i]));
    if (abs_diff > max_diff) {
      max_diff = abs_diff;
    }
  }
  free_aligned_buffer_64(src_argb)
  free_aligned_buffer_64(dst_argb_c)
  free_aligned_buffer_64(dst_argb_opt)
  return max_diff;
}
// Odd width (benchmark_width_ - 1) is not a multiple of 8, so the SIMD
// fast path is bypassed and the any-width fallback is exercised.
TEST_F(libyuvTest, ARGBUnattenuate_Any) {
  int max_diff = TestUnattenuateI(benchmark_width_ - 1, benchmark_height_,
                                  benchmark_iterations_, +1, 0);
  EXPECT_LE(max_diff, 2);  // allow small rounding differences vs C path.
}
// off=1 misaligns the source buffer by one byte to exercise unaligned loads.
TEST_F(libyuvTest, ARGBUnattenuate_Unaligned) {
  int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_,
                                  benchmark_iterations_, +1, 1);
  EXPECT_LE(max_diff, 2);  // allow small rounding differences vs C path.
}
// Negative height exercises the vertical-flip (inverted) code path.
TEST_F(libyuvTest, ARGBUnattenuate_Invert) {
  int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_,
                                  benchmark_iterations_, -1, 0);
  EXPECT_LE(max_diff, 2);  // allow small rounding differences vs C path.
}
// Default dimensions/alignment: the fully optimized SIMD path.
TEST_F(libyuvTest, ARGBUnattenuate_Opt) {
  int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_,
                                  benchmark_iterations_, +1, 0);
  EXPECT_LE(max_diff, 2);  // allow small rounding differences vs C path.
}
TEST_F(libyuvTest, TestARGBComputeCumulativeSum) { TEST_F(libyuvTest, TestARGBComputeCumulativeSum) {
SIMD_ALIGNED(uint8 orig_pixels[16][16][4]); SIMD_ALIGNED(uint8 orig_pixels[16][16][4]);
SIMD_ALIGNED(int32 added_pixels[16][16][4]); SIMD_ALIGNED(int32 added_pixels[16][16][4]);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment