Commit 4e0d7cc2 authored by fbarchard@google.com

Y coefficients for J420 need to be scaled by 255/219 to full range.

BUG=159
TESTED=out\release\libyuv_unittest --gtest_filter=*J*
Review URL: https://webrtc-codereview.appspot.com/1264004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@624 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent d8431003
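
The 255/219 figure in the message is the nominal studio-to-full-range stretch; the coefficients actually landed below are derived by normalizing the old BT.601 weights by their sum (1/0.8672 = 1.1531), as the new comment block in row_common.cc spells out. A quick standalone sketch of that arithmetic (mine, not part of the commit), assuming round-to-nearest:

    #include <stdio.h>

    int main() {
      const double kB = 0.1016, kG = 0.5078, kR = 0.2578;  // mpeg-range weights
      const double kScale = 1.0 / (kB + kG + kR);          // 1 / 0.8672 = 1.1531
      // 7-bit fixed point: scale, multiply by 127, round to nearest.
      printf("b=%d g=%d r=%d\n",
             (int)(kB * kScale * 127 + 0.5),   // 15
             (int)(kG * kScale * 127 + 0.5),   // 74
             (int)(kR * kScale * 127 + 0.5));  // 38
      return 0;
    }

These are the 15/74/38 constants that appear in every kernel touched below.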
@@ -1061,7 +1061,7 @@ int ARGBToJ400(const uint8* src_argb, int src_stride_argb,
   // Coalesce contiguous rows.
   if (src_stride_argb == width * 4 &&
       dst_stride_yj == width) {
-    return ARGBToI400(src_argb, 0,
+    return ARGBToJ400(src_argb, 0,
                       dst_yj, 0,
                       width * height, 1);
  }
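
The one-line fix above is in the row-coalescing fast path: when both source and destination rows are contiguous, the whole plane is converted as a single long row, but the call dispatched to ARGBToI400 (studio-range Y) instead of re-entering ARGBToJ400. A minimal sketch of the coalescing idiom itself (hypothetical names, not libyuv API):

    #include <stdint.h>

    // If stride == row width in bytes, an H-row plane is one contiguous
    // buffer, so W x H pixels can be handled as one row of W * H pixels.
    void ConvertPlaneSketch(const uint8_t* src, int src_stride,
                            uint8_t* dst, int dst_stride,
                            int width, int height) {
      if (src_stride == width * 4 && dst_stride == width) {  // 4 = ARGB bpp
        width *= height;
        height = 1;
        src_stride = dst_stride = 0;  // strides unused with a single row
      }
      for (int y = 0; y < height; ++y) {
        // RowKernel(src, dst, width);  // per-row conversion (hypothetical)
        src += src_stride;
        dst += dst_stride;
      }
    }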
@@ -256,8 +256,23 @@ MAKEROWY(RGB24, 2, 1, 0, 3)
 MAKEROWY(RAW, 0, 1, 2, 3)
 #undef MAKEROWY
 
+// BT.601 mpeg range
+// b 0.1016 * 255 = 25.908 = 25
+// g 0.5078 * 255 = 129.489 = 129
+// r 0.2578 * 255 = 65.739 = 66
+// sum = 0.8672. 1/.8672 = 1.1531
+// BT.601 full range 8 bit (not used)
+// b 0.1016 * 1.1531 = 0.1172 * 255 = 29.886 = 30
+// g 0.5078 * 1.1531 = 0.5855 * 255 = 149.3025 = 149
+// r 0.2578 * 1.1531 = 0.2973 * 255 = 75.8115 = 76
+// 30 + 149 + 76 = 255
+// BT.601 full range 7 bit
+// b 0.1172 * 127 = 14.8844 = 15
+// g 0.5855 * 127 = 74.3585 = 74
+// r 0.2973 * 127 = 37.7571 = 38
 static __inline int RGBToYJ(uint8 r, uint8 g, uint8 b) {
-  return (66 * r + 129 * g + 25 * b + 0x0080) >> 8;
+  return (38 * r + 74 * g + 15 * b + 64) >> 7;
 }
 
 #define MAKEROWYJ(NAME, R, G, B, BPP) \
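
To see what the new coefficients buy, here is a side-by-side of the old and new scalar formulas on a gray ramp (a throwaway harness of mine, not from the tree). The old weights top out at Y = 219 for white, i.e. studio range; the new 7-bit weights reach 253, close to full range:

    #include <stdint.h>
    #include <stdio.h>

    static int YOld(uint8_t r, uint8_t g, uint8_t b) {  // pre-commit formula
      return (66 * r + 129 * g + 25 * b + 0x0080) >> 8;
    }
    static int YNew(uint8_t r, uint8_t g, uint8_t b) {  // post-commit formula
      return (38 * r + 74 * g + 15 * b + 64) >> 7;
    }

    int main() {
      for (int v = 0; v <= 255; v += 51) {  // gray levels 0, 51, ..., 255
        printf("gray %3d: old Y = %3d, new Y = %3d\n",
               v, YOld(v, v, v), YNew(v, v, v));
      }
      return 0;  // white: old Y = 219 (mpeg ceiling), new Y = 253
    }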
@@ -1338,9 +1338,9 @@ void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
 void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
   asm volatile (
-    "vmov.u8    d24, #13                       \n"  // B * 0.1016 coefficient
-    "vmov.u8    d25, #65                       \n"  // G * 0.5078 coefficient
-    "vmov.u8    d26, #33                       \n"  // R * 0.2578 coefficient
+    "vmov.u8    d24, #15                       \n"  // B * 0.1172 coefficient
+    "vmov.u8    d25, #74                       \n"  // G * 0.5855 coefficient
+    "vmov.u8    d26, #38                       \n"  // R * 0.2973 coefficient
     ".p2align   2                              \n"
   "1:                                          \n"
     "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 ARGB pixels.
@@ -35,6 +35,11 @@ CONST vec8 kARGBToY = {
   13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0
 };
 
+// JPeg full range.
+CONST vec8 kARGBToYJ = {
+  15, 74, 38, 0, 15, 74, 38, 0, 15, 74, 38, 0, 15, 74, 38, 0
+};
+
 CONST vec8 kARGBToU = {
   112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0
 };
@@ -86,6 +91,10 @@ CONST uvec8 kAddY16 = {
   16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u
 };
 
+CONST vec16 kAddYJ64 = {
+  64, 64, 64, 64, 64, 64, 64, 64
+};
+
 CONST uvec8 kAddUV128 = {
   128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u,
   128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u
@@ -645,6 +654,7 @@ void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
 void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
   asm volatile (
     "movdqa    %3,%%xmm4                       \n"
+    "movdqa    %4,%%xmm5                       \n"
     ".p2align  4                               \n"
   "1:                                          \n"
     "movdqa    (%0),%%xmm0                     \n"
@@ -658,6 +668,8 @@ void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
     "lea       0x40(%0),%0                     \n"
     "phaddw    %%xmm1,%%xmm0                   \n"
     "phaddw    %%xmm3,%%xmm2                   \n"
+    "paddw     %%xmm5,%%xmm0                   \n"
+    "paddw     %%xmm5,%%xmm2                   \n"
     "psrlw     $0x7,%%xmm0                     \n"
     "psrlw     $0x7,%%xmm2                     \n"
     "packuswb  %%xmm2,%%xmm0                   \n"
@@ -668,10 +680,11 @@ void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
   : "+r"(src_argb),   // %0
     "+r"(dst_y),      // %1
     "+r"(pix)         // %2
-  : "m"(kARGBToY)     // %3
+  : "m"(kARGBToYJ),   // %3
+    "m"(kAddYJ64)     // %4
   : "memory", "cc"
 #if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
 #endif
   );
 }
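
Two things change in the SSSE3 kernels: the pmaddubsw constants move to the full-range 15/74/38 set, and a new paddw of kAddYJ64 lands before the 7-bit shift so the result rounds to nearest rather than truncating, matching the +64 in the scalar RGBToYJ. A scalar model of one 16-bit lane (my sketch of what the instruction sequence computes, assuming the usual pmaddubsw/phaddw pairing):

    #include <assert.h>
    #include <stdint.h>

    // One Y lane: pmaddubsw + phaddw leave 15*B + 74*G + 38*R in a 16-bit
    // word (max 127 * 255 = 32385, no overflow); paddw adds the 64 bias so
    // that psrlw 7 computes round((15B + 74G + 38R) / 128).
    static uint8_t YJLane(uint8_t r, uint8_t g, uint8_t b) {
      uint16_t sum = (uint16_t)(15 * b + 74 * g + 38 * r);
      return (uint8_t)((sum + 64) >> 7);
    }

    int main() {
      // 191-gray: plain truncation would give (127 * 191) >> 7 = 189, but
      // the exact value is 189.51, so the biased shift rounds up to 190.
      assert(YJLane(191, 191, 191) == 190);
      return 0;
    }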
@@ -716,6 +729,7 @@ void ARGBToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
 void ARGBToYJRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
   asm volatile (
     "movdqa    %3,%%xmm4                       \n"
+    "movdqa    %4,%%xmm5                       \n"
     ".p2align  4                               \n"
   "1:                                          \n"
     "movdqu    (%0),%%xmm0                     \n"
@@ -729,6 +743,8 @@ void ARGBToYJRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
     "lea       0x40(%0),%0                     \n"
     "phaddw    %%xmm1,%%xmm0                   \n"
     "phaddw    %%xmm3,%%xmm2                   \n"
+    "paddw     %%xmm5,%%xmm0                   \n"
+    "paddw     %%xmm5,%%xmm2                   \n"
     "psrlw     $0x7,%%xmm0                     \n"
     "psrlw     $0x7,%%xmm2                     \n"
     "packuswb  %%xmm2,%%xmm0                   \n"
@@ -739,13 +755,15 @@ void ARGBToYJRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
   : "+r"(src_argb),   // %0
     "+r"(dst_y),      // %1
     "+r"(pix)         // %2
-  : "m"(kARGBToY)     // %3
+  : "m"(kARGBToYJ),   // %3
+    "m"(kAddYJ64)     // %4
   : "memory", "cc"
 #if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
 #endif
   );
 }
 // TODO(fbarchard): pass xmm constants to single block of assembly.
 // fpic on GCC 4.2 for OSX runs out of GPR registers. "m" effectively takes
 // 3 registers - ebx, ebp and eax. "m" can be passed with 3 normal registers,
@@ -25,6 +25,11 @@ static const vec8 kARGBToY = {
   13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0
 };
 
+// JPeg full range.
+static const vec8 kARGBToYJ = {
+  15, 74, 38, 0, 15, 74, 38, 0, 15, 74, 38, 0, 15, 74, 38, 0
+};
+
 static const lvec8 kARGBToY_AVX = {
   13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0,
   13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0
@@ -103,6 +108,10 @@ static const uvec8 kAddY16 = {
   16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u
 };
 
+static const vec16 kAddYJ64 = {
+  64, 64, 64, 64, 64, 64, 64, 64
+};
+
 static const ulvec8 kAddY16_AVX = {
   16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u,
   16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u,
@@ -671,7 +680,8 @@ void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
     mov        eax, [esp + 4]   /* src_argb */
     mov        edx, [esp + 8]   /* dst_y */
     mov        ecx, [esp + 12]  /* pix */
-    movdqa     xmm4, kARGBToY
+    movdqa     xmm4, kARGBToYJ
+    movdqa     xmm5, kAddYJ64
     align      16
 convertloop:
@@ -686,6 +696,8 @@ void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
     lea        eax, [eax + 64]
     phaddw     xmm0, xmm1
     phaddw     xmm2, xmm3
+    paddw      xmm0, xmm5
+    paddw      xmm2, xmm5
     psrlw      xmm0, 7
     psrlw      xmm2, 7
     packuswb   xmm0, xmm2
@@ -776,7 +788,8 @@ void ARGBToYJRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
     mov        eax, [esp + 4]   /* src_argb */
     mov        edx, [esp + 8]   /* dst_y */
     mov        ecx, [esp + 12]  /* pix */
-    movdqa     xmm4, kARGBToY
+    movdqa     xmm4, kARGBToYJ
+    movdqa     xmm5, kAddYJ64
     align      16
 convertloop:
@@ -791,6 +804,8 @@ void ARGBToYJRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
     lea        eax, [eax + 64]
     phaddw     xmm0, xmm1
     phaddw     xmm2, xmm3
+    paddw      xmm0, xmm5
+    paddw      xmm2, xmm5
     psrlw      xmm0, 7
     psrlw      xmm2, 7
     packuswb   xmm0, xmm2
@@ -203,7 +203,9 @@ TEST_F(libyuvTest, Psnr) {
                         kSrcWidth, kSrcHeight);
   EXPECT_GT(err, 4.0);
-  EXPECT_LT(err, 5.0);
+  if (kSrcWidth * kSrcHeight >= 256) {
+    EXPECT_LT(err, 5.0);
+  }
 
   srandom(time(NULL));
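
The PSNR upper bound is now applied only when the image has at least 256 samples: on tiny frames the mean squared error is estimated from too few pixels for a fixed 5.0 dB ceiling to be reliable. For reference, a sketch of the quantity being bounded (assumed form; libyuv's PSNR helper may differ in details such as the cap for identical buffers):

    #include <math.h>
    #include <stddef.h>
    #include <stdint.h>

    double PsnrSketch(const uint8_t* a, const uint8_t* b, size_t n) {
      double sse = 0.0;
      for (size_t i = 0; i < n; ++i) {
        const double d = (double)a[i] - (double)b[i];
        sse += d * d;  // with few samples, one outlier dominates the estimate
      }
      if (sse == 0.0) {
        return 128.0;  // assumed cap for identical buffers
      }
      return 10.0 * log10(255.0 * 255.0 * (double)n / sse);
    }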
@@ -35,7 +35,7 @@ namespace libyuv {
 #define TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
                        FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF) \
 TEST_F(libyuvTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
-  const int kWidth = W1280; \
+  const int kWidth = W1280 > 1 ? W1280 : 1; \
   const int kHeight = benchmark_height_; \
   align_buffer_64(src_y, kWidth * kHeight + OFF); \
   align_buffer_64(src_u, \
@@ -170,7 +170,7 @@ TESTPLANARTOP(I444, 1, 1, I444, 1, 1)
 #define TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
                         FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF) \
 TEST_F(libyuvTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
-  const int kWidth = W1280; \
+  const int kWidth = W1280 > 1 ? W1280 : 1; \
   const int kHeight = benchmark_height_; \
   align_buffer_64(src_y, kWidth * kHeight + OFF); \
   align_buffer_64(src_u, \
@@ -273,7 +273,7 @@ TESTPLANARTOBP(I420, 2, 2, NV21, 2, 2)
 #define TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
                          FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF) \
 TEST_F(libyuvTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
-  const int kWidth = W1280; \
+  const int kWidth = W1280 > 1 ? W1280 : 1; \
   const int kHeight = benchmark_height_; \
   align_buffer_64(src_y, kWidth * kHeight + OFF); \
   align_buffer_64(src_uv, 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
@@ -389,7 +389,7 @@ TESTBIPLANARTOP(NV21, 2, 2, I420, 2, 2)
 #define TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
                        W1280, DIFF, N, NEG, OFF, FMT_C, BPP_C) \
 TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N) { \
-  const int kWidth = W1280; \
+  const int kWidth = W1280 > 1 ? W1280 : 1; \
   const int kHeight = benchmark_height_; \
   const int kStrideB = ((kWidth * 8 * BPP_B + 7) / 8 + ALIGN - 1) / \
                        ALIGN * ALIGN; \
@@ -503,7 +503,7 @@ TESTPLANARTOB(I420, 2, 2, I400, 1, 1, 0, ARGB, 4)
 #define TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
                          W1280, DIFF, N, NEG, OFF) \
 TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N) { \
-  const int kWidth = W1280; \
+  const int kWidth = W1280 > 1 ? W1280 : 1; \
   const int kHeight = benchmark_height_; \
   const int kStrideB = kWidth * BPP_B; \
   align_buffer_64(src_y, kWidth * kHeight + OFF); \
@@ -582,7 +582,7 @@ TESTBIPLANARTOB(NV21, 2, 2, RGB565, 2, 9)
 #define TESTATOPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
                        W1280, DIFF, N, NEG, OFF) \
 TEST_F(libyuvTest, FMT_A##To##FMT_PLANAR##N) { \
-  const int kWidth = W1280; \
+  const int kWidth = W1280 > 1 ? W1280 : 1; \
   const int kHeight = benchmark_height_; \
   const int kStride = (kWidth * 8 * BPP_A + 7) / 8; \
   align_buffer_64(src_argb, kStride * kHeight + OFF); \
@@ -712,7 +712,7 @@ TESTATOPLANAR(BayerGRBG, 1, I420, 2, 2, 4)
 #define TESTATOBIPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
                          W1280, N, NEG, OFF) \
 TEST_F(libyuvTest, FMT_A##To##FMT_PLANAR##N) { \
-  const int kWidth = W1280; \
+  const int kWidth = W1280 > 1 ? W1280 : 1; \
   const int kHeight = benchmark_height_; \
   const int kStride = (kWidth * 8 * BPP_A + 7) / 8; \
   align_buffer_64(src_argb, kStride * kHeight + OFF); \
@@ -789,7 +789,7 @@ TESTATOBIPLANAR(ARGB, 4, NV21, 2, 2)
                   FMT_B, BPP_B, STRIDE_B, \
                   W1280, DIFF, N, NEG, OFF) \
 TEST_F(libyuvTest, FMT_A##To##FMT_B##N) { \
-  const int kWidth = W1280; \
+  const int kWidth = W1280 > 1 ? W1280 : 1; \
   const int kHeight = benchmark_height_; \
   const int kStrideA = (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
   const int kStrideB = (kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \
@@ -814,6 +814,7 @@ TEST_F(libyuvTest, FMT_A##To##FMT_B##N) { \
   } \
   int max_diff = 0; \
   for (int i = 0; i < kStrideB * kHeight; ++i) { \
+    EXPECT_NEAR(dst_argb_c[i], dst_argb_opt[i], DIFF); \
     int abs_diff = \
         abs(static_cast<int>(dst_argb_c[i]) - \
            static_cast<int>(dst_argb_opt[i])); \
@@ -859,6 +860,7 @@ TEST_F(libyuvTest, FMT_A##To##FMT_B##_Random) { \
     int abs_diff = \
         abs(static_cast<int>(dst_argb_c[i]) - \
             static_cast<int>(dst_argb_opt[i])); \
+    EXPECT_NEAR(dst_argb_c[i], dst_argb_opt[i], DIFF); \
     if (abs_diff > max_diff) { \
       max_diff = abs_diff; \
     } \
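
The two EXPECT_NEAR additions above complement the existing max_diff bookkeeping: the max_diff check fires only once after the loop, while EXPECT_NEAR flags each out-of-tolerance byte at its index, which makes regressions much easier to localize. And since the C and SSSE3 J400 paths now perform identical arithmetic (same 15/74/38 weights, same +64 rounding), they should agree bit-exactly, which is presumably why the ARGBToJ400 tolerance drops from 2 to 0 in the next hunk. A small gtest-style illustration of the added check (hypothetical buffers, not libyuv code):

    #include <gtest/gtest.h>
    #include <cstdint>

    TEST(ConvertSketch, PerByteTolerance) {
      const uint8_t c_result[4]   = {10, 20, 30, 40};  // pretend C output
      const uint8_t opt_result[4] = {10, 21, 30, 40};  // pretend SIMD output
      const int kDiff = 1;  // allowed rounding skew between the two paths
      for (int i = 0; i < 4; ++i) {
        // Passes iff |c - opt| <= kDiff; on failure names the byte index.
        EXPECT_NEAR(c_result[i], opt_result[i], kDiff) << "byte " << i;
      }
    }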
@@ -903,7 +905,7 @@ TESTATOB(ARGB, 4, 4, 1, BayerGRBG, 1, 2, 2, 0)
 TESTATOB(ARGB, 4, 4, 1, YUY2, 2, 4, 1, 4)
 TESTATOB(ARGB, 4, 4, 1, UYVY, 2, 4, 1, 4)
 TESTATOB(ARGB, 4, 4, 1, I400, 1, 1, 1, 2)
-TESTATOB(ARGB, 4, 4, 1, J400, 1, 1, 1, 2)
+TESTATOB(ARGB, 4, 4, 1, J400, 1, 1, 1, 0)
 TESTATOB(BGRA, 4, 4, 1, ARGB, 4, 4, 1, 0)
 TESTATOB(ABGR, 4, 4, 1, ARGB, 4, 4, 1, 0)
 TESTATOB(RGBA, 4, 4, 1, ARGB, 4, 4, 1, 0)
@@ -107,6 +107,9 @@ TEST_F(libyuvTest, TestAttenuate) {
 static int TestAttenuateI(int width, int height, int benchmark_iterations,
                           int invert, int off) {
+  if (width < 1) {
+    width = 1;
+  }
   const int kBpp = 4;
   const int kStride = (width * kBpp + 15) & ~15;
   align_buffer_64(src_argb, kStride * height + off);
@@ -170,6 +173,9 @@ TEST_F(libyuvTest, ARGBAttenuate_Opt) {
 static int TestUnattenuateI(int width, int height, int benchmark_iterations,
                             int invert, int off) {
+  if (width < 1) {
+    width = 1;
+  }
   const int kBpp = 4;
   const int kStride = (width * kBpp + 15) & ~15;
   align_buffer_64(src_argb, kStride * height + off);
@@ -787,6 +793,9 @@ TESTINTERPOLATE(85)
 static int TestBlend(int width, int height, int benchmark_iterations,
                      int invert, int off) {
+  if (width < 1) {
+    width = 1;
+  }
   const int kBpp = 4;
   const int kStride = width * kBpp;
   align_buffer_64(src_argb_a, kStride * height + off);
@@ -1101,6 +1110,9 @@ TEST_F(libyuvTest, TestCopyPlane) {
 static int TestMultiply(int width, int height, int benchmark_iterations,
                         int invert, int off) {
+  if (width < 1) {
+    width = 1;
+  }
   const int kBpp = 4;
   const int kStride = (width * kBpp + 15) & ~15;
   align_buffer_64(src_argb_a, kStride * height + off);
@@ -1169,6 +1181,9 @@ TEST_F(libyuvTest, ARGBMultiply_Opt) {
 static int TestAdd(int width, int height, int benchmark_iterations,
                    int invert, int off) {
+  if (width < 1) {
+    width = 1;
+  }
   const int kBpp = 4;
   const int kStride = (width * kBpp + 15) & ~15;
   align_buffer_64(src_argb_a, kStride * height + off);
@@ -1237,6 +1252,9 @@ TEST_F(libyuvTest, ARGBAdd_Opt) {
 static int TestSubtract(int width, int height, int benchmark_iterations,
                         int invert, int off) {
+  if (width < 1) {
+    width = 1;
+  }
   const int kBpp = 4;
   const int kStride = (width * kBpp + 15) & ~15;
   align_buffer_64(src_argb_a, kStride * height + off);
@@ -1305,6 +1323,9 @@ TEST_F(libyuvTest, ARGBSubtract_Opt) {
 static int TestSobel(int width, int height, int benchmark_iterations,
                      int invert, int off) {
+  if (width < 1) {
+    width = 1;
+  }
   const int kBpp = 4;
   const int kStride = (width * kBpp + 15) & ~15;
   align_buffer_64(src_argb_a, kStride * height + off);
@@ -1368,6 +1389,9 @@ TEST_F(libyuvTest, ARGBSobel_Opt) {
 static int TestSobelXY(int width, int height, int benchmark_iterations,
                        int invert, int off) {
+  if (width < 1) {
+    width = 1;
+  }
   const int kBpp = 4;
   const int kStride = (width * kBpp + 15) & ~15;
   align_buffer_64(src_argb_a, kStride * height + off);