Commit afd1d6b4 authored by fbarchard@google.com

Fix 2 bugs with Luma scale

BUG=267
TEST=luma unittest improved
R=ryanpetrie@google.com

Review URL: https://webrtc-codereview.appspot.com/2260005

git-svn-id: http://libyuv.googlecode.com/svn/trunk@794 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 7a0d01ef
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 793
Version: 794
License: BSD
License File: LICENSE
......
......@@ -146,7 +146,8 @@ extern "C" {
// TODO(fbarchard): Port to gcc.
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
// Effects:
#define HAS_ARGBLUMACOLORTABLEROW_SSSE3
// TODO(fbarchard): Optimize and enable
// #define HAS_ARGBLUMACOLORTABLEROW_SSSE3
// Caveat: Visual C 2012 required for AVX2.
#if _MSC_VER >= 1700
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 793
#define LIBYUV_VERSION 794
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -2090,7 +2090,7 @@ int ARGBLumaColorTable(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, const uint8* luma,
int width) = ARGBLumaColorTableRow_C;
#if defined(HAS_ARGBLUMACOLORTABLEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4)) {
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4)) {
ARGBLumaColorTableRow = ARGBLumaColorTableRow_SSSE3;
}
#endif
......
......@@ -6893,8 +6893,8 @@ void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb,
uint8* dst_argb, const uint8* luma,
int width) {
SIMD_ALIGNED(uint8* lut4[4]);
ARGBToYJx4_SSSE3(src_argb, luma, lut4);
for (int i = 0; i < width - 3; i += 4) {
ARGBToYJx4_SSSE3(src_argb, luma, lut4);
// Luminance in rows, color values in columns.
const uint8* luma0 = lut4[0];
dst_argb[0] = luma0[src_argb[0]];
......
......@@ -1658,7 +1658,8 @@ TEST_F(libyuvTest, ARGBBlur_Opt) {
TEST_F(libyuvTest, TestARGBPolynomial) {
SIMD_ALIGNED(uint8 orig_pixels[1280][4]);
SIMD_ALIGNED(uint8 dst_pixels[1280][4]);
SIMD_ALIGNED(uint8 dst_pixels_opt[1280][4]);
SIMD_ALIGNED(uint8 dst_pixels_c[1280][4]);
memset(orig_pixels, 0, sizeof(orig_pixels));
SIMD_ALIGNED(static const float kWarmifyPolynomial[16]) = {
......@@ -1683,30 +1684,39 @@ TEST_F(libyuvTest, TestARGBPolynomial) {
orig_pixels[2][1] = 0u;
orig_pixels[2][2] = 255u;
orig_pixels[2][3] = 255u;
// Test white
orig_pixels[3][0] = 255u;
orig_pixels[3][1] = 255u;
orig_pixels[3][2] = 255u;
orig_pixels[3][3] = 255u;
// Test color
orig_pixels[3][0] = 16u;
orig_pixels[3][1] = 64u;
orig_pixels[3][2] = 192u;
orig_pixels[3][3] = 224u;
orig_pixels[4][0] = 16u;
orig_pixels[4][1] = 64u;
orig_pixels[4][2] = 192u;
orig_pixels[4][3] = 224u;
// Do 16 to test asm version.
ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels[0][0], 0,
ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
&kWarmifyPolynomial[0], 16, 1);
EXPECT_EQ(235u, dst_pixels[0][0]);
EXPECT_EQ(0u, dst_pixels[0][1]);
EXPECT_EQ(0u, dst_pixels[0][2]);
EXPECT_EQ(128u, dst_pixels[0][3]);
EXPECT_EQ(0u, dst_pixels[1][0]);
EXPECT_EQ(233u, dst_pixels[1][1]);
EXPECT_EQ(0u, dst_pixels[1][2]);
EXPECT_EQ(0u, dst_pixels[1][3]);
EXPECT_EQ(0u, dst_pixels[2][0]);
EXPECT_EQ(0u, dst_pixels[2][1]);
EXPECT_EQ(241u, dst_pixels[2][2]);
EXPECT_EQ(255u, dst_pixels[2][3]);
EXPECT_EQ(10u, dst_pixels[3][0]);
EXPECT_EQ(59u, dst_pixels[3][1]);
EXPECT_EQ(188u, dst_pixels[3][2]);
EXPECT_EQ(224u, dst_pixels[3][3]);
EXPECT_EQ(235u, dst_pixels_opt[0][0]);
EXPECT_EQ(0u, dst_pixels_opt[0][1]);
EXPECT_EQ(0u, dst_pixels_opt[0][2]);
EXPECT_EQ(128u, dst_pixels_opt[0][3]);
EXPECT_EQ(0u, dst_pixels_opt[1][0]);
EXPECT_EQ(233u, dst_pixels_opt[1][1]);
EXPECT_EQ(0u, dst_pixels_opt[1][2]);
EXPECT_EQ(0u, dst_pixels_opt[1][3]);
EXPECT_EQ(0u, dst_pixels_opt[2][0]);
EXPECT_EQ(0u, dst_pixels_opt[2][1]);
EXPECT_EQ(241u, dst_pixels_opt[2][2]);
EXPECT_EQ(255u, dst_pixels_opt[2][3]);
EXPECT_EQ(235u, dst_pixels_opt[3][0]);
EXPECT_EQ(233u, dst_pixels_opt[3][1]);
EXPECT_EQ(241u, dst_pixels_opt[3][2]);
EXPECT_EQ(255u, dst_pixels_opt[3][3]);
EXPECT_EQ(10u, dst_pixels_opt[4][0]);
EXPECT_EQ(59u, dst_pixels_opt[4][1]);
EXPECT_EQ(188u, dst_pixels_opt[4][2]);
EXPECT_EQ(224u, dst_pixels_opt[4][3]);
for (int i = 0; i < 1280; ++i) {
orig_pixels[i][0] = i;
......@@ -1714,15 +1724,29 @@ TEST_F(libyuvTest, TestARGBPolynomial) {
orig_pixels[i][2] = i / 3;
orig_pixels[i][3] = i;
}
MaskCpuFlags(0);
ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0,
&kWarmifyPolynomial[0], 1280, 1);
MaskCpuFlags(-1);
for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels[0][0], 0,
ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
&kWarmifyPolynomial[0], 1280, 1);
}
for (int i = 0; i < 1280; ++i) {
EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]);
EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]);
EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]);
EXPECT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]);
}
}
TEST_F(libyuvTest, TestARGBLumaColorTable) {
SIMD_ALIGNED(uint8 orig_pixels[1280][4]);
SIMD_ALIGNED(uint8 dst_pixels[1280][4]);
SIMD_ALIGNED(uint8 dst_pixels_opt[1280][4]);
SIMD_ALIGNED(uint8 dst_pixels_c[1280][4]);
memset(orig_pixels, 0, sizeof(orig_pixels));
SIMD_ALIGNED(uint8 kLumaColorTable[32768]);
......@@ -1752,24 +1776,24 @@ TEST_F(libyuvTest, TestARGBLumaColorTable) {
orig_pixels[3][2] = 192u;
orig_pixels[3][3] = 224u;
// Do 16 to test asm version.
ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels[0][0], 0,
ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
&kLumaColorTable[0], 16, 1);
EXPECT_EQ(253u, dst_pixels[0][0]);
EXPECT_EQ(0u, dst_pixels[0][1]);
EXPECT_EQ(0u, dst_pixels[0][2]);
EXPECT_EQ(128u, dst_pixels[0][3]);
EXPECT_EQ(0u, dst_pixels[1][0]);
EXPECT_EQ(253u, dst_pixels[1][1]);
EXPECT_EQ(0u, dst_pixels[1][2]);
EXPECT_EQ(0u, dst_pixels[1][3]);
EXPECT_EQ(0u, dst_pixels[2][0]);
EXPECT_EQ(0u, dst_pixels[2][1]);
EXPECT_EQ(253u, dst_pixels[2][2]);
EXPECT_EQ(255u, dst_pixels[2][3]);
EXPECT_EQ(48u, dst_pixels[3][0]);
EXPECT_EQ(192u, dst_pixels[3][1]);
EXPECT_EQ(64u, dst_pixels[3][2]);
EXPECT_EQ(224u, dst_pixels[3][3]);
EXPECT_EQ(253u, dst_pixels_opt[0][0]);
EXPECT_EQ(0u, dst_pixels_opt[0][1]);
EXPECT_EQ(0u, dst_pixels_opt[0][2]);
EXPECT_EQ(128u, dst_pixels_opt[0][3]);
EXPECT_EQ(0u, dst_pixels_opt[1][0]);
EXPECT_EQ(253u, dst_pixels_opt[1][1]);
EXPECT_EQ(0u, dst_pixels_opt[1][2]);
EXPECT_EQ(0u, dst_pixels_opt[1][3]);
EXPECT_EQ(0u, dst_pixels_opt[2][0]);
EXPECT_EQ(0u, dst_pixels_opt[2][1]);
EXPECT_EQ(253u, dst_pixels_opt[2][2]);
EXPECT_EQ(255u, dst_pixels_opt[2][3]);
EXPECT_EQ(48u, dst_pixels_opt[3][0]);
EXPECT_EQ(192u, dst_pixels_opt[3][1]);
EXPECT_EQ(64u, dst_pixels_opt[3][2]);
EXPECT_EQ(224u, dst_pixels_opt[3][3]);
for (int i = 0; i < 1280; ++i) {
orig_pixels[i][0] = i;
......@@ -1777,10 +1801,22 @@ TEST_F(libyuvTest, TestARGBLumaColorTable) {
orig_pixels[i][2] = i / 3;
orig_pixels[i][3] = i;
}
MaskCpuFlags(0);
ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0,
&kLumaColorTable[0], 1280, 1);
MaskCpuFlags(-1);
for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels[0][0], 0,
ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
&kLumaColorTable[0], 1280, 1);
}
for (int i = 0; i < 1280; ++i) {
EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]);
EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]);
EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]);
EXPECT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]);
}
}
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment