Commit ae0091e3 authored by fbarchard@google.com's avatar fbarchard@google.com

ARGBPolynomial for applying a 3 term polynomial matrix to pixels.

BUG=265
TEST=ARGBPolynomial
R=thorcarpenter@google.com

Review URL: https://webrtc-codereview.appspot.com/2159005

git-svn-id: http://libyuv.googlecode.com/svn/trunk@778 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 2154de41
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 777
Version: 778
License: BSD
License File: LICENSE
......
......@@ -210,6 +210,20 @@ int RGBColorTable(uint8* dst_argb, int dst_stride_argb,
const uint8* table_argb,
int x, int y, int width, int height);
// Apply a cubic polynomial to each channel of every ARGB pixel.
// poly points to a 4x4 matrix of 16 floats. The 1st row is constants. The 2nd
// row is coefficients for b, g, r and a. The 3rd row is coefficients for b
// squared, g squared, r squared and a squared. The 4th row is coefficients for
// b cubed, g cubed, r cubed and a cubed. The terms are summed per channel and
// the result is clamped to 0 to 255.
// Returns 0 on success, or -1 if a pointer is NULL or if width or height is
// not positive.
// A polynomial approximation can be derived using software such as 'R'.
LIBYUV_API
int ARGBPolynomial(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
const float* poly,
int width, int height);
// Quantize a rectangle of ARGB. Alpha unaffected.
// scale is a 16 bit fractional fixed point scaler between 0 and 65535.
// interval_size should be a value between 1 and 255.
......
......@@ -140,8 +140,11 @@ extern "C" {
// The following are Windows only:
// TODO(fbarchard): Port to gcc.
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
// Effects:
#define HAS_ARGBCOLORTABLEROW_X86
#define HAS_RGBCOLORTABLEROW_X86
#define HAS_ARGBPOLYNOMIALROW_SSE2
// Caveat: Visual C 2012 required for AVX2.
#if _MSC_VER >= 1700
#define HAS_ARGBSHUFFLEROW_AVX2
......@@ -320,6 +323,7 @@ typedef __declspec(align(32)) uint32 ulvec32[8];
typedef __declspec(align(32)) uint8 ulvec8[32];
#elif defined(__GNUC__)
// Caveat GCC 4.2 to 4.7 have a known issue using vectors with const.
#define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
typedef int16 __attribute__((vector_size(16))) vec16;
typedef int32 __attribute__((vector_size(16))) vec32;
......@@ -1542,6 +1546,14 @@ void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
uint8* dst_argb, int width);
// Apply the 4x4 float polynomial matrix in poly to one row of width ARGB
// pixels, reading 4 bytes per pixel from src_argb and writing 4 bytes per
// pixel to dst_argb. Portable C++ version.
void ARGBPolynomialRow_C(const uint8* src_argb,
uint8* dst_argb, const float* poly,
int width);
// Same operation written in x86 inline assembly. The definition is compiled
// only when HAS_ARGBPOLYNOMIALROW_SSE2 is defined.
void ARGBPolynomialRow_SSE2(const uint8* src_argb,
uint8* dst_argb, const float* poly,
int width);
// Divide num by div and return as 16.16 fixed point result.
int FixedDiv_C(int num, int div);
int FixedDiv_X86(int num, int div);
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 777
#define LIBYUV_VERSION 778
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -2032,6 +2032,38 @@ int ARGBSobelXY(const uint8* src_argb, int src_stride_argb,
return 0;
}
// Apply a 4x4 polynomial to each ARGB pixel.
// src_argb/dst_argb point to the first pixel of the source and destination;
// the stride arguments are the byte distance between successive rows.
// poly points to 16 floats: 4 constants, then the 4 linear, 4 squared and
// 4 cubed coefficients, each group ordered b, g, r, a.
// Returns 0 on success, or -1 if a pointer is NULL or if width or height is
// not positive.
LIBYUV_API
int ARGBPolynomial(const uint8* src_argb, int src_stride_argb,
                   uint8* dst_argb, int dst_stride_argb,
                   const float* poly,
                   int width, int height) {
  if (!src_argb || !dst_argb || !poly || width <= 0 || height <= 0) {
    return -1;
  }
  // Largest value an int can hold; used to bound the products below.
  const int kMaxInt = static_cast<int>(~0u >> 1);
  // Coalesce contiguous rows into a single long row. This is done in place
  // rather than by calling ARGBPolynomial again, and only when neither
  // width * 4 nor width * height can overflow an int; otherwise the image is
  // simply processed one row at a time.
  if (width <= kMaxInt / 4 &&
      src_stride_argb == width * 4 &&
      dst_stride_argb == width * 4 &&
      height <= kMaxInt / width) {
    width *= height;
    height = 1;
    src_stride_argb = 0;
    dst_stride_argb = 0;
  }
  // Start with the portable row function and upgrade if the CPU allows it.
  void (*ARGBPolynomialRow)(const uint8* src_argb,
                            uint8* dst_argb, const float* poly,
                            int width) = ARGBPolynomialRow_C;
#if defined(HAS_ARGBPOLYNOMIALROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBPolynomialRow = ARGBPolynomialRow_SSE2;
  }
#endif
  for (int y = 0; y < height; ++y) {
    ARGBPolynomialRow(src_argb, dst_argb, poly, width);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
......
......@@ -2009,8 +2009,48 @@ void UYVYToARGBRow_Unaligned_SSSE3(const uint8* src_uyvy,
UYVYToYRow_Unaligned_SSE2(src_uyvy, row_y, width);
I422ToARGBRow_Unaligned_SSSE3(row_y, row_u, row_v, dst_argb, width);
}
#endif // defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)
#endif // !defined(LIBYUV_DISABLE_X86)
void ARGBPolynomialRow_C(const uint8* src_argb,
uint8* dst_argb, const float* poly,
int width) {
for (int i = 0; i < width; ++i) {
float b = static_cast<float>(src_argb[0]);
float g = static_cast<float>(src_argb[1]);
float r = static_cast<float>(src_argb[2]);
float a = static_cast<float>(src_argb[3]);
float b2 = b * b;
float g2 = g * g;
float r2 = r * r;
float a2 = a * a;
float db = poly[0] + poly[4] * b;
float dg = poly[1] + poly[5] * g;
float dr = poly[2] + poly[6] * r;
float da = poly[3] + poly[7] * a;
db += poly[8] * b2;
dg += poly[9] * g2;
dr += poly[10] * r2;
da += poly[11] * a2;
float b3 = b2 * b;
float g3 = g2 * g;
float r3 = r2 * r;
float a3 = a2 * a;
db += poly[12] * b3;
dg += poly[13] * g3;
dr += poly[14] * r3;
da += poly[15] * a3;
dst_argb[0]= Clamp(static_cast<int32>(db));
dst_argb[1]= Clamp(static_cast<int32>(dg));
dst_argb[2]= Clamp(static_cast<int32>(dr));
dst_argb[3]= Clamp(static_cast<int32>(da));
src_argb += 4;
dst_argb += 4;
}
}
#undef clamp0
#undef clamp255
......
......@@ -6767,6 +6767,53 @@ int FixedDiv_X86(int num, int div) {
}
}
#endif // HAS_FIXEDDIV_X86
#ifdef HAS_ARGBPOLYNOMIALROW_SSE2
// Apply a cubic polynomial to each channel of a row of ARGB pixels.
// poly points to 16 floats (4 rows of 4: constants, linear, squared and cubed
// coefficients); the rows are loaded with unaligned loads into xmm4..xmm7.
// One pixel (4 bytes) is converted per loop iteration. The loop body runs
// before width is tested, so at least one pixel is always processed.
// Results are truncated to integers and saturated to 0..255.
__declspec(naked) __declspec(align(16))
void ARGBPolynomialRow_SSE2(const uint8* src_argb,
                            uint8* dst_argb, const float* poly,
                            int width) {
  __asm {
    mov        eax, [esp + 12]  /* poly */
    movdqu     xmm4, [eax]      // C0: constants
    movdqu     xmm5, [eax + 16] // C1: linear coefficients
    movdqu     xmm6, [eax + 32] // C2: squared coefficients
    movdqu     xmm7, [eax + 48] // C3: cubed coefficients
    mov        eax, [esp + 4]   /* src_argb */
    mov        edx, [esp + 8]   /* dst_argb */
    mov        ecx, [esp + 16]  /* width */
    pxor       xmm3, xmm3       // zero, used to widen 4 bytes to 4 ints

    align      16
 convertloop:
    movd       xmm0, [eax]      // BGRA
    lea        eax, [eax + 4]
    punpcklbw  xmm0, xmm3       // bytes to words
    punpcklwd  xmm0, xmm3       // words to dwords
    cvtdq2ps   xmm0, xmm0       // 4 floats
    movdqa     xmm1, xmm0       // X
    mulps      xmm0, xmm5       // C1 * X
    addps      xmm0, xmm4       // result = C0 + C1 * X
    movdqa     xmm2, xmm1
    mulps      xmm2, xmm1       // X * X
    mulps      xmm1, xmm2       // X * X * X
    mulps      xmm2, xmm6       // C2 * X * X
    mulps      xmm1, xmm7       // C3 * X * X * X
    addps      xmm0, xmm2       // result += C2 * X * X
    addps      xmm0, xmm1       // result += C3 * X * X * X
    cvttps2dq  xmm0, xmm0       // truncate to 4 signed dwords
    // Saturate dwords to signed words first, then words to unsigned bytes.
    // packuswb treats its input as signed 16 bit lanes, so applying it
    // directly to dwords would mis-clamp values outside -32768..32767.
    packssdw   xmm0, xmm0
    packuswb   xmm0, xmm0
    sub        ecx, 1           // sets flags for jg; movd/lea leave them alone
    movd       [edx], xmm0
    lea        edx, [edx + 4]
    jg         convertloop
    ret
  }
}
#endif  // HAS_ARGBPOLYNOMIALROW_SSE2
#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
#ifdef __cplusplus
......
......@@ -1656,4 +1656,68 @@ TEST_F(libyuvTest, ARGBBlur_Opt) {
EXPECT_LE(max_diff, 1);
}
// Checks ARGBPolynomial against hand-computed results for four pixels using a
// "warmify" polynomial, then runs it repeatedly over 1280 pixels as a
// benchmark (the benchmark output is not verified).
TEST_F(libyuvTest, TestARGBPolynomial) {
  SIMD_ALIGNED(uint8 orig_pixels[1280][4]);
  SIMD_ALIGNED(uint8 dst_pixels[1280][4]);

  static const float kWarmifyPolynomial[16] = {
    0.94230f,  -3.03300f,    -2.92500f,    0.f,  // C0
    0.584500f,  1.112000f,    1.535000f,   1.f,  // C1 x
    0.001313f, -0.002503f,   -0.004496f,   0.f,  // C2 x * x
    0.0f,       0.000006965f, 0.000008781f, 0.f,  // C3 x * x * x
  };

  // Zero the source so that the pixels beyond the four set by hand below are
  // defined when the 16 pixel call reads them.
  for (int i = 0; i < 1280; ++i) {
    for (int j = 0; j < 4; ++j) {
      orig_pixels[i][j] = 0u;
    }
  }

  // Test blue
  orig_pixels[0][0] = 255u;
  orig_pixels[0][1] = 0u;
  orig_pixels[0][2] = 0u;
  orig_pixels[0][3] = 128u;
  // Test green
  orig_pixels[1][0] = 0u;
  orig_pixels[1][1] = 255u;
  orig_pixels[1][2] = 0u;
  orig_pixels[1][3] = 0u;
  // Test red
  orig_pixels[2][0] = 0u;
  orig_pixels[2][1] = 0u;
  orig_pixels[2][2] = 255u;
  orig_pixels[2][3] = 255u;
  // Test color
  orig_pixels[3][0] = 16u;
  orig_pixels[3][1] = 64u;
  orig_pixels[3][2] = 192u;
  orig_pixels[3][3] = 224u;
  // Do 16 to test asm version.
  ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels[0][0], 0,
                 &kWarmifyPolynomial[0], 16, 1);
  EXPECT_EQ(235u, dst_pixels[0][0]);
  EXPECT_EQ(0u, dst_pixels[0][1]);
  EXPECT_EQ(0u, dst_pixels[0][2]);
  EXPECT_EQ(128u, dst_pixels[0][3]);
  EXPECT_EQ(0u, dst_pixels[1][0]);
  EXPECT_EQ(233u, dst_pixels[1][1]);
  EXPECT_EQ(0u, dst_pixels[1][2]);
  EXPECT_EQ(0u, dst_pixels[1][3]);
  EXPECT_EQ(0u, dst_pixels[2][0]);
  EXPECT_EQ(0u, dst_pixels[2][1]);
  EXPECT_EQ(241u, dst_pixels[2][2]);
  EXPECT_EQ(255u, dst_pixels[2][3]);
  EXPECT_EQ(10u, dst_pixels[3][0]);
  EXPECT_EQ(59u, dst_pixels[3][1]);
  EXPECT_EQ(188u, dst_pixels[3][2]);
  EXPECT_EQ(224u, dst_pixels[3][3]);

  // Fill all 1280 pixels with a ramp for the benchmark run.
  for (int i = 0; i < 1280; ++i) {
    orig_pixels[i][0] = i;
    orig_pixels[i][1] = i / 2;
    orig_pixels[i][2] = i / 3;
    orig_pixels[i][3] = i;
  }

  for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
    ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels[0][0], 0,
                   &kWarmifyPolynomial[0], 1280, 1);
  }
}
} // namespace libyuv
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment