Commit c3b04796 authored by fbarchard@google.com

ARGBPolynomial ported to gcc

BUG=265
TESTED=try bots
R=changjun.yang@intel.com

Review URL: https://webrtc-codereview.appspot.com/2191007

git-svn-id: http://libyuv.googlecode.com/svn/trunk@786 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 7e7c7753
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 785
Version: 786
License: BSD
License File: LICENSE
......
......@@ -98,6 +98,7 @@ extern "C" {
// Conversions:
#define HAS_ARGB1555TOARGBROW_SSE2
#define HAS_ARGB4444TOARGBROW_SSE2
#define HAS_ARGBPOLYNOMIALROW_SSE2
#define HAS_ARGBTOARGB1555ROW_SSE2
#define HAS_ARGBTOARGB4444ROW_SSE2
#define HAS_ARGBTORAWROW_SSSE3
......@@ -142,7 +143,6 @@ extern "C" {
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
// Effects:
#define HAS_ARGBCOLORTABLEROW_X86
#define HAS_ARGBPOLYNOMIALROW_SSE2
#define HAS_RGBCOLORTABLEROW_X86
#define HAS_ARGBLUMACOLORTABLEROW_SSE2
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 785
#define LIBYUV_VERSION 786
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -2075,13 +2075,22 @@ void ARGBLumaColorTableRow_C(const uint8* src_argb,
dst_argb[3] = src_argb[3];
const uint8* luma1 = RGBToYJx256(src_argb[6], src_argb[5], src_argb[4]) +
luma;
dst_argb[4] = luma0[src_argb[4]];
dst_argb[5] = luma0[src_argb[5]];
dst_argb[6] = luma0[src_argb[6]];
dst_argb[4] = luma1[src_argb[4]];
dst_argb[5] = luma1[src_argb[5]];
dst_argb[6] = luma1[src_argb[6]];
dst_argb[7] = src_argb[7];
src_argb += 8;
dst_argb += 8;
}
if (width & 1) {
// Luminance in rows, color values in columns.
const uint8* luma0 = RGBToYJx256(src_argb[2], src_argb[1], src_argb[0]) +
luma;
dst_argb[0] = luma0[src_argb[0]];
dst_argb[1] = luma0[src_argb[1]];
dst_argb[2] = luma0[src_argb[2]];
dst_argb[3] = src_argb[3];
}
}
#undef clamp0
......
......@@ -5816,6 +5816,65 @@ int FixedDiv_X86(int num, int div) {
return num;
}
#endif // HAS_FIXEDDIV_X86
#ifdef HAS_ARGBPOLYNOMIALROW_SSE2
// Applies a cubic polynomial to every byte of a row of 4-byte pixels using
// SSE2 (GCC-style inline assembly).
//
// For each source byte X (converted to float) the loop computes
//   C0 + C1 * X + C2 * X^2 + C3 * X^3
// truncates the result to an integer and narrows it back to a byte with
// saturating packs.
//
// src_argb: source row; 8 bytes (2 pixels) are read per loop iteration.
// dst_argb: destination row; 8 bytes (2 pixels) are written per iteration.
// poly:     16 floats laid out as four groups of four: C0 at byte offset
//           0x00, C1 at 0x10, C2 at 0x20, C3 at 0x30. Each group holds one
//           coefficient per byte lane of a pixel; both pixels of an
//           iteration use the same coefficients.
//           NOTE: poly is used directly as a memory operand of mulps/addps,
//           which require a 16-byte aligned address in their SSE encoding.
// width:    pixel count. The loop body runs before the count is tested and
//           consumes 2 pixels per pass, so at least 2 pixels are always
//           processed and an odd width is effectively rounded up to even.
void ARGBPolynomialRow_SSE2(const uint8* src_argb,
uint8* dst_argb, const float* poly,
int width) {
asm volatile (
// xmm3 = 0; it is never written again and serves as the zero source for
// widening bytes -> words -> dwords below.
"pxor %%xmm3,%%xmm3 \n"
// 2 pixel loop.
".p2align 4 \n"
"1: \n"
// Load 8 source bytes (2 pixels) and advance the source pointer.
"movq (%0),%%xmm0 \n"
"lea 0x8(%0),%0 \n"
// Zero-extend: 8 bytes -> 8 words, then split into two registers of
// 4 dwords each (xmm0 = first pixel, xmm4 = second pixel).
"punpcklbw %%xmm3,%%xmm0 \n"
"movdqa %%xmm0,%%xmm4 \n"
"punpcklwd %%xmm3,%%xmm0 \n"
"punpckhwd %%xmm3,%%xmm4 \n"
// Convert the integer lanes to float: X.
"cvtdq2ps %%xmm0,%%xmm0 \n"
"cvtdq2ps %%xmm4,%%xmm4 \n"
// Keep copies of X in xmm1/xmm5 for the higher powers.
"movdqa %%xmm0,%%xmm1 \n"
"movdqa %%xmm4,%%xmm5 \n"
// xmm0/xmm4 = C0 + C1 * X.
"mulps 0x10(%3),%%xmm0 \n"
"mulps 0x10(%3),%%xmm4 \n"
"addps (%3),%%xmm0 \n"
"addps (%3),%%xmm4 \n"
// xmm2/xmm6 = X * X, then xmm1/xmm5 = X * X * X.
"movdqa %%xmm1,%%xmm2 \n"
"movdqa %%xmm5,%%xmm6 \n"
"mulps %%xmm1,%%xmm2 \n"
"mulps %%xmm5,%%xmm6 \n"
"mulps %%xmm2,%%xmm1 \n"
"mulps %%xmm6,%%xmm5 \n"
// Scale the powers: xmm2/xmm6 = C2 * X^2, xmm1/xmm5 = C3 * X^3.
"mulps 0x20(%3),%%xmm2 \n"
"mulps 0x20(%3),%%xmm6 \n"
"mulps 0x30(%3),%%xmm1 \n"
"mulps 0x30(%3),%%xmm5 \n"
// Accumulate all four terms into xmm0/xmm4.
"addps %%xmm2,%%xmm0 \n"
"addps %%xmm6,%%xmm4 \n"
"addps %%xmm1,%%xmm0 \n"
"addps %%xmm5,%%xmm4 \n"
// Convert back to 32-bit integers with truncation.
"cvttps2dq %%xmm0,%%xmm0 \n"
"cvttps2dq %%xmm4,%%xmm4 \n"
// Narrow to bytes with two saturating packs; the 8 result bytes end up in
// the low half of xmm0.
"packuswb %%xmm4,%%xmm0 \n"
"packuswb %%xmm0,%%xmm0 \n"
// Count down by 2 pixels. The movq and lea that follow do not modify the
// flags, so jg still tests the result of this sub.
"sub $0x2,%2 \n"
// Store 8 destination bytes (2 pixels) and advance the destination pointer.
"movq %%xmm0,(%1) \n"
"lea 0x8(%1),%1 \n"
// Loop while the remaining pixel count is greater than zero.
"jg 1b \n"
// Outputs: all three are read and modified by the asm ("+r").
: "+r"(src_argb), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
// Input: base address of the coefficient table.
: "r"(poly) // %3
// The asm reads and writes memory through the pointers and changes flags.
: "memory", "cc"
// The xmm clobbers are only listed when __SSE2__ is defined — presumably
// because the compiler rejects xmm register names otherwise; TODO confirm.
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
#endif
);
}
#endif // HAS_ARGBPOLYNOMIALROW_SSE2
#endif // defined(__x86_64__) || defined(__i386__)
#ifdef __cplusplus
......
......@@ -6781,6 +6781,7 @@ void ARGBPolynomialRow_SSE2(const uint8* src_argb,
mov ecx, [esp + 4 + 16] /* width */
pxor xmm3, xmm3 // 0 constant for zero extending bytes to ints.
// 2 pixel loop.
align 16
convertloop:
// (slow) pmovzxbd xmm0, dword ptr [eax] // BGRA pixel
......@@ -6846,6 +6847,7 @@ void ARGBPolynomialRow_AVX2(const uint8* src_argb,
mov edx, [esp + 8] /* dst_argb */
mov ecx, [esp + 16] /* width */
// 2 pixel loop.
align 16
convertloop:
vpmovzxbd ymm0, qword ptr [eax] // 2 BGRA pixels
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment