Commit 11a0d48e authored by fbarchard@google.com

pass parameter for yuv conversion

BUG=267
TEST=Luma
R=tpsiaki@google.com

Review URL: https://webrtc-codereview.appspot.com/3169005

git-svn-id: http://libyuv.googlecode.com/svn/trunk@834 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 15df877b
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 833 Version: 834
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -1613,12 +1613,11 @@ void ARGBPolynomialRow_AVX2(const uint8* src_argb, ...@@ -1613,12 +1613,11 @@ void ARGBPolynomialRow_AVX2(const uint8* src_argb,
uint8* dst_argb, const float* poly, uint8* dst_argb, const float* poly,
int width); int width);
void ARGBLumaColorTableRow_C(const uint8* src_argb, void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width,
uint8* dst_argb, const uint8* luma, const uint8* luma, const uint32 lumacoeff);
int width); void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, int width, const uint8* luma,
uint8* dst_argb, const uint8* luma, const uint32 lumacoeff);
int width);
// Divide num by div and return as 16.16 fixed point result. // Divide num by div and return as 16.16 fixed point result.
int FixedDiv_C(int num, int div); int FixedDiv_C(int num, int div);
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 833 #define LIBYUV_VERSION 834
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -75,7 +75,7 @@ int TestOsSaveYmm() { ...@@ -75,7 +75,7 @@ int TestOsSaveYmm() {
mov xcr0, eax mov xcr0, eax
} }
#elif defined(__i386__) || defined(__x86_64__) #elif defined(__i386__) || defined(__x86_64__)
asm volatile (".byte 0x0f, 0x01, 0xd0" : "=a" (xcr0) : "c" (0) : "%edx" ); asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcr0) : "c" (0) : "%edx");
#endif // defined(_MSC_VER) #endif // defined(_MSC_VER)
return((xcr0 & 6) == 6); // Is ymm saved? return((xcr0 & 6) == 6); // Is ymm saved?
} }
......
...@@ -2074,16 +2074,16 @@ int ARGBLumaColorTable(const uint8* src_argb, int src_stride_argb, ...@@ -2074,16 +2074,16 @@ int ARGBLumaColorTable(const uint8* src_argb, int src_stride_argb,
height = 1; height = 1;
src_stride_argb = dst_stride_argb = 0; src_stride_argb = dst_stride_argb = 0;
} }
void (*ARGBLumaColorTableRow)(const uint8* src_argb, void (*ARGBLumaColorTableRow)(const uint8* src_argb, uint8* dst_argb,
uint8* dst_argb, const uint8* luma, int width, const uint8* luma, const uint32 lumacoeff) =
int width) = ARGBLumaColorTableRow_C; ARGBLumaColorTableRow_C;
#if defined(HAS_ARGBLUMACOLORTABLEROW_SSSE3) #if defined(HAS_ARGBLUMACOLORTABLEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4)) { if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4)) {
ARGBLumaColorTableRow = ARGBLumaColorTableRow_SSSE3; ARGBLumaColorTableRow = ARGBLumaColorTableRow_SSSE3;
} }
#endif #endif
for (int y = 0; y < height; ++y) { for (int y = 0; y < height; ++y) {
ARGBLumaColorTableRow(src_argb, dst_argb, luma, width); ARGBLumaColorTableRow(src_argb, dst_argb, width, luma, 0x00264b0f);
src_argb += src_stride_argb; src_argb += src_stride_argb;
dst_argb += dst_stride_argb; dst_argb += dst_stride_argb;
} }
......
...@@ -2082,30 +2082,22 @@ void ARGBPolynomialRow_C(const uint8* src_argb, ...@@ -2082,30 +2082,22 @@ void ARGBPolynomialRow_C(const uint8* src_argb,
} }
} }
// RGB to Luminance. void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width,
// Leverage the fact that we want shifted left by 8 by the caller. const uint8* luma, const uint32 lumacoeff) {
// uint32 bc = lumacoeff & 0xff;
// Borrowed from libyuv/files/source/row_common.cc. uint32 gc = (lumacoeff >> 8) & 0xff;
// JPeg 7 bit Y: uint32 rc = (lumacoeff >> 16) & 0xff;
// b 0.11400 * 128 = 14.592 = 15
// g 0.58700 * 128 = 75.136 = 75
// r 0.29900 * 128 = 38.272 = 38
static __inline unsigned int RGBToYJx256(uint8 r, uint8 g, uint8 b) {
return (38u * r + 75u * g + 15u * b) & 0x7F00u;
}
void ARGBLumaColorTableRow_C(const uint8* src_argb,
uint8* dst_argb, const uint8* luma,
int width) {
for (int i = 0; i < width - 1; i += 2) { for (int i = 0; i < width - 1; i += 2) {
// Luminance in rows, color values in columns. // Luminance in rows, color values in columns.
const uint8* luma0 = RGBToYJx256(src_argb[2], src_argb[1], src_argb[0]) + const uint8* luma0 = ((src_argb[0] * bc + src_argb[1] * gc +
luma; src_argb[2] * rc) & 0x7F00u) + luma;
dst_argb[0] = luma0[src_argb[0]]; dst_argb[0] = luma0[src_argb[0]];
dst_argb[1] = luma0[src_argb[1]]; dst_argb[1] = luma0[src_argb[1]];
dst_argb[2] = luma0[src_argb[2]]; dst_argb[2] = luma0[src_argb[2]];
dst_argb[3] = src_argb[3]; dst_argb[3] = src_argb[3];
const uint8* luma1 = RGBToYJx256(src_argb[6], src_argb[5], src_argb[4]) + const uint8* luma1 = ((src_argb[4] * bc + src_argb[5] * gc +
luma; src_argb[6] * rc) & 0x7F00u) + luma;
dst_argb[4] = luma1[src_argb[4]]; dst_argb[4] = luma1[src_argb[4]];
dst_argb[5] = luma1[src_argb[5]]; dst_argb[5] = luma1[src_argb[5]];
dst_argb[6] = luma1[src_argb[6]]; dst_argb[6] = luma1[src_argb[6]];
...@@ -2115,8 +2107,8 @@ void ARGBLumaColorTableRow_C(const uint8* src_argb, ...@@ -2115,8 +2107,8 @@ void ARGBLumaColorTableRow_C(const uint8* src_argb,
} }
if (width & 1) { if (width & 1) {
// Luminance in rows, color values in columns. // Luminance in rows, color values in columns.
const uint8* luma0 = RGBToYJx256(src_argb[2], src_argb[1], src_argb[0]) + const uint8* luma0 = ((src_argb[0] * bc + src_argb[1] * gc +
luma; src_argb[2] * rc) & 0x7F00u) + luma;
dst_argb[0] = luma0[src_argb[0]]; dst_argb[0] = luma0[src_argb[0]];
dst_argb[1] = luma0[src_argb[1]]; dst_argb[1] = luma0[src_argb[1]];
dst_argb[2] = luma0[src_argb[2]]; dst_argb[2] = luma0[src_argb[2]];
......
...@@ -4477,14 +4477,10 @@ void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) { ...@@ -4477,14 +4477,10 @@ void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) {
void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb, void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
const int8* matrix_argb, int width) { const int8* matrix_argb, int width) {
asm volatile ( asm volatile (
"movd " MEMACCESS(3) ",%%xmm2 \n" "pshufd $0x00," MEMACCESS(3) ",%%xmm2 \n"
"movd " MEMACCESS2(0x4,3) ",%%xmm3 \n" "pshufd $0x55," MEMACCESS(3) ",%%xmm3 \n"
"movd " MEMACCESS2(0x8,3) ",%%xmm4 \n" "pshufd $0xaa," MEMACCESS(3) ",%%xmm4 \n"
"movd " MEMACCESS2(0xc,3) ",%%xmm5 \n" "pshufd $0xff," MEMACCESS(3) ",%%xmm5 \n"
"pshufd $0x0,%%xmm2,%%xmm2 \n"
"pshufd $0x0,%%xmm3,%%xmm3 \n"
"pshufd $0x0,%%xmm4,%%xmm4 \n"
"pshufd $0x0,%%xmm5,%%xmm5 \n"
// 8 pixel loop. // 8 pixel loop.
".p2align 4 \n" ".p2align 4 \n"
...@@ -6361,17 +6357,15 @@ void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width) { ...@@ -6361,17 +6357,15 @@ void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width) {
} }
#endif // HAS_RGBCOLORTABLEROW_X86 #endif // HAS_RGBCOLORTABLEROW_X86
// TODO(fbarchard): Ensure this works with minimal number of registers/gcc32.
#ifdef HAS_ARGBLUMACOLORTABLEROW_SSSE3 #ifdef HAS_ARGBLUMACOLORTABLEROW_SSSE3
// Tranform RGB pixels with luma table. // Tranform RGB pixels with luma table.
void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
uint8* dst_argb, const uint8* luma, int width,
int width) { const uint8* luma, uint32 lumacoeff) {
uintptr_t pixel_temp = 0u; uintptr_t pixel_temp = 0u;
uintptr_t table_temp = 0u; uintptr_t table_temp = 0u;
asm volatile ( asm volatile (
"mov $0x264b0f,%%edx \n" "movd %6,%%xmm3 \n"
"movd %%edx,%%xmm3 \n"
"pshufd $0x0,%%xmm3,%%xmm3 \n" "pshufd $0x0,%%xmm3,%%xmm3 \n"
"pcmpeqb %%xmm4,%%xmm4 \n" "pcmpeqb %%xmm4,%%xmm4 \n"
"psllw $0x8,%%xmm4 \n" "psllw $0x8,%%xmm4 \n"
...@@ -6456,7 +6450,8 @@ void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, ...@@ -6456,7 +6450,8 @@ void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb,
"+r"(src_argb), // %2 "+r"(src_argb), // %2
"+r"(dst_argb), // %3 "+r"(dst_argb), // %3
"+rm"(width) // %4 "+rm"(width) // %4
: "r"(luma) // %5 : "r"(luma), // %5
"rm"(lumacoeff) // %6
: "memory", "cc" : "memory", "cc"
#if defined(__SSE2__) #if defined(__SSE2__)
, "xmm0", "xmm3", "xmm4", "xmm5" , "xmm0", "xmm3", "xmm4", "xmm5"
......
...@@ -5146,17 +5146,13 @@ void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb, ...@@ -5146,17 +5146,13 @@ void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
mov eax, [esp + 4] /* src_argb */ mov eax, [esp + 4] /* src_argb */
mov edx, [esp + 8] /* dst_argb */ mov edx, [esp + 8] /* dst_argb */
mov ecx, [esp + 12] /* matrix_argb */ mov ecx, [esp + 12] /* matrix_argb */
movd xmm2, [ecx] pshufd xmm2, [ecx], 0x00
movd xmm3, [ecx + 4] pshufd xmm3, [ecx], 0x55
movd xmm4, [ecx + 8] pshufd xmm4, [ecx], 0xaa
movd xmm5, [ecx + 12] pshufd xmm5, [ecx], 0xff
pshufd xmm2, xmm2, 0
pshufd xmm3, xmm3, 0
pshufd xmm4, xmm4, 0
pshufd xmm5, xmm5, 0
mov ecx, [esp + 16] /* width */ mov ecx, [esp + 16] /* width */
align 16 align 4
convertloop: convertloop:
movdqa xmm0, [eax] // B movdqa xmm0, [eax] // B
movdqa xmm7, [eax + 16] movdqa xmm7, [eax + 16]
...@@ -7142,19 +7138,18 @@ void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width) { ...@@ -7142,19 +7138,18 @@ void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width) {
#ifdef HAS_ARGBLUMACOLORTABLEROW_SSSE3 #ifdef HAS_ARGBLUMACOLORTABLEROW_SSSE3
// Tranform RGB pixels with luma table. // Tranform RGB pixels with luma table.
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
uint8* dst_argb, const uint8* luma, int width,
int width) { const uint8* luma, uint32 lumacoeff) {
__asm { __asm {
push esi push esi
push edi push edi
mov eax, [esp + 8 + 4] /* src_argb */ mov eax, [esp + 8 + 4] /* src_argb */
mov edi, [esp + 8 + 8] /* dst_argb */ mov edi, [esp + 8 + 8] /* dst_argb */
movd xmm2, dword ptr [esp + 8 + 12] /* table_argb */ mov ecx, [esp + 8 + 12] /* width */
movd xmm2, dword ptr [esp + 8 + 16] // luma table
movd xmm3, dword ptr [esp + 8 + 20] // lumacoeff
pshufd xmm2, xmm2, 0 pshufd xmm2, xmm2, 0
mov ecx, [esp + 8 + 16] /* width */
mov edx, 0x00264b0f // kARGBToYJ
movd xmm3, edx
pshufd xmm3, xmm3, 0 pshufd xmm3, xmm3, 0
pcmpeqb xmm4, xmm4 // generate mask 0xff00ff00 pcmpeqb xmm4, xmm4 // generate mask 0xff00ff00
psllw xmm4, 8 psllw xmm4, 8
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment