Commit 11a0d48e authored by fbarchard@google.com

pass parameter for yuv conversion

BUG=267
TEST=Luma
R=tpsiaki@google.com

Review URL: https://webrtc-codereview.appspot.com/3169005

git-svn-id: http://libyuv.googlecode.com/svn/trunk@834 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 15df877b
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 833
Version: 834
License: BSD
License File: LICENSE
......
......@@ -1613,12 +1613,11 @@ void ARGBPolynomialRow_AVX2(const uint8* src_argb,
uint8* dst_argb, const float* poly,
int width);
void ARGBLumaColorTableRow_C(const uint8* src_argb,
uint8* dst_argb, const uint8* luma,
int width);
void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb,
uint8* dst_argb, const uint8* luma,
int width);
// Remaps the B, G and R bytes of each ARGB pixel through the luma table.
// lumacoeff packs the B, G and R luminance weights in bits 0-7, 8-15 and
// 16-23. The weighted sum of a pixel's channels, masked with 0x7F00, is the
// byte offset into luma where that pixel's lookup row starts; each colour
// byte then indexes that row. Alpha is copied from the source pixel.
void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width,
const uint8* luma, const uint32 lumacoeff);
// SSSE3 row function with the same parameter list as ARGBLumaColorTableRow_C.
// ARGBLumaColorTable only selects it when the CPU reports SSSE3 and
// IS_ALIGNED(width, 4) holds.
void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
int width, const uint8* luma,
const uint32 lumacoeff);
// Divide num by div and return as 16.16 fixed point result.
int FixedDiv_C(int num, int div);
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 833
#define LIBYUV_VERSION 834
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -75,7 +75,7 @@ int TestOsSaveYmm() {
mov xcr0, eax
}
#elif defined(__i386__) || defined(__x86_64__)
asm volatile (".byte 0x0f, 0x01, 0xd0" : "=a" (xcr0) : "c" (0) : "%edx" );
asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcr0) : "c" (0) : "%edx");
#endif // defined(_MSC_VER)
return((xcr0 & 6) == 6); // Is ymm saved?
}
......
......@@ -2074,16 +2074,16 @@ int ARGBLumaColorTable(const uint8* src_argb, int src_stride_argb,
height = 1;
src_stride_argb = dst_stride_argb = 0;
}
void (*ARGBLumaColorTableRow)(const uint8* src_argb,
uint8* dst_argb, const uint8* luma,
int width) = ARGBLumaColorTableRow_C;
void (*ARGBLumaColorTableRow)(const uint8* src_argb, uint8* dst_argb,
int width, const uint8* luma, const uint32 lumacoeff) =
ARGBLumaColorTableRow_C;
#if defined(HAS_ARGBLUMACOLORTABLEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4)) {
ARGBLumaColorTableRow = ARGBLumaColorTableRow_SSSE3;
}
#endif
for (int y = 0; y < height; ++y) {
ARGBLumaColorTableRow(src_argb, dst_argb, luma, width);
ARGBLumaColorTableRow(src_argb, dst_argb, width, luma, 0x00264b0f);
src_argb += src_stride_argb;
dst_argb += dst_stride_argb;
}
......
......@@ -2082,30 +2082,22 @@ void ARGBPolynomialRow_C(const uint8* src_argb,
}
}
// RGB to Luminance.
// Leverage the fact that the caller wants the result shifted left by 8.
//
// Borrowed from libyuv/files/source/row_common.cc.
// JPeg 7 bit Y:
// b 0.11400 * 128 = 14.592 = 15
// g 0.58700 * 128 = 75.136 = 75
// r 0.29900 * 128 = 38.272 = 38
static __inline unsigned int RGBToYJx256(uint8 r, uint8 g, uint8 b) {
  // Weighted channel sum using the 7 bit JPeg coefficients (b=15, g=75, r=38).
  const unsigned int weighted_sum = 15u * b + 75u * g + 38u * r;
  // Keep bits 8..14 only: the 7 bit luma, already shifted left by 8.
  return weighted_sum & 0x7F00u;
}
void ARGBLumaColorTableRow_C(const uint8* src_argb,
uint8* dst_argb, const uint8* luma,
int width) {
void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width,
const uint8* luma, const uint32 lumacoeff) {
uint32 bc = lumacoeff & 0xff;
uint32 gc = (lumacoeff >> 8) & 0xff;
uint32 rc = (lumacoeff >> 16) & 0xff;
for (int i = 0; i < width - 1; i += 2) {
// Luminance in rows, color values in columns.
const uint8* luma0 = RGBToYJx256(src_argb[2], src_argb[1], src_argb[0]) +
luma;
const uint8* luma0 = ((src_argb[0] * bc + src_argb[1] * gc +
src_argb[2] * rc) & 0x7F00u) + luma;
dst_argb[0] = luma0[src_argb[0]];
dst_argb[1] = luma0[src_argb[1]];
dst_argb[2] = luma0[src_argb[2]];
dst_argb[3] = src_argb[3];
const uint8* luma1 = RGBToYJx256(src_argb[6], src_argb[5], src_argb[4]) +
luma;
const uint8* luma1 = ((src_argb[4] * bc + src_argb[5] * gc +
src_argb[6] * rc) & 0x7F00u) + luma;
dst_argb[4] = luma1[src_argb[4]];
dst_argb[5] = luma1[src_argb[5]];
dst_argb[6] = luma1[src_argb[6]];
......@@ -2115,8 +2107,8 @@ void ARGBLumaColorTableRow_C(const uint8* src_argb,
}
if (width & 1) {
// Luminance in rows, color values in columns.
const uint8* luma0 = RGBToYJx256(src_argb[2], src_argb[1], src_argb[0]) +
luma;
const uint8* luma0 = ((src_argb[0] * bc + src_argb[1] * gc +
src_argb[2] * rc) & 0x7F00u) + luma;
dst_argb[0] = luma0[src_argb[0]];
dst_argb[1] = luma0[src_argb[1]];
dst_argb[2] = luma0[src_argb[2]];
......
......@@ -4477,14 +4477,10 @@ void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) {
void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
const int8* matrix_argb, int width) {
asm volatile (
"movd " MEMACCESS(3) ",%%xmm2 \n"
"movd " MEMACCESS2(0x4,3) ",%%xmm3 \n"
"movd " MEMACCESS2(0x8,3) ",%%xmm4 \n"
"movd " MEMACCESS2(0xc,3) ",%%xmm5 \n"
"pshufd $0x0,%%xmm2,%%xmm2 \n"
"pshufd $0x0,%%xmm3,%%xmm3 \n"
"pshufd $0x0,%%xmm4,%%xmm4 \n"
"pshufd $0x0,%%xmm5,%%xmm5 \n"
"pshufd $0x00," MEMACCESS(3) ",%%xmm2 \n"
"pshufd $0x55," MEMACCESS(3) ",%%xmm3 \n"
"pshufd $0xaa," MEMACCESS(3) ",%%xmm4 \n"
"pshufd $0xff," MEMACCESS(3) ",%%xmm5 \n"
// 8 pixel loop.
".p2align 4 \n"
......@@ -6361,17 +6357,15 @@ void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width) {
}
#endif // HAS_RGBCOLORTABLEROW_X86
// TODO(fbarchard): Ensure this works with minimal number of registers/gcc32.
#ifdef HAS_ARGBLUMACOLORTABLEROW_SSSE3
// Transform RGB pixels with luma table.
void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb,
uint8* dst_argb, const uint8* luma,
int width) {
void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
int width,
const uint8* luma, uint32 lumacoeff) {
uintptr_t pixel_temp = 0u;
uintptr_t table_temp = 0u;
asm volatile (
"mov $0x264b0f,%%edx \n"
"movd %%edx,%%xmm3 \n"
"movd %6,%%xmm3 \n"
"pshufd $0x0,%%xmm3,%%xmm3 \n"
"pcmpeqb %%xmm4,%%xmm4 \n"
"psllw $0x8,%%xmm4 \n"
......@@ -6456,7 +6450,8 @@ void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb,
"+r"(src_argb), // %2
"+r"(dst_argb), // %3
"+rm"(width) // %4
: "r"(luma) // %5
: "r"(luma), // %5
"rm"(lumacoeff) // %6
: "memory", "cc"
#if defined(__SSE2__)
, "xmm0", "xmm3", "xmm4", "xmm5"
......
......@@ -5146,17 +5146,13 @@ void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
mov eax, [esp + 4] /* src_argb */
mov edx, [esp + 8] /* dst_argb */
mov ecx, [esp + 12] /* matrix_argb */
movd xmm2, [ecx]
movd xmm3, [ecx + 4]
movd xmm4, [ecx + 8]
movd xmm5, [ecx + 12]
pshufd xmm2, xmm2, 0
pshufd xmm3, xmm3, 0
pshufd xmm4, xmm4, 0
pshufd xmm5, xmm5, 0
pshufd xmm2, [ecx], 0x00
pshufd xmm3, [ecx], 0x55
pshufd xmm4, [ecx], 0xaa
pshufd xmm5, [ecx], 0xff
mov ecx, [esp + 16] /* width */
align 16
align 4
convertloop:
movdqa xmm0, [eax] // B
movdqa xmm7, [eax + 16]
......@@ -7142,21 +7138,20 @@ void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width) {
#ifdef HAS_ARGBLUMACOLORTABLEROW_SSSE3
// Transform RGB pixels with luma table.
__declspec(naked) __declspec(align(16))
void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb,
uint8* dst_argb, const uint8* luma,
int width) {
void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
int width,
const uint8* luma, uint32 lumacoeff) {
__asm {
push esi
push edi
mov eax, [esp + 8 + 4] /* src_argb */
mov edi, [esp + 8 + 8] /* dst_argb */
movd xmm2, dword ptr [esp + 8 + 12] /* table_argb */
mov ecx, [esp + 8 + 12] /* width */
movd xmm2, dword ptr [esp + 8 + 16] // luma table
movd xmm3, dword ptr [esp + 8 + 20] // lumacoeff
pshufd xmm2, xmm2, 0
mov ecx, [esp + 8 + 16] /* width */
mov edx, 0x00264b0f // kARGBToYJ
movd xmm3, edx
pshufd xmm3, xmm3, 0
pcmpeqb xmm4, xmm4 // generate mask 0xff00ff00
pcmpeqb xmm4, xmm4 // generate mask 0xff00ff00
psllw xmm4, 8
pxor xmm5, xmm5
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment