Commit e442dc4c authored by fbarchard@google.com's avatar fbarchard@google.com

ARGBcolorMatrix for applying transforms such as grey and sepia in a more general…

ARGBcolorMatrix for applying transforms such as grey and sepia in a more general form.  Unittest does sepia for comparison.
BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/656004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@288 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 794fe123
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 287
Version: 288
License: BSD
License File: LICENSE
......
......@@ -216,6 +216,18 @@ int ARGBGray(uint8* dst_argb, int dst_stride_argb,
int ARGBSepia(uint8* dst_argb, int dst_stride_argb,
int x, int y, int width, int height);
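// Apply a 4x3 color matrix to each ARGB pixel.
// matrix_argb is 3 rows (B, G, R output) of 4 signed coefficients that are
// multiplied with the B, G, R and A inputs. Each coefficient is -128 to 127,
// representing -1 to 1. Alpha is left unchanged.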
int ARGBColorMatrix(uint8* dst_argb, int dst_stride_argb,
const int8* matrix_argb,
int x, int y, int width, int height);
// Apply a color table to each ARGB pixel.
// Table contains 256 ARGB values.
int ARGBColorTable(uint8* dst_argb, int dst_stride_argb,
const uint8* table_argb,
int x, int y, int width, int height);
// Copy ARGB to ARGB.
int ARGBCopy(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
......
......@@ -11,7 +11,7 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 287
#define LIBYUV_VERSION 288
#endif // INCLUDE_LIBYUV_VERSION_H_
......@@ -1468,7 +1468,51 @@ int ARGBSepia(uint8* dst_argb, int dst_stride_argb,
return 0;
}
// Apply a 4x3 color matrix, in place, to each ARGB pixel of a rectangle.
// dst_argb and dst_stride_argb describe the image; dst_x, dst_y, width and
// height select the rectangle, in pixels.
// matrix_argb holds 12 signed coefficients: 3 rows of 4.
// Returns 0 on success or -1 if an argument is invalid.
int ARGBColorMatrix(uint8* dst_argb, int dst_stride_argb,
                    const int8* matrix_argb,
                    int dst_x, int dst_y, int width, int height) {
  if (!dst_argb || !matrix_argb || width <= 0 || height <= 0 ||
      dst_x < 0 || dst_y < 0) {
    return -1;
  }
  // First pixel of the rectangle. Every row call starts from this pointer.
  uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
  void (*ARGBColorMatrixRow)(uint8* dst_argb, const int8* matrix_argb,
                             int width) = ARGBColorMatrixRow_C;
#if defined(HAS_ARGBCOLORMATRIXROW_SSSE3)
  // The SSSE3 row uses aligned 16 byte loads and stores, so alignment must be
  // tested on the rectangle origin (dst) rather than on the image base: a
  // dst_x that is not a multiple of 4 pixels moves the rows off alignment.
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
      IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    ARGBColorMatrixRow = ARGBColorMatrixRow_SSSE3;
  }
#endif
  for (int y = 0; y < height; ++y) {
    ARGBColorMatrixRow(dst, matrix_argb, width);
    dst += dst_stride_argb;
  }
  return 0;
}
// Apply a color table, in place, to each ARGB pixel of a rectangle.
// Table contains 256 ARGB values; each channel of a pixel is replaced by the
// same channel of the table entry indexed by the channel's current value.
// dst_x, dst_y, width and height select the rectangle, in pixels.
// Returns 0 on success or -1 if an argument is invalid.
int ARGBColorTable(uint8* dst_argb, int dst_stride_argb,
                   const uint8* table_argb,
                   int dst_x, int dst_y, int width, int height) {
  // The table is dereferenced for every pixel, so reject NULL like dst_argb.
  if (!dst_argb || !table_argb || width <= 0 || height <= 0 ||
      dst_x < 0 || dst_y < 0) {
    return -1;
  }
  void (*ARGBColorTableRow)(uint8* dst_argb, const uint8* table_argb,
                            int width) = ARGBColorTableRow_C;
#if defined(HAS_ARGBCOLORTABLEROW_X86)
  if (TestCpuFlag(kCpuHasX86)) {
    ARGBColorTableRow = ARGBColorTableRow_X86;
  }
#endif
  // First pixel of the rectangle; advance one stride per row.
  uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
  for (int y = 0; y < height; ++y) {
    ARGBColorTableRow(dst, table_argb, width);
    dst += dst_stride_argb;
  }
  return 0;
}
#ifdef HAVE_JPEG
struct ARGBBuffers {
uint8* argb;
......
......@@ -75,10 +75,17 @@ extern "C" {
#define HAS_YUY2TOYROW_SSE2
#define HAS_ARGBGRAYROW_SSSE3
#define HAS_ARGBSEPIAROW_SSSE3
#define HAS_ARGBCOLORMATRIXROW_SSSE3
#define HAS_COMPUTECUMULATIVESUMROW_SSE2
#define HAS_CUMULATIVESUMTOAVERAGE_SSE2
#endif
// The following are Windows only:
#if !defined(YUV_DISABLE_ASM) && defined(_M_IX86)
#define HAS_ARGBCOLORTABLEROW_X86
#endif
// The following are disabled when SSSE3 is available:
#if !defined(YUV_DISABLE_ASM) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) && \
......@@ -482,6 +489,14 @@ void ARGBGrayRow_SSSE3(uint8* dst_argb, int width);
void ARGBSepiaRow_C(uint8* dst_argb, int width);
void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width);
void ARGBColorMatrixRow_C(uint8* dst_argb, const int8* matrix_argb, int width);
void ARGBColorMatrixRow_SSSE3(uint8* dst_argb, const int8* matrix_argb,
int width);
void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width);
void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb,
int width);
// Used for blur.
void CumulativeSumToAverage_SSE2(const int32* topleft, const int32* botleft,
int width, int area, uint8* dst, int count);
......
......@@ -293,16 +293,54 @@ void ARGBSepiaRow_C(uint8* dst_argb, int width) {
int b = dst_argb[0];
int g = dst_argb[1];
int r = dst_argb[2];
int sb = (r * 35 + g * 68 + b * 17) >> 7;
int sg = (r * 45 + g * 88 + b * 22) >> 7;
int sr = (r * 50 + g * 98 + b * 24) >> 7;
int sb = (b * 17 + g * 68 + r * 35) >> 7;
int sg = (b * 22 + g * 88 + r * 45) >> 7;
int sr = (b * 24 + g * 98 + r * 50) >> 7;
// b does not over flow. a is preserved from original.
if (sg > 255) {
sg = 255;
}
if (sr > 255) {
sr = 255;
}
dst_argb[0] = sb;
dst_argb[1] = sg;
dst_argb[2] = sr;
dst_argb += 4;
}
}
// Apply color matrix to a row of image. Matrix is signed.
void ARGBColorMatrixRow_C(uint8* dst_argb, const int8* matrix_argb, int width) {
for (int x = 0; x < width; ++x) {
int b = dst_argb[0];
int g = dst_argb[1];
int r = dst_argb[2];
int a = dst_argb[3];
int sb = (b * matrix_argb[0] + g * matrix_argb[1] +
r * matrix_argb[2] + a * matrix_argb[3]) >> 7;
int sg = (b * matrix_argb[4] + g * matrix_argb[5] +
r * matrix_argb[6] + a * matrix_argb[7]) >> 7;
int sr = (b * matrix_argb[8] + g * matrix_argb[9] +
r * matrix_argb[10] + a * matrix_argb[11]) >> 7;
if (sb < 0) {
sb = 0;
}
if (sb > 255) {
sb = 255;
}
if (sg < 0) {
sg = 0;
}
if (sg > 255) {
sg = 255;
}
// b, g and r are clamped to 0..255. a is preserved from original.
if (sr < 0) {
sr = 0;
}
if (sr > 255) {
sr = 255;
}
dst_argb[0] = sb;
dst_argb[1] = sg;
dst_argb[2] = sr;
......@@ -310,6 +348,21 @@ void ARGBSepiaRow_C(uint8* dst_argb, int width) {
}
}
// Remap one row of ARGB pixels, in place, through a color table.
// Channel c of each pixel (0 = B, 1 = G, 2 = R, 3 = A) is replaced by channel
// c of the 4 byte table entry selected by the channel's current value.
void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) {
  uint8* pixel = dst_argb;
  for (int remaining = width; remaining > 0; --remaining) {
    for (int channel = 0; channel < 4; ++channel) {
      // Each channel only depends on its own previous value.
      pixel[channel] = table_argb[pixel[channel] * 4 + channel];
    }
    pixel += 4;
  }
}
void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) {
// Copy a Y to RGB.
for (int x = 0; x < width; ++x) {
......@@ -790,9 +843,9 @@ YANY(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_Unaligned_SSSE3, I422ToBGRARow_C, 1)
YANY(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_Unaligned_SSSE3, I422ToABGRRow_C, 1)
#endif
#if defined(HAS_I422TOARGBROW_NEON)
YANY(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, I422ToARGBRow_C)
YANY(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, I422ToBGRARow_C)
YANY(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, I422ToABGRRow_C)
YANY(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, I422ToARGBRow_C, 1)
YANY(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, I422ToBGRARow_C, 1)
YANY(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, I422ToABGRRow_C, 1)
#endif
#undef YANY
......
......@@ -2800,7 +2800,7 @@ CONST vec8 kARGBToSepiaR = {
24, 98, 50, 0, 24, 98, 50, 0, 24, 98, 50, 0, 24, 98, 50, 0
};
// Convert 8 ARGB pixels (64 bytes) to 8 Sepia ARGB pixels
// Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels.
void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) {
asm volatile (
"movdqa %2,%%xmm2 \n"
......@@ -2859,6 +2859,69 @@ void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) {
}
#endif // HAS_ARGBSEPIAROW_SSSE3
#ifdef HAS_ARGBCOLORMATRIXROW_SSSE3
// Transform 8 ARGB pixels (32 bytes) per loop iteration with a color matrix.
// Same as Sepia except the matrix is provided by the caller.
// dst_argb is read and written with aligned 16 byte accesses; width is
// consumed 8 pixels at a time.
// The matrix is signed, so the horizontal add saturates (phaddsw) and the
// shift is arithmetic (psraw): a negative sum stays negative and packuswb
// clamps it to 0, matching the clamping done by ARGBColorMatrixRow_C.
void ARGBColorMatrixRow_SSSE3(uint8* dst_argb, const int8* matrix_argb,
                              int width) {
  asm volatile (
    // Broadcast each 4 coefficient matrix row (B, G, R) across a register.
    "movd      (%2),%%xmm2                     \n"
    "movd      0x4(%2),%%xmm3                  \n"
    "movd      0x8(%2),%%xmm4                  \n"
    "pshufd    $0x0,%%xmm2,%%xmm2              \n"
    "pshufd    $0x0,%%xmm3,%%xmm3              \n"
    "pshufd    $0x0,%%xmm4,%%xmm4              \n"
    // 8 pixel loop.
    ".p2align  4                               \n"
  "1:                                          \n"
    // B: 8 values.
    "movdqa    (%0),%%xmm0                     \n"
    "movdqa    0x10(%0),%%xmm6                 \n"
    "pmaddubsw %%xmm2,%%xmm0                   \n"
    "pmaddubsw %%xmm2,%%xmm6                   \n"
    "phaddsw   %%xmm6,%%xmm0                   \n"
    "psraw     $0x7,%%xmm0                     \n"
    "packuswb  %%xmm0,%%xmm0                   \n"
    // G: 8 values, then interleave into 8 BG pairs.
    "movdqa    (%0),%%xmm5                     \n"
    "movdqa    0x10(%0),%%xmm1                 \n"
    "pmaddubsw %%xmm3,%%xmm5                   \n"
    "pmaddubsw %%xmm3,%%xmm1                   \n"
    "phaddsw   %%xmm1,%%xmm5                   \n"
    "psraw     $0x7,%%xmm5                     \n"
    "packuswb  %%xmm5,%%xmm5                   \n"
    "punpcklbw %%xmm5,%%xmm0                   \n"
    // R: 8 values.
    "movdqa    (%0),%%xmm5                     \n"
    "movdqa    0x10(%0),%%xmm1                 \n"
    "pmaddubsw %%xmm4,%%xmm5                   \n"
    "pmaddubsw %%xmm4,%%xmm1                   \n"
    "phaddsw   %%xmm1,%%xmm5                   \n"
    "psraw     $0x7,%%xmm5                     \n"
    "packuswb  %%xmm5,%%xmm5                   \n"
    // A: copied from the source pixels, then interleave into 8 RA pairs.
    "movdqa    (%0),%%xmm6                     \n"
    "movdqa    0x10(%0),%%xmm1                 \n"
    "psrld     $0x18,%%xmm6                    \n"
    "psrld     $0x18,%%xmm1                    \n"
    "packuswb  %%xmm1,%%xmm6                   \n"
    "packuswb  %%xmm6,%%xmm6                   \n"
    "punpcklbw %%xmm6,%%xmm5                   \n"
    // Weave BG and RA pairs back into BGRA pixels.
    "movdqa    %%xmm0,%%xmm1                   \n"
    "punpcklwd %%xmm5,%%xmm0                   \n"
    "punpckhwd %%xmm5,%%xmm1                   \n"
    // The flags set by sub are still intact at jg: the instructions in
    // between (movdqa, lea) do not modify them.
    "sub       $0x8,%1                         \n"
    "movdqa    %%xmm0,(%0)                     \n"
    "movdqa    %%xmm1,0x10(%0)                 \n"
    "lea       0x20(%0),%0                     \n"
    "jg        1b                              \n"
  : "+r"(dst_argb),      // %0
    "+r"(width)          // %1
  : "r"(matrix_argb)     // %2
  : "memory", "cc"
#if defined(__SSE2__)
    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
#endif
  );
}
#endif // HAS_ARGBCOLORMATRIXROW_SSSE3
#ifdef HAS_COMPUTECUMULATIVESUMROW_SSE2
// Creates a table of cumulative sums where each value is a sum of all values
// above and to the left of the value, inclusive of the value.
......
......@@ -2877,7 +2877,7 @@ static const vec8 kARGBToSepiaR = {
24, 98, 50, 0, 24, 98, 50, 0, 24, 98, 50, 0, 24, 98, 50, 0
};
// Convert 8 ARGB pixels (64 bytes) to 8 Sepia ARGB pixels
// Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels.
__declspec(naked) __declspec(align(16))
void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) {
__asm {
......@@ -2930,6 +2930,117 @@ void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) {
}
}
#endif // HAS_ARGBSEPIAROW_SSSE3
#ifdef HAS_ARGBCOLORMATRIXROW_SSSE3
// Transform 8 ARGB pixels (32 bytes) per loop iteration with a color matrix.
// Same as Sepia except the matrix is provided by the caller.
// dst_argb is read and written with aligned 16 byte accesses; width is
// consumed 8 pixels at a time.
// The matrix is signed, so the horizontal add saturates (phaddsw) and the
// shift is arithmetic (psraw): a negative sum stays negative and packuswb
// clamps it to 0, matching the clamping done by ARGBColorMatrixRow_C.
// TODO(fbarchard): packuswbs only use half of the reg. To make RGBA, combine R
// and B into a high and low, then G/A, unpackl/hbw and then unpckl/hwd.
// TODO(fbarchard): phaddsw not paired.
// TODO(fbarchard): Test data copying from mem instead of from reg.
// TODO(fbarchard): packing and then unpacking the A - is simple pand/por faster
__declspec(naked) __declspec(align(16))
void ARGBColorMatrixRow_SSSE3(uint8* dst_argb, const int8* matrix_argb,
                              int width) {
  __asm {
    mov        eax, [esp + 4]   /* dst_argb */
    mov        edx, [esp + 8]   /* matrix_argb */
    mov        ecx, [esp + 12]  /* width */
    // Broadcast each 4 coefficient matrix row (B, G, R) across a register.
    movd       xmm2, [edx]
    movd       xmm3, [edx + 4]
    movd       xmm4, [edx + 8]
    pshufd     xmm2, xmm2, 0
    pshufd     xmm3, xmm3, 0
    pshufd     xmm4, xmm4, 0

    align      16
 convertloop:
    movdqa     xmm0, [eax]  // B
    movdqa     xmm6, [eax + 16]
    pmaddubsw  xmm0, xmm2
    pmaddubsw  xmm6, xmm2
    phaddsw    xmm0, xmm6   // signed saturating sum of the two pair products
    psraw      xmm0, 7      // arithmetic shift keeps negative sums negative
    packuswb   xmm0, xmm0   // 8 B values, clamped to 0..255
    movdqa     xmm5, [eax]  // G
    movdqa     xmm1, [eax + 16]
    pmaddubsw  xmm5, xmm3
    pmaddubsw  xmm1, xmm3
    phaddsw    xmm5, xmm1
    psraw      xmm5, 7
    packuswb   xmm5, xmm5   // 8 G values
    punpcklbw  xmm0, xmm5   // 8 BG values
    movdqa     xmm5, [eax]  // R
    movdqa     xmm1, [eax + 16]
    pmaddubsw  xmm5, xmm4
    pmaddubsw  xmm1, xmm4
    phaddsw    xmm5, xmm1
    psraw      xmm5, 7
    packuswb   xmm5, xmm5   // 8 R values
    movdqa     xmm6, [eax]  // A
    movdqa     xmm1, [eax + 16]
    psrld      xmm6, 24
    psrld      xmm1, 24
    packuswb   xmm6, xmm1
    packuswb   xmm6, xmm6   // 8 A values
    punpcklbw  xmm5, xmm6   // 8 RA values
    movdqa     xmm1, xmm0   // Weave BG, RA together
    punpcklwd  xmm0, xmm5   // BGRA first 4
    punpckhwd  xmm1, xmm5   // BGRA next 4
    sub        ecx, 8       // flags survive until jg: movdqa/lea keep them
    movdqa     [eax], xmm0
    movdqa     [eax + 16], xmm1
    lea        eax, [eax + 32]
    jg         convertloop
    ret
  }
}
#endif // HAS_ARGBCOLORMATRIXROW_SSSE3
#ifdef HAS_ARGBCOLORTABLEROW_X86
// Transform ARGB pixels with color table, one pixel per loop iteration.
// Each channel is replaced by the same channel of the 4 byte table entry
// indexed by the channel's current value.
// This is a naked function, so no prolog/epilog is generated: every
// callee-saved register used below (ebx, esi, edi, ebp) is saved and
// restored by hand.
__declspec(naked) __declspec(align(16))
void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb,
                           int width) {
  __asm {
    push       ebx
    push       esi
    push       edi
    push       ebp
    // 4 pushes move the arguments 16 bytes further up the stack.
    mov        eax, [esp + 16 + 4]   /* dst_argb */
    mov        edi, [esp + 16 + 8]   /* table_argb */
    mov        ecx, [esp + 16 + 12]  /* width */
    xor        ebx, ebx
    xor        edx, edx

    align      16
 convertloop:
    mov        ebp, dword ptr [eax]  // BGRA
    mov        esi, ebp
    and        ebp, 255              // B index
    shr        esi, 8
    and        esi, 255              // G index
    mov        bl, [edi + ebp * 4 + 0]  // B
    mov        dl, [edi + esi * 4 + 1]  // G
    mov        ebp, dword ptr [eax]  // BGRA
    mov        esi, ebp
    shr        ebp, 16
    shr        esi, 24               // A index
    and        ebp, 255              // R index
    mov        [eax], bl
    mov        [eax + 1], dl
    mov        bl, [edi + ebp * 4 + 2]  // R
    mov        dl, [edi + esi * 4 + 3]  // A
    mov        [eax + 2], bl
    mov        [eax + 3], dl
    lea        eax, [eax + 4]        // lea leaves the flags untouched
    sub        ecx, 1
    jg         convertloop
    pop        ebp
    pop        edi
    pop        esi
    pop        ebx
    ret
  }
}
#endif // HAS_ARGBCOLORTABLEROW_X86
#ifdef HAS_CUMULATIVESUMTOAVERAGE_SSE2
// Consider float CumulativeSum.
......
......@@ -487,4 +487,66 @@ TEST_F(libyuvTest, TestARGBSepia) {
ARGBSepia(&orig_pixels[0][0], 0, 0, 0, 256, 1);
}
}
// Checks ARGBColorMatrix against known Sepia results, then runs it repeatedly
// over a 256 pixel row as a simple benchmark.
TEST_F(libyuvTest, TestARGBColorMatrix) {
  SIMD_ALIGNED(uint8 orig_pixels[256][4]);

  // Clear every pixel first: the 16 pixel call below also reads pixels 4..15,
  // which would otherwise be uninitialized.
  for (int i = 0; i < 256; ++i) {
    for (int j = 0; j < 4; ++j) {
      orig_pixels[i][j] = 0u;
    }
  }

  // Matrix for Sepia: one row of B, G, R, A coefficients per output channel.
  static const int8 kARGBToSepia[] = {
    17, 68, 35, 0,
    22, 88, 45, 0,
    24, 98, 50, 0,
  };

  // Test blue
  orig_pixels[0][0] = 255u;
  orig_pixels[0][1] = 0u;
  orig_pixels[0][2] = 0u;
  orig_pixels[0][3] = 128u;
  // Test green
  orig_pixels[1][0] = 0u;
  orig_pixels[1][1] = 255u;
  orig_pixels[1][2] = 0u;
  orig_pixels[1][3] = 0u;
  // Test red
  orig_pixels[2][0] = 0u;
  orig_pixels[2][1] = 0u;
  orig_pixels[2][2] = 255u;
  orig_pixels[2][3] = 255u;
  // Test color
  orig_pixels[3][0] = 16u;
  orig_pixels[3][1] = 64u;
  orig_pixels[3][2] = 192u;
  orig_pixels[3][3] = 224u;
  // Do 16 to test asm version.
  ARGBColorMatrix(&orig_pixels[0][0], 0, &kARGBToSepia[0], 0, 0, 16, 1);
  EXPECT_EQ(33u, orig_pixels[0][0]);
  EXPECT_EQ(43u, orig_pixels[0][1]);
  EXPECT_EQ(47u, orig_pixels[0][2]);
  EXPECT_EQ(128u, orig_pixels[0][3]);
  EXPECT_EQ(135u, orig_pixels[1][0]);
  EXPECT_EQ(175u, orig_pixels[1][1]);
  EXPECT_EQ(195u, orig_pixels[1][2]);
  EXPECT_EQ(0u, orig_pixels[1][3]);
  EXPECT_EQ(69u, orig_pixels[2][0]);
  EXPECT_EQ(89u, orig_pixels[2][1]);
  EXPECT_EQ(99u, orig_pixels[2][2]);
  EXPECT_EQ(255u, orig_pixels[2][3]);
  EXPECT_EQ(88u, orig_pixels[3][0]);
  EXPECT_EQ(114u, orig_pixels[3][1]);
  EXPECT_EQ(127u, orig_pixels[3][2]);
  EXPECT_EQ(224u, orig_pixels[3][3]);

  // Benchmark input: a ramp in every channel.
  for (int i = 0; i < 256; ++i) {
    orig_pixels[i][0] = i;
    orig_pixels[i][1] = i / 2;
    orig_pixels[i][2] = i / 3;
    orig_pixels[i][3] = i;
  }

  // Process the 256 pixel row repeatedly; results are not checked here.
  for (int i = 0; i < 1000 * 1280 * 720 / 256; ++i) {
    ARGBColorMatrix(&orig_pixels[0][0], 0, &kARGBToSepia[0], 0, 0, 256, 1);
  }
}
} // namespace libyuv
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment