Commit 221e602f authored by fbarchard@google.com's avatar fbarchard@google.com

ARGBSepia planar function for converting a region of ARGB image to Sepia tone. …

ARGBSepia planar function for converting a region of an ARGB image to Sepia tone.  ARGBGray: optimized weaving of the alpha value, 551 ms down from 568 ms.
BUG=none
TEST=libyuv_unittest --gtest_filter=*ARGBSepia*
Review URL: https://webrtc-codereview.appspot.com/573008

git-svn-id: http://libyuv.googlecode.com/svn/trunk@270 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent ffaea7ee
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 269
Version: 270
License: BSD
License File: LICENSE
......
......@@ -138,6 +138,11 @@ int ARGBGray(uint8* dst_argb, int dst_stride_argb,
int x, int y,
int width, int height);
// Make a rectangle of ARGB Sepia tone.
// The image is modified in place: dst_argb is both the source and the
// destination. x and y are the pixel coordinates of the rectangle's top-left
// corner within the image; width and height are its size in pixels, and
// dst_stride_argb is the distance in bytes between consecutive rows.
// The alpha channel is preserved.
// Returns 0 on success, or -1 if dst_argb is NULL, width or height is not
// positive, or x or y is negative.
int ARGBSepia(uint8* dst_argb, int dst_stride_argb,
int x, int y,
int width, int height);
// Copy ARGB to ARGB.
int ARGBCopy(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
......
......@@ -11,7 +11,7 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 269
#define LIBYUV_VERSION 270
#endif // INCLUDE_LIBYUV_VERSION_H_
......@@ -1059,6 +1059,28 @@ int ARGBGray(uint8* dst_argb, int dst_stride_argb,
return 0;
}
// Make a rectangle of ARGB Sepia tone.
// Converts, in place, the width x height rectangle whose top-left pixel is at
// (dst_x, dst_y) inside the image starting at dst_argb. dst_stride_argb is the
// byte distance between rows. Alpha is preserved.
// Returns 0 on success, -1 on invalid arguments.
int ARGBSepia(uint8* dst_argb, int dst_stride_argb,
              int dst_x, int dst_y,
              int width, int height) {
  if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
    return -1;
  }
  // Address of the first pixel of the rectangle (4 bytes per ARGB pixel).
  uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
  void (*ARGBSepiaRow)(uint8* dst_argb, int width) = ARGBSepiaRow_C;
#if defined(HAS_ARGBSEPIAROW_SSSE3)
  // The SSSE3 row function uses aligned 16-byte loads and stores on the
  // pointer it is handed, so the alignment test must be made on the offset
  // pointer (dst), not on the image base: a dst_x that is not a multiple of 4
  // pixels would otherwise pass a misaligned pointer to the fast path.
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
      IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    ARGBSepiaRow = ARGBSepiaRow_SSSE3;
  }
#endif
  for (int y = 0; y < height; ++y) {
    ARGBSepiaRow(dst, width);
    dst += dst_stride_argb;
  }
  return 0;
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
......
......@@ -72,6 +72,7 @@ extern "C" {
#define HAS_YUY2TOUVROW_SSE2
#define HAS_YUY2TOYROW_SSE2
#define HAS_ARGBGRAYROW_SSSE3
#define HAS_ARGBSEPIAROW_SSSE3
#endif
// The following are only useful when SSSE3 is unavailable.
......@@ -403,6 +404,9 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
void ARGBGrayRow_C(uint8* dst_argb, int width);
void ARGBGrayRow_SSSE3(uint8* dst_argb, int width);
void ARGBSepiaRow_C(uint8* dst_argb, int width);
void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
......
......@@ -249,6 +249,29 @@ void ARGBGrayRow_C(uint8* dst_argb, int width) {
}
}
// Convert a row of ARGB pixels to sepia tone, in place.
// Each pixel is stored as B, G, R, A bytes. The new colour channels are
// weighted sums of the old ones in 7-bit fixed point:
//   b = (r * 35 + g * 68 + b * 17) >> 7
//   g = (r * 45 + g * 88 + b * 22) >> 7
//   r = (r * 50 + g * 98 + b * 24) >> 7
// The alpha byte is left untouched.
void ARGBSepiaRow_C(uint8* dst_argb, int width) {
  uint8* pixel = dst_argb;
  for (int remaining = width; remaining > 0; --remaining, pixel += 4) {
    const int blue = pixel[0];
    const int green = pixel[1];
    const int red = pixel[2];
    const int sepia_b = (red * 35 + green * 68 + blue * 17) >> 7;
    const int sepia_g = (red * 45 + green * 88 + blue * 22) >> 7;
    const int sepia_r = (red * 50 + green * 98 + blue * 24) >> 7;
    // The blue weights sum to 120 (< 128), so blue can never exceed 255.
    // The green and red weights sum to more than 128 and need clamping.
    pixel[0] = sepia_b;
    pixel[1] = sepia_g > 255 ? 255 : sepia_g;
    pixel[2] = sepia_r > 255 ? 255 : sepia_r;
  }
}
void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) {
// Copy a Y to RGB.
for (int x = 0; x < width; ++x) {
......
......@@ -108,11 +108,6 @@ CONST uvec8 kShuffleMaskARGBToRAW = {
2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u
};
// Constant for ARGB color to gray scale. 0.11 * B + 0.59 * G + 0.30 * R
CONST vec8 kARGBToGray = {
14, 76, 38, 0, 14, 76, 38, 0, 14, 76, 38, 0, 14, 76, 38, 0
};
void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
asm volatile (
"pcmpeqb %%xmm5,%%xmm5 \n"
......@@ -2533,15 +2528,15 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
#endif // HAS_ARGBUNATTENUATE_SSE2
#ifdef HAS_ARGBGRAYROW_SSSE3
// Constant for ARGB color to gray scale. 0.11 * B + 0.59 * G + 0.30 * R
CONST vec8 kARGBToGray = {
14, 76, 38, 0, 14, 76, 38, 0, 14, 76, 38, 0, 14, 76, 38, 0
};
// Convert 8 ARGB pixels (32 bytes) to 8 Gray ARGB pixels
void ARGBGrayRow_SSSE3(uint8* dst_argb, int width) {
asm volatile (
"movdqa %2,%%xmm4 \n"
"pcmpeqb %%xmm5,%%xmm5 \n"
"pslld $0x18,%%xmm5 \n"
"pcmpeqb %%xmm3,%%xmm3 \n"
"psrld $0x8,%%xmm3 \n"
// 8 pixel loop \n"
".p2align 4 \n"
"1: \n"
......@@ -2549,21 +2544,21 @@ void ARGBGrayRow_SSSE3(uint8* dst_argb, int width) {
"movdqa 0x10(%0),%%xmm1 \n"
"pmaddubsw %%xmm4,%%xmm0 \n"
"pmaddubsw %%xmm4,%%xmm1 \n"
"movdqa (%0),%%xmm6 \n"
"movdqa 0x10(%0),%%xmm7 \n"
"pand %%xmm5,%%xmm6 \n"
"pand %%xmm5,%%xmm7 \n"
"phaddw %%xmm1,%%xmm0 \n"
"psrlw $0x7,%%xmm0 \n"
"packuswb %%xmm0,%%xmm0 \n"
"movdqa (%0),%%xmm2 \n"
"movdqa 0x10(%0),%%xmm3 \n"
"psrld $0x18,%%xmm2 \n"
"psrld $0x18,%%xmm3 \n"
"packuswb %%xmm3,%%xmm2 \n"
"packuswb %%xmm2,%%xmm2 \n"
"movdqa %%xmm0,%%xmm3 \n"
"punpcklbw %%xmm0,%%xmm0 \n"
"punpcklbw %%xmm2,%%xmm3 \n"
"movdqa %%xmm0,%%xmm1 \n"
"punpcklwd %%xmm0,%%xmm0 \n"
"punpckhwd %%xmm1,%%xmm1 \n"
"pand %%xmm3,%%xmm0 \n"
"pand %%xmm3,%%xmm1 \n"
"por %%xmm6,%%xmm0 \n"
"por %%xmm7,%%xmm1 \n"
"punpcklwd %%xmm3,%%xmm0 \n"
"punpckhwd %%xmm3,%%xmm1 \n"
"sub $0x8,%1 \n"
"movdqa %%xmm0,(%0) \n"
"movdqa %%xmm1,0x10(%0) \n"
......@@ -2574,11 +2569,88 @@ void ARGBGrayRow_SSSE3(uint8* dst_argb, int width) {
: "m"(kARGBToGray) // %2
: "memory", "cc"
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
#endif
);
}
#endif // HAS_ARGBGRAYROW_SSSE3
#ifdef HAS_ARGBSEPIAROW_SSSE3
// b = (r * 35 + g * 68 + b * 17) >> 7
// g = (r * 45 + g * 88 + b * 22) >> 7
// r = (r * 50 + g * 98 + b * 24) >> 7
// Constant for ARGB color to sepia tone
// Each table repeats one 4-byte weight group {B, G, R, A} four times, in the
// same byte order as an ARGB pixel in memory. The alpha weight is 0 so alpha
// does not contribute to the colour sums.
// Weights producing the new blue channel.
CONST vec8 kARGBToSepiaB = {
17, 68, 35, 0, 17, 68, 35, 0, 17, 68, 35, 0, 17, 68, 35, 0
};
// Weights producing the new green channel.
CONST vec8 kARGBToSepiaG = {
22, 88, 45, 0, 22, 88, 45, 0, 22, 88, 45, 0, 22, 88, 45, 0
};
// Weights producing the new red channel.
CONST vec8 kARGBToSepiaR = {
24, 98, 50, 0, 24, 98, 50, 0, 24, 98, 50, 0, 24, 98, 50, 0
};
// Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels per loop iteration,
// in place. All loads and stores are movdqa, so dst_argb must be 16-byte
// aligned; the loop consumes 8 pixels at a time, so width is expected to be a
// positive multiple of 8 (the caller, ARGBSepia, checks both before selecting
// this function).
void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) {
asm volatile (
// Load the B, G and R weight tables once, outside the loop.
"movdqa %2,%%xmm2 \n"
"movdqa %3,%%xmm3 \n"
"movdqa %4,%%xmm4 \n"
// 8 pixel loop.
".p2align 4 \n"
"1: \n"
// B: multiply-add the pixel bytes by the blue weights, sum each pixel's two
// partial sums, shift out the 7 fraction bits and pack to 8 B bytes.
"movdqa (%0),%%xmm0 \n"
"movdqa 0x10(%0),%%xmm6 \n"
"pmaddubsw %%xmm2,%%xmm0 \n"
"pmaddubsw %%xmm2,%%xmm6 \n"
"phaddw %%xmm6,%%xmm0 \n"
"psrlw $0x7,%%xmm0 \n"
"packuswb %%xmm0,%%xmm0 \n"
// G: same sequence with the green weights; packuswb saturates to 255.
"movdqa (%0),%%xmm5 \n"
"movdqa 0x10(%0),%%xmm1 \n"
"pmaddubsw %%xmm3,%%xmm5 \n"
"pmaddubsw %%xmm3,%%xmm1 \n"
"phaddw %%xmm1,%%xmm5 \n"
"psrlw $0x7,%%xmm5 \n"
"packuswb %%xmm5,%%xmm5 \n"
// Interleave into 8 BG byte pairs.
"punpcklbw %%xmm5,%%xmm0 \n"
// R: same sequence with the red weights.
"movdqa (%0),%%xmm5 \n"
"movdqa 0x10(%0),%%xmm1 \n"
"pmaddubsw %%xmm4,%%xmm5 \n"
"pmaddubsw %%xmm4,%%xmm1 \n"
"phaddw %%xmm1,%%xmm5 \n"
"psrlw $0x7,%%xmm5 \n"
"packuswb %%xmm5,%%xmm5 \n"
// A: shift each 32-bit pixel right by 24 to isolate the original alpha byte,
// then pack down to 8 A bytes.
"movdqa (%0),%%xmm6 \n"
"movdqa 0x10(%0),%%xmm1 \n"
"psrld $0x18,%%xmm6 \n"
"psrld $0x18,%%xmm1 \n"
"packuswb %%xmm1,%%xmm6 \n"
"packuswb %%xmm6,%%xmm6 \n"
// Interleave into 8 RA byte pairs.
"punpcklbw %%xmm6,%%xmm5 \n"
// Weave BG and RA words together into BGRA pixels: first 4, then next 4.
"movdqa %%xmm0,%%xmm1 \n"
"punpcklwd %%xmm5,%%xmm0 \n"
"punpckhwd %%xmm5,%%xmm1 \n"
// Count down 8 pixels; the jg below branches on the flags set by this sub.
"sub $0x8,%1 \n"
"movdqa %%xmm0,(%0) \n"
"movdqa %%xmm1,0x10(%0) \n"
// Advance 32 bytes (8 pixels).
"lea 0x20(%0),%0 \n"
"jg 1b \n"
: "+r"(dst_argb), // %0
"+r"(width) // %1
: "m"(kARGBToSepiaB), // %2
"m"(kARGBToSepiaG), // %3
"m"(kARGBToSepiaR) // %4
: "memory", "cc"
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
#endif
);
}
#endif // HAS_ARGBSEPIAROW_SSSE3
#endif // defined(__x86_64__) || defined(__i386__)
#ifdef __cplusplus
......
......@@ -98,11 +98,6 @@ static const uvec8 kShuffleMaskARGBToRAW = {
2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u
};
// Constant for ARGB color to gray scale. 0.11 * B + 0.59 * G + 0.30 * R
static const vec8 kARGBToGray = {
14, 76, 38, 0, 14, 76, 38, 0, 14, 76, 38, 0, 14, 76, 38, 0
};
__declspec(naked) __declspec(align(16))
void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
__asm {
......@@ -2558,6 +2553,11 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
#endif // HAS_ARGBUNATTENUATE_SSE2
#ifdef HAS_ARGBGRAYROW_SSSE3
// Constant for ARGB color to gray scale. 0.11 * B + 0.59 * G + 0.30 * R
static const vec8 kARGBToGray = {
14, 76, 38, 0, 14, 76, 38, 0, 14, 76, 38, 0, 14, 76, 38, 0
};
// Convert 8 ARGB pixels (32 bytes) to 8 Gray ARGB pixels
__declspec(naked) __declspec(align(16))
void ARGBGrayRow_SSSE3(uint8* dst_argb, int width) {
......@@ -2565,33 +2565,28 @@ void ARGBGrayRow_SSSE3(uint8* dst_argb, int width) {
mov eax, [esp + 4] /* dst_argb */
mov ecx, [esp + 8] /* width */
movdqa xmm4, kARGBToGray
pcmpeqb xmm5, xmm5 // generate mask 0xff000000
pslld xmm5, 24
pcmpeqb xmm3, xmm3 // generate mask 0x00ffffff
psrld xmm3, 8
align 16
convertloop:
movdqa xmm0, [eax]
movdqa xmm0, [eax] // G
movdqa xmm1, [eax + 16]
pmaddubsw xmm0, xmm4
pmaddubsw xmm1, xmm4
movdqa xmm6, [eax] // preserve alpha
movdqa xmm7, [eax + 16]
pand xmm6, xmm5
pand xmm7, xmm5
phaddw xmm0, xmm1
psrlw xmm0, 7
packuswb xmm0, xmm0 // 8 Y values
punpcklbw xmm0, xmm0
packuswb xmm0, xmm0 // 8 G bytes
movdqa xmm2, [eax] // A
movdqa xmm3, [eax + 16]
psrld xmm2, 24
psrld xmm3, 24
packuswb xmm2, xmm3
packuswb xmm2, xmm2 // 8 A bytes
movdqa xmm3, xmm0 // Weave into GG, GA, then GGGA
punpcklbw xmm0, xmm0 // 8 GG words
punpcklbw xmm3, xmm2 // 8 GA words
movdqa xmm1, xmm0
punpcklwd xmm0, xmm0
punpckhwd xmm1, xmm1
pand xmm0, xmm3 // mask in alpha
pand xmm1, xmm3
por xmm0, xmm6
por xmm1, xmm7
punpcklwd xmm0, xmm3 // GGGA first 4
punpckhwd xmm1, xmm3 // GGGA next 4
sub ecx, 8
movdqa [eax], xmm0
movdqa [eax + 16], xmm1
......@@ -2601,8 +2596,80 @@ void ARGBGrayRow_SSSE3(uint8* dst_argb, int width) {
}
}
#endif // HAS_ARGBGRAYROW_SSSE3
#ifdef HAS_ARGBSEPIAROW_SSSE3
// b = (r * 35 + g * 68 + b * 17) >> 7
// g = (r * 45 + g * 88 + b * 22) >> 7
// r = (r * 50 + g * 98 + b * 24) >> 7
// Constant for ARGB color to sepia tone
// Each table repeats one 4-byte weight group {B, G, R, A} four times, in the
// same byte order as an ARGB pixel in memory. The alpha weight is 0 so alpha
// does not contribute to the colour sums.
// Weights producing the new blue channel.
static const vec8 kARGBToSepiaB = {
17, 68, 35, 0, 17, 68, 35, 0, 17, 68, 35, 0, 17, 68, 35, 0
};
// Weights producing the new green channel.
static const vec8 kARGBToSepiaG = {
22, 88, 45, 0, 22, 88, 45, 0, 22, 88, 45, 0, 22, 88, 45, 0
};
// Weights producing the new red channel.
static const vec8 kARGBToSepiaR = {
24, 98, 50, 0, 24, 98, 50, 0, 24, 98, 50, 0, 24, 98, 50, 0
};
// Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels per loop iteration,
// in place. All loads and stores are movdqa, so dst_argb must be 16-byte
// aligned; the loop consumes 8 pixels at a time, so width is expected to be a
// positive multiple of 8 (the caller, ARGBSepia, checks both before selecting
// this function).
// Naked function: arguments are read directly from the stack via esp.
__declspec(naked) __declspec(align(16))
void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) {
__asm {
mov eax, [esp + 4] /* dst_argb */
mov ecx, [esp + 8] /* width */
// Load the B, G and R weight tables once, outside the loop.
movdqa xmm2, kARGBToSepiaB
movdqa xmm3, kARGBToSepiaG
movdqa xmm4, kARGBToSepiaR
align 16
convertloop:
// Each colour channel: multiply-add pixel bytes by the channel weights, sum
// each pixel's two partial sums, drop the 7 fraction bits, pack to bytes
// (packuswb saturates to 255).
movdqa xmm0, [eax] // B
movdqa xmm6, [eax + 16]
pmaddubsw xmm0, xmm2
pmaddubsw xmm6, xmm2
phaddw xmm0, xmm6
psrlw xmm0, 7
packuswb xmm0, xmm0 // 8 B values
movdqa xmm5, [eax] // G
movdqa xmm1, [eax + 16]
pmaddubsw xmm5, xmm3
pmaddubsw xmm1, xmm3
phaddw xmm5, xmm1
psrlw xmm5, 7
packuswb xmm5, xmm5 // 8 G values
punpcklbw xmm0, xmm5 // 8 BG values
movdqa xmm5, [eax] // R
movdqa xmm1, [eax + 16]
pmaddubsw xmm5, xmm4
pmaddubsw xmm1, xmm4
phaddw xmm5, xmm1
psrlw xmm5, 7
packuswb xmm5, xmm5 // 8 R values
// Shift each 32-bit pixel right by 24 to isolate the original alpha byte.
movdqa xmm6, [eax] // A
movdqa xmm1, [eax + 16]
psrld xmm6, 24
psrld xmm1, 24
packuswb xmm6, xmm1
packuswb xmm6, xmm6 // 8 A values
punpcklbw xmm5, xmm6 // 8 RA values
movdqa xmm1, xmm0 // Weave BG, RA together
punpcklwd xmm0, xmm5 // BGRA first 4
punpckhwd xmm1, xmm5 // BGRA next 4
// Count down 8 pixels; the jg below branches on the flags set by this sub.
sub ecx, 8
movdqa [eax], xmm0
movdqa [eax + 16], xmm1
// Advance 32 bytes (8 pixels).
lea eax, [eax + 32]
jg convertloop
ret
}
}
#endif // HAS_ARGBSEPIAROW_SSSE3
#endif // _M_IX86
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
......
......@@ -244,7 +244,8 @@ TEST_F(libyuvTest, TestARGBGray) {
orig_pixels[3][1] = 64u;
orig_pixels[3][2] = 192u;
orig_pixels[3][3] = 224u;
ARGBGray(&orig_pixels[0][0], 0, 0, 0, 4, 1);
// Do 16 to test asm version.
ARGBGray(&orig_pixels[0][0], 0, 0, 0, 16, 1);
EXPECT_EQ(27u, orig_pixels[0][0]);
EXPECT_EQ(27u, orig_pixels[0][1]);
EXPECT_EQ(27u, orig_pixels[0][2]);
......@@ -273,4 +274,58 @@ TEST_F(libyuvTest, TestARGBGray) {
ARGBGray(&orig_pixels[0][0], 0, 0, 0, 256, 1);
}
}
// Checks ARGBSepia against hand-computed values for pure blue, green, red and
// a mixed colour, then runs a timing loop over a 256-pixel row.
TEST_F(libyuvTest, TestARGBSepia) {
  SIMD_ALIGNED(uint8 orig_pixels[256][4]);
  // Zero the whole buffer first: only pixels 0..3 are set below, but 16
  // pixels are converted, so the remaining 12 would otherwise be read
  // uninitialized.
  for (int i = 0; i < 256; ++i) {
    for (int c = 0; c < 4; ++c) {
      orig_pixels[i][c] = 0u;
    }
  }

  // Test blue
  orig_pixels[0][0] = 255u;
  orig_pixels[0][1] = 0u;
  orig_pixels[0][2] = 0u;
  orig_pixels[0][3] = 128u;
  // Test green
  orig_pixels[1][0] = 0u;
  orig_pixels[1][1] = 255u;
  orig_pixels[1][2] = 0u;
  orig_pixels[1][3] = 0u;
  // Test red
  orig_pixels[2][0] = 0u;
  orig_pixels[2][1] = 0u;
  orig_pixels[2][2] = 255u;
  orig_pixels[2][3] = 255u;
  // Test color
  orig_pixels[3][0] = 16u;
  orig_pixels[3][1] = 64u;
  orig_pixels[3][2] = 192u;
  orig_pixels[3][3] = 224u;
  // Do 16 to test asm version.
  ARGBSepia(&orig_pixels[0][0], 0, 0, 0, 16, 1);
  // Expected values are (weight * 255) >> 7 per channel; alpha is unchanged.
  EXPECT_EQ(33u, orig_pixels[0][0]);
  EXPECT_EQ(43u, orig_pixels[0][1]);
  EXPECT_EQ(47u, orig_pixels[0][2]);
  EXPECT_EQ(128u, orig_pixels[0][3]);
  EXPECT_EQ(135u, orig_pixels[1][0]);
  EXPECT_EQ(175u, orig_pixels[1][1]);
  EXPECT_EQ(195u, orig_pixels[1][2]);
  EXPECT_EQ(0u, orig_pixels[1][3]);
  EXPECT_EQ(69u, orig_pixels[2][0]);
  EXPECT_EQ(89u, orig_pixels[2][1]);
  EXPECT_EQ(99u, orig_pixels[2][2]);
  EXPECT_EQ(255u, orig_pixels[2][3]);
  EXPECT_EQ(88u, orig_pixels[3][0]);
  EXPECT_EQ(114u, orig_pixels[3][1]);
  EXPECT_EQ(127u, orig_pixels[3][2]);
  EXPECT_EQ(224u, orig_pixels[3][3]);

  // Fill a full row with a gradient and convert it repeatedly as a benchmark.
  for (int i = 0; i < 256; ++i) {
    orig_pixels[i][0] = i;
    orig_pixels[i][1] = i / 2;
    orig_pixels[i][2] = i / 3;
    orig_pixels[i][3] = i;
  }

  for (int i = 0; i < 1000 * 1280 * 720 / 256; ++i) {
    ARGBSepia(&orig_pixels[0][0], 0, 0, 0, 256, 1);
  }
}
} // namespace libyuv
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment