Commit 8f439eac authored by fbarchard@google.com

ARGBColorMatrixRow_SSSE3: use signed (arithmetic) psraw instead of unsigned psrlw, and phaddsw instead of phaddw, before doing the pack

BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/652009

git-svn-id: http://libyuv.googlecode.com/svn/trunk@291 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent d9eb63fc
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 290
Version: 291
License: BSD
License File: LICENSE
......
......@@ -11,7 +11,7 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 290
#define LIBYUV_VERSION 291
#endif // INCLUDE_LIBYUV_VERSION_H_
......@@ -2881,23 +2881,23 @@ void ARGBColorMatrixRow_SSSE3(uint8* dst_argb, const int8* matrix_argb,
"movdqa 0x10(%0),%%xmm6 \n"
"pmaddubsw %%xmm2,%%xmm0 \n"
"pmaddubsw %%xmm2,%%xmm6 \n"
"phaddw %%xmm6,%%xmm0 \n"
"psrlw $0x7,%%xmm0 \n"
"packuswb %%xmm0,%%xmm0 \n"
"movdqa (%0),%%xmm5 \n"
"movdqa 0x10(%0),%%xmm1 \n"
"pmaddubsw %%xmm3,%%xmm5 \n"
"pmaddubsw %%xmm3,%%xmm1 \n"
"phaddw %%xmm1,%%xmm5 \n"
"psrlw $0x7,%%xmm5 \n"
"phaddsw %%xmm6,%%xmm0 \n"
"phaddsw %%xmm1,%%xmm5 \n"
"psraw $0x7,%%xmm0 \n"
"psraw $0x7,%%xmm5 \n"
"packuswb %%xmm0,%%xmm0 \n"
"packuswb %%xmm5,%%xmm5 \n"
"punpcklbw %%xmm5,%%xmm0 \n"
"movdqa (%0),%%xmm5 \n"
"movdqa 0x10(%0),%%xmm1 \n"
"pmaddubsw %%xmm4,%%xmm5 \n"
"pmaddubsw %%xmm4,%%xmm1 \n"
"phaddw %%xmm1,%%xmm5 \n"
"psrlw $0x7,%%xmm5 \n"
"phaddsw %%xmm1,%%xmm5 \n"
"psraw $0x7,%%xmm5 \n"
"packuswb %%xmm5,%%xmm5 \n"
"movdqa (%0),%%xmm6 \n"
"movdqa 0x10(%0),%%xmm1 \n"
......@@ -2905,8 +2905,8 @@ void ARGBColorMatrixRow_SSSE3(uint8* dst_argb, const int8* matrix_argb,
"psrld $0x18,%%xmm1 \n"
"packuswb %%xmm1,%%xmm6 \n"
"packuswb %%xmm6,%%xmm6 \n"
"punpcklbw %%xmm6,%%xmm5 \n"
"movdqa %%xmm0,%%xmm1 \n"
"punpcklbw %%xmm6,%%xmm5 \n"
"punpcklwd %%xmm5,%%xmm0 \n"
"punpckhwd %%xmm5,%%xmm1 \n"
"sub $0x8,%1 \n"
......
......@@ -2936,9 +2936,6 @@ void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) {
// Same as Sepia except matrix is provided.
// TODO(fbarchard): packuswbs only use half of the reg. To make RGBA, combine R
// and B into a high and low, then G/A, unpackl/hbw and then unpckl/hwd.
// TODO(fbarchard): phaddw not paired.
// TODO(fbarchard): Test data copying from mem instead of from reg.
// TODO(fbarchard): packing and then unpacking the A - is simple pand/por faster
__declspec(naked) __declspec(align(16))
void ARGBColorMatrixRow_SSSE3(uint8* dst_argb, const int8* matrix_argb,
int width) {
......@@ -2959,23 +2956,23 @@ void ARGBColorMatrixRow_SSSE3(uint8* dst_argb, const int8* matrix_argb,
movdqa xmm6, [eax + 16]
pmaddubsw xmm0, xmm2
pmaddubsw xmm6, xmm2
phaddw xmm0, xmm6
psrlw xmm0, 7
packuswb xmm0, xmm0 // 8 B values
movdqa xmm5, [eax] // G
movdqa xmm1, [eax + 16]
pmaddubsw xmm5, xmm3
pmaddubsw xmm1, xmm3
phaddw xmm5, xmm1
psrlw xmm5, 7
phaddsw xmm0, xmm6 // B
phaddsw xmm5, xmm1 // G
psraw xmm0, 7 // B
psraw xmm5, 7 // G
packuswb xmm0, xmm0 // 8 B values
packuswb xmm5, xmm5 // 8 G values
punpcklbw xmm0, xmm5 // 8 BG values
movdqa xmm5, [eax] // R
movdqa xmm1, [eax + 16]
pmaddubsw xmm5, xmm4
pmaddubsw xmm1, xmm4
phaddw xmm5, xmm1
psrlw xmm5, 7
phaddsw xmm5, xmm1
psraw xmm5, 7
packuswb xmm5, xmm5 // 8 R values
movdqa xmm6, [eax] // A
movdqa xmm1, [eax + 16]
......@@ -2983,8 +2980,8 @@ void ARGBColorMatrixRow_SSSE3(uint8* dst_argb, const int8* matrix_argb,
psrld xmm1, 24
packuswb xmm6, xmm1
packuswb xmm6, xmm6 // 8 A values
punpcklbw xmm5, xmm6 // 8 RA values
movdqa xmm1, xmm0 // Weave BG, RA together
punpcklbw xmm5, xmm6 // 8 RA values
punpcklwd xmm0, xmm5 // BGRA first 4
punpckhwd xmm1, xmm5 // BGRA next 4
sub ecx, 8
......
......@@ -218,9 +218,9 @@ TESTATOPLANAR(RGB24, 3, I420, 2, 2)
TESTATOPLANAR(RGB565, 2, I420, 2, 2)
TESTATOPLANAR(ARGB1555, 2, I420, 2, 2)
TESTATOPLANAR(ARGB4444, 2, I420, 2, 2)
//TESTATOPLANAR(ARGB, 4, I411, 4, 1)
// TESTATOPLANAR(ARGB, 4, I411, 4, 1)
TESTATOPLANAR(ARGB, 4, I422, 2, 1)
//TESTATOPLANAR(ARGB, 4, I444, 1, 1)
// TESTATOPLANAR(ARGB, 4, I444, 1, 1)
// TODO(fbarchard): Implement and test 411 and 444
#define TESTATOB(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B) \
......@@ -544,7 +544,7 @@ TEST_F(libyuvTest, TestARGBColorMatrix) {
SIMD_ALIGNED(uint8 orig_pixels[256][4]);
// Matrix for Sepia.
static const int8 kARGBToSepiaB[] = {
static const int8 kARGBToSepia[] = {
17, 68, 35, 0,
22, 88, 45, 0,
24, 98, 50, 0,
......@@ -571,7 +571,7 @@ TEST_F(libyuvTest, TestARGBColorMatrix) {
orig_pixels[3][2] = 192u;
orig_pixels[3][3] = 224u;
// Do 16 to test asm version.
ARGBColorMatrix(&orig_pixels[0][0], 0, &kARGBToSepiaB[0], 0, 0, 16, 1);
ARGBColorMatrix(&orig_pixels[0][0], 0, &kARGBToSepia[0], 0, 0, 16, 1);
EXPECT_EQ(33u, orig_pixels[0][0]);
EXPECT_EQ(43u, orig_pixels[0][1]);
EXPECT_EQ(47u, orig_pixels[0][2]);
......@@ -597,7 +597,7 @@ TEST_F(libyuvTest, TestARGBColorMatrix) {
}
for (int i = 0; i < 1000 * 1280 * 720 / 256; ++i) {
ARGBColorMatrix(&orig_pixels[0][0], 0, &kARGBToSepiaB[0], 0, 0, 256, 1);
ARGBColorMatrix(&orig_pixels[0][0], 0, &kARGBToSepia[0], 0, 0, 256, 1);
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment