Commit 4a4b7374 authored by fbarchard@google.com

Load the color matrix into one vector register with a single unaligned load, then splat each of its four 32-bit lanes into its own register.

BUG=none
TEST=none
R=ryanpetrie@google.com

Review URL: https://webrtc-codereview.appspot.com/3299004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@838 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 6368c10c
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 837 Version: 838
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 837 #define LIBYUV_VERSION 838
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -4477,10 +4477,11 @@ void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) { ...@@ -4477,10 +4477,11 @@ void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) {
void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb, void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
const int8* matrix_argb, int width) { const int8* matrix_argb, int width) {
asm volatile ( asm volatile (
"pshufd $0x00," MEMACCESS(3) ",%%xmm2 \n" "movdqu " MEMACCESS(3) ",%%xmm5 \n"
"pshufd $0x55," MEMACCESS(3) ",%%xmm3 \n" "pshufd $0x00,%%xmm5,%%xmm2 \n"
"pshufd $0xaa," MEMACCESS(3) ",%%xmm4 \n" "pshufd $0x55,%%xmm5,%%xmm3 \n"
"pshufd $0xff," MEMACCESS(3) ",%%xmm5 \n" "pshufd $0xaa,%%xmm5,%%xmm4 \n"
"pshufd $0xff,%%xmm5,%%xmm5 \n"
// 8 pixel loop. // 8 pixel loop.
".p2align 4 \n" ".p2align 4 \n"
......
...@@ -5146,10 +5146,11 @@ void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb, ...@@ -5146,10 +5146,11 @@ void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
mov eax, [esp + 4] /* src_argb */ mov eax, [esp + 4] /* src_argb */
mov edx, [esp + 8] /* dst_argb */ mov edx, [esp + 8] /* dst_argb */
mov ecx, [esp + 12] /* matrix_argb */ mov ecx, [esp + 12] /* matrix_argb */
pshufd xmm2, [ecx], 0x00 movdqu xmm5, [ecx]
pshufd xmm3, [ecx], 0x55 pshufd xmm2, xmm5, 0x00
pshufd xmm4, [ecx], 0xaa pshufd xmm3, xmm5, 0x55
pshufd xmm5, [ecx], 0xff pshufd xmm4, xmm5, 0xaa
pshufd xmm5, xmm5, 0xff
mov ecx, [esp + 16] /* width */ mov ecx, [esp + 16] /* width */
align 4 align 4
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment