Commit b3c1a3fe authored by fbarchard@google.com

ARGB Multiply

BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/1028010

git-svn-id: http://libyuv.googlecode.com/svn/trunk@537 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 9780dd4e
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 535 Version: 537
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -118,6 +118,7 @@ extern "C" { ...@@ -118,6 +118,7 @@ extern "C" {
// TODO(fbarchard): Port to gcc. // TODO(fbarchard): Port to gcc.
#if !defined(YUV_DISABLE_ASM) && defined(_M_IX86) #if !defined(YUV_DISABLE_ASM) && defined(_M_IX86)
#define HAS_ARGBCOLORTABLEROW_X86 #define HAS_ARGBCOLORTABLEROW_X86
#define HAS_ARGBMULTIPLYROW_SSE2
#endif #endif
// The following are Yasm x86 only. // The following are Yasm x86 only.
...@@ -1278,6 +1279,7 @@ void ARGBInterpolateRow_SSSE3(uint8* dst_argb, const uint8* src_argb, ...@@ -1278,6 +1279,7 @@ void ARGBInterpolateRow_SSSE3(uint8* dst_argb, const uint8* src_argb,
void ARGBInterpolateRow_NEON(uint8* dst_argb, const uint8* src_argb, void ARGBInterpolateRow_NEON(uint8* dst_argb, const uint8* src_argb,
ptrdiff_t src_stride_argb, int dst_width, ptrdiff_t src_stride_argb, int dst_width,
int source_y_fraction); int source_y_fraction);
void ARGBMultiplyRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
#ifdef __cplusplus #ifdef __cplusplus
} // extern "C" } // extern "C"
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 536 #define LIBYUV_VERSION 537
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -4160,6 +4160,41 @@ void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width, ...@@ -4160,6 +4160,41 @@ void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
} }
#endif // HAS_ARGBSHADEROW_SSE2 #endif // HAS_ARGBSHADEROW_SSE2
#ifdef HAS_ARGBMULTIPLYROW_SSE2
// Multiply 2 rows of ARGB pixels together, 4 pixels at a time.
// Each byte of src_argb is multiplied with the corresponding byte of
// dst_argb and the product is written back to dst_argb, so dst_argb is both
// the second input and the output.
//
// Per byte the result is (src * 0x0101 * dst) >> 16, which is roughly
// src * dst / 255 (for example 255 * 255 yields 254).
//
// src_argb  row of ARGB pixels, read only.
// dst_argb  row of ARGB pixels, read and overwritten with the product.
// width     number of pixels; consumed in steps of 4. The loop test is at the
//           bottom, so the body always runs at least once and the final
//           iteration always processes a full group of 4 pixels.
//
// Aligned to 16 bytes: both rows are accessed with movdqa, which requires
// 16 byte aligned addresses.
//
// The function is naked (no compiler generated prologue/epilogue), so the
// arguments are read directly from the stack relative to esp.
// Registers written: eax, ecx, edx, xmm0, xmm1, xmm2, xmm3, xmm5 and flags.
__declspec(naked) __declspec(align(16))
void ARGBMultiplyRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
__asm {
mov eax, [esp + 4] // eax = src_argb
mov edx, [esp + 8] // edx = dst_argb
mov ecx, [esp + 12] // ecx = width (pixels remaining)
pxor xmm5, xmm5 // constant 0, used to zero extend bytes to words
// edx becomes the offset (dst_argb - src_argb) so that [eax + edx] addresses
// dst while only eax has to be advanced inside the loop.
sub edx, eax
align 16
convertloop:
movdqa xmm0, [eax] // read 4 src pixels (16 bytes)
movdqa xmm2, [eax + edx] // read 4 dst pixels (16 bytes)
movdqa xmm1, xmm0 // copy src for the upper 2 pixels
movdqa xmm3, xmm2 // copy dst for the upper 2 pixels
// src bytes are interleaved with themselves: each word = byte * 0x0101.
punpcklbw xmm0, xmm0 // first 2 src pixels as 8 words
punpckhbw xmm1, xmm1 // next 2 src pixels as 8 words
// dst bytes are interleaved with zero: each word = byte (zero extended).
punpcklbw xmm2, xmm5 // first 2 dst pixels as 8 words
punpckhbw xmm3, xmm5 // next 2 dst pixels as 8 words
// High 16 bits of the unsigned 16x16 product: (src * 0x0101 * dst) >> 16.
pmulhuw xmm0, xmm2 // src * dst for first 2 pixels
pmulhuw xmm1, xmm3 // src * dst for next 2 pixels
packuswb xmm0, xmm1 // pack 16 words back into 16 bytes (4 pixels)
sub ecx, 4 // 4 pixels done; sets the flags tested by jg below
// The store and lea below do not modify flags, so jg still sees the result
// of the sub above.
movdqa [eax + edx], xmm0 // write 4 result pixels to dst
lea eax, [eax + 16] // advance src (and, via edx, dst) by 4 pixels
jg convertloop // loop while remaining width > 0 (signed)
ret
}
}
#endif // HAS_ARGBMULTIPLYROW_SSE2
#ifdef HAS_CUMULATIVESUMTOAVERAGEROW_SSE2 #ifdef HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
// Consider float CumulativeSum. // Consider float CumulativeSum.
// Consider calling CumulativeSum one row at time as needed. // Consider calling CumulativeSum one row at time as needed.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment