Commit 864f828a authored by fbarchard@google.com

ARGBAffineRow_SSE2 function to copy pixels from a source with slope to a row of destination.

BUG=60
TEST=none
Review URL: https://webrtc-codereview.appspot.com/727004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@313 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 4f10e97f
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 312
Version: 313
License: BSD
License File: LICENSE
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 312
#define LIBYUV_VERSION 313
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -87,6 +87,7 @@ extern "C" {
#define HAS_ARGBCOLORTABLEROW_X86
#define HAS_NV12TOARGBROW_SSSE3
#define HAS_NV21TOARGBROW_SSSE3
#define HAS_ARGBAFFINEROW_SSE2
#endif
// The following are disabled when SSSE3 is available:
......@@ -522,6 +523,8 @@ void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width,
uint32 value);
void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
uint32 value);
// Copy a row of width ARGB pixels from src_argb to dst_argb, stepping through
// the source along a slope. uv_dudv points to 4 floats: the starting source
// coordinate (u, v) followed by the per-pixel step (du, dv).
void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
uint8* dst_argb, const float* uv_dudv, int width);
#ifdef __cplusplus
} // extern "C"
......
......@@ -3347,8 +3347,78 @@ void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
}
#endif // HAS_ARGBSHADE_SSE2
#endif // _M_IX86
#ifdef HAS_ARGBAFFINEROW_SSE2
// Copy ARGB pixels from source image with slope to a row of destination.
//
// uv_dudv points to 4 floats: the starting source coordinate (u, v) followed
// by the per-destination-pixel step (du, dv).  For each of the width
// destination pixels the current coordinate is converted to integers with
// truncation (cvttps2dq) and the 4 byte pixel at
//   src_argb + x * 4 + y * src_argb_stride
// is copied to dst_argb, which advances by 4 bytes per pixel.
//
// Limits imposed by the instruction sequence:
//  - The byte offset is built with packssdw + pmaddwd, so x and y are
//    saturated to signed 16 bit and only the low 16 bits of src_argb_stride
//    are used, as a signed 16 bit multiplier.
//  - No bounds checking is done on the computed source address.
//
// Arguments are read from the stack.  esi is saved and restored; eax, ecx,
// edx and xmm0-xmm5 are overwritten.  A width of 0 or less writes nothing.
__declspec(naked) __declspec(align(16))
void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
                        uint8* dst_argb, const float* uv_dudv, int width) {
  __asm {
    push       esi                        // args start at esp + 8 after this
    mov        eax, [esp + 8]             // src_argb
    mov        esi, [esp + 12]            // stride
    mov        edx, [esp + 16]            // dst_argb
    mov        ecx, [esp + 20]            // pointer to uv_dudv
    movq       xmm2, qword ptr [ecx]      // uv   (upper 64 bits zeroed)
    movq       xmm3, qword ptr [ecx + 8]  // dudv (upper 64 bits zeroed)
    mov        ecx, [esp + 24]            // width
    shl        esi, 16                    // low word = 4, high word = stride
    add        esi, 4
    movd       xmm4, esi                  // pmaddwd multipliers for (x, y)
    sub        ecx, 2
    jl         l2b                        // fewer than 2 pixels

    // Setup for the 2 pixel loop.
    movdqa     xmm0, xmm2
    addps      xmm0, xmm3                 // x1, y1 = uv + dudv
    movlhps    xmm2, xmm0                 // xmm2 = x0, y0, x1, y1
    pshufd     xmm4, xmm4, 0              // dup 4, stride into every dword
    movlhps    xmm3, xmm3                 // xmm3 = du, dv, du, dv
    addps      xmm3, xmm3                 // dudv *= 2 (two pixels per pass)

    // 2 pixel loop
    align      4
 l2:
    cvttps2dq  xmm1, xmm2                 // x, y float to int
    packssdw   xmm1, xmm1                 // x, y as shorts
    pmaddwd    xmm1, xmm4                 // offset = x * 4 + y * stride
    addps      xmm2, xmm3                 // x, y += dx, dy
    movd       esi, xmm1                  // offset of pixel 0
    movdqa     xmm5, xmm1
    pshufd     xmm5, xmm5, 0x55           // move dword 1 down to dword 0
    movd       xmm0, [eax + esi]          // read pixel 0
    movd       esi, xmm5                  // offset of pixel 1
    movd       xmm5, [eax + esi]          // read pixel 1
    punpckldq  xmm0, xmm5                 // pixel 0, pixel 1
    sub        ecx, 2
    movq       qword ptr [edx], xmm0      // write 2 pixels
    lea        edx, [edx + 8]             // lea keeps the flags from sub
    jge        l2

 l2b:
    add        ecx, 2 - 1                 // ecx = remaining pixels - 1
    jl         l1b

    // 1 pixel loop.  ecx is 0 on entry, so this runs exactly once; the step
    // left in xmm3 (dudv or 2 * dudv) therefore never reaches a stored pixel.
    // Only the low dword of the pmaddwd result is used here, so xmm4 does not
    // need to be broadcast on this path.
    align      4
 l1:
    cvttps2dq  xmm1, xmm2                 // x, y float to int
    packssdw   xmm1, xmm1                 // x, y as shorts
    pmaddwd    xmm1, xmm4                 // offset = x * 4 + y * stride
    addps      xmm2, xmm3                 // x, y += dx, dy
    movd       esi, xmm1
    movd       xmm0, [eax + esi]          // copy a pixel
    sub        ecx, 1
    movd       [edx], xmm0
    lea        edx, [edx + 4]             // lea keeps the flags from sub
    jge        l1

 l1b:
    pop        esi
    ret
  }
}
#endif // HAS_ARGBAFFINEROW_SSE2
#endif // _M_IX86
#ifdef __cplusplus
} // extern "C"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment