Commit 7344440f authored by fbarchard@google.com's avatar fbarchard@google.com

AffineRow for GCC.

BUG=62
TEST=planar_unittest
Review URL: https://webrtc-codereview.appspot.com/733004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@317 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 2a954657
......@@ -27,7 +27,7 @@ deps = {
# Dependencies used by libjpeg-turbo
# Optional jpeg decoder
"trunk/third_party/libjpeg_turbo/":
Var("chromium_trunk") + "/deps/third_party/libjpeg_turbo@119959",
Var("chromium_trunk") + "/deps/third_party/libjpeg_turbo@149334",
# Yasm assembler required for libjpeg_turbo
"trunk/third_party/yasm/":
......
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 315
Version: 316
License: BSD
License File: LICENSE
......
......@@ -228,11 +228,17 @@ int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
uint8* dst_argb, int dst_stride_argb,
int width, int height, int interpolation);
#if defined(__CLR_VER) || defined(COVERAGE_ENABLED) || \
defined(TARGET_IPHONE_SIMULATOR)
#define YUV_DISABLE_ASM
#endif
// Row functions for copying pixels from a source image along a slope to a row
// of the destination. Useful for scaling, rotation, mirroring and texture mapping.
void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
uint8* dst_argb, const float* uv_dudv, int width);
#if defined(_MSC_VER)
// The following are available on all x86 platforms:
#if !defined(YUV_DISABLE_ASM) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
uint8* dst_argb, const float* uv_dudv, int width);
#define HAS_ARGBAFFINEROW_SSE2
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 315
#define LIBYUV_VERSION 316
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -79,6 +79,7 @@ extern "C" {
#define HAS_COMPUTECUMULATIVESUMROW_SSE2
#define HAS_CUMULATIVESUMTOAVERAGE_SSE2
#define HAS_ARGBSHADE_SSE2
#define HAS_ARGBAFFINEROW_SSE2
#endif
// The following are Windows only:
......@@ -87,7 +88,6 @@ extern "C" {
#define HAS_ARGBCOLORTABLEROW_X86
#define HAS_NV12TOARGBROW_SSSE3
#define HAS_NV21TOARGBROW_SSSE3
#define HAS_ARGBAFFINEROW_SSE2
#endif
// The following are disabled when SSSE3 is available:
......
......@@ -3219,6 +3219,82 @@ void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
}
#endif // HAS_ARGBSHADE_SSE2
#ifdef HAS_ARGBAFFINEROW_SSE2
// Copy ARGB pixels from source image with slope to a row of destination.
//
// GCC inline-assembly SSE2 implementation. For each of the `width` output
// pixels a 4-byte pixel is fetched from src_argb at byte offset
// (int)u * 4 + (int)v * src_argb_stride and stored to dst_argb; (u, v) then
// advances by (du, dv).
//
//   src_argb        base pointer of the source image.
//   src_argb_stride value multiplied by the integer v coordinate to form the
//                   byte offset (source row pitch).
//   dst_argb        destination row; 4 bytes are written per pixel.
//   uv_dudv         four floats: [0],[1] are the starting (u, v) and [2],[3]
//                   the per-pixel step (du, dv).
//   width           number of pixels to produce; values < 1 write nothing.
//
// NOTE(review): coordinates are packed to signed 16-bit (packssdw saturates)
// and the stride is used as a signed 16-bit multiplier by pmaddwd, so u, v and
// src_argb_stride are expected to fit in 16 bits -- confirm with callers.
void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
uint8* dst_argb, const float* uv_dudv, int width) {
// Widened copy of the stride so operand %1 is a pointer-sized register that
// can double as the index register in the (%0,%1,1) addressing below.
intptr_t src_argb_stride_temp = src_argb_stride;
asm volatile (
// Setup: xmm2 = (u, v, 0, 0), xmm3 = (du, dv, 0, 0).
"movq (%3),%%xmm2 \n"
"movq 0x8(%3),%%xmm3 \n"
// Build the 16-bit multiplier pair (4, stride) in one dword: the low word 4
// scales u (bytes per pixel), the high word scales v.
"shl $0x10,%1 \n"
"add $0x4,%1 \n"
"movd %1,%%xmm4 \n"
"xor %1,%1 \n" // zero %1; it is reused as the offset register from here on.
// width -= 2; fewer than 2 pixels goes straight to the single-pixel tail.
"sub $0x2,%4 \n"
"jl 29f \n"
// xmm2 = (u, v, u + du, v + dv): coordinates of two consecutive pixels.
"movdqa %%xmm2,%%xmm0 \n"
"addps %%xmm3,%%xmm0 \n"
"movlhps %%xmm0,%%xmm2 \n"
// Broadcast the (4, stride) multiplier pair to all four dwords.
"pshufd $0x0,%%xmm4,%%xmm4 \n"
// xmm3 = (2du, 2dv, 2du, 2dv): step for two pixels per iteration.
"movlhps %%xmm3,%%xmm3 \n"
"addps %%xmm3,%%xmm3 \n"
// NOTE(review): repeats the broadcast done three instructions above; xmm4 is
// already uniform here, so this looks redundant.
"pshufd $0x0,%%xmm4,%%xmm4 \n"
// 2 pixel loop.
".p2align 2 \n"
"20: \n"
"cvttps2dq %%xmm2,%%xmm1 \n" // truncate (u0, v0, u1, v1) to int32.
"packssdw %%xmm1,%%xmm1 \n" // saturate to int16 words.
"pmaddwd %%xmm4,%%xmm1 \n" // dword n = u_n * 4 + v_n * stride (byte offset).
"addps %%xmm3,%%xmm2 \n" // advance both coordinates by two steps.
"movd %%xmm1,%1 \n" // offset of first pixel.
// Keep only the low 28 bits of the offset.
// NOTE(review): a negative offset becomes a positive one under this mask --
// confirm callers keep coordinates inside the image.
"and $0x0fffffff,%1 \n"
"movdqa %%xmm1,%%xmm5 \n"
"pshufd $0x55,%%xmm5,%%xmm5 \n" // select dword 1: offset of second pixel.
"movd (%0,%1,1),%%xmm0 \n" // load first pixel (4 bytes).
"movd %%xmm5,%1 \n"
"and $0x0fffffff,%1 \n"
"movd (%0,%1,1),%%xmm5 \n" // load second pixel (4 bytes).
"punpckldq %%xmm5,%%xmm0 \n" // low 8 bytes = first pixel, second pixel.
"sub $0x2,%4 \n" // sets the flags tested by jge below.
"movq %%xmm0,(%2) \n" // store two pixels.
"lea 0x8(%2),%2 \n" // dst += 8; lea leaves the flags from sub intact.
"jge 20b \n"
"29: \n"
// width is now (remaining - 2); adding 1 makes it (remaining - 1), so a
// negative result means no pixel is left.
"add $0x1,%4 \n"
"jl 19f \n"
// 1 pixel loop. Reached with at most one pixel remaining, so only dword 0 of
// xmm1/xmm4 matters and the step left in xmm3 is not observed.
".p2align 2 \n"
"10: \n"
"cvttps2dq %%xmm2,%%xmm1 \n"
"packssdw %%xmm1,%%xmm1 \n"
"pmaddwd %%xmm4,%%xmm1 \n" // dword 0 = u * 4 + v * stride.
"addps %%xmm3,%%xmm2 \n"
"movd %%xmm1,%1 \n"
"and $0x0fffffff,%1 \n"
"movd (%0,%1,1),%%xmm0 \n" // load one pixel.
"sub $0x1,%4 \n"
"movd %%xmm0,(%2) \n" // store one pixel.
"lea 0x4(%2),%2 \n" // dst += 4 without touching flags.
"jge 10b \n"
"19: \n"
// All operands are read-write: the asm modifies the pointers, the stride
// temporary and width in place.
: "+r"(src_argb), // %0
"+r"(src_argb_stride_temp), // %1
"+r"(dst_argb), // %2
"+r"(uv_dudv), // %3
"+rm"(width) // %4
:
: "memory", "cc"
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
#endif
);
}
#endif // HAS_ARGBAFFINEROW_SSE2
#endif // defined(__x86_64__) || defined(__i386__)
#ifdef __cplusplus
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment