Commit eaedc1d7 authored by fbarchard@google.com

remove mmx functions

BUG=none
TEST=builds
Review URL: http://webrtc-codereview.appspot.com/269010

git-svn-id: http://libyuv.googlecode.com/svn/trunk@77 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent c82af4a5
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 69 Version: 77
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -1149,11 +1149,6 @@ int I420ToARGB(const uint8* src_y, int src_stride_y, ...@@ -1149,11 +1149,6 @@ int I420ToARGB(const uint8* src_y, int src_stride_y,
(width % 2 == 0)) { (width % 2 == 0)) {
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSE2; FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSE2;
} else } else
#endif
#if defined(HAS_FASTCONVERTYUVTOARGBROW_MMX)
if (width % 2 == 0) {
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_MMX;
} else
#endif #endif
{ {
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_C; FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_C;
...@@ -1167,8 +1162,6 @@ int I420ToARGB(const uint8* src_y, int src_stride_y, ...@@ -1167,8 +1162,6 @@ int I420ToARGB(const uint8* src_y, int src_stride_y,
src_v += src_stride_v; src_v += src_stride_v;
} }
} }
// MMX used for FastConvertYUVToARGBRow requires an emms instruction.
EMMS();
return 0; return 0;
} }
...@@ -1201,11 +1194,6 @@ int I420ToBGRA(const uint8* src_y, int src_stride_y, ...@@ -1201,11 +1194,6 @@ int I420ToBGRA(const uint8* src_y, int src_stride_y,
(width % 2 == 0)) { (width % 2 == 0)) {
FastConvertYUVToBGRARow = FastConvertYUVToBGRARow_SSE2; FastConvertYUVToBGRARow = FastConvertYUVToBGRARow_SSE2;
} else } else
#endif
#if defined(HAS_FASTCONVERTYUVTOBGRAROW_MMX)
if (width % 2 == 0) {
FastConvertYUVToBGRARow = FastConvertYUVToBGRARow_MMX;
} else
#endif #endif
{ {
FastConvertYUVToBGRARow = FastConvertYUVToBGRARow_C; FastConvertYUVToBGRARow = FastConvertYUVToBGRARow_C;
...@@ -1219,7 +1207,6 @@ int I420ToBGRA(const uint8* src_y, int src_stride_y, ...@@ -1219,7 +1207,6 @@ int I420ToBGRA(const uint8* src_y, int src_stride_y,
src_v += src_stride_v; src_v += src_stride_v;
} }
} }
EMMS();
return 0; return 0;
} }
...@@ -1252,11 +1239,6 @@ int I420ToABGR(const uint8* src_y, int src_stride_y, ...@@ -1252,11 +1239,6 @@ int I420ToABGR(const uint8* src_y, int src_stride_y,
(width % 2 == 0)) { (width % 2 == 0)) {
FastConvertYUVToABGRRow = FastConvertYUVToABGRRow_SSE2; FastConvertYUVToABGRRow = FastConvertYUVToABGRRow_SSE2;
} else } else
#endif
#if defined(HAS_FASTCONVERTYUVTOABGRROW_MMX)
if (width % 2 == 0) {
FastConvertYUVToABGRRow = FastConvertYUVToABGRRow_MMX;
} else
#endif #endif
{ {
FastConvertYUVToABGRRow = FastConvertYUVToABGRRow_C; FastConvertYUVToABGRRow = FastConvertYUVToABGRRow_C;
...@@ -1270,7 +1252,6 @@ int I420ToABGR(const uint8* src_y, int src_stride_y, ...@@ -1270,7 +1252,6 @@ int I420ToABGR(const uint8* src_y, int src_stride_y,
src_v += src_stride_v; src_v += src_stride_v;
} }
} }
EMMS();
return 0; return 0;
} }
...@@ -1303,11 +1284,6 @@ int I422ToARGB(const uint8* src_y, int src_stride_y, ...@@ -1303,11 +1284,6 @@ int I422ToARGB(const uint8* src_y, int src_stride_y,
(width % 2 == 0)) { (width % 2 == 0)) {
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSE2; FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSE2;
} else } else
#endif
#if defined(HAS_FASTCONVERTYUVTOARGBROW_MMX)
if (width % 2 == 0) {
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_MMX;
} else
#endif #endif
{ {
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_C; FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_C;
...@@ -1319,8 +1295,6 @@ int I422ToARGB(const uint8* src_y, int src_stride_y, ...@@ -1319,8 +1295,6 @@ int I422ToARGB(const uint8* src_y, int src_stride_y,
src_u += src_stride_u; src_u += src_stride_u;
src_v += src_stride_v; src_v += src_stride_v;
} }
// MMX used for FastConvertYUVToARGBRow requires an emms instruction.
EMMS();
return 0; return 0;
} }
...@@ -1353,13 +1327,9 @@ int I444ToARGB(const uint8* src_y, int src_stride_y, ...@@ -1353,13 +1327,9 @@ int I444ToARGB(const uint8* src_y, int src_stride_y,
FastConvertYUV444ToARGBRow = FastConvertYUV444ToARGBRow_SSE2; FastConvertYUV444ToARGBRow = FastConvertYUV444ToARGBRow_SSE2;
} else } else
#endif #endif
#if defined(HAS_FASTCONVERTYUVTOARGBROW_MMX)
FastConvertYUV444ToARGBRow = FastConvertYUV444ToARGBRow_MMX;
#else
{ {
FastConvertYUV444ToARGBRow = FastConvertYUV444ToARGBRow_C; FastConvertYUV444ToARGBRow = FastConvertYUV444ToARGBRow_C;
} }
#endif
for (int y = 0; y < height; ++y) { for (int y = 0; y < height; ++y) {
FastConvertYUV444ToARGBRow(src_y, src_u, src_v, dst_argb, width); FastConvertYUV444ToARGBRow(src_y, src_u, src_v, dst_argb, width);
dst_argb += dst_stride_argb; dst_argb += dst_stride_argb;
...@@ -1367,8 +1337,6 @@ int I444ToARGB(const uint8* src_y, int src_stride_y, ...@@ -1367,8 +1337,6 @@ int I444ToARGB(const uint8* src_y, int src_stride_y,
src_u += src_stride_u; src_u += src_stride_u;
src_v += src_stride_v; src_v += src_stride_v;
} }
// MMX used for FastConvertYUVToARGBRow requires an emms instruction.
EMMS();
return 0; return 0;
} }
...@@ -1391,11 +1359,6 @@ int I400ToARGB_Reference(const uint8* src_y, int src_stride_y, ...@@ -1391,11 +1359,6 @@ int I400ToARGB_Reference(const uint8* src_y, int src_stride_y,
IS_ALIGNED(dst_argb, 16) && (dst_stride_argb % 16 == 0)) { IS_ALIGNED(dst_argb, 16) && (dst_stride_argb % 16 == 0)) {
FastConvertYToARGBRow = FastConvertYToARGBRow_SSE2; FastConvertYToARGBRow = FastConvertYToARGBRow_SSE2;
} else } else
#endif
#if defined(HAS_FASTCONVERTYUVTOARGBROW_MMX)
if (width % 2 == 0) {
FastConvertYToARGBRow = FastConvertYToARGBRow_MMX;
} else
#endif #endif
{ {
FastConvertYToARGBRow = FastConvertYToARGBRow_C; FastConvertYToARGBRow = FastConvertYToARGBRow_C;
...@@ -1405,8 +1368,6 @@ int I400ToARGB_Reference(const uint8* src_y, int src_stride_y, ...@@ -1405,8 +1368,6 @@ int I400ToARGB_Reference(const uint8* src_y, int src_stride_y,
dst_argb += dst_stride_argb; dst_argb += dst_stride_argb;
src_y += src_stride_y; src_y += src_stride_y;
} }
// MMX used for FastConvertYUVToARGBRow requires an emms instruction.
EMMS();
return 0; return 0;
} }
......
...@@ -13,21 +13,19 @@ ...@@ -13,21 +13,19 @@
#include "libyuv/cpu_id.h" #include "libyuv/cpu_id.h"
#include "libyuv/planar_functions.h" #include "libyuv/planar_functions.h"
#include "rotate_priv.h" #include "rotate_priv.h"
#include "row.h"
namespace libyuv { namespace libyuv {
#if (defined(WIN32) || defined(__x86_64__) || defined(__i386__)) \ #if (defined(WIN32) || defined(__x86_64__) || defined(__i386__)) && \
&& !defined(COVERAGE_ENABLED) && !defined(TARGET_IPHONE_SIMULATOR) !defined(__APPLE__) && \
#if defined(_MSC_VER) !defined(COVERAGE_ENABLED) && !defined(TARGET_IPHONE_SIMULATOR)
#define TALIGN16(t, var) static __declspec(align(16)) t _ ## var
#else
#define TALIGN16(t, var) t var __attribute__((aligned(16)))
#endif
// Shuffle table for reversing the bytes. // Shuffle table for reversing the bytes.
extern "C" TALIGN16(const uint8, kShuffleReverse[16]) = static const uvec8 kShuffleReverse =
{ 15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u }; { 15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u };
// Shuffle table for reversing the bytes of UV channels. // Shuffle table for reversing the bytes of UV channels.
extern "C" TALIGN16(const uint8, kShuffleReverseUV[16]) = static const uvec8 kShuffleReverseUV =
{ 14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u, 15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u }; { 14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u, 15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u };
#endif #endif
...@@ -73,7 +71,7 @@ __asm { ...@@ -73,7 +71,7 @@ __asm {
mov edx, [esp + 12 + 12] // dst mov edx, [esp + 12 + 12] // dst
mov esi, [esp + 12 + 16] // dst_stride mov esi, [esp + 12 + 16] // dst_stride
mov ecx, [esp + 12 + 20] // width mov ecx, [esp + 12 + 20] // width
convertloop : convertloop:
// Read in the data from the source pointer. // Read in the data from the source pointer.
// First round of bit swap. // First round of bit swap.
movq xmm0, qword ptr [eax] movq xmm0, qword ptr [eax]
...@@ -172,7 +170,7 @@ __asm { ...@@ -172,7 +170,7 @@ __asm {
and esp, ~15 and esp, ~15
mov [esp + 16], ecx mov [esp + 16], ecx
mov ecx, [ecx + 16 + 28] // w mov ecx, [ecx + 16 + 28] // w
convertloop : convertloop:
// Read in the data from the source pointer. // Read in the data from the source pointer.
// First round of bit swap. // First round of bit swap.
movdqa xmm0, [eax] movdqa xmm0, [eax]
...@@ -863,9 +861,9 @@ __asm { ...@@ -863,9 +861,9 @@ __asm {
mov eax, [esp + 4] // src mov eax, [esp + 4] // src
mov edx, [esp + 8] // dst mov edx, [esp + 8] // dst
mov ecx, [esp + 12] // width mov ecx, [esp + 12] // width
movdqa xmm5, _kShuffleReverse movdqa xmm5, kShuffleReverse
lea eax, [eax + ecx - 16] lea eax, [eax + ecx - 16]
convertloop : convertloop:
movdqa xmm0, [eax] movdqa xmm0, [eax]
lea eax, [eax - 16] lea eax, [eax - 16]
pshufb xmm0, xmm5 pshufb xmm0, xmm5
...@@ -878,12 +876,16 @@ __asm { ...@@ -878,12 +876,16 @@ __asm {
} }
#elif (defined(__i386__) || defined(__x86_64__)) && \ #elif (defined(__i386__) || defined(__x86_64__)) && \
!defined(__APPLE__) && \
!defined(COVERAGE_ENABLED) && !defined(TARGET_IPHONE_SIMULATOR) !defined(COVERAGE_ENABLED) && !defined(TARGET_IPHONE_SIMULATOR)
#define HAS_REVERSE_LINE_SSSE3 #define HAS_REVERSE_LINE_SSSE3
static void ReverseLine_SSSE3(const uint8* src, uint8* dst, int width) { static void ReverseLine_SSSE3(const uint8* src, uint8* dst, int width) {
intptr_t temp_width = static_cast<intptr_t>(width); intptr_t temp_width = static_cast<intptr_t>(width);
asm volatile ( asm volatile (
"movdqa (%3),%%xmm5 \n" "movdqa %0,%%xmm5 \n"
:: "m"(kShuffleReverse)
);
asm volatile (
"lea -0x10(%0,%2,1),%0 \n" "lea -0x10(%0,%2,1),%0 \n"
"1: \n" "1: \n"
"movdqa (%0),%%xmm0 \n" "movdqa (%0),%%xmm0 \n"
...@@ -896,12 +898,12 @@ static void ReverseLine_SSSE3(const uint8* src, uint8* dst, int width) { ...@@ -896,12 +898,12 @@ static void ReverseLine_SSSE3(const uint8* src, uint8* dst, int width) {
: "+r"(src), // %0 : "+r"(src), // %0
"+r"(dst), // %1 "+r"(dst), // %1
"+r"(temp_width) // %2 "+r"(temp_width) // %2
: "r"(kShuffleReverse) // %3 :
: "memory", "cc" : "memory", "cc"
#if defined(__SSE2__) #if defined(__SSE2__)
, "xmm0", "xmm5" , "xmm0", "xmm5"
#endif #endif
); );
} }
#endif #endif
...@@ -1066,10 +1068,10 @@ __asm { ...@@ -1066,10 +1068,10 @@ __asm {
mov edx, [esp + 4 + 8] // dst_a mov edx, [esp + 4 + 8] // dst_a
mov edi, [esp + 4 + 12] // dst_b mov edi, [esp + 4 + 12] // dst_b
mov ecx, [esp + 4 + 16] // width mov ecx, [esp + 4 + 16] // width
movdqa xmm5, _kShuffleReverseUV movdqa xmm5, kShuffleReverseUV
lea eax, [eax + ecx * 2 - 16] lea eax, [eax + ecx * 2 - 16]
convertloop : convertloop:
movdqa xmm0, [eax] movdqa xmm0, [eax]
lea eax, [eax - 16] lea eax, [eax - 16]
pshufb xmm0, xmm5 pshufb xmm0, xmm5
...@@ -1085,6 +1087,7 @@ __asm { ...@@ -1085,6 +1087,7 @@ __asm {
} }
#elif (defined(__i386__) || defined(__x86_64__)) && \ #elif (defined(__i386__) || defined(__x86_64__)) && \
!defined(__APPLE__) && \
!defined(COVERAGE_ENABLED) && !defined(TARGET_IPHONE_SIMULATOR) !defined(COVERAGE_ENABLED) && !defined(TARGET_IPHONE_SIMULATOR)
#define HAS_REVERSE_LINE_UV_SSSE3 #define HAS_REVERSE_LINE_UV_SSSE3
void ReverseLineUV_SSSE3(const uint8* src, void ReverseLineUV_SSSE3(const uint8* src,
...@@ -1092,28 +1095,31 @@ void ReverseLineUV_SSSE3(const uint8* src, ...@@ -1092,28 +1095,31 @@ void ReverseLineUV_SSSE3(const uint8* src,
int width) { int width) {
intptr_t temp_width = static_cast<intptr_t>(width); intptr_t temp_width = static_cast<intptr_t>(width);
asm volatile ( asm volatile (
"movdqa (%4),%%xmm5 \n" "movdqa %0,%%xmm5 \n"
"lea -0x10(%0,%3,2),%0 \n" :: "m"(kShuffleReverseUV)
);
asm volatile (
"lea -16(%0,%3,2),%0 \n"
"1: \n" "1: \n"
"movdqa (%0),%%xmm0 \n" "movdqa (%0),%%xmm0 \n"
"lea -0x10(%0),%0 \n" "lea -16(%0),%0 \n"
"pshufb %%xmm5,%%xmm0 \n" "pshufb %%xmm5,%%xmm0 \n"
"movlpd %%xmm0,(%1) \n" "movlpd %%xmm0,(%1) \n"
"lea 0x8(%1),%1 \n" "lea 8(%1),%1 \n"
"movhpd %%xmm0,(%2) \n" "movhpd %%xmm0,(%2) \n"
"lea 0x8(%2),%2 \n" "lea 8(%2),%2 \n"
"sub $0x8,%3 \n" "sub $8,%3 \n"
"ja 1b \n" "ja 1b \n"
: "+r"(src), // %0 : "+r"(src), // %0
"+r"(dst_a), // %1 "+r"(dst_a), // %1
"+r"(dst_b), // %2 "+r"(dst_b), // %2
"+r"(temp_width) // %3 "+r"(temp_width) // %3
: "r"(kShuffleReverseUV) // %4 :
: "memory", "cc" : "memory", "cc"
#if defined(__SSE2__) #if defined(__SSE2__)
, "xmm0", "xmm5" , "xmm0", "xmm5"
#endif #endif
); );
} }
#endif #endif
......
...@@ -51,15 +51,6 @@ ...@@ -51,15 +51,6 @@
#define HAS_FASTCONVERTYTOARGBROW_SSE2 #define HAS_FASTCONVERTYTOARGBROW_SSE2
#endif #endif
// The following are available on Windows and GCC 32 bit
#if (defined(WIN32) || \
defined(__i386__)) && \
!defined(LIBYUV_DISABLE_ASM)
#define HAS_FASTCONVERTYUVTOARGBROW_MMX
#define HAS_FASTCONVERTYUVTOBGRAROW_MMX
#define HAS_FASTCONVERTYUVTOABGRROW_MMX
#endif
// The following are available on Windows // The following are available on Windows
#if defined(WIN32) && \ #if defined(WIN32) && \
!defined(LIBYUV_DISABLE_ASM) !defined(LIBYUV_DISABLE_ASM)
...@@ -128,12 +119,14 @@ void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int pix); ...@@ -128,12 +119,14 @@ void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int pix);
#if defined(_MSC_VER) #if defined(_MSC_VER)
#define SIMD_ALIGNED(var) __declspec(align(16)) var #define SIMD_ALIGNED(var) __declspec(align(16)) var
#define TALIGN16(t, var) static __declspec(align(16)) t _ ## var typedef __declspec(align(16)) signed char vec8[16];
typedef __declspec(align(16)) unsigned char uvec8[16];
typedef __declspec(align(16)) signed short vec16[8];
#else // __GNUC__ #else // __GNUC__
#define SIMD_ALIGNED(var) var __attribute__((aligned(16))) #define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
#define TALIGN16(t, var) t var __attribute__((aligned(16)))
typedef signed char __attribute__((vector_size(16))) vec8; typedef signed char __attribute__((vector_size(16))) vec8;
typedef unsigned char __attribute__((vector_size(16))) uvec8; typedef unsigned char __attribute__((vector_size(16))) uvec8;
typedef signed short __attribute__((vector_size(16))) vec16;
#endif #endif
extern "C" SIMD_ALIGNED(const int16 kCoefficientsRgbY[768][4]); extern "C" SIMD_ALIGNED(const int16 kCoefficientsRgbY[768][4]);
...@@ -204,36 +197,6 @@ void FastConvertYToARGBRow_SSE2(const uint8* y_buf, ...@@ -204,36 +197,6 @@ void FastConvertYToARGBRow_SSE2(const uint8* y_buf,
int width); int width);
#endif #endif
#ifdef HAS_FASTCONVERTYUVTOARGBROW_MMX
void FastConvertYUVToARGBRow_MMX(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYUVToBGRARow_MMX(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYUVToABGRRow_MMX(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYUV444ToARGBRow_MMX(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYToARGBRow_MMX(const uint8* y_buf,
uint8* rgb_buf,
int width);
#endif
#ifdef HAS_FASTCONVERTYUVTOARGBROW_SSSE3 #ifdef HAS_FASTCONVERTYUVTOARGBROW_SSSE3
void FastConvertYUVToARGBRow_SSSE3(const uint8* y_buf, void FastConvertYUVToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
...@@ -268,42 +231,6 @@ void FastConvertYToARGBRow_SSE2(const uint8* y_buf, ...@@ -268,42 +231,6 @@ void FastConvertYToARGBRow_SSE2(const uint8* y_buf,
#endif #endif
// Method to force C version.
//#define USE_MMX 0
//#define USE_SSE2 0
#if !defined(USE_MMX)
// Windows, Mac and Linux use MMX
#if defined(__i386__) || defined(_MSC_VER)
#define USE_MMX 1
#else
#define USE_MMX 0
#endif
#endif
#if !defined(USE_SSE2)
#if defined(__SSE2__) || defined(ARCH_CPU_X86_64) || _M_IX86_FP==2
#define USE_SSE2 1
#else
#define USE_SSE2 0
#endif
#endif
// x64 uses MMX2 (SSE) so emms is not required.
// Warning C4799: function has no EMMS instruction.
// EMMS() is slow and should be called by the calling function once per image.
#if USE_MMX && !defined(ARCH_CPU_X86_64)
#if defined(_MSC_VER)
#define EMMS() __asm emms
#pragma warning(disable: 4799)
#else
#define EMMS() asm("emms")
#endif
#else
#define EMMS()
#endif
} // extern "C" } // extern "C"
#endif // LIBYUV_SOURCE_ROW_H_ #endif // LIBYUV_SOURCE_ROW_H_
...@@ -542,231 +542,6 @@ void FastConvertYToARGBRow_SSE2(const uint8* y_buf, // rdi ...@@ -542,231 +542,6 @@ void FastConvertYToARGBRow_SSE2(const uint8* y_buf, // rdi
#endif #endif
); );
} }
#endif
#ifdef HAS_FASTCONVERTYUVTOARGBROW_MMX
// 32 bit mmx gcc version
// UNDERSCORE is the string pasted in front of C symbol names (the
// kCoefficients* tables) inside the assembly strings that follow: "_" when
// OSX is defined, empty otherwise.
#ifdef OSX
#define UNDERSCORE "_"
#else
#define UNDERSCORE ""
#endif
// FastConvertYUVToARGBRow_MMX - 32-bit x86 MMX row converter, implemented as
// file-scope assembly right after this prototype.
//
// Each loop iteration reads two bytes from y_buf and one byte each from u_buf
// and v_buf, uses every byte as an index into kCoefficientsRgbY (8-byte
// entries; Y entries at offset 0, U entries at +2048, V entries at +4096),
// adds the entries with signed saturation, shifts each 16-bit lane right by
// 6, packs to unsigned bytes and stores 8 bytes to rgb_buf. The width counter
// is decremented by 2 per iteration, so output is always produced in pairs.
//
// No emms is executed in this routine; MMX state is left for the caller.
void FastConvertYUVToARGBRow_MMX(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
asm(
".text \n"
// Symbol is emitted with a leading underscore when OSX or IOS is defined.
#if defined(OSX) || defined(IOS)
".globl _FastConvertYUVToARGBRow_MMX \n"
"_FastConvertYUVToARGBRow_MMX: \n"
#else
".global FastConvertYUVToARGBRow_MMX \n"
"FastConvertYUVToARGBRow_MMX: \n"
#endif
"pusha \n"  // save all 8 general registers (32 bytes)
// After pusha + return address, the first stack argument is at 0x24(%esp).
"mov 0x24(%esp),%edx \n"  // edx = y_buf
"mov 0x28(%esp),%edi \n"  // edi = u_buf
"mov 0x2c(%esp),%esi \n"  // esi = v_buf
"mov 0x30(%esp),%ebp \n"  // ebp = rgb_buf
"mov 0x34(%esp),%ecx \n"  // ecx = width
"1: \n"
"movzbl (%edi),%eax \n"  // eax = next U byte
"lea 1(%edi),%edi \n"
"movzbl (%esi),%ebx \n"  // ebx = next V byte
"lea 1(%esi),%esi \n"
"movq " UNDERSCORE "kCoefficientsRgbY+2048(,%eax,8),%mm0\n"  // mm0 = U entry
"movzbl (%edx),%eax \n"  // eax = first Y byte
"paddsw " UNDERSCORE "kCoefficientsRgbY+4096(,%ebx,8),%mm0\n"  // mm0 += V entry
"movzbl 0x1(%edx),%ebx \n"  // ebx = second Y byte
"movq " UNDERSCORE "kCoefficientsRgbY(,%eax,8),%mm1\n"  // mm1 = first Y entry
"lea 2(%edx),%edx \n"
"movq " UNDERSCORE "kCoefficientsRgbY(,%ebx,8),%mm2\n"  // mm2 = second Y entry
"paddsw %mm0,%mm1 \n"  // both pixels share the same U+V sum
"paddsw %mm0,%mm2 \n"
"psraw $0x6,%mm1 \n"  // arithmetic shift right by 6 per 16-bit lane
"psraw $0x6,%mm2 \n"
"packuswb %mm2,%mm1 \n"  // saturate to bytes: first pixel low, second high
"movq %mm1,0x0(%ebp) \n"  // store 8 bytes (two 4-byte pixels)
"lea 8(%ebp),%ebp \n"
"sub $0x2,%ecx \n"  // two pixels consumed
"ja 1b \n"  // loop while the subtraction left a value above zero (unsigned)
"popa \n"
"ret \n"
);
// FastConvertYUVToBGRARow_MMX - 32-bit x86 MMX row converter, implemented as
// file-scope assembly right after this prototype.
//
// Each loop iteration reads two bytes from y_buf and one byte each from u_buf
// and v_buf, uses every byte as an index into kCoefficientsBgraY (8-byte
// entries; Y entries at offset 0, U entries at +2048, V entries at +4096),
// adds the entries with signed saturation, shifts each 16-bit lane right by
// 6, packs to unsigned bytes and stores 8 bytes to rgb_buf. The width counter
// is decremented by 2 per iteration, so output is always produced in pairs.
//
// No emms is executed in this routine; MMX state is left for the caller.
void FastConvertYUVToBGRARow_MMX(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
asm(
".text \n"
// Symbol is emitted with a leading underscore when OSX or IOS is defined.
#if defined(OSX) || defined(IOS)
".globl _FastConvertYUVToBGRARow_MMX \n"
"_FastConvertYUVToBGRARow_MMX: \n"
#else
".global FastConvertYUVToBGRARow_MMX \n"
"FastConvertYUVToBGRARow_MMX: \n"
#endif
"pusha \n"  // save all 8 general registers (32 bytes)
// After pusha + return address, the first stack argument is at 0x24(%esp).
"mov 0x24(%esp),%edx \n"  // edx = y_buf
"mov 0x28(%esp),%edi \n"  // edi = u_buf
"mov 0x2c(%esp),%esi \n"  // esi = v_buf
"mov 0x30(%esp),%ebp \n"  // ebp = rgb_buf
"mov 0x34(%esp),%ecx \n"  // ecx = width
"1: \n"
"movzbl (%edi),%eax \n"  // eax = next U byte
"lea 1(%edi),%edi \n"
"movzbl (%esi),%ebx \n"  // ebx = next V byte
"lea 1(%esi),%esi \n"
"movq " UNDERSCORE "kCoefficientsBgraY+2048(,%eax,8),%mm0\n"  // mm0 = U entry
"movzbl (%edx),%eax \n"  // eax = first Y byte
"paddsw " UNDERSCORE "kCoefficientsBgraY+4096(,%ebx,8),%mm0\n"  // mm0 += V entry
"movzbl 0x1(%edx),%ebx \n"  // ebx = second Y byte
"movq " UNDERSCORE "kCoefficientsBgraY(,%eax,8),%mm1\n"  // mm1 = first Y entry
"lea 2(%edx),%edx \n"
"movq " UNDERSCORE "kCoefficientsBgraY(,%ebx,8),%mm2\n"  // mm2 = second Y entry
"paddsw %mm0,%mm1 \n"  // both pixels share the same U+V sum
"paddsw %mm0,%mm2 \n"
"psraw $0x6,%mm1 \n"  // arithmetic shift right by 6 per 16-bit lane
"psraw $0x6,%mm2 \n"
"packuswb %mm2,%mm1 \n"  // saturate to bytes: first pixel low, second high
"movq %mm1,0x0(%ebp) \n"  // store 8 bytes (two 4-byte pixels)
"lea 8(%ebp),%ebp \n"
"sub $0x2,%ecx \n"  // two pixels consumed
"ja 1b \n"  // loop while the subtraction left a value above zero (unsigned)
"popa \n"
"ret \n"
);
// FastConvertYUVToABGRRow_MMX - 32-bit x86 MMX row converter, implemented as
// file-scope assembly right after this prototype.
//
// Each loop iteration reads two bytes from y_buf and one byte each from u_buf
// and v_buf, uses every byte as an index into kCoefficientsAbgrY (8-byte
// entries; Y entries at offset 0, U entries at +2048, V entries at +4096),
// adds the entries with signed saturation, shifts each 16-bit lane right by
// 6, packs to unsigned bytes and stores 8 bytes to rgb_buf. The width counter
// is decremented by 2 per iteration, so output is always produced in pairs.
//
// No emms is executed in this routine; MMX state is left for the caller.
void FastConvertYUVToABGRRow_MMX(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
asm(
".text \n"
// Symbol is emitted with a leading underscore when OSX or IOS is defined.
#if defined(OSX) || defined(IOS)
".globl _FastConvertYUVToABGRRow_MMX \n"
"_FastConvertYUVToABGRRow_MMX: \n"
#else
".global FastConvertYUVToABGRRow_MMX \n"
"FastConvertYUVToABGRRow_MMX: \n"
#endif
"pusha \n"  // save all 8 general registers (32 bytes)
// After pusha + return address, the first stack argument is at 0x24(%esp).
"mov 0x24(%esp),%edx \n"  // edx = y_buf
"mov 0x28(%esp),%edi \n"  // edi = u_buf
"mov 0x2c(%esp),%esi \n"  // esi = v_buf
"mov 0x30(%esp),%ebp \n"  // ebp = rgb_buf
"mov 0x34(%esp),%ecx \n"  // ecx = width
"1: \n"
"movzbl (%edi),%eax \n"  // eax = next U byte
"lea 1(%edi),%edi \n"
"movzbl (%esi),%ebx \n"  // ebx = next V byte
"lea 1(%esi),%esi \n"
"movq " UNDERSCORE "kCoefficientsAbgrY+2048(,%eax,8),%mm0\n"  // mm0 = U entry
"movzbl (%edx),%eax \n"  // eax = first Y byte
"paddsw " UNDERSCORE "kCoefficientsAbgrY+4096(,%ebx,8),%mm0\n"  // mm0 += V entry
"movzbl 0x1(%edx),%ebx \n"  // ebx = second Y byte
"movq " UNDERSCORE "kCoefficientsAbgrY(,%eax,8),%mm1\n"  // mm1 = first Y entry
"lea 2(%edx),%edx \n"
"movq " UNDERSCORE "kCoefficientsAbgrY(,%ebx,8),%mm2\n"  // mm2 = second Y entry
"paddsw %mm0,%mm1 \n"  // both pixels share the same U+V sum
"paddsw %mm0,%mm2 \n"
"psraw $0x6,%mm1 \n"  // arithmetic shift right by 6 per 16-bit lane
"psraw $0x6,%mm2 \n"
"packuswb %mm2,%mm1 \n"  // saturate to bytes: first pixel low, second high
"movq %mm1,0x0(%ebp) \n"  // store 8 bytes (two 4-byte pixels)
"lea 8(%ebp),%ebp \n"
"sub $0x2,%ecx \n"  // two pixels consumed
"ja 1b \n"  // loop while the subtraction left a value above zero (unsigned)
"popa \n"
"ret \n"
);
// FastConvertYUV444ToARGBRow_MMX - 32-bit x86 MMX row converter, implemented
// as file-scope assembly right after this prototype.
//
// Unlike the two-pixels-per-iteration variants, each loop iteration here
// consumes one byte from each of y_buf, u_buf and v_buf, sums the three
// matching kCoefficientsRgbY entries (8-byte entries; Y at offset 0, U at
// +2048, V at +4096) with signed saturation, shifts each 16-bit lane right by
// 6, packs to unsigned bytes and stores 4 bytes to rgb_buf. The width counter
// is decremented by 1 per iteration.
//
// No emms is executed in this routine; MMX state is left for the caller.
void FastConvertYUV444ToARGBRow_MMX(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
asm(
".text \n"
// Symbol is emitted with a leading underscore when OSX or IOS is defined.
#if defined(OSX) || defined(IOS)
".globl _FastConvertYUV444ToARGBRow_MMX \n"
"_FastConvertYUV444ToARGBRow_MMX: \n"
#else
".global FastConvertYUV444ToARGBRow_MMX \n"
"FastConvertYUV444ToARGBRow_MMX: \n"
#endif
"pusha \n"  // save all 8 general registers (32 bytes)
// After pusha + return address, the first stack argument is at 0x24(%esp).
"mov 0x24(%esp),%edx \n"  // edx = y_buf
"mov 0x28(%esp),%edi \n"  // edi = u_buf
"mov 0x2c(%esp),%esi \n"  // esi = v_buf
"mov 0x30(%esp),%ebp \n"  // ebp = rgb_buf
"mov 0x34(%esp),%ecx \n"  // ecx = width
"1: \n"
"movzbl (%edi),%eax \n"  // eax = next U byte
"lea 1(%edi),%edi \n"
"movzbl (%esi),%ebx \n"  // ebx = next V byte
"lea 1(%esi),%esi \n"
"movq " UNDERSCORE "kCoefficientsRgbY+2048(,%eax,8),%mm0\n"  // mm0 = U entry
"movzbl (%edx),%eax \n"  // eax = next Y byte
"paddsw " UNDERSCORE "kCoefficientsRgbY+4096(,%ebx,8),%mm0\n"  // mm0 += V entry
"lea 1(%edx),%edx \n"
"paddsw " UNDERSCORE "kCoefficientsRgbY(,%eax,8),%mm0\n"  // mm0 += Y entry
"psraw $0x6,%mm0 \n"  // arithmetic shift right by 6 per 16-bit lane
"packuswb %mm0,%mm0 \n"  // saturate the four lanes to unsigned bytes
"movd %mm0,0x0(%ebp) \n"  // store 4 bytes (one pixel)
"lea 4(%ebp),%ebp \n"
"sub $0x1,%ecx \n"  // one pixel consumed
"ja 1b \n"  // loop while the subtraction left a value above zero (unsigned)
"popa \n"
"ret \n"
);
// FastConvertYToARGBRow_MMX - 32-bit x86 MMX row converter for a Y-only
// source, implemented as file-scope assembly right after this prototype.
//
// Each loop iteration reads two bytes from y_buf, looks each one up in
// kCoefficientsRgbY (8-byte entries, base offset only; no U/V contribution is
// added), shifts each 16-bit lane right by 6, packs to unsigned bytes and
// stores 8 bytes to rgb_buf. The width counter is decremented by 2 per
// iteration, so output is always produced in pairs.
//
// No emms is executed in this routine; MMX state is left for the caller.
void FastConvertYToARGBRow_MMX(const uint8* y_buf,
uint8* rgb_buf,
int width);
asm(
".text \n"
// Symbol is emitted with a leading underscore when OSX or IOS is defined.
#if defined(OSX) || defined(IOS)
".globl _FastConvertYToARGBRow_MMX \n"
"_FastConvertYToARGBRow_MMX: \n"
#else
".global FastConvertYToARGBRow_MMX \n"
"FastConvertYToARGBRow_MMX: \n"
#endif
"push %ebx \n"  // ebx is used as a scratch index and restored before ret
// After the push + return address, the first stack argument is at 0x8(%esp).
"mov 0x8(%esp),%eax \n"  // eax = y_buf
"mov 0xc(%esp),%edx \n"  // edx = rgb_buf
"mov 0x10(%esp),%ecx \n"  // ecx = width
"1: \n"
"movzbl (%eax),%ebx \n"  // ebx = first Y byte
"movq " UNDERSCORE "kCoefficientsRgbY(,%ebx,8),%mm0\n"  // mm0 = first Y entry
"psraw $0x6,%mm0 \n"  // arithmetic shift right by 6 per 16-bit lane
"movzbl 0x1(%eax),%ebx \n"  // ebx = second Y byte
"movq " UNDERSCORE "kCoefficientsRgbY(,%ebx,8),%mm1\n"  // mm1 = second Y entry
"psraw $0x6,%mm1 \n"
"packuswb %mm1,%mm0 \n"  // saturate to bytes: first pixel low, second high
"lea 0x2(%eax),%eax \n"
"movq %mm0,(%edx) \n"  // store 8 bytes (two 4-byte pixels)
"lea 0x8(%edx),%edx \n"
"sub $0x2,%ecx \n"  // two pixels consumed
"ja 1b \n"  // loop while the subtraction left a value above zero (unsigned)
"pop %ebx \n"
"ret \n"
);
#endif #endif
#ifdef HAS_ARGBTOYROW_SSSE3 #ifdef HAS_ARGBTOYROW_SSSE3
......
This diff is collapsed.
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include <string.h> #include <string.h>
#include "libyuv/cpu_id.h" #include "libyuv/cpu_id.h"
#include "row.h"
#if defined(_MSC_VER) #if defined(_MSC_VER)
#define ALIGN16(var) __declspec(align(16)) var #define ALIGN16(var) __declspec(align(16)) var
...@@ -21,6 +22,7 @@ ...@@ -21,6 +22,7 @@
#define ALIGN16(var) var __attribute__((aligned(16))) #define ALIGN16(var) var __attribute__((aligned(16)))
#endif #endif
// Note: A Neon reference manual // Note: A Neon reference manual
// http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0204j/CJAJIIGG.html // http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0204j/CJAJIIGG.html
// Note: Some SSE2 reference manuals // Note: Some SSE2 reference manuals
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment