Commit 94d42699 authored by Frank Barchard

clang use scalewin

R=harryjin@google.com
TBR=harryjin@google.com
BUG=libyuv:469

Review URL: https://webrtc-codereview.appspot.com/51329004.
parent cda9d38a
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1466
Version: 1467
License: BSD
License File: LICENSE
......
......@@ -23,12 +23,6 @@ extern "C" {
#define LIBYUV_DISABLE_X86
#endif
// Visual C 2012 required for AVX2.
#if defined(_M_IX86) && !defined(__clang__) && \
defined(_MSC_VER) && _MSC_VER >= 1700
#define VISUALC_HAS_AVX2 1
#endif // VisualStudio >= 2012
// The following are available for Visual C and clangcl 32 bit:
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
#define HAS_TRANSPOSEWX8_SSSE3
......
......@@ -30,6 +30,13 @@ extern "C" {
#define VISUALC_HAS_AVX2 1
#endif // VisualStudio >= 2012
// clang >= 3.4.0 required for AVX2.
#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
#if (__clang_major__ > 3) || (__clang_major__ == 3 && (__clang_minor__ >= 4))
#define CLANG_HAS_AVX2 1
#endif // clang >= 3.4
#endif // __clang__
// The following are available on all x86 platforms:
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
......@@ -48,15 +55,16 @@ extern "C" {
#define HAS_SCALEROWDOWN4_SSE2
#endif
// The following are available on VS2012:
#if !defined(LIBYUV_DISABLE_X86) && defined(VISUALC_HAS_AVX2)
// The following are available for Visual C and clangcl 32 bit:
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \
(defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2))
#define HAS_SCALEADDROW_AVX2
#define HAS_SCALEROWDOWN2_AVX2
#define HAS_SCALEROWDOWN4_AVX2
#endif
// The following are available for Visual C and clangcl 32 bit:
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && !defined(__clang__)
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
#define HAS_SCALEADDROW_SSE2
#endif
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1466
#define LIBYUV_VERSION 1467
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -16,7 +16,8 @@ extern "C" {
#endif
// This module is for GCC x86 and x64.
#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
// Offsets for source bytes 0 to 9
static uvec8 kShuf0 =
......
......@@ -16,9 +16,8 @@ namespace libyuv {
extern "C" {
#endif
// This module is for Visual C x86.
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \
defined(_MSC_VER) && !defined(__clang__)
// This module is for 32 bit Visual C x86 and clangcl
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
// Offsets for source bytes 0 to 9
static uvec8 kShuf0 =
......@@ -499,9 +498,9 @@ void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
// src_stride ignored
mov edx, [esp + 12] // dst_ptr
mov ecx, [esp + 16] // dst_width
movdqa xmm3, kShuf0
movdqa xmm4, kShuf1
movdqa xmm5, kShuf2
movdqa xmm3, xmmword ptr kShuf0
movdqa xmm4, xmmword ptr kShuf1
movdqa xmm5, xmmword ptr kShuf2
wloop:
movdqu xmm0, [eax]
......@@ -548,12 +547,12 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
mov esi, [esp + 4 + 8] // src_stride
mov edx, [esp + 4 + 12] // dst_ptr
mov ecx, [esp + 4 + 16] // dst_width
movdqa xmm2, kShuf01
movdqa xmm3, kShuf11
movdqa xmm4, kShuf21
movdqa xmm5, kMadd01
movdqa xmm6, kMadd11
movdqa xmm7, kRound34
movdqa xmm2, xmmword ptr kShuf01
movdqa xmm3, xmmword ptr kShuf11
movdqa xmm4, xmmword ptr kShuf21
movdqa xmm5, xmmword ptr kMadd01
movdqa xmm6, xmmword ptr kMadd11
movdqa xmm7, xmmword ptr kRound34
wloop:
movdqu xmm0, [eax] // pixels 0..7
......@@ -579,7 +578,7 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
lea eax, [eax + 32]
pavgb xmm0, xmm1
pshufb xmm0, xmm4
movdqa xmm1, kMadd21
movdqa xmm1, xmmword ptr kMadd21
pmaddubsw xmm0, xmm1
paddsw xmm0, xmm7
psrlw xmm0, 2
......@@ -605,12 +604,12 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
mov esi, [esp + 4 + 8] // src_stride
mov edx, [esp + 4 + 12] // dst_ptr
mov ecx, [esp + 4 + 16] // dst_width
movdqa xmm2, kShuf01
movdqa xmm3, kShuf11
movdqa xmm4, kShuf21
movdqa xmm5, kMadd01
movdqa xmm6, kMadd11
movdqa xmm7, kRound34
movdqa xmm2, xmmword ptr kShuf01
movdqa xmm3, xmmword ptr kShuf11
movdqa xmm4, xmmword ptr kShuf21
movdqa xmm5, xmmword ptr kMadd01
movdqa xmm6, xmmword ptr kMadd11
movdqa xmm7, xmmword ptr kRound34
wloop:
movdqu xmm0, [eax] // pixels 0..7
......@@ -639,7 +638,7 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
pavgb xmm1, xmm0
pavgb xmm0, xmm1
pshufb xmm0, xmm4
movdqa xmm1, kMadd21
movdqa xmm1, xmmword ptr kMadd21
pmaddubsw xmm0, xmm1
paddsw xmm0, xmm7
psrlw xmm0, 2
......@@ -665,8 +664,8 @@ void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
// src_stride ignored
mov edx, [esp + 12] // dst_ptr
mov ecx, [esp + 16] // dst_width
movdqa xmm4, kShuf38a
movdqa xmm5, kShuf38b
movdqa xmm4, xmmword ptr kShuf38a
movdqa xmm5, xmmword ptr kShuf38b
xloop:
movdqu xmm0, [eax] // 16 pixels -> 0,1,2,3,4,5
......@@ -698,9 +697,9 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
mov esi, [esp + 4 + 8] // src_stride
mov edx, [esp + 4 + 12] // dst_ptr
mov ecx, [esp + 4 + 16] // dst_width
movdqa xmm2, kShufAc
movdqa xmm3, kShufAc3
movdqa xmm4, kScaleAc33
movdqa xmm2, xmmword ptr kShufAc
movdqa xmm3, xmmword ptr kShufAc3
movdqa xmm4, xmmword ptr kScaleAc33
pxor xmm5, xmm5
xloop:
......@@ -763,10 +762,10 @@ void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
mov esi, [esp + 4 + 8] // src_stride
mov edx, [esp + 4 + 12] // dst_ptr
mov ecx, [esp + 4 + 16] // dst_width
movdqa xmm2, kShufAb0
movdqa xmm3, kShufAb1
movdqa xmm4, kShufAb2
movdqa xmm5, kScaleAb2
movdqa xmm2, xmmword ptr kShufAb0
movdqa xmm3, xmmword ptr kShufAb1
movdqa xmm4, xmmword ptr kShufAb2
movdqa xmm5, xmmword ptr kScaleAb2
xloop:
movdqu xmm0, [eax] // average 2 rows into xmm0
......@@ -1233,8 +1232,8 @@ void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
mov ecx, [esp + 8 + 12] // dst_width
movd xmm2, [esp + 8 + 16] // x
movd xmm3, [esp + 8 + 20] // dx
movdqa xmm4, kShuffleColARGB
movdqa xmm5, kShuffleFractions
movdqa xmm4, xmmword ptr kShuffleColARGB
movdqa xmm5, xmmword ptr kShuffleFractions
pcmpeqb xmm6, xmm6 // generate 0x007f for inverting fraction.
psrlw xmm6, 9
pextrw eax, xmm2, 1 // get x0 integer. preroll
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment