Commit ec1f854f authored by fbarchard@google.com

Use broadcast to duplicate constants from 16 bytes to 32 bytes to save data space.

BUG=none
TESTED=intelsde
R=brucedawson@google.com

Review URL: https://webrtc-codereview.appspot.com/32029004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@1161 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent a843cafb
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 1160 Version: 1161
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1160 #define LIBYUV_VERSION 1161
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -3804,11 +3804,9 @@ void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) { ...@@ -3804,11 +3804,9 @@ void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
#ifdef HAS_ARGBATTENUATEROW_AVX2 #ifdef HAS_ARGBATTENUATEROW_AVX2
// Shuffle table duplicating alpha. // Shuffle table duplicating alpha.
static const ulvec8 kShuffleAlpha_AVX2 = { static const uvec8 kShuffleAlpha_AVX2 = {
6u, 7u, 6u, 7u, 6u, 7u, 128u, 128u, 6u, 7u, 6u, 7u, 6u, 7u, 128u, 128u,
14u, 15u, 14u, 15u, 14u, 15u, 128u, 128u, 14u, 15u, 14u, 15u, 14u, 15u, 128u, 128u
6u, 7u, 6u, 7u, 6u, 7u, 128u, 128u,
14u, 15u, 14u, 15u, 14u, 15u, 128u, 128u,
}; };
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width) { void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width) {
...@@ -3817,7 +3815,7 @@ void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width) { ...@@ -3817,7 +3815,7 @@ void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width) {
mov edx, [esp + 8] // dst_argb mov edx, [esp + 8] // dst_argb
mov ecx, [esp + 12] // width mov ecx, [esp + 12] // width
sub edx, eax sub edx, eax
vmovdqa ymm4, kShuffleAlpha_AVX2 vbroadcastf128 ymm4,kShuffleAlpha_AVX2
vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0xff000000 vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0xff000000
vpslld ymm5, ymm5, 24 vpslld ymm5, ymm5, 24
...@@ -3899,8 +3897,7 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, ...@@ -3899,8 +3897,7 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
#ifdef HAS_ARGBUNATTENUATEROW_AVX2 #ifdef HAS_ARGBUNATTENUATEROW_AVX2
// Shuffle table duplicating alpha. // Shuffle table duplicating alpha.
static const ulvec8 kUnattenShuffleAlpha_AVX2 = { static const ulvec8 kUnattenShuffleAlpha_AVX2 = {
0u, 1u, 0u, 1u, 0u, 1u, 6u, 7u, 8u, 9u, 8u, 9u, 8u, 9u, 14u, 15, 0u, 1u, 0u, 1u, 0u, 1u, 6u, 7u, 8u, 9u, 8u, 9u, 8u, 9u, 14u, 15u
0u, 1u, 0u, 1u, 0u, 1u, 6u, 7u, 8u, 9u, 8u, 9u, 8u, 9u, 14u, 15,
}; };
// TODO(fbarchard): Enable USE_GATHER for future hardware if faster. // TODO(fbarchard): Enable USE_GATHER for future hardware if faster.
// USE_GATHER is not on by default, due to being a slow instruction. // USE_GATHER is not on by default, due to being a slow instruction.
...@@ -3913,7 +3910,7 @@ void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, ...@@ -3913,7 +3910,7 @@ void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb,
mov edx, [esp + 8] // dst_argb mov edx, [esp + 8] // dst_argb
mov ecx, [esp + 12] // width mov ecx, [esp + 12] // width
sub edx, eax sub edx, eax
vmovdqa ymm4, kUnattenShuffleAlpha_AVX2 vbroadcastf128 ymm4, kUnattenShuffleAlpha_AVX2
align 4 align 4
convertloop: convertloop:
...@@ -3949,7 +3946,7 @@ void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, ...@@ -3949,7 +3946,7 @@ void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb,
mov edx, [esp + 8] // dst_argb mov edx, [esp + 8] // dst_argb
mov ecx, [esp + 12] // width mov ecx, [esp + 12] // width
sub edx, eax sub edx, eax
vmovdqa ymm5, kUnattenShuffleAlpha_AVX2 vbroadcastf128 ymm5, kUnattenShuffleAlpha_AVX2
push esi push esi
push edi push edi
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment