Commit c2a889eb authored by fbarchard@google.com

Bump reciprocal up by 1

BUG=none
TEST=none
R=tpsiaki@google.com

Review URL: https://webrtc-codereview.appspot.com/3599004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@847 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 67a0987d
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 846 Version: 847
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 846 #define LIBYUV_VERSION 847
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -5090,19 +5090,20 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft, ...@@ -5090,19 +5090,20 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
int width, int area, uint8* dst, int width, int area, uint8* dst,
int count) { int count) {
asm volatile ( asm volatile (
"movd %5,%%xmm4 \n" "movd %5,%%xmm5 \n"
"cvtdq2ps %%xmm4,%%xmm4 \n" "cvtdq2ps %%xmm5,%%xmm5 \n"
"rcpss %%xmm4,%%xmm4 \n" "rcpss %%xmm5,%%xmm4 \n"
"pshufd $0x0,%%xmm4,%%xmm4 \n" "pshufd $0x0,%%xmm4,%%xmm4 \n"
"sub $0x4,%3 \n" "sub $0x4,%3 \n"
"jl 49f \n" "jl 49f \n"
"cmpl $0x80,%5 \n" "cmpl $0x80,%5 \n"
"ja 40f \n" "ja 40f \n"
"pcmpeqb %%xmm5,%%xmm5 \n" "pshufd $0x0,%%xmm5,%%xmm5 \n"
"psrld $0x1f,%%xmm5 \n" "pcmpeqb %%xmm6,%%xmm6 \n"
"pslld $0x10,%%xmm5 \n" "psrld $0x10,%%xmm6 \n"
"cvtdq2ps %%xmm5,%%xmm5 \n" "cvtdq2ps %%xmm6,%%xmm6 \n"
"addps %%xmm6,%%xmm5 \n"
"mulps %%xmm4,%%xmm5 \n" "mulps %%xmm4,%%xmm5 \n"
"cvtps2dq %%xmm5,%%xmm5 \n" "cvtps2dq %%xmm5,%%xmm5 \n"
"packssdw %%xmm5,%%xmm5 \n" "packssdw %%xmm5,%%xmm5 \n"
...@@ -5222,7 +5223,7 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft, ...@@ -5222,7 +5223,7 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
, "r14" , "r14"
#endif #endif
#if defined(__SSE2__) #if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4" , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
#endif #endif
); );
} }
......
...@@ -5763,11 +5763,11 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft, ...@@ -5763,11 +5763,11 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
mov eax, topleft // eax topleft mov eax, topleft // eax topleft
mov esi, botleft // esi botleft mov esi, botleft // esi botleft
mov edx, width mov edx, width
movd xmm4, area movd xmm5, area
mov edi, dst mov edi, dst
mov ecx, count mov ecx, count
cvtdq2ps xmm4, xmm4 cvtdq2ps xmm5, xmm5
rcpss xmm4, xmm4 // 1.0f / area rcpss xmm4, xmm5 // 1.0f / area
pshufd xmm4, xmm4, 0 pshufd xmm4, xmm4, 0
sub ecx, 4 sub ecx, 4
jl l4b jl l4b
...@@ -5775,13 +5775,14 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft, ...@@ -5775,13 +5775,14 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
cmp area, 128 // 128 pixels will not overflow 15 bits. cmp area, 128 // 128 pixels will not overflow 15 bits.
ja l4 ja l4
pcmpeqb xmm5, xmm5 // constant of 65536.0 pshufd xmm5, xmm5, 0 // area
psrld xmm5, 31 pcmpeqb xmm6, xmm6 // constant of 65536.0 - 1 = 65535.0
pslld xmm5, 16 psrld xmm6, 16
cvtdq2ps xmm5, xmm5 cvtdq2ps xmm6, xmm6
mulps xmm5, xmm4 // 65536.0 * 1 / area addps xmm5, xmm6 // (65536.0 + area - 1)
mulps xmm5, xmm4 // (65536.0 + area - 1) * 1 / area
cvtps2dq xmm5, xmm5 // 0.16 fixed point cvtps2dq xmm5, xmm5 // 0.16 fixed point
packssdw xmm5, xmm5 packssdw xmm5, xmm5 // 16 bit shorts
// 4 pixel loop small blocks. // 4 pixel loop small blocks.
align 4 align 4
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment