Commit c2a889eb authored by fbarchard@google.com

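Bump reciprocal up by 1: for small areas (area <= 128) the 0.16 fixed-point scale is now computed as (65536 + area - 1) * (1 / area) instead of 65536 * (1 / area).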

BUG=none
TEST=none
R=tpsiaki@google.com

Review URL: https://webrtc-codereview.appspot.com/3599004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@847 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 67a0987d
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 846
Version: 847
License: BSD
License File: LICENSE
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 846
#define LIBYUV_VERSION 847
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -5090,19 +5090,20 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
int width, int area, uint8* dst,
int count) {
asm volatile (
"movd %5,%%xmm4 \n"
"cvtdq2ps %%xmm4,%%xmm4 \n"
"rcpss %%xmm4,%%xmm4 \n"
"movd %5,%%xmm5 \n"
"cvtdq2ps %%xmm5,%%xmm5 \n"
"rcpss %%xmm5,%%xmm4 \n"
"pshufd $0x0,%%xmm4,%%xmm4 \n"
"sub $0x4,%3 \n"
"jl 49f \n"
"cmpl $0x80,%5 \n"
"ja 40f \n"
"pcmpeqb %%xmm5,%%xmm5 \n"
"psrld $0x1f,%%xmm5 \n"
"pslld $0x10,%%xmm5 \n"
"cvtdq2ps %%xmm5,%%xmm5 \n"
"pshufd $0x0,%%xmm5,%%xmm5 \n"
"pcmpeqb %%xmm6,%%xmm6 \n"
"psrld $0x10,%%xmm6 \n"
"cvtdq2ps %%xmm6,%%xmm6 \n"
"addps %%xmm6,%%xmm5 \n"
"mulps %%xmm4,%%xmm5 \n"
"cvtps2dq %%xmm5,%%xmm5 \n"
"packssdw %%xmm5,%%xmm5 \n"
......@@ -5222,7 +5223,7 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
, "r14"
#endif
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
#endif
);
}
......
......@@ -5763,11 +5763,11 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
mov eax, topleft // eax topleft
mov esi, botleft // esi botleft
mov edx, width
movd xmm4, area
movd xmm5, area
mov edi, dst
mov ecx, count
cvtdq2ps xmm4, xmm4
rcpss xmm4, xmm4 // 1.0f / area
cvtdq2ps xmm5, xmm5
rcpss xmm4, xmm5 // 1.0f / area
pshufd xmm4, xmm4, 0
sub ecx, 4
jl l4b
......@@ -5775,13 +5775,14 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
cmp area, 128 // 128 pixels will not overflow 15 bits.
ja l4
pcmpeqb xmm5, xmm5 // constant of 65536.0
psrld xmm5, 31
pslld xmm5, 16
cvtdq2ps xmm5, xmm5
mulps xmm5, xmm4 // 65536.0 * 1 / area
pshufd xmm5, xmm5, 0 // area
pcmpeqb xmm6, xmm6 // constant of 65536.0 - 1 = 65535.0
psrld xmm6, 16
cvtdq2ps xmm6, xmm6
addps xmm5, xmm6 // (65536.0 + area - 1)
mulps xmm5, xmm4 // (65536.0 + area - 1) * 1 / area
cvtps2dq xmm5, xmm5 // 0.16 fixed point
packssdw xmm5, xmm5
packssdw xmm5, xmm5 // 16 bit shorts
// 4 pixel loop small blocks.
align 4
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment