Commit 0e83b64e authored by Frank Barchard

ScaleAddRow_AVX2 bug fix: vpermq was using ymm2 instead of ymm3 as its source.

R=harryjin@google.com
BUG=libyuv:462

Review URL: https://webrtc-codereview.appspot.com/56639004.
parent 715a2919
......@@ -50,8 +50,7 @@ extern "C" {
// The following are available on VS2012:
#if !defined(LIBYUV_DISABLE_X86) && defined(VISUALC_HAS_AVX2)
// Some AVX2 versions disabled. See libyuv bug 462.
// #define HAS_SCALEADDROW_AVX2
#define HAS_SCALEADDROW_AVX2
#define HAS_SCALEROWDOWN2_AVX2
#define HAS_SCALEROWDOWN4_AVX2
#endif
......
......@@ -838,17 +838,15 @@ void ScaleAddRow_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
// sum rows
xloop:
vmovdqu ymm3, [eax] // read 32 bytes
vpermq ymm3, ymm2, 0xd8 // unmutate for vpunpck
lea eax, [eax + 32]
vmovdqu ymm0, [edx] // read 32 words from destination
vmovdqu ymm1, [edx + 32]
vpermq ymm3, ymm3, 0xd8 // unmutate for vpunpck
vpunpcklbw ymm2, ymm3, ymm5
vpunpckhbw ymm3, ymm3, ymm5
vpaddusw ymm0, ymm0, ymm2 // sum 16 words
vpaddusw ymm1, ymm1, ymm3
vpaddusw ymm0, ymm2, [edx] // sum 16 words
vpaddusw ymm1, ymm3, [edx + 32]
vmovdqu [edx], ymm0 // write 32 words to destination
vmovdqu [edx + 32], ymm1
lea edx, [edx + 64]
lea edx, [edx + 64]
sub ecx, 32
jg xloop
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment