Commit b5777efe authored by Roman Donchenko, committed by OpenCV Buildbot

Merge pull request #1716 from ilya-lavrenov:resize_area_sse2_fix

parents 8b19df3f e9b54ab8
...@@ -1347,7 +1347,7 @@ public: ...@@ -1347,7 +1347,7 @@ public:
} }
} }
else if (cn == 3) else if (cn == 3)
for ( ; dx <= w - 6; dx += 6, S0 += 12, S1 += 12, D += 6) for ( ; dx <= w - 11; dx += 6, S0 += 12, S1 += 12, D += 6)
{ {
__m128i r0 = _mm_loadu_si128((const __m128i*)S0); __m128i r0 = _mm_loadu_si128((const __m128i*)S0);
__m128i r1 = _mm_loadu_si128((const __m128i*)S1); __m128i r1 = _mm_loadu_si128((const __m128i*)S1);
...@@ -1372,6 +1372,9 @@ public: ...@@ -1372,6 +1372,9 @@ public:
else else
{ {
CV_Assert(cn == 4); CV_Assert(cn == 4);
int v[] = { 0, 0, -1, -1 };
__m128i mask = _mm_loadu_si128((const __m128i*)v);
for ( ; dx <= w - 8; dx += 8, S0 += 16, S1 += 16, D += 8) for ( ; dx <= w - 8; dx += 8, S0 += 16, S1 += 16, D += 8)
{ {
__m128i r0 = _mm_loadu_si128((const __m128i*)S0); __m128i r0 = _mm_loadu_si128((const __m128i*)S0);
...@@ -1385,14 +1388,15 @@ public: ...@@ -1385,14 +1388,15 @@ public:
__m128i s0 = _mm_add_epi16(r0_16l, _mm_srli_si128(r0_16l, 8)); __m128i s0 = _mm_add_epi16(r0_16l, _mm_srli_si128(r0_16l, 8));
__m128i s1 = _mm_add_epi16(r1_16l, _mm_srli_si128(r1_16l, 8)); __m128i s1 = _mm_add_epi16(r1_16l, _mm_srli_si128(r1_16l, 8));
s0 = _mm_add_epi16(s1, _mm_add_epi16(s0, delta2)); s0 = _mm_add_epi16(s1, _mm_add_epi16(s0, delta2));
s0 = _mm_packus_epi16(_mm_srli_epi16(s0, 2), zero); __m128i res0 = _mm_srli_epi16(s0, 2);
_mm_storel_epi64((__m128i*)D, s0);
s0 = _mm_add_epi16(r0_16h, _mm_srli_si128(r0_16h, 8)); s0 = _mm_add_epi16(r0_16h, _mm_srli_si128(r0_16h, 8));
s1 = _mm_add_epi16(r1_16h, _mm_srli_si128(r1_16h, 8)); s1 = _mm_add_epi16(r1_16h, _mm_srli_si128(r1_16h, 8));
s0 = _mm_add_epi16(s1, _mm_add_epi16(s0, delta2)); s0 = _mm_add_epi16(s1, _mm_add_epi16(s0, delta2));
s0 = _mm_packus_epi16(_mm_srli_epi16(s0, 2), zero); __m128i res1 = _mm_srli_epi16(s0, 2);
_mm_storel_epi64((__m128i*)(D+4), s0); s0 = _mm_packus_epi16(_mm_or_si128(_mm_andnot_si128(mask, res0),
_mm_and_si128(mask, _mm_slli_si128(res1, 8))), zero);
_mm_storel_epi64((__m128i*)(D), s0);
} }
} }
...@@ -1445,7 +1449,7 @@ public: ...@@ -1445,7 +1449,7 @@ public:
} }
} }
else if (cn == 3) else if (cn == 3)
for ( ; dx <= w - 3; dx += 3, S0 += 6, S1 += 6, D += 3) for ( ; dx <= w - 4; dx += 3, S0 += 6, S1 += 6, D += 3)
{ {
__m128i r0 = _mm_loadu_si128((const __m128i*)S0); __m128i r0 = _mm_loadu_si128((const __m128i*)S0);
__m128i r1 = _mm_loadu_si128((const __m128i*)S1); __m128i r1 = _mm_loadu_si128((const __m128i*)S1);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment