Commit baff521b authored by Alexander Alekhin

imgproc(Canny): eliminate unnecessary operations

drop manual loop unrolling:
- it blocks compiler optimizations
- it gives no measurable speedup on i5
parent 1fbdca83
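
For context, the final pass turns the hysteresis map into the output edge image. A minimal scalar sketch of that mapping (an illustration, not the OpenCV source itself), assuming map labels 0/1 mark non-edge pixels and 2 marks edge pixels as in this implementation:

    #include <cstdint>
    #include <cstddef>

    // Scalar form of the final pass: edge label 2 maps to 255, labels 0/1 map to 0.
    static void finalPassScalar(const uint8_t* pmap, uint8_t* pdst, size_t n)
    {
        for (size_t j = 0; j < n; j++)
        {
            // pmap[j] >> 1 is 1 only for label 2; negating and truncating to
            // 8 bits turns that 1 into 255 and leaves 0 unchanged, matching
            // (pmap[j] == 2 ? 255 : 0).
            pdst[j] = (uint8_t)(-(pmap[j] >> 1));
        }
    }

The rewritten SIMD path below expresses the same test as a byte compare against 2 followed by a select between 0xFF and 0x00, removing the old widen/shift/narrow/negate sequence.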
@@ -812,8 +812,6 @@ public:
 
     ~finalPass() {}
 
-    finalPass& operator=(const finalPass&) {return *this;}
-
     void operator()(const Range &boundaries) const
     {
         // the final pass, form the final image
@@ -821,77 +819,39 @@ public:
         {
             int j = 0;
             uchar *pdst = dst.ptr<uchar>(i);
-            uchar *pmap;
+            const uchar *pmap = map.ptr<uchar>(i + 1);
 #if CV_SIMD128
             if(haveSIMD)
-                pmap = (uchar*)map.ptr<uchar>(i + 1) + CV_MALLOC_SIMD128;
+                pmap += CV_MALLOC_SIMD128;
             else
 #endif
-                pmap = (uchar*)map.ptr<uchar>(i + 1) + 1;
+                pmap += 1;
 #if CV_SIMD128
             if(haveSIMD) {
-                const v_int8x16 v_zero = v_setzero_s8();
+                const v_uint8x16 v_zero = v_setzero_u8();
+                const v_uint8x16 v_ff = ~v_zero;
+                const v_uint8x16 v_two(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
 
-                for(; j <= dst.cols - 32; j += 32) {
-                    v_uint8x16 v_pmap1 = v_load_aligned((const unsigned char*)(pmap + j));
-                    v_uint8x16 v_pmap2 = v_load_aligned((const unsigned char*)(pmap + j + 16));
-                    v_uint16x8 v_pmaplo1, v_pmaphi1, v_pmaplo2, v_pmaphi2;
-                    v_expand(v_pmap1, v_pmaplo1, v_pmaphi1);
-                    v_expand(v_pmap2, v_pmaplo2, v_pmaphi2);
-                    v_pmaplo1 = v_pmaplo1 >> 1;
-                    v_pmaphi1 = v_pmaphi1 >> 1;
-                    v_pmaplo2 = v_pmaplo2 >> 1;
-                    v_pmaphi2 = v_pmaphi2 >> 1;
-                    v_pmap1 = v_pack(v_pmaplo1, v_pmaphi1);
-                    v_pmap2 = v_pack(v_pmaplo2, v_pmaphi2);
-                    v_pmap1 = v_reinterpret_as_u8(v_zero - v_reinterpret_as_s8(v_pmap1));
-                    v_pmap2 = v_reinterpret_as_u8(v_zero - v_reinterpret_as_s8(v_pmap2));
-                    v_store((pdst + j), v_pmap1);
-                    v_store((pdst + j + 16), v_pmap2);
-                }
-                if(j <= dst.cols - 16) {
+                for (; j <= dst.cols - 16; j += 16)
+                {
                     v_uint8x16 v_pmap = v_load_aligned((const unsigned char*)(pmap + j));
-
-                    v_uint16x8 v_pmaplo;
-                    v_uint16x8 v_pmaphi;
-                    v_expand(v_pmap, v_pmaplo, v_pmaphi);
-                    v_pmaplo = v_pmaplo >> 1;
-                    v_pmaphi = v_pmaphi >> 1;
-                    v_pmap = v_pack(v_pmaplo, v_pmaphi);
-                    v_pmap = v_reinterpret_as_u8(v_zero - v_reinterpret_as_s8(v_pmap));
+                    v_pmap = v_select(v_pmap == v_two, v_ff, v_zero);
                     v_store((pdst + j), v_pmap);
-                    j += 16;
                 }
-                if(j <= dst.cols - 8) {
-                    v_uint8x16 v_pmap = v_load_halves((const unsigned char*)(pmap + j), (const unsigned char*)(pmap + j));
-
-                    v_uint16x8 v_pmaplo;
-                    v_uint16x8 v_pmaphi;
-                    v_expand(v_pmap, v_pmaplo, v_pmaphi);
-                    v_pmaplo = v_pmaplo >> 1;
-                    v_pmaphi = v_pmaphi >> 1;
-                    v_pmap = v_pack(v_pmaplo, v_pmaphi);
-                    v_pmap = v_reinterpret_as_u8(v_zero - v_reinterpret_as_s8(v_pmap));
+                if (j <= dst.cols - 8)
+                {
+                    v_uint8x16 v_pmap = v_load_low((const unsigned char*)(pmap + j));
+                    v_pmap = v_select(v_pmap == v_two, v_ff, v_zero);
                     v_store_low((pdst + j), v_pmap);
                     j += 8;
                 }
             }
 #endif
             for (; j < dst.cols; j++)
+            {
                 pdst[j] = (uchar)-(pmap[j] >> 1);
+            }
         }
     }
@@ -901,6 +861,9 @@ private:
 #if CV_SIMD128
     bool haveSIMD;
 #endif
+
+    finalPass(const finalPass&); // = delete
+    finalPass& operator=(const finalPass&); // = delete
 };
 
 #ifdef HAVE_OPENVX
...
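
The last hunk also makes finalPass non-copyable with the pre-C++11 idiom of declaring, but never defining, the copy operations in the private section. A generic sketch of that idiom (the class name here is illustrative, not from OpenCV):

    // Private, undefined copy operations reject copies at compile time
    // (or at link time if a member function tries to copy); C++11 code
    // would write "= delete" instead, as the comments in the patch note.
    class NonCopyable
    {
    public:
        NonCopyable() {}
    private:
        NonCopyable(const NonCopyable&);            // not implemented
        NonCopyable& operator=(const NonCopyable&); // not implemented
    };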