Commit 658f96b4 authored by Seon-Wook Park's avatar Seon-Wook Park

spatialGradient: L/R border handling outside. Kernelize.

parent 15ea4010
...@@ -43,9 +43,38 @@ ...@@ -43,9 +43,38 @@
#include "precomp.hpp" #include "precomp.hpp"
#include "opencv2/hal/intrin.hpp" #include "opencv2/hal/intrin.hpp"
#include <iostream>
namespace cv namespace cv
{ {
/* NOTE:
*
* Sobel-x: -1 0 1
* -2 0 2
* -1 0 1
*
* Sobel-y: -1 -2 -1
* 0 0 0
* 1 2 1
*/
template <typename T>
static inline void spatialGradientKernel( T& vx, T& vy,
T v00, T v01, T v02,
T v10, T v12,
T v20, T v21, T v22 )
{
// vx = (v22 - v00) + (v02 - v20) + 2 * (v12 - v10)
// vy = (v22 - v00) + (v20 - v02) + 2 * (v21 - v01)
T tmp_add = v22 - v00,
tmp_sub = v02 - v20,
tmp_x = v12 - v10,
tmp_y = v21 - v01;
vx = tmp_add + tmp_sub + tmp_x + tmp_x;
vy = tmp_add - tmp_sub + tmp_y + tmp_y;
}
void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy,
int ksize, int borderType ) int ksize, int borderType )
{ {
...@@ -115,7 +144,6 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, ...@@ -115,7 +144,6 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy,
int i_start = 0; int i_start = 0;
int j_start = 0; int j_start = 0;
#if CV_SIMD128 #if CV_SIMD128
uchar tmp;
uchar *m_src; uchar *m_src;
short *n_dx, *n_dy; short *n_dx, *n_dy;
...@@ -133,24 +161,13 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, ...@@ -133,24 +161,13 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy,
p_src = P_src[i]; c_src = P_src[i+1]; n_src = P_src[i+2]; m_src = P_src[i+3]; p_src = P_src[i]; c_src = P_src[i+1]; n_src = P_src[i+2]; m_src = P_src[i+3];
c_dx = P_dx[i]; c_dy = P_dy[i]; n_dx = P_dx[i+1]; n_dy = P_dy[i+1]; c_dx = P_dx[i]; c_dy = P_dy[i]; n_dx = P_dx[i+1]; n_dy = P_dy[i+1];
// 16-column chunks at a time // Process rest of columns 16-column chunks at a time
for ( j = 0; j < W - 15; j += 16 ) for ( j = 1; j < W - 16; j += 16 )
{ {
bool left = false, right = false;
if ( j == 0 ) left = true;
if ( j == W - 16 ) right = true;
// Load top row for 3x3 Sobel filter // Load top row for 3x3 Sobel filter
if ( left ) { tmp = p_src[j-1]; p_src[j-1] = p_src[j+j_offl]; }
v_uint8x16 v_um = v_load(&p_src[j-1]); v_uint8x16 v_um = v_load(&p_src[j-1]);
if ( left ) p_src[j-1] = tmp;
v_uint8x16 v_un = v_load(&p_src[j]); v_uint8x16 v_un = v_load(&p_src[j]);
if ( right ) { tmp = p_src[j+16]; p_src[j+16] = p_src[j+15+j_offr]; }
v_uint8x16 v_up = v_load(&p_src[j+1]); v_uint8x16 v_up = v_load(&p_src[j+1]);
if ( right ) p_src[j+16] = tmp;
v_uint16x8 v_um1, v_um2, v_un1, v_un2, v_up1, v_up2; v_uint16x8 v_um1, v_um2, v_un1, v_un2, v_up1, v_up2;
v_expand(v_um, v_um1, v_um2); v_expand(v_um, v_um1, v_um2);
v_expand(v_un, v_un1, v_un2); v_expand(v_un, v_un1, v_un2);
...@@ -163,16 +180,9 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, ...@@ -163,16 +180,9 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy,
v_int16x8 v_s1p2 = v_reinterpret_as_s16(v_up2); v_int16x8 v_s1p2 = v_reinterpret_as_s16(v_up2);
// Load second row for 3x3 Sobel filter // Load second row for 3x3 Sobel filter
if ( left ) { tmp = c_src[j-1]; c_src[j-1] = c_src[j+j_offl]; }
v_um = v_load(&c_src[j-1]); v_um = v_load(&c_src[j-1]);
if ( left ) c_src[j-1] = tmp;
v_un = v_load(&c_src[j]); v_un = v_load(&c_src[j]);
if ( right ) { tmp = c_src[j+16]; c_src[j+16] = c_src[j+15+j_offr]; }
v_up = v_load(&c_src[j+1]); v_up = v_load(&c_src[j+1]);
if ( right ) c_src[j+16] = tmp;
v_expand(v_um, v_um1, v_um2); v_expand(v_um, v_um1, v_um2);
v_expand(v_un, v_un1, v_un2); v_expand(v_un, v_un1, v_un2);
v_expand(v_up, v_up1, v_up2); v_expand(v_up, v_up1, v_up2);
...@@ -184,16 +194,9 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, ...@@ -184,16 +194,9 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy,
v_int16x8 v_s2p2 = v_reinterpret_as_s16(v_up2); v_int16x8 v_s2p2 = v_reinterpret_as_s16(v_up2);
// Load third row for 3x3 Sobel filter // Load third row for 3x3 Sobel filter
if ( left ) { tmp = n_src[j-1]; n_src[j-1] = n_src[j+j_offl]; }
v_um = v_load(&n_src[j-1]); v_um = v_load(&n_src[j-1]);
if ( left ) n_src[j-1] = tmp;
v_un = v_load(&n_src[j]); v_un = v_load(&n_src[j]);
if ( right ) { tmp = n_src[j+16]; n_src[j+16] = n_src[j+15+j_offr]; }
v_up = v_load(&n_src[j+1]); v_up = v_load(&n_src[j+1]);
if ( right ) n_src[j+16] = tmp;
v_expand(v_um, v_um1, v_um2); v_expand(v_um, v_um1, v_um2);
v_expand(v_un, v_un1, v_un2); v_expand(v_un, v_un1, v_un2);
v_expand(v_up, v_up1, v_up2); v_expand(v_up, v_up1, v_up2);
...@@ -205,15 +208,9 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, ...@@ -205,15 +208,9 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy,
v_int16x8 v_s3p2 = v_reinterpret_as_s16(v_up2); v_int16x8 v_s3p2 = v_reinterpret_as_s16(v_up2);
// Load fourth row for 3x3 Sobel filter // Load fourth row for 3x3 Sobel filter
if ( left ) { tmp = m_src[j-1]; m_src[j-1] = m_src[j+j_offl]; }
v_um = v_load(&m_src[j-1]); v_um = v_load(&m_src[j-1]);
if ( left ) m_src[j-1] = tmp;
v_un = v_load(&m_src[j]); v_un = v_load(&m_src[j]);
if ( right ) { tmp = m_src[j+16]; m_src[j+16] = m_src[j+15+j_offr]; }
v_up = v_load(&m_src[j+1]); v_up = v_load(&m_src[j+1]);
if ( right ) m_src[j+16] = tmp;
v_expand(v_um, v_um1, v_um2); v_expand(v_um, v_um1, v_um2);
v_expand(v_un, v_un1, v_un2); v_expand(v_un, v_un1, v_un2);
...@@ -225,17 +222,18 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, ...@@ -225,17 +222,18 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy,
v_int16x8 v_s4p1 = v_reinterpret_as_s16(v_up1); v_int16x8 v_s4p1 = v_reinterpret_as_s16(v_up1);
v_int16x8 v_s4p2 = v_reinterpret_as_s16(v_up2); v_int16x8 v_s4p2 = v_reinterpret_as_s16(v_up2);
// dx // dx & dy for rows 1, 2, 3
v_int16x8 v_tmp = v_s2p1 - v_s2m1; v_int16x8 v_sdx1, v_sdy1;
v_int16x8 v_sdx1 = (v_s1p1 - v_s1m1) + (v_tmp + v_tmp) + (v_s3p1 - v_s3m1); spatialGradientKernel<v_int16x8>( v_sdx1, v_sdy1,
v_tmp = v_s2p2 - v_s2m2; v_s1m1, v_s1n1, v_s1p1,
v_int16x8 v_sdx2 = (v_s1p2 - v_s1m2) + (v_tmp + v_tmp) + (v_s3p2 - v_s3m2); v_s2m1, v_s2p1,
v_s3m1, v_s3n1, v_s3p1 );
// dy v_int16x8 v_sdx2, v_sdy2;
v_tmp = v_s3n1 - v_s1n1; spatialGradientKernel<v_int16x8>( v_sdx2, v_sdy2,
v_int16x8 v_sdy1 = (v_s3m1 - v_s1m1) + (v_tmp + v_tmp) + (v_s3p1 - v_s1p1); v_s1m2, v_s1n2, v_s1p2,
v_tmp = v_s3n2 - v_s1n2; v_s2m2, v_s2p2,
v_int16x8 v_sdy2 = (v_s3m2 - v_s1m2) + (v_tmp + v_tmp) + (v_s3p2 - v_s1p2); v_s3m2, v_s3n2, v_s3p2 );
// Store // Store
v_store(&c_dx[j], v_sdx1); v_store(&c_dx[j], v_sdx1);
...@@ -243,17 +241,16 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, ...@@ -243,17 +241,16 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy,
v_store(&c_dy[j], v_sdy1); v_store(&c_dy[j], v_sdy1);
v_store(&c_dy[j+8], v_sdy2); v_store(&c_dy[j+8], v_sdy2);
// dx // dx & dy for rows 2, 3, 4
v_tmp = v_s3p1 - v_s3m1; spatialGradientKernel<v_int16x8>( v_sdx1, v_sdy1,
v_sdx1 = (v_s2p1 - v_s2m1) + (v_tmp + v_tmp) + (v_s4p1 - v_s4m1); v_s2m1, v_s2n1, v_s2p1,
v_tmp = v_s3p2 - v_s3m2; v_s3m1, v_s3p1,
v_sdx2 = (v_s2p2 - v_s2m2) + (v_tmp + v_tmp) + (v_s4p2 - v_s4m2); v_s4m1, v_s4n1, v_s4p1 );
// dy spatialGradientKernel<v_int16x8>( v_sdx2, v_sdy2,
v_tmp = v_s4n1 - v_s2n1; v_s2m2, v_s2n2, v_s2p2,
v_sdy1 = (v_s4m1 - v_s2m1) + (v_tmp + v_tmp) + (v_s4p1 - v_s2p1); v_s3m2, v_s3p2,
v_tmp = v_s4n2 - v_s2n2; v_s4m2, v_s4n2, v_s4p2 );
v_sdy2 = (v_s4m2 - v_s2m2) + (v_tmp + v_tmp) + (v_s4p2 - v_s2p2);
// Store // Store
v_store(&n_dx[j], v_sdx1); v_store(&n_dx[j], v_sdx1);
...@@ -265,16 +262,6 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, ...@@ -265,16 +262,6 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy,
i_start = i; i_start = i;
j_start = j; j_start = j;
#endif #endif
/* NOTE:
*
* Sobel-x: -1 0 1
* -2 0 2
* -1 0 1
*
* Sobel-y: -1 -2 -1
* 0 0 0
* 1 2 1
*/
int j_p, j_n; int j_p, j_n;
uchar v00, v01, v02, v10, v11, v12, v20, v21, v22; uchar v00, v01, v02, v10, v11, v12, v20, v21, v22;
for ( i = 0; i < H; i++ ) for ( i = 0; i < H; i++ )
...@@ -283,31 +270,45 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy, ...@@ -283,31 +270,45 @@ void spatialGradient( InputArray _src, OutputArray _dx, OutputArray _dy,
c_dx = P_dx [i]; c_dx = P_dx [i];
c_dy = P_dy [i]; c_dy = P_dy [i];
// Pre-load 2 columns // Process left-most column
j = i >= i_start ? 0 : j_start; j = 0;
j_p = j + j_offl;
j_n = 1;
if ( j_n >= W ) j_n = j + j_offr;
v00 = p_src[j_p]; v01 = p_src[j]; v02 = p_src[j_n];
v10 = c_src[j_p]; v11 = c_src[j]; v12 = c_src[j_n];
v20 = n_src[j_p]; v21 = n_src[j]; v22 = n_src[j_n];
spatialGradientKernel<short>( c_dx[0], c_dy[0], v00, v01, v02, v10,
v12, v20, v21, v22 );
v00 = v01; v10 = v11; v20 = v21;
v01 = v02; v11 = v12; v21 = v22;
// Process middle columns
j = i >= i_start ? 1 : j_start;
j_p = j - 1; j_p = j - 1;
if ( j_p < 0 ) j_p = j + j_offl;
v00 = p_src[j_p]; v01 = p_src[j]; v00 = p_src[j_p]; v01 = p_src[j];
v10 = c_src[j_p]; v11 = c_src[j]; v10 = c_src[j_p]; v11 = c_src[j];
v20 = n_src[j_p]; v21 = n_src[j]; v20 = n_src[j_p]; v21 = n_src[j];
for ( ; j < W; j++ ) for ( ; j < W - 1; j++ )
{ {
j_n = j + 1;
if ( j_n >= W ) j_n = j + j_offr;
// Get values for next column // Get values for next column
v02 = p_src[j_n]; j_n = j + 1; v02 = p_src[j_n]; v12 = c_src[j_n]; v22 = n_src[j_n];
v12 = c_src[j_n]; spatialGradientKernel<short>( c_dx[j], c_dy[j], v00, v01, v02, v10,
v22 = n_src[j_n]; v12, v20, v21, v22 );
c_dx[j] = -(v00 + v10 + v10 + v20) + (v02 + v12 + v12 + v22);
c_dy[j] = -(v00 + v01 + v01 + v02) + (v20 + v21 + v21 + v22);
// Move values back one column for next iteration // Move values back one column for next iteration
v00 = v01; v10 = v11; v20 = v21; v00 = v01; v10 = v11; v20 = v21;
v01 = v02; v11 = v12; v21 = v22; v01 = v02; v11 = v12; v21 = v22;
} }
// Process right-most column
if ( j < W )
{
j_n = j + j_offr; v02 = p_src[j_n]; v12 = c_src[j_n]; v22 = n_src[j_n];
spatialGradientKernel<short>( c_dx[j], c_dy[j], v00, v01, v02, v10,
v12, v20, v21, v22 );
}
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment