Commit 223893ea authored by Alexander Alekhin

Merge pull request #13242 from terfendail:contours_wintr

parents 5bd1cc44 e9e8bf4b
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "perf_precomp.hpp"
namespace opencv_test {
// Parameter enums for the perf tests in this file. The tests call
// RetrMode::all() / ApproxMode::all() inside Combine() so that each listed
// value becomes one test parameter.
// RETR_FLOODFILL is not listed in RetrMode; it is exercised separately by
// TestFindContoursFF.
CV_ENUM(RetrMode, RETR_EXTERNAL, RETR_LIST, RETR_CCOMP, RETR_TREE)
CV_ENUM(ApproxMode, CHAIN_APPROX_NONE, CHAIN_APPROX_SIMPLE, CHAIN_APPROX_TC89_L1, CHAIN_APPROX_TC89_KCOS)
// Fixture parameters: (image size, retrieval mode, approximation method, blob count).
typedef TestBaseWithParam< tuple<Size, RetrMode, ApproxMode, int> > TestFindContours;

// Times findContours() on an 8-bit single-channel image filled with
// pseudo-random filled ellipses, for every combination of the parameters below.
PERF_TEST_P(TestFindContours, findContours,
            Combine(
                Values( szVGA, sz1080p ), // image size
                RetrMode::all(),          // retrieval mode
                ApproxMode::all(),        // approximation method
                Values( 32, 128 )         // blob count
            )
           )
{
    const Size frameSize     = get<0>(GetParam());
    const int  retrievalMode = get<1>(GetParam());
    const int  approximation = get<2>(GetParam());
    const int  numBlobs      = get<3>(GetParam());

    // Default-constructed generator: the same blob layout is produced on every run.
    RNG rng;
    Mat binaryImg = Mat::zeros(frameSize, CV_8UC1);

    // keep the border clear: blobs are drawn through a ROI that excludes the
    // outermost 1-pixel frame of the image
    Mat interior = binaryImg(Rect(1, 1, binaryImg.cols - 2, binaryImg.rows - 2));

    for (int blob = 0; blob < numBlobs; ++blob)
    {
        // The draws below must stay in this exact order: each conversion of
        // rng to unsigned consumes one value from the generator.
        const int centerX    = static_cast<unsigned>(rng) % (binaryImg.cols - 2);
        const int centerY    = static_cast<unsigned>(rng) % (binaryImg.rows - 2);
        const int halfWidth  = (static_cast<unsigned>(rng) % 49 + 2) / 2;
        const int halfHeight = (static_cast<unsigned>(rng) % 49 + 2) / 2;
        const double angle   = static_cast<unsigned>(rng) % 180;
        const int brightness = static_cast<unsigned>(rng) % 2;

        const Point center(centerX, centerY);
        const Size axes(halfWidth, halfHeight);

        // thickness -1 => filled ellipse; brightness is either 0 or 1
        ellipse(interior, center, axes, angle, 0., 360., Scalar(brightness), -1);
    }

    vector< vector<Point> > contours;

    TEST_CYCLE()
    {
        findContours(binaryImg, contours, retrievalMode, approximation);
    }

    SANITY_CHECK_NOTHING();
}
// Fixture parameters: (image size, approximation method, blob count).
typedef TestBaseWithParam< tuple<Size, ApproxMode, int> > TestFindContoursFF;

// Times findContours() in RETR_FLOODFILL mode on a 32-bit signed
// single-channel image filled with pseudo-random filled ellipses.
PERF_TEST_P(TestFindContoursFF, findContours,
            Combine(
                Values(szVGA, sz1080p), // image size
                ApproxMode::all(),      // approximation method
                Values(32, 128)         // blob count
            )
           )
{
    const Size frameSize     = get<0>(GetParam());
    const int  approximation = get<1>(GetParam());
    const int  numBlobs      = get<2>(GetParam());

    // Default-constructed generator: the same blob layout is produced on every run.
    RNG rng;
    Mat labelImg = Mat::zeros(frameSize, CV_32SC1);

    // keep the border clear: blobs are drawn through a ROI that excludes the
    // outermost 1-pixel frame of the image
    Mat interior = labelImg(Rect(1, 1, labelImg.cols - 2, labelImg.rows - 2));

    for (int blob = 0; blob < numBlobs; ++blob)
    {
        // The draws below must stay in this exact order: each conversion of
        // rng to unsigned consumes one value from the generator.
        const int centerX    = static_cast<unsigned>(rng) % (labelImg.cols - 2);
        const int centerY    = static_cast<unsigned>(rng) % (labelImg.rows - 2);
        const int halfWidth  = (static_cast<unsigned>(rng) % 49 + 2) / 2;
        const int halfHeight = (static_cast<unsigned>(rng) % 49 + 2) / 2;
        const double angle   = static_cast<unsigned>(rng) % 180;
        const int brightness = static_cast<unsigned>(rng) % 2;

        const Point center(centerX, centerY);
        const Size axes(halfWidth, halfHeight);

        // thickness -1 => filled ellipse; brightness is either 0 or 1
        ellipse(interior, center, axes, angle, 0., 360., Scalar(brightness), -1);
    }

    vector< vector<Point> > contours;

    TEST_CYCLE()
    {
        findContours(labelImg, contours, RETR_FLOODFILL, approximation);
    }

    SANITY_CHECK_NOTHING();
}
} // namespace
...@@ -41,6 +41,8 @@ ...@@ -41,6 +41,8 @@
#include "precomp.hpp" #include "precomp.hpp"
#include "opencv2/core/hal/intrin.hpp" #include "opencv2/core/hal/intrin.hpp"
using namespace cv;
/* initializes 8-element array for fast access to 3x3 neighborhood of a pixel */ /* initializes 8-element array for fast access to 3x3 neighborhood of a pixel */
#define CV_INIT_3X3_DELTAS( deltas, step, nch ) \ #define CV_INIT_3X3_DELTAS( deltas, step, nch ) \
((deltas)[0] = (nch), (deltas)[1] = -(step) + (nch), \ ((deltas)[0] = (nch), (deltas)[1] = -(step) + (nch), \
...@@ -1006,10 +1008,6 @@ cvFindNextContour( CvContourScanner scanner ) ...@@ -1006,10 +1008,6 @@ cvFindNextContour( CvContourScanner scanner )
if( !scanner ) if( !scanner )
CV_Error( CV_StsNullPtr, "" ); CV_Error( CV_StsNullPtr, "" );
#if CV_SSE2
bool haveSIMD = cv::checkHardwareSupport(CPU_SSE2);
#endif
CV_Assert(scanner->img_step >= 0); CV_Assert(scanner->img_step >= 0);
icvEndProcessContour( scanner ); icvEndProcessContour( scanner );
...@@ -1056,48 +1054,22 @@ cvFindNextContour( CvContourScanner scanner ) ...@@ -1056,48 +1054,22 @@ cvFindNextContour( CvContourScanner scanner )
} }
else else
{ {
#if CV_SSE2 #if CV_SIMD
if ((p = img[x]) != prev) { if ((p = img[x]) != prev)
{
goto _next_contour; goto _next_contour;
} else if (haveSIMD) { }
else
__m128i v_prev = _mm_set1_epi8((char)prev); {
int v_size = width - 32; v_uint8 v_prev = vx_setall_u8((uchar)prev);
for (; x <= width - v_uint8::nlanes; x += v_uint8::nlanes)
for (; x <= v_size; x += 32) { {
__m128i v_p1 = _mm_loadu_si128((const __m128i*)(img + x)); unsigned int mask = (unsigned int)v_signmask(vx_load((uchar*)(img + x)) != v_prev);
__m128i v_p2 = _mm_loadu_si128((const __m128i*)(img + x + 16)); if (mask)
{
__m128i v_cmp1 = _mm_cmpeq_epi8(v_p1, v_prev);
__m128i v_cmp2 = _mm_cmpeq_epi8(v_p2, v_prev);
unsigned int mask1 = _mm_movemask_epi8(v_cmp1);
unsigned int mask2 = _mm_movemask_epi8(v_cmp2);
mask1 ^= 0x0000ffff;
mask2 ^= 0x0000ffff;
if (mask1) {
p = img[(x += cv::trailingZeros32(mask1))];
goto _next_contour;
}
if (mask2) {
p = img[(x += cv::trailingZeros32(mask2 << 16))];
goto _next_contour;
}
}
if(x <= width - 16) {
__m128i v_p = _mm_loadu_si128((__m128i*)(img + x));
unsigned int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(v_p, v_prev)) ^ 0x0000ffff;
if (mask) {
p = img[(x += cv::trailingZeros32(mask))]; p = img[(x += cv::trailingZeros32(mask))];
goto _next_contour; goto _next_contour;
} }
x += 16;
} }
} }
#endif #endif
...@@ -1107,7 +1079,7 @@ cvFindNextContour( CvContourScanner scanner ) ...@@ -1107,7 +1079,7 @@ cvFindNextContour( CvContourScanner scanner )
if( x >= width ) if( x >= width )
break; break;
#if CV_SSE2 #if CV_SIMD
_next_contour: _next_contour:
#endif #endif
{ {
...@@ -1353,99 +1325,45 @@ typedef struct CvLinkedRunPoint ...@@ -1353,99 +1325,45 @@ typedef struct CvLinkedRunPoint
} }
CvLinkedRunPoint; CvLinkedRunPoint;
inline int findStartContourPoint(uchar *src_data, CvSize img_size, int j, bool haveSIMD) { inline int findStartContourPoint(uchar *src_data, CvSize img_size, int j)
#if CV_SSE2 {
if (haveSIMD) { #if CV_SIMD
__m128i v_zero = _mm_setzero_si128(); v_uint8 v_zero = vx_setzero_u8();
int v_size = img_size.width - 32; for (; j <= img_size.width - v_uint8::nlanes; j += v_uint8::nlanes)
{
for (; j <= v_size; j += 32) { unsigned int mask = (unsigned int)v_signmask(vx_load((uchar*)(src_data + j)) != v_zero);
__m128i v_p1 = _mm_loadu_si128((const __m128i*)(src_data + j)); if (mask)
__m128i v_p2 = _mm_loadu_si128((const __m128i*)(src_data + j + 16)); {
j += cv::trailingZeros32(mask);
__m128i v_cmp1 = _mm_cmpeq_epi8(v_p1, v_zero); return j;
__m128i v_cmp2 = _mm_cmpeq_epi8(v_p2, v_zero);
unsigned int mask1 = _mm_movemask_epi8(v_cmp1);
unsigned int mask2 = _mm_movemask_epi8(v_cmp2);
mask1 ^= 0x0000ffff;
mask2 ^= 0x0000ffff;
if (mask1) {
j += cv::trailingZeros32(mask1);
return j;
}
if (mask2) {
j += cv::trailingZeros32(mask2 << 16);
return j;
}
}
if (j <= img_size.width - 16) {
__m128i v_p = _mm_loadu_si128((const __m128i*)(src_data + j));
unsigned int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(v_p, v_zero)) ^ 0x0000ffff;
if (mask) {
j += cv::trailingZeros32(mask);
return j;
}
j += 16;
} }
} }
#else
CV_UNUSED(haveSIMD);
#endif #endif
for (; j < img_size.width && !src_data[j]; ++j) for (; j < img_size.width && !src_data[j]; ++j)
; ;
return j; return j;
} }
inline int findEndContourPoint(uchar *src_data, CvSize img_size, int j, bool haveSIMD) { inline int findEndContourPoint(uchar *src_data, CvSize img_size, int j)
#if CV_SSE2 {
if (j < img_size.width && !src_data[j]) { #if CV_SIMD
if (j < img_size.width && !src_data[j])
{
return j; return j;
} else if (haveSIMD) { }
__m128i v_zero = _mm_setzero_si128(); else
int v_size = img_size.width - 32; {
v_uint8 v_zero = vx_setzero_u8();
for (; j <= v_size; j += 32) { for (; j <= img_size.width - v_uint8::nlanes; j += v_uint8::nlanes)
__m128i v_p1 = _mm_loadu_si128((const __m128i*)(src_data + j)); {
__m128i v_p2 = _mm_loadu_si128((const __m128i*)(src_data + j + 16)); unsigned int mask = (unsigned int)v_signmask(vx_load((uchar*)(src_data + j)) == v_zero);
if (mask)
__m128i v_cmp1 = _mm_cmpeq_epi8(v_p1, v_zero); {
__m128i v_cmp2 = _mm_cmpeq_epi8(v_p2, v_zero);
unsigned int mask1 = _mm_movemask_epi8(v_cmp1);
unsigned int mask2 = _mm_movemask_epi8(v_cmp2);
if (mask1) {
j += cv::trailingZeros32(mask1);
return j;
}
if (mask2) {
j += cv::trailingZeros32(mask2 << 16);
return j;
}
}
if (j <= img_size.width - 16) {
__m128i v_p = _mm_loadu_si128((const __m128i*)(src_data + j));
unsigned int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(v_p, v_zero));
if (mask) {
j += cv::trailingZeros32(mask); j += cv::trailingZeros32(mask);
return j; return j;
} }
j += 16;
} }
} }
#else
CV_UNUSED(haveSIMD);
#endif #endif
for (; j < img_size.width && src_data[j]; ++j) for (; j < img_size.width && src_data[j]; ++j)
; ;
...@@ -1475,7 +1393,6 @@ icvFindContoursInInterval( const CvArr* src, ...@@ -1475,7 +1393,6 @@ icvFindContoursInInterval( const CvArr* src,
int lower_total; int lower_total;
int upper_total; int upper_total;
int all_total; int all_total;
bool haveSIMD = false;
CvSeq* runs; CvSeq* runs;
CvLinkedRunPoint tmp; CvLinkedRunPoint tmp;
...@@ -1505,9 +1422,7 @@ icvFindContoursInInterval( const CvArr* src, ...@@ -1505,9 +1422,7 @@ icvFindContoursInInterval( const CvArr* src,
if( contourHeaderSize < (int)sizeof(CvContour)) if( contourHeaderSize < (int)sizeof(CvContour))
CV_Error( CV_StsBadSize, "Contour header size must be >= sizeof(CvContour)" ); CV_Error( CV_StsBadSize, "Contour header size must be >= sizeof(CvContour)" );
#if CV_SSE2
haveSIMD = cv::checkHardwareSupport(CPU_SSE2);
#endif
storage00.reset(cvCreateChildMemStorage(storage)); storage00.reset(cvCreateChildMemStorage(storage));
storage01.reset(cvCreateChildMemStorage(storage)); storage01.reset(cvCreateChildMemStorage(storage));
...@@ -1539,7 +1454,7 @@ icvFindContoursInInterval( const CvArr* src, ...@@ -1539,7 +1454,7 @@ icvFindContoursInInterval( const CvArr* src,
tmp_prev = upper_line; tmp_prev = upper_line;
for( j = 0; j < img_size.width; ) for( j = 0; j < img_size.width; )
{ {
j = findStartContourPoint(src_data, cvSize(img_size), j, haveSIMD); j = findStartContourPoint(src_data, cvSize(img_size), j);
if( j == img_size.width ) if( j == img_size.width )
break; break;
...@@ -1549,7 +1464,7 @@ icvFindContoursInInterval( const CvArr* src, ...@@ -1549,7 +1464,7 @@ icvFindContoursInInterval( const CvArr* src,
tmp_prev->next = (CvLinkedRunPoint*)CV_GET_WRITTEN_ELEM( writer ); tmp_prev->next = (CvLinkedRunPoint*)CV_GET_WRITTEN_ELEM( writer );
tmp_prev = tmp_prev->next; tmp_prev = tmp_prev->next;
j = findEndContourPoint(src_data, cvSize(img_size), j + 1, haveSIMD); j = findEndContourPoint(src_data, cvSize(img_size), j + 1);
tmp.pt.x = j - 1; tmp.pt.x = j - 1;
CV_WRITE_SEQ_ELEM( tmp, writer ); CV_WRITE_SEQ_ELEM( tmp, writer );
...@@ -1573,7 +1488,7 @@ icvFindContoursInInterval( const CvArr* src, ...@@ -1573,7 +1488,7 @@ icvFindContoursInInterval( const CvArr* src,
all_total = runs->total; all_total = runs->total;
for( j = 0; j < img_size.width; ) for( j = 0; j < img_size.width; )
{ {
j = findStartContourPoint(src_data, cvSize(img_size), j, haveSIMD); j = findStartContourPoint(src_data, cvSize(img_size), j);
if( j == img_size.width ) break; if( j == img_size.width ) break;
...@@ -1582,7 +1497,7 @@ icvFindContoursInInterval( const CvArr* src, ...@@ -1582,7 +1497,7 @@ icvFindContoursInInterval( const CvArr* src,
tmp_prev->next = (CvLinkedRunPoint*)CV_GET_WRITTEN_ELEM( writer ); tmp_prev->next = (CvLinkedRunPoint*)CV_GET_WRITTEN_ELEM( writer );
tmp_prev = tmp_prev->next; tmp_prev = tmp_prev->next;
j = findEndContourPoint(src_data, cvSize(img_size), j + 1, haveSIMD); j = findEndContourPoint(src_data, cvSize(img_size), j + 1);
tmp.pt.x = j - 1; tmp.pt.x = j - 1;
CV_WRITE_SEQ_ELEM( tmp, writer ); CV_WRITE_SEQ_ELEM( tmp, writer );
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment