Commit 223893ea authored by Alexander Alekhin

Merge pull request #13242 from terfendail:contours_wintr

parents 5bd1cc44 e9e8bf4b
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "perf_precomp.hpp"
namespace opencv_test {
// Contour retrieval modes; RetrMode::all() feeds them to the parameterized test below.
// RETR_FLOODFILL is not listed here: TestFindContoursFF passes it explicitly.
CV_ENUM(RetrMode, RETR_EXTERNAL, RETR_LIST, RETR_CCOMP, RETR_TREE)
// Contour approximation methods; ApproxMode::all() feeds them to both parameterized tests.
CV_ENUM(ApproxMode, CHAIN_APPROX_NONE, CHAIN_APPROX_SIMPLE, CHAIN_APPROX_TC89_L1, CHAIN_APPROX_TC89_KCOS)
// Test parameters: image size, retrieval mode, approximation method, blob count.
typedef TestBaseWithParam< tuple<Size, RetrMode, ApproxMode, int> > TestFindContours;

// Times findContours() on an 8-bit single-channel image filled with randomly
// placed, randomly oriented filled ellipses whose pixel value is 0 or 1.
PERF_TEST_P(TestFindContours, findContours,
            Combine(
                Values( szVGA, sz1080p ), // image size
                RetrMode::all(),          // retrieval mode
                ApproxMode::all(),        // approximation method
                Values( 32, 128 )         // blob count
            )
)
{
    const Size frameSize     = get<0>(GetParam());
    const int  retrieval     = get<1>(GetParam());
    const int  approximation = get<2>(GetParam());
    const int  numBlobs      = get<3>(GetParam());

    RNG rng;
    Mat img = Mat::zeros(frameSize, CV_8UC1);

    // All drawing goes through a view that leaves out the outermost pixel ring,
    // which keeps the image border clear.
    Mat interior = img(Rect(1, 1, img.cols - 2, img.rows - 2));

    for (int blob = 0; blob < numBlobs; ++blob)
    {
        // Each random value is drawn in its own statement so the order in which
        // the generator is consumed is fixed: x, y, width, height, angle, value.
        Point center;
        center.x = static_cast<unsigned>(rng) % (img.cols - 2);
        center.y = static_cast<unsigned>(rng) % (img.rows - 2);

        Size axes;
        axes.width  = (static_cast<unsigned>(rng) % 49 + 2) / 2;
        axes.height = (static_cast<unsigned>(rng) % 49 + 2) / 2;

        const double angle      = static_cast<unsigned>(rng) % 180;
        const int    brightness = static_cast<unsigned>(rng) % 2;

        ellipse(interior, center, axes, angle, 0., 360., Scalar(brightness), -1);
    }

    vector< vector<Point> > contours;

    TEST_CYCLE()
    {
        findContours(img, contours, retrieval, approximation);
    }

    SANITY_CHECK_NOTHING();
}
// Test parameters: image size, approximation method, blob count.
typedef TestBaseWithParam< tuple<Size, ApproxMode, int> > TestFindContoursFF;

// Times findContours() in RETR_FLOODFILL mode on a 32-bit signed single-channel
// image filled with randomly placed filled ellipses whose pixel value is 0 or 1.
PERF_TEST_P(TestFindContoursFF, findContours,
            Combine(
                Values(szVGA, sz1080p), // image size
                ApproxMode::all(),      // approximation method
                Values(32, 128)         // blob count
            )
)
{
    const Size frameSize     = get<0>(GetParam());
    const int  approximation = get<1>(GetParam());
    const int  numBlobs      = get<2>(GetParam());

    RNG rng;
    Mat img = Mat::zeros(frameSize, CV_32SC1);

    // All drawing goes through a view that leaves out the outermost pixel ring,
    // which keeps the image border clear.
    Mat interior = img(Rect(1, 1, img.cols - 2, img.rows - 2));

    for (int blob = 0; blob < numBlobs; ++blob)
    {
        // Each random value is drawn in its own statement so the order in which
        // the generator is consumed is fixed: x, y, width, height, angle, value.
        Point center;
        center.x = static_cast<unsigned>(rng) % (img.cols - 2);
        center.y = static_cast<unsigned>(rng) % (img.rows - 2);

        Size axes;
        axes.width  = (static_cast<unsigned>(rng) % 49 + 2) / 2;
        axes.height = (static_cast<unsigned>(rng) % 49 + 2) / 2;

        const double angle      = static_cast<unsigned>(rng) % 180;
        const int    brightness = static_cast<unsigned>(rng) % 2;

        ellipse(interior, center, axes, angle, 0., 360., Scalar(brightness), -1);
    }

    vector< vector<Point> > contours;

    TEST_CYCLE()
    {
        findContours(img, contours, RETR_FLOODFILL, approximation);
    }

    SANITY_CHECK_NOTHING();
}
} // namespace
......@@ -41,6 +41,8 @@
#include "precomp.hpp"
#include "opencv2/core/hal/intrin.hpp"
using namespace cv;
/* initializes 8-element array for fast access to 3x3 neighborhood of a pixel */
#define CV_INIT_3X3_DELTAS( deltas, step, nch ) \
((deltas)[0] = (nch), (deltas)[1] = -(step) + (nch), \
......@@ -1006,10 +1008,6 @@ cvFindNextContour( CvContourScanner scanner )
if( !scanner )
CV_Error( CV_StsNullPtr, "" );
#if CV_SSE2
bool haveSIMD = cv::checkHardwareSupport(CPU_SSE2);
#endif
CV_Assert(scanner->img_step >= 0);
icvEndProcessContour( scanner );
......@@ -1056,48 +1054,22 @@ cvFindNextContour( CvContourScanner scanner )
}
else
{
#if CV_SSE2
if ((p = img[x]) != prev) {
#if CV_SIMD
if ((p = img[x]) != prev)
{
goto _next_contour;
} else if (haveSIMD) {
__m128i v_prev = _mm_set1_epi8((char)prev);
int v_size = width - 32;
for (; x <= v_size; x += 32) {
__m128i v_p1 = _mm_loadu_si128((const __m128i*)(img + x));
__m128i v_p2 = _mm_loadu_si128((const __m128i*)(img + x + 16));
__m128i v_cmp1 = _mm_cmpeq_epi8(v_p1, v_prev);
__m128i v_cmp2 = _mm_cmpeq_epi8(v_p2, v_prev);
unsigned int mask1 = _mm_movemask_epi8(v_cmp1);
unsigned int mask2 = _mm_movemask_epi8(v_cmp2);
mask1 ^= 0x0000ffff;
mask2 ^= 0x0000ffff;
if (mask1) {
p = img[(x += cv::trailingZeros32(mask1))];
goto _next_contour;
}
if (mask2) {
p = img[(x += cv::trailingZeros32(mask2 << 16))];
goto _next_contour;
}
}
if(x <= width - 16) {
__m128i v_p = _mm_loadu_si128((__m128i*)(img + x));
unsigned int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(v_p, v_prev)) ^ 0x0000ffff;
if (mask) {
}
else
{
v_uint8 v_prev = vx_setall_u8((uchar)prev);
for (; x <= width - v_uint8::nlanes; x += v_uint8::nlanes)
{
unsigned int mask = (unsigned int)v_signmask(vx_load((uchar*)(img + x)) != v_prev);
if (mask)
{
p = img[(x += cv::trailingZeros32(mask))];
goto _next_contour;
}
x += 16;
}
}
#endif
......@@ -1107,7 +1079,7 @@ cvFindNextContour( CvContourScanner scanner )
if( x >= width )
break;
#if CV_SSE2
#if CV_SIMD
_next_contour:
#endif
{
......@@ -1353,99 +1325,45 @@ typedef struct CvLinkedRunPoint
}
CvLinkedRunPoint;
// findStartContourPoint: starting at index j, returns the index of the first
// non-zero byte in src_data, or img_size.width when every remaining byte up to
// the row width is zero (see the scalar tail loop at the end).
// NOTE(review): this span is a rendered diff. A four-argument SSE2 version
// (with haveSIMD) and a three-argument universal-intrinsics version appear
// interleaved, so it is not compilable as shown; confirm against the real
// source file before editing.
inline int findStartContourPoint(uchar *src_data, CvSize img_size, int j, bool haveSIMD) {
#if CV_SSE2
if (haveSIMD) {
__m128i v_zero = _mm_setzero_si128();
// Main SSE2 loop: 32 bytes per iteration, handled as two 16-byte unaligned loads.
int v_size = img_size.width - 32;
for (; j <= v_size; j += 32) {
__m128i v_p1 = _mm_loadu_si128((const __m128i*)(src_data + j));
__m128i v_p2 = _mm_loadu_si128((const __m128i*)(src_data + j + 16));
__m128i v_cmp1 = _mm_cmpeq_epi8(v_p1, v_zero);
__m128i v_cmp2 = _mm_cmpeq_epi8(v_p2, v_zero);
unsigned int mask1 = _mm_movemask_epi8(v_cmp1);
unsigned int mask2 = _mm_movemask_epi8(v_cmp2);
// The compare flags bytes equal to zero; XOR with 0xffff flips the 16-bit
// mask so that set bits mark non-zero bytes instead.
mask1 ^= 0x0000ffff;
mask2 ^= 0x0000ffff;
if (mask1) {
j += cv::trailingZeros32(mask1);
return j;
}
if (mask2) {
// mask2 describes bytes 16..31 of the chunk, hence the shift by 16 before
// counting trailing zeros.
j += cv::trailingZeros32(mask2 << 16);
return j;
}
}
// One more 16-byte step when at least 16 bytes remain.
if (j <= img_size.width - 16) {
__m128i v_p = _mm_loadu_si128((const __m128i*)(src_data + j));
unsigned int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(v_p, v_zero)) ^ 0x0000ffff;
if (mask) {
j += cv::trailingZeros32(mask);
return j;
}
j += 16;
// Three-argument signature (no haveSIMD); the call sites in
// icvFindContoursInInterval appear in both forms in this listing.
inline int findStartContourPoint(uchar *src_data, CvSize img_size, int j)
{
#if CV_SIMD
v_uint8 v_zero = vx_setzero_u8();
// Universal-intrinsics loop: v_uint8::nlanes bytes per iteration.
for (; j <= img_size.width - v_uint8::nlanes; j += v_uint8::nlanes)
{
// Lanes that differ from zero contribute set bits to the mask.
unsigned int mask = (unsigned int)v_signmask(vx_load((uchar*)(src_data + j)) != v_zero);
if (mask)
{
// Lowest set bit = offset of the first non-zero byte within this chunk.
j += cv::trailingZeros32(mask);
return j;
}
}
#else
// NOTE(review): haveSIMD is a parameter of the four-argument signature only.
CV_UNUSED(haveSIMD);
#endif
// Scalar tail: step over zero bytes one at a time until a non-zero byte or the row end.
for (; j < img_size.width && !src_data[j]; ++j)
;
return j;
}
inline int findEndContourPoint(uchar *src_data, CvSize img_size, int j, bool haveSIMD) {
#if CV_SSE2
if (j < img_size.width && !src_data[j]) {
inline int findEndContourPoint(uchar *src_data, CvSize img_size, int j)
{
#if CV_SIMD
if (j < img_size.width && !src_data[j])
{
return j;
} else if (haveSIMD) {
__m128i v_zero = _mm_setzero_si128();
int v_size = img_size.width - 32;
for (; j <= v_size; j += 32) {
__m128i v_p1 = _mm_loadu_si128((const __m128i*)(src_data + j));
__m128i v_p2 = _mm_loadu_si128((const __m128i*)(src_data + j + 16));
__m128i v_cmp1 = _mm_cmpeq_epi8(v_p1, v_zero);
__m128i v_cmp2 = _mm_cmpeq_epi8(v_p2, v_zero);
unsigned int mask1 = _mm_movemask_epi8(v_cmp1);
unsigned int mask2 = _mm_movemask_epi8(v_cmp2);
if (mask1) {
j += cv::trailingZeros32(mask1);
return j;
}
if (mask2) {
j += cv::trailingZeros32(mask2 << 16);
return j;
}
}
if (j <= img_size.width - 16) {
__m128i v_p = _mm_loadu_si128((const __m128i*)(src_data + j));
unsigned int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(v_p, v_zero));
if (mask) {
}
else
{
v_uint8 v_zero = vx_setzero_u8();
for (; j <= img_size.width - v_uint8::nlanes; j += v_uint8::nlanes)
{
unsigned int mask = (unsigned int)v_signmask(vx_load((uchar*)(src_data + j)) == v_zero);
if (mask)
{
j += cv::trailingZeros32(mask);
return j;
}
j += 16;
}
}
#else
CV_UNUSED(haveSIMD);
#endif
for (; j < img_size.width && src_data[j]; ++j)
;
......@@ -1475,7 +1393,6 @@ icvFindContoursInInterval( const CvArr* src,
int lower_total;
int upper_total;
int all_total;
bool haveSIMD = false;
CvSeq* runs;
CvLinkedRunPoint tmp;
......@@ -1505,9 +1422,7 @@ icvFindContoursInInterval( const CvArr* src,
if( contourHeaderSize < (int)sizeof(CvContour))
CV_Error( CV_StsBadSize, "Contour header size must be >= sizeof(CvContour)" );
#if CV_SSE2
haveSIMD = cv::checkHardwareSupport(CPU_SSE2);
#endif
storage00.reset(cvCreateChildMemStorage(storage));
storage01.reset(cvCreateChildMemStorage(storage));
......@@ -1539,7 +1454,7 @@ icvFindContoursInInterval( const CvArr* src,
tmp_prev = upper_line;
for( j = 0; j < img_size.width; )
{
j = findStartContourPoint(src_data, cvSize(img_size), j, haveSIMD);
j = findStartContourPoint(src_data, cvSize(img_size), j);
if( j == img_size.width )
break;
......@@ -1549,7 +1464,7 @@ icvFindContoursInInterval( const CvArr* src,
tmp_prev->next = (CvLinkedRunPoint*)CV_GET_WRITTEN_ELEM( writer );
tmp_prev = tmp_prev->next;
j = findEndContourPoint(src_data, cvSize(img_size), j + 1, haveSIMD);
j = findEndContourPoint(src_data, cvSize(img_size), j + 1);
tmp.pt.x = j - 1;
CV_WRITE_SEQ_ELEM( tmp, writer );
......@@ -1573,7 +1488,7 @@ icvFindContoursInInterval( const CvArr* src,
all_total = runs->total;
for( j = 0; j < img_size.width; )
{
j = findStartContourPoint(src_data, cvSize(img_size), j, haveSIMD);
j = findStartContourPoint(src_data, cvSize(img_size), j);
if( j == img_size.width ) break;
......@@ -1582,7 +1497,7 @@ icvFindContoursInInterval( const CvArr* src,
tmp_prev->next = (CvLinkedRunPoint*)CV_GET_WRITTEN_ELEM( writer );
tmp_prev = tmp_prev->next;
j = findEndContourPoint(src_data, cvSize(img_size), j + 1, haveSIMD);
j = findEndContourPoint(src_data, cvSize(img_size), j + 1);
tmp.pt.x = j - 1;
CV_WRITE_SEQ_ELEM( tmp, writer );
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment