Commit fec21239 authored by Alexander Karsakov

Revert optimization for warpAffine INTER_NEAREST mode

parent d3cdfefa
...@@ -98,15 +98,15 @@ __kernel void warpAffine(__global const uchar * srcptr, int src_step, int src_of ...@@ -98,15 +98,15 @@ __kernel void warpAffine(__global const uchar * srcptr, int src_step, int src_of
{ {
int round_delta = (AB_SCALE >> 1); int round_delta = (AB_SCALE >> 1);
int X0 = rint(fma(M[0], dx, fma(M[1], dy0, M[2])) * AB_SCALE) + round_delta; int X0_ = rint(M[0] * dx * AB_SCALE);
int Y0 = rint(fma(M[3], dx, fma(M[4], dy0, M[5])) * AB_SCALE) + round_delta; int Y0_ = rint(M[3] * dx * AB_SCALE);
int XSTEP = (int)(M[1] * AB_SCALE);
int YSTEP = (int)(M[4] * AB_SCALE);
int dst_index = mad24(dy0, dst_step, mad24(dx, pixsize, dst_offset)); int dst_index = mad24(dy0, dst_step, mad24(dx, pixsize, dst_offset));
for (int dy = dy0, dy1 = min(dst_rows, dy0 + rowsPerWI); dy < dy1; ++dy, dst_index += dst_step) for (int dy = dy0, dy1 = min(dst_rows, dy0 + rowsPerWI); dy < dy1; ++dy, dst_index += dst_step)
{ {
int X0 = X0_ + rint(fma(M[1], dy, M[2]) * AB_SCALE) + round_delta;
int Y0 = Y0_ + rint(fma(M[4], dy, M[5]) * AB_SCALE) + round_delta;
short sx = convert_short_sat(X0 >> AB_BITS); short sx = convert_short_sat(X0 >> AB_BITS);
short sy = convert_short_sat(Y0 >> AB_BITS); short sy = convert_short_sat(Y0 >> AB_BITS);
...@@ -117,9 +117,6 @@ __kernel void warpAffine(__global const uchar * srcptr, int src_step, int src_of ...@@ -117,9 +117,6 @@ __kernel void warpAffine(__global const uchar * srcptr, int src_step, int src_of
} }
else else
storepix(scalar, dstptr + dst_index); storepix(scalar, dstptr + dst_index);
X0 += XSTEP;
Y0 += YSTEP;
} }
} }
} }
...@@ -376,4 +373,4 @@ __kernel void warpAffine(__global const uchar * srcptr, int src_step, int src_of ...@@ -376,4 +373,4 @@ __kernel void warpAffine(__global const uchar * srcptr, int src_step, int src_of
} }
} }
#endif #endif
\ No newline at end of file
...@@ -413,6 +413,9 @@ static bool ocl_pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, in ...@@ -413,6 +413,9 @@ static bool ocl_pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, in
Size ssize = _src.size(); Size ssize = _src.size();
Size dsize = _dsz.area() == 0 ? Size((ssize.width + 1) / 2, (ssize.height + 1) / 2) : _dsz; Size dsize = _dsz.area() == 0 ? Size((ssize.width + 1) / 2, (ssize.height + 1) / 2) : _dsz;
if (dsize.height < 2 || dsize.width < 2)
return false;
CV_Assert( ssize.width > 0 && ssize.height > 0 && CV_Assert( ssize.width > 0 && ssize.height > 0 &&
std::abs(dsize.width*2 - ssize.width) <= 2 && std::abs(dsize.width*2 - ssize.width) <= 2 &&
std::abs(dsize.height*2 - ssize.height) <= 2 ); std::abs(dsize.height*2 - ssize.height) <= 2 );
......
...@@ -94,7 +94,8 @@ OCL_TEST_P(PyrDown, Mat) ...@@ -94,7 +94,8 @@ OCL_TEST_P(PyrDown, Mat)
{ {
for (int j = 0; j < test_loop_times; j++) for (int j = 0; j < test_loop_times; j++)
{ {
Size src_roiSize = randomSize(1, MAX_VALUE); // minimal src size is set to 4 since size<4 doesn't make sense
Size src_roiSize = randomSize(4, MAX_VALUE);
Size dst_roiSize = Size(randomInt((src_roiSize.width - 1) / 2, (src_roiSize.width + 3) / 2), Size dst_roiSize = Size(randomInt((src_roiSize.width - 1) / 2, (src_roiSize.width + 3) / 2),
randomInt((src_roiSize.height - 1) / 2, (src_roiSize.height + 3) / 2)); randomInt((src_roiSize.height - 1) / 2, (src_roiSize.height + 3) / 2));
dst_roiSize = dst_roiSize.area() == 0 ? Size((src_roiSize.width + 1) / 2, (src_roiSize.height + 1) / 2) : dst_roiSize; dst_roiSize = dst_roiSize.area() == 0 ? Size((src_roiSize.width + 1) / 2, (src_roiSize.height + 1) / 2) : dst_roiSize;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment