Commit e0584bb8 authored by Alexander Alekhin's avatar Alexander Alekhin

Merge pull request #2876 from vbystricky:oclopt_integralsum

parents 1493160f 09bcc061
......@@ -231,7 +231,7 @@ OCL_PERF_TEST_P(IntegralFixture, Integral1, ::testing::Combine(OCL_TEST_SIZES, O
OCL_TEST_CYCLE() cv::integral(src, dst, ddepth);
SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE);
SANITY_CHECK(dst, 2e-6, ERROR_RELATIVE);
}
OCL_PERF_TEST_P(IntegralFixture, Integral2, ::testing::Combine(OCL_TEST_SIZES, OCL_PERF_ENUM(CV_32S, CV_32F)))
......@@ -243,11 +243,11 @@ OCL_PERF_TEST_P(IntegralFixture, Integral2, ::testing::Combine(OCL_TEST_SIZES, O
checkDeviceMaxMemoryAllocSize(srcSize, ddepth);
UMat src(srcSize, CV_8UC1), sum(srcSize + Size(1, 1), ddepth), sqsum(srcSize + Size(1, 1), CV_32F);
declare.in(src, WARMUP_RNG).out(sum).out(sqsum);
declare.in(src, WARMUP_RNG).out(sum, sqsum);
OCL_TEST_CYCLE() cv::integral(src, sum, sqsum, ddepth, CV_32F);
SANITY_CHECK(sum, 1e-6, ERROR_RELATIVE);
SANITY_CHECK(sum, 2e-4, ERROR_RELATIVE);
SANITY_CHECK(sqsum, 5e-5, ERROR_RELATIVE);
}
......
This diff is collapsed.
This diff is collapsed.
......@@ -235,97 +235,87 @@ typedef void (*IntegralFunc)(const uchar* src, size_t srcstep, uchar* sum, size_
#ifdef HAVE_OPENCL
enum { vlen = 4 };
static bool ocl_integral( InputArray _src, OutputArray _sum, int sdepth )
{
if ( _src.type() != CV_8UC1 || _src.step() % vlen != 0 || _src.offset() % vlen != 0 ||
!(sdepth == CV_32S || sdepth == CV_32F) )
return false;
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
ocl::Kernel k1("integral_sum_cols", ocl::imgproc::integral_sum_oclsrc,
format("-D sdepth=%d", sdepth));
if (k1.empty())
if ( (_src.type() != CV_8UC1) ||
!(sdepth == CV_32S || sdepth == CV_32F || (doubleSupport && sdepth == CV_64F)))
return false;
Size size = _src.size(), t_size = Size(((size.height + vlen - 1) / vlen) * vlen, size.width),
ssize(size.width + 1, size.height + 1);
_sum.create(ssize, sdepth);
UMat src = _src.getUMat(), t_sum(t_size, sdepth), sum = _sum.getUMat();
t_sum = t_sum(Range::all(), Range(0, size.height));
static const int tileSize = 16;
String build_opt = format("-D sumT=%s -D LOCAL_SUM_SIZE=%d%s",
ocl::typeToStr(sdepth), tileSize,
doubleSupport ? " -D DOUBLE_SUPPORT" : "");
ocl::Kernel kcols("integral_sum_cols", ocl::imgproc::integral_sum_oclsrc, build_opt);
if (kcols.empty())
return false;
int offset = (int)src.offset / vlen;
int vcols = (src.cols + vlen - 1) / vlen;
int sum_offset = (int)sum.offset / vlen;
UMat src = _src.getUMat();
Size src_size = src.size();
Size bufsize(((src_size.height + tileSize - 1) / tileSize) * tileSize, ((src_size.width + tileSize - 1) / tileSize) * tileSize);
UMat buf(bufsize, sdepth);
kcols.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(buf));
size_t gt = src.cols, lt = tileSize;
if (!kcols.run(1, &gt, &lt, false))
return false;
k1.args(ocl::KernelArg::PtrReadOnly(src), ocl::KernelArg::PtrWriteOnly(t_sum),
offset, src.rows, src.cols, (int)src.step, (int)t_sum.step);
size_t gt = ((vcols + 1) / 2) * 256, lt = 256;
if (!k1.run(1, &gt, &lt, false))
ocl::Kernel krows("integral_sum_rows", ocl::imgproc::integral_sum_oclsrc, build_opt);
if (krows.empty())
return false;
ocl::Kernel k2("integral_sum_rows", ocl::imgproc::integral_sum_oclsrc,
format("-D sdepth=%d", sdepth));
k2.args(ocl::KernelArg::PtrReadOnly(t_sum), ocl::KernelArg::PtrWriteOnly(sum),
t_sum.rows, t_sum.cols, (int)t_sum.step, (int)sum.step, sum_offset);
Size sumsize(src_size.width + 1, src_size.height + 1);
_sum.create(sumsize, sdepth);
UMat sum = _sum.getUMat();
size_t gt2 = t_sum.cols * 32, lt2 = 256;
return k2.run(1, &gt2, &lt2, false);
krows.args(ocl::KernelArg::ReadOnlyNoSize(buf), ocl::KernelArg::WriteOnly(sum));
gt = src.rows;
return krows.run(1, &gt, &lt, false);
}
static bool ocl_integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, int sdepth, int sqdepth )
{
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
if ( _src.type() != CV_8UC1 || _src.step() % vlen != 0 || _src.offset() % vlen != 0 ||
(!doubleSupport && (sdepth == CV_64F || sqdepth == CV_64F)) )
return false;
char cvt[40];
String opts = format("-D sdepth=%d -D sqdepth=%d -D TYPE=%s -D TYPE4=%s4 -D convert_TYPE4=%s%s",
sdepth, sqdepth, ocl::typeToStr(sqdepth), ocl::typeToStr(sqdepth),
ocl::convertTypeStr(sdepth, sqdepth, 4, cvt),
doubleSupport ? " -D DOUBLE_SUPPORT" : "");
ocl::Kernel k1("integral_cols", ocl::imgproc::integral_sqrsum_oclsrc, opts);
if (k1.empty())
if ( _src.type() != CV_8UC1 || (!doubleSupport && (sdepth == CV_64F || sqdepth == CV_64F)) )
return false;
Size size = _src.size(), dsize = Size(size.width + 1, size.height + 1),
t_size = Size(((size.height + vlen - 1) / vlen) * vlen, size.width);
UMat src = _src.getUMat(), t_sum(t_size, sdepth), t_sqsum(t_size, sqdepth);
t_sum = t_sum(Range::all(), Range(0, size.height));
t_sqsum = t_sqsum(Range::all(), Range(0, size.height));
_sum.create(dsize, sdepth);
_sqsum.create(dsize, sqdepth);
UMat sum = _sum.getUMat(), sqsum = _sqsum.getUMat();
static const int tileSize = 16;
int offset = (int)src.offset / vlen;
int pre_invalid = src.offset % vlen;
int vcols = (pre_invalid + src.cols + vlen - 1) / vlen;
int sum_offset = (int)(sum.offset / sum.elemSize());
int sqsum_offset = (int)(sqsum.offset / sqsum.elemSize());
String build_opt = format("-D SUM_SQUARE -D sumT=%s -D sumSQT=%s -D LOCAL_SUM_SIZE=%d%s",
ocl::typeToStr(sdepth), ocl::typeToStr(sqdepth),
tileSize,
doubleSupport ? " -D DOUBLE_SUPPORT" : "");
k1.args(ocl::KernelArg::PtrReadOnly(src), ocl::KernelArg::PtrWriteOnly(t_sum),
ocl::KernelArg::PtrWriteOnly(t_sqsum), offset, pre_invalid, src.rows,
src.cols, (int)src.step, (int)t_sum.step, (int)t_sqsum.step);
ocl::Kernel kcols("integral_sum_cols", ocl::imgproc::integral_sum_oclsrc, build_opt);
if (kcols.empty())
return false;
size_t gt = ((vcols + 1) / 2) * 256, lt = 256;
if (!k1.run(1, &gt, &lt, false))
UMat src = _src.getUMat();
Size src_size = src.size();
Size bufsize(((src_size.height + tileSize - 1) / tileSize) * tileSize, ((src_size.width + tileSize - 1) / tileSize) * tileSize);
UMat buf(bufsize, sdepth);
UMat buf_sq(bufsize, sqdepth);
kcols.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(buf), ocl::KernelArg::WriteOnlyNoSize(buf_sq));
size_t gt = src.cols, lt = tileSize;
if (!kcols.run(1, &gt, &lt, false))
return false;
ocl::Kernel k2("integral_rows", ocl::imgproc::integral_sqrsum_oclsrc, opts);
if (k2.empty())
ocl::Kernel krows("integral_sum_rows", ocl::imgproc::integral_sum_oclsrc, build_opt);
if (krows.empty())
return false;
k2.args(ocl::KernelArg::PtrReadOnly(t_sum), ocl::KernelArg::PtrReadOnly(t_sqsum),
ocl::KernelArg::PtrWriteOnly(sum), ocl::KernelArg::PtrWriteOnly(sqsum),
t_sum.rows, t_sum.cols, (int)t_sum.step, (int)t_sqsum.step,
(int)sum.step, (int)sqsum.step, sum_offset, sqsum_offset);
Size sumsize(src_size.width + 1, src_size.height + 1);
_sum.create(sumsize, sdepth);
UMat sum = _sum.getUMat();
_sqsum.create(sumsize, sqdepth);
UMat sum_sq = _sqsum.getUMat();
size_t gt2 = t_sum.cols * 32, lt2 = 256;
return k2.run(1, &gt2, &lt2, false);
krows.args(ocl::KernelArg::ReadOnlyNoSize(buf), ocl::KernelArg::ReadOnlyNoSize(buf_sq), ocl::KernelArg::WriteOnly(sum), ocl::KernelArg::WriteOnlyNoSize(sum_sq));
gt = src.rows;
return krows.run(1, &gt, &lt, false);
}
#endif
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment