Commit a66db67b authored by Alexander Karsakov

Attempt to improve performance

parent c747426f
...@@ -3413,15 +3413,15 @@ static bool ocl_sepFilter2D_SinglePass(InputArray _src, OutputArray _dst, ...@@ -3413,15 +3413,15 @@ static bool ocl_sepFilter2D_SinglePass(InputArray _src, OutputArray _dst,
String opts = cv::format("-D BLK_X=%d -D BLK_Y=%d -D RADIUSX=%d -D RADIUSY=%d%s%s" String opts = cv::format("-D BLK_X=%d -D BLK_Y=%d -D RADIUSX=%d -D RADIUSY=%d%s%s"
" -D srcT=%s -D convertToWT=%s -D WT=%s -D dstT=%s -D convertToDstT=%s" " -D srcT=%s -D convertToWT=%s -D WT=%s -D dstT=%s -D convertToDstT=%s"
" -D %s -D srcT1=%s -D dstT1=%s -D CN=%d -D SHIFT_BITS=%d%s", " -D %s -D srcT1=%s -D dstT1=%s -D WT1=%s -D CN=%d -D SHIFT_BITS=%d%s",
(int)lt2[0], (int)lt2[1], row_kernel.cols / 2, col_kernel.cols / 2, (int)lt2[0], (int)lt2[1], row_kernel.cols / 2, col_kernel.cols / 2,
ocl::kernelToStr(row_kernel, wdepth, "KERNEL_MATRIX_X").c_str(), ocl::kernelToStr(row_kernel, wdepth, "KERNEL_MATRIX_X").c_str(),
ocl::kernelToStr(col_kernel, wdepth, "KERNEL_MATRIX_Y").c_str(), ocl::kernelToStr(col_kernel, wdepth, "KERNEL_MATRIX_Y").c_str(),
ocl::typeToStr(stype), ocl::convertTypeStr(sdepth, wdepth, cn, cvt[0]), ocl::typeToStr(stype), ocl::convertTypeStr(sdepth, wdepth, cn, cvt[0]),
ocl::typeToStr(CV_MAKE_TYPE(wdepth, cn)), ocl::typeToStr(dtype), ocl::typeToStr(CV_MAKE_TYPE(wdepth, cn)), ocl::typeToStr(dtype),
ocl::convertTypeStr(wdepth, ddepth, cn, cvt[1]), borderMap[borderType], ocl::convertTypeStr(wdepth, ddepth, cn, cvt[1]), borderMap[borderType],
ocl::typeToStr(sdepth), ocl::typeToStr(ddepth), cn, 2*shift_bits, ocl::typeToStr(sdepth), ocl::typeToStr(ddepth), ocl::typeToStr(wdepth),
int_arithm ? " -D INTEGER_ARITHMETIC" : ""); cn, 2*shift_bits, int_arithm ? " -D INTEGER_ARITHMETIC" : "");
ocl::Kernel k("sep_filter", ocl::imgproc::filterSep_singlePass_oclsrc, opts); ocl::Kernel k("sep_filter", ocl::imgproc::filterSep_singlePass_oclsrc, opts);
if (k.empty()) if (k.empty())
...@@ -3481,8 +3481,8 @@ static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth, ...@@ -3481,8 +3481,8 @@ static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth,
ctype == KERNEL_SMOOTH+KERNEL_SYMMETRICAL) ctype == KERNEL_SMOOTH+KERNEL_SYMMETRICAL)
{ {
bdepth = CV_32S; bdepth = CV_32S;
kernelX.convertTo( kernelX, CV_32S, 1 << shift_bits ); kernelX.convertTo( kernelX, bdepth, 1 << shift_bits );
kernelY.convertTo( kernelY, CV_32S, 1 << shift_bits ); kernelY.convertTo( kernelY, bdepth, 1 << shift_bits );
int_arithm = true; int_arithm = true;
} }
......
...@@ -100,8 +100,8 @@ ...@@ -100,8 +100,8 @@
// horizontal and vertical filter kernels // horizontal and vertical filter kernels
// should be defined on host during compile time to avoid overhead // should be defined on host during compile time to avoid overhead
#define DIG(a) a, #define DIG(a) a,
__constant WT mat_kernelX[] = { KERNEL_MATRIX_X }; __constant WT1 mat_kernelX[] = { KERNEL_MATRIX_X };
__constant WT mat_kernelY[] = { KERNEL_MATRIX_Y }; __constant WT1 mat_kernelY[] = { KERNEL_MATRIX_Y };
__kernel void sep_filter(__global uchar* Src, int src_step, int srcOffsetX, int srcOffsetY, int height, int width, __kernel void sep_filter(__global uchar* Src, int src_step, int srcOffsetX, int srcOffsetY, int height, int width,
__global uchar* Dst, int dst_step, int dst_offset, int dst_rows, int dst_cols, float delta) __global uchar* Dst, int dst_step, int dst_offset, int dst_rows, int dst_cols, float delta)
...@@ -124,8 +124,6 @@ __kernel void sep_filter(__global uchar* Src, int src_step, int srcOffsetX, int ...@@ -124,8 +124,6 @@ __kernel void sep_filter(__global uchar* Src, int src_step, int srcOffsetX, int
// calculate pixel position in source image taking image offset into account // calculate pixel position in source image taking image offset into account
int srcX = x + srcOffsetX - RADIUSX; int srcX = x + srcOffsetX - RADIUSX;
int srcY = y + srcOffsetY - RADIUSY; int srcY = y + srcOffsetY - RADIUSY;
int xb = srcX;
int yb = srcY;
// extrapolate coordinates, if needed // extrapolate coordinates, if needed
// and read my own source pixel into local memory // and read my own source pixel into local memory
...@@ -191,6 +189,7 @@ __kernel void sep_filter(__global uchar* Src, int src_step, int srcOffsetX, int ...@@ -191,6 +189,7 @@ __kernel void sep_filter(__global uchar* Src, int src_step, int srcOffsetX, int
sum = (sum + (1 << (SHIFT_BITS-1))) >> SHIFT_BITS; sum = (sum + (1 << (SHIFT_BITS-1))) >> SHIFT_BITS;
#endif #endif
// store result into destination image // store result into destination image
storepix(convertToDstT(sum + (WT)(delta)), Dst + mad24(y, dst_step, mad24(x, DSTSIZE, dst_offset))); storepix(convertToDstT(sum + (WT)(delta)), Dst + mad24(y, dst_step, mad24(x, DSTSIZE, dst_offset)));
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment