Commit 20aaa8fe, authored by Vadim Pisarevsky, committed by OpenCV Buildbot

Merge pull request #2560 from akarsakov:gaussianblur_integer

parents 92db6786 a66db67b
This diff is collapsed.
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
// //
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. // Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Copyright (C) 2014, Itseez, Inc, all rights reserved.
// Third party copyrights are property of their respective owners. // Third party copyrights are property of their respective owners.
// //
// @Authors // @Authors
...@@ -60,7 +61,7 @@ ...@@ -60,7 +61,7 @@
#endif #endif
#define DIG(a) a, #define DIG(a) a,
__constant float mat_kernel[] = { COEFF }; __constant srcT1 mat_kernel[] = { COEFF };
__kernel void col_filter(__global const uchar * src, int src_step, int src_offset, int src_whole_rows, int src_whole_cols, __kernel void col_filter(__global const uchar * src, int src_step, int src_offset, int src_whole_rows, int src_whole_cols,
__global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols, float delta) __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols, float delta)
...@@ -96,9 +97,17 @@ __kernel void col_filter(__global const uchar * src, int src_step, int src_offse ...@@ -96,9 +97,17 @@ __kernel void col_filter(__global const uchar * src, int src_step, int src_offse
{ {
temp[0] = LDS_DAT[l_y + RADIUSY - i][l_x]; temp[0] = LDS_DAT[l_y + RADIUSY - i][l_x];
temp[1] = LDS_DAT[l_y + RADIUSY + i][l_x]; temp[1] = LDS_DAT[l_y + RADIUSY + i][l_x];
#ifndef INTEGER_ARITHMETIC
sum += mad(temp[0], mat_kernel[RADIUSY - i], temp[1] * mat_kernel[RADIUSY + i]); sum += mad(temp[0], mat_kernel[RADIUSY - i], temp[1] * mat_kernel[RADIUSY + i]);
#else
sum += mad24(temp[0],mat_kernel[RADIUSY - i], temp[1] * mat_kernel[RADIUSY + i]);
#endif
} }
#ifdef INTEGER_ARITHMETIC
sum = (sum + (1 << (SHIFT_BITS-1))) >> SHIFT_BITS;
#endif
// write the result to dst // write the result to dst
if (x < dst_cols && y < dst_rows) if (x < dst_cols && y < dst_rows)
{ {
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
// //
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. // Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Copyright (C) 2014, Itseez, Inc, all rights reserved.
// Third party copyrights are property of their respective owners. // Third party copyrights are property of their respective owners.
// //
// @Authors // @Authors
...@@ -138,7 +139,15 @@ ...@@ -138,7 +139,15 @@
#endif #endif
#define DIG(a) a, #define DIG(a) a,
__constant float mat_kernel[] = { COEFF }; __constant dstT1 mat_kernel[] = { COEFF };
#ifndef INTEGER_ARITHMETIC
#define dstT4 float4
#define convertDstVec convert_float4
#else
#define dstT4 int4
#define convertDstVec convert_int4
#endif
__kernel void row_filter_C1_D0(__global const uchar * src, int src_step_in_pixel, int src_offset_x, int src_offset_y, __kernel void row_filter_C1_D0(__global const uchar * src, int src_step_in_pixel, int src_offset_x, int src_offset_y,
int src_cols, int src_rows, int src_whole_cols, int src_whole_rows, int src_cols, int src_rows, int src_whole_cols, int src_whole_rows,
...@@ -155,7 +164,7 @@ __kernel void row_filter_C1_D0(__global const uchar * src, int src_step_in_pixel ...@@ -155,7 +164,7 @@ __kernel void row_filter_C1_D0(__global const uchar * src, int src_step_in_pixel
int start_y = y + src_offset_y - radiusy; int start_y = y + src_offset_y - radiusy;
int start_addr = mad24(start_y, src_step_in_pixel, start_x); int start_addr = mad24(start_y, src_step_in_pixel, start_x);
float4 sum; dstT4 sum;
uchar4 temp[READ_TIMES_ROW]; uchar4 temp[READ_TIMES_ROW];
__local uchar4 LDS_DAT[LSIZE1][READ_TIMES_ROW * LSIZE0 + 1]; __local uchar4 LDS_DAT[LSIZE1][READ_TIMES_ROW * LSIZE0 + 1];
...@@ -249,19 +258,23 @@ __kernel void row_filter_C1_D0(__global const uchar * src, int src_step_in_pixel ...@@ -249,19 +258,23 @@ __kernel void row_filter_C1_D0(__global const uchar * src, int src_step_in_pixel
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
// read pixels from lds and calculate the result // read pixels from lds and calculate the result
sum = convert_float4(vload4(0,(__local uchar *)&LDS_DAT[l_y][l_x]+RADIUSX+offset)) * mat_kernel[RADIUSX]; sum = convertDstVec(vload4(0,(__local uchar *)&LDS_DAT[l_y][l_x]+RADIUSX+offset)) * mat_kernel[RADIUSX];
for (int i = 1; i <= RADIUSX; ++i) for (int i = 1; i <= RADIUSX; ++i)
{ {
temp[0] = vload4(0, (__local uchar*)&LDS_DAT[l_y][l_x] + RADIUSX + offset - i); temp[0] = vload4(0, (__local uchar*)&LDS_DAT[l_y][l_x] + RADIUSX + offset - i);
temp[1] = vload4(0, (__local uchar*)&LDS_DAT[l_y][l_x] + RADIUSX + offset + i); temp[1] = vload4(0, (__local uchar*)&LDS_DAT[l_y][l_x] + RADIUSX + offset + i);
sum += mad(convert_float4(temp[0]), mat_kernel[RADIUSX-i], convert_float4(temp[1]) * mat_kernel[RADIUSX + i]); #ifndef INTEGER_ARITHMETIC
sum += mad(convertDstVec(temp[0]), mat_kernel[RADIUSX-i], convertDstVec(temp[1]) * mat_kernel[RADIUSX + i]);
#else
sum += mad24(convertDstVec(temp[0]), mat_kernel[RADIUSX-i], convertDstVec(temp[1]) * mat_kernel[RADIUSX + i]);
#endif
} }
start_addr = mad24(y, dst_step_in_pixel, x); start_addr = mad24(y, dst_step_in_pixel, x);
// write the result to dst // write the result to dst
if ((x+3<dst_cols) & (y<dst_rows)) if ((x+3<dst_cols) & (y<dst_rows))
*(__global float4*)&dst[start_addr] = sum; *(__global dstT4*)&dst[start_addr] = sum;
else if ((x+2<dst_cols) && (y<dst_rows)) else if ((x+2<dst_cols) && (y<dst_rows))
{ {
dst[start_addr] = sum.x; dst[start_addr] = sum.x;
...@@ -355,7 +368,11 @@ __kernel void row_filter(__global const uchar * src, int src_step, int src_offse ...@@ -355,7 +368,11 @@ __kernel void row_filter(__global const uchar * src, int src_step, int src_offse
{ {
temp[0] = LDS_DAT[l_y][l_x + RADIUSX - i]; temp[0] = LDS_DAT[l_y][l_x + RADIUSX - i];
temp[1] = LDS_DAT[l_y][l_x + RADIUSX + i]; temp[1] = LDS_DAT[l_y][l_x + RADIUSX + i];
#ifndef INTEGER_ARITHMETIC
sum += mad(convertToDstT(temp[0]), mat_kernel[RADIUSX - i], convertToDstT(temp[1]) * mat_kernel[RADIUSX + i]); sum += mad(convertToDstT(temp[0]), mat_kernel[RADIUSX - i], convertToDstT(temp[1]) * mat_kernel[RADIUSX + i]);
#else
sum += mad24(convertToDstT(temp[0]), mat_kernel[RADIUSX - i], convertToDstT(temp[1]) * mat_kernel[RADIUSX + i]);
#endif
} }
// write the result to dst // write the result to dst
......
...@@ -100,8 +100,8 @@ ...@@ -100,8 +100,8 @@
// horizontal and vertical filter kernels // horizontal and vertical filter kernels
// should be defined on host during compile time to avoid overhead // should be defined on host during compile time to avoid overhead
#define DIG(a) a, #define DIG(a) a,
__constant float mat_kernelX[] = { KERNEL_MATRIX_X }; __constant WT1 mat_kernelX[] = { KERNEL_MATRIX_X };
__constant float mat_kernelY[] = { KERNEL_MATRIX_Y }; __constant WT1 mat_kernelY[] = { KERNEL_MATRIX_Y };
__kernel void sep_filter(__global uchar* Src, int src_step, int srcOffsetX, int srcOffsetY, int height, int width, __kernel void sep_filter(__global uchar* Src, int src_step, int srcOffsetX, int srcOffsetY, int height, int width,
__global uchar* Dst, int dst_step, int dst_offset, int dst_rows, int dst_cols, float delta) __global uchar* Dst, int dst_step, int dst_offset, int dst_rows, int dst_cols, float delta)
...@@ -124,8 +124,6 @@ __kernel void sep_filter(__global uchar* Src, int src_step, int srcOffsetX, int ...@@ -124,8 +124,6 @@ __kernel void sep_filter(__global uchar* Src, int src_step, int srcOffsetX, int
// calculate pixel position in source image taking image offset into account // calculate pixel position in source image taking image offset into account
int srcX = x + srcOffsetX - RADIUSX; int srcX = x + srcOffsetX - RADIUSX;
int srcY = y + srcOffsetY - RADIUSY; int srcY = y + srcOffsetY - RADIUSY;
int xb = srcX;
int yb = srcY;
// extrapolate coordinates, if needed // extrapolate coordinates, if needed
// and read my own source pixel into local memory // and read my own source pixel into local memory
...@@ -159,12 +157,16 @@ __kernel void sep_filter(__global uchar* Src, int src_step, int srcOffsetX, int ...@@ -159,12 +157,16 @@ __kernel void sep_filter(__global uchar* Src, int src_step, int srcOffsetX, int
// do vertical filter pass // do vertical filter pass
// and store intermediate results to second local memory array // and store intermediate results to second local memory array
int i, clocX = lix; int i, clocX = lix;
WT sum = 0.0f; WT sum = (WT) 0;
do do
{ {
sum = 0.0f; sum = (WT) 0;
for (i=0; i<=2*RADIUSY; i++) for (i=0; i<=2*RADIUSY; i++)
#ifndef INTEGER_ARITHMETIC
sum = mad(lsmem[liy+i][clocX], mat_kernelY[i], sum); sum = mad(lsmem[liy+i][clocX], mat_kernelY[i], sum);
#else
sum = mad24(lsmem[liy+i][clocX], mat_kernelY[i], sum);
#endif
lsmemDy[liy][clocX] = sum; lsmemDy[liy][clocX] = sum;
clocX += BLK_X; clocX += BLK_X;
} }
...@@ -180,7 +182,13 @@ __kernel void sep_filter(__global uchar* Src, int src_step, int srcOffsetX, int ...@@ -180,7 +182,13 @@ __kernel void sep_filter(__global uchar* Src, int src_step, int srcOffsetX, int
// and calculate final result // and calculate final result
sum = 0.0f; sum = 0.0f;
for (i=0; i<=2*RADIUSX; i++) for (i=0; i<=2*RADIUSX; i++)
#ifndef INTEGER_ARITHMETIC
sum = mad(lsmemDy[liy][lix+i], mat_kernelX[i], sum); sum = mad(lsmemDy[liy][lix+i], mat_kernelX[i], sum);
#else
sum = mad24(lsmemDy[liy][lix+i], mat_kernelX[i], sum);
sum = (sum + (1 << (SHIFT_BITS-1))) >> SHIFT_BITS;
#endif
// store result into destination image // store result into destination image
storepix(convertToDstT(sum + (WT)(delta)), Dst + mad24(y, dst_step, mad24(x, DSTSIZE, dst_offset))); storepix(convertToDstT(sum + (WT)(delta)), Dst + mad24(y, dst_step, mad24(x, DSTSIZE, dst_offset)));
......
...@@ -219,7 +219,7 @@ OCL_TEST_P(GaussianBlurTest, Mat) ...@@ -219,7 +219,7 @@ OCL_TEST_P(GaussianBlurTest, Mat)
OCL_OFF(cv::GaussianBlur(src_roi, dst_roi, Size(ksize, ksize), sigma1, sigma2, borderType)); OCL_OFF(cv::GaussianBlur(src_roi, dst_roi, Size(ksize, ksize), sigma1, sigma2, borderType));
OCL_ON(cv::GaussianBlur(usrc_roi, udst_roi, Size(ksize, ksize), sigma1, sigma2, borderType)); OCL_ON(cv::GaussianBlur(usrc_roi, udst_roi, Size(ksize, ksize), sigma1, sigma2, borderType));
Near(CV_MAT_DEPTH(type) == CV_8U ? 3 : 5e-5, false); Near(CV_MAT_DEPTH(type) >= CV_32F ? 5e-5 : 1, false);
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment