Commit 69eae13f authored by Erik Karlsson

Changed pointers from sample_t * to uchar * and rescaled psz accordingly.

parent d56d04e4
...@@ -89,13 +89,13 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h, ...@@ -89,13 +89,13 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
char buf[4][40]; char buf[4][40];
String opts = format("-D OP_CALC_FASTNLMEANS -D TEMPLATE_SIZE=%d -D SEARCH_SIZE=%d" String opts = format("-D OP_CALC_FASTNLMEANS -D TEMPLATE_SIZE=%d -D SEARCH_SIZE=%d"
" -D sample_t=%s -D pixel_t=%s -D int_t=%s" " -D pixel_t=%s -D int_t=%s"
" -D weight_t=%s -D sum_t=%s -D convert_sum_t=%s" " -D weight_t=%s -D sum_t=%s -D convert_sum_t=%s"
" -D BLOCK_COLS=%d -D BLOCK_ROWS=%d" " -D BLOCK_COLS=%d -D BLOCK_ROWS=%d"
" -D CTA_SIZE=%d -D TEMPLATE_SIZE2=%d -D SEARCH_SIZE2=%d" " -D CTA_SIZE=%d -D TEMPLATE_SIZE2=%d -D SEARCH_SIZE2=%d"
" -D convert_int_t=%s -D cn=%d -D psz=%d -D convert_pixel_t=%s%s", " -D convert_int_t=%s -D cn=%d -D psz=%d -D convert_pixel_t=%s%s",
templateWindowSize, searchWindowSize, templateWindowSize, searchWindowSize,
ocl::typeToStr(depth), ocl::typeToStr(type), ocl::typeToStr(CV_32SC(cn)), ocl::typeToStr(type), ocl::typeToStr(CV_32SC(cn)),
depth == CV_8U ? ocl::typeToStr(CV_32S) : "long", depth == CV_8U ? ocl::typeToStr(CV_32S) : "long",
depth == CV_8U ? ocl::typeToStr(CV_32SC(cn)) : depth == CV_8U ? ocl::typeToStr(CV_32SC(cn)) :
(sprintf(buf[0], "long%d", cn), buf[0]), (sprintf(buf[0], "long%d", cn), buf[0]),
...@@ -103,7 +103,8 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h, ...@@ -103,7 +103,8 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
(sprintf(buf[1], "convert_long%d", cn), buf[1]), (sprintf(buf[1], "convert_long%d", cn), buf[1]),
BLOCK_COLS, BLOCK_ROWS, BLOCK_COLS, BLOCK_ROWS,
ctaSize, templateWindowHalfWize, searchWindowHalfSize, ctaSize, templateWindowHalfWize, searchWindowHalfSize,
ocl::convertTypeStr(depth, CV_32S, cn, buf[2]), cn, cn == 3 ? 4 : cn, ocl::convertTypeStr(depth, CV_32S, cn, buf[2]), cn,
(depth == CV_8U ? sizeof(uchar) : sizeof(ushort)) * (cn == 3 ? 4 : cn),
ocl::convertTypeStr(CV_32S, depth, cn, buf[3]), abs ? " -D ABS" : ""); ocl::convertTypeStr(CV_32S, depth, cn, buf[3]), abs ? " -D ABS" : "");
ocl::Kernel k("fastNlMeansDenoising", ocl::photo::nlmeans_oclsrc, opts); ocl::Kernel k("fastNlMeansDenoising", ocl::photo::nlmeans_oclsrc, opts);
......
...@@ -97,7 +97,7 @@ inline int calcDistUpDown(pixel_t down_value, pixel_t down_value_t, pixel_t up_v ...@@ -97,7 +97,7 @@ inline int calcDistUpDown(pixel_t down_value, pixel_t down_value_t, pixel_t up_v
#define COND if (x == 0 && y == 0) #define COND if (x == 0 && y == 0)
inline void calcFirstElementInRow(__global const sample_t * src, int src_step, int src_offset, inline void calcFirstElementInRow(__global const uchar * src, int src_step, int src_offset,
__local int * dists, int y, int x, int id, __local int * dists, int y, int x, int id,
__global int * col_dists, __global int * up_col_dists) __global int * col_dists, __global int * up_col_dists)
{ {
...@@ -129,8 +129,8 @@ inline void calcFirstElementInRow(__global const sample_t * src, int src_step, i ...@@ -129,8 +129,8 @@ inline void calcFirstElementInRow(__global const sample_t * src, int src_step, i
dist += value; dist += value;
} }
src_current = (__global const pixel_t *)((__global const sample_t *)src_current + src_step); src_current = (__global const pixel_t *)((__global const uchar *)src_current + src_step);
src_template = (__global const pixel_t *)((__global const sample_t *)src_template + src_step); src_template = (__global const pixel_t *)((__global const uchar *)src_template + src_step);
} }
#pragma unroll #pragma unroll
...@@ -142,7 +142,7 @@ inline void calcFirstElementInRow(__global const sample_t * src, int src_step, i ...@@ -142,7 +142,7 @@ inline void calcFirstElementInRow(__global const sample_t * src, int src_step, i
} }
} }
inline void calcElementInFirstRow(__global const sample_t * src, int src_step, int src_offset, inline void calcElementInFirstRow(__global const uchar * src, int src_step, int src_offset,
__local int * dists, int y, int x0, int x, int id, int first, __local int * dists, int y, int x0, int x, int id, int first,
__global int * col_dists, __global int * up_col_dists) __global int * col_dists, __global int * up_col_dists)
{ {
...@@ -164,8 +164,8 @@ inline void calcElementInFirstRow(__global const sample_t * src, int src_step, i ...@@ -164,8 +164,8 @@ inline void calcElementInFirstRow(__global const sample_t * src, int src_step, i
{ {
col_dist += calcDist(src_current[0], src_template[0]); col_dist += calcDist(src_current[0], src_template[0]);
src_current = (__global const pixel_t *)((__global const sample_t *)src_current + src_step); src_current = (__global const pixel_t *)((__global const uchar *)src_current + src_step);
src_template = (__global const pixel_t *)((__global const sample_t *)src_template + src_step); src_template = (__global const pixel_t *)((__global const uchar *)src_template + src_step);
} }
dists[i] += col_dist - col_dists_current[first]; dists[i] += col_dist - col_dists_current[first];
...@@ -174,7 +174,7 @@ inline void calcElementInFirstRow(__global const sample_t * src, int src_step, i ...@@ -174,7 +174,7 @@ inline void calcElementInFirstRow(__global const sample_t * src, int src_step, i
} }
} }
inline void calcElement(__global const sample_t * src, int src_step, int src_offset, inline void calcElement(__global const uchar * src, int src_step, int src_offset,
__local int * dists, int y, int x0, int x, int id, int first, __local int * dists, int y, int x0, int x, int id, int first,
__global int * col_dists, __global int * up_col_dists) __global int * col_dists, __global int * up_col_dists)
{ {
...@@ -207,9 +207,9 @@ inline void calcElement(__global const sample_t * src, int src_step, int src_off ...@@ -207,9 +207,9 @@ inline void calcElement(__global const sample_t * src, int src_step, int src_off
} }
} }
inline void convolveWindow(__global const sample_t * src, int src_step, int src_offset, inline void convolveWindow(__global const uchar * src, int src_step, int src_offset,
__local int * dists, __global const int * almostDist2Weight, __local int * dists, __global const int * almostDist2Weight,
__global sample_t * dst, int dst_step, int dst_offset, __global uchar * dst, int dst_step, int dst_offset,
int y, int x, int id, __local weight_t * weights_local, int y, int x, int id, __local weight_t * weights_local,
__local sum_t * weighted_sum_local, int almostTemplateWindowSizeSqBinShift) __local sum_t * weighted_sum_local, int almostTemplateWindowSizeSqBinShift)
{ {
...@@ -255,9 +255,9 @@ inline void convolveWindow(__global const sample_t * src, int src_step, int src_ ...@@ -255,9 +255,9 @@ inline void convolveWindow(__global const sample_t * src, int src_step, int src_
} }
} }
__kernel void fastNlMeansDenoising(__global const sample_t * src, int src_step, int src_offset, __kernel void fastNlMeansDenoising(__global const uchar * src, int src_step, int src_offset,
__global sample_t * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols, __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
__global const int * almostDist2Weight, __global sample_t * buffer, __global const int * almostDist2Weight, __global uchar * buffer,
int almostTemplateWindowSizeSqBinShift) int almostTemplateWindowSizeSqBinShift)
{ {
int block_x = get_group_id(0), nblocks_x = get_num_groups(0); int block_x = get_group_id(0), nblocks_x = get_num_groups(0);
...@@ -277,11 +277,6 @@ __kernel void fastNlMeansDenoising(__global const sample_t * src, int src_step, ...@@ -277,11 +277,6 @@ __kernel void fastNlMeansDenoising(__global const sample_t * src, int src_step,
__global int * col_dists = (__global int *)(buffer + block_data_start * sizeof(int)); __global int * col_dists = (__global int *)(buffer + block_data_start * sizeof(int));
__global int * up_col_dists = col_dists + SEARCH_SIZE_SQ * TEMPLATE_SIZE; __global int * up_col_dists = col_dists + SEARCH_SIZE_SQ * TEMPLATE_SIZE;
src_step /= sizeof(sample_t);
src_offset /= sizeof(sample_t);
dst_step /= sizeof(sample_t);
dst_offset /= sizeof(sample_t);
for (int y = y0; y < y1; ++y) for (int y = y0; y < y1; ++y)
for (int x = x0; x < x1; ++x) for (int x = x0; x < x1; ++x)
{ {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment