Commit 7faf1f62 authored by Ilya Lavrenov

some improvements

parent 72c327fe
......@@ -26,7 +26,7 @@ OCL_PERF_TEST(Photo, DenoisingGrayscale)
cv::fastNlMeansDenoising(original, result, 10);
SANITY_CHECK(result, 1);
OCL_PERF_TEST(Photo, DenoisingColored)
......@@ -19,7 +19,7 @@ enum
CTA_SIZE = 128
CTA_SIZE = 256
static inline int getNearestPowerOf2(int value)
......@@ -35,8 +35,6 @@ __kernel void calcAlmostDist2Weight(__global int * almostDist2Weight, int almost
weight = 0;
almostDist2Weight[almostDist] = weight;
// printf("%d ", weight);
......@@ -50,12 +48,20 @@ inline int_t calcDist(uchar_t a, uchar_t b)
return diff * diff;
inline int_t calcDistUpDown(uchar_t down_value, uchar_t down_value_t, uchar_t up_value, uchar_t up_value_t)
int_t A = convert_int_t(down_value) - convert_int_t(down_value_t);
int_t B = convert_int_t(up_value) - convert_int_t(up_value_t);
return (A - B) * (A + B);
inline void calcFirstElementInRow(__global const uchar * src, int src_step, int src_offset,
__local int_t * dists, int y, int x, int id,
__global int_t * col_dists, __global int_t * up_col_dists)
int sx = x - SEARCH_SIZE2, sy = y - SEARCH_SIZE2;
int_t col_dists_current_private[TEMPLATE_SIZE];
for (int i = id, size = SEARCH_SIZE_SQ; i < size; i += CTA_SIZE)
......@@ -68,9 +74,8 @@ inline void calcFirstElementInRow(__global const uchar * src, int src_step, int
#pragma unroll
for (int j = 0; j < TEMPLATE_SIZE; ++j)
col_dists_current[j] = (int_t)(0);
col_dists_current_private[j] = (int_t)(0);
#pragma unroll
for (int ty = 0; ty < TEMPLATE_SIZE; ++ty)
#pragma unroll
......@@ -78,7 +83,7 @@ inline void calcFirstElementInRow(__global const uchar * src, int src_step, int
value = calcDist(src_template[tx], src_current[tx]);
col_dists_current[tx + TEMPLATE_SIZE2] += value;
col_dists_current_private[tx + TEMPLATE_SIZE2] += value;
dist += value;
......@@ -86,6 +91,10 @@ inline void calcFirstElementInRow(__global const uchar * src, int src_step, int
src_template = (__global const uchar_t *)((__global const uchar *)src_template + src_step);
#pragma unroll
for (int j = 0; j < TEMPLATE_SIZE; ++j)
col_dists_current[j] = col_dists_current_private[j];
dists[i] = dist;
up_col_dists[0 + i] = col_dists[TEMPLATE_SIZE - 1];
......@@ -148,7 +157,7 @@ inline void calcElement(__global const uchar * src, int src_step, int src_offset
__global int_t * col_dists_current = col_dists + mad24(i, TEMPLATE_SIZE, first);
__global int_t * up_col_dists_current = up_col_dists + mad24(x0, SEARCH_SIZE_SQ, i);
int_t col_dist = up_col_dists_current[0] + calcDist(down_value, down_value_t) - calcDist(up_value, up_value_t);
int_t col_dist = up_col_dists_current[0] + calcDistUpDown(down_value, down_value_t, up_value, up_value_t);
dists[i] += col_dist - col_dists_current[0];
col_dists_current[0] = col_dist;
......@@ -192,7 +201,7 @@ inline void convolveWindow(__global const uchar * src, int src_step, int src_off
for (int lsize = CTA_SIZE2 >> 1; lsize > 0; lsize >>= 1)
for (int lsize = CTA_SIZE2 >> 1; lsize > 2; lsize >>= 1)
if (id < lsize)
......@@ -206,7 +215,11 @@ inline void convolveWindow(__global const uchar * src, int src_step, int src_off
if (id == 0)
int dst_index = mad24(y, dst_step, mad24(cn, x, dst_offset));
*(__global uchar_t *)(dst + dst_index) = convert_uchar_t(weighted_sum_local[0] / weights_local[0]);
int_t weighted_sum_local_0 = weighted_sum_local[0] + weighted_sum_local[1] +
weighted_sum_local[2] + weighted_sum_local[3];
int weights_local_0 = weights_local[0] + weights_local[1] + weights_local[2] + weights_local[3];
*(__global uchar_t *)(dst + dst_index) = convert_uchar_t(weighted_sum_local_0 / weights_local_0);
......@@ -234,7 +247,6 @@ __kernel void fastNlMeansDenoising(__global const uchar * src, int src_step, int
for (int y = y0; y < y1; ++y)
for (int x = x0; x < x1; ++x)
// barrier(CLK_LOCAL_MEM_FENCE);
if (x == x0)
calcFirstElementInRow(src, src_step, src_offset, dists, y, x, id, col_dists, up_col_dists);
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment