Commit c66e27d4 authored by yao

stereoBM: fix an error on Linux when running the full performance test

parent c73a10cb
......@@ -258,27 +258,13 @@ float sobel(__global unsigned char *input, int x, int y, int rows, int cols)
// CalcSums: adds up entries of the local float buffer `cols` for one window
// and returns the total as a float.
//
// NOTE(review): this listing looks like a diff view whose removed and added
// lines were merged and whose +/- markers were lost. `cache` is declared twice
// (float, then unsigned int), two `for` headers appear back to back, and two
// `return` statements follow each other. As shown it is not one compilable
// function; read it as two variants of CalcSums interleaved:
//   - a longer variant using `cache`, `cache2`, `winsz2`, `cols_cache` and a
//     barrier, returning `cols[0] + cache + cache2`;
//   - a shorter variant using only an unsigned `cache`, returning `cache`.
// Presumably the shorter one is the post-commit code (the hunk header shrinks
// from 27 to 13 lines) -- TODO confirm against the repository.
float CalcSums(__local float *cols, __local float *cols_cache, int winsz)
{
// Longer variant: two partial sums and the half-window size.
float cache = 0;
float cache2 = 0;
int winsz2 = winsz/2;
// Longer variant: work-item position within its group along dimension 0.
int x = get_local_id(0);
int group_size_x = get_local_size(0);
// Shorter variant: accumulator seeded with cols[0], converted from float to unsigned int.
// NOTE(review): every `cache += cols[i]` below then converts the float sum back
// to unsigned int, discarding any fractional part -- confirm this is intended.
unsigned int cache = cols[0];
// Longer variant: sum cols[1] .. cols[winsz2].
for(int i = 1; i <= winsz2; i++)
#pragma unroll
// Shorter variant: sum cols[1] .. cols[winsz] (inclusive upper bound).
for(int i = 1; i <= winsz; i++)
cache += cols[i];
// Longer variant: store this partial sum at cols_cache[0]; the visible call
// sites pass `cols_cache + local_id_x`, so index 0 is the caller's own slot.
cols_cache[0] = cache;
// Longer variant: work-group barrier with a local-memory fence, placed between
// the store above and the read of cols_cache[winsz2] below.
barrier(CLK_LOCAL_MEM_FENCE);
// Longer variant: if a slot winsz2 positions ahead exists within the group,
// take its stored partial sum; otherwise sum cols[winsz2 + 1] .. cols[winsz - 1]
// directly.
if (x < group_size_x - winsz2)
cache2 = cols_cache[winsz2];
else
for(int i = winsz2 + 1; i < winsz; i++)
cache2 += cols[i];
// Longer variant result.
return cols[0] + cache + cache2;
// Shorter variant result (unsigned int converted to the float return type);
// this variant never touches cols_cache.
return cache;
}
// RpT: row stride between consecutive groups along y; textureness_kernel
// computes beg_row = group_id_y * RpT and caps end_row at beg_row + RpT.
#define RpT (2 * ROWSperTHREAD) // got experimentally
......@@ -301,8 +287,7 @@ __kernel void textureness_kernel(__global unsigned char *disp, int disp_rows, in
int beg_row = group_id_y * RpT;
int end_row = min(beg_row + RpT, disp_rows);
// if (x < disp_cols)
// {
int y = beg_row;
float sum = 0;
......@@ -340,11 +325,15 @@ __kernel void textureness_kernel(__global unsigned char *disp, int disp_rows, in
}
barrier(CLK_LOCAL_MEM_FENCE);
float sum_win = CalcSums(cols, cols_cache + local_id_x, winsz) * 255;
if (sum_win < threshold)
disp[y * disp_step + x] = 0;
if (x < disp_cols)
{
float sum_win = CalcSums(cols, cols_cache + local_id_x, winsz) * 255;
if (sum_win < threshold)
disp[y * disp_step + x] = 0;
}
barrier(CLK_LOCAL_MEM_FENCE);
}
// }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment