Commit d5aaea27 authored by yao

Fix some result mismatches when running the OpenCL (OCL) kernels on a CPU device

parent cb63bbf0
...@@ -226,9 +226,9 @@ __kernel void stereoKernel(__global unsigned char *left, __global unsigned char ...@@ -226,9 +226,9 @@ __kernel void stereoKernel(__global unsigned char *left, __global unsigned char
volatile __local unsigned int *col_ssd_extra = get_local_id(0) < (2 * radius) ? col_ssd + BLOCK_W : 0; volatile __local unsigned int *col_ssd_extra = get_local_id(0) < (2 * radius) ? col_ssd + BLOCK_W : 0;
int X = get_group_id(0) * BLOCK_W + get_local_id(0) + maxdisp + radius; int X = get_group_id(0) * BLOCK_W + get_local_id(0) + maxdisp + radius;
// int Y = get_group_id(1) * ROWSperTHREAD + radius; // int Y = get_group_id(1) * ROWSperTHREAD + radius;
#define Y (get_group_id(1) * ROWSperTHREAD + radius) #define Y (get_group_id(1) * ROWSperTHREAD + radius)
volatile __global unsigned int* minSSDImage = cminSSDImage + X + Y * cminSSD_step; volatile __global unsigned int* minSSDImage = cminSSDImage + X + Y * cminSSD_step;
__global unsigned char* disparImage = disp + X + Y * disp_step; __global unsigned char* disparImage = disp + X + Y * disp_step;
...@@ -251,9 +251,9 @@ __kernel void stereoKernel(__global unsigned char *left, __global unsigned char ...@@ -251,9 +251,9 @@ __kernel void stereoKernel(__global unsigned char *left, __global unsigned char
barrier(CLK_LOCAL_MEM_FENCE); //before MinSSD function barrier(CLK_LOCAL_MEM_FENCE); //before MinSSD function
uint2 minSSD = MinSSD(col_ssd_cache + get_local_id(0), col_ssd, radius);
if (X < cwidth - radius && Y < cheight - radius) if (X < cwidth - radius && Y < cheight - radius)
{ {
uint2 minSSD = MinSSD(col_ssd_cache + get_local_id(0), col_ssd, radius);
if (minSSD.x < minSSDImage[0]) if (minSSD.x < minSSDImage[0])
{ {
disparImage[0] = (unsigned char)(d + minSSD.y); disparImage[0] = (unsigned char)(d + minSSD.y);
...@@ -264,7 +264,7 @@ __kernel void stereoKernel(__global unsigned char *left, __global unsigned char ...@@ -264,7 +264,7 @@ __kernel void stereoKernel(__global unsigned char *left, __global unsigned char
for(int row = 1; row < end_row; row++) for(int row = 1; row < end_row; row++)
{ {
int idx1 = y_tex * img_step + x_tex; int idx1 = y_tex * img_step + x_tex;
int idx2 = (y_tex + (2 * radius + 1)) * img_step + x_tex; int idx2 = min(y_tex + (2 * radius + 1), cheight - 1) * img_step + x_tex;
barrier(CLK_GLOBAL_MEM_FENCE); barrier(CLK_GLOBAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
...@@ -278,10 +278,10 @@ __kernel void stereoKernel(__global unsigned char *left, __global unsigned char ...@@ -278,10 +278,10 @@ __kernel void stereoKernel(__global unsigned char *left, __global unsigned char
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
uint2 minSSD = MinSSD(col_ssd_cache + get_local_id(0), col_ssd, radius);
if (X < cwidth - radius && row < cheight - radius - Y) if (X < cwidth - radius && row < cheight - radius - Y)
{ {
int idx = row * cminSSD_step; int idx = row * cminSSD_step;
uint2 minSSD = MinSSD(col_ssd_cache + get_local_id(0), col_ssd, radius);
if (minSSD.x < minSSDImage[idx]) if (minSSD.x < minSSDImage[idx])
{ {
disparImage[disp_step * row] = (unsigned char)(d + minSSD.y); disparImage[disp_step * row] = (unsigned char)(d + minSSD.y);
...@@ -378,50 +378,50 @@ __kernel void textureness_kernel(__global unsigned char *disp, int disp_rows, in ...@@ -378,50 +378,50 @@ __kernel void textureness_kernel(__global unsigned char *disp, int disp_rows, in
int beg_row = group_id_y * RpT; int beg_row = group_id_y * RpT;
int end_row = min(beg_row + RpT, disp_rows); int end_row = min(beg_row + RpT, disp_rows);
// if (x < disp_cols) // if (x < disp_cols)
// { // {
int y = beg_row; int y = beg_row;
float sum = 0; float sum = 0;
float sum_extra = 0; float sum_extra = 0;
for(int i = y - winsz2; i <= y + winsz2; ++i) for(int i = y - winsz2; i <= y + winsz2; ++i)
{ {
sum += sobel(input, x - winsz2, i, input_rows, input_cols); sum += sobel(input, x - winsz2, i, input_rows, input_cols);
if (cols_extra) if (cols_extra)
sum_extra += sobel(input, x + group_size_x - winsz2, i, input_rows, input_cols); sum_extra += sobel(input, x + group_size_x - winsz2, i, input_rows, input_cols);
} }
*cols = sum;
if (cols_extra)
*cols_extra = sum_extra;
barrier(CLK_LOCAL_MEM_FENCE);
float sum_win = CalcSums(cols, cols_cache + local_id_x, winsz) * 255;
if (sum_win < threshold)
disp[y * disp_step + x] = 0;
barrier(CLK_LOCAL_MEM_FENCE);
for(int y = beg_row + 1; y < end_row; ++y)
{
sum = sum - sobel(input, x - winsz2, y - winsz2 - 1, input_rows, input_cols) +
sobel(input, x - winsz2, y + winsz2, input_rows, input_cols);
*cols = sum; *cols = sum;
if (cols_extra) if (cols_extra)
{
sum_extra = sum_extra - sobel(input, x + group_size_x - winsz2, y - winsz2 - 1,input_rows, input_cols)
+ sobel(input, x + group_size_x - winsz2, y + winsz2, input_rows, input_cols);
*cols_extra = sum_extra; *cols_extra = sum_extra;
}
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
float sum_win = CalcSums(cols, cols_cache + local_id_x, winsz) * 255; float sum_win = CalcSums(cols, cols_cache + local_id_x, winsz) * 255;
if (sum_win < threshold) if (sum_win < threshold)
disp[y * disp_step + x] = 0; disp[y * disp_step + x] = 0;
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
}
for(int y = beg_row + 1; y < end_row; ++y) // }
{
sum = sum - sobel(input, x - winsz2, y - winsz2 - 1, input_rows, input_cols) +
sobel(input, x - winsz2, y + winsz2, input_rows, input_cols);
*cols = sum;
if (cols_extra)
{
sum_extra = sum_extra - sobel(input, x + group_size_x - winsz2, y - winsz2 - 1,input_rows, input_cols)
+ sobel(input, x + group_size_x - winsz2, y + winsz2, input_rows, input_cols);
*cols_extra = sum_extra;
}
barrier(CLK_LOCAL_MEM_FENCE);
float sum_win = CalcSums(cols, cols_cache + local_id_x, winsz) * 255;
if (sum_win < threshold)
disp[y * disp_step + x] = 0;
barrier(CLK_LOCAL_MEM_FENCE);
}
// }
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment