Commit d5aaea27 authored by yao

Fix some result mismatches when running OpenCL (OCL) on CPU devices

parent cb63bbf0
...@@ -228,7 +228,7 @@ __kernel void stereoKernel(__global unsigned char *left, __global unsigned char ...@@ -228,7 +228,7 @@ __kernel void stereoKernel(__global unsigned char *left, __global unsigned char
int X = get_group_id(0) * BLOCK_W + get_local_id(0) + maxdisp + radius; int X = get_group_id(0) * BLOCK_W + get_local_id(0) + maxdisp + radius;
// int Y = get_group_id(1) * ROWSperTHREAD + radius; // int Y = get_group_id(1) * ROWSperTHREAD + radius;
#define Y (get_group_id(1) * ROWSperTHREAD + radius) #define Y (get_group_id(1) * ROWSperTHREAD + radius)
volatile __global unsigned int* minSSDImage = cminSSDImage + X + Y * cminSSD_step; volatile __global unsigned int* minSSDImage = cminSSDImage + X + Y * cminSSD_step;
__global unsigned char* disparImage = disp + X + Y * disp_step; __global unsigned char* disparImage = disp + X + Y * disp_step;
...@@ -251,9 +251,9 @@ __kernel void stereoKernel(__global unsigned char *left, __global unsigned char ...@@ -251,9 +251,9 @@ __kernel void stereoKernel(__global unsigned char *left, __global unsigned char
barrier(CLK_LOCAL_MEM_FENCE); //before MinSSD function barrier(CLK_LOCAL_MEM_FENCE); //before MinSSD function
uint2 minSSD = MinSSD(col_ssd_cache + get_local_id(0), col_ssd, radius);
if (X < cwidth - radius && Y < cheight - radius) if (X < cwidth - radius && Y < cheight - radius)
{ {
uint2 minSSD = MinSSD(col_ssd_cache + get_local_id(0), col_ssd, radius);
if (minSSD.x < minSSDImage[0]) if (minSSD.x < minSSDImage[0])
{ {
disparImage[0] = (unsigned char)(d + minSSD.y); disparImage[0] = (unsigned char)(d + minSSD.y);
...@@ -264,7 +264,7 @@ __kernel void stereoKernel(__global unsigned char *left, __global unsigned char ...@@ -264,7 +264,7 @@ __kernel void stereoKernel(__global unsigned char *left, __global unsigned char
for(int row = 1; row < end_row; row++) for(int row = 1; row < end_row; row++)
{ {
int idx1 = y_tex * img_step + x_tex; int idx1 = y_tex * img_step + x_tex;
int idx2 = (y_tex + (2 * radius + 1)) * img_step + x_tex; int idx2 = min(y_tex + (2 * radius + 1), cheight - 1) * img_step + x_tex;
barrier(CLK_GLOBAL_MEM_FENCE); barrier(CLK_GLOBAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
...@@ -278,10 +278,10 @@ __kernel void stereoKernel(__global unsigned char *left, __global unsigned char ...@@ -278,10 +278,10 @@ __kernel void stereoKernel(__global unsigned char *left, __global unsigned char
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
uint2 minSSD = MinSSD(col_ssd_cache + get_local_id(0), col_ssd, radius);
if (X < cwidth - radius && row < cheight - radius - Y) if (X < cwidth - radius && row < cheight - radius - Y)
{ {
int idx = row * cminSSD_step; int idx = row * cminSSD_step;
uint2 minSSD = MinSSD(col_ssd_cache + get_local_id(0), col_ssd, radius);
if (minSSD.x < minSSDImage[idx]) if (minSSD.x < minSSDImage[idx])
{ {
disparImage[disp_step * row] = (unsigned char)(d + minSSD.y); disparImage[disp_step * row] = (unsigned char)(d + minSSD.y);
...@@ -378,8 +378,8 @@ __kernel void textureness_kernel(__global unsigned char *disp, int disp_rows, in ...@@ -378,8 +378,8 @@ __kernel void textureness_kernel(__global unsigned char *disp, int disp_rows, in
int beg_row = group_id_y * RpT; int beg_row = group_id_y * RpT;
int end_row = min(beg_row + RpT, disp_rows); int end_row = min(beg_row + RpT, disp_rows);
// if (x < disp_cols) // if (x < disp_cols)
// { // {
int y = beg_row; int y = beg_row;
float sum = 0; float sum = 0;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment