Commit 9ba25e9d authored by Andrey Kamaev, committed by OpenCV Buildbot

Merge pull request #742 from bitwangyaoyao:2.4_fix

parents 0c64fc61 8cc5b980
...@@ -127,8 +127,9 @@ namespace cv ...@@ -127,8 +127,9 @@ namespace cv
// currently only support wavefront size queries // currently only support wavefront size queries
enum DEVICE_INFO enum DEVICE_INFO
{ {
WAVEFRONT_SIZE, //in AMD speak WAVEFRONT_SIZE, //in AMD speak
WARP_SIZE = WAVEFRONT_SIZE //in nvidia speak WARP_SIZE = WAVEFRONT_SIZE, //in nvidia speak
IS_CPU_DEVICE //check if the device is CPU
}; };
//info should have been pre-allocated //info should have been pre-allocated
void CV_EXPORTS queryDeviceInfo(DEVICE_INFO info_type, void* info); void CV_EXPORTS queryDeviceInfo(DEVICE_INFO info_type, void* info);
......
...@@ -44,7 +44,6 @@ ...@@ -44,7 +44,6 @@
//M*/ //M*/
#include "precomp.hpp" #include "precomp.hpp"
using namespace cv; using namespace cv;
using namespace cv::ocl; using namespace cv::ocl;
using namespace std; using namespace std;
...@@ -230,7 +229,6 @@ void cv::ocl::HOGDescriptor::computeGradient(const oclMat &img, oclMat &grad, oc ...@@ -230,7 +229,6 @@ void cv::ocl::HOGDescriptor::computeGradient(const oclMat &img, oclMat &grad, oc
} }
} }
void cv::ocl::HOGDescriptor::computeBlockHistograms(const oclMat &img) void cv::ocl::HOGDescriptor::computeBlockHistograms(const oclMat &img)
{ {
computeGradient(img, grad, qangle); computeGradient(img, grad, qangle);
...@@ -1571,6 +1569,27 @@ void cv::ocl::device::hog::set_up_constants(int nbins, int block_stride_x, int b ...@@ -1571,6 +1569,27 @@ void cv::ocl::device::hog::set_up_constants(int nbins, int block_stride_x, int b
cdescr_size = descr_size; cdescr_size = descr_size;
} }
// Integer division rounded up: adds (grain - 1) to `total` before dividing by
// `grain`. Used to count how many groups of `grain` are needed to cover `total`.
static inline int divUp(int total, int grain)
{
    const int padded_total = total + grain - 1;
    return padded_total / grain;
}
// Launches a HOG kernel, selecting build options from the device wavefront size.
//
// The wavefront size is read through queryDeviceInfo(WAVEFRONT_SIZE, ...):
//   - larger than 16: the kernel is launched without extra build options;
//   - exactly 16:     the kernel is built with "-D WAVE_SIZE_16";
//   - smaller than 16: the kernel is built with "-D WAVE_SIZE_1".
// wave_size starts at 0, so if the query leaves it untouched the
// "-D WAVE_SIZE_1" variant is selected.
//
// clCxt, source, kernelName, globalThreads, localThreads and args are forwarded
// unchanged to openCLExecuteKernel; channels and depth are always passed as -1.
static void openCLExecuteKernel_hog(Context *clCxt , const char **source, string kernelName,
                                    size_t globalThreads[3], size_t localThreads[3],
                                    vector< pair<size_t, const void *> > &args)
{
    size_t wave_size = 0;
    queryDeviceInfo(WAVEFRONT_SIZE, &wave_size);

    if (wave_size > 16)
    {
        // Wide wavefronts need no special define.
        openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, -1, -1);
        return;
    }

    const char *wave_define = (wave_size == 16) ? "-D WAVE_SIZE_16" : "-D WAVE_SIZE_1";

    // Copy through a fixed "%s" format: the option text must never be
    // interpreted as a format string itself.
    char build_options[64];
    sprintf(build_options, "%s", wave_define);
    openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
}
void cv::ocl::device::hog::compute_hists(int nbins, int block_stride_x, int block_stride_y, void cv::ocl::device::hog::compute_hists(int nbins, int block_stride_x, int block_stride_y,
int height, int width, const cv::ocl::oclMat &grad, int height, int width, const cv::ocl::oclMat &grad,
const cv::ocl::oclMat &qangle, float sigma, cv::ocl::oclMat &block_hists) const cv::ocl::oclMat &qangle, float sigma, cv::ocl::oclMat &block_hists)
...@@ -1582,8 +1601,10 @@ void cv::ocl::device::hog::compute_hists(int nbins, int block_stride_x, int bloc ...@@ -1582,8 +1601,10 @@ void cv::ocl::device::hog::compute_hists(int nbins, int block_stride_x, int bloc
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x; int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x;
int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y) / block_stride_y; int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y) / block_stride_y;
size_t globalThreads[3] = { img_block_width * 32, img_block_height * 2, 1 }; int blocks_total = img_block_width * img_block_height;
size_t localThreads[3] = { 32, 2, 1 }; int blocks_in_group = 4;
size_t localThreads[3] = { blocks_in_group * 24, 2, 1 };
size_t globalThreads[3] = { divUp(blocks_total, blocks_in_group) * localThreads[0], 2, 1 };
int grad_quadstep = grad.step >> 2; int grad_quadstep = grad.step >> 2;
int qangle_step = qangle.step; int qangle_step = qangle.step;
...@@ -1593,14 +1614,15 @@ void cv::ocl::device::hog::compute_hists(int nbins, int block_stride_x, int bloc ...@@ -1593,14 +1614,15 @@ void cv::ocl::device::hog::compute_hists(int nbins, int block_stride_x, int bloc
int hists_size = (nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y * 12) * sizeof(float); int hists_size = (nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y * 12) * sizeof(float);
int final_hists_size = (nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y) * sizeof(float); int final_hists_size = (nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y) * sizeof(float);
int smem = hists_size + final_hists_size; int smem = (hists_size + final_hists_size) * blocks_in_group;
args.push_back( make_pair( sizeof(cl_int), (void *)&width));
args.push_back( make_pair( sizeof(cl_int), (void *)&cblock_stride_x)); args.push_back( make_pair( sizeof(cl_int), (void *)&cblock_stride_x));
args.push_back( make_pair( sizeof(cl_int), (void *)&cblock_stride_y)); args.push_back( make_pair( sizeof(cl_int), (void *)&cblock_stride_y));
args.push_back( make_pair( sizeof(cl_int), (void *)&cnbins)); args.push_back( make_pair( sizeof(cl_int), (void *)&cnbins));
args.push_back( make_pair( sizeof(cl_int), (void *)&cblock_hist_size)); args.push_back( make_pair( sizeof(cl_int), (void *)&cblock_hist_size));
args.push_back( make_pair( sizeof(cl_int), (void *)&img_block_width)); args.push_back( make_pair( sizeof(cl_int), (void *)&img_block_width));
args.push_back( make_pair( sizeof(cl_int), (void *)&blocks_in_group));
args.push_back( make_pair( sizeof(cl_int), (void *)&blocks_total));
args.push_back( make_pair( sizeof(cl_int), (void *)&grad_quadstep)); args.push_back( make_pair( sizeof(cl_int), (void *)&grad_quadstep));
args.push_back( make_pair( sizeof(cl_int), (void *)&qangle_step)); args.push_back( make_pair( sizeof(cl_int), (void *)&qangle_step));
args.push_back( make_pair( sizeof(cl_mem), (void *)&grad.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&grad.data));
...@@ -1609,7 +1631,7 @@ void cv::ocl::device::hog::compute_hists(int nbins, int block_stride_x, int bloc ...@@ -1609,7 +1631,7 @@ void cv::ocl::device::hog::compute_hists(int nbins, int block_stride_x, int bloc
args.push_back( make_pair( sizeof(cl_mem), (void *)&block_hists.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&block_hists.data));
args.push_back( make_pair( smem, (void *)NULL)); args.push_back( make_pair( smem, (void *)NULL));
openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); openCLExecuteKernel_hog(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args);
} }
void cv::ocl::device::hog::normalize_hists(int nbins, int block_stride_x, int block_stride_y, void cv::ocl::device::hog::normalize_hists(int nbins, int block_stride_x, int block_stride_y,
...@@ -1637,7 +1659,7 @@ void cv::ocl::device::hog::normalize_hists(int nbins, int block_stride_x, int bl ...@@ -1637,7 +1659,7 @@ void cv::ocl::device::hog::normalize_hists(int nbins, int block_stride_x, int bl
args.push_back( make_pair( sizeof(cl_float), (void *)&threshold)); args.push_back( make_pair( sizeof(cl_float), (void *)&threshold));
args.push_back( make_pair( nthreads * sizeof(float), (void *)NULL)); args.push_back( make_pair( nthreads * sizeof(float), (void *)NULL));
openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); openCLExecuteKernel_hog(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args);
} }
void cv::ocl::device::hog::classify_hists(int win_height, int win_width, int block_stride_y, void cv::ocl::device::hog::classify_hists(int win_height, int win_width, int block_stride_y,
...@@ -1671,7 +1693,7 @@ void cv::ocl::device::hog::classify_hists(int win_height, int win_width, int blo ...@@ -1671,7 +1693,7 @@ void cv::ocl::device::hog::classify_hists(int win_height, int win_width, int blo
args.push_back( make_pair( sizeof(cl_float), (void *)&threshold)); args.push_back( make_pair( sizeof(cl_float), (void *)&threshold));
args.push_back( make_pair( sizeof(cl_mem), (void *)&labels.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&labels.data));
openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); openCLExecuteKernel_hog(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args);
} }
void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x, void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x,
...@@ -1702,7 +1724,7 @@ void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width, ...@@ -1702,7 +1724,7 @@ void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width,
args.push_back( make_pair( sizeof(cl_mem), (void *)&block_hists.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&block_hists.data));
args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data));
openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
} }
void cv::ocl::device::hog::extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x, void cv::ocl::device::hog::extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x,
...@@ -1734,12 +1756,7 @@ void cv::ocl::device::hog::extract_descrs_by_cols(int win_height, int win_width, ...@@ -1734,12 +1756,7 @@ void cv::ocl::device::hog::extract_descrs_by_cols(int win_height, int win_width,
args.push_back( make_pair( sizeof(cl_mem), (void *)&block_hists.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&block_hists.data));
args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data));
openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
}
static inline int divUp(int total, int grain)
{
return (total + grain - 1) / grain;
} }
void cv::ocl::device::hog::compute_gradients_8UC1(int height, int width, const cv::ocl::oclMat &img, void cv::ocl::device::hog::compute_gradients_8UC1(int height, int width, const cv::ocl::oclMat &img,
...@@ -1768,7 +1785,7 @@ void cv::ocl::device::hog::compute_gradients_8UC1(int height, int width, const c ...@@ -1768,7 +1785,7 @@ void cv::ocl::device::hog::compute_gradients_8UC1(int height, int width, const c
args.push_back( make_pair( sizeof(cl_char), (void *)&correctGamma)); args.push_back( make_pair( sizeof(cl_char), (void *)&correctGamma));
args.push_back( make_pair( sizeof(cl_int), (void *)&cnbins)); args.push_back( make_pair( sizeof(cl_int), (void *)&cnbins));
openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
} }
void cv::ocl::device::hog::compute_gradients_8UC4(int height, int width, const cv::ocl::oclMat &img, void cv::ocl::device::hog::compute_gradients_8UC4(int height, int width, const cv::ocl::oclMat &img,
...@@ -1798,7 +1815,7 @@ void cv::ocl::device::hog::compute_gradients_8UC4(int height, int width, const c ...@@ -1798,7 +1815,7 @@ void cv::ocl::device::hog::compute_gradients_8UC4(int height, int width, const c
args.push_back( make_pair( sizeof(cl_char), (void *)&correctGamma)); args.push_back( make_pair( sizeof(cl_char), (void *)&correctGamma));
args.push_back( make_pair( sizeof(cl_int), (void *)&cnbins)); args.push_back( make_pair( sizeof(cl_int), (void *)&cnbins));
openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
} }
void cv::ocl::device::hog::resize( const oclMat &src, oclMat &dst, const Size sz) void cv::ocl::device::hog::resize( const oclMat &src, oclMat &dst, const Size sz)
...@@ -1815,14 +1832,16 @@ void cv::ocl::device::hog::resize( const oclMat &src, oclMat &dst, const Size sz ...@@ -1815,14 +1832,16 @@ void cv::ocl::device::hog::resize( const oclMat &src, oclMat &dst, const Size sz
float ifx = (float)src.cols / sz.width; float ifx = (float)src.cols / sz.width;
float ify = (float)src.rows / sz.height; float ify = (float)src.rows / sz.height;
int src_step = static_cast<int>(src.step);
int dst_step = static_cast<int>(dst.step);
vector< pair<size_t, const void *> > args; vector< pair<size_t, const void *> > args;
args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data)); args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data));
args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data)); args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.offset)); args.push_back( make_pair(sizeof(cl_int), (void *)&dst.offset));
args.push_back( make_pair(sizeof(cl_int), (void *)&src.offset)); args.push_back( make_pair(sizeof(cl_int), (void *)&src.offset));
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.step)); args.push_back( make_pair(sizeof(cl_int), (void *)&dst_step));
args.push_back( make_pair(sizeof(cl_int), (void *)&src.step)); args.push_back( make_pair(sizeof(cl_int), (void *)&src_step));
args.push_back( make_pair(sizeof(cl_int), (void *)&src.cols)); args.push_back( make_pair(sizeof(cl_int), (void *)&src.cols));
args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows)); args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows));
args.push_back( make_pair(sizeof(cl_int), (void *)&sz.width)); args.push_back( make_pair(sizeof(cl_int), (void *)&sz.width));
...@@ -1830,5 +1849,5 @@ void cv::ocl::device::hog::resize( const oclMat &src, oclMat &dst, const Size sz ...@@ -1830,5 +1849,5 @@ void cv::ocl::device::hog::resize( const oclMat &src, oclMat &dst, const Size sz
args.push_back( make_pair(sizeof(cl_float), (void *)&ifx)); args.push_back( make_pair(sizeof(cl_float), (void *)&ifx));
args.push_back( make_pair(sizeof(cl_float), (void *)&ify)); args.push_back( make_pair(sizeof(cl_float), (void *)&ify));
openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
} }
...@@ -397,6 +397,15 @@ namespace cv ...@@ -397,6 +397,15 @@ namespace cv
} }
break; break;
case IS_CPU_DEVICE:
{
cl_device_type devicetype;
openCLSafeCall(clGetDeviceInfo(impl->devices[impl->devnum],
CL_DEVICE_TYPE, sizeof(cl_device_type),
&devicetype, NULL));
*(bool*)info = (devicetype == CVCL_DEVICE_TYPE_CPU);
}
break;
default: default:
CV_Error(-1, "Invalid device info type"); CV_Error(-1, "Invalid device info type");
break; break;
......
...@@ -394,7 +394,7 @@ void cv::ocl::oclMat::convertTo( oclMat &dst, int rtype, double alpha, double be ...@@ -394,7 +394,7 @@ void cv::ocl::oclMat::convertTo( oclMat &dst, int rtype, double alpha, double be
if( rtype < 0 ) if( rtype < 0 )
rtype = type(); rtype = type();
else else
rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), channels()); rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), oclchannels());
//int scn = channels(); //int scn = channels();
int sdepth = depth(), ddepth = CV_MAT_DEPTH(rtype); int sdepth = depth(), ddepth = CV_MAT_DEPTH(rtype);
......
This diff is collapsed.
This diff is collapsed.
...@@ -226,9 +226,9 @@ __kernel void stereoKernel(__global unsigned char *left, __global unsigned char ...@@ -226,9 +226,9 @@ __kernel void stereoKernel(__global unsigned char *left, __global unsigned char
volatile __local unsigned int *col_ssd_extra = get_local_id(0) < (2 * radius) ? col_ssd + BLOCK_W : 0; volatile __local unsigned int *col_ssd_extra = get_local_id(0) < (2 * radius) ? col_ssd + BLOCK_W : 0;
int X = get_group_id(0) * BLOCK_W + get_local_id(0) + maxdisp + radius; int X = get_group_id(0) * BLOCK_W + get_local_id(0) + maxdisp + radius;
// int Y = get_group_id(1) * ROWSperTHREAD + radius; // int Y = get_group_id(1) * ROWSperTHREAD + radius;
#define Y (get_group_id(1) * ROWSperTHREAD + radius) #define Y (get_group_id(1) * ROWSperTHREAD + radius)
volatile __global unsigned int* minSSDImage = cminSSDImage + X + Y * cminSSD_step; volatile __global unsigned int* minSSDImage = cminSSDImage + X + Y * cminSSD_step;
__global unsigned char* disparImage = disp + X + Y * disp_step; __global unsigned char* disparImage = disp + X + Y * disp_step;
...@@ -251,9 +251,9 @@ __kernel void stereoKernel(__global unsigned char *left, __global unsigned char ...@@ -251,9 +251,9 @@ __kernel void stereoKernel(__global unsigned char *left, __global unsigned char
barrier(CLK_LOCAL_MEM_FENCE); //before MinSSD function barrier(CLK_LOCAL_MEM_FENCE); //before MinSSD function
uint2 minSSD = MinSSD(col_ssd_cache + get_local_id(0), col_ssd, radius);
if (X < cwidth - radius && Y < cheight - radius) if (X < cwidth - radius && Y < cheight - radius)
{ {
uint2 minSSD = MinSSD(col_ssd_cache + get_local_id(0), col_ssd, radius);
if (minSSD.x < minSSDImage[0]) if (minSSD.x < minSSDImage[0])
{ {
disparImage[0] = (unsigned char)(d + minSSD.y); disparImage[0] = (unsigned char)(d + minSSD.y);
...@@ -264,7 +264,7 @@ __kernel void stereoKernel(__global unsigned char *left, __global unsigned char ...@@ -264,7 +264,7 @@ __kernel void stereoKernel(__global unsigned char *left, __global unsigned char
for(int row = 1; row < end_row; row++) for(int row = 1; row < end_row; row++)
{ {
int idx1 = y_tex * img_step + x_tex; int idx1 = y_tex * img_step + x_tex;
int idx2 = (y_tex + (2 * radius + 1)) * img_step + x_tex; int idx2 = min(y_tex + (2 * radius + 1), cheight - 1) * img_step + x_tex;
barrier(CLK_GLOBAL_MEM_FENCE); barrier(CLK_GLOBAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
...@@ -278,10 +278,10 @@ __kernel void stereoKernel(__global unsigned char *left, __global unsigned char ...@@ -278,10 +278,10 @@ __kernel void stereoKernel(__global unsigned char *left, __global unsigned char
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
uint2 minSSD = MinSSD(col_ssd_cache + get_local_id(0), col_ssd, radius);
if (X < cwidth - radius && row < cheight - radius - Y) if (X < cwidth - radius && row < cheight - radius - Y)
{ {
int idx = row * cminSSD_step; int idx = row * cminSSD_step;
uint2 minSSD = MinSSD(col_ssd_cache + get_local_id(0), col_ssd, radius);
if (minSSD.x < minSSDImage[idx]) if (minSSD.x < minSSDImage[idx])
{ {
disparImage[disp_step * row] = (unsigned char)(d + minSSD.y); disparImage[disp_step * row] = (unsigned char)(d + minSSD.y);
...@@ -378,50 +378,50 @@ __kernel void textureness_kernel(__global unsigned char *disp, int disp_rows, in ...@@ -378,50 +378,50 @@ __kernel void textureness_kernel(__global unsigned char *disp, int disp_rows, in
int beg_row = group_id_y * RpT; int beg_row = group_id_y * RpT;
int end_row = min(beg_row + RpT, disp_rows); int end_row = min(beg_row + RpT, disp_rows);
// if (x < disp_cols) // if (x < disp_cols)
// { // {
int y = beg_row; int y = beg_row;
float sum = 0; float sum = 0;
float sum_extra = 0; float sum_extra = 0;
for(int i = y - winsz2; i <= y + winsz2; ++i) for(int i = y - winsz2; i <= y + winsz2; ++i)
{ {
sum += sobel(input, x - winsz2, i, input_rows, input_cols); sum += sobel(input, x - winsz2, i, input_rows, input_cols);
if (cols_extra) if (cols_extra)
sum_extra += sobel(input, x + group_size_x - winsz2, i, input_rows, input_cols); sum_extra += sobel(input, x + group_size_x - winsz2, i, input_rows, input_cols);
} }
*cols = sum;
if (cols_extra)
*cols_extra = sum_extra;
barrier(CLK_LOCAL_MEM_FENCE);
float sum_win = CalcSums(cols, cols_cache + local_id_x, winsz) * 255;
if (sum_win < threshold)
disp[y * disp_step + x] = 0;
barrier(CLK_LOCAL_MEM_FENCE);
for(int y = beg_row + 1; y < end_row; ++y)
{
sum = sum - sobel(input, x - winsz2, y - winsz2 - 1, input_rows, input_cols) +
sobel(input, x - winsz2, y + winsz2, input_rows, input_cols);
*cols = sum; *cols = sum;
if (cols_extra) if (cols_extra)
{
sum_extra = sum_extra - sobel(input, x + group_size_x - winsz2, y - winsz2 - 1,input_rows, input_cols)
+ sobel(input, x + group_size_x - winsz2, y + winsz2, input_rows, input_cols);
*cols_extra = sum_extra; *cols_extra = sum_extra;
}
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
float sum_win = CalcSums(cols, cols_cache + local_id_x, winsz) * 255; float sum_win = CalcSums(cols, cols_cache + local_id_x, winsz) * 255;
if (sum_win < threshold) if (sum_win < threshold)
disp[y * disp_step + x] = 0; disp[y * disp_step + x] = 0;
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
}
for(int y = beg_row + 1; y < end_row; ++y) // }
{
sum = sum - sobel(input, x - winsz2, y - winsz2 - 1, input_rows, input_cols) +
sobel(input, x - winsz2, y + winsz2, input_rows, input_cols);
*cols = sum;
if (cols_extra)
{
sum_extra = sum_extra - sobel(input, x + group_size_x - winsz2, y - winsz2 - 1,input_rows, input_cols)
+ sobel(input, x + group_size_x - winsz2, y + winsz2, input_rows, input_cols);
*cols_extra = sum_extra;
}
barrier(CLK_LOCAL_MEM_FENCE);
float sum_win = CalcSums(cols, cols_cache + local_id_x, winsz) * 255;
if (sum_win < threshold)
disp[y * disp_step + x] = 0;
barrier(CLK_LOCAL_MEM_FENCE);
}
// }
} }
This diff is collapsed.
...@@ -115,10 +115,9 @@ int main(int argc, char **argv) ...@@ -115,10 +115,9 @@ int main(int argc, char **argv)
std::cout << "platform invalid\n"; std::cout << "platform invalid\n";
return -1; return -1;
} }
if(pid != 0 || device != 0)
{ setDevice(oclinfo[pid], device);
setDevice(oclinfo[pid], device);
}
cout << "Device type:" << type << endl << "Device name:" << oclinfo[pid].DeviceName[device] << endl; cout << "Device type:" << type << endl << "Device name:" << oclinfo[pid].DeviceName[device] << endl;
return RUN_ALL_TESTS(); return RUN_ALL_TESTS();
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment