Commit 0faac595 authored by Ilya Lavrenov's avatar Ilya Lavrenov

fixed minMaxLoc kernel (removed compilation errors)

parent 544c0240
...@@ -341,7 +341,7 @@ static void arithmetic_sum_buffer_run(const oclMat &src, cl_mem &dst, int vlen , ...@@ -341,7 +341,7 @@ static void arithmetic_sum_buffer_run(const oclMat &src, cl_mem &dst, int vlen ,
args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data)); args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst )); args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst ));
size_t gt[3] = {groupnum * 256, 1, 1}, lt[3] = {256, 1, 1}; size_t gt[3] = {groupnum * 256, 1, 1}, lt[3] = {256, 1, 1};
if(src.oclchannels() != 3) if (src.oclchannels() != 3)
openCLExecuteKernel(src.clCxt, &arithm_sum, "arithm_op_sum", gt, lt, args, -1, -1, build_options); openCLExecuteKernel(src.clCxt, &arithm_sum, "arithm_op_sum", gt, lt, args, -1, -1, build_options);
else else
openCLExecuteKernel(src.clCxt, &arithm_sum_3, "arithm_op_sum_3", gt, lt, args, -1, -1, build_options); openCLExecuteKernel(src.clCxt, &arithm_sum_3, "arithm_op_sum_3", gt, lt, args, -1, -1, build_options);
...@@ -365,9 +365,9 @@ Scalar arithmetic_sum(const oclMat &src, int type = 0) ...@@ -365,9 +365,9 @@ Scalar arithmetic_sum(const oclMat &src, int type = 0)
memset(p, 0, dbsize * sizeof(T)); memset(p, 0, dbsize * sizeof(T));
openCLReadBuffer(clCxt, dstBuffer, (void *)p, dbsize * sizeof(T)); openCLReadBuffer(clCxt, dstBuffer, (void *)p, dbsize * sizeof(T));
for(int i = 0; i < dbsize;) for (int i = 0; i < dbsize;)
{ {
for(int j = 0; j < src.oclchannels(); j++, i++) for (int j = 0; j < src.oclchannels(); j++, i++)
s.val[j] += p[i]; s.val[j] += p[i];
} }
delete[] p; delete[] p;
...@@ -378,9 +378,9 @@ Scalar arithmetic_sum(const oclMat &src, int type = 0) ...@@ -378,9 +378,9 @@ Scalar arithmetic_sum(const oclMat &src, int type = 0)
typedef Scalar (*sumFunc)(const oclMat &src, int type); typedef Scalar (*sumFunc)(const oclMat &src, int type);
Scalar cv::ocl::sum(const oclMat &src) Scalar cv::ocl::sum(const oclMat &src)
{ {
if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
{ {
CV_Error(CV_GpuNotSupported, "select device don't support double"); CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
} }
static sumFunc functab[2] = static sumFunc functab[2] =
{ {
...@@ -395,9 +395,9 @@ Scalar cv::ocl::sum(const oclMat &src) ...@@ -395,9 +395,9 @@ Scalar cv::ocl::sum(const oclMat &src)
Scalar cv::ocl::absSum(const oclMat &src) Scalar cv::ocl::absSum(const oclMat &src)
{ {
if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
{ {
CV_Error(CV_GpuNotSupported, "select device don't support double"); CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
} }
static sumFunc functab[2] = static sumFunc functab[2] =
{ {
...@@ -412,9 +412,9 @@ Scalar cv::ocl::absSum(const oclMat &src) ...@@ -412,9 +412,9 @@ Scalar cv::ocl::absSum(const oclMat &src)
Scalar cv::ocl::sqrSum(const oclMat &src) Scalar cv::ocl::sqrSum(const oclMat &src)
{ {
if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
{ {
CV_Error(CV_GpuNotSupported, "select device don't support double"); CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
} }
static sumFunc functab[2] = static sumFunc functab[2] =
{ {
...@@ -446,7 +446,7 @@ void cv::ocl::meanStdDev(const oclMat &src, Scalar &mean, Scalar &stddev) ...@@ -446,7 +446,7 @@ void cv::ocl::meanStdDev(const oclMat &src, Scalar &mean, Scalar &stddev)
m1 = (Mat)dst1; m1 = (Mat)dst1;
m2 = (Mat)dst2; m2 = (Mat)dst2;
int i = 0, *p = (int *)m1.data, *q = (int *)m2.data; int i = 0, *p = (int *)m1.data, *q = (int *)m2.data;
for(; i < channels; i++) for (; i < channels; i++)
{ {
mean.val[i] = (double)p[i] / (src.cols * src.rows); mean.val[i] = (double)p[i] / (src.cols * src.rows);
stddev.val[i] = std::sqrt(std::max((double) q[i] / (src.cols * src.rows) - mean.val[i] * mean.val[i] , 0.)); stddev.val[i] = std::sqrt(std::max((double) q[i] / (src.cols * src.rows) - mean.val[i] * mean.val[i] , 0.));
...@@ -476,7 +476,7 @@ static void arithmetic_minMax_run(const oclMat &src, const oclMat &mask, cl_mem ...@@ -476,7 +476,7 @@ static void arithmetic_minMax_run(const oclMat &src, const oclMat &mask, cl_mem
args.push_back( make_pair( sizeof(cl_int) , (void *)&elemnum)); args.push_back( make_pair( sizeof(cl_int) , (void *)&elemnum));
args.push_back( make_pair( sizeof(cl_int) , (void *)&groupnum)); args.push_back( make_pair( sizeof(cl_int) , (void *)&groupnum));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data)); args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
if(!mask.empty()) if (!mask.empty())
{ {
int mall_cols = mask.step / (vlen * mask.elemSize1()); int mall_cols = mask.step / (vlen * mask.elemSize1());
int mpre_cols = (mask.offset % mask.step) / (vlen * mask.elemSize1()); int mpre_cols = (mask.offset % mask.step) / (vlen * mask.elemSize1());
...@@ -499,7 +499,7 @@ static void arithmetic_minMax_mask_run(const oclMat &src, const oclMat &mask, cl ...@@ -499,7 +499,7 @@ static void arithmetic_minMax_mask_run(const oclMat &src, const oclMat &mask, cl
vector<pair<size_t , const void *> > args; vector<pair<size_t , const void *> > args;
size_t gt[3] = {groupnum * 256, 1, 1}, lt[3] = {256, 1, 1}; size_t gt[3] = {groupnum * 256, 1, 1}, lt[3] = {256, 1, 1};
char build_options[50]; char build_options[50];
if(src.oclchannels() == 1) if (src.oclchannels() == 1)
{ {
int cols = (src.cols - 1) / vlen + 1; int cols = (src.cols - 1) / vlen + 1;
int invalid_cols = src.step / (vlen * src.elemSize1()) - cols; int invalid_cols = src.step / (vlen * src.elemSize1()) - cols;
...@@ -519,8 +519,6 @@ static void arithmetic_minMax_mask_run(const oclMat &src, const oclMat &mask, cl ...@@ -519,8 +519,6 @@ static void arithmetic_minMax_mask_run(const oclMat &src, const oclMat &mask, cl
args.push_back( make_pair( sizeof(cl_int) , (void *)&moffset )); args.push_back( make_pair( sizeof(cl_int) , (void *)&moffset ));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&mask.data )); args.push_back( make_pair( sizeof(cl_mem) , (void *)&mask.data ));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst )); args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst ));
// printf("elemnum:%d,cols:%d,invalid_cols:%d,offset:%d,minvalid_cols:%d,moffset:%d,repeat_e:%d\r\n",
// elemnum,cols,invalid_cols,offset,minvalid_cols,moffset,repeat_me);
openCLExecuteKernel(src.clCxt, &arithm_minMax_mask, kernelName, gt, lt, args, -1, -1, build_options); openCLExecuteKernel(src.clCxt, &arithm_minMax_mask, kernelName, gt, lt, args, -1, -1, build_options);
} }
} }
...@@ -549,18 +547,18 @@ template <typename T> void arithmetic_minMax(const oclMat &src, double *minVal, ...@@ -549,18 +547,18 @@ template <typename T> void arithmetic_minMax(const oclMat &src, double *minVal,
Mat matbuf = Mat(buf); Mat matbuf = Mat(buf);
T *p = matbuf.ptr<T>(); T *p = matbuf.ptr<T>();
if(minVal != NULL) if (minVal != NULL)
{ {
*minVal = std::numeric_limits<double>::max(); *minVal = std::numeric_limits<double>::max();
for(int i = 0; i < vlen * (int)groupnum; i++) for (int i = 0; i < vlen * (int)groupnum; i++)
{ {
*minVal = *minVal < p[i] ? *minVal : p[i]; *minVal = *minVal < p[i] ? *minVal : p[i];
} }
} }
if(maxVal != NULL) if (maxVal != NULL)
{ {
*maxVal = -std::numeric_limits<double>::max(); *maxVal = -std::numeric_limits<double>::max();
for(int i = vlen * (int)groupnum; i < 2 * vlen * (int)groupnum; i++) for (int i = vlen * (int)groupnum; i < 2 * vlen * (int)groupnum; i++)
{ {
*maxVal = *maxVal > p[i] ? *maxVal : p[i]; *maxVal = *maxVal > p[i] ? *maxVal : p[i];
} }
...@@ -577,9 +575,9 @@ void cv::ocl::minMax(const oclMat &src, double *minVal, double *maxVal, const oc ...@@ -577,9 +575,9 @@ void cv::ocl::minMax(const oclMat &src, double *minVal, double *maxVal, const oc
void cv::ocl::minMax_buf(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat &buf) void cv::ocl::minMax_buf(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat &buf)
{ {
CV_Assert(src.oclchannels() == 1); CV_Assert(src.oclchannels() == 1);
if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
{ {
CV_Error(CV_GpuNotSupported, "select device don't support double"); CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
} }
static minMaxFunc functab[8] = static minMaxFunc functab[8] =
{ {
...@@ -625,7 +623,7 @@ double cv::ocl::norm(const oclMat &src1, const oclMat &src2, int normType) ...@@ -625,7 +623,7 @@ double cv::ocl::norm(const oclMat &src1, const oclMat &src2, int normType)
m = (gm2); m = (gm2);
p = (int *)m.data; p = (int *)m.data;
r = -std::numeric_limits<double>::max(); r = -std::numeric_limits<double>::max();
for(i = 0; i < channels; i++) for (i = 0; i < channels; i++)
{ {
r = std::max(r, (double)p[i]); r = std::max(r, (double)p[i]);
} }
...@@ -635,7 +633,7 @@ double cv::ocl::norm(const oclMat &src1, const oclMat &src2, int normType) ...@@ -635,7 +633,7 @@ double cv::ocl::norm(const oclMat &src1, const oclMat &src2, int normType)
//arithmetic_sum_run(gm1, gm2,"arithm_op_sum"); //arithmetic_sum_run(gm1, gm2,"arithm_op_sum");
m = (gm2); m = (gm2);
p = (int *)m.data; p = (int *)m.data;
for(i = 0; i < channels; i++) for (i = 0; i < channels; i++)
{ {
r = r + (double)p[i]; r = r + (double)p[i];
} }
...@@ -645,14 +643,14 @@ double cv::ocl::norm(const oclMat &src1, const oclMat &src2, int normType) ...@@ -645,14 +643,14 @@ double cv::ocl::norm(const oclMat &src1, const oclMat &src2, int normType)
//arithmetic_sum_run(gm1, gm2,"arithm_op_squares_sum"); //arithmetic_sum_run(gm1, gm2,"arithm_op_squares_sum");
m = (gm2); m = (gm2);
p = (int *)m.data; p = (int *)m.data;
for(i = 0; i < channels; i++) for (i = 0; i < channels; i++)
{ {
r = r + (double)p[i]; r = r + (double)p[i];
} }
r = std::sqrt(r); r = std::sqrt(r);
break; break;
} }
if(isRelative) if (isRelative)
r = r / norm(src2, normType); r = r / norm(src2, normType);
return r; return r;
} }
...@@ -663,9 +661,9 @@ double cv::ocl::norm(const oclMat &src1, const oclMat &src2, int normType) ...@@ -663,9 +661,9 @@ double cv::ocl::norm(const oclMat &src1, const oclMat &src2, int normType)
static void arithmetic_flip_rows_run(const oclMat &src, oclMat &dst, string kernelName) static void arithmetic_flip_rows_run(const oclMat &src, oclMat &dst, string kernelName)
{ {
if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F) if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F)
{ {
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
return; return;
} }
...@@ -710,9 +708,9 @@ static void arithmetic_flip_rows_run(const oclMat &src, oclMat &dst, string kern ...@@ -710,9 +708,9 @@ static void arithmetic_flip_rows_run(const oclMat &src, oclMat &dst, string kern
static void arithmetic_flip_cols_run(const oclMat &src, oclMat &dst, string kernelName, bool isVertical) static void arithmetic_flip_cols_run(const oclMat &src, oclMat &dst, string kernelName, bool isVertical)
{ {
if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F) if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F)
{ {
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
return; return;
} }
...@@ -749,7 +747,7 @@ static void arithmetic_flip_cols_run(const oclMat &src, oclMat &dst, string kern ...@@ -749,7 +747,7 @@ static void arithmetic_flip_cols_run(const oclMat &src, oclMat &dst, string kern
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows )); args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols )); args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols ));
if(isVertical) if (isVertical)
args.push_back( make_pair( sizeof(cl_int), (void *)&rows )); args.push_back( make_pair( sizeof(cl_int), (void *)&rows ));
else else
args.push_back( make_pair( sizeof(cl_int), (void *)&cols )); args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
...@@ -764,11 +762,11 @@ static void arithmetic_flip_cols_run(const oclMat &src, oclMat &dst, string kern ...@@ -764,11 +762,11 @@ static void arithmetic_flip_cols_run(const oclMat &src, oclMat &dst, string kern
void cv::ocl::flip(const oclMat &src, oclMat &dst, int flipCode) void cv::ocl::flip(const oclMat &src, oclMat &dst, int flipCode)
{ {
dst.create(src.size(), src.type()); dst.create(src.size(), src.type());
if(flipCode == 0) if (flipCode == 0)
{ {
arithmetic_flip_rows_run(src, dst, "arithm_flip_rows"); arithmetic_flip_rows_run(src, dst, "arithm_flip_rows");
} }
else if(flipCode > 0) else if (flipCode > 0)
arithmetic_flip_cols_run(src, dst, "arithm_flip_cols", false); arithmetic_flip_cols_run(src, dst, "arithm_flip_cols", false);
else else
arithmetic_flip_cols_run(src, dst, "arithm_flip_rc", true); arithmetic_flip_cols_run(src, dst, "arithm_flip_rc", true);
...@@ -877,9 +875,9 @@ void cv::ocl::log(const oclMat &src, oclMat &dst) ...@@ -877,9 +875,9 @@ void cv::ocl::log(const oclMat &src, oclMat &dst)
static void arithmetic_magnitude_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName) static void arithmetic_magnitude_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName)
{ {
if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F) if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{ {
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
return; return;
} }
...@@ -921,9 +919,9 @@ void cv::ocl::magnitude(const oclMat &src1, const oclMat &src2, oclMat &dst) ...@@ -921,9 +919,9 @@ void cv::ocl::magnitude(const oclMat &src1, const oclMat &src2, oclMat &dst)
static void arithmetic_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName, const char **kernelString) static void arithmetic_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName, const char **kernelString)
{ {
if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F) if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{ {
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
return; return;
} }
...@@ -964,7 +962,7 @@ void cv::ocl::phase(const oclMat &x, const oclMat &y, oclMat &Angle , bool angle ...@@ -964,7 +962,7 @@ void cv::ocl::phase(const oclMat &x, const oclMat &y, oclMat &Angle , bool angle
CV_Assert(x.type() == y.type() && x.size() == y.size() && (x.depth() == CV_32F || x.depth() == CV_64F)); CV_Assert(x.type() == y.type() && x.size() == y.size() && (x.depth() == CV_32F || x.depth() == CV_64F));
Angle.create(x.size(), x.type()); Angle.create(x.size(), x.type());
string kernelName = angleInDegrees ? "arithm_phase_indegrees" : "arithm_phase_inradians"; string kernelName = angleInDegrees ? "arithm_phase_indegrees" : "arithm_phase_inradians";
if(angleInDegrees) if (angleInDegrees)
arithmetic_phase_run(x, y, Angle, kernelName, &arithm_phase); arithmetic_phase_run(x, y, Angle, kernelName, &arithm_phase);
else else
arithmetic_phase_run(x, y, Angle, kernelName, &arithm_phase); arithmetic_phase_run(x, y, Angle, kernelName, &arithm_phase);
...@@ -977,9 +975,9 @@ void cv::ocl::phase(const oclMat &x, const oclMat &y, oclMat &Angle , bool angle ...@@ -977,9 +975,9 @@ void cv::ocl::phase(const oclMat &x, const oclMat &y, oclMat &Angle , bool angle
static void arithmetic_cartToPolar_run(const oclMat &src1, const oclMat &src2, oclMat &dst_mag, oclMat &dst_cart, static void arithmetic_cartToPolar_run(const oclMat &src1, const oclMat &src2, oclMat &dst_mag, oclMat &dst_cart,
string kernelName, bool angleInDegrees) string kernelName, bool angleInDegrees)
{ {
if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F) if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{ {
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
return; return;
} }
...@@ -1030,9 +1028,9 @@ void cv::ocl::cartToPolar(const oclMat &x, const oclMat &y, oclMat &mag, oclMat ...@@ -1030,9 +1028,9 @@ void cv::ocl::cartToPolar(const oclMat &x, const oclMat &y, oclMat &mag, oclMat
static void arithmetic_ptc_run(const oclMat &src1, const oclMat &src2, oclMat &dst1, oclMat &dst2, bool angleInDegrees, static void arithmetic_ptc_run(const oclMat &src1, const oclMat &src2, oclMat &dst1, oclMat &dst2, bool angleInDegrees,
string kernelName) string kernelName)
{ {
if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F) if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{ {
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
return; return;
} }
...@@ -1048,7 +1046,7 @@ static void arithmetic_ptc_run(const oclMat &src1, const oclMat &src2, oclMat &d ...@@ -1048,7 +1046,7 @@ static void arithmetic_ptc_run(const oclMat &src1, const oclMat &src2, oclMat &d
int tmp = angleInDegrees ? 1 : 0; int tmp = angleInDegrees ? 1 : 0;
vector<pair<size_t , const void *> > args; vector<pair<size_t , const void *> > args;
if(src1.data) if (src1.data)
{ {
args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data )); args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src1.step )); args.push_back( make_pair( sizeof(cl_int), (void *)&src1.step ));
...@@ -1077,7 +1075,7 @@ void cv::ocl::polarToCart(const oclMat &magnitude, const oclMat &angle, oclMat & ...@@ -1077,7 +1075,7 @@ void cv::ocl::polarToCart(const oclMat &magnitude, const oclMat &angle, oclMat &
x.create(angle.size(), angle.type()); x.create(angle.size(), angle.type());
y.create(angle.size(), angle.type()); y.create(angle.size(), angle.type());
if( magnitude.data ) if ( magnitude.data )
{ {
CV_Assert( magnitude.size() == angle.size() && magnitude.type() == angle.type() ); CV_Assert( magnitude.size() == angle.size() && magnitude.type() == angle.type() );
arithmetic_ptc_run(magnitude, angle, x, y, angleInDegrees, "arithm_polarToCart_mag"); arithmetic_ptc_run(magnitude, angle, x, y, angleInDegrees, "arithm_polarToCart_mag");
...@@ -1119,7 +1117,7 @@ static void arithmetic_minMaxLoc_mask_run(const oclMat &src, const oclMat &mask, ...@@ -1119,7 +1117,7 @@ static void arithmetic_minMaxLoc_mask_run(const oclMat &src, const oclMat &mask,
vector<pair<size_t , const void *> > args; vector<pair<size_t , const void *> > args;
size_t gt[3] = {groupnum * 256, 1, 1}, lt[3] = {256, 1, 1}; size_t gt[3] = {groupnum * 256, 1, 1}, lt[3] = {256, 1, 1};
char build_options[50]; char build_options[50];
if(src.oclchannels() == 1) if (src.oclchannels() == 1)
{ {
int cols = (src.cols - 1) / vlen + 1; int cols = (src.cols - 1) / vlen + 1;
int invalid_cols = src.step / (vlen * src.elemSize1()) - cols; int invalid_cols = src.step / (vlen * src.elemSize1()) - cols;
...@@ -1143,7 +1141,8 @@ static void arithmetic_minMaxLoc_mask_run(const oclMat &src, const oclMat &mask, ...@@ -1143,7 +1141,8 @@ static void arithmetic_minMaxLoc_mask_run(const oclMat &src, const oclMat &mask,
openCLExecuteKernel(src.clCxt, &arithm_minMaxLoc_mask, "arithm_op_minMaxLoc_mask", gt, lt, args, -1, -1, build_options); openCLExecuteKernel(src.clCxt, &arithm_minMaxLoc_mask, "arithm_op_minMaxLoc_mask", gt, lt, args, -1, -1, build_options);
} }
} }
template<typename T>
template <typename T>
void arithmetic_minMaxLoc(const oclMat &src, double *minVal, double *maxVal, void arithmetic_minMaxLoc(const oclMat &src, double *minVal, double *maxVal,
Point *minLoc, Point *maxLoc, const oclMat &mask) Point *minLoc, Point *maxLoc, const oclMat &mask)
{ {
...@@ -1164,12 +1163,12 @@ void arithmetic_minMaxLoc(const oclMat &src, double *minVal, double *maxVal, ...@@ -1164,12 +1163,12 @@ void arithmetic_minMaxLoc(const oclMat &src, double *minVal, double *maxVal,
T *p = new T[groupnum * vlen * 4]; T *p = new T[groupnum * vlen * 4];
memset(p, 0, dbsize); memset(p, 0, dbsize);
openCLReadBuffer(clCxt, dstBuffer, (void *)p, dbsize); openCLReadBuffer(clCxt, dstBuffer, (void *)p, dbsize);
for(int i = 0; i < vlen * (int)groupnum; i++) for (int i = 0; i < vlen * (int)groupnum; i++)
{ {
*minVal = (*minVal < p[i] || p[i + 2 * vlen * groupnum] == -1) ? *minVal : p[i]; *minVal = (*minVal < p[i] || p[i + 2 * vlen * groupnum] == -1) ? *minVal : p[i];
minloc = (*minVal < p[i] || p[i + 2 * vlen * groupnum] == -1) ? minloc : cvRound(p[i + 2 * vlen * groupnum]); minloc = (*minVal < p[i] || p[i + 2 * vlen * groupnum] == -1) ? minloc : cvRound(p[i + 2 * vlen * groupnum]);
} }
for(int i = vlen * (int)groupnum; i < 2 * vlen * (int)groupnum; i++) for (int i = vlen * (int)groupnum; i < 2 * vlen * (int)groupnum; i++)
{ {
*maxVal = (*maxVal > p[i] || p[i + 2 * vlen * groupnum] == -1) ? *maxVal : p[i]; *maxVal = (*maxVal > p[i] || p[i + 2 * vlen * groupnum] == -1) ? *maxVal : p[i];
maxloc = (*maxVal > p[i] || p[i + 2 * vlen * groupnum] == -1) ? maxloc : cvRound(p[i + 2 * vlen * groupnum]); maxloc = (*maxVal > p[i] || p[i + 2 * vlen * groupnum] == -1) ? maxloc : cvRound(p[i + 2 * vlen * groupnum]);
...@@ -1178,9 +1177,9 @@ void arithmetic_minMaxLoc(const oclMat &src, double *minVal, double *maxVal, ...@@ -1178,9 +1177,9 @@ void arithmetic_minMaxLoc(const oclMat &src, double *minVal, double *maxVal,
int pre_rows = src.offset / src.step; int pre_rows = src.offset / src.step;
int pre_cols = (src.offset % src.step) / src.elemSize1(); int pre_cols = (src.offset % src.step) / src.elemSize1();
int wholecols = src.step / src.elemSize1(); int wholecols = src.step / src.elemSize1();
if( minLoc ) if ( minLoc )
{ {
if( minloc >= 0 ) if ( minloc >= 0 )
{ {
minLoc->y = minloc / wholecols - pre_rows; minLoc->y = minloc / wholecols - pre_rows;
minLoc->x = minloc % wholecols - pre_cols; minLoc->x = minloc % wholecols - pre_cols;
...@@ -1188,9 +1187,9 @@ void arithmetic_minMaxLoc(const oclMat &src, double *minVal, double *maxVal, ...@@ -1188,9 +1187,9 @@ void arithmetic_minMaxLoc(const oclMat &src, double *minVal, double *maxVal,
else else
minLoc->x = minLoc->y = -1; minLoc->x = minLoc->y = -1;
} }
if( maxLoc ) if ( maxLoc )
{ {
if( maxloc >= 0 ) if ( maxloc >= 0 )
{ {
maxLoc->y = maxloc / wholecols - pre_rows; maxLoc->y = maxloc / wholecols - pre_rows;
maxLoc->x = maxloc % wholecols - pre_cols; maxLoc->x = maxloc % wholecols - pre_cols;
...@@ -1209,9 +1208,9 @@ typedef void (*minMaxLocFunc)(const oclMat &src, double *minVal, double *maxVal, ...@@ -1209,9 +1208,9 @@ typedef void (*minMaxLocFunc)(const oclMat &src, double *minVal, double *maxVal,
void cv::ocl::minMaxLoc(const oclMat &src, double *minVal, double *maxVal, void cv::ocl::minMaxLoc(const oclMat &src, double *minVal, double *maxVal,
Point *minLoc, Point *maxLoc, const oclMat &mask) Point *minLoc, Point *maxLoc, const oclMat &mask)
{ {
if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
{ {
CV_Error(CV_GpuNotSupported, "select device don't support double"); CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
return; return;
} }
...@@ -1259,12 +1258,11 @@ static void arithmetic_countNonZero_run(const oclMat &src, cl_mem &dst, int vlen ...@@ -1259,12 +1258,11 @@ static void arithmetic_countNonZero_run(const oclMat &src, cl_mem &dst, int vlen
int cv::ocl::countNonZero(const oclMat &src) int cv::ocl::countNonZero(const oclMat &src)
{ {
size_t groupnum = src.clCxt->computeUnits(); size_t groupnum = src.clCxt->computeUnits();
if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
{ {
CV_Error(CV_GpuNotSupported, "select device don't support double"); CV_Error(CV_GpuNotSupported, "selected device doesn't support double");
} }
CV_Assert(groupnum != 0); CV_Assert(groupnum != 0);
// groupnum = groupnum * 2;
int vlen = 8 , dbsize = groupnum * vlen; int vlen = 8 , dbsize = groupnum * vlen;
Context *clCxt = src.clCxt; Context *clCxt = src.clCxt;
string kernelName = "arithm_op_nonzero"; string kernelName = "arithm_op_nonzero";
...@@ -1274,7 +1272,7 @@ int cv::ocl::countNonZero(const oclMat &src) ...@@ -1274,7 +1272,7 @@ int cv::ocl::countNonZero(const oclMat &src)
memset(p, 0, dbsize * sizeof(int)); memset(p, 0, dbsize * sizeof(int));
openCLReadBuffer(clCxt, dstBuffer, (void *)p, dbsize * sizeof(int)); openCLReadBuffer(clCxt, dstBuffer, (void *)p, dbsize * sizeof(int));
for(int i = 0; i < dbsize; i++) for (int i = 0; i < dbsize; i++)
nonzero += p[i]; nonzero += p[i];
delete[] p; delete[] p;
...@@ -1677,7 +1675,7 @@ static void arithmetic_pow_run(const oclMat &src1, double p, oclMat &dst, string ...@@ -1677,7 +1675,7 @@ static void arithmetic_pow_run(const oclMat &src1, double p, oclMat &dst, string
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 )); args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
float pf = static_cast<float>(p); float pf = static_cast<float>(p);
if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE)) if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE))
args.push_back( make_pair( sizeof(cl_float), (void *)&pf )); args.push_back( make_pair( sizeof(cl_float), (void *)&pf ));
else else
args.push_back( make_pair( sizeof(cl_double), (void *)&p )); args.push_back( make_pair( sizeof(cl_double), (void *)&p ));
...@@ -1687,7 +1685,7 @@ static void arithmetic_pow_run(const oclMat &src1, double p, oclMat &dst, string ...@@ -1687,7 +1685,7 @@ static void arithmetic_pow_run(const oclMat &src1, double p, oclMat &dst, string
void cv::ocl::pow(const oclMat &x, double p, oclMat &y) void cv::ocl::pow(const oclMat &x, double p, oclMat &y)
{ {
if(!x.clCxt->supportsFeature(Context::CL_DOUBLE) && x.type() == CV_64F) if (!x.clCxt->supportsFeature(Context::CL_DOUBLE) && x.type() == CV_64F)
{ {
cout << "Selected device do not support double" << endl; cout << "Selected device do not support double" << endl;
return; return;
...@@ -1714,14 +1712,14 @@ void cv::ocl::setIdentity(oclMat& src, double scalar) ...@@ -1714,14 +1712,14 @@ void cv::ocl::setIdentity(oclMat& src, double scalar)
size_t global_threads[] = {src.cols, src.rows, 1}; size_t global_threads[] = {src.cols, src.rows, 1};
string kernelName = "setIdentityKernel"; string kernelName = "setIdentityKernel";
if(src.type() == CV_32FC1) if (src.type() == CV_32FC1)
kernelName += "_F1"; kernelName += "_F1";
else if(src.type() == CV_32SC1) else if (src.type() == CV_32SC1)
kernelName += "_I1"; kernelName += "_I1";
else else
{ {
kernelName += "_D1"; kernelName += "_D1";
if(!(clCxt->supportsFeature(Context::CL_DOUBLE))) if (!(clCxt->supportsFeature(Context::CL_DOUBLE)))
{ {
oclMat temp; oclMat temp;
src.convertTo(temp, CV_32FC1); src.convertTo(temp, CV_32FC1);
...@@ -1738,9 +1736,9 @@ void cv::ocl::setIdentity(oclMat& src, double scalar) ...@@ -1738,9 +1736,9 @@ void cv::ocl::setIdentity(oclMat& src, double scalar)
int scalar_i = 0; int scalar_i = 0;
float scalar_f = 0.0f; float scalar_f = 0.0f;
if(clCxt->supportsFeature(Context::CL_DOUBLE)) if (clCxt->supportsFeature(Context::CL_DOUBLE))
{ {
if(src.type() == CV_32SC1) if (src.type() == CV_32SC1)
{ {
scalar_i = (int)scalar; scalar_i = (int)scalar;
args.push_back(make_pair(sizeof(cl_int), (void*)&scalar_i)); args.push_back(make_pair(sizeof(cl_int), (void*)&scalar_i));
...@@ -1750,7 +1748,7 @@ void cv::ocl::setIdentity(oclMat& src, double scalar) ...@@ -1750,7 +1748,7 @@ void cv::ocl::setIdentity(oclMat& src, double scalar)
} }
else else
{ {
if(src.type() == CV_32SC1) if (src.type() == CV_32SC1)
{ {
scalar_i = (int)scalar; scalar_i = (int)scalar;
args.push_back(make_pair(sizeof(cl_int), (void*)&scalar_i)); args.push_back(make_pair(sizeof(cl_int), (void*)&scalar_i));
......
...@@ -142,29 +142,35 @@ ...@@ -142,29 +142,35 @@
#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics:enable #pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics:enable
/**************************************Array minMax**************************************/ /**************************************Array minMax**************************************/
__kernel void arithm_op_minMaxLoc (int cols,int invalid_cols,int offset,int elemnum,int groupnum,
__kernel void arithm_op_minMaxLoc(int cols, int invalid_cols, int offset, int elemnum, int groupnum,
__global VEC_TYPE *src, __global RES_TYPE *dst) __global VEC_TYPE *src, __global RES_TYPE *dst)
{ {
unsigned int lid = get_local_id(0); unsigned int lid = get_local_id(0);
unsigned int gid = get_group_id(0); unsigned int gid = get_group_id(0);
unsigned int id = get_global_id(0); unsigned int id = get_global_id(0);
unsigned int idx = offset + id + (id / cols) * invalid_cols; unsigned int idx = offset + id + (id / cols) * invalid_cols;
__local VEC_TYPE localmem_max[128],localmem_min[128];
VEC_TYPE minval,maxval,temp; __local VEC_TYPE localmem_max[128], localmem_min[128];
__local VEC_TYPE_LOC localmem_maxloc[128],localmem_minloc[128]; VEC_TYPE minval, maxval, temp;
VEC_TYPE_LOC minloc,maxloc,temploc,negative = -1;
__local VEC_TYPE_LOC localmem_maxloc[128], localmem_minloc[128];
VEC_TYPE_LOC minloc, maxloc, temploc, negative = -1;
int idx_c; int idx_c;
if(id < elemnum)
if (id < elemnum)
{ {
temp = src[idx]; temp = src[idx];
idx_c = idx << 2; idx_c = idx << 2;
temploc = (VEC_TYPE_LOC)(idx_c,idx_c+1,idx_c+2,idx_c+3); temploc = (VEC_TYPE_LOC)(idx_c, idx_c + 1, idx_c + 2, idx_c + 3);
if(id % cols == 0 )
if (id % cols == 0 )
{ {
repeat_s(temp); repeat_s(temp);
repeat_s(temploc); repeat_s(temploc);
} }
if(id % cols == cols - 1) if (id % cols == cols - 1)
{ {
repeat_e(temp); repeat_e(temp);
repeat_e(temploc); repeat_e(temploc);
...@@ -181,164 +187,33 @@ __kernel void arithm_op_minMaxLoc (int cols,int invalid_cols,int offset,int elem ...@@ -181,164 +187,33 @@ __kernel void arithm_op_minMaxLoc (int cols,int invalid_cols,int offset,int elem
minloc = negative; minloc = negative;
maxloc = negative; maxloc = negative;
} }
float4 aaa;
for(id=id + (groupnum << 8); id < elemnum;id = id + (groupnum << 8)) int grainSize = (groupnum << 8);
for (id = id + grainSize; id < elemnum; id = id + grainSize)
{ {
idx = offset + id + (id / cols) * invalid_cols; idx = offset + id + (id / cols) * invalid_cols;
temp = src[idx]; temp = src[idx];
idx_c = idx << 2; idx_c = idx << 2;
temploc = (VEC_TYPE_LOC)(idx_c,idx_c+1,idx_c+2,idx_c+3); temploc = (VEC_TYPE_LOC)(idx_c, idx_c+1, idx_c+2, idx_c+3);
if(id % cols == 0 )
if (id % cols == 0 )
{ {
repeat_s(temp); repeat_s(temp);
repeat_s(temploc); repeat_s(temploc);
} }
if(id % cols == cols - 1) if (id % cols == cols - 1)
{ {
repeat_e(temp); repeat_e(temp);
repeat_e(temploc); repeat_e(temploc);
} }
minval = min(minval,temp);
maxval = max(maxval,temp);
minloc = CONDITION_FUNC(minval == temp, temploc , minloc);
maxloc = CONDITION_FUNC(maxval == temp, temploc , maxloc);
aaa= convert_float4(maxval == temp);
maxloc = convert_int4(aaa) ? temploc : maxloc;
}
if(lid > 127)
{
localmem_min[lid - 128] = minval;
localmem_max[lid - 128] = maxval;
localmem_minloc[lid - 128] = minloc;
localmem_maxloc[lid - 128] = maxloc;
}
barrier(CLK_LOCAL_MEM_FENCE);
if(lid < 128)
{
localmem_min[lid] = min(minval,localmem_min[lid]);
localmem_max[lid] = max(maxval,localmem_max[lid]);
localmem_minloc[lid] = CONDITION_FUNC(localmem_min[lid] == minval, minloc , localmem_minloc[lid]);
localmem_maxloc[lid] = CONDITION_FUNC(localmem_max[lid] == maxval, maxloc , localmem_maxloc[lid]);
}
barrier(CLK_LOCAL_MEM_FENCE);
for(int lsize = 64; lsize > 0; lsize >>= 1)
{
if(lid < lsize)
{
int lid2 = lsize + lid;
localmem_min[lid] = min(localmem_min[lid] , localmem_min[lid2]);
localmem_max[lid] = max(localmem_max[lid] , localmem_max[lid2]);
localmem_minloc[lid] =
CONDITION_FUNC(localmem_min[lid] == localmem_min[lid2], localmem_minloc[lid2] , localmem_minloc[lid]);
localmem_maxloc[lid] =
CONDITION_FUNC(localmem_max[lid] == localmem_max[lid2], localmem_maxloc[lid2] , localmem_maxloc[lid]);
}
barrier(CLK_LOCAL_MEM_FENCE);
}
if( lid == 0)
{
dst[gid] = CONVERT_RES_TYPE(localmem_min[0]);
dst[gid + groupnum] = CONVERT_RES_TYPE(localmem_max[0]);
dst[gid + 2 * groupnum] = CONVERT_RES_TYPE(localmem_minloc[0]);
dst[gid + 3 * groupnum] = CONVERT_RES_TYPE(localmem_maxloc[0]);
}
}
#if defined (REPEAT_S0) minval = min(minval, temp);
#define repeat_ms(a) a = a; maxval = max(maxval, temp);
#endif minloc = CONDITION_FUNC(minval == temp, temploc, minloc);
#if defined (REPEAT_S1) maxloc = CONDITION_FUNC(maxval == temp, temploc, maxloc);
#define repeat_ms(a) a.s0 = 0;
#endif
#if defined (REPEAT_S2)
#define repeat_ms(a) a.s0 = 0;a.s1 = 0;
#endif
#if defined (REPEAT_S3)
#define repeat_ms(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;
#endif
#if defined (REPEAT_E0)
#define repeat_me(a) a = a;
#endif
#if defined (REPEAT_E1)
#define repeat_me(a) a.s3 = 0;
#endif
#if defined (REPEAT_E2)
#define repeat_me(a) a.s3 = 0;a.s2 = 0;
#endif
#if defined (REPEAT_E3)
#define repeat_me(a) a.s3 = 0;a.s2 = 0;a.s1 = 0;
#endif
/**************************************Array minMaxLoc mask**************************************/
/*
__kernel void arithm_op_minMaxLoc_mask (int cols,int invalid_cols,int offset,int elemnum,int groupnum,__global VEC_TYPE *src,
int minvalid_cols,int moffset,__global uchar4 *mask,__global RES_TYPE *dst)
{
unsigned int lid = get_local_id(0);
unsigned int gid = get_group_id(0);
unsigned int id = get_global_id(0);
unsigned int idx = offset + id + (id / cols) * invalid_cols;
unsigned int midx = moffset + id + (id / cols) * minvalid_cols;
__local VEC_TYPE localmem_max[128],localmem_min[128];
VEC_TYPE minval,maxval,temp,max_val = MAX_VAL,min_val = MIN_VAL,zero = 0,m_temp;
__local VEC_TYPE_LOC localmem_maxloc[128],localmem_minloc[128];
VEC_TYPE_LOC minloc,maxloc,temploc,negative = -1;
if(id < elemnum)
{
temp = src[idx];
m_temp = CONVERT_TYPE(mask[midx]);
int idx_c = idx << 2;
temploc = (VEC_TYPE_LOC)(idx_c,idx_c+1,idx_c+2,idx_c+3);
if(id % cols == 0 )
{
repeat_ms(m_temp);
repeat_s(temploc);
}
if(id % cols == cols - 1)
{
repeat_me(m_temp);
repeat_e(temploc);
}
minval = m_temp > zero ? temp : max_val;
maxval = m_temp > zero ? temp : min_val;
minloc = CONDITION_FUNC(m_temp > zero, temploc , negative);
maxloc = minloc;
} }
else
{
minval = MAX_VAL;
maxval = MIN_VAL;
minloc = negative;
maxloc = negative;
}
for(id=id + (groupnum << 8); id < elemnum;id = id + (groupnum << 8))
{
idx = offset + id + (id / cols) * invalid_cols;
midx = moffset + id + (id / cols) * minvalid_cols;
temp = src[idx];
m_temp = CONVERT_TYPE(mask[midx]);
int idx_c = idx << 2;
temploc = (VEC_TYPE_LOC)(idx_c,idx_c+1,idx_c+2,idx_c+3);
if(id % cols == 0 )
{
repeat_ms(m_temp);
repeat_s(temploc);
}
if(id % cols == cols - 1)
{
repeat_me(m_temp);
repeat_e(temploc);
}
minval = min(minval,m_temp > zero ? temp : max_val);
maxval = max(maxval,m_temp > zero ? temp : min_val);
temploc = CONDITION_FUNC(m_temp > zero, temploc , negative); if (lid > 127)
minloc = CONDITION_FUNC(minval == temp, temploc , minloc);
maxloc = CONDITION_FUNC(maxval == temp, temploc , maxloc);
}
if(lid > 127)
{ {
localmem_min[lid - 128] = minval; localmem_min[lid - 128] = minval;
localmem_max[lid - 128] = maxval; localmem_max[lid - 128] = maxval;
...@@ -346,29 +221,30 @@ __kernel void arithm_op_minMaxLoc_mask (int cols,int invalid_cols,int offset,int ...@@ -346,29 +221,30 @@ __kernel void arithm_op_minMaxLoc_mask (int cols,int invalid_cols,int offset,int
localmem_maxloc[lid - 128] = maxloc; localmem_maxloc[lid - 128] = maxloc;
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if(lid < 128)
if (lid < 128)
{ {
localmem_min[lid] = min(minval,localmem_min[lid]); localmem_min[lid] = min(minval,localmem_min[lid]);
localmem_max[lid] = max(maxval,localmem_max[lid]); localmem_max[lid] = max(maxval,localmem_max[lid]);
localmem_minloc[lid] = CONDITION_FUNC(localmem_min[lid] == minval, minloc , localmem_minloc[lid]); localmem_minloc[lid] = CONDITION_FUNC(localmem_min[lid] == minval, minloc, localmem_minloc[lid]);
localmem_maxloc[lid] = CONDITION_FUNC(localmem_max[lid] == maxval, maxloc , localmem_maxloc[lid]); localmem_maxloc[lid] = CONDITION_FUNC(localmem_max[lid] == maxval, maxloc, localmem_maxloc[lid]);
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
for(int lsize = 64; lsize > 0; lsize >>= 1)
for (int lsize = 64; lsize > 0; lsize >>= 1)
{ {
if(lid < lsize) if (lid < lsize)
{ {
int lid2 = lsize + lid; int lid2 = lsize + lid;
localmem_min[lid] = min(localmem_min[lid] , localmem_min[lid2]); localmem_min[lid] = min(localmem_min[lid], localmem_min[lid2]);
localmem_max[lid] = max(localmem_max[lid] , localmem_max[lid2]); localmem_max[lid] = max(localmem_max[lid], localmem_max[lid2]);
localmem_minloc[lid] = localmem_minloc[lid] = CONDITION_FUNC(localmem_min[lid] == localmem_min[lid2], localmem_minloc[lid2], localmem_minloc[lid]);
CONDITION_FUNC(localmem_min[lid] == localmem_min[lid2], localmem_minloc[lid2] , localmem_minloc[lid]); localmem_maxloc[lid] = CONDITION_FUNC(localmem_max[lid] == localmem_max[lid2], localmem_maxloc[lid2], localmem_maxloc[lid]);
localmem_maxloc[lid] =
CONDITION_FUNC(localmem_max[lid] == localmem_max[lid2], localmem_maxloc[lid2] , localmem_maxloc[lid]);
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
} }
if( lid == 0)
if ( lid == 0)
{ {
dst[gid] = CONVERT_RES_TYPE(localmem_min[0]); dst[gid] = CONVERT_RES_TYPE(localmem_min[0]);
dst[gid + groupnum] = CONVERT_RES_TYPE(localmem_max[0]); dst[gid + groupnum] = CONVERT_RES_TYPE(localmem_max[0]);
...@@ -376,5 +252,3 @@ __kernel void arithm_op_minMaxLoc_mask (int cols,int invalid_cols,int offset,int ...@@ -376,5 +252,3 @@ __kernel void arithm_op_minMaxLoc_mask (int cols,int invalid_cols,int offset,int
dst[gid + 3 * groupnum] = CONVERT_RES_TYPE(localmem_maxloc[0]); dst[gid + 3 * groupnum] = CONVERT_RES_TYPE(localmem_maxloc[0]);
} }
} }
*/
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment