Commit eeaa4b36 authored by Ilya Lavrenov

eliminated convertTo

parent c072c28e
...@@ -1494,7 +1494,7 @@ static bool ocl_calcHist1(InputArray _src, OutputArray _hist, int ddepth = CV_32 ...@@ -1494,7 +1494,7 @@ static bool ocl_calcHist1(InputArray _src, OutputArray _hist, int ddepth = CV_32
_hist.create(BINS, 1, ddepth); _hist.create(BINS, 1, ddepth);
UMat src = _src.getUMat(), ghist(1, BINS * compunits, CV_32SC1), UMat src = _src.getUMat(), ghist(1, BINS * compunits, CV_32SC1),
hist = ddepth == CV_32S ? _hist.getUMat() : UMat(BINS, 1, CV_32SC1); hist = _hist.getUMat();
k1.args(ocl::KernelArg::ReadOnly(src), k1.args(ocl::KernelArg::ReadOnly(src),
ocl::KernelArg::PtrWriteOnly(ghist), (int)src.total()); ocl::KernelArg::PtrWriteOnly(ghist), (int)src.total());
...@@ -1503,23 +1503,18 @@ static bool ocl_calcHist1(InputArray _src, OutputArray _hist, int ddepth = CV_32 ...@@ -1503,23 +1503,18 @@ static bool ocl_calcHist1(InputArray _src, OutputArray _hist, int ddepth = CV_32
if (!k1.run(1, &globalsize, &wgs, false)) if (!k1.run(1, &globalsize, &wgs, false))
return false; return false;
char cvt[40];
ocl::Kernel k2("merge_histogram", ocl::imgproc::histogram_oclsrc, ocl::Kernel k2("merge_histogram", ocl::imgproc::histogram_oclsrc,
format("-D BINS=%d -D HISTS_COUNT=%d -D WGS=%d", format("-D BINS=%d -D HISTS_COUNT=%d -D WGS=%d -D convertToHT=%s -D HT=%s",
BINS, compunits, (int)wgs)); BINS, compunits, (int)wgs, ocl::convertTypeStr(CV_32S, ddepth, 1, cvt),
ocl::typeToStr(ddepth)));
if (k2.empty()) if (k2.empty())
return false; return false;
k2.args(ocl::KernelArg::PtrReadOnly(ghist), k2.args(ocl::KernelArg::PtrReadOnly(ghist),
ocl::KernelArg::PtrWriteOnly(hist)); ocl::KernelArg::WriteOnlyNoSize(hist));
if (!k2.run(1, &wgs, &wgs, false))
return false;
if (hist.depth() != ddepth)
hist.convertTo(_hist, ddepth);
else
_hist.getUMatRef() = hist;
return true; return k2.run(1, &wgs, &wgs, false);
} }
static bool ocl_calcHist(InputArrayOfArrays images, OutputArray hist) static bool ocl_calcHist(InputArrayOfArrays images, OutputArray hist)
......
...@@ -45,6 +45,8 @@ ...@@ -45,6 +45,8 @@
#define T uchar #define T uchar
#endif #endif
#define noconvert
__kernel void calculate_histogram(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols, __kernel void calculate_histogram(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols,
__global uchar * histptr, int total) __global uchar * histptr, int total)
{ {
...@@ -111,10 +113,20 @@ __kernel void calculate_histogram(__global const uchar * src, int src_step, int ...@@ -111,10 +113,20 @@ __kernel void calculate_histogram(__global const uchar * src, int src_step, int
hist[i] = localhist[i]; hist[i] = localhist[i];
} }
__kernel void merge_histogram(__global const int * ghist, __global int * hist) #ifndef HT
#define HT int
#endif
#ifndef convertToHT
#define convertToHT noconvert
#endif
__kernel void merge_histogram(__global const int * ghist, __global uchar * histptr, int hist_step, int hist_offset)
{ {
int lid = get_local_id(0); int lid = get_local_id(0);
__global HT * hist = (__global HT *)(histptr + hist_offset);
#pragma unroll #pragma unroll
for (int i = lid; i < BINS; i += WGS) for (int i = lid; i < BINS; i += WGS)
hist[i] = ghist[i]; hist[i] = ghist[i];
...@@ -126,7 +138,7 @@ __kernel void merge_histogram(__global const int * ghist, __global int * hist) ...@@ -126,7 +138,7 @@ __kernel void merge_histogram(__global const int * ghist, __global int * hist)
ghist += BINS; ghist += BINS;
#pragma unroll #pragma unroll
for (int j = lid; j < BINS; j += WGS) for (int j = lid; j < BINS; j += WGS)
hist[j] += ghist[j]; hist[j] += convertToHT(ghist[j]);
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment