Commit 39b27a19 authored by Alexander Karsakov's avatar Alexander Karsakov

Refactoring and optimization

parent d59a6fa5
...@@ -652,80 +652,102 @@ HoughLinesProbabilistic( Mat& image, ...@@ -652,80 +652,102 @@ HoughLinesProbabilistic( Mat& image,
} }
} }
} }
static bool ocl_makePointsList(InputArray _src, OutputArray _pointsList, InputOutputArray _counters)
static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, double theta, int threshold,
double min_theta, double max_theta)
{ {
CV_Assert(_src.type() == CV_8UC1);
if (max_theta < 0 || max_theta > CV_PI ) {
CV_Error( CV_StsBadArg, "max_theta must fall between 0 and pi" );
}
if (min_theta < 0 || min_theta > max_theta ) {
CV_Error( CV_StsBadArg, "min_theta must fall between 0 and max_theta" );
}
UMat src = _src.getUMat(); UMat src = _src.getUMat();
_pointsList.create(1, (int) src.total(), CV_32SC1);
float irho = (float) (1 / rho); UMat pointsList = _pointsList.getUMat();
int numangle = cvRound((max_theta - min_theta) / theta); UMat counters = _counters.getUMat();
int numrho = cvRound(((src.cols + src.rows) * 2 + 1) / rho);
ocl::Device dev = ocl::Device::getDefault(); ocl::Device dev = ocl::Device::getDefault();
// make list of nonzero points const int pixelsPerWI = 16;
const int pixelsPerWI = 8;
int workgroup_size = min((int) dev.maxWorkGroupSize(), (src.cols + pixelsPerWI - 1)/pixelsPerWI); int workgroup_size = min((int) dev.maxWorkGroupSize(), (src.cols + pixelsPerWI - 1)/pixelsPerWI);
ocl::Kernel pointListKernel("make_point_list", ocl::imgproc::hough_lines_oclsrc, ocl::Kernel pointListKernel("make_point_list", ocl::imgproc::hough_lines_oclsrc,
format("-D MAKE_POINTS_LIST -D GROUP_SIZE=%d -D LOCAL_SIZE=%d", workgroup_size, src.cols)); format("-D MAKE_POINTS_LIST -D GROUP_SIZE=%d -D LOCAL_SIZE=%d", workgroup_size, src.cols));
if (pointListKernel.empty()) if (pointListKernel.empty())
return false; return false;
UMat pointsList(1, (int) src.total(), CV_32SC1);
UMat counters(1, 2, CV_32SC1, Scalar::all(0));
pointListKernel.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(pointsList), pointListKernel.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(pointsList),
ocl::KernelArg::PtrWriteOnly(counters)); ocl::KernelArg::PtrWriteOnly(counters));
size_t localThreads[2] = { workgroup_size, 1 }; size_t localThreads[2] = { workgroup_size, 1 };
size_t globalThreads[2] = { workgroup_size, src.rows }; size_t globalThreads[2] = { workgroup_size, src.rows };
if (!pointListKernel.run(2, globalThreads, localThreads, false)) return pointListKernel.run(2, globalThreads, localThreads, false);
return false; }
int total_points = counters.getMat(ACCESS_READ).at<int>(0, 0); static bool ocl_fillAccum(InputArray _pointsList, OutputArray _accum, int total_points, double rho, double theta, int numrho, int numangle)
if (total_points <= 0) {
{ UMat pointsList = _pointsList.getUMat();
_lines.assign(UMat(0,0,CV_32FC2)); _accum.create(numangle + 2, numrho + 2, CV_32SC1);
return true; UMat accum = _accum.getUMat();
} ocl::Device dev = ocl::Device::getDefault();
// convert src image to hough space float irho = (float) (1 / rho);
UMat accum(numangle + 2, numrho + 2, CV_32SC1); int workgroup_size = min((int) dev.maxWorkGroupSize(), total_points);
workgroup_size = min((int) dev.maxWorkGroupSize(), total_points);
ocl::Kernel fillAccumKernel; ocl::Kernel fillAccumKernel;
size_t* fillAccumLT = NULL; size_t localThreads[2];
size_t globalThreads[2];
int local_memory_needed = (numrho + 2)*sizeof(int); int local_memory_needed = (numrho + 2)*sizeof(int);
if (local_memory_needed > dev.localMemSize()) if (local_memory_needed > dev.localMemSize())
{ {
accum.setTo(Scalar::all(0)); accum.setTo(Scalar::all(0));
fillAccumKernel.create("fill_accum_global", ocl::imgproc::hough_lines_oclsrc, fillAccumKernel.create("fill_accum_global", ocl::imgproc::hough_lines_oclsrc,
format("-D FILL_ACCUM_GLOBAL")); format("-D FILL_ACCUM_GLOBAL"));
if (fillAccumKernel.empty())
return false;
globalThreads[0] = workgroup_size; globalThreads[1] = numangle; globalThreads[0] = workgroup_size; globalThreads[1] = numangle;
fillAccumKernel.args(ocl::KernelArg::ReadOnlyNoSize(pointsList), ocl::KernelArg::WriteOnly(accum),
total_points, irho, (float) theta, numrho, numangle);
return fillAccumKernel.run(2, globalThreads, NULL, false);
} }
else else
{ {
fillAccumKernel.create("fill_accum_local", ocl::imgproc::hough_lines_oclsrc, fillAccumKernel.create("fill_accum_local", ocl::imgproc::hough_lines_oclsrc,
format("-D FILL_ACCUM_LOCAL -D LOCAL_SIZE=%d -D BUFFER_SIZE=%d", workgroup_size, numrho + 2)); format("-D FILL_ACCUM_LOCAL -D LOCAL_SIZE=%d -D BUFFER_SIZE=%d", workgroup_size, numrho + 2));
if (fillAccumKernel.empty())
return false;
localThreads[0] = workgroup_size; localThreads[1] = 1; localThreads[0] = workgroup_size; localThreads[1] = 1;
globalThreads[0] = workgroup_size; globalThreads[1] = numangle+2; globalThreads[0] = workgroup_size; globalThreads[1] = numangle+2;
fillAccumLT = localThreads; fillAccumKernel.args(ocl::KernelArg::ReadOnlyNoSize(pointsList), ocl::KernelArg::WriteOnly(accum),
total_points, irho, (float) theta, numrho, numangle);
return fillAccumKernel.run(2, globalThreads, localThreads, false);
}
}
static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, double theta, int threshold,
double min_theta, double max_theta)
{
CV_Assert(_src.type() == CV_8UC1);
if (max_theta < 0 || max_theta > CV_PI ) {
CV_Error( CV_StsBadArg, "max_theta must fall between 0 and pi" );
} }
if (fillAccumKernel.empty()) if (min_theta < 0 || min_theta > max_theta ) {
CV_Error( CV_StsBadArg, "min_theta must fall between 0 and max_theta" );
}
UMat src = _src.getUMat();
int numangle = cvRound((max_theta - min_theta) / theta);
int numrho = cvRound(((src.cols + src.rows) * 2 + 1) / rho);
UMat pointsList;
UMat counters(1, 2, CV_32SC1, Scalar::all(0));
if (!ocl_makePointsList(src, pointsList, counters))
return false; return false;
fillAccumKernel.args(ocl::KernelArg::ReadOnlyNoSize(pointsList), ocl::KernelArg::WriteOnly(accum), int total_points = counters.getMat(ACCESS_READ).at<int>(0, 0);
total_points, irho, (float) theta, numrho, numangle); if (total_points <= 0)
{
_lines.assign(UMat(0,0,CV_32FC2));
return true;
}
if (!fillAccumKernel.run(2, globalThreads, fillAccumLT, false)) UMat accum;
if (!ocl_fillAccum(pointsList, accum, total_points, rho, theta, numrho, numangle))
return false; return false;
const int pixPerWI = 8; const int pixPerWI = 8;
...@@ -741,7 +763,7 @@ static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, doub ...@@ -741,7 +763,7 @@ static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, doub
getLinesKernel.args(ocl::KernelArg::ReadOnly(accum), ocl::KernelArg::WriteOnlyNoSize(lines), getLinesKernel.args(ocl::KernelArg::ReadOnly(accum), ocl::KernelArg::WriteOnlyNoSize(lines),
ocl::KernelArg::PtrWriteOnly(counters), linesMax, threshold, (float) rho, (float) theta); ocl::KernelArg::PtrWriteOnly(counters), linesMax, threshold, (float) rho, (float) theta);
globalThreads[0] = (numrho + pixPerWI - 1)/pixPerWI; globalThreads[1] = numangle; size_t globalThreads[2] = { (numrho + pixPerWI - 1)/pixPerWI, numangle };
if (!getLinesKernel.run(2, globalThreads, NULL, false)) if (!getLinesKernel.run(2, globalThreads, NULL, false))
return false; return false;
...@@ -753,13 +775,23 @@ static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, doub ...@@ -753,13 +775,23 @@ static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, doub
return true; return true;
} }
static bool ocl_HoughLinesP(InputArray _src, OutputArray _lines, double rho, double theta, int threshold,
double minLineLength, double maxGap)
{
CV_Assert(_src.type() == CV_8UC1);
UMat src = _src.getUMat();
return false;
}
} }
void cv::HoughLines( InputArray _image, OutputArray _lines, void cv::HoughLines( InputArray _image, OutputArray _lines,
double rho, double theta, int threshold, double rho, double theta, int threshold,
double srn, double stn, double min_theta, double max_theta ) double srn, double stn, double min_theta, double max_theta )
{ {
CV_OCL_RUN(srn == 0 && stn == 0 && _lines.isUMat(), CV_OCL_RUN(srn == 0 && stn == 0 && _image.isUMat() && _lines.isUMat(),
ocl_HoughLines(_image, _lines, rho, theta, threshold, min_theta, max_theta)); ocl_HoughLines(_image, _lines, rho, theta, threshold, min_theta, max_theta));
Mat image = _image.getMat(); Mat image = _image.getMat();
...@@ -778,6 +810,8 @@ void cv::HoughLinesP(InputArray _image, OutputArray _lines, ...@@ -778,6 +810,8 @@ void cv::HoughLinesP(InputArray _image, OutputArray _lines,
double rho, double theta, int threshold, double rho, double theta, int threshold,
double minLineLength, double maxGap ) double minLineLength, double maxGap )
{ {
CV_OCL_RUN(_image.isUMat() && _lines.isUMat(), ocl_HoughLinesP(_image, _lines, rho, theta, threshold, minLineLength, maxGap));
Mat image = _image.getMat(); Mat image = _image.getMat();
std::vector<Vec4i> lines; std::vector<Vec4i> lines;
HoughLinesProbabilistic(image, (float)rho, (float)theta, threshold, cvRound(minLineLength), cvRound(maxGap), lines, INT_MAX); HoughLinesProbabilistic(image, (float)rho, (float)theta, threshold, cvRound(minLineLength), cvRound(maxGap), lines, INT_MAX);
......
...@@ -107,14 +107,13 @@ __kernel void fill_accum_local(__global const uchar * list_ptr, int list_step, i ...@@ -107,14 +107,13 @@ __kernel void fill_accum_local(__global const uchar * list_ptr, int list_step, i
__global const int * list = (__global const int*)(list_ptr + list_offset); __global const int * list = (__global const int*)(list_ptr + list_offset);
const int shift = (numrho - 1) / 2; const int shift = (numrho - 1) / 2;
for (int i = count_idx; i < total_points; i += LOCAL_SIZE) for (int i = count_idx; i < total_points; i += LOCAL_SIZE)
{ {
const int point = list[i]; const int point = list[i];
const int x = (point & 0xFFFF); const int x = (point & 0xFFFF);
const int y = (point >> 16) & 0xFFFF; const int y = point >> 16;
int r = convert_int_rte(x * cosVal + y * sinVal) + shift; int r = convert_int_rte(mad(x, cosVal, y * sinVal)) + shift;
atomic_inc(l_accum + r + 1); atomic_inc(l_accum + r + 1);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment