Commit f7aadd07 authored by Alexander Karsakov's avatar Alexander Karsakov

Added getLines, fill_accum_local kernels

parent 038bfb98
...@@ -239,8 +239,9 @@ namespace ...@@ -239,8 +239,9 @@ namespace
void GeneralizedHoughBase::detectImpl(InputArray image, OutputArray positions, OutputArray votes) void GeneralizedHoughBase::detectImpl(InputArray image, OutputArray positions, OutputArray votes)
{ {
#ifndef HAVE_OPENCV_CUDAFILTERS #ifndef HAVE_OPENCV_CUDAFILTERS
(void) templ; (void) image;
(void) templCenter; (void) positions;
(void) votes;
throw_no_cuda(); throw_no_cuda();
#else #else
calcEdges(image, imageEdges_, imageDx_, imageDy_); calcEdges(image, imageEdges_, imageDx_, imageDy_);
......
...@@ -668,9 +668,10 @@ static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, doub ...@@ -668,9 +668,10 @@ static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, doub
UMat src = _src.getUMat(); UMat src = _src.getUMat();
float irho = 1 / rho; float irho = (float) (1 / rho);
int numangle = cvRound((max_theta - min_theta) / theta); int numangle = cvRound((max_theta - min_theta) / theta);
int numrho = cvRound(((src.cols + src.rows) * 2 + 1) / rho); int numrho = cvRound(((src.cols + src.rows) * 2 + 1) / rho);
ocl::Device dev = ocl::Device::getDefault();
// make list of nonzero points // make list of nonzero points
const int pixelsPerWI = 4; const int pixelsPerWI = 4;
...@@ -680,7 +681,7 @@ static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, doub ...@@ -680,7 +681,7 @@ static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, doub
if (pointListKernel.empty()) if (pointListKernel.empty())
return false; return false;
UMat pointsList(1, src.total(), CV_32SC1); UMat pointsList(1, (int) src.total(), CV_32SC1);
UMat total(1, 1, CV_32SC1, Scalar::all(0)); UMat total(1, 1, CV_32SC1, Scalar::all(0));
pointListKernel.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(pointsList), pointListKernel.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(pointsList),
ocl::KernelArg::PtrWriteOnly(total)); ocl::KernelArg::PtrWriteOnly(total));
...@@ -692,37 +693,66 @@ static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, doub ...@@ -692,37 +693,66 @@ static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, doub
int total_points = total.getMat(ACCESS_READ).at<int>(0, 0); int total_points = total.getMat(ACCESS_READ).at<int>(0, 0);
if (total_points <= 0) if (total_points <= 0)
return false; {
_lines.assign(UMat(0,0,CV_32FC2));
return true;
}
// convert src to hough space // convert src to hough space
group_size = (total_points + pixelsPerWI - 1)/pixelsPerWI; group_size = min((int) dev.maxWorkGroupSize(), total_points);
ocl::Kernel fillAccumKernel("fill_accum", ocl::imgproc::hough_lines_oclsrc, int local_memory_needed = (numrho + 2)*sizeof(int);
format("-D FILL_ACCUM -D GROUP_SIZE=%d", group_size)); ocl::Kernel fillAccumKernel;
globalThreads[0] = group_size; globalThreads[1] = numangle;
size_t* fillAccumLT = NULL;
UMat accum(numangle + 2, numrho + 2, CV_32SC1);
if (local_memory_needed > dev.localMemSize())
{
fillAccumKernel.create("fill_accum_global", ocl::imgproc::hough_lines_oclsrc,
format("-D FILL_ACCUM_GLOBAL"));
accum.setTo(Scalar::all(0));
}
else
{
fillAccumKernel.create("fill_accum_local", ocl::imgproc::hough_lines_oclsrc,
format("-D FILL_ACCUM_LOCAL -D LOCAL_SIZE=%d -D BUFFER_SIZE=%d", group_size, numrho + 2));
localThreads[0] = group_size; localThreads[1] = 1;
fillAccumLT = localThreads;
}
if (fillAccumKernel.empty()) if (fillAccumKernel.empty())
return false; return false;
UMat accum(numangle + 2, numrho + 2, CV_32SC1, Scalar::all(0)); int linesMax = min(total_points*numangle/threshold, 4096);
UMat lines(linesMax, 1, CV_32FC2);
UMat lines_count(1, 1, CV_32SC1, Scalar::all(0));
fillAccumKernel.args(ocl::KernelArg::ReadOnlyNoSize(pointsList), ocl::KernelArg::WriteOnly(accum), fillAccumKernel.args(ocl::KernelArg::ReadOnlyNoSize(pointsList), ocl::KernelArg::WriteOnly(accum),
total_points, irho, (float) theta, numrho, numangle); total_points, irho, (float) theta, numrho, numangle);
globalThreads[0] = group_size; globalThreads[1] = numangle;
if (!fillAccumKernel.run(2, globalThreads, NULL, false))
if (!fillAccumKernel.run(2, globalThreads, fillAccumLT, false))
return false; return false;
printf("GPU: \n");
int sum = 0;
Mat ac = accum.getMat(ACCESS_READ);
for (int i=0; i<8; i++)
{
for (int j=0; j<8; j++)
{
sum += ac.at<int>(i, j);
printf("%d ", ac.at<int>(i, j));
}
printf("\n");
}
printf("sum = %d\n", sum);
ocl::Kernel getLinesKernel("get_lines", ocl::imgproc::hough_lines_oclsrc,
format("-D GET_LINES"));
if (getLinesKernel.empty())
return false; return false;
globalThreads[0] = numrho; globalThreads[1] = numangle;
getLinesKernel.args(ocl::KernelArg::ReadOnly(accum), ocl::KernelArg::WriteOnlyNoSize(lines),
ocl::KernelArg::PtrWriteOnly(lines_count), linesMax, threshold, (float) rho, (float) theta);
if (!getLinesKernel.run(2, globalThreads, NULL, false))
return false;
int total_lines = min(lines_count.getMat(ACCESS_READ).at<int>(0, 0), linesMax);
if (total_lines > 0)
_lines.assign(lines.rowRange(Range(0, total_lines)));
else
_lines.assign(UMat(0,0,CV_32FC2));
return true;
} }
} }
......
...@@ -50,9 +50,9 @@ __kernel void make_point_list(__global const uchar * src_ptr, int src_step, int ...@@ -50,9 +50,9 @@ __kernel void make_point_list(__global const uchar * src_ptr, int src_step, int
} }
} }
#elif defined FILL_ACCUM #elif defined FILL_ACCUM_GLOBAL
__kernel void fill_accum(__global const uchar * list_ptr, int list_step, int list_offset, __kernel void fill_accum_global(__global const uchar * list_ptr, int list_step, int list_offset,
__global uchar * accum_ptr, int accum_step, int accum_offset, int accum_rows, int accum_cols, __global uchar * accum_ptr, int accum_step, int accum_offset, int accum_rows, int accum_cols,
int count, float irho, float theta, int numrho, int numangle) int count, float irho, float theta, int numrho, int numangle)
{ {
...@@ -82,5 +82,82 @@ __kernel void fill_accum(__global const uchar * list_ptr, int list_step, int lis ...@@ -82,5 +82,82 @@ __kernel void fill_accum(__global const uchar * list_ptr, int list_step, int lis
} }
} }
#elif defined FILL_ACCUM_LOCAL
__kernel void fill_accum_local(__global const uchar * list_ptr, int list_step, int list_offset,
__global uchar * accum_ptr, int accum_step, int accum_offset, int accum_rows, int accum_cols,
int count, float irho, float theta, int numrho, int numangle)
{
int theta_idx = get_global_id(1);
int count_idx = get_local_id(0);
float cosVal;
float sinVal = sincos(theta * ((float)theta_idx), &cosVal);
sinVal *= irho;
cosVal *= irho;
__local int l_accum[BUFFER_SIZE];
for (int i=count_idx; i<BUFFER_SIZE; i+=LOCAL_SIZE)
l_accum[i] = 0;
barrier(CLK_LOCAL_MEM_FENCE);
__global const int * list = (__global const int*)(list_ptr + list_offset);
const int shift = (numrho - 1) / 2;
if (theta_idx < numangle)
{
for (int i = count_idx; i < count; i += LOCAL_SIZE)
{
const int val = list[i];
const int x = (val & 0xFFFF);
const int y = (val >> 16) & 0xFFFF;
int r = convert_int_rte(x * cosVal + y * sinVal) + shift;
atomic_inc(l_accum + r + 1);
}
}
barrier(CLK_LOCAL_MEM_FENCE);
__global int* accum = (__global int*)(accum_ptr + mad24(theta_idx + 1, accum_step, accum_offset));
for (int i=count_idx; i<BUFFER_SIZE; i+=LOCAL_SIZE)
accum[i] = l_accum[i];
}
#elif defined GET_LINES
#define ACCUM(ptr) *((__global int*)(ptr))
__kernel void get_lines(__global uchar * accum_ptr, int accum_step, int accum_offset, int accum_rows, int accum_cols,
__global uchar * lines_ptr, int lines_step, int lines_offset, __global int* lines_index,
int linesMax, int threshold, float rho, float theta)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < accum_cols-2 && y < accum_rows-2)
{
__global uchar* accum = accum_ptr + mad24(y+1, accum_step, mad24(x+1, (int) sizeof(int), accum_offset));
__global float2* lines = (__global float2*)(lines_ptr + lines_offset);
int curVote = ACCUM(accum);
if (curVote > threshold && curVote > ACCUM(accum - sizeof(int)) && curVote >= ACCUM(accum + sizeof(int)) &&
curVote > ACCUM(accum - accum_step) && curVote >= ACCUM(accum + accum_step))
{
int index = atomic_inc(lines_index);
if (index < linesMax)
{
float radius = (x - (accum_cols - 3) * 0.5f) * rho;
float angle = y * theta;
lines[index] = (float2)(radius, angle);
}
}
}
}
#endif #endif
...@@ -13,49 +13,66 @@ ...@@ -13,49 +13,66 @@
namespace cvtest { namespace cvtest {
namespace ocl { namespace ocl {
PARAM_TEST_CASE(HoughLinesTestBase, bool) struct Vec2fComparator
{
bool operator()(const cv::Vec2f& a, const cv::Vec2f b) const
{
if(a[0] != b[0]) return a[0] < b[0];
else return a[1] < b[1];
}
};
PARAM_TEST_CASE(HoughLinesTestBase, double, double, int)
{ {
double rhoStep; double rhoStep;
double thetaStep; double thetaStep;
int threshold; int threshold;
bool useRoi;
Size src_size;
Mat src, dst; Mat src, dst;
UMat usrc, udst; UMat usrc, udst;
virtual void SetUp() virtual void SetUp()
{ {
rhoStep = 10; rhoStep = GET_PARAM(0);
thetaStep = 0.5; thetaStep = GET_PARAM(1);
threshold = 80; threshold = GET_PARAM(2);
useRoi = false;
} }
virtual void generateTestData() virtual void generateTestData()
{ {
//Mat image = readImage("shared/pic1.png", IMREAD_GRAYSCALE); src_size = randomSize(500, 1000);
src.create(src_size, CV_8UC1);
Mat image = randomMat(Size(20, 10), CV_8UC1, 0, 255, false); src.setTo(Scalar::all(0));
line(src, Point(0, 100), Point(100, 100), Scalar::all(255), 1);
line(src, Point(0, 200), Point(100, 200), Scalar::all(255), 1);
line(src, Point(0, 400), Point(100, 400), Scalar::all(255), 1);
line(src, Point(100, 0), Point(100, 200), Scalar::all(255), 1);
line(src, Point(200, 0), Point(200, 200), Scalar::all(255), 1);
line(src, Point(400, 0), Point(400, 200), Scalar::all(255), 1);
cv::threshold(image, src, 127, 255, THRESH_BINARY);
//Canny(image, src, 100, 150, 3);
src.copyTo(usrc); src.copyTo(usrc);
} }
}; };
typedef HoughLinesTestBase HoughLines; typedef HoughLinesTestBase HoughLines;
OCL_TEST_P(HoughLines, RealImage) OCL_TEST_P(HoughLines, GeneratedImage)
{ {
for (int j = 0; j < test_loop_times; j++)
{
generateTestData(); generateTestData();
//std::cout << src << std::endl; OCL_OFF(cv::HoughLines(src, dst, rhoStep, thetaStep, threshold));
OCL_ON(cv::HoughLines(usrc, udst, rhoStep, thetaStep, threshold));
OCL_OFF(cv::HoughLines(src, dst, rhoStep, thetaStep, threshold, 0, 0)); //Near(1e-5);
OCL_ON(cv::HoughLines(usrc, udst, rhoStep, thetaStep, threshold, 0, 0)); }
} }
OCL_INSTANTIATE_TEST_CASE_P(Imgproc, HoughLines, Values(true, false)); OCL_INSTANTIATE_TEST_CASE_P(Imgproc, HoughLines, Combine(Values(1, 0.5), // rhoStep
Values(CV_PI / 180.0, CV_PI / 360.0), // thetaStep
Values(80, 150))); // threshold
} } // namespace cvtest::ocl } } // namespace cvtest::ocl
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment