Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
13c44dd3
Commit
13c44dd3
authored
Dec 14, 2012
by
Suenghoon Park
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
finished ocl::HoughCircles
parent
0656f131
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
374 additions
and
348 deletions
+374
-348
ocl.hpp
modules/ocl/include/opencv2/ocl/ocl.hpp
+2
-1
hough.cpp
modules/ocl/src/hough.cpp
+244
-194
imgproc_hough.cl
modules/ocl/src/kernels/imgproc_hough.cl
+128
-153
No files found.
modules/ocl/include/opencv2/ocl/ocl.hpp
View file @
13c44dd3
...
...
@@ -833,7 +833,8 @@ namespace cv
{
oclMat
edges
;
oclMat
accum
;
oclMat
list
;
oclMat
srcPoints
;
oclMat
centers
;
CannyBuf
cannyBuf
;
};
...
...
modules/ocl/src/hough.cpp
View file @
13c44dd3
...
...
@@ -50,31 +50,29 @@ using namespace cv::ocl;
#if !defined (HAVE_OPENCL)
// void cv::ocl::HoughLines(const oclMat&, oclMat&, float, float, int, bool, int) { throw_nogpu(); }
// void cv::ocl::HoughLines(const oclMat&, oclMat&, HoughLinesBuf&, float, float, int, bool, int) { throw_nogpu(); }
// void cv::ocl::HoughLinesDownload(const oclMat&, OutputArray, OutputArray) { throw_nogpu(); }
void
cv
::
ocl
::
HoughCircles
(
const
oclMat
&
,
oclMat
&
,
int
,
float
,
float
,
int
,
int
,
int
,
int
,
int
)
{
throw_nogpu
();
}
void
cv
::
ocl
::
HoughCircles
(
const
oclMat
&
,
oclMat
&
,
HoughCirclesBuf
&
,
int
,
float
,
float
,
int
,
int
,
int
,
int
,
int
)
{
throw_nogpu
();
}
void
cv
::
ocl
::
HoughCirclesDownload
(
const
oclMat
&
,
OutputArray
)
{
throw_nogpu
();
}
// Ptr<GeneralizedHough_GPU> cv::ocl::GeneralizedHough_GPU::create(int) { throw_nogpu(); return Ptr<GeneralizedHough_GPU>(); }
// cv::ocl::GeneralizedHough_GPU::~GeneralizedHough_GPU() {}
// void cv::ocl::GeneralizedHough_GPU::setTemplate(const oclMat&, int, Point) { throw_nogpu(); }
// void cv::ocl::GeneralizedHough_GPU::setTemplate(const oclMat&, const oclMat&, const oclMat&, Point) { throw_nogpu(); }
// void cv::ocl::GeneralizedHough_GPU::detect(const oclMat&, oclMat&, int) { throw_nogpu(); }
// void cv::ocl::GeneralizedHough_GPU::detect(const oclMat&, const oclMat&, const oclMat&, oclMat&) { throw_nogpu(); }
// void cv::ocl::GeneralizedHough_GPU::download(const oclMat&, OutputArray, OutputArray) { throw_nogpu(); }
// void cv::ocl::GeneralizedHough_GPU::release() {}
#else
/* !defined (HAVE_OPENCL) */
namespace
cv
{
namespace
ocl
{
int
buildPointList_gpu
(
const
oclMat
&
src
,
unsigned
int
*
list
);
#define MUL_UP(a, b) ((a)/(b)+1)*(b)
namespace
cv
{
namespace
ocl
{
///////////////////////////OpenCL kernel strings///////////////////////////
extern
const
char
*
hough
;
extern
const
char
*
imgproc_hough
;
namespace
hough
{
int
buildPointList_gpu
(
const
oclMat
&
src
,
oclMat
&
list
);
void
circlesAccumCenters_gpu
(
const
unsigned
int
*
list
,
int
count
,
const
oclMat
&
dx
,
const
oclMat
&
dy
,
oclMat
&
accum
,
int
minRadius
,
int
maxRadius
,
float
idp
);
int
buildCentersList_gpu
(
const
oclMat
&
accum
,
oclMat
&
centers
,
int
threshold
);
int
circlesAccumRadius_gpu
(
const
oclMat
&
centers
,
int
centersCount
,
const
oclMat
&
list
,
int
count
,
oclMat
&
circles
,
int
maxCircles
,
float
dp
,
int
minRadius
,
int
maxRadius
,
int
threshold
);
}
}}
...
...
@@ -82,9 +80,9 @@ namespace cv { namespace ocl
//////////////////////////////////////////////////////////
// common functions
namespace
cv
{
namespace
ocl
namespace
cv
{
namespace
ocl
{
namespace
hough
{
int
buildPointList_gpu
(
const
oclMat
&
src
,
unsigned
int
*
list
)
int
buildPointList_gpu
(
const
oclMat
&
src
,
oclMat
&
list
)
{
const
int
PIXELS_PER_THREAD
=
16
;
...
...
@@ -102,8 +100,8 @@ namespace cv { namespace ocl
size_t
localThreads
[
3
]
=
{
blkSizeX
,
blkSizeY
,
1
};
const
int
PIXELS_PER_BLOCK
=
blkSizeX
*
PIXELS_PER_THREAD
;
const
size_t
glbSizeX
=
src
.
cols
%
(
PIXELS_PER_BLOCK
)
==
0
?
src
.
cols
:
(
src
.
cols
/
PIXELS_PER_BLOCK
+
1
)
*
PIXELS_PER_BLOCK
;
const
size_t
glbSizeY
=
src
.
rows
%
blkSizeY
==
0
?
src
.
rows
:
(
src
.
rows
/
blkSizeY
+
1
)
*
blkSizeY
;
const
size_t
glbSizeX
=
src
.
cols
%
(
PIXELS_PER_BLOCK
)
==
0
?
src
.
cols
:
MUL_UP
(
src
.
cols
,
PIXELS_PER_BLOCK
)
;
const
size_t
glbSizeY
=
src
.
rows
%
blkSizeY
==
0
?
src
.
rows
:
MUL_UP
(
src
.
rows
,
blkSizeY
)
;
size_t
globalThreads
[
3
]
=
{
glbSizeX
,
glbSizeY
,
1
};
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
...
...
@@ -111,110 +109,141 @@ namespace cv { namespace ocl
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
list
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
list
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
counter
));
openCLExecuteKernel
(
src
.
clCxt
,
&
hough
,
"buildPointList"
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
openCLExecuteKernel
(
src
.
clCxt
,
&
imgproc_
hough
,
"buildPointList"
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
openCLSafeCall
(
clEnqueueReadBuffer
(
src
.
clCxt
->
impl
->
clCmdQueue
,
counter
,
CL_TRUE
,
0
,
sizeof
(
int
),
&
totalCount
,
0
,
NULL
,
NULL
));
openCLSafeCall
(
clReleaseMemObject
(
counter
));
return
totalCount
;
}
}}
//////////////////////////////////////////////////////////
// HoughLines
// namespace cv { namespace ocl { namespace device
// {
// namespace hough
// {
// void linesAccum_gpu(const unsigned int* list, int count, PtrStepSzi accum, float rho, float theta, size_t sharedMemPerBlock, bool has20);
// int linesGetResult_gpu(PtrStepSzi accum, float2* out, int* votes, int maxSize, float rho, float theta, int threshold, bool doSort);
// }
// }}}
// void cv::ocl::HoughLines(const oclMat& src, oclMat& lines, float rho, float theta, int threshold, bool doSort, int maxLines)
// {
// HoughLinesBuf buf;
// HoughLines(src, lines, buf, rho, theta, threshold, doSort, maxLines);
// }
// void cv::ocl::HoughLines(const oclMat& src, oclMat& lines, HoughLinesBuf& buf, float rho, float theta, int threshold, bool doSort, int maxLines)
// {
// using namespace cv::ocl::device::hough;
// CV_Assert(src.type() == CV_8UC1);
// CV_Assert(src.cols < std::numeric_limits<unsigned short>::max());
// CV_Assert(src.rows < std::numeric_limits<unsigned short>::max());
// ensureSizeIsEnough(1, src.size().area(), CV_32SC1, buf.list);
// unsigned int* srcPoints = buf.list.ptr<unsigned int>();
// const int pointsCount = buildPointList_gpu(src, srcPoints);
// if (pointsCount == 0)
// {
// lines.release();
// return;
// }
// const int numangle = cvRound(CV_PI / theta);
// const int numrho = cvRound(((src.cols + src.rows) * 2 + 1) / rho);
// CV_Assert(numangle > 0 && numrho > 0);
// ensureSizeIsEnough(numangle + 2, numrho + 2, CV_32SC1, buf.accum);
// buf.accum.setTo(Scalar::all(0));
// DeviceInfo devInfo;
// linesAccum_gpu(srcPoints, pointsCount, buf.accum, rho, theta, devInfo.sharedMemPerBlock(), devInfo.supports(FEATURE_SET_COMPUTE_20));
// ensureSizeIsEnough(2, maxLines, CV_32FC2, lines);
// int linesCount = linesGetResult_gpu(buf.accum, lines.ptr<float2>(0), lines.ptr<int>(1), maxLines, rho, theta, threshold, doSort);
// if (linesCount > 0)
// lines.cols = linesCount;
// else
// lines.release();
// }
// void cv::ocl::HoughLinesDownload(const oclMat& d_lines, OutputArray h_lines_, OutputArray h_votes_)
// {
// if (d_lines.empty())
// {
// h_lines_.release();
// if (h_votes_.needed())
// h_votes_.release();
// return;
// }
// CV_Assert(d_lines.rows == 2 && d_lines.type() == CV_32FC2);
// h_lines_.create(1, d_lines.cols, CV_32FC2);
// Mat h_lines = h_lines_.getMat();
// d_lines.row(0).download(h_lines);
// if (h_votes_.needed())
// {
// h_votes_.create(1, d_lines.cols, CV_32SC1);
// Mat h_votes = h_votes_.getMat();
// oclMat d_votes(1, d_lines.cols, CV_32SC1, const_cast<int*>(d_lines.ptr<int>(1)));
// d_votes.download(h_votes);
// }
// }
}}}
//////////////////////////////////////////////////////////
// HoughCircles
// namespace cv { namespace ocl
// {
// namespace hough
// {
// void circlesAccumCenters_gpu(const unsigned int* list, int count, PtrStepi dx, PtrStepi dy, PtrStepSzi accum, int minRadius, int maxRadius, float idp);
// int buildCentersList_gpu(PtrStepSzi accum, unsigned int* centers, int threshold);
// int circlesAccumRadius_gpu(const unsigned int* centers, int centersCount, const unsigned int* list, int count,
// float3* circles, int maxCircles, float dp, int minRadius, int maxRadius, int threshold, bool has20);
// }
// }}
namespace
cv
{
namespace
ocl
{
namespace
hough
{
void
circlesAccumCenters_gpu
(
const
oclMat
&
list
,
int
count
,
const
oclMat
&
dx
,
const
oclMat
&
dy
,
oclMat
&
accum
,
int
minRadius
,
int
maxRadius
,
float
idp
)
{
const
size_t
blkSizeX
=
256
;
size_t
localThreads
[
3
]
=
{
256
,
1
,
1
};
const
size_t
glbSizeX
=
count
%
blkSizeX
==
0
?
count
:
MUL_UP
(
count
,
blkSizeX
);
size_t
globalThreads
[
3
]
=
{
glbSizeX
,
1
,
1
};
const
int
width
=
accum
.
cols
-
2
;
const
int
height
=
accum
.
rows
-
2
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
list
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
count
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dx
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dx
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dy
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dy
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
accum
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
accum
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
width
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
height
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
minRadius
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
maxRadius
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
idp
));
openCLExecuteKernel
(
accum
.
clCxt
,
&
imgproc_hough
,
"circlesAccumCenters"
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
int
buildCentersList_gpu
(
const
oclMat
&
accum
,
oclMat
&
centers
,
int
threshold
)
{
int
totalCount
=
0
;
int
err
=
CL_SUCCESS
;
cl_mem
counter
=
clCreateBuffer
(
accum
.
clCxt
->
impl
->
clContext
,
CL_MEM_COPY_HOST_PTR
,
sizeof
(
int
),
&
totalCount
,
&
err
);
openCLSafeCall
(
err
);
const
size_t
blkSizeX
=
32
;
const
size_t
blkSizeY
=
8
;
size_t
localThreads
[
3
]
=
{
blkSizeX
,
blkSizeY
,
1
};
const
size_t
glbSizeX
=
(
accum
.
cols
-
2
)
%
blkSizeX
==
0
?
accum
.
cols
-
2
:
MUL_UP
(
accum
.
cols
-
2
,
blkSizeX
);
const
size_t
glbSizeY
=
(
accum
.
rows
-
2
)
%
blkSizeY
==
0
?
accum
.
rows
-
2
:
MUL_UP
(
accum
.
rows
-
2
,
blkSizeY
);
size_t
globalThreads
[
3
]
=
{
glbSizeX
,
glbSizeY
,
1
};
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
accum
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
accum
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
accum
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
accum
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
centers
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
threshold
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
counter
));
openCLExecuteKernel
(
accum
.
clCxt
,
&
imgproc_hough
,
"buildCentersList"
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
openCLSafeCall
(
clEnqueueReadBuffer
(
accum
.
clCxt
->
impl
->
clCmdQueue
,
counter
,
CL_TRUE
,
0
,
sizeof
(
int
),
&
totalCount
,
0
,
NULL
,
NULL
));
openCLSafeCall
(
clReleaseMemObject
(
counter
));
return
totalCount
;
}
int
circlesAccumRadius_gpu
(
const
oclMat
&
centers
,
int
centersCount
,
const
oclMat
&
list
,
int
count
,
oclMat
&
circles
,
int
maxCircles
,
float
dp
,
int
minRadius
,
int
maxRadius
,
int
threshold
)
{
int
totalCount
=
0
;
int
err
=
CL_SUCCESS
;
cl_mem
counter
=
clCreateBuffer
(
circles
.
clCxt
->
impl
->
clContext
,
CL_MEM_COPY_HOST_PTR
,
sizeof
(
int
),
&
totalCount
,
&
err
);
openCLSafeCall
(
err
);
const
size_t
blkSizeX
=
circles
.
clCxt
->
impl
->
maxWorkGroupSize
;
size_t
localThreads
[
3
]
=
{
blkSizeX
,
1
,
1
};
const
size_t
glbSizeX
=
centersCount
*
blkSizeX
;
size_t
globalThreads
[
3
]
=
{
glbSizeX
,
1
,
1
};
const
int
histSize
=
maxRadius
-
minRadius
+
1
;
size_t
smemSize
=
(
histSize
+
2
)
*
sizeof
(
int
);
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
centers
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
list
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
count
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
circles
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
maxCircles
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
dp
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
minRadius
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
maxRadius
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
histSize
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
threshold
));
args
.
push_back
(
make_pair
(
smemSize
,
(
void
*
)
NULL
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
counter
));
CV_Assert
(
circles
.
offset
==
0
);
openCLExecuteKernel
(
circles
.
clCxt
,
&
imgproc_hough
,
"circlesAccumRadius"
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
openCLSafeCall
(
clEnqueueReadBuffer
(
circles
.
clCxt
->
impl
->
clCmdQueue
,
counter
,
CL_TRUE
,
0
,
sizeof
(
int
),
&
totalCount
,
0
,
NULL
,
NULL
));
openCLSafeCall
(
clReleaseMemObject
(
counter
));
totalCount
=
::
min
(
totalCount
,
maxCircles
);
return
totalCount
;
}
}}}
// namespace cv { namespace ocl { namespace hough
void
cv
::
ocl
::
HoughCircles
(
const
oclMat
&
src
,
oclMat
&
circles
,
int
method
,
float
dp
,
float
minDist
,
int
cannyThreshold
,
int
votesThreshold
,
int
minRadius
,
int
maxRadius
,
int
maxCircles
)
{
...
...
@@ -239,119 +268,140 @@ void cv::ocl::HoughCircles(const oclMat& src, oclMat& circles, HoughCirclesBuf&
cv
::
ocl
::
Canny
(
src
,
buf
.
cannyBuf
,
buf
.
edges
,
std
::
max
(
cannyThreshold
/
2
,
1
),
cannyThreshold
);
ensureSizeIsEnough
(
2
,
src
.
size
().
area
(),
CV_32SC1
,
buf
.
list
);
// unsigned int* srcPoints = buf.list.ptr<unsigned int>(0);
unsigned
int
*
srcPoints
=
(
unsigned
int
*
)
buf
.
list
.
data
;
// unsigned int* centers = buf.list.ptr<unsigned int>(1);
unsigned
int
*
centers
=
(
unsigned
int
*
)
buf
.
list
.
data
+
buf
.
list
.
step
;
const
int
pointsCount
=
buildPointList_gpu
(
buf
.
edges
,
srcPoints
);
//std::cout << "pointsCount: " << pointsCount << std::endl;
ensureSizeIsEnough
(
1
,
src
.
size
().
area
(),
CV_32SC1
,
buf
.
srcPoints
);
const
int
pointsCount
=
hough
::
buildPointList_gpu
(
buf
.
edges
,
buf
.
srcPoints
);
if
(
pointsCount
==
0
)
{
circles
.
release
();
return
;
}
//
ensureSizeIsEnough(cvCeil(src.rows * idp) + 2, cvCeil(src.cols * idp) + 2, CV_32SC1, buf.accum);
//
buf.accum.setTo(Scalar::all(0));
ensureSizeIsEnough
(
cvCeil
(
src
.
rows
*
idp
)
+
2
,
cvCeil
(
src
.
cols
*
idp
)
+
2
,
CV_32SC1
,
buf
.
accum
);
buf
.
accum
.
setTo
(
Scalar
::
all
(
0
));
// circlesAccumCenters_gpu(
srcPoints, pointsCount, buf.cannyBuf.dx, buf.cannyBuf.dy, buf.accum, minRadius, maxRadius, idp);
hough
::
circlesAccumCenters_gpu
(
buf
.
srcPoints
,
pointsCount
,
buf
.
cannyBuf
.
dx
,
buf
.
cannyBuf
.
dy
,
buf
.
accum
,
minRadius
,
maxRadius
,
idp
);
// int centersCount = buildCentersList_gpu(buf.accum, centers, votesThreshold);
// if (centersCount == 0)
// {
// circles.release();
// return;
// }
ensureSizeIsEnough
(
1
,
src
.
size
().
area
(),
CV_32SC1
,
buf
.
centers
);
int
centersCount
=
hough
::
buildCentersList_gpu
(
buf
.
accum
,
buf
.
centers
,
votesThreshold
);
if
(
centersCount
==
0
)
{
circles
.
release
();
return
;
}
//
if (minDist > 1)
//
{
// cv::AutoBuffer<ushort2
> oldBuf_(centersCount);
// cv::AutoBuffer<ushort2
> newBuf_(centersCount);
//
int newCount = 0;
if
(
minDist
>
1
)
{
cv
::
AutoBuffer
<
unsigned
int
>
oldBuf_
(
centersCount
);
cv
::
AutoBuffer
<
unsigned
int
>
newBuf_
(
centersCount
);
int
newCount
=
0
;
// ushort2
* oldBuf = oldBuf_;
// ushort2
* newBuf = newBuf_;
unsigned
int
*
oldBuf
=
oldBuf_
;
unsigned
int
*
newBuf
=
newBuf_
;
// cudaSafeCall( cudaMemcpy(oldBuf, centers, centersCount * sizeof(ushort2), cudaMemcpyDeviceToHost) );
openCLSafeCall
(
clEnqueueReadBuffer
(
buf
.
centers
.
clCxt
->
impl
->
clCmdQueue
,
(
cl_mem
)
buf
.
centers
.
data
,
CL_TRUE
,
0
,
centersCount
*
sizeof
(
unsigned
int
),
oldBuf
,
0
,
NULL
,
NULL
));
// const int cellSize = cvRound(minDist);
// const int gridWidth = (src.cols + cellSize - 1) / cellSize;
// const int gridHeight = (src.rows + cellSize - 1) / cellSize;
// std::vector< std::vector<ushort2> > grid(gridWidth * gridHeight);
const
int
cellSize
=
cvRound
(
minDist
);
const
int
gridWidth
=
(
src
.
cols
+
cellSize
-
1
)
/
cellSize
;
const
int
gridHeight
=
(
src
.
rows
+
cellSize
-
1
)
/
cellSize
;
// const float minDist2 = minDist * minDist
;
std
::
vector
<
std
::
vector
<
unsigned
int
>
>
grid
(
gridWidth
*
gridHeight
)
;
// for (int i = 0; i < centersCount; ++i)
// {
// ushort2 p = oldBuf[i];
const
float
minDist2
=
minDist
*
minDist
;
// bool good = true;
for
(
int
i
=
0
;
i
<
centersCount
;
++
i
)
{
unsigned
int
p
=
oldBuf
[
i
];
const
int
px
=
p
&
0xFFFF
;
const
int
py
=
(
p
>>
16
)
&
0xFFFF
;
// int xCell = static_cast<int>(p.x / cellSize);
// int yCell = static_cast<int>(p.y / cellSize);
bool
good
=
true
;
// int x1 = xCell - 1;
// int y1 = yCell - 1;
// int x2 = xCell + 1;
// int y2 = yCell + 1;
int
xCell
=
static_cast
<
int
>
(
px
/
cellSize
);
int
yCell
=
static_cast
<
int
>
(
py
/
cellSize
);
// // boundary check
// x1 = std::max(0, x1);
// y1 = std::max(0, y1);
// x2 = std::min(gridWidth - 1, x2);
// y2 = std::min(gridHeight - 1, y2);
int
x1
=
xCell
-
1
;
int
y1
=
yCell
-
1
;
int
x2
=
xCell
+
1
;
int
y2
=
yCell
+
1
;
// for (int yy = y1; yy <= y2; ++yy)
// {
// for (int xx = x1; xx <= x2; ++xx)
// {
// vector<ushort2>& m = grid[yy * gridWidth + xx]
;
// boundary check
x1
=
std
::
max
(
0
,
x1
);
y1
=
std
::
max
(
0
,
y1
);
x2
=
std
::
min
(
gridWidth
-
1
,
x2
);
y2
=
std
::
min
(
gridHeight
-
1
,
y2
)
;
// for(size_t j = 0; j < m.size(); ++j)
// {
// float dx = (float)(p.x - m[j].x);
// float dy = (float)(p.y - m[j].y);
for
(
int
yy
=
y1
;
yy
<=
y2
;
++
yy
)
{
for
(
int
xx
=
x1
;
xx
<=
x2
;
++
xx
)
{
vector
<
unsigned
int
>&
m
=
grid
[
yy
*
gridWidth
+
xx
];
for
(
size_t
j
=
0
;
j
<
m
.
size
();
++
j
)
{
const
int
val
=
m
[
j
];
const
int
jx
=
val
&
0xFFFF
;
const
int
jy
=
(
val
>>
16
)
&
0xFFFF
;
// if (dx * dx + dy * dy < minDist2)
// {
// good = false;
// goto break_out;
// }
// }
// }
// }
float
dx
=
(
float
)(
px
-
jx
);
float
dy
=
(
float
)(
py
-
jy
);
// break_out:
if
(
dx
*
dx
+
dy
*
dy
<
minDist2
)
{
good
=
false
;
goto
break_out
;
}
}
}
}
// if(good)
// {
// grid[yCell * gridWidth + xCell].push_back(p);
break_out
:
// newBuf[newCount++] = p;
// }
// }
if
(
good
)
{
grid
[
yCell
*
gridWidth
+
xCell
].
push_back
(
p
);
newBuf
[
newCount
++
]
=
p
;
}
}
// cudaSafeCall( cudaMemcpy(centers, newBuf, newCount * sizeof(unsigned int), cudaMemcpyHostToDevice) );
// centersCount = newCount;
// }
openCLSafeCall
(
clEnqueueWriteBuffer
(
buf
.
centers
.
clCxt
->
impl
->
clCmdQueue
,
(
cl_mem
)
buf
.
centers
.
data
,
CL_TRUE
,
0
,
newCount
*
sizeof
(
unsigned
int
),
newBuf
,
0
,
0
,
0
));
centersCount
=
newCount
;
}
//
ensureSizeIsEnough(1, maxCircles, CV_32FC3, circles);
ensureSizeIsEnough
(
1
,
maxCircles
,
CV_32FC3
,
circles
);
// DeviceInfo devInfo;
// const int circlesCount = circlesAccumRadius_gpu(centers, centersCount, srcPoints, pointsCount, circles.ptr<float3>(), maxCircles,
// dp, minRadius, maxRadius, votesThreshold, devInfo.supports(FEATURE_SET_COMPUTE_20));
const
int
circlesCount
=
hough
::
circlesAccumRadius_gpu
(
buf
.
centers
,
centersCount
,
buf
.
srcPoints
,
pointsCount
,
circles
,
maxCircles
,
dp
,
minRadius
,
maxRadius
,
votesThreshold
);
//
if (circlesCount > 0)
//
circles.cols = circlesCount;
//
else
//
circles.release();
if
(
circlesCount
>
0
)
circles
.
cols
=
circlesCount
;
else
circles
.
release
();
}
void
cv
::
ocl
::
HoughCirclesDownload
(
const
oclMat
&
d_circles
,
cv
::
OutputArray
h_circles_
)
{
// FIX ME: garbage values are copied!
CV_Error
(
CV_StsNotImplemented
,
"HoughCirclesDownload is not implemented"
);
if
(
d_circles
.
empty
())
{
h_circles_
.
release
();
...
...
modules/ocl/src/kernels/hough.cl
→
modules/ocl/src/kernels/
imgproc_
hough.cl
View file @
13c44dd3
...
...
@@ -14,6 +14,8 @@
//
Copyright
(
C
)
2009
,
Willow
Garage
Inc.,
all
rights
reserved.
//
Third
party
copyrights
are
property
of
their
respective
owners.
//
//
Modified
by
Seunghoon
Park
(
pclove1@gmail.com
)
//
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
//
are
permitted
provided
that
the
following
conditions
are
met:
//
...
...
@@ -48,6 +50,7 @@
#
define
PIXELS_PER_THREAD
16
//
TODO:
add
offset
to
support
ROI
__kernel
void
buildPointList
(
__global
const
uchar*
src,
int
cols,
int
rows,
...
...
@@ -112,196 +115,168 @@ __kernel void buildPointList(__global const uchar* src,
////////////////////////////////////////////////////////////////////////
// circlesAccumCenters
// __global__ void circlesAccumCenters(const unsigned int* list, const int count, const PtrStepi dx, const PtrStepi dy,
// PtrStepi accum, const int width, const int height, const int minRadius, const int maxRadius, const float idp)
// {
// const int SHIFT = 10;
// const int ONE = 1 << SHIFT;
// const int tid = blockIdx.x * blockDim.x + threadIdx.x;
// if (tid >= count)
// return;
// const unsigned int val = list[tid];
// TODO: add offset to support ROI
__kernel void circlesAccumCenters(__global const unsigned int* list,
const int count,
__global const int* dx,
const int dxStep,
__global const int* dy,
const int dyStep,
__global int* accum,
const int accumStep,
const int width,
const int height,
const int minRadius,
const int maxRadius,
const float idp)
{
const int dxStepInPixel = dxStep / sizeof(int);
const int dyStepInPixel = dyStep / sizeof(int);
const int accumStepInPixel = accumStep / sizeof(int);
// const int x = (val & 0xFFFF)
;
// const int y = (val >> 16) & 0xFFFF
;
const int SHIFT = 10
;
const int ONE = 1 << SHIFT
;
// const int vx = dx(y, x)
;
// const int vy = dy(y, x
);
// const int tid = blockIdx.x * blockDim.x + threadIdx.x
;
const int wid = get_global_id(0
);
// if (vx == 0 && vy == 0
)
//
return;
if (wid >= count
)
return;
// const float mag = ::sqrtf(vx * vx + vy * vy)
;
const unsigned int val = list[wid]
;
// const int x0 = __float2int_rn((x * idp) * ONE
);
// const int y0 = __float2int_rn((y * idp) * ONE)
;
const int x = (val & 0xFFFF
);
const int y = (val >> 16) & 0xFFFF
;
// int sx = __float2int_rn((vx * idp) * ONE / mag)
;
// int sy = __float2int_rn((vy * idp) * ONE / mag)
;
const int vx = dx[mad24(y, dxStepInPixel, x)]
;
const int vy = dy[mad24(y, dyStepInPixel, x)]
;
// // Step from minRadius to maxRadius in both directions of the gradient
// for (int k1 = 0; k1 < 2; ++k1)
// {
// int x1 = x0 + minRadius * sx;
// int y1 = y0 + minRadius * sy;
if (vx == 0 && vy == 0)
return;
// for (int r = minRadius; r <= maxRadius; x1 += sx, y1 += sy, ++r)
// {
// const int x2 = x1 >> SHIFT;
// const int y2 = y1 >> SHIFT;
const float mag = sqrt(convert_float(vx * vx + vy * vy));
// if (x2 < 0 || x2 >= width || y2 < 0 || y2 >= height)
// break
;
const int x0 = convert_int_rte((x * idp) * ONE);
const int y0 = convert_int_rte((y * idp) * ONE)
;
// ::atomicAdd(accum.ptr(y2 + 1) + x2 + 1, 1
);
// }
int sx = convert_int_rte((vx * idp) * ONE / mag
);
int sy = convert_int_rte((vy * idp) * ONE / mag);
// sx = -sx;
// sy = -sy;
// }
// }
// Step from minRadius to maxRadius in both directions of the gradient
for (int k1 = 0; k1 < 2; ++k1)
{
int x1 = x0 + minRadius * sx;
int y1 = y0 + minRadius * sy;
// void circlesAccumCenters_gpu(const unsigned int* list, int count, PtrStepi dx, PtrStepi dy, PtrStepSzi accum, int minRadius, int maxRadius, float idp
)
//
{
// const dim3 block(256)
;
// const dim3 grid(divUp(count, block.x))
;
for (int r = minRadius; r <= maxRadius; x1 += sx, y1 += sy, ++r
)
{
const int x2 = x1 >> SHIFT
;
const int y2 = y1 >> SHIFT
;
// cudaSafeCall( cudaFuncSetCacheConfig(circlesAccumCenters, cudaFuncCachePreferL1) );
if (x2 < 0 || x2 >= width || y2 < 0 || y2 >= height)
break;
// circlesAccumCenters<<<grid, block>>>(list, count, dx, dy, accum, accum.cols - 2, accum.rows - 2, minRadius, maxRadius, idp
);
// cudaSafeCall( cudaGetLastError() );
atomic_add(&accum[mad24(y2+1, accumStepInPixel, x2+1)], 1
);
}
// cudaSafeCall( cudaDeviceSynchronize() );
// }
sx = -sx;
sy = -sy;
}
}
// ////////////////////////////////////////////////////////////////////////
// // buildCentersList
// __global__ void buildCentersList(const PtrStepSzi accum, unsigned int* centers, const int threshold)
// {
// const int x = blockIdx.x * blockDim.x + threadIdx.x;
// const int y = blockIdx.y * blockDim.y + threadIdx.y;
// if (x < accum.cols - 2 && y < accum.rows - 2)
// {
// const int top = accum(y, x + 1);
// const int left = accum(y + 1, x);
// const int cur = accum(y + 1, x + 1);
// const int right = accum(y + 1, x + 2);
// const int bottom = accum(y + 2, x + 1);
// if (cur > threshold && cur > top && cur >= bottom && cur > left && cur >= right)
// {
// const unsigned int val = (y << 16) |
x
;
//
const
int
idx
=
:
:atomicAdd
(
&g_counter,
1
)
;
//
centers[idx]
=
val
;
//
}
//
}
//
}
//
int
buildCentersList_gpu
(
PtrStepSzi
accum,
unsigned
int*
centers,
int
threshold
)
//
{
//
void*
counterPtr
;
//
cudaSafeCall
(
cudaGetSymbolAddress
(
&counterPtr,
g_counter
)
)
;
//
cudaSafeCall
(
cudaMemset
(
counterPtr,
0
,
sizeof
(
int
))
)
;
// TODO: add offset to support ROI
__kernel void buildCentersList(__global const int* accum,
const int accumCols,
const int accumRows,
const int accumStep,
__global unsigned int* centers,
const int threshold,
__global int* counter)
{
const int accumStepInPixel = accumStep/sizeof(int);
//
const
dim3
block
(
32
,
8
)
;
//
const
dim3
grid
(
divUp
(
accum.cols
-
2
,
block.x
)
,
divUp
(
accum.rows
-
2
,
block.y
)
)
;
const int x = get_global_id(0
);
const int y = get_global_id(1
);
//
cudaSafeCall
(
cudaFuncSetCacheConfig
(
buildCentersList,
cudaFuncCachePreferL1
)
)
;
if (x < accumCols - 2 && y < accumRows - 2)
{
const int top = accum[mad24(y, accumStepInPixel, x + 1)];
//
buildCentersList<<<grid,
block>>>
(
accum,
centers,
threshold
)
;
//
cudaSafeCall
(
cudaGetLastError
()
)
;
const int left = accum[mad24(y + 1, accumStepInPixel, x)];
const int cur = accum[mad24(y + 1, accumStepInPixel, x + 1)];
const int right = accum[mad24(y + 1, accumStepInPixel, x + 2)];
//
cudaSafeCall
(
cudaDeviceSynchronize
()
)
;
const int bottom = accum[mad24(y + 2, accumStepInPixel, x + 1)];
;
//
int
totalCount
;
//
cudaSafeCall
(
cudaMemcpy
(
&totalCount,
counterPtr,
sizeof
(
int
)
,
cudaMemcpyDeviceToHost
)
)
;
if (cur > threshold && cur > top && cur >= bottom && cur > left && cur >= right)
{
const unsigned int val = (y << 16) |
x
;
const
int
idx
=
atomic_add
(
counter,
1
)
;
centers[idx]
=
val
;
}
}
}
//
return
totalCount
;
//
}
//
////////////////////////////////////////////////////////////////////////
//
//
circlesAccumRadius
//
__global__
void
circlesAccumRadius
(
const
unsigned
int*
centers,
const
unsigned
int*
list,
const
int
count,
//
float3*
circles,
const
int
maxCircles,
const
float
dp,
//
const
int
minRadius,
const
int
maxRadius,
const
int
histSize,
const
int
threshold
)
//
{
//
int*
smem
=
DynamicSharedMem<int>
()
;
//
for
(
int
i
=
threadIdx.x
; i < histSize + 2; i += blockDim.x)
//
smem[i]
=
0
;
//
__syncthreads
()
;
//
unsigned
int
val
=
centers[blockIdx.x]
;
//
float
cx
=
(
val
&
0xFFFF
)
;
//
float
cy
=
(
val
>>
16
)
&
0xFFFF
;
//
cx
=
(
cx
+
0.5f
)
*
dp
;
//
cy
=
(
cy
+
0.5f
)
*
dp
;
//
for
(
int
i
=
threadIdx.x
; i < count; i += blockDim.x)
//
{
//
val
=
list[i]
;
//
const
int
x
=
(
val
&
0xFFFF
)
;
//
const
int
y
=
(
val
>>
16
)
&
0xFFFF
;
//
const
float
rad
=
:
:sqrtf
((
cx
-
x
)
*
(
cx
-
x
)
+
(
cy
-
y
)
*
(
cy
-
y
))
;
//
if
(
rad
>=
minRadius
&&
rad
<=
maxRadius
)
//
{
//
const
int
r
=
__float2int_rn
(
rad
-
minRadius
)
;
//
Emulation::smem::atomicAdd
(
&smem[r
+
1],
1
)
;
//
}
//
}
//
__syncthreads
()
;
//
TODO:
add
offset
to
support
ROI
__kernel
void
circlesAccumRadius
(
__global
const
unsigned
int*
centers,
__global
const
unsigned
int*
list,
const
int
count,
__global
float4*
circles,
const
int
maxCircles,
const
float
dp,
const
int
minRadius,
const
int
maxRadius,
const
int
histSize,
const
int
threshold,
__local
int*
smem,
__global
int*
counter
)
{
for
(
int
i
=
get_local_id
(
0
)
; i < histSize + 2; i += get_local_size(0))
smem[i]
=
0
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//
for
(
int
i
=
threadIdx.x
; i < histSize; i += blockDim.x)
//
{
//
const
int
curVotes
=
smem[i
+
1]
;
unsigned
int
val
=
centers[get_group_id
(
0
)
]
;
//
if
(
curVotes
>=
threshold
&&
curVotes
>
smem[i]
&&
curVotes
>=
smem[i
+
2]
)
//
{
//
const
int
ind
=
:
:atomicAdd
(
&g_counter,
1
)
;
//
if
(
ind
<
maxCircles
)
//
circles[ind]
=
make_float3
(
cx,
cy,
i
+
minRadius
)
;
//
}
//
}
//
}
float
cx
=
convert_float
(
val
&
0xFFFF
)
;
float
cy
=
convert_float
((
val
>>
16
)
&
0xFFFF
)
;
//
int
circlesAccumRadius_gpu
(
const
unsigned
int*
centers,
int
centersCount,
const
unsigned
int*
list,
int
count,
//
float3*
circles,
int
maxCircles,
float
dp,
int
minRadius,
int
maxRadius,
int
threshold,
bool
has20
)
//
{
//
void*
counterPtr
;
//
cudaSafeCall
(
cudaGetSymbolAddress
(
&counterPtr,
g_counter
)
)
;
cx
=
(
cx
+
0.5f
)
*
dp
;
cy
=
(
cy
+
0.5f
)
*
dp
;
//
cudaSafeCall
(
cudaMemset
(
counterPtr,
0
,
sizeof
(
int
))
)
;
for
(
int
i
=
get_local_id
(
0
)
; i < count; i += get_local_size(0))
{
val
=
list[i]
;
//
const
dim3
block
(
has20
?
1024
:
512
)
;
//
const
dim3
grid
(
centersCount
)
;
const
int
x
=
(
val
&
0xFFFF
)
;
const
int
y
=
(
val
>>
16
)
&
0xFFFF
;
//
const
int
histSize
=
maxRadius
-
minRadius
+
1
;
//
size_t
smemSize
=
(
histSize
+
2
)
*
sizeof
(
int
)
;
const
float
rad
=
sqrt
((
cx
-
x
)
*
(
cx
-
x
)
+
(
cy
-
y
)
*
(
cy
-
y
))
;
if
(
rad
>=
minRadius
&&
rad
<=
maxRadius
)
{
const
int
r
=
convert_int_rte
(
rad
-
minRadius
)
;
//
circlesAccumRadius<<<grid,
block,
smemSize>>>
(
centers,
list,
count,
circles,
maxCircles,
dp,
minRadius,
maxRadius,
histSize,
threshold
)
;
//
cudaSafeCall
(
cudaGetLastError
()
)
;
atomic_add
(
&smem[r
+
1],
1
)
;
}
}
//
cudaSafeCall
(
cudaDeviceSynchronize
()
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//
int
totalCount
;
//
cudaSafeCall
(
cudaMemcpy
(
&totalCount,
counterPtr,
sizeof
(
int
)
,
cudaMemcpyDeviceToHost
)
)
;
for
(
int
i
=
get_local_id
(
0
)
; i < histSize; i += get_local_size(0))
{
const
int
curVotes
=
smem[i
+
1]
;
//
totalCount
=
:
:min
(
totalCount,
maxCircles
)
;
if
(
curVotes
>=
threshold
&&
curVotes
>
smem[i]
&&
curVotes
>=
smem[i
+
2]
)
//
return
totalCount
;
//
}
{
const
int
ind
=
atomic_add
(
counter,
1
)
;
if
(
ind
<
maxCircles
)
{
circles[ind]
=
(
float4
)(
cx,
cy,
convert_float
(
i
+
minRadius
)
,
0.0f
)
;
}
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment