Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
ad86b803
Commit
ad86b803
authored
Dec 13, 2012
by
Suenghoon Park
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
finished buildPointList
parent
67ce03d7
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
690 additions
and
0 deletions
+690
-0
hough.cpp
modules/ocl/src/hough.cpp
+383
-0
hough.cl
modules/ocl/src/kernels/hough.cl
+307
-0
No files found.
modules/ocl/src/hough.cpp
0 → 100644
View file @
ad86b803
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Modified by Seunghoon Park(pclove1@gmail.com)
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using
namespace
std
;
using
namespace
cv
;
using
namespace
cv
::
ocl
;
#if !defined (HAVE_OPENCL)
// void cv::ocl::HoughLines(const oclMat&, oclMat&, float, float, int, bool, int) { throw_nogpu(); }
// void cv::ocl::HoughLines(const oclMat&, oclMat&, HoughLinesBuf&, float, float, int, bool, int) { throw_nogpu(); }
// void cv::ocl::HoughLinesDownload(const oclMat&, OutputArray, OutputArray) { throw_nogpu(); }
void
cv
::
ocl
::
HoughCircles
(
const
oclMat
&
,
oclMat
&
,
int
,
float
,
float
,
int
,
int
,
int
,
int
,
int
)
{
throw_nogpu
();
}
void
cv
::
ocl
::
HoughCircles
(
const
oclMat
&
,
oclMat
&
,
HoughCirclesBuf
&
,
int
,
float
,
float
,
int
,
int
,
int
,
int
,
int
)
{
throw_nogpu
();
}
void
cv
::
ocl
::
HoughCirclesDownload
(
const
oclMat
&
,
OutputArray
)
{
throw_nogpu
();
}
// Ptr<GeneralizedHough_GPU> cv::ocl::GeneralizedHough_GPU::create(int) { throw_nogpu(); return Ptr<GeneralizedHough_GPU>(); }
// cv::ocl::GeneralizedHough_GPU::~GeneralizedHough_GPU() {}
// void cv::ocl::GeneralizedHough_GPU::setTemplate(const oclMat&, int, Point) { throw_nogpu(); }
// void cv::ocl::GeneralizedHough_GPU::setTemplate(const oclMat&, const oclMat&, const oclMat&, Point) { throw_nogpu(); }
// void cv::ocl::GeneralizedHough_GPU::detect(const oclMat&, oclMat&, int) { throw_nogpu(); }
// void cv::ocl::GeneralizedHough_GPU::detect(const oclMat&, const oclMat&, const oclMat&, oclMat&) { throw_nogpu(); }
// void cv::ocl::GeneralizedHough_GPU::download(const oclMat&, OutputArray, OutputArray) { throw_nogpu(); }
// void cv::ocl::GeneralizedHough_GPU::release() {}
#else
/* !defined (HAVE_OPENCL) */
namespace
cv
{
namespace
ocl
{
int
buildPointList_gpu
(
const
oclMat
&
src
,
unsigned
int
*
list
);
///////////////////////////OpenCL kernel strings///////////////////////////
extern
const
char
*
hough
;
}}
//////////////////////////////////////////////////////////
// common functions
namespace
cv
{
namespace
ocl
{
int
buildPointList_gpu
(
const
oclMat
&
src
,
unsigned
int
*
list
)
{
const
int
PIXELS_PER_THREAD
=
16
;
// void* counterPtr;
// cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
// cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
int
totalCount
=
0
;
int
err
=
CL_SUCCESS
;
cl_mem
counter
=
clCreateBuffer
(
src
.
clCxt
->
impl
->
clContext
,
CL_MEM_COPY_HOST_PTR
,
// CL_MEM_READ_WRITE,
sizeof
(
int
),
&
totalCount
,
// NULL,
&
err
);
openCLSafeCall
(
err
);
// openCLSafeCall(clEnqueueWriteBuffer(src.clCxt->impl->clCmdQueue, counter, CL_TRUE, 0, sizeof(int), &totalCount, 0, 0, 0));
// const dim3 block(32, 4);
// const dim3 grid(divUp(src.cols, block.x * PIXELS_PER_THREAD), divUp(src.rows, block.y));
const
size_t
blkSizeX
=
32
;
const
size_t
blkSizeY
=
4
;
size_t
localThreads
[
3
]
=
{
blkSizeX
,
blkSizeY
,
1
};
const
int
PIXELS_PER_BLOCK
=
blkSizeX
*
PIXELS_PER_THREAD
;
const
size_t
glbSizeX
=
src
.
cols
%
(
PIXELS_PER_BLOCK
)
==
0
?
src
.
cols
:
(
src
.
cols
/
PIXELS_PER_BLOCK
+
1
)
*
PIXELS_PER_BLOCK
;
const
size_t
glbSizeY
=
src
.
rows
%
blkSizeY
==
0
?
src
.
rows
:
(
src
.
rows
/
blkSizeY
+
1
)
*
blkSizeY
;
size_t
globalThreads
[
3
]
=
{
glbSizeX
,
glbSizeY
,
1
};
// cudaSafeCall( cudaFuncSetCacheConfig(buildPointList<PIXELS_PER_THREAD>, cudaFuncCachePreferShared) );
// buildPointList<PIXELS_PER_THREAD><<<grid, block>>>(src, list);
// cudaSafeCall( cudaGetLastError() );
// cudaSafeCall( cudaDeviceSynchronize() );
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
list
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
counter
));
openCLExecuteKernel
(
src
.
clCxt
,
&
hough
,
"buildPointList"
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
// int totalCount;
// cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
openCLSafeCall
(
clEnqueueReadBuffer
(
src
.
clCxt
->
impl
->
clCmdQueue
,
counter
,
CL_TRUE
,
0
,
sizeof
(
int
),
&
totalCount
,
0
,
NULL
,
NULL
));
openCLSafeCall
(
clReleaseMemObject
(
counter
));
return
totalCount
;
}
}}
//////////////////////////////////////////////////////////
// HoughLines
// namespace cv { namespace ocl { namespace device
// {
// namespace hough
// {
// void linesAccum_gpu(const unsigned int* list, int count, PtrStepSzi accum, float rho, float theta, size_t sharedMemPerBlock, bool has20);
// int linesGetResult_gpu(PtrStepSzi accum, float2* out, int* votes, int maxSize, float rho, float theta, int threshold, bool doSort);
// }
// }}}
// void cv::ocl::HoughLines(const oclMat& src, oclMat& lines, float rho, float theta, int threshold, bool doSort, int maxLines)
// {
// HoughLinesBuf buf;
// HoughLines(src, lines, buf, rho, theta, threshold, doSort, maxLines);
// }
// void cv::ocl::HoughLines(const oclMat& src, oclMat& lines, HoughLinesBuf& buf, float rho, float theta, int threshold, bool doSort, int maxLines)
// {
// using namespace cv::ocl::device::hough;
// CV_Assert(src.type() == CV_8UC1);
// CV_Assert(src.cols < std::numeric_limits<unsigned short>::max());
// CV_Assert(src.rows < std::numeric_limits<unsigned short>::max());
// ensureSizeIsEnough(1, src.size().area(), CV_32SC1, buf.list);
// unsigned int* srcPoints = buf.list.ptr<unsigned int>();
// const int pointsCount = buildPointList_gpu(src, srcPoints);
// if (pointsCount == 0)
// {
// lines.release();
// return;
// }
// const int numangle = cvRound(CV_PI / theta);
// const int numrho = cvRound(((src.cols + src.rows) * 2 + 1) / rho);
// CV_Assert(numangle > 0 && numrho > 0);
// ensureSizeIsEnough(numangle + 2, numrho + 2, CV_32SC1, buf.accum);
// buf.accum.setTo(Scalar::all(0));
// DeviceInfo devInfo;
// linesAccum_gpu(srcPoints, pointsCount, buf.accum, rho, theta, devInfo.sharedMemPerBlock(), devInfo.supports(FEATURE_SET_COMPUTE_20));
// ensureSizeIsEnough(2, maxLines, CV_32FC2, lines);
// int linesCount = linesGetResult_gpu(buf.accum, lines.ptr<float2>(0), lines.ptr<int>(1), maxLines, rho, theta, threshold, doSort);
// if (linesCount > 0)
// lines.cols = linesCount;
// else
// lines.release();
// }
// void cv::ocl::HoughLinesDownload(const oclMat& d_lines, OutputArray h_lines_, OutputArray h_votes_)
// {
// if (d_lines.empty())
// {
// h_lines_.release();
// if (h_votes_.needed())
// h_votes_.release();
// return;
// }
// CV_Assert(d_lines.rows == 2 && d_lines.type() == CV_32FC2);
// h_lines_.create(1, d_lines.cols, CV_32FC2);
// Mat h_lines = h_lines_.getMat();
// d_lines.row(0).download(h_lines);
// if (h_votes_.needed())
// {
// h_votes_.create(1, d_lines.cols, CV_32SC1);
// Mat h_votes = h_votes_.getMat();
// oclMat d_votes(1, d_lines.cols, CV_32SC1, const_cast<int*>(d_lines.ptr<int>(1)));
// d_votes.download(h_votes);
// }
// }
//////////////////////////////////////////////////////////
// HoughCircles
// namespace cv { namespace ocl
// {
// namespace hough
// {
// void circlesAccumCenters_gpu(const unsigned int* list, int count, PtrStepi dx, PtrStepi dy, PtrStepSzi accum, int minRadius, int maxRadius, float idp);
// int buildCentersList_gpu(PtrStepSzi accum, unsigned int* centers, int threshold);
// int circlesAccumRadius_gpu(const unsigned int* centers, int centersCount, const unsigned int* list, int count,
// float3* circles, int maxCircles, float dp, int minRadius, int maxRadius, int threshold, bool has20);
// }
// }}
void
cv
::
ocl
::
HoughCircles
(
const
oclMat
&
src
,
oclMat
&
circles
,
int
method
,
float
dp
,
float
minDist
,
int
cannyThreshold
,
int
votesThreshold
,
int
minRadius
,
int
maxRadius
,
int
maxCircles
)
{
HoughCirclesBuf
buf
;
HoughCircles
(
src
,
circles
,
buf
,
method
,
dp
,
minDist
,
cannyThreshold
,
votesThreshold
,
minRadius
,
maxRadius
,
maxCircles
);
}
void
cv
::
ocl
::
HoughCircles
(
const
oclMat
&
src
,
oclMat
&
circles
,
HoughCirclesBuf
&
buf
,
int
method
,
float
dp
,
float
minDist
,
int
cannyThreshold
,
int
votesThreshold
,
int
minRadius
,
int
maxRadius
,
int
maxCircles
)
{
CV_Assert
(
src
.
type
()
==
CV_8UC1
);
CV_Assert
(
src
.
cols
<
std
::
numeric_limits
<
unsigned
short
>::
max
());
CV_Assert
(
src
.
rows
<
std
::
numeric_limits
<
unsigned
short
>::
max
());
CV_Assert
(
method
==
CV_HOUGH_GRADIENT
);
CV_Assert
(
dp
>
0
);
CV_Assert
(
minRadius
>
0
&&
maxRadius
>
minRadius
);
CV_Assert
(
cannyThreshold
>
0
);
CV_Assert
(
votesThreshold
>
0
);
CV_Assert
(
maxCircles
>
0
);
const
float
idp
=
1.0
f
/
dp
;
cv
::
ocl
::
Canny
(
src
,
buf
.
cannyBuf
,
buf
.
edges
,
std
::
max
(
cannyThreshold
/
2
,
1
),
cannyThreshold
);
ensureSizeIsEnough
(
2
,
src
.
size
().
area
(),
CV_32SC1
,
buf
.
list
);
// unsigned int* srcPoints = buf.list.ptr<unsigned int>(0);
unsigned
int
*
srcPoints
=
(
unsigned
int
*
)
buf
.
list
.
data
;
// unsigned int* centers = buf.list.ptr<unsigned int>(1);
unsigned
int
*
centers
=
(
unsigned
int
*
)
buf
.
list
.
data
+
buf
.
list
.
step
;
const
int
pointsCount
=
buildPointList_gpu
(
buf
.
edges
,
srcPoints
);
//std::cout << "pointsCount: " << pointsCount << std::endl;
if
(
pointsCount
==
0
)
{
circles
.
release
();
return
;
}
// ensureSizeIsEnough(cvCeil(src.rows * idp) + 2, cvCeil(src.cols * idp) + 2, CV_32SC1, buf.accum);
// buf.accum.setTo(Scalar::all(0));
// circlesAccumCenters_gpu(srcPoints, pointsCount, buf.cannyBuf.dx, buf.cannyBuf.dy, buf.accum, minRadius, maxRadius, idp);
// int centersCount = buildCentersList_gpu(buf.accum, centers, votesThreshold);
// if (centersCount == 0)
// {
// circles.release();
// return;
// }
// if (minDist > 1)
// {
// cv::AutoBuffer<ushort2> oldBuf_(centersCount);
// cv::AutoBuffer<ushort2> newBuf_(centersCount);
// int newCount = 0;
// ushort2* oldBuf = oldBuf_;
// ushort2* newBuf = newBuf_;
// cudaSafeCall( cudaMemcpy(oldBuf, centers, centersCount * sizeof(ushort2), cudaMemcpyDeviceToHost) );
// const int cellSize = cvRound(minDist);
// const int gridWidth = (src.cols + cellSize - 1) / cellSize;
// const int gridHeight = (src.rows + cellSize - 1) / cellSize;
// std::vector< std::vector<ushort2> > grid(gridWidth * gridHeight);
// const float minDist2 = minDist * minDist;
// for (int i = 0; i < centersCount; ++i)
// {
// ushort2 p = oldBuf[i];
// bool good = true;
// int xCell = static_cast<int>(p.x / cellSize);
// int yCell = static_cast<int>(p.y / cellSize);
// int x1 = xCell - 1;
// int y1 = yCell - 1;
// int x2 = xCell + 1;
// int y2 = yCell + 1;
// // boundary check
// x1 = std::max(0, x1);
// y1 = std::max(0, y1);
// x2 = std::min(gridWidth - 1, x2);
// y2 = std::min(gridHeight - 1, y2);
// for (int yy = y1; yy <= y2; ++yy)
// {
// for (int xx = x1; xx <= x2; ++xx)
// {
// vector<ushort2>& m = grid[yy * gridWidth + xx];
// for(size_t j = 0; j < m.size(); ++j)
// {
// float dx = (float)(p.x - m[j].x);
// float dy = (float)(p.y - m[j].y);
// if (dx * dx + dy * dy < minDist2)
// {
// good = false;
// goto break_out;
// }
// }
// }
// }
// break_out:
// if(good)
// {
// grid[yCell * gridWidth + xCell].push_back(p);
// newBuf[newCount++] = p;
// }
// }
// cudaSafeCall( cudaMemcpy(centers, newBuf, newCount * sizeof(unsigned int), cudaMemcpyHostToDevice) );
// centersCount = newCount;
// }
// ensureSizeIsEnough(1, maxCircles, CV_32FC3, circles);
// DeviceInfo devInfo;
// const int circlesCount = circlesAccumRadius_gpu(centers, centersCount, srcPoints, pointsCount, circles.ptr<float3>(), maxCircles,
// dp, minRadius, maxRadius, votesThreshold, devInfo.supports(FEATURE_SET_COMPUTE_20));
// if (circlesCount > 0)
// circles.cols = circlesCount;
// else
// circles.release();
}
void
cv
::
ocl
::
HoughCirclesDownload
(
const
oclMat
&
d_circles
,
cv
::
OutputArray
h_circles_
)
{
if
(
d_circles
.
empty
())
{
h_circles_
.
release
();
return
;
}
CV_Assert
(
d_circles
.
rows
==
1
&&
d_circles
.
type
()
==
CV_32FC3
);
h_circles_
.
create
(
1
,
d_circles
.
cols
,
CV_32FC3
);
Mat
h_circles
=
h_circles_
.
getMat
();
d_circles
.
download
(
h_circles
);
}
#endif
/* !defined (HAVE_OPENCL) */
modules/ocl/src/kernels/hough.cl
0 → 100644
View file @
ad86b803
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//
IMPORTANT:
READ
BEFORE
DOWNLOADING,
COPYING,
INSTALLING
OR
USING.
//
//
By
downloading,
copying,
installing
or
using
the
software
you
agree
to
this
license.
//
If
you
do
not
agree
to
this
license,
do
not
download,
install,
//
copy
or
use
the
software.
//
//
//
License
Agreement
//
For
Open
Source
Computer
Vision
Library
//
//
Copyright
(
C
)
2000-2008,
Intel
Corporation,
all
rights
reserved.
//
Copyright
(
C
)
2009
,
Willow
Garage
Inc.,
all
rights
reserved.
//
Third
party
copyrights
are
property
of
their
respective
owners.
//
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
//
are
permitted
provided
that
the
following
conditions
are
met:
//
//
*
Redistribution
's
of
source
code
must
retain
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer.
//
//
*
Redistribution
's
in
binary
form
must
reproduce
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer
in
the
documentation
//
and/or
other
materials
provided
with
the
distribution.
//
//
*
The
name
of
the
copyright
holders
may
not
be
used
to
endorse
or
promote
products
//
derived
from
this
software
without
specific
prior
written
permission.
//
//
This
software
is
provided
by
the
copyright
holders
and
contributors
"as is"
and
//
any
express
or
bpied
warranties,
including,
but
not
limited
to,
the
bpied
//
warranties
of
merchantability
and
fitness
for
a
particular
purpose
are
disclaimed.
//
In
no
event
shall
the
Intel
Corporation
or
contributors
be
liable
for
any
direct,
//
indirect,
incidental,
special,
exemplary,
or
consequential
damages
//
(
including,
but
not
limited
to,
procurement
of
substitute
goods
or
services
;
//
loss
of
use,
data,
or
profits
; or business interruption) however caused
//
and
on
any
theory
of
liability,
whether
in
contract,
strict
liability,
//
or
tort
(
including
negligence
or
otherwise
)
arising
in
any
way
out
of
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
//M*/
#
pragma
OPENCL
EXTENSION
cl_khr_global_int32_base_atomics
:
enable
#
pragma
OPENCL
EXTENSION
cl_khr_local_int32_base_atomics
:
enable
////////////////////////////////////////////////////////////////////////
//
buildPointList
#
define
PIXELS_PER_THREAD
16
__kernel
void
buildPointList
(
__global
const
uchar*
src,
int
cols,
int
rows,
int
step,
__global
unsigned
int*
list,
__global
int*
counter
)
{
__local
unsigned
int
s_queues[4][32
*
PIXELS_PER_THREAD]
;
__local
int
s_qsize[4]
;
__local
int
s_globStart[4]
;
const
int
x
=
get_group_id
(
0
)
*
get_local_size
(
0
)
*
PIXELS_PER_THREAD
+
get_local_id
(
0
)
;
const
int
y
=
get_global_id
(
1
)
;
if
(
get_local_id
(
0
)
==
0
)
s_qsize[get_local_id
(
1
)
]
=
0
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
y
<
rows
)
{
//
fill
the
queue
__global
const
uchar*
srcRow
=
&src[y
*
step]
;
for
(
int
i
=
0
,
xx
=
x
; i < PIXELS_PER_THREAD && xx < cols; ++i, xx += get_local_size(0))
{
if
(
srcRow[xx]
)
{
const
unsigned
int
val
=
(
y
<<
16
)
| xx;
const int qidx = atomic_add(&s_qsize[get_local_id(1)], 1);
s_queues[get_local_id(1)][qidx] = val;
}
}
}
barrier(CLK_LOCAL_MEM_FENCE);
// let one work-item reserve the space required in the global list
if (get_local_id(0) == 0 && get_local_id(1) == 0)
{
// find how many items are stored in each list
int totalSize = 0;
for (int i = 0; i < get_local_size(1); ++i)
{
s_globStart[i] = totalSize;
totalSize += s_qsize[i];
}
// calculate the offset in the global list
const int globalOffset = atomic_add(counter, totalSize);
for (int i = 0; i < get_local_size(1); ++i)
s_globStart[i] += globalOffset;
}
barrier(CLK_GLOBAL_MEM_FENCE);
// copy local queues to global queue
const int qsize = s_qsize[get_local_id(1)];
int gidx = s_globStart[get_local_id(1)] + get_local_id(0);
for(int i = get_local_id(0); i < qsize; i += get_local_size(0), gidx += get_local_size(0))
list[gidx] = s_queues[get_local_id(1)][i];
}
////////////////////////////////////////////////////////////////////////
// circlesAccumCenters
// __global__ void circlesAccumCenters(const unsigned int* list, const int count, const PtrStepi dx, const PtrStepi dy,
// PtrStepi accum, const int width, const int height, const int minRadius, const int maxRadius, const float idp)
// {
// const int SHIFT = 10;
// const int ONE = 1 << SHIFT;
// const int tid = blockIdx.x * blockDim.x + threadIdx.x;
// if (tid >= count)
// return;
// const unsigned int val = list[tid];
// const int x = (val & 0xFFFF);
// const int y = (val >> 16) & 0xFFFF;
// const int vx = dx(y, x);
// const int vy = dy(y, x);
// if (vx == 0 && vy == 0)
// return;
// const float mag = ::sqrtf(vx * vx + vy * vy);
// const int x0 = __float2int_rn((x * idp) * ONE);
// const int y0 = __float2int_rn((y * idp) * ONE);
// int sx = __float2int_rn((vx * idp) * ONE / mag);
// int sy = __float2int_rn((vy * idp) * ONE / mag);
// // Step from minRadius to maxRadius in both directions of the gradient
// for (int k1 = 0; k1 < 2; ++k1)
// {
// int x1 = x0 + minRadius * sx;
// int y1 = y0 + minRadius * sy;
// for (int r = minRadius; r <= maxRadius; x1 += sx, y1 += sy, ++r)
// {
// const int x2 = x1 >> SHIFT;
// const int y2 = y1 >> SHIFT;
// if (x2 < 0 || x2 >= width || y2 < 0 || y2 >= height)
// break;
// ::atomicAdd(accum.ptr(y2 + 1) + x2 + 1, 1);
// }
// sx = -sx;
// sy = -sy;
// }
// }
// void circlesAccumCenters_gpu(const unsigned int* list, int count, PtrStepi dx, PtrStepi dy, PtrStepSzi accum, int minRadius, int maxRadius, float idp)
// {
// const dim3 block(256);
// const dim3 grid(divUp(count, block.x));
// cudaSafeCall( cudaFuncSetCacheConfig(circlesAccumCenters, cudaFuncCachePreferL1) );
// circlesAccumCenters<<<grid, block>>>(list, count, dx, dy, accum, accum.cols - 2, accum.rows - 2, minRadius, maxRadius, idp);
// cudaSafeCall( cudaGetLastError() );
// cudaSafeCall( cudaDeviceSynchronize() );
// }
// ////////////////////////////////////////////////////////////////////////
// // buildCentersList
// __global__ void buildCentersList(const PtrStepSzi accum, unsigned int* centers, const int threshold)
// {
// const int x = blockIdx.x * blockDim.x + threadIdx.x;
// const int y = blockIdx.y * blockDim.y + threadIdx.y;
// if (x < accum.cols - 2 && y < accum.rows - 2)
// {
// const int top = accum(y, x + 1);
// const int left = accum(y + 1, x);
// const int cur = accum(y + 1, x + 1);
// const int right = accum(y + 1, x + 2);
// const int bottom = accum(y + 2, x + 1);
// if (cur > threshold && cur > top && cur >= bottom && cur > left && cur >= right)
// {
// const unsigned int val = (y << 16) |
x
;
//
const
int
idx
=
:
:atomicAdd
(
&g_counter,
1
)
;
//
centers[idx]
=
val
;
//
}
//
}
//
}
//
int
buildCentersList_gpu
(
PtrStepSzi
accum,
unsigned
int*
centers,
int
threshold
)
//
{
//
void*
counterPtr
;
//
cudaSafeCall
(
cudaGetSymbolAddress
(
&counterPtr,
g_counter
)
)
;
//
cudaSafeCall
(
cudaMemset
(
counterPtr,
0
,
sizeof
(
int
))
)
;
//
const
dim3
block
(
32
,
8
)
;
//
const
dim3
grid
(
divUp
(
accum.cols
-
2
,
block.x
)
,
divUp
(
accum.rows
-
2
,
block.y
))
;
//
cudaSafeCall
(
cudaFuncSetCacheConfig
(
buildCentersList,
cudaFuncCachePreferL1
)
)
;
//
buildCentersList<<<grid,
block>>>
(
accum,
centers,
threshold
)
;
//
cudaSafeCall
(
cudaGetLastError
()
)
;
//
cudaSafeCall
(
cudaDeviceSynchronize
()
)
;
//
int
totalCount
;
//
cudaSafeCall
(
cudaMemcpy
(
&totalCount,
counterPtr,
sizeof
(
int
)
,
cudaMemcpyDeviceToHost
)
)
;
//
return
totalCount
;
//
}
//
////////////////////////////////////////////////////////////////////////
//
//
circlesAccumRadius
//
__global__
void
circlesAccumRadius
(
const
unsigned
int*
centers,
const
unsigned
int*
list,
const
int
count,
//
float3*
circles,
const
int
maxCircles,
const
float
dp,
//
const
int
minRadius,
const
int
maxRadius,
const
int
histSize,
const
int
threshold
)
//
{
//
int*
smem
=
DynamicSharedMem<int>
()
;
//
for
(
int
i
=
threadIdx.x
; i < histSize + 2; i += blockDim.x)
//
smem[i]
=
0
;
//
__syncthreads
()
;
//
unsigned
int
val
=
centers[blockIdx.x]
;
//
float
cx
=
(
val
&
0xFFFF
)
;
//
float
cy
=
(
val
>>
16
)
&
0xFFFF
;
//
cx
=
(
cx
+
0.5f
)
*
dp
;
//
cy
=
(
cy
+
0.5f
)
*
dp
;
//
for
(
int
i
=
threadIdx.x
; i < count; i += blockDim.x)
//
{
//
val
=
list[i]
;
//
const
int
x
=
(
val
&
0xFFFF
)
;
//
const
int
y
=
(
val
>>
16
)
&
0xFFFF
;
//
const
float
rad
=
:
:sqrtf
((
cx
-
x
)
*
(
cx
-
x
)
+
(
cy
-
y
)
*
(
cy
-
y
))
;
//
if
(
rad
>=
minRadius
&&
rad
<=
maxRadius
)
//
{
//
const
int
r
=
__float2int_rn
(
rad
-
minRadius
)
;
//
Emulation::smem::atomicAdd
(
&smem[r
+
1],
1
)
;
//
}
//
}
//
__syncthreads
()
;
//
for
(
int
i
=
threadIdx.x
; i < histSize; i += blockDim.x)
//
{
//
const
int
curVotes
=
smem[i
+
1]
;
//
if
(
curVotes
>=
threshold
&&
curVotes
>
smem[i]
&&
curVotes
>=
smem[i
+
2]
)
//
{
//
const
int
ind
=
:
:atomicAdd
(
&g_counter,
1
)
;
//
if
(
ind
<
maxCircles
)
//
circles[ind]
=
make_float3
(
cx,
cy,
i
+
minRadius
)
;
//
}
//
}
//
}
//
int
circlesAccumRadius_gpu
(
const
unsigned
int*
centers,
int
centersCount,
const
unsigned
int*
list,
int
count,
//
float3*
circles,
int
maxCircles,
float
dp,
int
minRadius,
int
maxRadius,
int
threshold,
bool
has20
)
//
{
//
void*
counterPtr
;
//
cudaSafeCall
(
cudaGetSymbolAddress
(
&counterPtr,
g_counter
)
)
;
//
cudaSafeCall
(
cudaMemset
(
counterPtr,
0
,
sizeof
(
int
))
)
;
//
const
dim3
block
(
has20
?
1024
:
512
)
;
//
const
dim3
grid
(
centersCount
)
;
//
const
int
histSize
=
maxRadius
-
minRadius
+
1
;
//
size_t
smemSize
=
(
histSize
+
2
)
*
sizeof
(
int
)
;
//
circlesAccumRadius<<<grid,
block,
smemSize>>>
(
centers,
list,
count,
circles,
maxCircles,
dp,
minRadius,
maxRadius,
histSize,
threshold
)
;
//
cudaSafeCall
(
cudaGetLastError
()
)
;
//
cudaSafeCall
(
cudaDeviceSynchronize
()
)
;
//
int
totalCount
;
//
cudaSafeCall
(
cudaMemcpy
(
&totalCount,
counterPtr,
sizeof
(
int
)
,
cudaMemcpyDeviceToHost
)
)
;
//
totalCount
=
:
:min
(
totalCount,
maxCircles
)
;
//
return
totalCount
;
//
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment