Commit f614e354 authored by Vladislav Vinogradov's avatar Vladislav Vinogradov

split hough sources

parent 1d79e131
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/emulation.hpp"
namespace cv { namespace gpu { namespace cudev
{
namespace hough
{
__device__ int g_counter;
template <int PIXELS_PER_THREAD>
__global__ void buildPointList(const PtrStepSzb src, unsigned int* list)
{
__shared__ unsigned int s_queues[4][32 * PIXELS_PER_THREAD];
__shared__ int s_qsize[4];
__shared__ int s_globStart[4];
const int x = blockIdx.x * blockDim.x * PIXELS_PER_THREAD + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (threadIdx.x == 0)
s_qsize[threadIdx.y] = 0;
__syncthreads();
if (y < src.rows)
{
// fill the queue
const uchar* srcRow = src.ptr(y);
for (int i = 0, xx = x; i < PIXELS_PER_THREAD && xx < src.cols; ++i, xx += blockDim.x)
{
if (srcRow[xx])
{
const unsigned int val = (y << 16) | xx;
const int qidx = Emulation::smem::atomicAdd(&s_qsize[threadIdx.y], 1);
s_queues[threadIdx.y][qidx] = val;
}
}
}
__syncthreads();
// let one thread reserve the space required in the global list
if (threadIdx.x == 0 && threadIdx.y == 0)
{
// find how many items are stored in each list
int totalSize = 0;
for (int i = 0; i < blockDim.y; ++i)
{
s_globStart[i] = totalSize;
totalSize += s_qsize[i];
}
// calculate the offset in the global list
const int globalOffset = atomicAdd(&g_counter, totalSize);
for (int i = 0; i < blockDim.y; ++i)
s_globStart[i] += globalOffset;
}
__syncthreads();
// copy local queues to global queue
const int qsize = s_qsize[threadIdx.y];
int gidx = s_globStart[threadIdx.y] + threadIdx.x;
for(int i = threadIdx.x; i < qsize; i += blockDim.x, gidx += blockDim.x)
list[gidx] = s_queues[threadIdx.y][i];
}
int buildPointList_gpu(PtrStepSzb src, unsigned int* list)
{
const int PIXELS_PER_THREAD = 16;
void* counterPtr;
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
const dim3 block(32, 4);
const dim3 grid(divUp(src.cols, block.x * PIXELS_PER_THREAD), divUp(src.rows, block.y));
cudaSafeCall( cudaFuncSetCacheConfig(buildPointList<PIXELS_PER_THREAD>, cudaFuncCachePreferShared) );
buildPointList<PIXELS_PER_THREAD><<<grid, block>>>(src, list);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
int totalCount;
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
return totalCount;
}
}
}}}
#endif /* CUDA_DISABLER */
...@@ -43,654 +43,25 @@ ...@@ -43,654 +43,25 @@
#if !defined CUDA_DISABLER #if !defined CUDA_DISABLER
#include <thrust/device_ptr.h> #include <thrust/device_ptr.h>
#include <thrust/sort.h> #include <thrust/transform.h>
#include "opencv2/core/cuda/common.hpp" #include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/emulation.hpp" #include "opencv2/core/cuda/emulation.hpp"
#include "opencv2/core/cuda/vec_math.hpp" #include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/limits.hpp" #include "opencv2/opencv_modules.hpp"
#include "opencv2/core/cuda/dynamic_smem.hpp"
#ifdef HAVE_OPENCV_GPUARITHM
namespace cv { namespace gpu { namespace cudev namespace cv { namespace gpu { namespace cudev
{ {
namespace hough namespace ght
{ {
__device__ int g_counter; __device__ int g_counter;
////////////////////////////////////////////////////////////////////////
// buildPointList
template <int PIXELS_PER_THREAD>
__global__ void buildPointList(const PtrStepSzb src, unsigned int* list)
{
__shared__ unsigned int s_queues[4][32 * PIXELS_PER_THREAD];
__shared__ int s_qsize[4];
__shared__ int s_globStart[4];
const int x = blockIdx.x * blockDim.x * PIXELS_PER_THREAD + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (threadIdx.x == 0)
s_qsize[threadIdx.y] = 0;
__syncthreads();
if (y < src.rows)
{
// fill the queue
const uchar* srcRow = src.ptr(y);
for (int i = 0, xx = x; i < PIXELS_PER_THREAD && xx < src.cols; ++i, xx += blockDim.x)
{
if (srcRow[xx])
{
const unsigned int val = (y << 16) | xx;
const int qidx = Emulation::smem::atomicAdd(&s_qsize[threadIdx.y], 1);
s_queues[threadIdx.y][qidx] = val;
}
}
}
__syncthreads();
// let one thread reserve the space required in the global list
if (threadIdx.x == 0 && threadIdx.y == 0)
{
// find how many items are stored in each list
int totalSize = 0;
for (int i = 0; i < blockDim.y; ++i)
{
s_globStart[i] = totalSize;
totalSize += s_qsize[i];
}
// calculate the offset in the global list
const int globalOffset = atomicAdd(&g_counter, totalSize);
for (int i = 0; i < blockDim.y; ++i)
s_globStart[i] += globalOffset;
}
__syncthreads();
// copy local queues to global queue
const int qsize = s_qsize[threadIdx.y];
int gidx = s_globStart[threadIdx.y] + threadIdx.x;
for(int i = threadIdx.x; i < qsize; i += blockDim.x, gidx += blockDim.x)
list[gidx] = s_queues[threadIdx.y][i];
}
int buildPointList_gpu(PtrStepSzb src, unsigned int* list)
{
const int PIXELS_PER_THREAD = 16;
void* counterPtr;
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
const dim3 block(32, 4);
const dim3 grid(divUp(src.cols, block.x * PIXELS_PER_THREAD), divUp(src.rows, block.y));
cudaSafeCall( cudaFuncSetCacheConfig(buildPointList<PIXELS_PER_THREAD>, cudaFuncCachePreferShared) );
buildPointList<PIXELS_PER_THREAD><<<grid, block>>>(src, list);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
int totalCount;
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
return totalCount;
}
////////////////////////////////////////////////////////////////////////
// linesAccum
__global__ void linesAccumGlobal(const unsigned int* list, const int count, PtrStepi accum, const float irho, const float theta, const int numrho)
{
const int n = blockIdx.x;
const float ang = n * theta;
float sinVal;
float cosVal;
sincosf(ang, &sinVal, &cosVal);
sinVal *= irho;
cosVal *= irho;
const int shift = (numrho - 1) / 2;
int* accumRow = accum.ptr(n + 1);
for (int i = threadIdx.x; i < count; i += blockDim.x)
{
const unsigned int val = list[i];
const int x = (val & 0xFFFF);
const int y = (val >> 16) & 0xFFFF;
int r = __float2int_rn(x * cosVal + y * sinVal);
r += shift;
::atomicAdd(accumRow + r + 1, 1);
}
}
__global__ void linesAccumShared(const unsigned int* list, const int count, PtrStepi accum, const float irho, const float theta, const int numrho)
{
int* smem = DynamicSharedMem<int>();
for (int i = threadIdx.x; i < numrho + 1; i += blockDim.x)
smem[i] = 0;
__syncthreads();
const int n = blockIdx.x;
const float ang = n * theta;
float sinVal;
float cosVal;
sincosf(ang, &sinVal, &cosVal);
sinVal *= irho;
cosVal *= irho;
const int shift = (numrho - 1) / 2;
for (int i = threadIdx.x; i < count; i += blockDim.x)
{
const unsigned int val = list[i];
const int x = (val & 0xFFFF);
const int y = (val >> 16) & 0xFFFF;
int r = __float2int_rn(x * cosVal + y * sinVal);
r += shift;
Emulation::smem::atomicAdd(&smem[r + 1], 1);
}
__syncthreads();
int* accumRow = accum.ptr(n + 1);
for (int i = threadIdx.x; i < numrho + 1; i += blockDim.x)
accumRow[i] = smem[i];
}
void linesAccum_gpu(const unsigned int* list, int count, PtrStepSzi accum, float rho, float theta, size_t sharedMemPerBlock, bool has20)
{
const dim3 block(has20 ? 1024 : 512);
const dim3 grid(accum.rows - 2);
size_t smemSize = (accum.cols - 1) * sizeof(int);
if (smemSize < sharedMemPerBlock - 1000)
linesAccumShared<<<grid, block, smemSize>>>(list, count, accum, 1.0f / rho, theta, accum.cols - 2);
else
linesAccumGlobal<<<grid, block>>>(list, count, accum, 1.0f / rho, theta, accum.cols - 2);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
}
////////////////////////////////////////////////////////////////////////
// linesGetResult
__global__ void linesGetResult(const PtrStepSzi accum, float2* out, int* votes, const int maxSize, const float rho, const float theta, const int threshold, const int numrho)
{
const int r = blockIdx.x * blockDim.x + threadIdx.x;
const int n = blockIdx.y * blockDim.y + threadIdx.y;
if (r >= accum.cols - 2 || n >= accum.rows - 2)
return;
const int curVotes = accum(n + 1, r + 1);
if (curVotes > threshold &&
curVotes > accum(n + 1, r) &&
curVotes >= accum(n + 1, r + 2) &&
curVotes > accum(n, r + 1) &&
curVotes >= accum(n + 2, r + 1))
{
const float radius = (r - (numrho - 1) * 0.5f) * rho;
const float angle = n * theta;
const int ind = ::atomicAdd(&g_counter, 1);
if (ind < maxSize)
{
out[ind] = make_float2(radius, angle);
votes[ind] = curVotes;
}
}
}
int linesGetResult_gpu(PtrStepSzi accum, float2* out, int* votes, int maxSize, float rho, float theta, int threshold, bool doSort)
{
void* counterPtr;
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
const dim3 block(32, 8);
const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y));
cudaSafeCall( cudaFuncSetCacheConfig(linesGetResult, cudaFuncCachePreferL1) );
linesGetResult<<<grid, block>>>(accum, out, votes, maxSize, rho, theta, threshold, accum.cols - 2);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
int totalCount;
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
totalCount = ::min(totalCount, maxSize);
if (doSort && totalCount > 0)
{
thrust::device_ptr<float2> outPtr(out);
thrust::device_ptr<int> votesPtr(votes);
thrust::sort_by_key(votesPtr, votesPtr + totalCount, outPtr, thrust::greater<int>());
}
return totalCount;
}
////////////////////////////////////////////////////////////////////////
// houghLinesProbabilistic
texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_mask(false, cudaFilterModePoint, cudaAddressModeClamp);
__global__ void houghLinesProbabilistic(const PtrStepSzi accum,
int4* out, const int maxSize,
const float rho, const float theta,
const int lineGap, const int lineLength,
const int rows, const int cols)
{
const int r = blockIdx.x * blockDim.x + threadIdx.x;
const int n = blockIdx.y * blockDim.y + threadIdx.y;
if (r >= accum.cols - 2 || n >= accum.rows - 2)
return;
const int curVotes = accum(n + 1, r + 1);
if (curVotes >= lineLength &&
curVotes > accum(n, r) &&
curVotes > accum(n, r + 1) &&
curVotes > accum(n, r + 2) &&
curVotes > accum(n + 1, r) &&
curVotes > accum(n + 1, r + 2) &&
curVotes > accum(n + 2, r) &&
curVotes > accum(n + 2, r + 1) &&
curVotes > accum(n + 2, r + 2))
{
const float radius = (r - (accum.cols - 2 - 1) * 0.5f) * rho;
const float angle = n * theta;
float cosa;
float sina;
sincosf(angle, &sina, &cosa);
float2 p0 = make_float2(cosa * radius, sina * radius);
float2 dir = make_float2(-sina, cosa);
float2 pb[4] = {make_float2(-1, -1), make_float2(-1, -1), make_float2(-1, -1), make_float2(-1, -1)};
float a;
if (dir.x != 0)
{
a = -p0.x / dir.x;
pb[0].x = 0;
pb[0].y = p0.y + a * dir.y;
a = (cols - 1 - p0.x) / dir.x;
pb[1].x = cols - 1;
pb[1].y = p0.y + a * dir.y;
}
if (dir.y != 0)
{
a = -p0.y / dir.y;
pb[2].x = p0.x + a * dir.x;
pb[2].y = 0;
a = (rows - 1 - p0.y) / dir.y;
pb[3].x = p0.x + a * dir.x;
pb[3].y = rows - 1;
}
if (pb[0].x == 0 && (pb[0].y >= 0 && pb[0].y < rows))
{
p0 = pb[0];
if (dir.x < 0)
dir = -dir;
}
else if (pb[1].x == cols - 1 && (pb[0].y >= 0 && pb[0].y < rows))
{
p0 = pb[1];
if (dir.x > 0)
dir = -dir;
}
else if (pb[2].y == 0 && (pb[2].x >= 0 && pb[2].x < cols))
{
p0 = pb[2];
if (dir.y < 0)
dir = -dir;
}
else if (pb[3].y == rows - 1 && (pb[3].x >= 0 && pb[3].x < cols))
{
p0 = pb[3];
if (dir.y > 0)
dir = -dir;
}
float2 d;
if (::fabsf(dir.x) > ::fabsf(dir.y))
{
d.x = dir.x > 0 ? 1 : -1;
d.y = dir.y / ::fabsf(dir.x);
}
else
{
d.x = dir.x / ::fabsf(dir.y);
d.y = dir.y > 0 ? 1 : -1;
}
float2 line_end[2];
int gap;
bool inLine = false;
float2 p1 = p0;
if (p1.x < 0 || p1.x >= cols || p1.y < 0 || p1.y >= rows)
return;
for (;;)
{
if (tex2D(tex_mask, p1.x, p1.y))
{
gap = 0;
if (!inLine)
{
line_end[0] = p1;
line_end[1] = p1;
inLine = true;
}
else
{
line_end[1] = p1;
}
}
else if (inLine)
{
if (++gap > lineGap)
{
bool good_line = ::abs(line_end[1].x - line_end[0].x) >= lineLength ||
::abs(line_end[1].y - line_end[0].y) >= lineLength;
if (good_line)
{
const int ind = ::atomicAdd(&g_counter, 1);
if (ind < maxSize)
out[ind] = make_int4(line_end[0].x, line_end[0].y, line_end[1].x, line_end[1].y);
}
gap = 0;
inLine = false;
}
}
p1 = p1 + d;
if (p1.x < 0 || p1.x >= cols || p1.y < 0 || p1.y >= rows)
{
if (inLine)
{
bool good_line = ::abs(line_end[1].x - line_end[0].x) >= lineLength ||
::abs(line_end[1].y - line_end[0].y) >= lineLength;
if (good_line)
{
const int ind = ::atomicAdd(&g_counter, 1);
if (ind < maxSize)
out[ind] = make_int4(line_end[0].x, line_end[0].y, line_end[1].x, line_end[1].y);
}
}
break;
}
}
}
}
int houghLinesProbabilistic_gpu(PtrStepSzb mask, PtrStepSzi accum, int4* out, int maxSize, float rho, float theta, int lineGap, int lineLength)
{
void* counterPtr;
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
const dim3 block(32, 8);
const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y));
bindTexture(&tex_mask, mask);
houghLinesProbabilistic<<<grid, block>>>(accum,
out, maxSize,
rho, theta,
lineGap, lineLength,
mask.rows, mask.cols);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
int totalCount;
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
totalCount = ::min(totalCount, maxSize);
return totalCount;
}
////////////////////////////////////////////////////////////////////////
// circlesAccumCenters
__global__ void circlesAccumCenters(const unsigned int* list, const int count, const PtrStepi dx, const PtrStepi dy,
PtrStepi accum, const int width, const int height, const int minRadius, const int maxRadius, const float idp)
{
const int SHIFT = 10;
const int ONE = 1 << SHIFT;
const int tid = blockIdx.x * blockDim.x + threadIdx.x;
if (tid >= count)
return;
const unsigned int val = list[tid];
const int x = (val & 0xFFFF);
const int y = (val >> 16) & 0xFFFF;
const int vx = dx(y, x);
const int vy = dy(y, x);
if (vx == 0 && vy == 0)
return;
const float mag = ::sqrtf(vx * vx + vy * vy);
const int x0 = __float2int_rn((x * idp) * ONE);
const int y0 = __float2int_rn((y * idp) * ONE);
int sx = __float2int_rn((vx * idp) * ONE / mag);
int sy = __float2int_rn((vy * idp) * ONE / mag);
// Step from minRadius to maxRadius in both directions of the gradient
for (int k1 = 0; k1 < 2; ++k1)
{
int x1 = x0 + minRadius * sx;
int y1 = y0 + minRadius * sy;
for (int r = minRadius; r <= maxRadius; x1 += sx, y1 += sy, ++r)
{
const int x2 = x1 >> SHIFT;
const int y2 = y1 >> SHIFT;
if (x2 < 0 || x2 >= width || y2 < 0 || y2 >= height)
break;
::atomicAdd(accum.ptr(y2 + 1) + x2 + 1, 1);
}
sx = -sx;
sy = -sy;
}
}
void circlesAccumCenters_gpu(const unsigned int* list, int count, PtrStepi dx, PtrStepi dy, PtrStepSzi accum, int minRadius, int maxRadius, float idp)
{
const dim3 block(256);
const dim3 grid(divUp(count, block.x));
cudaSafeCall( cudaFuncSetCacheConfig(circlesAccumCenters, cudaFuncCachePreferL1) );
circlesAccumCenters<<<grid, block>>>(list, count, dx, dy, accum, accum.cols - 2, accum.rows - 2, minRadius, maxRadius, idp);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
}
////////////////////////////////////////////////////////////////////////
// buildCentersList
__global__ void buildCentersList(const PtrStepSzi accum, unsigned int* centers, const int threshold)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x < accum.cols - 2 && y < accum.rows - 2)
{
const int top = accum(y, x + 1);
const int left = accum(y + 1, x);
const int cur = accum(y + 1, x + 1);
const int right = accum(y + 1, x + 2);
const int bottom = accum(y + 2, x + 1);
if (cur > threshold && cur > top && cur >= bottom && cur > left && cur >= right)
{
const unsigned int val = (y << 16) | x;
const int idx = ::atomicAdd(&g_counter, 1);
centers[idx] = val;
}
}
}
int buildCentersList_gpu(PtrStepSzi accum, unsigned int* centers, int threshold)
{
void* counterPtr;
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
const dim3 block(32, 8);
const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y));
cudaSafeCall( cudaFuncSetCacheConfig(buildCentersList, cudaFuncCachePreferL1) );
buildCentersList<<<grid, block>>>(accum, centers, threshold);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
int totalCount;
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
return totalCount;
}
////////////////////////////////////////////////////////////////////////
// circlesAccumRadius
__global__ void circlesAccumRadius(const unsigned int* centers, const unsigned int* list, const int count,
float3* circles, const int maxCircles, const float dp,
const int minRadius, const int maxRadius, const int histSize, const int threshold)
{
int* smem = DynamicSharedMem<int>();
for (int i = threadIdx.x; i < histSize + 2; i += blockDim.x)
smem[i] = 0;
__syncthreads();
unsigned int val = centers[blockIdx.x];
float cx = (val & 0xFFFF);
float cy = (val >> 16) & 0xFFFF;
cx = (cx + 0.5f) * dp;
cy = (cy + 0.5f) * dp;
for (int i = threadIdx.x; i < count; i += blockDim.x)
{
val = list[i];
const int x = (val & 0xFFFF);
const int y = (val >> 16) & 0xFFFF;
const float rad = ::sqrtf((cx - x) * (cx - x) + (cy - y) * (cy - y));
if (rad >= minRadius && rad <= maxRadius)
{
const int r = __float2int_rn(rad - minRadius);
Emulation::smem::atomicAdd(&smem[r + 1], 1);
}
}
__syncthreads();
for (int i = threadIdx.x; i < histSize; i += blockDim.x)
{
const int curVotes = smem[i + 1];
if (curVotes >= threshold && curVotes > smem[i] && curVotes >= smem[i + 2])
{
const int ind = ::atomicAdd(&g_counter, 1);
if (ind < maxCircles)
circles[ind] = make_float3(cx, cy, i + minRadius);
}
}
}
int circlesAccumRadius_gpu(const unsigned int* centers, int centersCount, const unsigned int* list, int count,
float3* circles, int maxCircles, float dp, int minRadius, int maxRadius, int threshold, bool has20)
{
void* counterPtr;
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
const dim3 block(has20 ? 1024 : 512);
const dim3 grid(centersCount);
const int histSize = maxRadius - minRadius + 1;
size_t smemSize = (histSize + 2) * sizeof(int);
circlesAccumRadius<<<grid, block, smemSize>>>(centers, list, count, circles, maxCircles, dp, minRadius, maxRadius, histSize, threshold);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
int totalCount;
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
totalCount = ::min(totalCount, maxCircles);
return totalCount;
}
////////////////////////////////////////////////////////////////////////
// Generalized Hough
template <typename T, int PIXELS_PER_THREAD> template <typename T, int PIXELS_PER_THREAD>
__global__ void buildEdgePointList(const PtrStepSzb edges, const PtrStep<T> dx, const PtrStep<T> dy, unsigned int* coordList, float* thetaList) __global__ void buildEdgePointList(const PtrStepSzb edges, const PtrStep<T> dx, const PtrStep<T> dy,
unsigned int* coordList, float* thetaList)
{ {
__shared__ unsigned int s_coordLists[4][32 * PIXELS_PER_THREAD]; __shared__ unsigned int s_coordLists[4][32 * PIXELS_PER_THREAD];
__shared__ float s_thetaLists[4][32 * PIXELS_PER_THREAD]; __shared__ float s_thetaLists[4][32 * PIXELS_PER_THREAD];
...@@ -812,7 +183,7 @@ namespace cv { namespace gpu { namespace cudev ...@@ -812,7 +183,7 @@ namespace cv { namespace gpu { namespace cudev
const int ind = ::atomicAdd(r_sizes + n, 1); const int ind = ::atomicAdd(r_sizes + n, 1);
if (ind < maxSize) if (ind < maxSize)
r_table(n, ind) = saturate_cast<short2>(p - templCenter); r_table(n, ind) = p - templCenter;
} }
void buildRTable_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount, void buildRTable_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
...@@ -831,9 +202,9 @@ namespace cv { namespace gpu { namespace cudev ...@@ -831,9 +202,9 @@ namespace cv { namespace gpu { namespace cudev
} }
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
// GHT_Ballard_Pos // Ballard_Pos
__global__ void GHT_Ballard_Pos_calcHist(const unsigned int* coordList, const float* thetaList, const int pointsCount, __global__ void Ballard_Pos_calcHist(const unsigned int* coordList, const float* thetaList, const int pointsCount,
const PtrStep<short2> r_table, const int* r_sizes, const PtrStep<short2> r_table, const int* r_sizes,
PtrStepSzi hist, PtrStepSzi hist,
const float idp, const float thetaScale) const float idp, const float thetaScale)
...@@ -856,7 +227,7 @@ namespace cv { namespace gpu { namespace cudev ...@@ -856,7 +227,7 @@ namespace cv { namespace gpu { namespace cudev
for (int j = 0; j < r_row_size; ++j) for (int j = 0; j < r_row_size; ++j)
{ {
int2 c = p - r_row[j]; short2 c = p - r_row[j];
c.x = __float2int_rn(c.x * idp); c.x = __float2int_rn(c.x * idp);
c.y = __float2int_rn(c.y * idp); c.y = __float2int_rn(c.y * idp);
...@@ -866,7 +237,7 @@ namespace cv { namespace gpu { namespace cudev ...@@ -866,7 +237,7 @@ namespace cv { namespace gpu { namespace cudev
} }
} }
void GHT_Ballard_Pos_calcHist_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount, void Ballard_Pos_calcHist_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
PtrStepSz<short2> r_table, const int* r_sizes, PtrStepSz<short2> r_table, const int* r_sizes,
PtrStepSzi hist, PtrStepSzi hist,
float dp, int levels) float dp, int levels)
...@@ -877,13 +248,14 @@ namespace cv { namespace gpu { namespace cudev ...@@ -877,13 +248,14 @@ namespace cv { namespace gpu { namespace cudev
const float idp = 1.0f / dp; const float idp = 1.0f / dp;
const float thetaScale = levels / (2.0f * CV_PI_F); const float thetaScale = levels / (2.0f * CV_PI_F);
GHT_Ballard_Pos_calcHist<<<grid, block>>>(coordList, thetaList, pointsCount, r_table, r_sizes, hist, idp, thetaScale); Ballard_Pos_calcHist<<<grid, block>>>(coordList, thetaList, pointsCount, r_table, r_sizes, hist, idp, thetaScale);
cudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaDeviceSynchronize() );
} }
__global__ void GHT_Ballard_Pos_findPosInHist(const PtrStepSzi hist, float4* out, int3* votes, const int maxSize, const float dp, const int threshold) __global__ void Ballard_Pos_findPosInHist(const PtrStepSzi hist, float4* out, int3* votes,
const int maxSize, const float dp, const int threshold)
{ {
const int x = blockIdx.x * blockDim.x + threadIdx.x; const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y; const int y = blockIdx.y * blockDim.y + threadIdx.y;
...@@ -909,7 +281,7 @@ namespace cv { namespace gpu { namespace cudev ...@@ -909,7 +281,7 @@ namespace cv { namespace gpu { namespace cudev
} }
} }
int GHT_Ballard_Pos_findPosInHist_gpu(PtrStepSzi hist, float4* out, int3* votes, int maxSize, float dp, int threshold) int Ballard_Pos_findPosInHist_gpu(PtrStepSzi hist, float4* out, int3* votes, int maxSize, float dp, int threshold)
{ {
void* counterPtr; void* counterPtr;
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) ); cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
...@@ -919,9 +291,9 @@ namespace cv { namespace gpu { namespace cudev ...@@ -919,9 +291,9 @@ namespace cv { namespace gpu { namespace cudev
const dim3 block(32, 8); const dim3 block(32, 8);
const dim3 grid(divUp(hist.cols - 2, block.x), divUp(hist.rows - 2, block.y)); const dim3 grid(divUp(hist.cols - 2, block.x), divUp(hist.rows - 2, block.y));
cudaSafeCall( cudaFuncSetCacheConfig(GHT_Ballard_Pos_findPosInHist, cudaFuncCachePreferL1) ); cudaSafeCall( cudaFuncSetCacheConfig(Ballard_Pos_findPosInHist, cudaFuncCachePreferL1) );
GHT_Ballard_Pos_findPosInHist<<<grid, block>>>(hist, out, votes, maxSize, dp, threshold); Ballard_Pos_findPosInHist<<<grid, block>>>(hist, out, votes, maxSize, dp, threshold);
cudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaDeviceSynchronize() );
...@@ -935,9 +307,9 @@ namespace cv { namespace gpu { namespace cudev ...@@ -935,9 +307,9 @@ namespace cv { namespace gpu { namespace cudev
} }
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
// GHT_Ballard_PosScale // Ballard_PosScale
__global__ void GHT_Ballard_PosScale_calcHist(const unsigned int* coordList, const float* thetaList, __global__ void Ballard_PosScale_calcHist(const unsigned int* coordList, const float* thetaList,
PtrStep<short2> r_table, const int* r_sizes, PtrStep<short2> r_table, const int* r_sizes,
PtrStepi hist, const int rows, const int cols, PtrStepi hist, const int rows, const int cols,
const float minScale, const float scaleStep, const int scaleRange, const float minScale, const float scaleStep, const int scaleRange,
...@@ -973,7 +345,7 @@ namespace cv { namespace gpu { namespace cudev ...@@ -973,7 +345,7 @@ namespace cv { namespace gpu { namespace cudev
} }
} }
void GHT_Ballard_PosScale_calcHist_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount, void Ballard_PosScale_calcHist_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
PtrStepSz<short2> r_table, const int* r_sizes, PtrStepSz<short2> r_table, const int* r_sizes,
PtrStepi hist, int rows, int cols, PtrStepi hist, int rows, int cols,
float minScale, float scaleStep, int scaleRange, float minScale, float scaleStep, int scaleRange,
...@@ -985,7 +357,7 @@ namespace cv { namespace gpu { namespace cudev ...@@ -985,7 +357,7 @@ namespace cv { namespace gpu { namespace cudev
const float idp = 1.0f / dp; const float idp = 1.0f / dp;
const float thetaScale = levels / (2.0f * CV_PI_F); const float thetaScale = levels / (2.0f * CV_PI_F);
GHT_Ballard_PosScale_calcHist<<<grid, block>>>(coordList, thetaList, Ballard_PosScale_calcHist<<<grid, block>>>(coordList, thetaList,
r_table, r_sizes, r_table, r_sizes,
hist, rows, cols, hist, rows, cols,
minScale, scaleStep, scaleRange, minScale, scaleStep, scaleRange,
...@@ -995,7 +367,7 @@ namespace cv { namespace gpu { namespace cudev ...@@ -995,7 +367,7 @@ namespace cv { namespace gpu { namespace cudev
cudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaDeviceSynchronize() );
} }
__global__ void GHT_Ballard_PosScale_findPosInHist(const PtrStepi hist, const int rows, const int cols, const int scaleRange, __global__ void Ballard_PosScale_findPosInHist(const PtrStepi hist, const int rows, const int cols, const int scaleRange,
float4* out, int3* votes, const int maxSize, float4* out, int3* votes, const int maxSize,
const float minScale, const float scaleStep, const float dp, const int threshold) const float minScale, const float scaleStep, const float dp, const int threshold)
{ {
...@@ -1034,7 +406,7 @@ namespace cv { namespace gpu { namespace cudev ...@@ -1034,7 +406,7 @@ namespace cv { namespace gpu { namespace cudev
} }
} }
int GHT_Ballard_PosScale_findPosInHist_gpu(PtrStepi hist, int rows, int cols, int scaleRange, float4* out, int3* votes, int maxSize, int Ballard_PosScale_findPosInHist_gpu(PtrStepi hist, int rows, int cols, int scaleRange, float4* out, int3* votes, int maxSize,
float minScale, float scaleStep, float dp, int threshold) float minScale, float scaleStep, float dp, int threshold)
{ {
void* counterPtr; void* counterPtr;
...@@ -1045,9 +417,10 @@ namespace cv { namespace gpu { namespace cudev ...@@ -1045,9 +417,10 @@ namespace cv { namespace gpu { namespace cudev
const dim3 block(32, 8); const dim3 block(32, 8);
const dim3 grid(divUp(cols, block.x), divUp(rows, block.y)); const dim3 grid(divUp(cols, block.x), divUp(rows, block.y));
cudaSafeCall( cudaFuncSetCacheConfig(GHT_Ballard_PosScale_findPosInHist, cudaFuncCachePreferL1) ); cudaSafeCall( cudaFuncSetCacheConfig(Ballard_PosScale_findPosInHist, cudaFuncCachePreferL1) );
GHT_Ballard_PosScale_findPosInHist<<<grid, block>>>(hist, rows, cols, scaleRange, out, votes, maxSize, minScale, scaleStep, dp, threshold); Ballard_PosScale_findPosInHist<<<grid, block>>>(hist, rows, cols, scaleRange, out, votes,
maxSize, minScale, scaleStep, dp, threshold);
cudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaDeviceSynchronize() );
...@@ -1061,9 +434,9 @@ namespace cv { namespace gpu { namespace cudev ...@@ -1061,9 +434,9 @@ namespace cv { namespace gpu { namespace cudev
} }
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
// GHT_Ballard_PosRotation // Ballard_PosRotation
__global__ void GHT_Ballard_PosRotation_calcHist(const unsigned int* coordList, const float* thetaList, __global__ void Ballard_PosRotation_calcHist(const unsigned int* coordList, const float* thetaList,
PtrStep<short2> r_table, const int* r_sizes, PtrStep<short2> r_table, const int* r_sizes,
PtrStepi hist, const int rows, const int cols, PtrStepi hist, const int rows, const int cols,
const float minAngle, const float angleStep, const int angleRange, const float minAngle, const float angleStep, const int angleRange,
...@@ -1107,7 +480,7 @@ namespace cv { namespace gpu { namespace cudev ...@@ -1107,7 +480,7 @@ namespace cv { namespace gpu { namespace cudev
} }
} }
void GHT_Ballard_PosRotation_calcHist_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount, void Ballard_PosRotation_calcHist_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
PtrStepSz<short2> r_table, const int* r_sizes, PtrStepSz<short2> r_table, const int* r_sizes,
PtrStepi hist, int rows, int cols, PtrStepi hist, int rows, int cols,
float minAngle, float angleStep, int angleRange, float minAngle, float angleStep, int angleRange,
...@@ -1119,7 +492,7 @@ namespace cv { namespace gpu { namespace cudev ...@@ -1119,7 +492,7 @@ namespace cv { namespace gpu { namespace cudev
const float idp = 1.0f / dp; const float idp = 1.0f / dp;
const float thetaScale = levels / (2.0f * CV_PI_F); const float thetaScale = levels / (2.0f * CV_PI_F);
GHT_Ballard_PosRotation_calcHist<<<grid, block>>>(coordList, thetaList, Ballard_PosRotation_calcHist<<<grid, block>>>(coordList, thetaList,
r_table, r_sizes, r_table, r_sizes,
hist, rows, cols, hist, rows, cols,
minAngle, angleStep, angleRange, minAngle, angleStep, angleRange,
...@@ -1129,7 +502,7 @@ namespace cv { namespace gpu { namespace cudev ...@@ -1129,7 +502,7 @@ namespace cv { namespace gpu { namespace cudev
cudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaDeviceSynchronize() );
} }
__global__ void GHT_Ballard_PosRotation_findPosInHist(const PtrStepi hist, const int rows, const int cols, const int angleRange, __global__ void Ballard_PosRotation_findPosInHist(const PtrStepi hist, const int rows, const int cols, const int angleRange,
float4* out, int3* votes, const int maxSize, float4* out, int3* votes, const int maxSize,
const float minAngle, const float angleStep, const float dp, const int threshold) const float minAngle, const float angleStep, const float dp, const int threshold)
{ {
...@@ -1168,7 +541,7 @@ namespace cv { namespace gpu { namespace cudev ...@@ -1168,7 +541,7 @@ namespace cv { namespace gpu { namespace cudev
} }
} }
int GHT_Ballard_PosRotation_findPosInHist_gpu(PtrStepi hist, int rows, int cols, int angleRange, float4* out, int3* votes, int maxSize, int Ballard_PosRotation_findPosInHist_gpu(PtrStepi hist, int rows, int cols, int angleRange, float4* out, int3* votes, int maxSize,
float minAngle, float angleStep, float dp, int threshold) float minAngle, float angleStep, float dp, int threshold)
{ {
void* counterPtr; void* counterPtr;
...@@ -1179,9 +552,10 @@ namespace cv { namespace gpu { namespace cudev ...@@ -1179,9 +552,10 @@ namespace cv { namespace gpu { namespace cudev
const dim3 block(32, 8); const dim3 block(32, 8);
const dim3 grid(divUp(cols, block.x), divUp(rows, block.y)); const dim3 grid(divUp(cols, block.x), divUp(rows, block.y));
cudaSafeCall( cudaFuncSetCacheConfig(GHT_Ballard_PosRotation_findPosInHist, cudaFuncCachePreferL1) ); cudaSafeCall( cudaFuncSetCacheConfig(Ballard_PosRotation_findPosInHist, cudaFuncCachePreferL1) );
GHT_Ballard_PosRotation_findPosInHist<<<grid, block>>>(hist, rows, cols, angleRange, out, votes, maxSize, minAngle, angleStep, dp, threshold); Ballard_PosRotation_findPosInHist<<<grid, block>>>(hist, rows, cols, angleRange, out, votes,
maxSize, minAngle, angleStep, dp, threshold);
cudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaDeviceSynchronize() );
...@@ -1195,7 +569,7 @@ namespace cv { namespace gpu { namespace cudev ...@@ -1195,7 +569,7 @@ namespace cv { namespace gpu { namespace cudev
} }
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
// GHT_Guil_Full // Guil_Full
struct FeatureTable struct FeatureTable
{ {
...@@ -1221,7 +595,7 @@ namespace cv { namespace gpu { namespace cudev ...@@ -1221,7 +595,7 @@ namespace cv { namespace gpu { namespace cudev
__constant__ FeatureTable c_templFeatures; __constant__ FeatureTable c_templFeatures;
__constant__ FeatureTable c_imageFeatures; __constant__ FeatureTable c_imageFeatures;
void GHT_Guil_Full_setTemplFeatures(PtrStepb p1_pos, PtrStepb p1_theta, PtrStepb p2_pos, PtrStepb d12, PtrStepb r1, PtrStepb r2) void Guil_Full_setTemplFeatures(PtrStepb p1_pos, PtrStepb p1_theta, PtrStepb p2_pos, PtrStepb d12, PtrStepb r1, PtrStepb r2)
{ {
FeatureTable tbl; FeatureTable tbl;
...@@ -1245,7 +619,7 @@ namespace cv { namespace gpu { namespace cudev ...@@ -1245,7 +619,7 @@ namespace cv { namespace gpu { namespace cudev
cudaSafeCall( cudaMemcpyToSymbol(c_templFeatures, &tbl, sizeof(FeatureTable)) ); cudaSafeCall( cudaMemcpyToSymbol(c_templFeatures, &tbl, sizeof(FeatureTable)) );
} }
void GHT_Guil_Full_setImageFeatures(PtrStepb p1_pos, PtrStepb p1_theta, PtrStepb p2_pos, PtrStepb d12, PtrStepb r1, PtrStepb r2) void Guil_Full_setImageFeatures(PtrStepb p1_pos, PtrStepb p1_theta, PtrStepb p2_pos, PtrStepb d12, PtrStepb r1, PtrStepb r2)
{ {
FeatureTable tbl; FeatureTable tbl;
...@@ -1347,7 +721,7 @@ namespace cv { namespace gpu { namespace cudev ...@@ -1347,7 +721,7 @@ namespace cv { namespace gpu { namespace cudev
} }
template <class FT, bool isTempl> template <class FT, bool isTempl>
__global__ void GHT_Guil_Full_buildFeatureList(const unsigned int* coordList, const float* thetaList, const int pointsCount, __global__ void Guil_Full_buildFeatureList(const unsigned int* coordList, const float* thetaList, const int pointsCount,
int* sizes, const int maxSize, int* sizes, const int maxSize,
const float xi, const float angleEpsilon, const float alphaScale, const float xi, const float angleEpsilon, const float alphaScale,
const float2 center, const float maxDist) const float2 center, const float maxDist)
...@@ -1406,7 +780,7 @@ namespace cv { namespace gpu { namespace cudev ...@@ -1406,7 +780,7 @@ namespace cv { namespace gpu { namespace cudev
} }
template <class FT, bool isTempl> template <class FT, bool isTempl>
void GHT_Guil_Full_buildFeatureList_caller(const unsigned int* coordList, const float* thetaList, int pointsCount, void Guil_Full_buildFeatureList_caller(const unsigned int* coordList, const float* thetaList, int pointsCount,
int* sizes, int maxSize, int* sizes, int maxSize,
float xi, float angleEpsilon, int levels, float xi, float angleEpsilon, int levels,
float2 center, float maxDist) float2 center, float maxDist)
...@@ -1416,7 +790,7 @@ namespace cv { namespace gpu { namespace cudev ...@@ -1416,7 +790,7 @@ namespace cv { namespace gpu { namespace cudev
const float alphaScale = levels / (2.0f * CV_PI_F); const float alphaScale = levels / (2.0f * CV_PI_F);
GHT_Guil_Full_buildFeatureList<FT, isTempl><<<grid, block>>>(coordList, thetaList, pointsCount, Guil_Full_buildFeatureList<FT, isTempl><<<grid, block>>>(coordList, thetaList, pointsCount,
sizes, maxSize, sizes, maxSize,
xi * (CV_PI_F / 180.0f), angleEpsilon * (CV_PI_F / 180.0f), alphaScale, xi * (CV_PI_F / 180.0f), angleEpsilon * (CV_PI_F / 180.0f), alphaScale,
center, maxDist); center, maxDist);
...@@ -1428,28 +802,28 @@ namespace cv { namespace gpu { namespace cudev ...@@ -1428,28 +802,28 @@ namespace cv { namespace gpu { namespace cudev
thrust::transform(sizesPtr, sizesPtr + levels + 1, sizesPtr, cudev::bind2nd(cudev::minimum<int>(), maxSize)); thrust::transform(sizesPtr, sizesPtr + levels + 1, sizesPtr, cudev::bind2nd(cudev::minimum<int>(), maxSize));
} }
void GHT_Guil_Full_buildTemplFeatureList_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount, void Guil_Full_buildTemplFeatureList_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
int* sizes, int maxSize, int* sizes, int maxSize,
float xi, float angleEpsilon, int levels, float xi, float angleEpsilon, int levels,
float2 center, float maxDist) float2 center, float maxDist)
{ {
GHT_Guil_Full_buildFeatureList_caller<TemplFeatureTable, true>(coordList, thetaList, pointsCount, Guil_Full_buildFeatureList_caller<TemplFeatureTable, true>(coordList, thetaList, pointsCount,
sizes, maxSize, sizes, maxSize,
xi, angleEpsilon, levels, xi, angleEpsilon, levels,
center, maxDist); center, maxDist);
} }
void GHT_Guil_Full_buildImageFeatureList_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount, void Guil_Full_buildImageFeatureList_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
int* sizes, int maxSize, int* sizes, int maxSize,
float xi, float angleEpsilon, int levels, float xi, float angleEpsilon, int levels,
float2 center, float maxDist) float2 center, float maxDist)
{ {
GHT_Guil_Full_buildFeatureList_caller<ImageFeatureTable, false>(coordList, thetaList, pointsCount, Guil_Full_buildFeatureList_caller<ImageFeatureTable, false>(coordList, thetaList, pointsCount,
sizes, maxSize, sizes, maxSize,
xi, angleEpsilon, levels, xi, angleEpsilon, levels,
center, maxDist); center, maxDist);
} }
__global__ void GHT_Guil_Full_calcOHist(const int* templSizes, const int* imageSizes, int* OHist, __global__ void Guil_Full_calcOHist(const int* templSizes, const int* imageSizes, int* OHist,
const float minAngle, const float maxAngle, const float iAngleStep, const int angleRange) const float minAngle, const float maxAngle, const float iAngleStep, const int angleRange)
{ {
extern __shared__ int s_OHist[]; extern __shared__ int s_OHist[];
...@@ -1487,7 +861,7 @@ namespace cv { namespace gpu { namespace cudev ...@@ -1487,7 +861,7 @@ namespace cv { namespace gpu { namespace cudev
::atomicAdd(OHist + i, s_OHist[i]); ::atomicAdd(OHist + i, s_OHist[i]);
} }
void GHT_Guil_Full_calcOHist_gpu(const int* templSizes, const int* imageSizes, int* OHist, void Guil_Full_calcOHist_gpu(const int* templSizes, const int* imageSizes, int* OHist,
float minAngle, float maxAngle, float angleStep, int angleRange, float minAngle, float maxAngle, float angleStep, int angleRange,
int levels, int tMaxSize) int levels, int tMaxSize)
{ {
...@@ -1500,14 +874,14 @@ namespace cv { namespace gpu { namespace cudev ...@@ -1500,14 +874,14 @@ namespace cv { namespace gpu { namespace cudev
const size_t smemSize = (angleRange + 1) * sizeof(float); const size_t smemSize = (angleRange + 1) * sizeof(float);
GHT_Guil_Full_calcOHist<<<grid, block, smemSize>>>(templSizes, imageSizes, OHist, Guil_Full_calcOHist<<<grid, block, smemSize>>>(templSizes, imageSizes, OHist,
minAngle, maxAngle, 1.0f / angleStep, angleRange); minAngle, maxAngle, 1.0f / angleStep, angleRange);
cudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaDeviceSynchronize() );
} }
__global__ void GHT_Guil_Full_calcSHist(const int* templSizes, const int* imageSizes, int* SHist, __global__ void Guil_Full_calcSHist(const int* templSizes, const int* imageSizes, int* SHist,
const float angle, const float angleEpsilon, const float angle, const float angleEpsilon,
const float minScale, const float maxScale, const float iScaleStep, const int scaleRange) const float minScale, const float maxScale, const float iScaleStep, const int scaleRange)
{ {
...@@ -1551,7 +925,7 @@ namespace cv { namespace gpu { namespace cudev ...@@ -1551,7 +925,7 @@ namespace cv { namespace gpu { namespace cudev
::atomicAdd(SHist + i, s_SHist[i]); ::atomicAdd(SHist + i, s_SHist[i]);
} }
void GHT_Guil_Full_calcSHist_gpu(const int* templSizes, const int* imageSizes, int* SHist, void Guil_Full_calcSHist_gpu(const int* templSizes, const int* imageSizes, int* SHist,
float angle, float angleEpsilon, float angle, float angleEpsilon,
float minScale, float maxScale, float iScaleStep, int scaleRange, float minScale, float maxScale, float iScaleStep, int scaleRange,
int levels, int tMaxSize) int levels, int tMaxSize)
...@@ -1564,7 +938,7 @@ namespace cv { namespace gpu { namespace cudev ...@@ -1564,7 +938,7 @@ namespace cv { namespace gpu { namespace cudev
const size_t smemSize = (scaleRange + 1) * sizeof(float); const size_t smemSize = (scaleRange + 1) * sizeof(float);
GHT_Guil_Full_calcSHist<<<grid, block, smemSize>>>(templSizes, imageSizes, SHist, Guil_Full_calcSHist<<<grid, block, smemSize>>>(templSizes, imageSizes, SHist,
angle, angleEpsilon, angle, angleEpsilon,
minScale, maxScale, iScaleStep, scaleRange); minScale, maxScale, iScaleStep, scaleRange);
cudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaGetLastError() );
...@@ -1572,7 +946,7 @@ namespace cv { namespace gpu { namespace cudev ...@@ -1572,7 +946,7 @@ namespace cv { namespace gpu { namespace cudev
cudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaDeviceSynchronize() );
} }
__global__ void GHT_Guil_Full_calcPHist(const int* templSizes, const int* imageSizes, PtrStepSzi PHist, __global__ void Guil_Full_calcPHist(const int* templSizes, const int* imageSizes, PtrStepSzi PHist,
const float angle, const float sinVal, const float cosVal, const float angleEpsilon, const float scale, const float angle, const float sinVal, const float cosVal, const float angleEpsilon, const float scale,
const float idp) const float idp)
{ {
...@@ -1623,7 +997,7 @@ namespace cv { namespace gpu { namespace cudev ...@@ -1623,7 +997,7 @@ namespace cv { namespace gpu { namespace cudev
} }
} }
void GHT_Guil_Full_calcPHist_gpu(const int* templSizes, const int* imageSizes, PtrStepSzi PHist, void Guil_Full_calcPHist_gpu(const int* templSizes, const int* imageSizes, PtrStepSzi PHist,
float angle, float angleEpsilon, float scale, float angle, float angleEpsilon, float scale,
float dp, float dp,
int levels, int tMaxSize) int levels, int tMaxSize)
...@@ -1637,9 +1011,9 @@ namespace cv { namespace gpu { namespace cudev ...@@ -1637,9 +1011,9 @@ namespace cv { namespace gpu { namespace cudev
const float sinVal = ::sinf(angle); const float sinVal = ::sinf(angle);
const float cosVal = ::cosf(angle); const float cosVal = ::cosf(angle);
cudaSafeCall( cudaFuncSetCacheConfig(GHT_Guil_Full_calcPHist, cudaFuncCachePreferL1) ); cudaSafeCall( cudaFuncSetCacheConfig(Guil_Full_calcPHist, cudaFuncCachePreferL1) );
GHT_Guil_Full_calcPHist<<<grid, block>>>(templSizes, imageSizes, PHist, Guil_Full_calcPHist<<<grid, block>>>(templSizes, imageSizes, PHist,
angle, sinVal, cosVal, angleEpsilon, scale, angle, sinVal, cosVal, angleEpsilon, scale,
1.0f / dp); 1.0f / dp);
cudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaGetLastError() );
...@@ -1647,7 +1021,7 @@ namespace cv { namespace gpu { namespace cudev ...@@ -1647,7 +1021,7 @@ namespace cv { namespace gpu { namespace cudev
cudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaDeviceSynchronize() );
} }
__global__ void GHT_Guil_Full_findPosInHist(const PtrStepSzi hist, float4* out, int3* votes, const int maxSize, __global__ void Guil_Full_findPosInHist(const PtrStepSzi hist, float4* out, int3* votes, const int maxSize,
const float angle, const int angleVotes, const float scale, const int scaleVotes, const float angle, const int angleVotes, const float scale, const int scaleVotes,
const float dp, const int threshold) const float dp, const int threshold)
{ {
...@@ -1675,7 +1049,7 @@ namespace cv { namespace gpu { namespace cudev ...@@ -1675,7 +1049,7 @@ namespace cv { namespace gpu { namespace cudev
} }
} }
int GHT_Guil_Full_findPosInHist_gpu(PtrStepSzi hist, float4* out, int3* votes, int curSize, int maxSize, int Guil_Full_findPosInHist_gpu(PtrStepSzi hist, float4* out, int3* votes, int curSize, int maxSize,
float angle, int angleVotes, float scale, int scaleVotes, float angle, int angleVotes, float scale, int scaleVotes,
float dp, int threshold) float dp, int threshold)
{ {
...@@ -1687,9 +1061,9 @@ namespace cv { namespace gpu { namespace cudev ...@@ -1687,9 +1061,9 @@ namespace cv { namespace gpu { namespace cudev
const dim3 block(32, 8); const dim3 block(32, 8);
const dim3 grid(divUp(hist.cols - 2, block.x), divUp(hist.rows - 2, block.y)); const dim3 grid(divUp(hist.cols - 2, block.x), divUp(hist.rows - 2, block.y));
cudaSafeCall( cudaFuncSetCacheConfig(GHT_Guil_Full_findPosInHist, cudaFuncCachePreferL1) ); cudaSafeCall( cudaFuncSetCacheConfig(Guil_Full_findPosInHist, cudaFuncCachePreferL1) );
GHT_Guil_Full_findPosInHist<<<grid, block>>>(hist, out, votes, maxSize, Guil_Full_findPosInHist<<<grid, block>>>(hist, out, votes, maxSize,
angle, angleVotes, scale, scaleVotes, angle, angleVotes, scale, scaleVotes,
dp, threshold); dp, threshold);
cudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaGetLastError() );
...@@ -1706,5 +1080,6 @@ namespace cv { namespace gpu { namespace cudev ...@@ -1706,5 +1080,6 @@ namespace cv { namespace gpu { namespace cudev
} }
}}} }}}
#endif // HAVE_OPENCV_GPUARITHM
#endif /* CUDA_DISABLER */ #endif /* CUDA_DISABLER */
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/emulation.hpp"
#include "opencv2/core/cuda/dynamic_smem.hpp"
namespace cv { namespace gpu { namespace cudev
{
namespace hough_circles
{
__device__ int g_counter;
////////////////////////////////////////////////////////////////////////
// circlesAccumCenters
__global__ void circlesAccumCenters(const unsigned int* list, const int count, const PtrStepi dx, const PtrStepi dy,
PtrStepi accum, const int width, const int height, const int minRadius, const int maxRadius, const float idp)
{
const int SHIFT = 10;
const int ONE = 1 << SHIFT;
const int tid = blockIdx.x * blockDim.x + threadIdx.x;
if (tid >= count)
return;
const unsigned int val = list[tid];
const int x = (val & 0xFFFF);
const int y = (val >> 16) & 0xFFFF;
const int vx = dx(y, x);
const int vy = dy(y, x);
if (vx == 0 && vy == 0)
return;
const float mag = ::sqrtf(vx * vx + vy * vy);
const int x0 = __float2int_rn((x * idp) * ONE);
const int y0 = __float2int_rn((y * idp) * ONE);
int sx = __float2int_rn((vx * idp) * ONE / mag);
int sy = __float2int_rn((vy * idp) * ONE / mag);
// Step from minRadius to maxRadius in both directions of the gradient
for (int k1 = 0; k1 < 2; ++k1)
{
int x1 = x0 + minRadius * sx;
int y1 = y0 + minRadius * sy;
for (int r = minRadius; r <= maxRadius; x1 += sx, y1 += sy, ++r)
{
const int x2 = x1 >> SHIFT;
const int y2 = y1 >> SHIFT;
if (x2 < 0 || x2 >= width || y2 < 0 || y2 >= height)
break;
::atomicAdd(accum.ptr(y2 + 1) + x2 + 1, 1);
}
sx = -sx;
sy = -sy;
}
}
void circlesAccumCenters_gpu(const unsigned int* list, int count, PtrStepi dx, PtrStepi dy, PtrStepSzi accum, int minRadius, int maxRadius, float idp)
{
const dim3 block(256);
const dim3 grid(divUp(count, block.x));
cudaSafeCall( cudaFuncSetCacheConfig(circlesAccumCenters, cudaFuncCachePreferL1) );
circlesAccumCenters<<<grid, block>>>(list, count, dx, dy, accum, accum.cols - 2, accum.rows - 2, minRadius, maxRadius, idp);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
}
////////////////////////////////////////////////////////////////////////
// buildCentersList
__global__ void buildCentersList(const PtrStepSzi accum, unsigned int* centers, const int threshold)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x < accum.cols - 2 && y < accum.rows - 2)
{
const int top = accum(y, x + 1);
const int left = accum(y + 1, x);
const int cur = accum(y + 1, x + 1);
const int right = accum(y + 1, x + 2);
const int bottom = accum(y + 2, x + 1);
if (cur > threshold && cur > top && cur >= bottom && cur > left && cur >= right)
{
const unsigned int val = (y << 16) | x;
const int idx = ::atomicAdd(&g_counter, 1);
centers[idx] = val;
}
}
}
int buildCentersList_gpu(PtrStepSzi accum, unsigned int* centers, int threshold)
{
void* counterPtr;
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
const dim3 block(32, 8);
const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y));
cudaSafeCall( cudaFuncSetCacheConfig(buildCentersList, cudaFuncCachePreferL1) );
buildCentersList<<<grid, block>>>(accum, centers, threshold);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
int totalCount;
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
return totalCount;
}
////////////////////////////////////////////////////////////////////////
// circlesAccumRadius
__global__ void circlesAccumRadius(const unsigned int* centers, const unsigned int* list, const int count,
float3* circles, const int maxCircles, const float dp,
const int minRadius, const int maxRadius, const int histSize, const int threshold)
{
int* smem = DynamicSharedMem<int>();
for (int i = threadIdx.x; i < histSize + 2; i += blockDim.x)
smem[i] = 0;
__syncthreads();
unsigned int val = centers[blockIdx.x];
float cx = (val & 0xFFFF);
float cy = (val >> 16) & 0xFFFF;
cx = (cx + 0.5f) * dp;
cy = (cy + 0.5f) * dp;
for (int i = threadIdx.x; i < count; i += blockDim.x)
{
val = list[i];
const int x = (val & 0xFFFF);
const int y = (val >> 16) & 0xFFFF;
const float rad = ::sqrtf((cx - x) * (cx - x) + (cy - y) * (cy - y));
if (rad >= minRadius && rad <= maxRadius)
{
const int r = __float2int_rn(rad - minRadius);
Emulation::smem::atomicAdd(&smem[r + 1], 1);
}
}
__syncthreads();
for (int i = threadIdx.x; i < histSize; i += blockDim.x)
{
const int curVotes = smem[i + 1];
if (curVotes >= threshold && curVotes > smem[i] && curVotes >= smem[i + 2])
{
const int ind = ::atomicAdd(&g_counter, 1);
if (ind < maxCircles)
circles[ind] = make_float3(cx, cy, i + minRadius);
}
}
}
int circlesAccumRadius_gpu(const unsigned int* centers, int centersCount, const unsigned int* list, int count,
float3* circles, int maxCircles, float dp, int minRadius, int maxRadius, int threshold, bool has20)
{
void* counterPtr;
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
const dim3 block(has20 ? 1024 : 512);
const dim3 grid(centersCount);
const int histSize = maxRadius - minRadius + 1;
size_t smemSize = (histSize + 2) * sizeof(int);
circlesAccumRadius<<<grid, block, smemSize>>>(centers, list, count, circles, maxCircles, dp, minRadius, maxRadius, histSize, threshold);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
int totalCount;
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
totalCount = ::min(totalCount, maxCircles);
return totalCount;
}
}
}}}
#endif /* CUDA_DISABLER */
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include <thrust/device_ptr.h>
#include <thrust/sort.h>
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/emulation.hpp"
#include "opencv2/core/cuda/dynamic_smem.hpp"
namespace cv { namespace gpu { namespace cudev
{
namespace hough_lines
{
__device__ int g_counter;
////////////////////////////////////////////////////////////////////////
// linesAccum
__global__ void linesAccumGlobal(const unsigned int* list, const int count, PtrStepi accum, const float irho, const float theta, const int numrho)
{
const int n = blockIdx.x;
const float ang = n * theta;
float sinVal;
float cosVal;
sincosf(ang, &sinVal, &cosVal);
sinVal *= irho;
cosVal *= irho;
const int shift = (numrho - 1) / 2;
int* accumRow = accum.ptr(n + 1);
for (int i = threadIdx.x; i < count; i += blockDim.x)
{
const unsigned int val = list[i];
const int x = (val & 0xFFFF);
const int y = (val >> 16) & 0xFFFF;
int r = __float2int_rn(x * cosVal + y * sinVal);
r += shift;
::atomicAdd(accumRow + r + 1, 1);
}
}
__global__ void linesAccumShared(const unsigned int* list, const int count, PtrStepi accum, const float irho, const float theta, const int numrho)
{
int* smem = DynamicSharedMem<int>();
for (int i = threadIdx.x; i < numrho + 1; i += blockDim.x)
smem[i] = 0;
__syncthreads();
const int n = blockIdx.x;
const float ang = n * theta;
float sinVal;
float cosVal;
sincosf(ang, &sinVal, &cosVal);
sinVal *= irho;
cosVal *= irho;
const int shift = (numrho - 1) / 2;
for (int i = threadIdx.x; i < count; i += blockDim.x)
{
const unsigned int val = list[i];
const int x = (val & 0xFFFF);
const int y = (val >> 16) & 0xFFFF;
int r = __float2int_rn(x * cosVal + y * sinVal);
r += shift;
Emulation::smem::atomicAdd(&smem[r + 1], 1);
}
__syncthreads();
int* accumRow = accum.ptr(n + 1);
for (int i = threadIdx.x; i < numrho + 1; i += blockDim.x)
accumRow[i] = smem[i];
}
void linesAccum_gpu(const unsigned int* list, int count, PtrStepSzi accum, float rho, float theta, size_t sharedMemPerBlock, bool has20)
{
const dim3 block(has20 ? 1024 : 512);
const dim3 grid(accum.rows - 2);
size_t smemSize = (accum.cols - 1) * sizeof(int);
if (smemSize < sharedMemPerBlock - 1000)
linesAccumShared<<<grid, block, smemSize>>>(list, count, accum, 1.0f / rho, theta, accum.cols - 2);
else
linesAccumGlobal<<<grid, block>>>(list, count, accum, 1.0f / rho, theta, accum.cols - 2);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
}
////////////////////////////////////////////////////////////////////////
// linesGetResult
__global__ void linesGetResult(const PtrStepSzi accum, float2* out, int* votes, const int maxSize, const float rho, const float theta, const int threshold, const int numrho)
{
const int r = blockIdx.x * blockDim.x + threadIdx.x;
const int n = blockIdx.y * blockDim.y + threadIdx.y;
if (r >= accum.cols - 2 || n >= accum.rows - 2)
return;
const int curVotes = accum(n + 1, r + 1);
if (curVotes > threshold &&
curVotes > accum(n + 1, r) &&
curVotes >= accum(n + 1, r + 2) &&
curVotes > accum(n, r + 1) &&
curVotes >= accum(n + 2, r + 1))
{
const float radius = (r - (numrho - 1) * 0.5f) * rho;
const float angle = n * theta;
const int ind = ::atomicAdd(&g_counter, 1);
if (ind < maxSize)
{
out[ind] = make_float2(radius, angle);
votes[ind] = curVotes;
}
}
}
int linesGetResult_gpu(PtrStepSzi accum, float2* out, int* votes, int maxSize, float rho, float theta, int threshold, bool doSort)
{
void* counterPtr;
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
const dim3 block(32, 8);
const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y));
cudaSafeCall( cudaFuncSetCacheConfig(linesGetResult, cudaFuncCachePreferL1) );
linesGetResult<<<grid, block>>>(accum, out, votes, maxSize, rho, theta, threshold, accum.cols - 2);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
int totalCount;
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
totalCount = ::min(totalCount, maxSize);
if (doSort && totalCount > 0)
{
thrust::device_ptr<float2> outPtr(out);
thrust::device_ptr<int> votesPtr(votes);
thrust::sort_by_key(votesPtr, votesPtr + totalCount, outPtr, thrust::greater<int>());
}
return totalCount;
}
}
}}}
#endif /* CUDA_DISABLER */
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
namespace cv { namespace gpu { namespace cudev
{
namespace hough_segments
{
__device__ int g_counter;
texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_mask(false, cudaFilterModePoint, cudaAddressModeClamp);
__global__ void houghLinesProbabilistic(const PtrStepSzi accum,
int4* out, const int maxSize,
const float rho, const float theta,
const int lineGap, const int lineLength,
const int rows, const int cols)
{
const int r = blockIdx.x * blockDim.x + threadIdx.x;
const int n = blockIdx.y * blockDim.y + threadIdx.y;
if (r >= accum.cols - 2 || n >= accum.rows - 2)
return;
const int curVotes = accum(n + 1, r + 1);
if (curVotes >= lineLength &&
curVotes > accum(n, r) &&
curVotes > accum(n, r + 1) &&
curVotes > accum(n, r + 2) &&
curVotes > accum(n + 1, r) &&
curVotes > accum(n + 1, r + 2) &&
curVotes > accum(n + 2, r) &&
curVotes > accum(n + 2, r + 1) &&
curVotes > accum(n + 2, r + 2))
{
const float radius = (r - (accum.cols - 2 - 1) * 0.5f) * rho;
const float angle = n * theta;
float cosa;
float sina;
sincosf(angle, &sina, &cosa);
float2 p0 = make_float2(cosa * radius, sina * radius);
float2 dir = make_float2(-sina, cosa);
float2 pb[4] = {make_float2(-1, -1), make_float2(-1, -1), make_float2(-1, -1), make_float2(-1, -1)};
float a;
if (dir.x != 0)
{
a = -p0.x / dir.x;
pb[0].x = 0;
pb[0].y = p0.y + a * dir.y;
a = (cols - 1 - p0.x) / dir.x;
pb[1].x = cols - 1;
pb[1].y = p0.y + a * dir.y;
}
if (dir.y != 0)
{
a = -p0.y / dir.y;
pb[2].x = p0.x + a * dir.x;
pb[2].y = 0;
a = (rows - 1 - p0.y) / dir.y;
pb[3].x = p0.x + a * dir.x;
pb[3].y = rows - 1;
}
if (pb[0].x == 0 && (pb[0].y >= 0 && pb[0].y < rows))
{
p0 = pb[0];
if (dir.x < 0)
dir = -dir;
}
else if (pb[1].x == cols - 1 && (pb[0].y >= 0 && pb[0].y < rows))
{
p0 = pb[1];
if (dir.x > 0)
dir = -dir;
}
else if (pb[2].y == 0 && (pb[2].x >= 0 && pb[2].x < cols))
{
p0 = pb[2];
if (dir.y < 0)
dir = -dir;
}
else if (pb[3].y == rows - 1 && (pb[3].x >= 0 && pb[3].x < cols))
{
p0 = pb[3];
if (dir.y > 0)
dir = -dir;
}
float2 d;
if (::fabsf(dir.x) > ::fabsf(dir.y))
{
d.x = dir.x > 0 ? 1 : -1;
d.y = dir.y / ::fabsf(dir.x);
}
else
{
d.x = dir.x / ::fabsf(dir.y);
d.y = dir.y > 0 ? 1 : -1;
}
float2 line_end[2];
int gap;
bool inLine = false;
float2 p1 = p0;
if (p1.x < 0 || p1.x >= cols || p1.y < 0 || p1.y >= rows)
return;
for (;;)
{
if (tex2D(tex_mask, p1.x, p1.y))
{
gap = 0;
if (!inLine)
{
line_end[0] = p1;
line_end[1] = p1;
inLine = true;
}
else
{
line_end[1] = p1;
}
}
else if (inLine)
{
if (++gap > lineGap)
{
bool good_line = ::abs(line_end[1].x - line_end[0].x) >= lineLength ||
::abs(line_end[1].y - line_end[0].y) >= lineLength;
if (good_line)
{
const int ind = ::atomicAdd(&g_counter, 1);
if (ind < maxSize)
out[ind] = make_int4(line_end[0].x, line_end[0].y, line_end[1].x, line_end[1].y);
}
gap = 0;
inLine = false;
}
}
p1 = p1 + d;
if (p1.x < 0 || p1.x >= cols || p1.y < 0 || p1.y >= rows)
{
if (inLine)
{
bool good_line = ::abs(line_end[1].x - line_end[0].x) >= lineLength ||
::abs(line_end[1].y - line_end[0].y) >= lineLength;
if (good_line)
{
const int ind = ::atomicAdd(&g_counter, 1);
if (ind < maxSize)
out[ind] = make_int4(line_end[0].x, line_end[0].y, line_end[1].x, line_end[1].y);
}
}
break;
}
}
}
}
int houghLinesProbabilistic_gpu(PtrStepSzb mask, PtrStepSzi accum, int4* out, int maxSize, float rho, float theta, int lineGap, int lineLength)
{
void* counterPtr;
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
const dim3 block(32, 8);
const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y));
bindTexture(&tex_mask, mask);
houghLinesProbabilistic<<<grid, block>>>(accum,
out, maxSize,
rho, theta,
lineGap, lineLength,
mask.rows, mask.cols);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
int totalCount;
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
totalCount = ::min(totalCount, maxSize);
return totalCount;
}
}
}}}
#endif /* CUDA_DISABLER */
...@@ -45,539 +45,15 @@ ...@@ -45,539 +45,15 @@
using namespace cv; using namespace cv;
using namespace cv::gpu; using namespace cv::gpu;
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) || !defined(HAVE_OPENCV_GPUARITHM)
Ptr<gpu::HoughLinesDetector> cv::gpu::createHoughLinesDetector(float, float, int, bool, int) { throw_no_cuda(); return Ptr<HoughLinesDetector>(); }
Ptr<gpu::HoughSegmentDetector> cv::gpu::createHoughSegmentDetector(float, float, int, int, int) { throw_no_cuda(); return Ptr<HoughSegmentDetector>(); }
Ptr<gpu::HoughCirclesDetector> cv::gpu::createHoughCirclesDetector(float, float, int, int, int, int, int) { throw_no_cuda(); return Ptr<HoughCirclesDetector>(); }
Ptr<gpu::GeneralizedHough> cv::gpu::GeneralizedHough::create(int) { throw_no_cuda(); return Ptr<GeneralizedHough>(); } Ptr<gpu::GeneralizedHough> cv::gpu::GeneralizedHough::create(int) { throw_no_cuda(); return Ptr<GeneralizedHough>(); }
#else /* !defined (HAVE_CUDA) */ #else /* !defined (HAVE_CUDA) */
#include "opencv2/core/utility.hpp"
namespace cv { namespace gpu { namespace cudev
{
namespace hough
{
int buildPointList_gpu(PtrStepSzb src, unsigned int* list);
}
}}}
//////////////////////////////////////////////////////////
// HoughLinesDetector
namespace cv { namespace gpu { namespace cudev
{
namespace hough
{
void linesAccum_gpu(const unsigned int* list, int count, PtrStepSzi accum, float rho, float theta, size_t sharedMemPerBlock, bool has20);
int linesGetResult_gpu(PtrStepSzi accum, float2* out, int* votes, int maxSize, float rho, float theta, int threshold, bool doSort);
}
}}}
namespace
{
class HoughLinesDetectorImpl : public HoughLinesDetector
{
public:
HoughLinesDetectorImpl(float rho, float theta, int threshold, bool doSort, int maxLines) :
rho_(rho), theta_(theta), threshold_(threshold), doSort_(doSort), maxLines_(maxLines)
{
}
void detect(InputArray src, OutputArray lines);
void downloadResults(InputArray d_lines, OutputArray h_lines, OutputArray h_votes = noArray());
void setRho(float rho) { rho_ = rho; }
float getRho() const { return rho_; }
void setTheta(float theta) { theta_ = theta; }
float getTheta() const { return theta_; }
void setThreshold(int threshold) { threshold_ = threshold; }
int getThreshold() const { return threshold_; }
void setDoSort(bool doSort) { doSort_ = doSort; }
bool getDoSort() const { return doSort_; }
void setMaxLines(int maxLines) { maxLines_ = maxLines; }
int getMaxLines() const { return maxLines_; }
void write(FileStorage& fs) const
{
fs << "name" << "HoughLinesDetector_GPU"
<< "rho" << rho_
<< "theta" << theta_
<< "threshold" << threshold_
<< "doSort" << doSort_
<< "maxLines" << maxLines_;
}
void read(const FileNode& fn)
{
CV_Assert( String(fn["name"]) == "HoughLinesDetector_GPU" );
rho_ = (float)fn["rho"];
theta_ = (float)fn["theta"];
threshold_ = (int)fn["threshold"];
doSort_ = (int)fn["doSort"] != 0;
maxLines_ = (int)fn["maxLines"];
}
private:
float rho_;
float theta_;
int threshold_;
bool doSort_;
int maxLines_;
GpuMat accum_;
GpuMat list_;
GpuMat result_;
};
void HoughLinesDetectorImpl::detect(InputArray _src, OutputArray lines)
{
using namespace cv::gpu::cudev::hough;
GpuMat src = _src.getGpuMat();
CV_Assert( src.type() == CV_8UC1 );
CV_Assert( src.cols < std::numeric_limits<unsigned short>::max() );
CV_Assert( src.rows < std::numeric_limits<unsigned short>::max() );
ensureSizeIsEnough(1, src.size().area(), CV_32SC1, list_);
unsigned int* srcPoints = list_.ptr<unsigned int>();
const int pointsCount = buildPointList_gpu(src, srcPoints);
if (pointsCount == 0)
{
lines.release();
return;
}
const int numangle = cvRound(CV_PI / theta_);
const int numrho = cvRound(((src.cols + src.rows) * 2 + 1) / rho_);
CV_Assert( numangle > 0 && numrho > 0 );
ensureSizeIsEnough(numangle + 2, numrho + 2, CV_32SC1, accum_);
accum_.setTo(Scalar::all(0));
DeviceInfo devInfo;
linesAccum_gpu(srcPoints, pointsCount, accum_, rho_, theta_, devInfo.sharedMemPerBlock(), devInfo.supports(FEATURE_SET_COMPUTE_20));
ensureSizeIsEnough(2, maxLines_, CV_32FC2, result_);
int linesCount = linesGetResult_gpu(accum_, result_.ptr<float2>(0), result_.ptr<int>(1), maxLines_, rho_, theta_, threshold_, doSort_);
if (linesCount == 0)
{
lines.release();
return;
}
result_.cols = linesCount;
result_.copyTo(lines);
}
void HoughLinesDetectorImpl::downloadResults(InputArray _d_lines, OutputArray h_lines, OutputArray h_votes)
{
GpuMat d_lines = _d_lines.getGpuMat();
if (d_lines.empty())
{
h_lines.release();
if (h_votes.needed())
h_votes.release();
return;
}
CV_Assert( d_lines.rows == 2 && d_lines.type() == CV_32FC2 );
d_lines.row(0).download(h_lines);
if (h_votes.needed())
{
GpuMat d_votes(1, d_lines.cols, CV_32SC1, d_lines.ptr<int>(1));
d_votes.download(h_votes);
}
}
}
Ptr<HoughLinesDetector> cv::gpu::createHoughLinesDetector(float rho, float theta, int threshold, bool doSort, int maxLines)
{
return new HoughLinesDetectorImpl(rho, theta, threshold, doSort, maxLines);
}
//////////////////////////////////////////////////////////
// HoughLinesP
namespace cv { namespace gpu { namespace cudev
{
namespace hough
{
int houghLinesProbabilistic_gpu(PtrStepSzb mask, PtrStepSzi accum, int4* out, int maxSize, float rho, float theta, int lineGap, int lineLength);
}
}}}
namespace
{
class PHoughLinesDetectorImpl : public HoughSegmentDetector
{
public:
PHoughLinesDetectorImpl(float rho, float theta, int minLineLength, int maxLineGap, int maxLines) :
rho_(rho), theta_(theta), minLineLength_(minLineLength), maxLineGap_(maxLineGap), maxLines_(maxLines)
{
}
void detect(InputArray src, OutputArray lines);
void setRho(float rho) { rho_ = rho; }
float getRho() const { return rho_; }
void setTheta(float theta) { theta_ = theta; }
float getTheta() const { return theta_; }
void setMinLineLength(int minLineLength) { minLineLength_ = minLineLength; }
int getMinLineLength() const { return minLineLength_; }
void setMaxLineGap(int maxLineGap) { maxLineGap_ = maxLineGap; }
int getMaxLineGap() const { return maxLineGap_; }
void setMaxLines(int maxLines) { maxLines_ = maxLines; }
int getMaxLines() const { return maxLines_; }
void write(FileStorage& fs) const
{
fs << "name" << "PHoughLinesDetector_GPU"
<< "rho" << rho_
<< "theta" << theta_
<< "minLineLength" << minLineLength_
<< "maxLineGap" << maxLineGap_
<< "maxLines" << maxLines_;
}
void read(const FileNode& fn)
{
CV_Assert( String(fn["name"]) == "PHoughLinesDetector_GPU" );
rho_ = (float)fn["rho"];
theta_ = (float)fn["theta"];
minLineLength_ = (int)fn["minLineLength"];
maxLineGap_ = (int)fn["maxLineGap"];
maxLines_ = (int)fn["maxLines"];
}
private:
float rho_;
float theta_;
int minLineLength_;
int maxLineGap_;
int maxLines_;
GpuMat accum_;
GpuMat list_;
GpuMat result_;
};
void PHoughLinesDetectorImpl::detect(InputArray _src, OutputArray lines)
{
using namespace cv::gpu::cudev::hough;
GpuMat src = _src.getGpuMat();
CV_Assert( src.type() == CV_8UC1 );
CV_Assert( src.cols < std::numeric_limits<unsigned short>::max() );
CV_Assert( src.rows < std::numeric_limits<unsigned short>::max() );
ensureSizeIsEnough(1, src.size().area(), CV_32SC1, list_);
unsigned int* srcPoints = list_.ptr<unsigned int>();
const int pointsCount = buildPointList_gpu(src, srcPoints);
if (pointsCount == 0)
{
lines.release();
return;
}
const int numangle = cvRound(CV_PI / theta_);
const int numrho = cvRound(((src.cols + src.rows) * 2 + 1) / rho_);
CV_Assert( numangle > 0 && numrho > 0 );
ensureSizeIsEnough(numangle + 2, numrho + 2, CV_32SC1, accum_);
accum_.setTo(Scalar::all(0));
DeviceInfo devInfo;
linesAccum_gpu(srcPoints, pointsCount, accum_, rho_, theta_, devInfo.sharedMemPerBlock(), devInfo.supports(FEATURE_SET_COMPUTE_20));
ensureSizeIsEnough(1, maxLines_, CV_32SC4, result_);
int linesCount = houghLinesProbabilistic_gpu(src, accum_, result_.ptr<int4>(), maxLines_, rho_, theta_, maxLineGap_, minLineLength_);
if (linesCount == 0)
{
lines.release();
return;
}
result_.cols = linesCount;
result_.copyTo(lines);
}
}
Ptr<HoughSegmentDetector> cv::gpu::createHoughSegmentDetector(float rho, float theta, int minLineLength, int maxLineGap, int maxLines)
{
return new PHoughLinesDetectorImpl(rho, theta, minLineLength, maxLineGap, maxLines);
}
//////////////////////////////////////////////////////////
// HoughCircles
namespace cv { namespace gpu { namespace cudev namespace cv { namespace gpu { namespace cudev
{ {
namespace hough namespace ght
{
void circlesAccumCenters_gpu(const unsigned int* list, int count, PtrStepi dx, PtrStepi dy, PtrStepSzi accum, int minRadius, int maxRadius, float idp);
int buildCentersList_gpu(PtrStepSzi accum, unsigned int* centers, int threshold);
int circlesAccumRadius_gpu(const unsigned int* centers, int centersCount, const unsigned int* list, int count,
float3* circles, int maxCircles, float dp, int minRadius, int maxRadius, int threshold, bool has20);
}
}}}
namespace
{
class HoughCirclesDetectorImpl : public HoughCirclesDetector
{
public:
HoughCirclesDetectorImpl(float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles);
void detect(InputArray src, OutputArray circles);
void setDp(float dp) { dp_ = dp; }
float getDp() const { return dp_; }
void setMinDist(float minDist) { minDist_ = minDist; }
float getMinDist() const { return minDist_; }
void setCannyThreshold(int cannyThreshold) { cannyThreshold_ = cannyThreshold; }
int getCannyThreshold() const { return cannyThreshold_; }
void setVotesThreshold(int votesThreshold) { votesThreshold_ = votesThreshold; }
int getVotesThreshold() const { return votesThreshold_; }
void setMinRadius(int minRadius) { minRadius_ = minRadius; }
int getMinRadius() const { return minRadius_; }
void setMaxRadius(int maxRadius) { maxRadius_ = maxRadius; }
int getMaxRadius() const { return maxRadius_; }
void setMaxCircles(int maxCircles) { maxCircles_ = maxCircles; }
int getMaxCircles() const { return maxCircles_; }
void write(FileStorage& fs) const
{
fs << "name" << "HoughCirclesDetector_GPU"
<< "dp" << dp_
<< "minDist" << minDist_
<< "cannyThreshold" << cannyThreshold_
<< "votesThreshold" << votesThreshold_
<< "minRadius" << minRadius_
<< "maxRadius" << maxRadius_
<< "maxCircles" << maxCircles_;
}
void read(const FileNode& fn)
{
CV_Assert( String(fn["name"]) == "HoughCirclesDetector_GPU" );
dp_ = (float)fn["dp"];
minDist_ = (float)fn["minDist"];
cannyThreshold_ = (int)fn["cannyThreshold"];
votesThreshold_ = (int)fn["votesThreshold"];
minRadius_ = (int)fn["minRadius"];
maxRadius_ = (int)fn["maxRadius"];
maxCircles_ = (int)fn["maxCircles"];
}
private:
float dp_;
float minDist_;
int cannyThreshold_;
int votesThreshold_;
int minRadius_;
int maxRadius_;
int maxCircles_;
GpuMat dx_, dy_;
GpuMat edges_;
GpuMat accum_;
GpuMat list_;
GpuMat result_;
Ptr<gpu::Filter> filterDx_;
Ptr<gpu::Filter> filterDy_;
Ptr<gpu::CannyEdgeDetector> canny_;
};
HoughCirclesDetectorImpl::HoughCirclesDetectorImpl(float dp, float minDist, int cannyThreshold, int votesThreshold,
int minRadius, int maxRadius, int maxCircles) :
dp_(dp), minDist_(minDist), cannyThreshold_(cannyThreshold), votesThreshold_(votesThreshold),
minRadius_(minRadius), maxRadius_(maxRadius), maxCircles_(maxCircles)
{
canny_ = gpu::createCannyEdgeDetector(std::max(cannyThreshold_ / 2, 1), cannyThreshold_);
filterDx_ = gpu::createSobelFilter(CV_8UC1, CV_32S, 1, 0);
filterDy_ = gpu::createSobelFilter(CV_8UC1, CV_32S, 0, 1);
}
void HoughCirclesDetectorImpl::detect(InputArray _src, OutputArray circles)
{
using namespace cv::gpu::cudev::hough;
GpuMat src = _src.getGpuMat();
CV_Assert( src.type() == CV_8UC1 );
CV_Assert( src.cols < std::numeric_limits<unsigned short>::max() );
CV_Assert( src.rows < std::numeric_limits<unsigned short>::max() );
CV_Assert( dp_ > 0 );
CV_Assert( minRadius_ > 0 && maxRadius_ > minRadius_ );
CV_Assert( cannyThreshold_ > 0 );
CV_Assert( votesThreshold_ > 0 );
CV_Assert( maxCircles_ > 0 );
const float idp = 1.0f / dp_;
filterDx_->apply(src, dx_);
filterDy_->apply(src, dy_);
canny_->setLowThreshold(std::max(cannyThreshold_ / 2, 1));
canny_->setHighThreshold(cannyThreshold_);
canny_->detect(dx_, dy_, edges_);
ensureSizeIsEnough(2, src.size().area(), CV_32SC1, list_);
unsigned int* srcPoints = list_.ptr<unsigned int>(0);
unsigned int* centers = list_.ptr<unsigned int>(1);
const int pointsCount = buildPointList_gpu(edges_, srcPoints);
if (pointsCount == 0)
{
circles.release();
return;
}
ensureSizeIsEnough(cvCeil(src.rows * idp) + 2, cvCeil(src.cols * idp) + 2, CV_32SC1, accum_);
accum_.setTo(Scalar::all(0));
circlesAccumCenters_gpu(srcPoints, pointsCount, dx_, dy_, accum_, minRadius_, maxRadius_, idp);
int centersCount = buildCentersList_gpu(accum_, centers, votesThreshold_);
if (centersCount == 0)
{
circles.release();
return;
}
if (minDist_ > 1)
{
AutoBuffer<ushort2> oldBuf_(centersCount);
AutoBuffer<ushort2> newBuf_(centersCount);
int newCount = 0;
ushort2* oldBuf = oldBuf_;
ushort2* newBuf = newBuf_;
cudaSafeCall( cudaMemcpy(oldBuf, centers, centersCount * sizeof(ushort2), cudaMemcpyDeviceToHost) );
const int cellSize = cvRound(minDist_);
const int gridWidth = (src.cols + cellSize - 1) / cellSize;
const int gridHeight = (src.rows + cellSize - 1) / cellSize;
std::vector< std::vector<ushort2> > grid(gridWidth * gridHeight);
const float minDist2 = minDist_ * minDist_;
for (int i = 0; i < centersCount; ++i)
{
ushort2 p = oldBuf[i];
bool good = true;
int xCell = static_cast<int>(p.x / cellSize);
int yCell = static_cast<int>(p.y / cellSize);
int x1 = xCell - 1;
int y1 = yCell - 1;
int x2 = xCell + 1;
int y2 = yCell + 1;
// boundary check
x1 = std::max(0, x1);
y1 = std::max(0, y1);
x2 = std::min(gridWidth - 1, x2);
y2 = std::min(gridHeight - 1, y2);
for (int yy = y1; yy <= y2; ++yy)
{
for (int xx = x1; xx <= x2; ++xx)
{
std::vector<ushort2>& m = grid[yy * gridWidth + xx];
for(size_t j = 0; j < m.size(); ++j)
{
float dx = (float)(p.x - m[j].x);
float dy = (float)(p.y - m[j].y);
if (dx * dx + dy * dy < minDist2)
{
good = false;
goto break_out;
}
}
}
}
break_out:
if(good)
{
grid[yCell * gridWidth + xCell].push_back(p);
newBuf[newCount++] = p;
}
}
cudaSafeCall( cudaMemcpy(centers, newBuf, newCount * sizeof(unsigned int), cudaMemcpyHostToDevice) );
centersCount = newCount;
}
ensureSizeIsEnough(1, maxCircles_, CV_32FC3, result_);
int circlesCount = circlesAccumRadius_gpu(centers, centersCount, srcPoints, pointsCount, result_.ptr<float3>(), maxCircles_,
dp_, minRadius_, maxRadius_, votesThreshold_, deviceSupports(FEATURE_SET_COMPUTE_20));
if (circlesCount == 0)
{
circles.release();
return;
}
result_.cols = circlesCount;
result_.copyTo(circles);
}
}
Ptr<HoughCirclesDetector> cv::gpu::createHoughCirclesDetector(float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles)
{
return new HoughCirclesDetectorImpl(dp, minDist, cannyThreshold, votesThreshold, minRadius, maxRadius, maxCircles);
}
//////////////////////////////////////////////////////////
// GeneralizedHough
namespace cv { namespace gpu { namespace cudev
{
namespace hough
{ {
template <typename T> template <typename T>
int buildEdgePointList_gpu(PtrStepSzb edges, PtrStepSzb dx, PtrStepSzb dy, unsigned int* coordList, float* thetaList); int buildEdgePointList_gpu(PtrStepSzb edges, PtrStepSzb dx, PtrStepSzb dy, unsigned int* coordList, float* thetaList);
...@@ -585,50 +61,50 @@ namespace cv { namespace gpu { namespace cudev ...@@ -585,50 +61,50 @@ namespace cv { namespace gpu { namespace cudev
PtrStepSz<short2> r_table, int* r_sizes, PtrStepSz<short2> r_table, int* r_sizes,
short2 templCenter, int levels); short2 templCenter, int levels);
void GHT_Ballard_Pos_calcHist_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount, void Ballard_Pos_calcHist_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
PtrStepSz<short2> r_table, const int* r_sizes, PtrStepSz<short2> r_table, const int* r_sizes,
PtrStepSzi hist, PtrStepSzi hist,
float dp, int levels); float dp, int levels);
int GHT_Ballard_Pos_findPosInHist_gpu(PtrStepSzi hist, float4* out, int3* votes, int maxSize, float dp, int threshold); int Ballard_Pos_findPosInHist_gpu(PtrStepSzi hist, float4* out, int3* votes, int maxSize, float dp, int threshold);
void GHT_Ballard_PosScale_calcHist_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount, void Ballard_PosScale_calcHist_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
PtrStepSz<short2> r_table, const int* r_sizes, PtrStepSz<short2> r_table, const int* r_sizes,
PtrStepi hist, int rows, int cols, PtrStepi hist, int rows, int cols,
float minScale, float scaleStep, int scaleRange, float minScale, float scaleStep, int scaleRange,
float dp, int levels); float dp, int levels);
int GHT_Ballard_PosScale_findPosInHist_gpu(PtrStepi hist, int rows, int cols, int scaleRange, float4* out, int3* votes, int maxSize, int Ballard_PosScale_findPosInHist_gpu(PtrStepi hist, int rows, int cols, int scaleRange, float4* out, int3* votes, int maxSize,
float minScale, float scaleStep, float dp, int threshold); float minScale, float scaleStep, float dp, int threshold);
void GHT_Ballard_PosRotation_calcHist_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount, void Ballard_PosRotation_calcHist_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
PtrStepSz<short2> r_table, const int* r_sizes, PtrStepSz<short2> r_table, const int* r_sizes,
PtrStepi hist, int rows, int cols, PtrStepi hist, int rows, int cols,
float minAngle, float angleStep, int angleRange, float minAngle, float angleStep, int angleRange,
float dp, int levels); float dp, int levels);
int GHT_Ballard_PosRotation_findPosInHist_gpu(PtrStepi hist, int rows, int cols, int angleRange, float4* out, int3* votes, int maxSize, int Ballard_PosRotation_findPosInHist_gpu(PtrStepi hist, int rows, int cols, int angleRange, float4* out, int3* votes, int maxSize,
float minAngle, float angleStep, float dp, int threshold); float minAngle, float angleStep, float dp, int threshold);
void GHT_Guil_Full_setTemplFeatures(PtrStepb p1_pos, PtrStepb p1_theta, PtrStepb p2_pos, PtrStepb d12, PtrStepb r1, PtrStepb r2); void Guil_Full_setTemplFeatures(PtrStepb p1_pos, PtrStepb p1_theta, PtrStepb p2_pos, PtrStepb d12, PtrStepb r1, PtrStepb r2);
void GHT_Guil_Full_setImageFeatures(PtrStepb p1_pos, PtrStepb p1_theta, PtrStepb p2_pos, PtrStepb d12, PtrStepb r1, PtrStepb r2); void Guil_Full_setImageFeatures(PtrStepb p1_pos, PtrStepb p1_theta, PtrStepb p2_pos, PtrStepb d12, PtrStepb r1, PtrStepb r2);
void GHT_Guil_Full_buildTemplFeatureList_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount, void Guil_Full_buildTemplFeatureList_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
int* sizes, int maxSize, int* sizes, int maxSize,
float xi, float angleEpsilon, int levels, float xi, float angleEpsilon, int levels,
float2 center, float maxDist); float2 center, float maxDist);
void GHT_Guil_Full_buildImageFeatureList_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount, void Guil_Full_buildImageFeatureList_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
int* sizes, int maxSize, int* sizes, int maxSize,
float xi, float angleEpsilon, int levels, float xi, float angleEpsilon, int levels,
float2 center, float maxDist); float2 center, float maxDist);
void GHT_Guil_Full_calcOHist_gpu(const int* templSizes, const int* imageSizes, int* OHist, void Guil_Full_calcOHist_gpu(const int* templSizes, const int* imageSizes, int* OHist,
float minAngle, float maxAngle, float angleStep, int angleRange, float minAngle, float maxAngle, float angleStep, int angleRange,
int levels, int tMaxSize); int levels, int tMaxSize);
void GHT_Guil_Full_calcSHist_gpu(const int* templSizes, const int* imageSizes, int* SHist, void Guil_Full_calcSHist_gpu(const int* templSizes, const int* imageSizes, int* SHist,
float angle, float angleEpsilon, float angle, float angleEpsilon,
float minScale, float maxScale, float iScaleStep, int scaleRange, float minScale, float maxScale, float iScaleStep, int scaleRange,
int levels, int tMaxSize); int levels, int tMaxSize);
void GHT_Guil_Full_calcPHist_gpu(const int* templSizes, const int* imageSizes, PtrStepSzi PHist, void Guil_Full_calcPHist_gpu(const int* templSizes, const int* imageSizes, PtrStepSzi PHist,
float angle, float angleEpsilon, float scale, float angle, float angleEpsilon, float scale,
float dp, float dp,
int levels, int tMaxSize); int levels, int tMaxSize);
int GHT_Guil_Full_findPosInHist_gpu(PtrStepSzi hist, float4* out, int3* votes, int curSize, int maxSize, int Guil_Full_findPosInHist_gpu(PtrStepSzi hist, float4* out, int3* votes, int curSize, int maxSize,
float angle, int angleVotes, float scale, int scaleVotes, float angle, int angleVotes, float scale, int scaleVotes,
float dp, int threshold); float dp, int threshold);
} }
...@@ -889,7 +365,7 @@ namespace ...@@ -889,7 +365,7 @@ namespace
void GHT_Pos::buildEdgePointList(const GpuMat& edges, const GpuMat& dx, const GpuMat& dy) void GHT_Pos::buildEdgePointList(const GpuMat& edges, const GpuMat& dx, const GpuMat& dy)
{ {
using namespace cv::gpu::cudev::hough; using namespace cv::gpu::cudev::ght;
typedef int (*func_t)(PtrStepSzb edges, PtrStepSzb dx, PtrStepSzb dy, unsigned int* coordList, float* thetaList); typedef int (*func_t)(PtrStepSzb edges, PtrStepSzb dx, PtrStepSzb dy, unsigned int* coordList, float* thetaList);
static const func_t funcs[] = static const func_t funcs[] =
...@@ -1077,7 +553,7 @@ namespace ...@@ -1077,7 +553,7 @@ namespace
void GHT_Ballard_Pos::processTempl() void GHT_Ballard_Pos::processTempl()
{ {
using namespace cv::gpu::cudev::hough; using namespace cv::gpu::cudev::ght;
CV_Assert(levels > 0); CV_Assert(levels > 0);
...@@ -1103,7 +579,7 @@ namespace ...@@ -1103,7 +579,7 @@ namespace
void GHT_Ballard_Pos::calcHist() void GHT_Ballard_Pos::calcHist()
{ {
using namespace cv::gpu::cudev::hough; using namespace cv::gpu::cudev::ght;
CV_Assert(levels > 0 && r_table.rows == (levels + 1) && r_sizes.cols == (levels + 1)); CV_Assert(levels > 0 && r_table.rows == (levels + 1) && r_sizes.cols == (levels + 1));
CV_Assert(dp > 0.0); CV_Assert(dp > 0.0);
...@@ -1117,7 +593,7 @@ namespace ...@@ -1117,7 +593,7 @@ namespace
if (edgePointList.cols > 0) if (edgePointList.cols > 0)
{ {
GHT_Ballard_Pos_calcHist_gpu(edgePointList.ptr<unsigned int>(0), edgePointList.ptr<float>(1), edgePointList.cols, Ballard_Pos_calcHist_gpu(edgePointList.ptr<unsigned int>(0), edgePointList.ptr<float>(1), edgePointList.cols,
r_table, r_sizes.ptr<int>(), r_table, r_sizes.ptr<int>(),
hist, hist,
(float)dp, levels); (float)dp, levels);
...@@ -1126,13 +602,13 @@ namespace ...@@ -1126,13 +602,13 @@ namespace
void GHT_Ballard_Pos::findPosInHist() void GHT_Ballard_Pos::findPosInHist()
{ {
using namespace cv::gpu::cudev::hough; using namespace cv::gpu::cudev::ght;
CV_Assert(votesThreshold > 0); CV_Assert(votesThreshold > 0);
ensureSizeIsEnough(2, maxSize, CV_32FC4, outBuf); ensureSizeIsEnough(2, maxSize, CV_32FC4, outBuf);
posCount = GHT_Ballard_Pos_findPosInHist_gpu(hist, outBuf.ptr<float4>(0), outBuf.ptr<int3>(1), maxSize, (float)dp, votesThreshold); posCount = Ballard_Pos_findPosInHist_gpu(hist, outBuf.ptr<float4>(0), outBuf.ptr<int3>(1), maxSize, (float)dp, votesThreshold);
} }
///////////////////////////////////// /////////////////////////////////////
...@@ -1181,7 +657,7 @@ namespace ...@@ -1181,7 +657,7 @@ namespace
void GHT_Ballard_PosScale::calcHist() void GHT_Ballard_PosScale::calcHist()
{ {
using namespace cv::gpu::cudev::hough; using namespace cv::gpu::cudev::ght;
CV_Assert(levels > 0 && r_table.rows == (levels + 1) && r_sizes.cols == (levels + 1)); CV_Assert(levels > 0 && r_table.rows == (levels + 1) && r_sizes.cols == (levels + 1));
CV_Assert(dp > 0.0); CV_Assert(dp > 0.0);
...@@ -1200,7 +676,7 @@ namespace ...@@ -1200,7 +676,7 @@ namespace
if (edgePointList.cols > 0) if (edgePointList.cols > 0)
{ {
GHT_Ballard_PosScale_calcHist_gpu(edgePointList.ptr<unsigned int>(0), edgePointList.ptr<float>(1), edgePointList.cols, Ballard_PosScale_calcHist_gpu(edgePointList.ptr<unsigned int>(0), edgePointList.ptr<float>(1), edgePointList.cols,
r_table, r_sizes.ptr<int>(), r_table, r_sizes.ptr<int>(),
hist, rows, cols, hist, rows, cols,
(float)minScale, (float)scaleStep, scaleRange, (float)dp, levels); (float)minScale, (float)scaleStep, scaleRange, (float)dp, levels);
...@@ -1209,7 +685,7 @@ namespace ...@@ -1209,7 +685,7 @@ namespace
void GHT_Ballard_PosScale::findPosInHist() void GHT_Ballard_PosScale::findPosInHist()
{ {
using namespace cv::gpu::cudev::hough; using namespace cv::gpu::cudev::ght;
CV_Assert(votesThreshold > 0); CV_Assert(votesThreshold > 0);
...@@ -1220,7 +696,7 @@ namespace ...@@ -1220,7 +696,7 @@ namespace
ensureSizeIsEnough(2, maxSize, CV_32FC4, outBuf); ensureSizeIsEnough(2, maxSize, CV_32FC4, outBuf);
posCount = GHT_Ballard_PosScale_findPosInHist_gpu(hist, rows, cols, scaleRange, outBuf.ptr<float4>(0), outBuf.ptr<int3>(1), maxSize, (float)minScale, (float)scaleStep, (float)dp, votesThreshold); posCount = Ballard_PosScale_findPosInHist_gpu(hist, rows, cols, scaleRange, outBuf.ptr<float4>(0), outBuf.ptr<int3>(1), maxSize, (float)minScale, (float)scaleStep, (float)dp, votesThreshold);
} }
///////////////////////////////////// /////////////////////////////////////
...@@ -1269,7 +745,7 @@ namespace ...@@ -1269,7 +745,7 @@ namespace
void GHT_Ballard_PosRotation::calcHist() void GHT_Ballard_PosRotation::calcHist()
{ {
using namespace cv::gpu::cudev::hough; using namespace cv::gpu::cudev::ght;
CV_Assert(levels > 0 && r_table.rows == (levels + 1) && r_sizes.cols == (levels + 1)); CV_Assert(levels > 0 && r_table.rows == (levels + 1) && r_sizes.cols == (levels + 1));
CV_Assert(dp > 0.0); CV_Assert(dp > 0.0);
...@@ -1288,7 +764,7 @@ namespace ...@@ -1288,7 +764,7 @@ namespace
if (edgePointList.cols > 0) if (edgePointList.cols > 0)
{ {
GHT_Ballard_PosRotation_calcHist_gpu(edgePointList.ptr<unsigned int>(0), edgePointList.ptr<float>(1), edgePointList.cols, Ballard_PosRotation_calcHist_gpu(edgePointList.ptr<unsigned int>(0), edgePointList.ptr<float>(1), edgePointList.cols,
r_table, r_sizes.ptr<int>(), r_table, r_sizes.ptr<int>(),
hist, rows, cols, hist, rows, cols,
(float)minAngle, (float)angleStep, angleRange, (float)dp, levels); (float)minAngle, (float)angleStep, angleRange, (float)dp, levels);
...@@ -1297,7 +773,7 @@ namespace ...@@ -1297,7 +773,7 @@ namespace
void GHT_Ballard_PosRotation::findPosInHist() void GHT_Ballard_PosRotation::findPosInHist()
{ {
using namespace cv::gpu::cudev::hough; using namespace cv::gpu::cudev::ght;
CV_Assert(votesThreshold > 0); CV_Assert(votesThreshold > 0);
...@@ -1308,7 +784,7 @@ namespace ...@@ -1308,7 +784,7 @@ namespace
ensureSizeIsEnough(2, maxSize, CV_32FC4, outBuf); ensureSizeIsEnough(2, maxSize, CV_32FC4, outBuf);
posCount = GHT_Ballard_PosRotation_findPosInHist_gpu(hist, rows, cols, angleRange, outBuf.ptr<float4>(0), outBuf.ptr<int3>(1), maxSize, (float)minAngle, (float)angleStep, (float)dp, votesThreshold); posCount = Ballard_PosRotation_findPosInHist_gpu(hist, rows, cols, angleRange, outBuf.ptr<float4>(0), outBuf.ptr<int3>(1), maxSize, (float)minAngle, (float)angleStep, (float)dp, votesThreshold);
} }
///////////////////////////////////////// /////////////////////////////////////////
...@@ -1476,10 +952,10 @@ namespace ...@@ -1476,10 +952,10 @@ namespace
void GHT_Guil_Full::processTempl() void GHT_Guil_Full::processTempl()
{ {
using namespace cv::gpu::cudev::hough; using namespace cv::gpu::cudev::ght;
buildFeatureList(templEdges, templDx, templDy, templFeatures, buildFeatureList(templEdges, templDx, templDy, templFeatures,
GHT_Guil_Full_setTemplFeatures, GHT_Guil_Full_buildTemplFeatureList_gpu, Guil_Full_setTemplFeatures, Guil_Full_buildTemplFeatureList_gpu,
true, templCenter); true, templCenter);
h_buf.resize(templFeatures.sizes.cols); h_buf.resize(templFeatures.sizes.cols);
...@@ -1489,7 +965,7 @@ namespace ...@@ -1489,7 +965,7 @@ namespace
void GHT_Guil_Full::processImage() void GHT_Guil_Full::processImage()
{ {
using namespace cv::gpu::cudev::hough; using namespace cv::gpu::cudev::ght;
CV_Assert(levels > 0); CV_Assert(levels > 0);
CV_Assert(templFeatures.sizes.cols == levels + 1); CV_Assert(templFeatures.sizes.cols == levels + 1);
...@@ -1518,7 +994,7 @@ namespace ...@@ -1518,7 +994,7 @@ namespace
ensureSizeIsEnough(2, maxSize, CV_32FC4, outBuf); ensureSizeIsEnough(2, maxSize, CV_32FC4, outBuf);
buildFeatureList(imageEdges, imageDx, imageDy, imageFeatures, buildFeatureList(imageEdges, imageDx, imageDy, imageFeatures,
GHT_Guil_Full_setImageFeatures, GHT_Guil_Full_buildImageFeatureList_gpu, Guil_Full_setImageFeatures, Guil_Full_buildImageFeatureList_gpu,
false); false);
calcOrientation(); calcOrientation();
...@@ -1601,14 +1077,14 @@ namespace ...@@ -1601,14 +1077,14 @@ namespace
void GHT_Guil_Full::calcOrientation() void GHT_Guil_Full::calcOrientation()
{ {
using namespace cv::gpu::cudev::hough; using namespace cv::gpu::cudev::ght;
const double iAngleStep = 1.0 / angleStep; const double iAngleStep = 1.0 / angleStep;
const int angleRange = cvCeil((maxAngle - minAngle) * iAngleStep); const int angleRange = cvCeil((maxAngle - minAngle) * iAngleStep);
hist.setTo(Scalar::all(0)); hist.setTo(Scalar::all(0));
GHT_Guil_Full_calcOHist_gpu(templFeatures.sizes.ptr<int>(), imageFeatures.sizes.ptr<int>(0), Guil_Full_calcOHist_gpu(templFeatures.sizes.ptr<int>(), imageFeatures.sizes.ptr<int>(0), hist.ptr<int>(),
hist.ptr<int>(), (float)minAngle, (float)maxAngle, (float)angleStep, angleRange, levels, templFeatures.maxSize); (float)minAngle, (float)maxAngle, (float)angleStep, angleRange, levels, templFeatures.maxSize);
cudaSafeCall( cudaMemcpy(&h_buf[0], hist.data, h_buf.size() * sizeof(int), cudaMemcpyDeviceToHost) ); cudaSafeCall( cudaMemcpy(&h_buf[0], hist.data, h_buf.size() * sizeof(int), cudaMemcpyDeviceToHost) );
angles.clear(); angles.clear();
...@@ -1625,14 +1101,15 @@ namespace ...@@ -1625,14 +1101,15 @@ namespace
void GHT_Guil_Full::calcScale(double angle) void GHT_Guil_Full::calcScale(double angle)
{ {
using namespace cv::gpu::cudev::hough; using namespace cv::gpu::cudev::ght;
const double iScaleStep = 1.0 / scaleStep; const double iScaleStep = 1.0 / scaleStep;
const int scaleRange = cvCeil((maxScale - minScale) * iScaleStep); const int scaleRange = cvCeil((maxScale - minScale) * iScaleStep);
hist.setTo(Scalar::all(0)); hist.setTo(Scalar::all(0));
GHT_Guil_Full_calcSHist_gpu(templFeatures.sizes.ptr<int>(), imageFeatures.sizes.ptr<int>(0), Guil_Full_calcSHist_gpu(templFeatures.sizes.ptr<int>(), imageFeatures.sizes.ptr<int>(0), hist.ptr<int>(),
hist.ptr<int>(), (float)angle, (float)angleEpsilon, (float)minScale, (float)maxScale, (float)iScaleStep, scaleRange, levels, templFeatures.maxSize); (float)angle, (float)angleEpsilon, (float)minScale, (float)maxScale,
(float)iScaleStep, scaleRange, levels, templFeatures.maxSize);
cudaSafeCall( cudaMemcpy(&h_buf[0], hist.data, h_buf.size() * sizeof(int), cudaMemcpyDeviceToHost) ); cudaSafeCall( cudaMemcpy(&h_buf[0], hist.data, h_buf.size() * sizeof(int), cudaMemcpyDeviceToHost) );
scales.clear(); scales.clear();
...@@ -1649,14 +1126,15 @@ namespace ...@@ -1649,14 +1126,15 @@ namespace
void GHT_Guil_Full::calcPosition(double angle, int angleVotes, double scale, int scaleVotes) void GHT_Guil_Full::calcPosition(double angle, int angleVotes, double scale, int scaleVotes)
{ {
using namespace cv::gpu::cudev::hough; using namespace cv::gpu::cudev::ght;
hist.setTo(Scalar::all(0)); hist.setTo(Scalar::all(0));
GHT_Guil_Full_calcPHist_gpu(templFeatures.sizes.ptr<int>(), imageFeatures.sizes.ptr<int>(0), Guil_Full_calcPHist_gpu(templFeatures.sizes.ptr<int>(), imageFeatures.sizes.ptr<int>(0), hist,
hist,(float) (float)angle, (float)angleEpsilon, (float)scale, (float)dp, levels, templFeatures.maxSize); (float)angle, (float)angleEpsilon, (float)scale, (float)dp, levels, templFeatures.maxSize);
posCount = GHT_Guil_Full_findPosInHist_gpu(hist, outBuf.ptr<float4>(0), outBuf.ptr<int3>(1), posCount = Guil_Full_findPosInHist_gpu(hist, outBuf.ptr<float4>(0), outBuf.ptr<int3>(1),
posCount, maxSize, (float)angle, angleVotes, (float)scale, scaleVotes, (float)dp, posThresh); posCount, maxSize, (float)angle, angleVotes,
(float)scale, scaleVotes, (float)dp, posThresh);
} }
} }
...@@ -1679,10 +1157,11 @@ Ptr<gpu::GeneralizedHough> cv::gpu::GeneralizedHough::create(int method) ...@@ -1679,10 +1157,11 @@ Ptr<gpu::GeneralizedHough> cv::gpu::GeneralizedHough::create(int method)
case (cv::GeneralizedHough::GHT_POSITION | cv::GeneralizedHough::GHT_SCALE | cv::GeneralizedHough::GHT_ROTATION): case (cv::GeneralizedHough::GHT_POSITION | cv::GeneralizedHough::GHT_SCALE | cv::GeneralizedHough::GHT_ROTATION):
CV_Assert( !GHT_Guil_Full_info_auto.name().empty() ); CV_Assert( !GHT_Guil_Full_info_auto.name().empty() );
return new GHT_Guil_Full(); return new GHT_Guil_Full();
}
default:
CV_Error(Error::StsBadArg, "Unsupported method"); CV_Error(Error::StsBadArg, "Unsupported method");
return Ptr<GeneralizedHough>(); return Ptr<GeneralizedHough>();
}
} }
#endif /* !defined (HAVE_CUDA) */ #endif /* !defined (HAVE_CUDA) */
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace cv;
using namespace cv::gpu;
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
Ptr<gpu::HoughCirclesDetector> cv::gpu::createHoughCirclesDetector(float, float, int, int, int, int, int) { throw_no_cuda(); return Ptr<HoughCirclesDetector>(); }
#else /* !defined (HAVE_CUDA) */
namespace cv { namespace gpu { namespace cudev
{
namespace hough
{
int buildPointList_gpu(PtrStepSzb src, unsigned int* list);
}
namespace hough_circles
{
void circlesAccumCenters_gpu(const unsigned int* list, int count, PtrStepi dx, PtrStepi dy, PtrStepSzi accum, int minRadius, int maxRadius, float idp);
int buildCentersList_gpu(PtrStepSzi accum, unsigned int* centers, int threshold);
int circlesAccumRadius_gpu(const unsigned int* centers, int centersCount, const unsigned int* list, int count,
float3* circles, int maxCircles, float dp, int minRadius, int maxRadius, int threshold, bool has20);
}
}}}
namespace
{
class HoughCirclesDetectorImpl : public HoughCirclesDetector
{
public:
HoughCirclesDetectorImpl(float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles);
void detect(InputArray src, OutputArray circles);
void setDp(float dp) { dp_ = dp; }
float getDp() const { return dp_; }
void setMinDist(float minDist) { minDist_ = minDist; }
float getMinDist() const { return minDist_; }
void setCannyThreshold(int cannyThreshold) { cannyThreshold_ = cannyThreshold; }
int getCannyThreshold() const { return cannyThreshold_; }
void setVotesThreshold(int votesThreshold) { votesThreshold_ = votesThreshold; }
int getVotesThreshold() const { return votesThreshold_; }
void setMinRadius(int minRadius) { minRadius_ = minRadius; }
int getMinRadius() const { return minRadius_; }
void setMaxRadius(int maxRadius) { maxRadius_ = maxRadius; }
int getMaxRadius() const { return maxRadius_; }
void setMaxCircles(int maxCircles) { maxCircles_ = maxCircles; }
int getMaxCircles() const { return maxCircles_; }
void write(FileStorage& fs) const
{
fs << "name" << "HoughCirclesDetector_GPU"
<< "dp" << dp_
<< "minDist" << minDist_
<< "cannyThreshold" << cannyThreshold_
<< "votesThreshold" << votesThreshold_
<< "minRadius" << minRadius_
<< "maxRadius" << maxRadius_
<< "maxCircles" << maxCircles_;
}
void read(const FileNode& fn)
{
CV_Assert( String(fn["name"]) == "HoughCirclesDetector_GPU" );
dp_ = (float)fn["dp"];
minDist_ = (float)fn["minDist"];
cannyThreshold_ = (int)fn["cannyThreshold"];
votesThreshold_ = (int)fn["votesThreshold"];
minRadius_ = (int)fn["minRadius"];
maxRadius_ = (int)fn["maxRadius"];
maxCircles_ = (int)fn["maxCircles"];
}
private:
float dp_;
float minDist_;
int cannyThreshold_;
int votesThreshold_;
int minRadius_;
int maxRadius_;
int maxCircles_;
GpuMat dx_, dy_;
GpuMat edges_;
GpuMat accum_;
GpuMat list_;
GpuMat result_;
Ptr<gpu::Filter> filterDx_;
Ptr<gpu::Filter> filterDy_;
Ptr<gpu::CannyEdgeDetector> canny_;
};
HoughCirclesDetectorImpl::HoughCirclesDetectorImpl(float dp, float minDist, int cannyThreshold, int votesThreshold,
int minRadius, int maxRadius, int maxCircles) :
dp_(dp), minDist_(minDist), cannyThreshold_(cannyThreshold), votesThreshold_(votesThreshold),
minRadius_(minRadius), maxRadius_(maxRadius), maxCircles_(maxCircles)
{
canny_ = gpu::createCannyEdgeDetector(std::max(cannyThreshold_ / 2, 1), cannyThreshold_);
filterDx_ = gpu::createSobelFilter(CV_8UC1, CV_32S, 1, 0);
filterDy_ = gpu::createSobelFilter(CV_8UC1, CV_32S, 0, 1);
}
void HoughCirclesDetectorImpl::detect(InputArray _src, OutputArray circles)
{
using namespace cv::gpu::cudev::hough;
using namespace cv::gpu::cudev::hough_circles;
GpuMat src = _src.getGpuMat();
CV_Assert( src.type() == CV_8UC1 );
CV_Assert( src.cols < std::numeric_limits<unsigned short>::max() );
CV_Assert( src.rows < std::numeric_limits<unsigned short>::max() );
CV_Assert( dp_ > 0 );
CV_Assert( minRadius_ > 0 && maxRadius_ > minRadius_ );
CV_Assert( cannyThreshold_ > 0 );
CV_Assert( votesThreshold_ > 0 );
CV_Assert( maxCircles_ > 0 );
const float idp = 1.0f / dp_;
filterDx_->apply(src, dx_);
filterDy_->apply(src, dy_);
canny_->setLowThreshold(std::max(cannyThreshold_ / 2, 1));
canny_->setHighThreshold(cannyThreshold_);
canny_->detect(dx_, dy_, edges_);
ensureSizeIsEnough(2, src.size().area(), CV_32SC1, list_);
unsigned int* srcPoints = list_.ptr<unsigned int>(0);
unsigned int* centers = list_.ptr<unsigned int>(1);
const int pointsCount = buildPointList_gpu(edges_, srcPoints);
if (pointsCount == 0)
{
circles.release();
return;
}
ensureSizeIsEnough(cvCeil(src.rows * idp) + 2, cvCeil(src.cols * idp) + 2, CV_32SC1, accum_);
accum_.setTo(Scalar::all(0));
circlesAccumCenters_gpu(srcPoints, pointsCount, dx_, dy_, accum_, minRadius_, maxRadius_, idp);
int centersCount = buildCentersList_gpu(accum_, centers, votesThreshold_);
if (centersCount == 0)
{
circles.release();
return;
}
if (minDist_ > 1)
{
AutoBuffer<ushort2> oldBuf_(centersCount);
AutoBuffer<ushort2> newBuf_(centersCount);
int newCount = 0;
ushort2* oldBuf = oldBuf_;
ushort2* newBuf = newBuf_;
cudaSafeCall( cudaMemcpy(oldBuf, centers, centersCount * sizeof(ushort2), cudaMemcpyDeviceToHost) );
const int cellSize = cvRound(minDist_);
const int gridWidth = (src.cols + cellSize - 1) / cellSize;
const int gridHeight = (src.rows + cellSize - 1) / cellSize;
std::vector< std::vector<ushort2> > grid(gridWidth * gridHeight);
const float minDist2 = minDist_ * minDist_;
for (int i = 0; i < centersCount; ++i)
{
ushort2 p = oldBuf[i];
bool good = true;
int xCell = static_cast<int>(p.x / cellSize);
int yCell = static_cast<int>(p.y / cellSize);
int x1 = xCell - 1;
int y1 = yCell - 1;
int x2 = xCell + 1;
int y2 = yCell + 1;
// boundary check
x1 = std::max(0, x1);
y1 = std::max(0, y1);
x2 = std::min(gridWidth - 1, x2);
y2 = std::min(gridHeight - 1, y2);
for (int yy = y1; yy <= y2; ++yy)
{
for (int xx = x1; xx <= x2; ++xx)
{
std::vector<ushort2>& m = grid[yy * gridWidth + xx];
for(size_t j = 0; j < m.size(); ++j)
{
float dx = (float)(p.x - m[j].x);
float dy = (float)(p.y - m[j].y);
if (dx * dx + dy * dy < minDist2)
{
good = false;
goto break_out;
}
}
}
}
break_out:
if(good)
{
grid[yCell * gridWidth + xCell].push_back(p);
newBuf[newCount++] = p;
}
}
cudaSafeCall( cudaMemcpy(centers, newBuf, newCount * sizeof(unsigned int), cudaMemcpyHostToDevice) );
centersCount = newCount;
}
ensureSizeIsEnough(1, maxCircles_, CV_32FC3, result_);
int circlesCount = circlesAccumRadius_gpu(centers, centersCount, srcPoints, pointsCount, result_.ptr<float3>(), maxCircles_,
dp_, minRadius_, maxRadius_, votesThreshold_, deviceSupports(FEATURE_SET_COMPUTE_20));
if (circlesCount == 0)
{
circles.release();
return;
}
result_.cols = circlesCount;
result_.copyTo(circles);
}
}
Ptr<HoughCirclesDetector> cv::gpu::createHoughCirclesDetector(float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles)
{
return new HoughCirclesDetectorImpl(dp, minDist, cannyThreshold, votesThreshold, minRadius, maxRadius, maxCircles);
}
#endif /* !defined (HAVE_CUDA) */
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace cv;
using namespace cv::gpu;
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
Ptr<gpu::HoughLinesDetector> cv::gpu::createHoughLinesDetector(float, float, int, bool, int) { throw_no_cuda(); return Ptr<HoughLinesDetector>(); }
#else /* !defined (HAVE_CUDA) */
namespace cv { namespace gpu { namespace cudev
{
namespace hough
{
int buildPointList_gpu(PtrStepSzb src, unsigned int* list);
}
namespace hough_lines
{
void linesAccum_gpu(const unsigned int* list, int count, PtrStepSzi accum, float rho, float theta, size_t sharedMemPerBlock, bool has20);
int linesGetResult_gpu(PtrStepSzi accum, float2* out, int* votes, int maxSize, float rho, float theta, int threshold, bool doSort);
}
}}}
namespace
{
class HoughLinesDetectorImpl : public HoughLinesDetector
{
public:
HoughLinesDetectorImpl(float rho, float theta, int threshold, bool doSort, int maxLines) :
rho_(rho), theta_(theta), threshold_(threshold), doSort_(doSort), maxLines_(maxLines)
{
}
void detect(InputArray src, OutputArray lines);
void downloadResults(InputArray d_lines, OutputArray h_lines, OutputArray h_votes = noArray());
void setRho(float rho) { rho_ = rho; }
float getRho() const { return rho_; }
void setTheta(float theta) { theta_ = theta; }
float getTheta() const { return theta_; }
void setThreshold(int threshold) { threshold_ = threshold; }
int getThreshold() const { return threshold_; }
void setDoSort(bool doSort) { doSort_ = doSort; }
bool getDoSort() const { return doSort_; }
void setMaxLines(int maxLines) { maxLines_ = maxLines; }
int getMaxLines() const { return maxLines_; }
void write(FileStorage& fs) const
{
fs << "name" << "HoughLinesDetector_GPU"
<< "rho" << rho_
<< "theta" << theta_
<< "threshold" << threshold_
<< "doSort" << doSort_
<< "maxLines" << maxLines_;
}
void read(const FileNode& fn)
{
CV_Assert( String(fn["name"]) == "HoughLinesDetector_GPU" );
rho_ = (float)fn["rho"];
theta_ = (float)fn["theta"];
threshold_ = (int)fn["threshold"];
doSort_ = (int)fn["doSort"] != 0;
maxLines_ = (int)fn["maxLines"];
}
private:
float rho_;
float theta_;
int threshold_;
bool doSort_;
int maxLines_;
GpuMat accum_;
GpuMat list_;
GpuMat result_;
};
void HoughLinesDetectorImpl::detect(InputArray _src, OutputArray lines)
{
using namespace cv::gpu::cudev::hough;
using namespace cv::gpu::cudev::hough_lines;
GpuMat src = _src.getGpuMat();
CV_Assert( src.type() == CV_8UC1 );
CV_Assert( src.cols < std::numeric_limits<unsigned short>::max() );
CV_Assert( src.rows < std::numeric_limits<unsigned short>::max() );
ensureSizeIsEnough(1, src.size().area(), CV_32SC1, list_);
unsigned int* srcPoints = list_.ptr<unsigned int>();
const int pointsCount = buildPointList_gpu(src, srcPoints);
if (pointsCount == 0)
{
lines.release();
return;
}
const int numangle = cvRound(CV_PI / theta_);
const int numrho = cvRound(((src.cols + src.rows) * 2 + 1) / rho_);
CV_Assert( numangle > 0 && numrho > 0 );
ensureSizeIsEnough(numangle + 2, numrho + 2, CV_32SC1, accum_);
accum_.setTo(Scalar::all(0));
DeviceInfo devInfo;
linesAccum_gpu(srcPoints, pointsCount, accum_, rho_, theta_, devInfo.sharedMemPerBlock(), devInfo.supports(FEATURE_SET_COMPUTE_20));
ensureSizeIsEnough(2, maxLines_, CV_32FC2, result_);
int linesCount = linesGetResult_gpu(accum_, result_.ptr<float2>(0), result_.ptr<int>(1), maxLines_, rho_, theta_, threshold_, doSort_);
if (linesCount == 0)
{
lines.release();
return;
}
result_.cols = linesCount;
result_.copyTo(lines);
}
void HoughLinesDetectorImpl::downloadResults(InputArray _d_lines, OutputArray h_lines, OutputArray h_votes)
{
GpuMat d_lines = _d_lines.getGpuMat();
if (d_lines.empty())
{
h_lines.release();
if (h_votes.needed())
h_votes.release();
return;
}
CV_Assert( d_lines.rows == 2 && d_lines.type() == CV_32FC2 );
d_lines.row(0).download(h_lines);
if (h_votes.needed())
{
GpuMat d_votes(1, d_lines.cols, CV_32SC1, d_lines.ptr<int>(1));
d_votes.download(h_votes);
}
}
}
Ptr<HoughLinesDetector> cv::gpu::createHoughLinesDetector(float rho, float theta, int threshold, bool doSort, int maxLines)
{
return new HoughLinesDetectorImpl(rho, theta, threshold, doSort, maxLines);
}
#endif /* !defined (HAVE_CUDA) */
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace cv;
using namespace cv::gpu;
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
Ptr<gpu::HoughSegmentDetector> cv::gpu::createHoughSegmentDetector(float, float, int, int, int) { throw_no_cuda(); return Ptr<HoughSegmentDetector>(); }
#else /* !defined (HAVE_CUDA) */
namespace cv { namespace gpu { namespace cudev
{
namespace hough
{
int buildPointList_gpu(PtrStepSzb src, unsigned int* list);
}
namespace hough_lines
{
void linesAccum_gpu(const unsigned int* list, int count, PtrStepSzi accum, float rho, float theta, size_t sharedMemPerBlock, bool has20);
}
namespace hough_segments
{
int houghLinesProbabilistic_gpu(PtrStepSzb mask, PtrStepSzi accum, int4* out, int maxSize, float rho, float theta, int lineGap, int lineLength);
}
}}}
namespace
{
class HoughSegmentDetectorImpl : public HoughSegmentDetector
{
public:
HoughSegmentDetectorImpl(float rho, float theta, int minLineLength, int maxLineGap, int maxLines) :
rho_(rho), theta_(theta), minLineLength_(minLineLength), maxLineGap_(maxLineGap), maxLines_(maxLines)
{
}
void detect(InputArray src, OutputArray lines);
void setRho(float rho) { rho_ = rho; }
float getRho() const { return rho_; }
void setTheta(float theta) { theta_ = theta; }
float getTheta() const { return theta_; }
void setMinLineLength(int minLineLength) { minLineLength_ = minLineLength; }
int getMinLineLength() const { return minLineLength_; }
void setMaxLineGap(int maxLineGap) { maxLineGap_ = maxLineGap; }
int getMaxLineGap() const { return maxLineGap_; }
void setMaxLines(int maxLines) { maxLines_ = maxLines; }
int getMaxLines() const { return maxLines_; }
void write(FileStorage& fs) const
{
fs << "name" << "PHoughLinesDetector_GPU"
<< "rho" << rho_
<< "theta" << theta_
<< "minLineLength" << minLineLength_
<< "maxLineGap" << maxLineGap_
<< "maxLines" << maxLines_;
}
void read(const FileNode& fn)
{
CV_Assert( String(fn["name"]) == "PHoughLinesDetector_GPU" );
rho_ = (float)fn["rho"];
theta_ = (float)fn["theta"];
minLineLength_ = (int)fn["minLineLength"];
maxLineGap_ = (int)fn["maxLineGap"];
maxLines_ = (int)fn["maxLines"];
}
private:
float rho_;
float theta_;
int minLineLength_;
int maxLineGap_;
int maxLines_;
GpuMat accum_;
GpuMat list_;
GpuMat result_;
};
void HoughSegmentDetectorImpl::detect(InputArray _src, OutputArray lines)
{
using namespace cv::gpu::cudev::hough;
using namespace cv::gpu::cudev::hough_lines;
using namespace cv::gpu::cudev::hough_segments;
GpuMat src = _src.getGpuMat();
CV_Assert( src.type() == CV_8UC1 );
CV_Assert( src.cols < std::numeric_limits<unsigned short>::max() );
CV_Assert( src.rows < std::numeric_limits<unsigned short>::max() );
ensureSizeIsEnough(1, src.size().area(), CV_32SC1, list_);
unsigned int* srcPoints = list_.ptr<unsigned int>();
const int pointsCount = buildPointList_gpu(src, srcPoints);
if (pointsCount == 0)
{
lines.release();
return;
}
const int numangle = cvRound(CV_PI / theta_);
const int numrho = cvRound(((src.cols + src.rows) * 2 + 1) / rho_);
CV_Assert( numangle > 0 && numrho > 0 );
ensureSizeIsEnough(numangle + 2, numrho + 2, CV_32SC1, accum_);
accum_.setTo(Scalar::all(0));
DeviceInfo devInfo;
linesAccum_gpu(srcPoints, pointsCount, accum_, rho_, theta_, devInfo.sharedMemPerBlock(), devInfo.supports(FEATURE_SET_COMPUTE_20));
ensureSizeIsEnough(1, maxLines_, CV_32SC4, result_);
int linesCount = houghLinesProbabilistic_gpu(src, accum_, result_.ptr<int4>(), maxLines_, rho_, theta_, maxLineGap_, minLineLength_);
if (linesCount == 0)
{
lines.release();
return;
}
result_.cols = linesCount;
result_.copyTo(lines);
}
}
Ptr<HoughSegmentDetector> cv::gpu::createHoughSegmentDetector(float rho, float theta, int minLineLength, int maxLineGap, int maxLines)
{
return new HoughSegmentDetectorImpl(rho, theta, minLineLength, maxLineGap, maxLines);
}
#endif /* !defined (HAVE_CUDA) */
...@@ -46,6 +46,7 @@ ...@@ -46,6 +46,7 @@
#include "opencv2/gpuimgproc.hpp" #include "opencv2/gpuimgproc.hpp"
#include "opencv2/gpufilters.hpp" #include "opencv2/gpufilters.hpp"
#include "opencv2/core/utility.hpp"
#include "opencv2/core/private.hpp" #include "opencv2/core/private.hpp"
#include "opencv2/core/private.gpu.hpp" #include "opencv2/core/private.gpu.hpp"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment