Commit 28716d7f
authored Nov 12, 2012 by Vladislav Vinogradov
Canny
parent e2995956

Showing 4 changed files with 267 additions and 294 deletions (+267 −294):
modules/gpu/include/opencv2/gpu/gpu.hpp  (+8 −16)
modules/gpu/src/cuda/canny.cu  (+219 −215)
modules/gpu/src/imgproc.cpp  (+39 −62)
modules/gpu/test/test_imgproc.cpp  (+1 −1)
modules/gpu/include/opencv2/gpu/gpu.hpp

...
@@ -792,31 +792,23 @@ private:
     GpuMat lab, l, ab;
 };

-struct CV_EXPORTS CannyBuf;
-
-CV_EXPORTS void Canny(const GpuMat& image, GpuMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
-CV_EXPORTS void Canny(const GpuMat& image, CannyBuf& buf, GpuMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
-CV_EXPORTS void Canny(const GpuMat& dx, const GpuMat& dy, GpuMat& edges, double low_thresh, double high_thresh, bool L2gradient = false);
-CV_EXPORTS void Canny(const GpuMat& dx, const GpuMat& dy, CannyBuf& buf, GpuMat& edges, double low_thresh, double high_thresh, bool L2gradient = false);
-
 struct CV_EXPORTS CannyBuf
 {
-    CannyBuf() {}
-    explicit CannyBuf(const Size& image_size, int apperture_size = 3) {create(image_size, apperture_size);}
-    CannyBuf(const GpuMat& dx_, const GpuMat& dy_);
-
     void create(const Size& image_size, int apperture_size = 3);
     void release();

     GpuMat dx, dy;
-    GpuMat dx_buf, dy_buf;
-    GpuMat edgeBuf;
-    GpuMat trackBuf1, trackBuf2;
+    GpuMat mag;
+    GpuMat map;
+    GpuMat st1, st2;
     Ptr<FilterEngine_GPU> filterDX, filterDY;
 };

+CV_EXPORTS void Canny(const GpuMat& image, GpuMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
+CV_EXPORTS void Canny(const GpuMat& image, CannyBuf& buf, GpuMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
+CV_EXPORTS void Canny(const GpuMat& dx, const GpuMat& dy, GpuMat& edges, double low_thresh, double high_thresh, bool L2gradient = false);
+CV_EXPORTS void Canny(const GpuMat& dx, const GpuMat& dy, CannyBuf& buf, GpuMat& edges, double low_thresh, double high_thresh, bool L2gradient = false);
+
 class CV_EXPORTS ImagePyramid
 {
 public:
...
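The header change above is the whole of the new public surface: CannyBuf loses its constructors and its dx_buf/dy_buf/edgeBuf/trackBuf storage in favor of mag, map, st1 and st2, and the four Canny overloads now follow the struct they depend on. A minimal host-side sketch of the buffered overload, assuming this commit's header; the file names are hypothetical, and any CV_8UC1 input works since the implementation asserts that type:

    #include <opencv2/gpu/gpu.hpp>
    #include <opencv2/highgui/highgui.hpp>

    int main()
    {
        // Hypothetical input path; cv::gpu::Canny requires a CV_8UC1 image.
        cv::Mat src = cv::imread("input.png", CV_LOAD_IMAGE_GRAYSCALE);

        cv::gpu::GpuMat d_src(src);
        cv::gpu::GpuMat d_edges;

        // Reusing one CannyBuf across calls avoids reallocating the
        // dx/dy/mag/map/st1/st2 intermediates for same-sized images.
        cv::gpu::CannyBuf buf;
        cv::gpu::Canny(d_src, buf, d_edges, 50.0, 100.0, 3, false);

        cv::Mat edges;
        d_edges.download(edges);
        cv::imwrite("edges.png", edges);
        return 0;
    }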
modules/gpu/src/cuda/canny.cu

...
@@ -43,172 +43,151 @@
 #if !defined CUDA_DISABLER

 #include <utility>
+#include <algorithm>

-#include "internal_shared.hpp"
+#include "opencv2/gpu/device/common.hpp"
+#include "opencv2/gpu/device/emulation.hpp"
+#include "opencv2/gpu/device/transform.hpp"
+#include "opencv2/gpu/device/functional.hpp"
+#include "opencv2/gpu/device/utility.hpp"

-namespace cv { namespace gpu { namespace device
-{
-    namespace canny
-    {
-        __global__ void calcSobelRowPass(const PtrStepb src, PtrStepi dx_buf, PtrStepi dy_buf, int rows, int cols)
-        {
-            __shared__ int smem[16][18];
-
-            const int j = blockIdx.x * blockDim.x + threadIdx.x;
-            const int i = blockIdx.y * blockDim.y + threadIdx.y;
-
-            if (i < rows)
-            {
-                smem[threadIdx.y][threadIdx.x + 1] = src.ptr(i)[j];
-                if (threadIdx.x == 0)
-                {
-                    smem[threadIdx.y][0] = src.ptr(i)[::max(j - 1, 0)];
-                    smem[threadIdx.y][17] = src.ptr(i)[::min(j + 16, cols - 1)];
-                }
-                __syncthreads();
-
-                if (j < cols)
-                {
-                    dx_buf.ptr(i)[j] = -smem[threadIdx.y][threadIdx.x] + smem[threadIdx.y][threadIdx.x + 2];
-                    dy_buf.ptr(i)[j] = smem[threadIdx.y][threadIdx.x] + 2 * smem[threadIdx.y][threadIdx.x + 1] + smem[threadIdx.y][threadIdx.x + 2];
-                }
-            }
-        }
-
-        void calcSobelRowPass_gpu(PtrStepb src, PtrStepi dx_buf, PtrStepi dy_buf, int rows, int cols)
-        {
-            dim3 block(16, 16, 1);
-            dim3 grid(divUp(cols, block.x), divUp(rows, block.y), 1);
-
-            calcSobelRowPass<<<grid, block>>>(src, dx_buf, dy_buf, rows, cols);
-            cudaSafeCall( cudaGetLastError() );
-
-            cudaSafeCall( cudaDeviceSynchronize() );
-        }
-
-        struct L1
-        {
-            static __device__ __forceinline__ float calc(int x, int y)
-            {
-                return ::abs(x) + ::abs(y);
-            }
-        };
-        struct L2
-        {
-            static __device__ __forceinline__ float calc(int x, int y)
-            {
-                return ::sqrtf(x * x + y * y);
-            }
-        };
+using namespace cv::gpu;
+using namespace cv::gpu::device;
+
+namespace
+{
+    struct L1 : binary_function<int, int, float>
+    {
+        __device__ __forceinline__ float operator ()(int x, int y) const
+        {
+            return ::abs(x) + ::abs(y);
+        }
+
+        __device__ __forceinline__ L1() {}
+        __device__ __forceinline__ L1(const L1&) {}
+    };
+    struct L2 : binary_function<int, int, float>
+    {
+        __device__ __forceinline__ float operator ()(int x, int y) const
+        {
+            return ::sqrtf(x * x + y * y);
+        }
+
+        __device__ __forceinline__ L2() {}
+        __device__ __forceinline__ L2(const L2&) {}
+    };
+}
+
+namespace cv { namespace gpu { namespace device
+{
+    template <> struct TransformFunctorTraits<L1> : DefaultTransformFunctorTraits<L1>
+    {
+        enum { smart_shift = 4 };
+    };
+    template <> struct TransformFunctorTraits<L2> : DefaultTransformFunctorTraits<L2>
+    {
+        enum { smart_shift = 4 };
+    };
+}}}

-        template <typename Norm> __global__ void calcMagnitude(const PtrStepi dx_buf, const PtrStepi dy_buf, PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols)
-        {
-            __shared__ int sdx[18][16];
-            __shared__ int sdy[18][16];
-
-            const int j = blockIdx.x * blockDim.x + threadIdx.x;
-            const int i = blockIdx.y * blockDim.y + threadIdx.y;
-
-            if (j < cols)
-            {
-                sdx[threadIdx.y + 1][threadIdx.x] = dx_buf.ptr(i)[j];
-                sdy[threadIdx.y + 1][threadIdx.x] = dy_buf.ptr(i)[j];
-                if (threadIdx.y == 0)
-                {
-                    sdx[0][threadIdx.x] = dx_buf.ptr(::max(i - 1, 0))[j];
-                    sdx[17][threadIdx.x] = dx_buf.ptr(::min(i + 16, rows - 1))[j];
-
-                    sdy[0][threadIdx.x] = dy_buf.ptr(::max(i - 1, 0))[j];
-                    sdy[17][threadIdx.x] = dy_buf.ptr(::min(i + 16, rows - 1))[j];
-                }
-                __syncthreads();
-
-                if (i < rows)
-                {
-                    int x = sdx[threadIdx.y][threadIdx.x] + 2 * sdx[threadIdx.y + 1][threadIdx.x] + sdx[threadIdx.y + 2][threadIdx.x];
-                    int y = -sdy[threadIdx.y][threadIdx.x] + sdy[threadIdx.y + 2][threadIdx.x];
-
-                    dx.ptr(i)[j] = x;
-                    dy.ptr(i)[j] = y;
-
-                    mag.ptr(i + 1)[j + 1] = Norm::calc(x, y);
-                }
-            }
-        }
-
-        void calcMagnitude_gpu(PtrStepi dx_buf, PtrStepi dy_buf, PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols, bool L2Grad)
-        {
-            dim3 block(16, 16, 1);
-            dim3 grid(divUp(cols, block.x), divUp(rows, block.y), 1);
-
-            if (L2Grad)
-                calcMagnitude<L2><<<grid, block>>>(dx_buf, dy_buf, dx, dy, mag, rows, cols);
-            else
-                calcMagnitude<L1><<<grid, block>>>(dx_buf, dy_buf, dx, dy, mag, rows, cols);
-
-            cudaSafeCall( cudaGetLastError() );
-            cudaSafeCall(cudaThreadSynchronize());
-        }
-
-        template <typename Norm> __global__ void calcMagnitude(PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols)
-        {
-            const int j = blockIdx.x * blockDim.x + threadIdx.x;
-            const int i = blockIdx.y * blockDim.y + threadIdx.y;
-
-            if (i < rows && j < cols)
-                mag.ptr(i + 1)[j + 1] = Norm::calc(dx.ptr(i)[j], dy.ptr(i)[j]);
-        }
-
-        void calcMagnitude_gpu(PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols, bool L2Grad)
-        {
-            dim3 block(16, 16, 1);
-            dim3 grid(divUp(cols, block.x), divUp(rows, block.y), 1);
-
-            if (L2Grad)
-                calcMagnitude<L2><<<grid, block>>>(dx, dy, mag, rows, cols);
-            else
-                calcMagnitude<L1><<<grid, block>>>(dx, dy, mag, rows, cols);
-
-            cudaSafeCall( cudaGetLastError() );
-            cudaSafeCall( cudaDeviceSynchronize() );
-        }
+namespace
+{
+    texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_src(false, cudaFilterModePoint, cudaAddressModeClamp);
+
+    struct SrcTex
+    {
+        const int xoff;
+        const int yoff;
+        __host__ SrcTex(int _xoff, int _yoff) : xoff(_xoff), yoff(_yoff) {}
+
+        __device__ __forceinline__ int operator ()(int y, int x) const
+        {
+            return tex2D(tex_src, x + xoff, y + yoff);
+        }
+    };
+
+    template <class Norm> __global__
+    void calcMagnitude(const SrcTex src, PtrStepi dx, PtrStepi dy, PtrStepSzf mag, const Norm norm)
+    {
+        const int x = blockIdx.x * blockDim.x + threadIdx.x;
+        const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+        if (y >= mag.rows || x >= mag.cols)
+            return;
+
+        int dxVal = (src(y - 1, x + 1) + 2 * src(y, x + 1) + src(y + 1, x + 1)) - (src(y - 1, x - 1) + 2 * src(y, x - 1) + src(y + 1, x - 1));
+        int dyVal = (src(y + 1, x - 1) + 2 * src(y + 1, x) + src(y + 1, x + 1)) - (src(y - 1, x - 1) + 2 * src(y - 1, x) + src(y - 1, x + 1));
+
+        dx(y, x) = dxVal;
+        dy(y, x) = dyVal;
+        mag(y, x) = norm(dxVal, dyVal);
+    }
+}
+
+namespace canny
+{
+    void calcMagnitude(PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad)
+    {
+        const dim3 block(16, 16);
+        const dim3 grid(divUp(mag.cols, block.x), divUp(mag.rows, block.y));
+
+        bindTexture(&tex_src, srcWhole);
+        SrcTex src(xoff, yoff);
+
+        if (L2Grad)
+        {
+            L2 norm;
+            ::calcMagnitude<<<grid, block>>>(src, dx, dy, mag, norm);
+        }
+        else
+        {
+            L1 norm;
+            ::calcMagnitude<<<grid, block>>>(src, dx, dy, mag, norm);
+        }

         cudaSafeCall( cudaGetLastError() );
         cudaSafeCall(cudaThreadSynchronize());
     }

+    void calcMagnitude(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad)
+    {
+        if (L2Grad)
+        {
+            L2 norm;
+            transform(dx, dy, mag, norm, WithOutMask(), 0);
+        }
+        else
+        {
+            L1 norm;
+            transform(dx, dy, mag, norm, WithOutMask(), 0);
+        }
+    }
+}

 //////////////////////////////////////////////////////////////////////////////////////////

-        #define CANNY_SHIFT 15
-        #define TG22        (int)(0.4142135623730950488016887242097*(1<<CANNY_SHIFT) + 0.5)
+namespace
+{
+    texture<float, cudaTextureType2D, cudaReadModeElementType> tex_mag(false, cudaFilterModePoint, cudaAddressModeClamp);

-        __global__ void calcMap(const PtrStepi dx, const PtrStepi dy, const PtrStepf mag, PtrStepi map, int rows, int cols, float low_thresh, float high_thresh)
-        {
-            __shared__ float smem[18][18];
-
-            const int j = blockIdx.x * 16 + threadIdx.x;
-            const int i = blockIdx.y * 16 + threadIdx.y;
-
-            const int tid = threadIdx.y * 16 + threadIdx.x;
-            const int lx = tid % 18;
-            const int ly = tid / 18;
-
-            if (ly < 14)
-                smem[ly][lx] = mag.ptr(blockIdx.y * 16 + ly)[blockIdx.x * 16 + lx];
-
-            if (ly < 4 && blockIdx.y * 16 + ly + 14 <= rows && blockIdx.x * 16 + lx <= cols)
-                smem[ly + 14][lx] = mag.ptr(blockIdx.y * 16 + ly + 14)[blockIdx.x * 16 + lx];
-
-            __syncthreads();
+    __global__ void calcMap(const PtrStepSzi dx, const PtrStepi dy, PtrStepi map, const float low_thresh, const float high_thresh)
+    {
+        const int CANNY_SHIFT = 15;
+        const int TG22 = (int)(0.4142135623730950488016887242097*(1<<CANNY_SHIFT) + 0.5);

-            if (i < rows && j < cols)
-            {
-                int x = dx.ptr(i)[j];
-                int y = dy.ptr(i)[j];
-                const int s = (x ^ y) < 0 ? -1 : 1;
-                const float m = smem[threadIdx.y + 1][threadIdx.x + 1];
-
-                x = ::abs(x);
-                y = ::abs(y);
+        const int x = blockIdx.x * blockDim.x + threadIdx.x;
+        const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+        if (x >= dx.cols || y >= dx.rows)
+            return;
+
+        int dxVal = dx(y, x);
+        int dyVal = dy(y, x);
+
+        const int s = (dxVal ^ dyVal) < 0 ? -1 : 1;
+        const float m = tex2D(tex_mag, x, y);
+
+        dxVal = ::abs(dxVal);
+        dyVal = ::abs(dyVal);

         // 0 - the pixel can not belong to an edge
         // 1 - the pixel might belong to an edge
...
@@ -217,73 +196,84 @@ namespace cv { namespace gpu { namespace device
         if (m > low_thresh)
         {
-                const int tg22x = x * TG22;
-                const int tg67x = tg22x + ((x + x) << CANNY_SHIFT);
-
-                y <<= CANNY_SHIFT;
-
-                if (y < tg22x)
-                {
-                    if (m > smem[threadIdx.y + 1][threadIdx.x] && m >= smem[threadIdx.y + 1][threadIdx.x + 2])
-                        edge_type = 1 + (int)(m > high_thresh);
-                }
-                else if( y > tg67x )
-                {
-                    if (m > smem[threadIdx.y][threadIdx.x + 1] && m >= smem[threadIdx.y + 2][threadIdx.x + 1])
-                        edge_type = 1 + (int)(m > high_thresh);
-                }
-                else
-                {
-                    if (m > smem[threadIdx.y][threadIdx.x + 1 - s] && m > smem[threadIdx.y + 2][threadIdx.x + 1 + s])
-                        edge_type = 1 + (int)(m > high_thresh);
-                }
-            }
-
-            map.ptr(i + 1)[j + 1] = edge_type;
-        }
-    }
+            const int tg22x = dxVal * TG22;
+            const int tg67x = tg22x + ((dxVal + dxVal) << CANNY_SHIFT);
+
+            dyVal <<= CANNY_SHIFT;
+
+            if (dyVal < tg22x)
+            {
+                if (m > tex2D(tex_mag, x - 1, y) && m >= tex2D(tex_mag, x + 1, y))
+                    edge_type = 1 + (int)(m > high_thresh);
+            }
+            else if(dyVal > tg67x)
+            {
+                if (m > tex2D(tex_mag, x, y - 1) && m >= tex2D(tex_mag, x, y + 1))
+                    edge_type = 1 + (int)(m > high_thresh);
+            }
+            else
+            {
+                if (m > tex2D(tex_mag, x - s, y - 1) && m >= tex2D(tex_mag, x + s, y + 1))
+                    edge_type = 1 + (int)(m > high_thresh);
+            }
+        }
+
+        map(y, x) = edge_type;
+    }
+}

-    #undef CANNY_SHIFT
-    #undef TG22
-
-    void calcMap_gpu(PtrStepi dx, PtrStepi dy, PtrStepf mag, PtrStepi map, int rows, int cols, float low_thresh, float high_thresh)
+namespace canny
+{
+    void calcMap(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, PtrStepSzi map, float low_thresh, float high_thresh)
     {
-        dim3 block(16, 16, 1);
-        dim3 grid(divUp(cols, block.x), divUp(rows, block.y), 1);
+        const dim3 block(16, 16);
+        const dim3 grid(divUp(dx.cols, block.x), divUp(dx.rows, block.y));
+
+        bindTexture(&tex_mag, mag);

-        calcMap<<<grid, block>>>(dx, dy, mag, map, rows, cols, low_thresh, high_thresh);
+        ::calcMap<<<grid, block>>>(dx, dy, map, low_thresh, high_thresh);
         cudaSafeCall( cudaGetLastError() );

         cudaSafeCall( cudaDeviceSynchronize() );
     }
+}

 //////////////////////////////////////////////////////////////////////////////////////////

-    __device__ unsigned int counter = 0;
+namespace
+{
+    __device__ int counter = 0;

-    __global__ void edgesHysteresisLocal(PtrStepi map, ushort2* st, int rows, int cols)
+    __global__ void edgesHysteresisLocal(PtrStepSzi map, ushort2* st)
     {
-        #if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ >= 120)
-
-        __shared__ int smem[18][18];
+        __shared__ volatile int smem[18][18];

-        const int j = blockIdx.x * 16 + threadIdx.x;
-        const int i = blockIdx.y * 16 + threadIdx.y;
-
-        const int tid = threadIdx.y * 16 + threadIdx.x;
-        const int lx = tid % 18;
-        const int ly = tid / 18;
-
-        if (ly < 14)
-            smem[ly][lx] = map.ptr(blockIdx.y * 16 + ly)[blockIdx.x * 16 + lx];
-
-        if (ly < 4 && blockIdx.y * 16 + ly + 14 <= rows && blockIdx.x * 16 + lx <= cols)
-            smem[ly + 14][lx] = map.ptr(blockIdx.y * 16 + ly + 14)[blockIdx.x * 16 + lx];
+        const int x = blockIdx.x * blockDim.x + threadIdx.x;
+        const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+        smem[threadIdx.y + 1][threadIdx.x + 1] = x < map.cols && y < map.rows ? map(y, x) : 0;
+        if (threadIdx.y == 0)
+            smem[0][threadIdx.x + 1] = y > 0 ? map(y - 1, x) : 0;
+        if (threadIdx.y == blockDim.y - 1)
+            smem[blockDim.y + 1][threadIdx.x + 1] = y + 1 < map.rows ? map(y + 1, x) : 0;
+        if (threadIdx.x == 0)
+            smem[threadIdx.y + 1][0] = x > 0 ? map(y, x - 1) : 0;
+        if (threadIdx.x == blockDim.x - 1)
+            smem[threadIdx.y + 1][blockDim.x + 1] = x + 1 < map.cols ? map(y, x + 1) : 0;
+        if (threadIdx.x == 0 && threadIdx.y == 0)
+            smem[0][0] = y > 0 && x > 0 ? map(y - 1, x - 1) : 0;
+        if (threadIdx.x == blockDim.x - 1 && threadIdx.y == 0)
+            smem[0][blockDim.x + 1] = y > 0 && x + 1 < map.cols ? map(y - 1, x + 1) : 0;
+        if (threadIdx.x == 0 && threadIdx.y == blockDim.y - 1)
+            smem[blockDim.y + 1][0] = y + 1 < map.rows && x > 0 ? map(y + 1, x - 1) : 0;
+        if (threadIdx.x == blockDim.x - 1 && threadIdx.y == blockDim.y - 1)
+            smem[blockDim.y + 1][blockDim.x + 1] = y + 1 < map.rows && x + 1 < map.cols ? map(y + 1, x + 1) : 0;

         __syncthreads();

-        if (i < rows && j < cols)
-        {
+        if (x >= map.cols || y >= map.rows)
+            return;
+
         int n;

         #pragma unroll
...
@@ -311,7 +301,7 @@ namespace cv { namespace gpu { namespace device
         const int e = smem[threadIdx.y + 1][threadIdx.x + 1];

-            map.ptr(i + 1)[j + 1] = e;
+        map(y, x) = e;

         n = 0;
...
@@ -331,69 +321,73 @@ namespace cv { namespace gpu { namespace device
         if (n > 0)
         {
-                const unsigned int ind = atomicInc(&counter, (unsigned int)(-1));
-                st[ind] = make_ushort2(j + 1, i + 1);
+            const int ind = ::atomicAdd(&counter, 1);
+            st[ind] = make_ushort2(x, y);
         }
-        }
-
-        #endif
     }
+}

-    void edgesHysteresisLocal_gpu(PtrStepi map, ushort2* st1, int rows, int cols)
+namespace canny
+{
+    void edgesHysteresisLocal(PtrStepSzi map, ushort2* st1)
     {
         void* counter_ptr;
         cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, counter) );

-        cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(unsigned int)) );
+        cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(int)) );

-        dim3 block(16, 16, 1);
-        dim3 grid(divUp(cols, block.x), divUp(rows, block.y), 1);
+        const dim3 block(16, 16);
+        const dim3 grid(divUp(map.cols, block.x), divUp(map.rows, block.y));

-        edgesHysteresisLocal<<<grid, block>>>(map, st1, rows, cols);
+        ::edgesHysteresisLocal<<<grid, block>>>(map, st1);
         cudaSafeCall( cudaGetLastError() );

         cudaSafeCall( cudaDeviceSynchronize() );
     }
+}

 //////////////////////////////////////////////////////////////////////////////////////////

+namespace
+{
     __constant__ int c_dx[8] = {-1,  0,  1, -1,  1, -1,  0,  1};
     __constant__ int c_dy[8] = {-1, -1, -1,  0,  0,  1,  1,  1};

-    __global__ void edgesHysteresisGlobal(PtrStepi map, ushort2* st1, ushort2* st2, int rows, int cols, int count)
+    __global__ void edgesHysteresisGlobal(PtrStepSzi map, ushort2* st1, ushort2* st2, const int count)
     {
-        #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 120
-
         const int stack_size = 512;

-        __shared__ unsigned int s_counter;
-        __shared__ unsigned int s_ind;
+        __shared__ int s_counter;
+        __shared__ int s_ind;
+
         __shared__ ushort2 s_st[stack_size];

         if (threadIdx.x == 0)
             s_counter = 0;
+
         __syncthreads();

         int ind = blockIdx.y * gridDim.x + blockIdx.x;

-        if (ind < count)
-        {
-            ushort2 pos = st1[ind];
+        if (ind >= count)
+            return;

-            if (pos.x > 0 && pos.x <= cols && pos.y > 0 && pos.y <= rows)
-            {
-                if (threadIdx.x < 8)
-                {
-                    pos.x += c_dx[threadIdx.x];
-                    pos.y += c_dy[threadIdx.x];
-
-                    if (map.ptr(pos.y)[pos.x] == 1)
-                    {
-                        map.ptr(pos.y)[pos.x] = 2;
-
-                        ind = atomicInc(&s_counter, (unsigned int)(-1));
-
-                        s_st[ind] = pos;
-                    }
-                }
-            }
+        ushort2 pos = st1[ind];
+
+        if (threadIdx.x < 8)
+        {
+            pos.x += c_dx[threadIdx.x];
+            pos.y += c_dy[threadIdx.x];
+
+            if (pos.x > 0 && pos.x <= map.cols && pos.y > 0 && pos.y <= map.rows && map(pos.y, pos.x) == 1)
+            {
+                map(pos.y, pos.x) = 2;
+
+                ind = Emulation::smem::atomicAdd(&s_counter, 1);
+
+                s_st[ind] = pos;
+            }
+        }

         __syncthreads();

         while (s_counter > 0 && s_counter <= stack_size - blockDim.x)
...
@@ -401,30 +395,31 @@ namespace cv { namespace gpu { namespace device
             const int subTaskIdx = threadIdx.x >> 3;
             const int portion = ::min(s_counter, blockDim.x >> 3);

-            pos.x = pos.y = 0;
-
             if (subTaskIdx < portion)
                 pos = s_st[s_counter - 1 - subTaskIdx];

             __syncthreads();

             if (threadIdx.x == 0)
                 s_counter -= portion;

             __syncthreads();

-            if (pos.x > 0 && pos.x <= cols && pos.y > 0 && pos.y <= rows)
+            if (subTaskIdx < portion)
             {
                 pos.x += c_dx[threadIdx.x & 7];
                 pos.y += c_dy[threadIdx.x & 7];

-                if (map.ptr(pos.y)[pos.x] == 1)
+                if (pos.x > 0 && pos.x <= map.cols && pos.y > 0 && pos.y <= map.rows && map(pos.y, pos.x) == 1)
                 {
-                    map.ptr(pos.y)[pos.x] = 2;
+                    map(pos.y, pos.x) = 2;

-                    ind = atomicInc(&s_counter, (unsigned int)(-1));
+                    ind = Emulation::smem::atomicAdd(&s_counter, 1);

                     s_st[ind] = pos;
                 }
             }

             __syncthreads();
         }
...
@@ -432,70 +427,79 @@ namespace cv { namespace gpu { namespace device
         {
             if (threadIdx.x == 0)
             {
-                ind = atomicAdd(&counter, s_counter);
+                ind = ::atomicAdd(&counter, s_counter);
                 s_ind = ind - s_counter;
             }

             __syncthreads();

             ind = s_ind;

             for (int i = threadIdx.x; i < s_counter; i += blockDim.x)
             {
                 st2[ind + i] = s_st[i];
             }
         }
-        }
-
-        #endif
     }
+}

-    void edgesHysteresisGlobal_gpu(PtrStepi map, ushort2* st1, ushort2* st2, int rows, int cols)
+namespace canny
+{
+    void edgesHysteresisGlobal(PtrStepSzi map, ushort2* st1, ushort2* st2)
     {
         void* counter_ptr;
-        cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, counter) );
+        cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, ::counter) );

-        unsigned int count;
-        cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
+        int count;
+        cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) );

         while (count > 0)
         {
-            cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(unsigned int)) );
+            cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(int)) );

-            dim3 block(128, 1, 1);
-            dim3 grid(std::min(count, 65535u), divUp(count, 65535), 1);
-            edgesHysteresisGlobal<<<grid, block>>>(map, st1, st2, rows, cols, count);
+            const dim3 block(128);
+            const dim3 grid(::min(count, 65535u), divUp(count, 65535), 1);
+
+            ::edgesHysteresisGlobal<<<grid, block>>>(map, st1, st2, count);
             cudaSafeCall( cudaGetLastError() );

             cudaSafeCall( cudaDeviceSynchronize() );

-            cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
+            cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) );

             std::swap(st1, st2);
         }
     }
+}

-    __global__ void getEdges(PtrStepi map, PtrStepb dst, int rows, int cols)
-    {
-        const int j = blockIdx.x * 16 + threadIdx.x;
-        const int i = blockIdx.y * 16 + threadIdx.y;
-
-        if (i < rows && j < cols)
-            dst.ptr(i)[j] = (uchar)(-(map.ptr(i + 1)[j + 1] >> 1));
-    }
+//////////////////////////////////////////////////////////////////////////////////////////

-    void getEdges_gpu(PtrStepi map, PtrStepb dst, int rows, int cols)
-    {
-        dim3 block(16, 16, 1);
-        dim3 grid(divUp(cols, block.x), divUp(rows, block.y), 1);
-
-        getEdges<<<grid, block>>>(map, dst, rows, cols);
-        cudaSafeCall( cudaGetLastError() );
-
-        cudaSafeCall( cudaDeviceSynchronize() );
-    }
+namespace
+{
+    struct GetEdges : unary_function<int, uchar>
+    {
+        __device__ __forceinline__ uchar operator ()(int e) const
+        {
+            return (uchar)(-(e >> 1));
+        }
+
+        __device__ __forceinline__ GetEdges() {}
+        __device__ __forceinline__ GetEdges(const GetEdges&) {}
+    };
+}
+
+namespace cv { namespace gpu { namespace device
+{
+    template <> struct TransformFunctorTraits<GetEdges> : DefaultTransformFunctorTraits<GetEdges>
+    {
+        enum { smart_shift = 4 };
+    };
+}}}
+
+namespace canny
+{
+    void getEdges(PtrStepSzi map, PtrStepSzb dst)
+    {
+        transform(map, dst, GetEdges(), WithOutMask(), 0);
+    }
+}

-    } // namespace canny
-}}} // namespace cv { namespace gpu { namespace device

 #endif /* CUDA_DISABLER */
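For orientation: the fused kernel above computes the full 3x3 Sobel pair and its norm in one pass over the clamped source texture, where the old code needed calcSobelRowPass followed by a column pass inside calcMagnitude. A host-side reference of the same arithmetic, a sketch only, with explicit clamping standing in for the texture's cudaAddressModeClamp:

    #include <cmath>
    #include <cstdlib>

    // Clamped read, mirroring cudaAddressModeClamp; src is rows x cols, row-major.
    static inline int at(const unsigned char* src, int rows, int cols, int y, int x)
    {
        y = y < 0 ? 0 : (y >= rows ? rows - 1 : y);
        x = x < 0 ? 0 : (x >= cols ? cols - 1 : x);
        return src[y * cols + x];
    }

    // Per-pixel equivalent of the fused calcMagnitude kernel.
    void sobelMagnitudeRef(const unsigned char* src, int rows, int cols,
                           int* dx, int* dy, float* mag, bool L2Grad)
    {
        for (int y = 0; y < rows; ++y)
        {
            for (int x = 0; x < cols; ++x)
            {
                // Same dxVal/dyVal expressions as the kernel.
                int dxVal = (at(src, rows, cols, y - 1, x + 1) + 2 * at(src, rows, cols, y, x + 1) + at(src, rows, cols, y + 1, x + 1))
                          - (at(src, rows, cols, y - 1, x - 1) + 2 * at(src, rows, cols, y, x - 1) + at(src, rows, cols, y + 1, x - 1));
                int dyVal = (at(src, rows, cols, y + 1, x - 1) + 2 * at(src, rows, cols, y + 1, x) + at(src, rows, cols, y + 1, x + 1))
                          - (at(src, rows, cols, y - 1, x - 1) + 2 * at(src, rows, cols, y - 1, x) + at(src, rows, cols, y - 1, x + 1));

                dx[y * cols + x] = dxVal;
                dy[y * cols + x] = dyVal;
                mag[y * cols + x] = L2Grad ? std::sqrt((float)(dxVal * dxVal + dyVal * dyVal))
                                           : (float)(std::abs(dxVal) + std::abs(dyVal));
            }
        }
    }

The calcMap hunk keeps the old fixed-point sector test: TG22 is tan(22.5°) ≈ 0.4142 scaled by 2^CANNY_SHIFT = 2^15, so comparing dyVal << CANNY_SHIFT against dxVal * TG22 (and against tg67x for tan(67.5°)) classifies the gradient direction into the horizontal, vertical, or diagonal sector with integer math only; the constants merely moved from #defines into the kernel body.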
modules/gpu/src/imgproc.cpp

...
@@ -91,7 +91,6 @@ void cv::gpu::Canny(const GpuMat&, GpuMat&, double, double, int, bool) { throw_n
 void cv::gpu::Canny(const GpuMat&, CannyBuf&, GpuMat&, double, double, int, bool) { throw_nogpu(); }
 void cv::gpu::Canny(const GpuMat&, const GpuMat&, GpuMat&, double, double, bool) { throw_nogpu(); }
 void cv::gpu::Canny(const GpuMat&, const GpuMat&, CannyBuf&, GpuMat&, double, double, bool) { throw_nogpu(); }
-cv::gpu::CannyBuf::CannyBuf(const GpuMat&, const GpuMat&) { throw_nogpu(); }
 void cv::gpu::CannyBuf::create(const Size&, int) { throw_nogpu(); }
 void cv::gpu::CannyBuf::release() { throw_nogpu(); }
...
@@ -1466,92 +1465,76 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result,
 //////////////////////////////////////////////////////////////////////////////
 // Canny

-cv::gpu::CannyBuf::CannyBuf(const GpuMat& dx_, const GpuMat& dy_) : dx(dx_), dy(dy_)
-{
-    CV_Assert(dx_.type() == CV_32SC1 && dy_.type() == CV_32SC1 && dx_.size() == dy_.size());
-
-    create(dx_.size(), -1);
-}
-
 void cv::gpu::CannyBuf::create(const Size& image_size, int apperture_size)
 {
-    ensureSizeIsEnough(image_size, CV_32SC1, dx);
-    ensureSizeIsEnough(image_size, CV_32SC1, dy);
-
-    if (apperture_size == 3)
+    if (apperture_size > 0)
     {
-        ensureSizeIsEnough(image_size, CV_32SC1, dx_buf);
-        ensureSizeIsEnough(image_size, CV_32SC1, dy_buf);
-    }
-    else if (apperture_size > 0)
-    {
-        if (!filterDX)
+        ensureSizeIsEnough(image_size, CV_32SC1, dx);
+        ensureSizeIsEnough(image_size, CV_32SC1, dy);
+
+        if (apperture_size != 3)
+        {
             filterDX = createDerivFilter_GPU(CV_8UC1, CV_32S, 1, 0, apperture_size, BORDER_REPLICATE);
-        if (!filterDY)
             filterDY = createDerivFilter_GPU(CV_8UC1, CV_32S, 0, 1, apperture_size, BORDER_REPLICATE);
+        }
     }

-    ensureSizeIsEnough(image_size.height + 2, image_size.width + 2, CV_32FC1, edgeBuf);
+    ensureSizeIsEnough(image_size, CV_32FC1, mag);
+    ensureSizeIsEnough(image_size, CV_32SC1, map);

-    ensureSizeIsEnough(1, image_size.width * image_size.height, CV_16UC2, trackBuf1);
-    ensureSizeIsEnough(1, image_size.width * image_size.height, CV_16UC2, trackBuf2);
+    ensureSizeIsEnough(1, image_size.area(), CV_16UC2, st1);
+    ensureSizeIsEnough(1, image_size.area(), CV_16UC2, st2);
 }

 void cv::gpu::CannyBuf::release()
 {
     dx.release();
     dy.release();
-    dx_buf.release();
-    dy_buf.release();
-    edgeBuf.release();
-    trackBuf1.release();
-    trackBuf2.release();
+    mag.release();
+    map.release();
+    st1.release();
+    st2.release();
 }

-namespace cv { namespace gpu { namespace device
+namespace canny
 {
-    namespace canny
-    {
-        void calcSobelRowPass_gpu(PtrStepb src, PtrStepi dx_buf, PtrStepi dy_buf, int rows, int cols);
-
-        void calcMagnitude_gpu(PtrStepi dx_buf, PtrStepi dy_buf, PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols, bool L2Grad);
-        void calcMagnitude_gpu(PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols, bool L2Grad);
-
-        void calcMap_gpu(PtrStepi dx, PtrStepi dy, PtrStepf mag, PtrStepi map, int rows, int cols, float low_thresh, float high_thresh);
-
-        void edgesHysteresisLocal_gpu(PtrStepi map, ushort2* st1, int rows, int cols);
-
-        void edgesHysteresisGlobal_gpu(PtrStepi map, ushort2* st1, ushort2* st2, int rows, int cols);
-
-        void getEdges_gpu(PtrStepi map, PtrStepb dst, int rows, int cols);
-    }
-}}}
+    void calcMagnitude(PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad);
+    void calcMagnitude(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad);
+
+    void calcMap(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, PtrStepSzi map, float low_thresh, float high_thresh);
+
+    void edgesHysteresisLocal(PtrStepSzi map, ushort2* st1);
+
+    void edgesHysteresisGlobal(PtrStepSzi map, ushort2* st1, ushort2* st2);
+
+    void getEdges(PtrStepSzi map, PtrStepSzb dst);
+}

 namespace
 {
-    void CannyCaller(CannyBuf& buf, GpuMat& dst, float low_thresh, float high_thresh)
+    void CannyCaller(const GpuMat& dx, const GpuMat& dy, CannyBuf& buf, GpuMat& dst, float low_thresh, float high_thresh)
     {
-        using namespace ::cv::gpu::device::canny;
+        using namespace canny;

-        calcMap_gpu(buf.dx, buf.dy, buf.edgeBuf, buf.edgeBuf, dst.rows, dst.cols, low_thresh, high_thresh);
+        calcMap(dx, dy, buf.mag, buf.map, low_thresh, high_thresh);

-        edgesHysteresisLocal_gpu(buf.edgeBuf, buf.trackBuf1.ptr<ushort2>(), dst.rows, dst.cols);
+        edgesHysteresisLocal(buf.map, buf.st1.ptr<ushort2>());

-        edgesHysteresisGlobal_gpu(buf.edgeBuf, buf.trackBuf1.ptr<ushort2>(), buf.trackBuf2.ptr<ushort2>(), dst.rows, dst.cols);
+        edgesHysteresisGlobal(buf.map, buf.st1.ptr<ushort2>(), buf.st2.ptr<ushort2>());

-        getEdges_gpu(buf.edgeBuf, dst, dst.rows, dst.cols);
+        getEdges(buf.map, dst);
     }
 }

 void cv::gpu::Canny(const GpuMat& src, GpuMat& dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient)
 {
-    CannyBuf buf(src.size(), apperture_size);
+    CannyBuf buf;
     Canny(src, buf, dst, low_thresh, high_thresh, apperture_size, L2gradient);
 }

 void cv::gpu::Canny(const GpuMat& src, CannyBuf& buf, GpuMat& dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient)
 {
-    using namespace ::cv::gpu::device::canny;
+    using namespace canny;

     CV_Assert(src.type() == CV_8UC1);
...
@@ -1562,37 +1545,37 @@ void cv::gpu::Canny(const GpuMat& src, CannyBuf& buf, GpuMat& dst, double low_th
         std::swap(low_thresh, high_thresh);

     dst.create(src.size(), CV_8U);
-    dst.setTo(Scalar::all(0));
-
     buf.create(src.size(), apperture_size);
-    buf.edgeBuf.setTo(Scalar::all(0));

     if (apperture_size == 3)
     {
-        calcSobelRowPass_gpu(src, buf.dx_buf, buf.dy_buf, src.rows, src.cols);
-
-        calcMagnitude_gpu(buf.dx_buf, buf.dy_buf, buf.dx, buf.dy, buf.edgeBuf, src.rows, src.cols, L2gradient);
+        Size wholeSize;
+        Point ofs;
+        src.locateROI(wholeSize, ofs);
+        GpuMat srcWhole(wholeSize, src.type(), src.datastart, src.step);
+
+        calcMagnitude(srcWhole, ofs.x, ofs.y, buf.dx, buf.dy, buf.mag, L2gradient);
     }
     else
     {
         buf.filterDX->apply(src, buf.dx, Rect(0, 0, src.cols, src.rows));
         buf.filterDY->apply(src, buf.dy, Rect(0, 0, src.cols, src.rows));

-        calcMagnitude_gpu(buf.dx, buf.dy, buf.edgeBuf, src.rows, src.cols, L2gradient);
+        calcMagnitude(buf.dx, buf.dy, buf.mag, L2gradient);
     }

-    CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));
+    CannyCaller(buf.dx, buf.dy, buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));
 }

 void cv::gpu::Canny(const GpuMat& dx, const GpuMat& dy, GpuMat& dst, double low_thresh, double high_thresh, bool L2gradient)
 {
-    CannyBuf buf(dx, dy);
+    CannyBuf buf;
     Canny(dx, dy, buf, dst, low_thresh, high_thresh, L2gradient);
 }

 void cv::gpu::Canny(const GpuMat& dx, const GpuMat& dy, CannyBuf& buf, GpuMat& dst, double low_thresh, double high_thresh, bool L2gradient)
 {
-    using namespace ::cv::gpu::device::canny;
+    using namespace canny;

     CV_Assert(TargetArchs::builtWith(SHARED_ATOMICS) && DeviceInfo().supports(SHARED_ATOMICS));
     CV_Assert(dx.type() == CV_32SC1 && dy.type() == CV_32SC1 && dx.size() == dy.size());
...
@@ -1601,17 +1584,11 @@ void cv::gpu::Canny(const GpuMat& dx, const GpuMat& dy, CannyBuf& buf, GpuMat& d
         std::swap(low_thresh, high_thresh);

     dst.create(dx.size(), CV_8U);
-    dst.setTo(Scalar::all(0));
-
-    buf.dx = dx; buf.dy = dy;
     buf.create(dx.size(), -1);
-    buf.edgeBuf.setTo(Scalar::all(0));

-    calcMagnitude_gpu(dx, dy, buf.edgeBuf, dx.rows, dx.cols, L2gradient);
+    calcMagnitude(dx, dy, buf.mag, L2gradient);

-    CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));
+    CannyCaller(dx, dy, buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));
 }

 #endif /* !defined (HAVE_CUDA) */
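The gradient-pair overload keeps its old contract: dx and dy must be CV_32SC1, equally sized, and the device must support shared-memory atomics. A sketch of feeding it user-computed gradients, reusing the same derivative-filter setup CannyBuf::create performs for apperture_size != 3 (the aperture of 5 and the helper name are illustrative):

    #include <opencv2/gpu/gpu.hpp>

    // Illustrative helper; d_src is assumed to be a CV_8UC1 GpuMat.
    void cannyFromGradients(const cv::gpu::GpuMat& d_src, cv::gpu::GpuMat& d_edges)
    {
        cv::gpu::GpuMat dx(d_src.size(), CV_32SC1);
        cv::gpu::GpuMat dy(d_src.size(), CV_32SC1);

        // Same filters CannyBuf::create builds for non-3 apertures.
        cv::Ptr<cv::gpu::FilterEngine_GPU> filterDX =
            cv::gpu::createDerivFilter_GPU(CV_8UC1, CV_32S, 1, 0, 5, cv::BORDER_REPLICATE);
        cv::Ptr<cv::gpu::FilterEngine_GPU> filterDY =
            cv::gpu::createDerivFilter_GPU(CV_8UC1, CV_32S, 0, 1, 5, cv::BORDER_REPLICATE);

        filterDX->apply(d_src, dx, cv::Rect(0, 0, d_src.cols, d_src.rows));
        filterDY->apply(d_src, dy, cv::Rect(0, 0, d_src.cols, d_src.rows));

        // L2gradient = true; the implementation asserts SHARED_ATOMICS support.
        cv::gpu::Canny(dx, dy, d_edges, 50.0, 100.0, true);
    }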
modules/gpu/test/test_imgproc.cpp

...
@@ -313,7 +313,7 @@ TEST_P(Canny, Accuracy)
     cv::Mat edges_gold;
     cv::Canny(img, edges_gold, low_thresh, high_thresh, apperture_size, useL2gradient);

-    EXPECT_MAT_SIMILAR(edges_gold, edges, 1e-2);
+    EXPECT_MAT_SIMILAR(edges_gold, edges, 2e-2);
 }
...