Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
1d68a899
Commit
1d68a899
authored
Feb 07, 2014
by
Roman Donchenko
Committed by
OpenCV Buildbot
Feb 07, 2014
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #2303 from jet47:gpu-canny-fix
parents
48f95d92
5dbdadb7
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
39 additions
and
28 deletions
+39
-28
perf_imgproc.cpp
modules/gpu/perf/perf_imgproc.cpp
+2
-2
canny.cu
modules/gpu/src/cuda/canny.cu
+29
-20
imgproc.cpp
modules/gpu/src/imgproc.cpp
+8
-6
No files found.
modules/gpu/perf/perf_imgproc.cpp
View file @
1d68a899
...
...
@@ -672,7 +672,7 @@ PERF_TEST_P(Sz, ImgProc_ColumnSum,
DEF_PARAM_TEST
(
Image_AppertureSz_L2gradient
,
string
,
int
,
bool
);
PERF_TEST_P
(
Image_AppertureSz_L2gradient
,
DISABLED_
ImgProc_Canny
,
PERF_TEST_P
(
Image_AppertureSz_L2gradient
,
ImgProc_Canny
,
Combine
(
Values
(
"perf/800x600.png"
,
"perf/1280x1024.png"
,
"perf/1680x1050.png"
),
Values
(
3
,
5
),
Bool
()))
...
...
@@ -1777,7 +1777,7 @@ PERF_TEST_P(Image, ImgProc_HoughLinesP,
DEF_PARAM_TEST
(
Sz_Dp_MinDist
,
cv
::
Size
,
float
,
float
);
PERF_TEST_P
(
Sz_Dp_MinDist
,
DISABLED_
ImgProc_HoughCircles
,
PERF_TEST_P
(
Sz_Dp_MinDist
,
ImgProc_HoughCircles
,
Combine
(
GPU_TYPICAL_MAT_SIZES
,
Values
(
1.0
f
,
2.0
f
,
4.0
f
),
Values
(
1.0
f
)))
...
...
modules/gpu/src/cuda/canny.cu
View file @
1d68a899
...
...
@@ -239,30 +239,35 @@ namespace canny
{
__device__ int counter = 0;
__global__ void edgesHysteresisLocalKernel(PtrStepSzi map, ushort2* st)
__device__ __forceinline__ bool checkIdx(int y, int x, int rows, int cols)
{
return (y >= 0) && (y < rows) && (x >= 0) && (x < cols);
}
__global__ void edgesHysteresisLocalKernel(PtrStepSzi map, short2* st)
{
__shared__ volatile int smem[18][18];
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
smem[threadIdx.y + 1][threadIdx.x + 1] =
x < map.cols && y < map.rows
? map(y, x) : 0;
smem[threadIdx.y + 1][threadIdx.x + 1] =
checkIdx(y, x, map.rows, map.cols)
? map(y, x) : 0;
if (threadIdx.y == 0)
smem[0][threadIdx.x + 1] =
y > 0
? map(y - 1, x) : 0;
smem[0][threadIdx.x + 1] =
checkIdx(y - 1, x, map.rows, map.cols)
? map(y - 1, x) : 0;
if (threadIdx.y == blockDim.y - 1)
smem[blockDim.y + 1][threadIdx.x + 1] =
y + 1 < map.rows
? map(y + 1, x) : 0;
smem[blockDim.y + 1][threadIdx.x + 1] =
checkIdx(y + 1, x, map.rows, map.cols)
? map(y + 1, x) : 0;
if (threadIdx.x == 0)
smem[threadIdx.y + 1][0] =
x > 0
? map(y, x - 1) : 0;
smem[threadIdx.y + 1][0] =
checkIdx(y, x - 1, map.rows, map.cols)
? map(y, x - 1) : 0;
if (threadIdx.x == blockDim.x - 1)
smem[threadIdx.y + 1][blockDim.x + 1] =
x + 1 < map.cols
? map(y, x + 1) : 0;
smem[threadIdx.y + 1][blockDim.x + 1] =
checkIdx(y, x + 1, map.rows, map.cols)
? map(y, x + 1) : 0;
if (threadIdx.x == 0 && threadIdx.y == 0)
smem[0][0] =
y > 0 && x > 0
? map(y - 1, x - 1) : 0;
smem[0][0] =
checkIdx(y - 1, x - 1, map.rows, map.cols)
? map(y - 1, x - 1) : 0;
if (threadIdx.x == blockDim.x - 1 && threadIdx.y == 0)
smem[0][blockDim.x + 1] =
y > 0 && x + 1 < map.cols
? map(y - 1, x + 1) : 0;
smem[0][blockDim.x + 1] =
checkIdx(y - 1, x + 1, map.rows, map.cols)
? map(y - 1, x + 1) : 0;
if (threadIdx.x == 0 && threadIdx.y == blockDim.y - 1)
smem[blockDim.y + 1][0] =
y + 1 < map.rows && x > 0
? map(y + 1, x - 1) : 0;
smem[blockDim.y + 1][0] =
checkIdx(y + 1, x - 1, map.rows, map.cols)
? map(y + 1, x - 1) : 0;
if (threadIdx.x == blockDim.x - 1 && threadIdx.y == blockDim.y - 1)
smem[blockDim.y + 1][blockDim.x + 1] =
y + 1 < map.rows && x + 1 < map.cols
? map(y + 1, x + 1) : 0;
smem[blockDim.y + 1][blockDim.x + 1] =
checkIdx(y + 1, x + 1, map.rows, map.cols)
? map(y + 1, x + 1) : 0;
__syncthreads();
...
...
@@ -317,11 +322,11 @@ namespace canny
if (n > 0)
{
const int ind = ::atomicAdd(&counter, 1);
st[ind] = make_
u
short2(x, y);
st[ind] = make_short2(x, y);
}
}
void edgesHysteresisLocal(PtrStepSzi map,
u
short2* st1)
void edgesHysteresisLocal(PtrStepSzi map, short2* st1)
{
void* counter_ptr;
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, counter) );
...
...
@@ -345,13 +350,13 @@ namespace canny
__constant__ int c_dx[8] = {-1, 0, 1, -1, 1, -1, 0, 1};
__constant__ int c_dy[8] = {-1, -1, -1, 0, 0, 1, 1, 1};
__global__ void edgesHysteresisGlobalKernel(PtrStepSzi map,
ushort2* st1, u
short2* st2, const int count)
__global__ void edgesHysteresisGlobalKernel(PtrStepSzi map,
short2* st1,
short2* st2, const int count)
{
const int stack_size = 512;
__shared__ int s_counter;
__shared__ int s_ind;
__shared__
u
short2 s_st[stack_size];
__shared__ short2 s_st[stack_size];
if (threadIdx.x == 0)
s_counter = 0;
...
...
@@ -363,14 +368,14 @@ namespace canny
if (ind >= count)
return;
u
short2 pos = st1[ind];
short2 pos = st1[ind];
if (threadIdx.x < 8)
{
pos.x += c_dx[threadIdx.x];
pos.y += c_dy[threadIdx.x];
if (pos.x > 0 && pos.x < map.cols
&& pos.y > 0 && pos.y < map.rows
&& map(pos.y, pos.x) == 1)
if (pos.x > 0 && pos.x < map.cols
- 1 && pos.y > 0 && pos.y < map.rows - 1
&& map(pos.y, pos.x) == 1)
{
map(pos.y, pos.x) = 2;
...
...
@@ -402,7 +407,7 @@ namespace canny
pos.x += c_dx[threadIdx.x & 7];
pos.y += c_dy[threadIdx.x & 7];
if (pos.x > 0 && pos.x < map.cols
&& pos.y > 0 && pos.y < map.rows
&& map(pos.y, pos.x) == 1)
if (pos.x > 0 && pos.x < map.cols
- 1 && pos.y > 0 && pos.y < map.rows - 1
&& map(pos.y, pos.x) == 1)
{
map(pos.y, pos.x) = 2;
...
...
@@ -419,8 +424,10 @@ namespace canny
{
if (threadIdx.x == 0)
{
ind = ::atomicAdd(&counter, s_counter);
s_ind = ind - s_counter;
s_ind = ::atomicAdd(&counter, s_counter);
if (s_ind + s_counter > map.cols * map.rows)
s_counter = 0;
}
__syncthreads();
...
...
@@ -432,7 +439,7 @@ namespace canny
}
}
void edgesHysteresisGlobal(PtrStepSzi map,
ushort2* st1, u
short2* st2)
void edgesHysteresisGlobal(PtrStepSzi map,
short2* st1,
short2* st2)
{
void* counter_ptr;
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, canny::counter) );
...
...
@@ -454,6 +461,8 @@ namespace canny
cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) );
count = min(count, map.cols * map.rows);
std::swap(st1, st2);
}
}
...
...
modules/gpu/src/imgproc.cpp
View file @
1d68a899
...
...
@@ -1491,6 +1491,8 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result,
void
cv
::
gpu
::
CannyBuf
::
create
(
const
Size
&
image_size
,
int
apperture_size
)
{
CV_Assert
(
image_size
.
width
<
std
::
numeric_limits
<
short
>::
max
()
&&
image_size
.
height
<
std
::
numeric_limits
<
short
>::
max
());
if
(
apperture_size
>
0
)
{
ensureSizeIsEnough
(
image_size
,
CV_32SC1
,
dx
);
...
...
@@ -1506,8 +1508,8 @@ void cv::gpu::CannyBuf::create(const Size& image_size, int apperture_size)
ensureSizeIsEnough
(
image_size
,
CV_32FC1
,
mag
);
ensureSizeIsEnough
(
image_size
,
CV_32SC1
,
map
);
ensureSizeIsEnough
(
1
,
image_size
.
area
(),
CV_16
U
C2
,
st1
);
ensureSizeIsEnough
(
1
,
image_size
.
area
(),
CV_16
U
C2
,
st2
);
ensureSizeIsEnough
(
1
,
image_size
.
area
(),
CV_16
S
C2
,
st1
);
ensureSizeIsEnough
(
1
,
image_size
.
area
(),
CV_16
S
C2
,
st2
);
}
void
cv
::
gpu
::
CannyBuf
::
release
()
...
...
@@ -1527,9 +1529,9 @@ namespace canny
void
calcMap
(
PtrStepSzi
dx
,
PtrStepSzi
dy
,
PtrStepSzf
mag
,
PtrStepSzi
map
,
float
low_thresh
,
float
high_thresh
);
void
edgesHysteresisLocal
(
PtrStepSzi
map
,
u
short2
*
st1
);
void
edgesHysteresisLocal
(
PtrStepSzi
map
,
short2
*
st1
);
void
edgesHysteresisGlobal
(
PtrStepSzi
map
,
ushort2
*
st1
,
u
short2
*
st2
);
void
edgesHysteresisGlobal
(
PtrStepSzi
map
,
short2
*
st1
,
short2
*
st2
);
void
getEdges
(
PtrStepSzi
map
,
PtrStepSzb
dst
);
}
...
...
@@ -1543,9 +1545,9 @@ namespace
buf
.
map
.
setTo
(
Scalar
::
all
(
0
));
calcMap
(
dx
,
dy
,
buf
.
mag
,
buf
.
map
,
low_thresh
,
high_thresh
);
edgesHysteresisLocal
(
buf
.
map
,
buf
.
st1
.
ptr
<
u
short2
>
());
edgesHysteresisLocal
(
buf
.
map
,
buf
.
st1
.
ptr
<
short2
>
());
edgesHysteresisGlobal
(
buf
.
map
,
buf
.
st1
.
ptr
<
ushort2
>
(),
buf
.
st2
.
ptr
<
u
short2
>
());
edgesHysteresisGlobal
(
buf
.
map
,
buf
.
st1
.
ptr
<
short2
>
(),
buf
.
st2
.
ptr
<
short2
>
());
getEdges
(
buf
.
map
,
dst
);
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment