Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
1d1da9c5
Commit
1d1da9c5
authored
Jan 23, 2012
by
Vladislav Vinogradov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added support of BORDER_REFLECT to gpu::cornerHarris and gpu::cornerMinEigenVal
parent
347a7106
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
80 additions
and
140 deletions
+80
-140
imgproc.cu
modules/gpu/src/cuda/imgproc.cu
+60
-107
imgproc.cpp
modules/gpu/src/imgproc.cpp
+16
-29
test_imgproc.cpp
modules/gpu/test/test_imgproc.cpp
+4
-4
No files found.
modules/gpu/src/cuda/imgproc.cu
View file @
1d1da9c5
...
...
@@ -373,49 +373,17 @@ namespace cv { namespace gpu { namespace device
reprojectImageTo3D_caller(disp, xyzw, q, stream);
}
//////////////////////////////////////// Extract Cov Data ////////////////////////////////////////////////
__global__ void extractCovData_kernel(const int cols, const int rows, const PtrStepf Dx,
const PtrStepf Dy, PtrStepf dst)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x < cols && y < rows)
{
float dx = Dx.ptr(y)[x];
float dy = Dy.ptr(y)[x];
dst.ptr(y)[x] = dx * dx;
dst.ptr(y + rows)[x] = dx * dy;
dst.ptr(y + (rows << 1))[x] = dy * dy;
}
}
void extractCovData_caller(const DevMem2Df Dx, const DevMem2Df Dy, PtrStepf dst, cudaStream_t stream)
{
dim3 threads(32, 8);
dim3 grid(divUp(Dx.cols, threads.x), divUp(Dx.rows, threads.y));
extractCovData_kernel<<<grid, threads, 0, stream>>>(Dx.cols, Dx.rows, Dx, Dy, dst);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
/////////////////////////////////////////// Corner Harris /////////////////////////////////////////////////
texture<float,
2> harrisDxTex
;
texture<float,
2> harrisDyTex
;
texture<float,
cudaTextureType2D, cudaReadModeElementType> harrisDxTex(0, cudaFilterModePoint, cudaAddressModeClamp)
;
texture<float,
cudaTextureType2D, cudaReadModeElementType> harrisDyTex(0, cudaFilterModePoint, cudaAddressModeClamp)
;
__global__ void cornerHarris_kernel(const int cols, const int rows, const int block_size, const float k,
PtrStepb dst)
__global__ void cornerHarris_kernel(const int block_size, const float k, DevMem2Df dst)
{
const
unsigned
int x = blockIdx.x * blockDim.x + threadIdx.x;
const
unsigned
int y = blockIdx.y * blockDim.y + threadIdx.y;
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x <
cols && y <
rows)
if (x <
dst.cols && y < dst.
rows)
{
float a = 0.f;
float b = 0.f;
...
...
@@ -432,24 +400,24 @@ namespace cv { namespace gpu { namespace device
{
float dx = tex2D(harrisDxTex, j, i);
float dy = tex2D(harrisDyTex, j, i);
a += dx * dx;
b += dx * dy;
c += dy * dy;
}
}
((float*)dst.ptr(y))[x]
= a * c - b * b - k * (a + c) * (a + c);
dst(y, x)
= a * c - b * b - k * (a + c) * (a + c);
}
}
template <typename BR, typename BC>
__global__ void cornerHarris_kernel(const int cols, const int rows, const int block_size, const float k,
PtrStepb dst, BR border_row, BC border_col)
__global__ void cornerHarris_kernel(const int block_size, const float k, DevMem2Df dst, const BR border_row, const BC border_col)
{
const
unsigned
int x = blockIdx.x * blockDim.x + threadIdx.x;
const
unsigned
int y = blockIdx.y * blockDim.y + threadIdx.y;
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x <
cols && y <
rows)
if (x <
dst.cols && y < dst.
rows)
{
float a = 0.f;
float b = 0.f;
...
...
@@ -462,50 +430,45 @@ namespace cv { namespace gpu { namespace device
for (int i = ibegin; i < iend; ++i)
{
int y = border_col.idx_row(i);
const int y = border_col.idx_row(i);
for (int j = jbegin; j < jend; ++j)
{
int x = border_row.idx_col(j);
const int x = border_row.idx_col(j);
float dx = tex2D(harrisDxTex, x, y);
float dy = tex2D(harrisDyTex, x, y);
a += dx * dx;
b += dx * dy;
c += dy * dy;
}
}
((float*)dst.ptr(y))[x]
= a * c - b * b - k * (a + c) * (a + c);
dst(y, x)
= a * c - b * b - k * (a + c) * (a + c);
}
}
void cornerHarris_caller(const int block_size, const float k, const DevMem2Db Dx, const DevMem2Db Dy, DevMem2Db dst,
int border_type, cudaStream_t stream)
void cornerHarris_gpu(int block_size, float k, DevMem2Df Dx, DevMem2Df Dy, DevMem2Df dst, int border_type, cudaStream_t stream)
{
const int rows = Dx.rows;
const int cols = Dx.cols;
dim3 threads(32, 8);
dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));
dim3 block(32, 8);
dim3 grid(divUp(Dx.cols, block.x), divUp(Dx.rows, block.y));
cudaChannelFormatDesc desc = cudaCreateChannelDesc<float>();
cudaBindTexture2D(0, harrisDxTex, Dx.data, desc, Dx.cols, Dx.rows, Dx.step);
cudaBindTexture2D(0, harrisDyTex, Dy.data, desc, Dy.cols, Dy.rows, Dy.step);
harrisDxTex.filterMode = cudaFilterModePoint;
harrisDyTex.filterMode = cudaFilterModePoint;
bindTexture(&harrisDxTex, Dx);
bindTexture(&harrisDyTex, Dy);
switch (border_type)
{
case BORDER_REFLECT101_GPU:
cornerHarris_kernel<<<grid, threads, 0, stream>>>(
cols, rows, block_size, k, dst, BrdRowReflect101<void>(cols), BrdColReflect101<void>(rows));
cornerHarris_kernel<<<grid, block, 0, stream>>>(block_size, k, dst, BrdRowReflect101<void>(Dx.cols), BrdColReflect101<void>(Dx.rows));
break;
case BORDER_REFLECT_GPU:
cornerHarris_kernel<<<grid, block, 0, stream>>>(block_size, k, dst, BrdRowReflect<void>(Dx.cols), BrdColReflect<void>(Dx.rows));
break;
case BORDER_REPLICATE_GPU:
harrisDxTex.addressMode[0] = cudaAddressModeClamp;
harrisDxTex.addressMode[1] = cudaAddressModeClamp;
harrisDyTex.addressMode[0] = cudaAddressModeClamp;
harrisDyTex.addressMode[1] = cudaAddressModeClamp;
cornerHarris_kernel<<<grid, threads, 0, stream>>>(cols, rows, block_size, k, dst);
case BORDER_REPLICATE_GPU:
cornerHarris_kernel<<<grid, block, 0, stream>>>(block_size, k, dst);
break;
}
...
...
@@ -513,23 +476,19 @@ namespace cv { namespace gpu { namespace device
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
//cudaSafeCall(cudaUnbindTexture(harrisDxTex));
//cudaSafeCall(cudaUnbindTexture(harrisDyTex));
}
/////////////////////////////////////////// Corner Min Eigen Val /////////////////////////////////////////////////
texture<float,
2> minEigenValDxTex
;
texture<float,
2> minEigenValDyTex
;
texture<float,
cudaTextureType2D, cudaReadModeElementType> minEigenValDxTex(0, cudaFilterModePoint, cudaAddressModeClamp)
;
texture<float,
cudaTextureType2D, cudaReadModeElementType> minEigenValDyTex(0, cudaFilterModePoint, cudaAddressModeClamp)
;
__global__ void cornerMinEigenVal_kernel(const int cols, const int rows, const int block_size,
PtrStepb dst)
__global__ void cornerMinEigenVal_kernel(const int block_size, DevMem2Df dst)
{
const
unsigned
int x = blockIdx.x * blockDim.x + threadIdx.x;
const
unsigned
int y = blockIdx.y * blockDim.y + threadIdx.y;
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x <
cols && y <
rows)
if (x <
dst.cols && y < dst.
rows)
{
float a = 0.f;
float b = 0.f;
...
...
@@ -546,6 +505,7 @@ namespace cv { namespace gpu { namespace device
{
float dx = tex2D(minEigenValDxTex, j, i);
float dy = tex2D(minEigenValDyTex, j, i);
a += dx * dx;
b += dx * dy;
c += dy * dy;
...
...
@@ -554,19 +514,19 @@ namespace cv { namespace gpu { namespace device
a *= 0.5f;
c *= 0.5f;
((float*)dst.ptr(y))[x] = (a + c) - sqrtf((a - c) * (a - c) + b * b);
dst(y, x) = (a + c) - sqrtf((a - c) * (a - c) + b * b);
}
}
template <typename BR, typename BC>
__global__ void cornerMinEigenVal_kernel(const int cols, const int rows, const int block_size,
PtrStepb dst, BR border_row, BC border_col)
__global__ void cornerMinEigenVal_kernel(const int block_size, DevMem2Df dst, const BR border_row, const BC border_col)
{
const
unsigned
int x = blockIdx.x * blockDim.x + threadIdx.x;
const
unsigned
int y = blockIdx.y * blockDim.y + threadIdx.y;
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x <
cols && y <
rows)
if (x <
dst.cols && y < dst.
rows)
{
float a = 0.f;
float b = 0.f;
...
...
@@ -580,11 +540,14 @@ namespace cv { namespace gpu { namespace device
for (int i = ibegin; i < iend; ++i)
{
int y = border_col.idx_row(i);
for (int j = jbegin; j < jend; ++j)
{
int x = border_row.idx_col(j);
float dx = tex2D(minEigenValDxTex, x, y);
float dy = tex2D(minEigenValDyTex, x, y);
a += dx * dx;
b += dx * dy;
c += dy * dy;
...
...
@@ -593,38 +556,31 @@ namespace cv { namespace gpu { namespace device
a *= 0.5f;
c *= 0.5f;
((float*)dst.ptr(y))[x] = (a + c) - sqrtf((a - c) * (a - c) + b * b);
dst(y, x) = (a + c) - sqrtf((a - c) * (a - c) + b * b);
}
}
void cornerMinEigenVal_caller(const int block_size, const DevMem2Db Dx, const DevMem2Db Dy, DevMem2Db dst,
int border_type, cudaStream_t stream)
void cornerMinEigenVal_gpu(int block_size, DevMem2Df Dx, DevMem2Df Dy, DevMem2Df dst, int border_type, cudaStream_t stream)
{
const int rows = Dx.rows
;
const int cols = Dx.cols
;
dim3 block(32, 8)
;
dim3 grid(divUp(Dx.cols, block.x), divUp(Dx.rows, block.y))
;
dim3 threads(32, 8);
dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));
cudaChannelFormatDesc desc = cudaCreateChannelDesc<float>();
cudaBindTexture2D(0, minEigenValDxTex, Dx.data, desc, Dx.cols, Dx.rows, Dx.step);
cudaBindTexture2D(0, minEigenValDyTex, Dy.data, desc, Dy.cols, Dy.rows, Dy.step);
minEigenValDxTex.filterMode = cudaFilterModePoint;
minEigenValDyTex.filterMode = cudaFilterModePoint;
bindTexture(&minEigenValDxTex, Dx);
bindTexture(&minEigenValDyTex, Dy);
switch (border_type)
{
case BORDER_REFLECT101_GPU:
cornerMinEigenVal_kernel<<<grid, threads, 0, stream>>>(
cols, rows, block_size, dst, BrdRowReflect101<void>(cols), BrdColReflect101<void>(rows));
cornerMinEigenVal_kernel<<<grid, block, 0, stream>>>(block_size, dst, BrdRowReflect101<void>(Dx.cols), BrdColReflect101<void>(Dx.rows));
break;
case BORDER_REFLECT_GPU:
cornerMinEigenVal_kernel<<<grid, block, 0, stream>>>(block_size, dst, BrdRowReflect<void>(Dx.cols), BrdColReflect<void>(Dx.rows));
break;
case BORDER_REPLICATE_GPU:
minEigenValDxTex.addressMode[0] = cudaAddressModeClamp;
minEigenValDxTex.addressMode[1] = cudaAddressModeClamp;
minEigenValDyTex.addressMode[0] = cudaAddressModeClamp;
minEigenValDyTex.addressMode[1] = cudaAddressModeClamp;
cornerMinEigenVal_kernel<<<grid, threads, 0, stream>>>(cols, rows, block_size, dst);
case BORDER_REPLICATE_GPU:
cornerMinEigenVal_kernel<<<grid, block, 0, stream>>>(block_size, dst);
break;
}
...
...
@@ -632,9 +588,6 @@ namespace cv { namespace gpu { namespace device
if (stream == 0)
cudaSafeCall(cudaDeviceSynchronize());
//cudaSafeCall(cudaUnbindTexture(minEigenValDxTex));
//cudaSafeCall(cudaUnbindTexture(minEigenValDyTex));
}
////////////////////////////// Column Sum //////////////////////////////////////
...
...
modules/gpu/src/imgproc.cpp
View file @
1d1da9c5
...
...
@@ -1344,22 +1344,23 @@ namespace cv { namespace gpu { namespace device
{
namespace
imgproc
{
void
extractCovData_caller
(
const
DevMem2Df
Dx
,
const
DevMem2Df
Dy
,
PtrStepf
dst
,
cudaStream_t
stream
);
void
cornerHarris_caller
(
const
int
block_size
,
const
float
k
,
const
DevMem2Db
Dx
,
const
DevMem2Db
Dy
,
DevMem2Db
dst
,
int
border_type
,
cudaStream_t
stream
);
void
cornerMinEigenVal_caller
(
const
int
block_size
,
const
DevMem2Db
Dx
,
const
DevMem2Db
Dy
,
DevMem2Db
dst
,
int
border_type
,
cudaStream_t
stream
);
void
cornerHarris_gpu
(
int
block_size
,
float
k
,
DevMem2Df
Dx
,
DevMem2Df
Dy
,
DevMem2Df
dst
,
int
border_type
,
cudaStream_t
stream
);
void
cornerMinEigenVal_gpu
(
int
block_size
,
DevMem2Df
Dx
,
DevMem2Df
Dy
,
DevMem2Df
dst
,
int
border_type
,
cudaStream_t
stream
);
}
}}}
namespace
{
template
<
typename
T
>
void
extractCovData
(
const
GpuMat
&
src
,
GpuMat
&
Dx
,
GpuMat
&
Dy
,
GpuMat
&
buf
,
int
blockSize
,
int
ksize
,
int
borderType
,
Stream
&
stream
)
{
double
scale
=
(
double
)(
1
<<
((
ksize
>
0
?
ksize
:
3
)
-
1
))
*
blockSize
;
double
scale
=
static_cast
<
double
>
(
1
<<
((
ksize
>
0
?
ksize
:
3
)
-
1
))
*
blockSize
;
if
(
ksize
<
0
)
scale
*=
2.
;
if
(
src
.
depth
()
==
CV_8U
)
scale
*=
255.
;
scale
=
1.
/
scale
;
Dx
.
create
(
src
.
size
(),
CV_32F
);
...
...
@@ -1376,23 +1377,7 @@ namespace
Scharr
(
src
,
Dy
,
CV_32F
,
0
,
1
,
buf
,
scale
,
borderType
,
-
1
,
stream
);
}
}
void
extractCovData
(
const
GpuMat
&
src
,
GpuMat
&
Dx
,
GpuMat
&
Dy
,
GpuMat
&
buf
,
int
blockSize
,
int
ksize
,
int
borderType
,
Stream
&
stream
)
{
switch
(
src
.
type
())
{
case
CV_8U
:
extractCovData
<
unsigned
char
>
(
src
,
Dx
,
Dy
,
buf
,
blockSize
,
ksize
,
borderType
,
stream
);
break
;
case
CV_32F
:
extractCovData
<
float
>
(
src
,
Dx
,
Dy
,
buf
,
blockSize
,
ksize
,
borderType
,
stream
);
break
;
default
:
CV_Error
(
CV_StsBadArg
,
"extractCovData: unsupported type of the source matrix"
);
}
}
}
// Anonymous namespace
}
bool
cv
::
gpu
::
tryConvertToGpuBorderType
(
int
cpuBorderType
,
int
&
gpuBorderType
)
{
...
...
@@ -1433,17 +1418,18 @@ void cv::gpu::cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& D
void
cv
::
gpu
::
cornerHarris
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
GpuMat
&
Dx
,
GpuMat
&
Dy
,
GpuMat
&
buf
,
int
blockSize
,
int
ksize
,
double
k
,
int
borderType
,
Stream
&
stream
)
{
using
namespace
::
cv
::
gpu
::
device
::
imgproc
;
using
namespace
cv
::
gpu
::
device
::
imgproc
;
CV_Assert
(
borderType
==
cv
::
BORDER_REFLECT101
||
borderType
==
cv
::
BORDER_REPLICATE
);
CV_Assert
(
borderType
==
cv
::
BORDER_REFLECT101
||
borderType
==
cv
::
BORDER_REPLICATE
||
borderType
==
cv
::
BORDER_REFLECT
);
int
gpuBorderType
;
CV_Assert
(
tryConvertToGpuBorderType
(
borderType
,
gpuBorderType
));
extractCovData
(
src
,
Dx
,
Dy
,
buf
,
blockSize
,
ksize
,
borderType
,
stream
);
dst
.
create
(
src
.
size
(),
CV_32F
);
cornerHarris_caller
(
blockSize
,
(
float
)
k
,
Dx
,
Dy
,
dst
,
gpuBorderType
,
StreamAccessor
::
getStream
(
stream
));
cornerHarris_gpu
(
blockSize
,
static_cast
<
float
>
(
k
),
Dx
,
Dy
,
dst
,
gpuBorderType
,
StreamAccessor
::
getStream
(
stream
));
}
void
cv
::
gpu
::
cornerMinEigenVal
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
int
blockSize
,
int
ksize
,
int
borderType
)
...
...
@@ -1462,15 +1448,16 @@ void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuM
{
using
namespace
::
cv
::
gpu
::
device
::
imgproc
;
CV_Assert
(
borderType
==
cv
::
BORDER_REFLECT101
||
borderType
==
cv
::
BORDER_REPLICATE
);
CV_Assert
(
borderType
==
cv
::
BORDER_REFLECT101
||
borderType
==
cv
::
BORDER_REPLICATE
||
borderType
==
cv
::
BORDER_REFLECT
);
int
gpuBorderType
;
CV_Assert
(
tryConvertToGpuBorderType
(
borderType
,
gpuBorderType
));
extractCovData
(
src
,
Dx
,
Dy
,
buf
,
blockSize
,
ksize
,
borderType
,
stream
);
dst
.
create
(
src
.
size
(),
CV_32F
);
cornerMinEigenVal_caller
(
blockSize
,
Dx
,
Dy
,
dst
,
gpuBorderType
,
StreamAccessor
::
getStream
(
stream
));
cornerMinEigenVal_gpu
(
blockSize
,
Dx
,
Dy
,
dst
,
gpuBorderType
,
StreamAccessor
::
getStream
(
stream
));
}
//////////////////////////////////////////////////////////////////////////////
...
...
modules/gpu/test/test_imgproc.cpp
View file @
1d1da9c5
...
...
@@ -2774,13 +2774,13 @@ TEST_P(CornerHarris, Accuracy)
dev_dst
.
download
(
dst
);
);
EXPECT_MAT_NEAR
(
dst_gold
,
dst
,
1e-3
);
EXPECT_MAT_NEAR
(
dst_gold
,
dst
,
0.02
);
}
INSTANTIATE_TEST_CASE_P
(
ImgProc
,
CornerHarris
,
Combine
(
ALL_DEVICES
,
Values
(
CV_8UC1
,
CV_32FC1
),
Values
((
int
)
cv
::
BORDER_REFLECT101
,
(
int
)
cv
::
BORDER_REPLICATE
)));
Values
((
int
)
cv
::
BORDER_REFLECT101
,
(
int
)
cv
::
BORDER_REPLICATE
,
(
int
)
cv
::
BORDER_REFLECT
)));
///////////////////////////////////////////////////////////////////////////////////////////////////////
// cornerMinEigen
...
...
@@ -2829,13 +2829,13 @@ TEST_P(CornerMinEigen, Accuracy)
dev_dst
.
download
(
dst
);
);
EXPECT_MAT_NEAR
(
dst_gold
,
dst
,
1e-
2
);
EXPECT_MAT_NEAR
(
dst_gold
,
dst
,
0.0
2
);
}
INSTANTIATE_TEST_CASE_P
(
ImgProc
,
CornerMinEigen
,
Combine
(
ALL_DEVICES
,
Values
(
CV_8UC1
,
CV_32FC1
),
Values
((
int
)
cv
::
BORDER_REFLECT101
,
(
int
)
cv
::
BORDER_REPLICATE
)));
Values
((
int
)
cv
::
BORDER_REFLECT101
,
(
int
)
cv
::
BORDER_REPLICATE
,
(
int
)
cv
::
BORDER_REFLECT
)));
////////////////////////////////////////////////////////////////////////
// ColumnSum
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment