Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
68aba9f2
Commit
68aba9f2
authored
Dec 22, 2010
by
Alexey Spizhevoy
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added mulSpectrums functions into GPU module
parent
fef06c25
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
202 additions
and
54 deletions
+202
-54
gpu.hpp
modules/gpu/include/opencv2/gpu/gpu.hpp
+11
-2
imgproc.cu
modules/gpu/src/cuda/imgproc.cu
+103
-14
imgproc_gpu.cpp
modules/gpu/src/imgproc_gpu.cpp
+87
-37
match_template.cpp
modules/gpu/src/match_template.cpp
+1
-1
No files found.
modules/gpu/include/opencv2/gpu/gpu.hpp
View file @
68aba9f2
...
...
@@ -628,10 +628,19 @@ namespace cv
//! computes minimum eigen value of 2x2 derivative covariation matrix at each pixel - the cornerness criteria
CV_EXPORTS
void
cornerMinEigenVal
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
int
blockSize
,
int
ksize
,
int
borderType
=
BORDER_REFLECT101
);
//! computes cross-correlation of two images using FFT
//! performs per-element multiplication of two full (i.e. not packed) Fourier spectrums
//! supports only 32FC2 matrixes (interleaved format)
CV_EXPORTS
void
mulSpectrums
(
const
GpuMat
&
a
,
const
GpuMat
&
b
,
GpuMat
&
c
,
int
flags
,
bool
conjB
=
false
);
//! performs per-element multiplication of two full (i.e. not packed) Fourier spectrums
//! supports only 32FC2 matrixes (interleaved format)
CV_EXPORTS
void
mulAndScaleSpectrums
(
const
GpuMat
&
a
,
const
GpuMat
&
b
,
GpuMat
&
c
,
int
flags
,
float
scale
,
bool
conjB
=
false
);
//! computes convolution (or cross-correlation) of two images using discrete Fourier transform
//! supports source images of 32FC1 type only
//! result matrix will have 32FC1 type
CV_EXPORTS
void
c
rossCorr
(
const
GpuMat
&
image
,
const
GpuMat
&
templ
,
GpuMat
&
result
);
CV_EXPORTS
void
c
onvolve
(
const
GpuMat
&
image
,
const
GpuMat
&
templ
,
GpuMat
&
result
,
bool
ccorr
=
false
);
//! computes the proximity map for the raster template and the image where the template is searched for
CV_EXPORTS
void
matchTemplate
(
const
GpuMat
&
image
,
const
GpuMat
&
templ
,
GpuMat
&
result
,
int
method
);
...
...
modules/gpu/src/cuda/imgproc.cu
View file @
68aba9f2
...
...
@@ -40,7 +40,6 @@
//
//M*/
#include <cufft.h>
#include "internal_shared.hpp"
#include "opencv2/gpu/device/border_interpolate.hpp"
...
...
@@ -751,31 +750,121 @@ namespace cv { namespace gpu { namespace imgproc
}
//////////////////////////////////////////////////////////////////////////
// mul
tiplyAndNormalizeSpect
s
// mul
Spectrum
s
__global__ void multiplyAndNormalizeSpectsKernel(
int n, float scale, const cufftComplex* a,
const cufftComplex* b, cufftComplex* c)
__global__ void mulSpectrumsKernel(const PtrStep_<cufftComplex> a, const PtrStep_<cufftComplex> b,
DevMem2D_<cufftComplex> c)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x < c.cols && y < c.rows)
{
c.ptr(y)[x] = cuCmulf(a.ptr(y)[x], b.ptr(y)[x]);
}
}
void mulSpectrums(const PtrStep_<cufftComplex> a, const PtrStep_<cufftComplex> b,
DevMem2D_<cufftComplex> c)
{
dim3 threads(256);
dim3 grid(divUp(c.cols, threads.x), divUp(c.rows, threads.y));
mulSpectrumsKernel<<<grid, threads>>>(a, b, c);
cudaSafeCall(cudaThreadSynchronize());
}
//////////////////////////////////////////////////////////////////////////
// mulSpectrums_CONJ
__global__ void mulSpectrumsKernel_CONJ(
const PtrStep_<cufftComplex> a, const PtrStep_<cufftComplex> b,
DevMem2D_<cufftComplex> c)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x < c.cols && y < c.rows)
{
c.ptr(y)[x] = cuCmulf(a.ptr(y)[x], cuConjf(b.ptr(y)[x]));
}
}
void mulSpectrums_CONJ(const PtrStep_<cufftComplex> a, const PtrStep_<cufftComplex> b,
DevMem2D_<cufftComplex> c)
{
dim3 threads(256);
dim3 grid(divUp(c.cols, threads.x), divUp(c.rows, threads.y));
mulSpectrumsKernel_CONJ<<<grid, threads>>>(a, b, c);
cudaSafeCall(cudaThreadSynchronize());
}
//////////////////////////////////////////////////////////////////////////
// mulAndScaleSpectrums
__global__ void mulAndScaleSpectrumsKernel(
const PtrStep_<cufftComplex> a, const PtrStep_<cufftComplex> b,
float scale, DevMem2D_<cufftComplex> c)
{
int x = blockIdx.x * blockDim.x + threadIdx.x;
if (x < n)
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x < c.cols && y < c.rows)
{
cufftComplex v = cuCmulf(a
[x], cuConjf(b[x])
);
c[x] = make_cuFloatComplex(cuCrealf(v) * scale, cuCimagf(v) * scale);
cufftComplex v = cuCmulf(a
.ptr(y)[x], b.ptr(y)[x]
);
c
.ptr(y)
[x] = make_cuFloatComplex(cuCrealf(v) * scale, cuCimagf(v) * scale);
}
}
// Performs per-element multiplication and normalization of two spectrums
void multiplyAndNormalizeSpects(int n, float scale, const cufftComplex* a,
const cufftComplex* b, cufftComplex* c)
void mulAndScaleSpectrums(const PtrStep_<cufftComplex> a, const PtrStep_<cufftComplex> b,
float scale, DevMem2D_<cufftComplex> c)
{
dim3 threads(256);
dim3 grid(divUp(
n, threads.x
));
dim3 grid(divUp(
c.cols, threads.x), divUp(c.rows, threads.y
));
mul
tiplyAndNormalizeSpectsKernel<<<grid, threads>>>(n, scale, a, b
, c);
mul
AndScaleSpectrumsKernel<<<grid, threads>>>(a, b, scale
, c);
cudaSafeCall(cudaThreadSynchronize());
}
//////////////////////////////////////////////////////////////////////////
// mulAndScaleSpectrums_CONJ
__global__ void mulAndScaleSpectrumsKernel_CONJ(
const PtrStep_<cufftComplex> a, const PtrStep_<cufftComplex> b,
float scale, DevMem2D_<cufftComplex> c)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x < c.cols && y < c.rows)
{
cufftComplex v = cuCmulf(a.ptr(y)[x], cuConjf(b.ptr(y)[x]));
c.ptr(y)[x] = make_cuFloatComplex(cuCrealf(v) * scale, cuCimagf(v) * scale);
}
}
void mulAndScaleSpectrums_CONJ(const PtrStep_<cufftComplex> a, const PtrStep_<cufftComplex> b,
float scale, DevMem2D_<cufftComplex> c)
{
dim3 threads(256);
dim3 grid(divUp(c.cols, threads.x), divUp(c.rows, threads.y));
mulAndScaleSpectrumsKernel_CONJ<<<grid, threads>>>(a, b, scale, c);
cudaSafeCall(cudaThreadSynchronize());
}
}}}
modules/gpu/src/imgproc_gpu.cpp
View file @
68aba9f2
...
...
@@ -74,7 +74,9 @@ void cv::gpu::histRange(const GpuMat&, GpuMat&, const GpuMat&) { throw_nogpu();
void
cv
::
gpu
::
histRange
(
const
GpuMat
&
,
GpuMat
*
,
const
GpuMat
*
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
cornerHarris
(
const
GpuMat
&
,
GpuMat
&
,
int
,
int
,
double
,
int
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
cornerMinEigenVal
(
const
GpuMat
&
,
GpuMat
&
,
int
,
int
,
int
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
crossCorr
(
const
GpuMat
&
,
const
GpuMat
&
,
GpuMat
&
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
mulSpectrums
(
const
GpuMat
&
,
const
GpuMat
&
,
GpuMat
&
,
int
,
bool
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
mulAndScaleSpectrums
(
const
GpuMat
&
,
const
GpuMat
&
,
GpuMat
&
,
int
,
float
,
bool
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
convolve
(
const
GpuMat
&
,
const
GpuMat
&
,
GpuMat
&
,
bool
)
{
throw_nogpu
();
}
#else
/* !defined (HAVE_CUDA) */
...
...
@@ -1064,6 +1066,66 @@ void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, i
imgproc
::
cornerMinEigenVal_caller
(
blockSize
,
Dx
,
Dy
,
dst
,
gpuBorderType
);
}
//////////////////////////////////////////////////////////////////////////////
// mulSpectrums
namespace
cv
{
namespace
gpu
{
namespace
imgproc
{
void
mulSpectrums
(
const
PtrStep_
<
cufftComplex
>
a
,
const
PtrStep_
<
cufftComplex
>
b
,
DevMem2D_
<
cufftComplex
>
c
);
void
mulSpectrums_CONJ
(
const
PtrStep_
<
cufftComplex
>
a
,
const
PtrStep_
<
cufftComplex
>
b
,
DevMem2D_
<
cufftComplex
>
c
);
}}}
void
cv
::
gpu
::
mulSpectrums
(
const
GpuMat
&
a
,
const
GpuMat
&
b
,
GpuMat
&
c
,
int
flags
,
bool
conjB
)
{
typedef
void
(
*
Caller
)(
const
PtrStep_
<
cufftComplex
>
,
const
PtrStep_
<
cufftComplex
>
,
DevMem2D_
<
cufftComplex
>
);
static
Caller
callers
[]
=
{
imgproc
::
mulSpectrums
,
imgproc
::
mulSpectrums_CONJ
};
CV_Assert
(
a
.
type
()
==
b
.
type
()
&&
a
.
type
()
==
CV_32FC2
);
CV_Assert
(
a
.
size
()
==
b
.
size
());
c
.
create
(
a
.
size
(),
CV_32FC2
);
Caller
caller
=
callers
[(
int
)
conjB
];
caller
(
a
,
b
,
c
);
}
//////////////////////////////////////////////////////////////////////////////
// mulAndScaleSpectrums
namespace
cv
{
namespace
gpu
{
namespace
imgproc
{
void
mulAndScaleSpectrums
(
const
PtrStep_
<
cufftComplex
>
a
,
const
PtrStep_
<
cufftComplex
>
b
,
float
scale
,
DevMem2D_
<
cufftComplex
>
c
);
void
mulAndScaleSpectrums_CONJ
(
const
PtrStep_
<
cufftComplex
>
a
,
const
PtrStep_
<
cufftComplex
>
b
,
float
scale
,
DevMem2D_
<
cufftComplex
>
c
);
}}}
void
cv
::
gpu
::
mulAndScaleSpectrums
(
const
GpuMat
&
a
,
const
GpuMat
&
b
,
GpuMat
&
c
,
int
flags
,
float
scale
,
bool
conjB
)
{
typedef
void
(
*
Caller
)(
const
PtrStep_
<
cufftComplex
>
,
const
PtrStep_
<
cufftComplex
>
,
float
scale
,
DevMem2D_
<
cufftComplex
>
);
static
Caller
callers
[]
=
{
imgproc
::
mulAndScaleSpectrums
,
imgproc
::
mulAndScaleSpectrums_CONJ
};
CV_Assert
(
a
.
type
()
==
b
.
type
()
&&
a
.
type
()
==
CV_32FC2
);
CV_Assert
(
a
.
size
()
==
b
.
size
());
c
.
create
(
a
.
size
(),
CV_32FC2
);
Caller
caller
=
callers
[(
int
)
conjB
];
caller
(
a
,
b
,
scale
,
c
);
}
//////////////////////////////////////////////////////////////////////////////
// crossCorr
...
...
@@ -1094,15 +1156,12 @@ namespace
}
namespace
cv
{
namespace
gpu
{
namespace
imgproc
void
cv
::
gpu
::
convolve
(
const
GpuMat
&
image
,
const
GpuMat
&
templ
,
GpuMat
&
result
,
bool
ccorr
)
{
void
multiplyAndNormalizeSpects
(
int
n
,
float
scale
,
const
cufftComplex
*
a
,
const
cufftComplex
*
b
,
cufftComplex
*
c
);
}}}
// We must be sure we use correct OpenCV analogues for CUFFT types
StaticAssert
<
sizeof
(
float
)
==
sizeof
(
cufftReal
)
>::
check
(
);
StaticAssert
<
sizeof
(
float
)
*
2
==
sizeof
(
cufftComplex
)
>::
check
();
void
cv
::
gpu
::
crossCorr
(
const
GpuMat
&
image
,
const
GpuMat
&
templ
,
GpuMat
&
result
)
{
CV_Assert
(
image
.
type
()
==
CV_32F
);
CV_Assert
(
templ
.
type
()
==
CV_32F
);
...
...
@@ -1119,33 +1178,28 @@ void cv::gpu::crossCorr(const GpuMat& image, const GpuMat& templ, GpuMat& result
block_size
.
width
=
std
::
min
(
dft_size
.
width
-
templ
.
cols
+
1
,
result
.
cols
);
block_size
.
height
=
std
::
min
(
dft_size
.
height
-
templ
.
rows
+
1
,
result
.
rows
);
cufftReal
*
image_data
;
cufftReal
*
templ_data
;
cufftReal
*
result_data
;
cudaSafeCall
(
cudaMalloc
((
void
**
)
&
image_data
,
sizeof
(
cufftReal
)
*
dft_size
.
area
()));
cudaSafeCall
(
cudaMalloc
((
void
**
)
&
templ_data
,
sizeof
(
cufftReal
)
*
dft_size
.
area
()));
cudaSafeCall
(
cudaMalloc
((
void
**
)
&
result_data
,
sizeof
(
cufftReal
)
*
dft_size
.
area
()));
GpuMat
image_data
(
1
,
dft_size
.
area
(),
CV_32F
);
GpuMat
templ_data
(
1
,
dft_size
.
area
(),
CV_32F
);
GpuMat
result_data
(
1
,
dft_size
.
area
(),
CV_32F
);
int
spect_len
=
dft_size
.
height
*
(
dft_size
.
width
/
2
+
1
);
cufftComplex
*
image_spect
;
cufftComplex
*
templ_spect
;
cufftComplex
*
result_spect
;
cudaSafeCall
(
cudaMalloc
((
void
**
)
&
image_spect
,
sizeof
(
cufftComplex
)
*
spect_len
));
cudaSafeCall
(
cudaMalloc
((
void
**
)
&
templ_spect
,
sizeof
(
cufftComplex
)
*
spect_len
));
cudaSafeCall
(
cudaMalloc
((
void
**
)
&
result_spect
,
sizeof
(
cufftComplex
)
*
spect_len
));
GpuMat
image_spect
(
1
,
spect_len
,
CV_32FC2
);
GpuMat
templ_spect
(
1
,
spect_len
,
CV_32FC2
);
GpuMat
result_spect
(
1
,
spect_len
,
CV_32FC2
);
cufftHandle
planR2C
,
planC2R
;
cufftSafeCall
(
cufftPlan2d
(
&
planC2R
,
dft_size
.
height
,
dft_size
.
width
,
CUFFT_C2R
));
cufftSafeCall
(
cufftPlan2d
(
&
planR2C
,
dft_size
.
height
,
dft_size
.
width
,
CUFFT_R2C
));
GpuMat
templ_roi
(
templ
.
size
(),
CV_32
S
,
templ
.
data
,
templ
.
step
);
GpuMat
templ_block
(
dft_size
,
CV_32
S
,
templ_data
,
dft_size
.
width
*
sizeof
(
cufftReal
));
GpuMat
templ_roi
(
templ
.
size
(),
CV_32
F
,
templ
.
data
,
templ
.
step
);
GpuMat
templ_block
(
dft_size
,
CV_32
F
,
templ_data
.
ptr
()
,
dft_size
.
width
*
sizeof
(
cufftReal
));
copyMakeBorder
(
templ_roi
,
templ_block
,
0
,
templ_block
.
rows
-
templ_roi
.
rows
,
0
,
templ_block
.
cols
-
templ_roi
.
cols
,
0
);
cufftSafeCall
(
cufftExecR2C
(
planR2C
,
templ_data
,
templ_spect
));
cufftSafeCall
(
cufftExecR2C
(
planR2C
,
templ_data
.
ptr
<
cufftReal
>
(),
templ_spect
.
ptr
<
cufftComplex
>
()));
GpuMat
image_block
(
dft_size
,
CV_32
S
,
image_data
,
dft_size
.
width
*
sizeof
(
cufftReal
));
GpuMat
image_block
(
dft_size
,
CV_32
F
,
image_data
.
ptr
()
,
dft_size
.
width
*
sizeof
(
cufftReal
));
// Process all blocks of the result matrix
for
(
int
y
=
0
;
y
<
result
.
rows
;
y
+=
block_size
.
height
)
...
...
@@ -1156,18 +1210,20 @@ void cv::gpu::crossCorr(const GpuMat& image, const GpuMat& templ, GpuMat& result
Size
image_roi_size
;
image_roi_size
.
width
=
std
::
min
(
x
+
dft_size
.
width
,
image
.
cols
)
-
x
;
image_roi_size
.
height
=
std
::
min
(
y
+
dft_size
.
height
,
image
.
rows
)
-
y
;
GpuMat
image_roi
(
image_roi_size
,
CV_32
S
,
(
void
*
)(
image
.
ptr
<
float
>
(
y
)
+
x
),
image
.
step
);
GpuMat
image_roi
(
image_roi_size
,
CV_32
F
,
(
void
*
)(
image
.
ptr
<
float
>
(
y
)
+
x
),
image
.
step
);
// Make source image block continous
copyMakeBorder
(
image_roi
,
image_block
,
0
,
image_block
.
rows
-
image_roi
.
rows
,
0
,
image_block
.
cols
-
image_roi
.
cols
,
0
);
cufftSafeCall
(
cufftExecR2C
(
planR2C
,
image_data
,
image_spect
));
cufftSafeCall
(
cufftExecR2C
(
planR2C
,
image_data
.
ptr
<
cufftReal
>
(),
image_spect
.
ptr
<
cufftComplex
>
()));
imgproc
::
multiplyAndNormalizeSpects
(
spect_len
,
1.
f
/
dft_size
.
area
(),
image_spect
,
templ_spect
,
result_spect
);
mulAndScaleSpectrums
(
image_spect
,
templ_spect
,
result_spect
,
0
,
1.
f
/
dft_size
.
area
(),
ccorr
);
cufftSafeCall
(
cufftExecC2R
(
planC2R
,
result_spect
,
result_data
));
cufftSafeCall
(
cufftExecC2R
(
planC2R
,
result_spect
.
ptr
<
cufftComplex
>
(),
result_data
.
ptr
<
cufftReal
>
()));
// Copy result block into appropriate part of the result matrix.
// We can't compute it inplace as the result of the CUFFT transforms
...
...
@@ -1176,23 +1232,17 @@ void cv::gpu::crossCorr(const GpuMat& image, const GpuMat& templ, GpuMat& result
result_roi_size
.
width
=
std
::
min
(
x
+
block_size
.
width
,
result
.
cols
)
-
x
;
result_roi_size
.
height
=
std
::
min
(
y
+
block_size
.
height
,
result
.
rows
)
-
y
;
GpuMat
result_roi
(
result_roi_size
,
CV_32F
,
(
void
*
)(
result
.
ptr
<
float
>
(
y
)
+
x
),
result
.
step
);
GpuMat
result_block
(
result_roi_size
,
CV_32F
,
result_data
,
dft_size
.
width
*
sizeof
(
cufftReal
));
GpuMat
result_block
(
result_roi_size
,
CV_32F
,
result_data
.
ptr
()
,
dft_size
.
width
*
sizeof
(
cufftReal
));
result_block
.
copyTo
(
result_roi
);
}
}
cufftSafeCall
(
cufftDestroy
(
planR2C
));
cufftSafeCall
(
cufftDestroy
(
planC2R
));
cudaSafeCall
(
cudaFree
(
image_spect
));
cudaSafeCall
(
cudaFree
(
templ_spect
));
cudaSafeCall
(
cudaFree
(
result_spect
));
cudaSafeCall
(
cudaFree
(
image_data
));
cudaSafeCall
(
cudaFree
(
templ_data
));
cudaSafeCall
(
cudaFree
(
result_data
));
}
#endif
/* !defined (HAVE_CUDA) */
modules/gpu/src/match_template.cpp
View file @
68aba9f2
...
...
@@ -196,7 +196,7 @@ namespace
}
GpuMat
result_
;
c
rossCorr
(
image
.
reshape
(
1
),
templ
.
reshape
(
1
),
result_
);
c
onvolve
(
image
.
reshape
(
1
),
templ
.
reshape
(
1
),
result_
,
true
);
imgproc
::
extractFirstChannel_32F
(
result_
,
result
,
image
.
channels
());
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment