Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
00f3ad72
Commit
00f3ad72
authored
Mar 15, 2017
by
Naba Kumar
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Implement DFT as cv::Algorithm to support concurrent streams
parent
c1007c72
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
180 additions
and
81 deletions
+180
-81
base.hpp
modules/core/include/opencv2/core/base.hpp
+4
-0
dxt.cpp
modules/core/src/dxt.cpp
+3
-0
cudaarithm.hpp
modules/cudaarithm/include/opencv2/cudaarithm.hpp
+30
-0
arithm.cpp
modules/cudaarithm/src/arithm.cpp
+116
-81
test_arithm.cpp
modules/cudaarithm/test/test_arithm.cpp
+27
-0
No files found.
modules/core/include/opencv2/core/base.hpp
View file @
00f3ad72
...
...
@@ -239,6 +239,10 @@ enum DftFlags {
into a real array and inverse transformation is executed, the function treats the input as a
packed complex-conjugate symmetrical array, and the output will also be a real array). */
DFT_REAL_OUTPUT
=
32
,
/** specifies that the input is complex. If this flag is set, the input must have 2 channels.
Conversely, for backwards compatibility, an input that has 2 channels is always treated as
complex, even when this flag is not set. */
DFT_COMPLEX_INPUT
=
64
,
/** performs an inverse 1D or 2D transform instead of the default forward transform. */
DCT_INVERSE
=
DFT_INVERSE
,
/** performs a forward or inverse transform of every individual row of the input
...
...
modules/core/src/dxt.cpp
View file @
00f3ad72
...
...
@@ -3342,6 +3342,9 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
CV_Assert
(
type
==
CV_32FC1
||
type
==
CV_32FC2
||
type
==
CV_64FC1
||
type
==
CV_64FC2
);
// Fail if DFT_COMPLEX_INPUT is specified, but src is not 2 channels.
CV_Assert
(
!
((
flags
&
DFT_COMPLEX_INPUT
)
&&
src
.
channels
()
!=
2
)
);
if
(
!
inv
&&
src
.
channels
()
==
1
&&
(
flags
&
DFT_COMPLEX_OUTPUT
)
)
_dst
.
create
(
src
.
size
(),
CV_MAKETYPE
(
depth
,
2
)
);
else
if
(
inv
&&
src
.
channels
()
==
2
&&
(
flags
&
DFT_REAL_OUTPUT
)
)
...
...
modules/cudaarithm/include/opencv2/cudaarithm.hpp
View file @
00f3ad72
...
...
@@ -788,6 +788,7 @@ CV_EXPORTS void mulAndScaleSpectrums(InputArray src1, InputArray src2, OutputArr
(obtained from dft_size ).
- **DFT_INVERSE** inverts DFT. Use for complex-complex cases (real-complex and complex-real
cases are always forward and inverse, respectively).
- **DFT_COMPLEX_INPUT** Specifies that input is complex input with 2 channels.
- **DFT_REAL_OUTPUT** specifies the output as real. The source matrix is the result of
real-complex transform, so the destination matrix must be real.
@param stream Stream for the asynchronous version.
...
...
@@ -813,6 +814,35 @@ instead of the width.
*/
CV_EXPORTS
void
dft
(
InputArray
src
,
OutputArray
dst
,
Size
dft_size
,
int
flags
=
0
,
Stream
&
stream
=
Stream
::
Null
());
/** @brief Base class for the DFT operator, exposed as a cv::Algorithm.

Instances are obtained from createDFT(), which fixes the transform size and flags at creation
time. compute() may then be called repeatedly on the same instance, each call taking the stream
it should run on.
*/
class
CV_EXPORTS
DFT
:
public
Algorithm
{
public
:
/** @brief Computes an FFT of a given image.
@param image Source image. The CUFFT-backed implementation accepts CV_32FC1 and CV_32FC2 and
requires the channel count to agree with the creation flags (2 channels if and only if
DFT_COMPLEX_INPUT was passed to createDFT()).
@param result Result image.
@param stream Stream for the asynchronous version.
*/
virtual
void
compute
(
InputArray
image
,
OutputArray
result
,
Stream
&
stream
=
Stream
::
Null
())
=
0
;
};
/** @brief Creates implementation for cuda::DFT.
@param dft_size The image size.
@param flags Combination of the following flags (pass 0 for none; the parameter has no default):
- **DFT_ROWS** transforms each individual row of the source matrix.
- **DFT_SCALE** scales the result: divide it by the number of elements in the transform
(obtained from dft_size ).
- **DFT_INVERSE** inverts DFT. Use for complex-complex cases (real-complex and complex-real
cases are always forward and inverse, respectively).
- **DFT_COMPLEX_INPUT** Specifies that inputs will be complex with 2 channels.
- **DFT_REAL_OUTPUT** specifies the output as real. The source matrix is the result of
real-complex transform, so the destination matrix must be real.

DFT_COMPLEX_OUTPUT is not supported, and neither is a real-to-real transform (that is,
DFT_REAL_OUTPUT without DFT_COMPLEX_INPUT); both are rejected by an assertion.
@return Smart pointer to the created operator. When the library is built without CUFFT, an
Error::StsNotImplemented error is raised instead.
*/
CV_EXPORTS
Ptr
<
DFT
>
createDFT
(
Size
dft_size
,
int
flags
);
/** @brief Base class for convolution (or cross-correlation) operator. :
*/
class
CV_EXPORTS
Convolution
:
public
Algorithm
...
...
modules/cudaarithm/src/arithm.cpp
View file @
00f3ad72
...
...
@@ -286,111 +286,146 @@ void cv::cuda::gemm(InputArray _src1, InputArray _src2, double alpha, InputArray
}
//////////////////////////////////////////////////////////////////////////////
//
dft
//
DFT function
void
cv
::
cuda
::
dft
(
InputArray
_src
,
OutputArray
_dst
,
Size
dft_size
,
int
flags
,
Stream
&
stream
)
{
#ifndef HAVE_CUFFT
(
void
)
_src
;
(
void
)
_dst
;
(
void
)
dft_size
;
(
void
)
flags
;
(
void
)
stream
;
throw_no_cuda
();
#else
GpuMat
src
=
getInputMat
(
_src
,
stream
);
if
(
getInputMat
(
_src
,
stream
).
channels
()
==
2
)
flags
|=
DFT_COMPLEX_INPUT
;
CV_Assert
(
src
.
type
()
==
CV_32FC1
||
src
.
type
()
==
CV_32FC2
);
Ptr
<
DFT
>
dft
=
createDFT
(
dft_size
,
flags
);
dft
->
compute
(
_src
,
_dst
,
stream
);
}
// We don't support unpacked output (in the case of real input)
CV_Assert
(
!
(
flags
&
DFT_COMPLEX_OUTPUT
)
);
//////////////////////////////////////////////////////////////////////////////
// DFT algorithm
const
bool
is_1d_input
=
(
dft_size
.
height
==
1
)
||
(
dft_size
.
width
==
1
);
const
bool
is_row_dft
=
(
flags
&
DFT_ROWS
)
!=
0
;
const
bool
is_scaled_dft
=
(
flags
&
DFT_SCALE
)
!=
0
;
const
bool
is_inverse
=
(
flags
&
DFT_INVERSE
)
!=
0
;
const
bool
is_complex_input
=
src
.
channels
()
==
2
;
const
bool
is_complex_output
=
!
(
flags
&
DFT_REAL_OUTPUT
);
#ifdef HAVE_CUFFT
// We don't support real-to-real transform
CV_Assert
(
is_complex_input
||
is_complex_output
);
namespace
{
// Make sure here we work with the continuous input,
// as CUFFT can't handle gaps
GpuMat
src_cont
;
if
(
src
.
isContinuous
())
class
DFTImpl
:
public
DFT
{
src_cont
=
src
;
}
else
{
BufferPool
pool
(
stream
);
src_cont
.
allocator
=
pool
.
getAllocator
();
createContinuous
(
src
.
rows
,
src
.
cols
,
src
.
type
(),
src_cont
);
src
.
copyTo
(
src_cont
,
stream
);
}
Size
dft_size
,
dft_size_opt
;
bool
is_1d_input
,
is_row_dft
,
is_scaled_dft
,
is_inverse
,
is_complex_input
,
is_complex_output
;
Size
dft_size_opt
=
dft_size
;
if
(
is_1d_input
&&
!
is_row_dft
)
{
// If the source matrix is single column handle it as single row
dft_size_opt
.
width
=
std
::
max
(
dft_size
.
width
,
dft_size
.
height
);
dft_size_opt
.
height
=
std
::
min
(
dft_size
.
width
,
dft_size
.
height
);
}
cufftType
dft_type
;
cufftHandle
plan
;
CV_Assert
(
dft_size_opt
.
width
>
1
);
public
:
DFTImpl
(
Size
dft_size
,
int
flags
)
:
dft_size
(
dft_size
),
dft_size_opt
(
dft_size
),
is_1d_input
((
dft_size
.
height
==
1
)
||
(
dft_size
.
width
==
1
)),
is_row_dft
((
flags
&
DFT_ROWS
)
!=
0
),
is_scaled_dft
((
flags
&
DFT_SCALE
)
!=
0
),
is_inverse
((
flags
&
DFT_INVERSE
)
!=
0
),
is_complex_input
((
flags
&
DFT_COMPLEX_INPUT
)
!=
0
),
is_complex_output
(
!
(
flags
&
DFT_REAL_OUTPUT
)),
dft_type
(
!
is_complex_input
?
CUFFT_R2C
:
(
is_complex_output
?
CUFFT_C2C
:
CUFFT_C2R
))
{
// We don't support unpacked output (in the case of real input)
CV_Assert
(
!
(
flags
&
DFT_COMPLEX_OUTPUT
)
);
cufftType
dft_type
=
CUFFT_R2C
;
if
(
is_complex_input
)
dft_type
=
is_complex_output
?
CUFFT_C2C
:
CUFFT_C2R
;
// We don't support real-to-real transform
CV_Assert
(
is_complex_input
||
is_complex_output
);
cufftHandle
plan
;
if
(
is_1d_input
||
is_row_dft
)
cufftSafeCall
(
cufftPlan1d
(
&
plan
,
dft_size_opt
.
width
,
dft_type
,
dft_size_opt
.
height
)
);
else
cufftSafeCall
(
cufftPlan2d
(
&
plan
,
dft_size_opt
.
height
,
dft_size_opt
.
width
,
dft_type
)
);
if
(
is_1d_input
&&
!
is_row_dft
)
{
// If the source matrix is single column handle it as single row
dft_size_opt
.
width
=
std
::
max
(
dft_size
.
width
,
dft_size
.
height
);
dft_size_opt
.
height
=
std
::
min
(
dft_size
.
width
,
dft_size
.
height
);
}
cufftSafeCall
(
cufftSetStream
(
plan
,
StreamAccessor
::
getStream
(
stream
))
);
CV_Assert
(
dft_size_opt
.
width
>
1
);
if
(
is_complex_input
)
{
if
(
is_complex_output
)
{
createContinuous
(
dft_size
,
CV_32FC2
,
_dst
);
GpuMat
dst
=
_dst
.
getGpuMat
();
if
(
is_1d_input
||
is_row_dft
)
cufftSafeCall
(
cufftPlan1d
(
&
plan
,
dft_size_opt
.
width
,
dft_type
,
dft_size_opt
.
height
)
);
else
cufftSafeCall
(
cufftPlan2d
(
&
plan
,
dft_size_opt
.
height
,
dft_size_opt
.
width
,
dft_type
)
);
}
cufftSafeCall
(
cufftExecC2C
(
plan
,
src_cont
.
ptr
<
cufftComplex
>
(),
dst
.
ptr
<
cufftComplex
>
(),
is_inverse
?
CUFFT_INVERSE
:
CUFFT_FORWARD
)
);
~
DFTImpl
()
{
cufftSafeCall
(
cufftDestroy
(
plan
)
);
}
else
void
compute
(
InputArray
_src
,
OutputArray
_dst
,
Stream
&
stream
)
{
createContinuous
(
dft_size
,
CV_32F
,
_dst
);
GpuMat
dst
=
_dst
.
getGpuMat
();
GpuMat
src
=
getInputMat
(
_src
,
stream
);
cufftSafeCall
(
cufftExecC2R
(
plan
,
src_cont
.
ptr
<
cufftComplex
>
(),
dst
.
ptr
<
cufftReal
>
()));
}
}
else
{
// We could swap dft_size for efficiency. Here we must reflect it
if
(
dft_size
==
dft_size_opt
)
createContinuous
(
Size
(
dft_size
.
width
/
2
+
1
,
dft_size
.
height
),
CV_32FC2
,
_dst
);
else
createContinuous
(
Size
(
dft_size
.
width
,
dft_size
.
height
/
2
+
1
),
CV_32FC2
,
_dst
);
CV_Assert
(
src
.
type
()
==
CV_32FC1
||
src
.
type
()
==
CV_32FC2
);
CV_Assert
(
is_complex_input
==
(
src
.
channels
()
==
2
)
);
GpuMat
dst
=
_dst
.
getGpuMat
();
// Make sure here we work with the continuous input,
// as CUFFT can't handle gaps
GpuMat
src_cont
;
if
(
src
.
isContinuous
())
{
src_cont
=
src
;
}
else
{
BufferPool
pool
(
stream
);
src_cont
.
allocator
=
pool
.
getAllocator
();
createContinuous
(
src
.
rows
,
src
.
cols
,
src
.
type
(),
src_cont
);
src
.
copyTo
(
src_cont
,
stream
);
}
cufftSafeCall
(
cufftExecR2C
(
plan
,
src_cont
.
ptr
<
cufftReal
>
(),
dst
.
ptr
<
cufftComplex
>
()));
}
cufftSafeCall
(
cufftSetStream
(
plan
,
StreamAccessor
::
getStream
(
stream
))
);
cufftSafeCall
(
cufftDestroy
(
plan
)
);
if
(
is_complex_input
)
{
if
(
is_complex_output
)
{
createContinuous
(
dft_size
,
CV_32FC2
,
_dst
);
GpuMat
dst
=
_dst
.
getGpuMat
();
cufftSafeCall
(
cufftExecC2C
(
plan
,
src_cont
.
ptr
<
cufftComplex
>
(),
dst
.
ptr
<
cufftComplex
>
(),
is_inverse
?
CUFFT_INVERSE
:
CUFFT_FORWARD
));
}
else
{
createContinuous
(
dft_size
,
CV_32F
,
_dst
);
GpuMat
dst
=
_dst
.
getGpuMat
();
cufftSafeCall
(
cufftExecC2R
(
plan
,
src_cont
.
ptr
<
cufftComplex
>
(),
dst
.
ptr
<
cufftReal
>
()));
}
}
else
{
// We could swap dft_size for efficiency. Here we must reflect it
if
(
dft_size
==
dft_size_opt
)
createContinuous
(
Size
(
dft_size
.
width
/
2
+
1
,
dft_size
.
height
),
CV_32FC2
,
_dst
);
else
createContinuous
(
Size
(
dft_size
.
width
,
dft_size
.
height
/
2
+
1
),
CV_32FC2
,
_dst
);
if
(
is_scaled_dft
)
cuda
::
multiply
(
_dst
,
Scalar
::
all
(
1.
/
dft_size
.
area
()),
_dst
,
1
,
-
1
,
stream
);
GpuMat
dst
=
_dst
.
getGpuMat
();
cufftSafeCall
(
cufftExecR2C
(
plan
,
src_cont
.
ptr
<
cufftReal
>
(),
dst
.
ptr
<
cufftComplex
>
()));
}
if
(
is_scaled_dft
)
cuda
::
multiply
(
_dst
,
Scalar
::
all
(
1.
/
dft_size
.
area
()),
_dst
,
1
,
-
1
,
stream
);
}
};
}
#endif
// Factory for the cuda::DFT operator: forwards the transform size and flags to the
// CUFFT-backed DFTImpl when CUFFT is available, and reports an error otherwise.
Ptr<DFT> cv::cuda::createDFT(Size dft_size, int flags)
{
#ifdef HAVE_CUFFT
    return makePtr<DFTImpl>(dft_size, flags);
#else
    // No CUFFT, hence no implementation: mark the parameters as used to avoid
    // unused-parameter warnings, then raise the error.
    (void) dft_size;
    (void) flags;

    CV_Error(Error::StsNotImplemented, "The library was build without CUFFT");

    // Presumably kept so that every path of the function returns a value.
    return Ptr<DFT>();
#endif
}
...
...
modules/cudaarithm/test/test_arithm.cpp
View file @
00f3ad72
...
...
@@ -250,6 +250,33 @@ CUDA_TEST_P(Dft, C2C)
}
}
// Checks that one cuda::DFT operator can be reused for several inputs of the same size and
// that each result matches the CPU reference computed by cv::dft.
CUDA_TEST_P(Dft, Algorithm)
{
    const int cols = randomInt(2, 100);
    const int rows = randomInt(2, 100);

    // Every input below is 2-channel (CV_32FC2), so the operator has to be created with
    // DFT_COMPLEX_INPUT: the implementation derives its complex-input mode from this flag
    // and compute() asserts that the mode agrees with the channel count of the source.
    const int flags = cv::DFT_COMPLEX_INPUT;
    cv::Ptr<cv::cuda::DFT> dft = cv::cuda::createDFT(cv::Size(cols, rows), flags);

    for (int i = 0; i < 5; ++i)
    {
        SCOPED_TRACE("dft algorithm");

        cv::Mat a = randomMat(cv::Size(cols, rows), CV_32FC2, 0.0, 10.0);

        cv::cuda::GpuMat d_b;
        dft->compute(loadMat(a), d_b);

        // CPU reference; with DFT_COMPLEX_INPUT set, cv::dft requires a 2-channel source,
        // which the CV_32FC2 input satisfies.
        cv::Mat b_gold;
        cv::dft(a, b_gold, flags);

        ASSERT_EQ(CV_32F, d_b.depth());
        ASSERT_EQ(2, d_b.channels());
        EXPECT_MAT_NEAR(b_gold, cv::Mat(d_b), rows * cols * 1e-4);
    }
}
namespace
{
void
testR2CThenC2R
(
const
std
::
string
&
hint
,
int
cols
,
int
rows
,
bool
inplace
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment