Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
00f3ad72
Commit
00f3ad72
authored
8 years ago
by
Naba Kumar
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Implement DFT as cv::Algorithm to support concurrent streams
parent
c1007c72
No related merge requests found
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
180 additions
and
81 deletions
+180
-81
base.hpp
modules/core/include/opencv2/core/base.hpp
+4
-0
dxt.cpp
modules/core/src/dxt.cpp
+3
-0
cudaarithm.hpp
modules/cudaarithm/include/opencv2/cudaarithm.hpp
+30
-0
arithm.cpp
modules/cudaarithm/src/arithm.cpp
+116
-81
test_arithm.cpp
modules/cudaarithm/test/test_arithm.cpp
+27
-0
No files found.
modules/core/include/opencv2/core/base.hpp
View file @
00f3ad72
...
...
@@ -239,6 +239,10 @@ enum DftFlags {
into a real array and inverse transformation is executed, the function treats the input as a
packed complex-conjugate symmetrical array, and the output will also be a real array). */
DFT_REAL_OUTPUT
=
32
,
/** specifies that the input is complex. If this flag is set, the input must have 2 channels.
On the other hand, for backward compatibility, an input that has 2 channels is
already treated as complex. */
DFT_COMPLEX_INPUT
=
64
,
/** performs an inverse 1D or 2D transform instead of the default forward transform. */
DCT_INVERSE
=
DFT_INVERSE
,
/** performs a forward or inverse transform of every individual row of the input
...
...
This diff is collapsed.
Click to expand it.
modules/core/src/dxt.cpp
View file @
00f3ad72
...
...
@@ -3342,6 +3342,9 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
CV_Assert
(
type
==
CV_32FC1
||
type
==
CV_32FC2
||
type
==
CV_64FC1
||
type
==
CV_64FC2
);
// Fail if DFT_COMPLEX_INPUT is specified, but src is not 2 channels.
CV_Assert
(
!
((
flags
&
DFT_COMPLEX_INPUT
)
&&
src
.
channels
()
!=
2
)
);
if
(
!
inv
&&
src
.
channels
()
==
1
&&
(
flags
&
DFT_COMPLEX_OUTPUT
)
)
_dst
.
create
(
src
.
size
(),
CV_MAKETYPE
(
depth
,
2
)
);
else
if
(
inv
&&
src
.
channels
()
==
2
&&
(
flags
&
DFT_REAL_OUTPUT
)
)
...
...
This diff is collapsed.
Click to expand it.
modules/cudaarithm/include/opencv2/cudaarithm.hpp
View file @
00f3ad72
...
...
@@ -788,6 +788,7 @@ CV_EXPORTS void mulAndScaleSpectrums(InputArray src1, InputArray src2, OutputArr
(obtained from dft_size ).
- **DFT_INVERSE** inverts DFT. Use for complex-complex cases (real-complex and complex-real
cases are always forward and inverse, respectively).
- **DFT_COMPLEX_INPUT** Specifies that input is complex input with 2 channels.
- **DFT_REAL_OUTPUT** specifies the output as real. The source matrix is the result of
real-complex transform, so the destination matrix must be real.
@param stream Stream for the asynchronous version.
...
...
@@ -813,6 +814,35 @@ instead of the width.
*/
CV_EXPORTS
void
dft
(
InputArray
src
,
OutputArray
dst
,
Size
dft_size
,
int
flags
=
0
,
Stream
&
stream
=
Stream
::
Null
());
/** @brief Base class for the DFT operator, exposed as a cv::Algorithm.

Instances are obtained from cuda::createDFT; the transform size and flags are fixed at creation
time, and compute() may then be called repeatedly on different inputs.
*/
class
CV_EXPORTS
DFT
:
public
Algorithm
{
public
:
/** @brief Computes an FFT of a given image.
@param image Source image. The CUFFT-backed implementation asserts that its type is CV_32FC1 or
CV_32FC2, and that it has 2 channels exactly when DFT_COMPLEX_INPUT was passed to createDFT.
@param result Result image.
@param stream Stream for the asynchronous version.
*/
virtual
void
compute
(
InputArray
image
,
OutputArray
result
,
Stream
&
stream
=
Stream
::
Null
())
=
0
;
};
/** @brief Creates implementation for cuda::DFT.
@param dft_size The image size.
@param flags Optional flags:
- **DFT_ROWS** transforms each individual row of the source matrix.
- **DFT_SCALE** scales the result: divide it by the number of elements in the transform
(obtained from dft_size ).
- **DFT_INVERSE** inverts DFT. Use for complex-complex cases (real-complex and complex-real
cases are always forward and inverse, respectively).
- **DFT_COMPLEX_INPUT** Specifies that inputs will be complex with 2 channels.
- **DFT_REAL_OUTPUT** specifies the output as real. The source matrix is the result of
real-complex transform, so the destination matrix must be real.
@note The implementation asserts that DFT_COMPLEX_OUTPUT is not set (unpacked output for real
input is not supported), that the transform is not real-to-real (DFT_REAL_OUTPUT requires
DFT_COMPLEX_INPUT), and that the transform width is greater than 1 (a single-column dft_size is
handled as a single row unless DFT_ROWS is set).
@return The created DFT object. When the library is built without CUFFT, an
Error::StsNotImplemented error is raised instead.
*/
CV_EXPORTS
Ptr
<
DFT
>
createDFT
(
Size
dft_size
,
int
flags
);
/** @brief Base class for convolution (or cross-correlation) operator. :
*/
class
CV_EXPORTS
Convolution
:
public
Algorithm
...
...
This diff is collapsed.
Click to expand it.
modules/cudaarithm/src/arithm.cpp
View file @
00f3ad72
...
...
@@ -286,111 +286,146 @@ void cv::cuda::gemm(InputArray _src1, InputArray _src2, double alpha, InputArray
}
//////////////////////////////////////////////////////////////////////////////
//
dft
//
DFT function
void
cv
::
cuda
::
dft
(
InputArray
_src
,
OutputArray
_dst
,
Size
dft_size
,
int
flags
,
Stream
&
stream
)
{
#ifndef HAVE_CUFFT
(
void
)
_src
;
(
void
)
_dst
;
(
void
)
dft_size
;
(
void
)
flags
;
(
void
)
stream
;
throw_no_cuda
();
#else
GpuMat
src
=
getInputMat
(
_src
,
stream
);
if
(
getInputMat
(
_src
,
stream
).
channels
()
==
2
)
flags
|=
DFT_COMPLEX_INPUT
;
CV_Assert
(
src
.
type
()
==
CV_32FC1
||
src
.
type
()
==
CV_32FC2
);
Ptr
<
DFT
>
dft
=
createDFT
(
dft_size
,
flags
);
dft
->
compute
(
_src
,
_dst
,
stream
);
}
// We don't support unpacked output (in the case of real input)
CV_Assert
(
!
(
flags
&
DFT_COMPLEX_OUTPUT
)
);
//////////////////////////////////////////////////////////////////////////////
// DFT algorithm
const
bool
is_1d_input
=
(
dft_size
.
height
==
1
)
||
(
dft_size
.
width
==
1
);
const
bool
is_row_dft
=
(
flags
&
DFT_ROWS
)
!=
0
;
const
bool
is_scaled_dft
=
(
flags
&
DFT_SCALE
)
!=
0
;
const
bool
is_inverse
=
(
flags
&
DFT_INVERSE
)
!=
0
;
const
bool
is_complex_input
=
src
.
channels
()
==
2
;
const
bool
is_complex_output
=
!
(
flags
&
DFT_REAL_OUTPUT
);
#ifdef HAVE_CUFFT
// We don't support real-to-real transform
CV_Assert
(
is_complex_input
||
is_complex_output
);
namespace
{
// Make sure here we work with the continuous input,
// as CUFFT can't handle gaps
GpuMat
src_cont
;
if
(
src
.
isContinuous
())
class
DFTImpl
:
public
DFT
{
src_cont
=
src
;
}
else
{
BufferPool
pool
(
stream
);
src_cont
.
allocator
=
pool
.
getAllocator
();
createContinuous
(
src
.
rows
,
src
.
cols
,
src
.
type
(),
src_cont
);
src
.
copyTo
(
src_cont
,
stream
);
}
Size
dft_size
,
dft_size_opt
;
bool
is_1d_input
,
is_row_dft
,
is_scaled_dft
,
is_inverse
,
is_complex_input
,
is_complex_output
;
Size
dft_size_opt
=
dft_size
;
if
(
is_1d_input
&&
!
is_row_dft
)
{
// If the source matrix is single column handle it as single row
dft_size_opt
.
width
=
std
::
max
(
dft_size
.
width
,
dft_size
.
height
);
dft_size_opt
.
height
=
std
::
min
(
dft_size
.
width
,
dft_size
.
height
);
}
cufftType
dft_type
;
cufftHandle
plan
;
CV_Assert
(
dft_size_opt
.
width
>
1
);
public
:
DFTImpl
(
Size
dft_size
,
int
flags
)
:
dft_size
(
dft_size
),
dft_size_opt
(
dft_size
),
is_1d_input
((
dft_size
.
height
==
1
)
||
(
dft_size
.
width
==
1
)),
is_row_dft
((
flags
&
DFT_ROWS
)
!=
0
),
is_scaled_dft
((
flags
&
DFT_SCALE
)
!=
0
),
is_inverse
((
flags
&
DFT_INVERSE
)
!=
0
),
is_complex_input
((
flags
&
DFT_COMPLEX_INPUT
)
!=
0
),
is_complex_output
(
!
(
flags
&
DFT_REAL_OUTPUT
)),
dft_type
(
!
is_complex_input
?
CUFFT_R2C
:
(
is_complex_output
?
CUFFT_C2C
:
CUFFT_C2R
))
{
// We don't support unpacked output (in the case of real input)
CV_Assert
(
!
(
flags
&
DFT_COMPLEX_OUTPUT
)
);
cufftType
dft_type
=
CUFFT_R2C
;
if
(
is_complex_input
)
dft_type
=
is_complex_output
?
CUFFT_C2C
:
CUFFT_C2R
;
// We don't support real-to-real transform
CV_Assert
(
is_complex_input
||
is_complex_output
);
cufftHandle
plan
;
if
(
is_1d_input
||
is_row_dft
)
cufftSafeCall
(
cufftPlan1d
(
&
plan
,
dft_size_opt
.
width
,
dft_type
,
dft_size_opt
.
height
)
);
else
cufftSafeCall
(
cufftPlan2d
(
&
plan
,
dft_size_opt
.
height
,
dft_size_opt
.
width
,
dft_type
)
);
if
(
is_1d_input
&&
!
is_row_dft
)
{
// If the source matrix is single column handle it as single row
dft_size_opt
.
width
=
std
::
max
(
dft_size
.
width
,
dft_size
.
height
);
dft_size_opt
.
height
=
std
::
min
(
dft_size
.
width
,
dft_size
.
height
);
}
cufftSafeCall
(
cufftSetStream
(
plan
,
StreamAccessor
::
getStream
(
stream
))
);
CV_Assert
(
dft_size_opt
.
width
>
1
);
if
(
is_complex_input
)
{
if
(
is_complex_output
)
{
createContinuous
(
dft_size
,
CV_32FC2
,
_dst
);
GpuMat
dst
=
_dst
.
getGpuMat
();
if
(
is_1d_input
||
is_row_dft
)
cufftSafeCall
(
cufftPlan1d
(
&
plan
,
dft_size_opt
.
width
,
dft_type
,
dft_size_opt
.
height
)
);
else
cufftSafeCall
(
cufftPlan2d
(
&
plan
,
dft_size_opt
.
height
,
dft_size_opt
.
width
,
dft_type
)
);
}
cufftSafeCall
(
cufftExecC2C
(
plan
,
src_cont
.
ptr
<
cufftComplex
>
(),
dst
.
ptr
<
cufftComplex
>
(),
is_inverse
?
CUFFT_INVERSE
:
CUFFT_FORWARD
)
);
~
DFTImpl
()
{
cufftSafeCall
(
cufftDestroy
(
plan
)
);
}
else
void
compute
(
InputArray
_src
,
OutputArray
_dst
,
Stream
&
stream
)
{
createContinuous
(
dft_size
,
CV_32F
,
_dst
);
GpuMat
dst
=
_dst
.
getGpuMat
();
GpuMat
src
=
getInputMat
(
_src
,
stream
);
cufftSafeCall
(
cufftExecC2R
(
plan
,
src_cont
.
ptr
<
cufftComplex
>
(),
dst
.
ptr
<
cufftReal
>
()));
}
}
else
{
// We could swap dft_size for efficiency. Here we must reflect it
if
(
dft_size
==
dft_size_opt
)
createContinuous
(
Size
(
dft_size
.
width
/
2
+
1
,
dft_size
.
height
),
CV_32FC2
,
_dst
);
else
createContinuous
(
Size
(
dft_size
.
width
,
dft_size
.
height
/
2
+
1
),
CV_32FC2
,
_dst
);
CV_Assert
(
src
.
type
()
==
CV_32FC1
||
src
.
type
()
==
CV_32FC2
);
CV_Assert
(
is_complex_input
==
(
src
.
channels
()
==
2
)
);
GpuMat
dst
=
_dst
.
getGpuMat
();
// Make sure here we work with the continuous input,
// as CUFFT can't handle gaps
GpuMat
src_cont
;
if
(
src
.
isContinuous
())
{
src_cont
=
src
;
}
else
{
BufferPool
pool
(
stream
);
src_cont
.
allocator
=
pool
.
getAllocator
();
createContinuous
(
src
.
rows
,
src
.
cols
,
src
.
type
(),
src_cont
);
src
.
copyTo
(
src_cont
,
stream
);
}
cufftSafeCall
(
cufftExecR2C
(
plan
,
src_cont
.
ptr
<
cufftReal
>
(),
dst
.
ptr
<
cufftComplex
>
()));
}
cufftSafeCall
(
cufftSetStream
(
plan
,
StreamAccessor
::
getStream
(
stream
))
);
cufftSafeCall
(
cufftDestroy
(
plan
)
);
if
(
is_complex_input
)
{
if
(
is_complex_output
)
{
createContinuous
(
dft_size
,
CV_32FC2
,
_dst
);
GpuMat
dst
=
_dst
.
getGpuMat
();
cufftSafeCall
(
cufftExecC2C
(
plan
,
src_cont
.
ptr
<
cufftComplex
>
(),
dst
.
ptr
<
cufftComplex
>
(),
is_inverse
?
CUFFT_INVERSE
:
CUFFT_FORWARD
));
}
else
{
createContinuous
(
dft_size
,
CV_32F
,
_dst
);
GpuMat
dst
=
_dst
.
getGpuMat
();
cufftSafeCall
(
cufftExecC2R
(
plan
,
src_cont
.
ptr
<
cufftComplex
>
(),
dst
.
ptr
<
cufftReal
>
()));
}
}
else
{
// We could swap dft_size for efficiency. Here we must reflect it
if
(
dft_size
==
dft_size_opt
)
createContinuous
(
Size
(
dft_size
.
width
/
2
+
1
,
dft_size
.
height
),
CV_32FC2
,
_dst
);
else
createContinuous
(
Size
(
dft_size
.
width
,
dft_size
.
height
/
2
+
1
),
CV_32FC2
,
_dst
);
if
(
is_scaled_dft
)
cuda
::
multiply
(
_dst
,
Scalar
::
all
(
1.
/
dft_size
.
area
()),
_dst
,
1
,
-
1
,
stream
);
GpuMat
dst
=
_dst
.
getGpuMat
();
cufftSafeCall
(
cufftExecR2C
(
plan
,
src_cont
.
ptr
<
cufftReal
>
(),
dst
.
ptr
<
cufftComplex
>
()));
}
if
(
is_scaled_dft
)
cuda
::
multiply
(
_dst
,
Scalar
::
all
(
1.
/
dft_size
.
area
()),
_dst
,
1
,
-
1
,
stream
);
}
};
}
#endif
// Factory for the cuda::DFT algorithm.
//
// With CUFFT available, builds a DFTImpl for the given transform size and flags.
// Without CUFFT, reports Error::StsNotImplemented; the trailing return of an empty
// pointer only exists so that every control path yields a value.
Ptr<DFT> cv::cuda::createDFT(Size dft_size, int flags)
{
#ifdef HAVE_CUFFT
    return makePtr<DFTImpl>(dft_size, flags);
#else
    // Parameters are unused in this configuration; silence compiler warnings.
    (void) dft_size;
    (void) flags;
    CV_Error(Error::StsNotImplemented, "The library was build without CUFFT");
    return Ptr<DFT>();
#endif
}
...
...
This diff is collapsed.
Click to expand it.
modules/cudaarithm/test/test_arithm.cpp
View file @
00f3ad72
...
...
@@ -250,6 +250,33 @@ CUDA_TEST_P(Dft, C2C)
}
}
// Verifies that one cuda::DFT instance created by createDFT can be reused for several
// complex-to-complex transforms of the same size, and that each result matches cv::dft.
CUDA_TEST_P(Dft, Algorithm)
{
    int cols = randomInt(2, 100);
    int rows = randomInt(2, 100);

    // The inputs below are CV_32FC2, so the algorithm has to be created with
    // DFT_COMPLEX_INPUT: DFTImpl derives its transform type from this flag alone and
    // asserts in compute() that the flag agrees with the channel count of the source.
    // cv::dft accepts the same flag for 2-channel input, so the reference stays valid.
    int flags = cv::DFT_COMPLEX_INPUT;
    cv::Ptr<cv::cuda::DFT> dft = cv::cuda::createDFT(cv::Size(cols, rows), flags);

    for (int i = 0; i < 5; ++i)
    {
        SCOPED_TRACE("dft algorithm");

        cv::Mat a = randomMat(cv::Size(cols, rows), CV_32FC2, 0.0, 10.0);

        cv::cuda::GpuMat d_b;
        dft->compute(loadMat(a), d_b);

        cv::Mat b_gold;
        cv::dft(a, b_gold, flags);

        ASSERT_EQ(CV_32F, d_b.depth());
        ASSERT_EQ(2, d_b.channels());
        EXPECT_MAT_NEAR(b_gold, cv::Mat(d_b), rows * cols * 1e-4);
    }
}
namespace
{
void
testR2CThenC2R
(
const
std
::
string
&
hint
,
int
cols
,
int
rows
,
bool
inplace
)
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment