Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
1d2cf0e2
Commit
1d2cf0e2
authored
Jul 22, 2014
by
Alexander Karsakov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Added nonzero_rows support
parent
52f76a32
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
109 additions
and
100 deletions
+109
-100
dxt.cpp
modules/core/src/dxt.cpp
+21
-28
ocl.cpp
modules/core/src/ocl.cpp
+5
-3
fft.cl
modules/core/src/opencl/fft.cl
+69
-51
test_dft.cpp
modules/core/test/ocl/test_dft.cpp
+14
-18
No files found.
modules/core/src/dxt.cpp
View file @
1d2cf0e2
...
...
@@ -2034,19 +2034,19 @@ enum FftType
C2C
=
3
};
static
std
::
vector
<
int
>
ocl_getRadixes
(
int
cols
,
std
::
vector
<
int
>&
radixes
,
std
::
vector
<
int
>&
blocks
,
int
&
min_radix
)
static
void
ocl_getRadixes
(
int
cols
,
std
::
vector
<
int
>&
radixes
,
std
::
vector
<
int
>&
blocks
,
int
&
min_radix
)
{
int
factors
[
34
];
int
nf
=
DFTFactorize
(
cols
,
factors
);
int
nf
=
DFTFactorize
(
cols
,
factors
);
int
n
=
1
;
int
factor_index
=
0
;
min_radix
=
INT_MAX
;
// 2^n transforms
if
(
(
factors
[
factor_index
]
&
1
)
==
0
)
if
(
(
factors
[
factor_index
]
&
1
)
==
0
)
{
for
(
;
n
<
factors
[
factor_index
];
)
for
(
;
n
<
factors
[
factor_index
];)
{
int
radix
=
2
,
block
=
1
;
if
(
8
*
n
<=
factors
[
0
])
...
...
@@ -2080,7 +2080,7 @@ static std::vector<int> ocl_getRadixes(int cols, std::vector<int>& radixes, std:
}
// all the other transforms
for
(
;
factor_index
<
nf
;
factor_index
++
)
for
(
;
factor_index
<
nf
;
factor_index
++
)
{
int
radix
=
factors
[
factor_index
],
block
=
1
;
if
(
radix
==
3
)
...
...
@@ -2101,7 +2101,6 @@ static std::vector<int> ocl_getRadixes(int cols, std::vector<int>& radixes, std:
blocks
.
push_back
(
block
);
min_radix
=
min
(
min_radix
,
block
*
radix
);
}
return
radixes
;
}
struct
OCL_FftPlan
...
...
@@ -2111,14 +2110,13 @@ struct OCL_FftPlan
int
thread_count
;
int
dft_size
;
int
flags
;
bool
status
;
OCL_FftPlan
(
int
_size
,
int
_flags
)
:
dft_size
(
_size
),
flags
(
_flags
),
status
(
true
)
OCL_FftPlan
(
int
_size
)
:
dft_size
(
_size
),
status
(
true
)
{
int
min_radix
;
std
::
vector
<
int
>
radixes
,
blocks
;
ocl_getRadixes
(
dft_size
,
radixes
,
blocks
,
min_radix
);
thread_count
=
(
dft_size
+
min_radix
-
1
)
/
min_radix
;
thread_count
=
dft_size
/
min_radix
;
if
(
thread_count
>
ocl
::
Device
::
getDefault
().
maxWorkGroupSize
())
{
...
...
@@ -2140,8 +2138,7 @@ struct OCL_FftPlan
n
*=
radix
;
}
twiddles
.
create
(
1
,
twiddle_size
,
CV_32FC2
);
Mat
tw
=
twiddles
.
getMat
(
ACCESS_WRITE
);
Mat
tw
(
1
,
twiddle_size
,
CV_32FC2
);
float
*
ptr
=
tw
.
ptr
<
float
>
();
int
ptr_index
=
0
;
...
...
@@ -2162,6 +2159,7 @@ struct OCL_FftPlan
}
}
}
twiddles
=
tw
.
getUMat
(
ACCESS_READ
);
buildOptions
=
format
(
"-D LOCAL_SIZE=%d -D kercn=%d -D RADIX_PROCESS=%s"
,
dft_size
,
dft_size
/
thread_count
,
radix_processing
.
c_str
());
...
...
@@ -2185,10 +2183,10 @@ struct OCL_FftPlan
if
(
rows
)
{
globalsize
[
0
]
=
thread_count
;
globalsize
[
1
]
=
dft_size
;
globalsize
[
0
]
=
thread_count
;
globalsize
[
1
]
=
src
.
rows
;
localsize
[
0
]
=
thread_count
;
localsize
[
1
]
=
1
;
kernel_name
=
!
inv
?
"fft_multi_radix_rows"
:
"ifft_multi_radix_rows"
;
if
(
is1d
&&
(
flags
&
DFT_SCALE
))
if
(
(
is1d
||
inv
)
&&
(
flags
&
DFT_SCALE
))
options
+=
" -D DFT_SCALE"
;
}
else
...
...
@@ -2200,14 +2198,9 @@ struct OCL_FftPlan
options
+=
" -D DFT_SCALE"
;
}
if
(
src
.
channels
()
==
1
)
options
+=
" -D REAL_INPUT"
;
else
options
+=
" -D COMPLEX_INPUT"
;
if
(
dst
.
channels
()
==
1
)
options
+=
" -D REAL_OUTPUT"
;
if
(
is1d
)
options
+=
" -D IS_1D"
;
options
+=
src
.
channels
()
==
1
?
" -D REAL_INPUT"
:
" -D COMPLEX_INPUT"
;
options
+=
dst
.
channels
()
==
1
?
" -D REAL_OUTPUT"
:
" -D COMPLEX_OUTPUT"
;
options
+=
is1d
?
" -D IS_1D"
:
""
;
if
(
!
inv
)
{
...
...
@@ -2216,10 +2209,10 @@ struct OCL_FftPlan
}
else
{
if
(
is1d
&&
fftType
==
C2R
||
(
rows
&&
fftType
==
R2R
))
if
(
rows
&&
(
fftType
==
C2R
||
fftType
==
R2R
))
options
+=
" -D NO_CONJUGATE"
;
if
(
dst
.
cols
%
2
==
0
)
options
+=
" -D EVEN"
;
options
+=
" -D EVEN"
;
}
ocl
::
Kernel
k
(
kernel_name
.
c_str
(),
ocl
::
core
::
fft_oclsrc
,
options
);
...
...
@@ -2240,7 +2233,7 @@ public:
return
planCache
;
}
OCL_FftPlan
*
getFftPlan
(
int
dft_size
,
int
flags
)
OCL_FftPlan
*
getFftPlan
(
int
dft_size
)
{
for
(
size_t
i
=
0
,
size
=
planStorage
.
size
();
i
<
size
;
++
i
)
{
...
...
@@ -2252,7 +2245,7 @@ public:
}
}
OCL_FftPlan
*
newPlan
=
new
OCL_FftPlan
(
dft_size
,
flags
);
OCL_FftPlan
*
newPlan
=
new
OCL_FftPlan
(
dft_size
);
planStorage
.
push_back
(
newPlan
);
return
newPlan
;
}
...
...
@@ -2275,13 +2268,13 @@ protected:
static
bool
ocl_dft_C2C_rows
(
InputArray
_src
,
OutputArray
_dst
,
int
nonzero_rows
,
int
flags
,
int
fftType
)
{
const
OCL_FftPlan
*
plan
=
OCL_FftPlanCache
::
getInstance
().
getFftPlan
(
_src
.
cols
()
,
flags
);
const
OCL_FftPlan
*
plan
=
OCL_FftPlanCache
::
getInstance
().
getFftPlan
(
_src
.
cols
());
return
plan
->
enqueueTransform
(
_src
,
_dst
,
nonzero_rows
,
flags
,
fftType
,
true
);
}
static
bool
ocl_dft_C2C_cols
(
InputArray
_src
,
OutputArray
_dst
,
int
nonzero_cols
,
int
flags
,
int
fftType
)
{
const
OCL_FftPlan
*
plan
=
OCL_FftPlanCache
::
getInstance
().
getFftPlan
(
_src
.
rows
()
,
flags
);
const
OCL_FftPlan
*
plan
=
OCL_FftPlanCache
::
getInstance
().
getFftPlan
(
_src
.
rows
());
return
plan
->
enqueueTransform
(
_src
,
_dst
,
nonzero_cols
,
flags
,
fftType
,
false
);
}
...
...
@@ -2385,7 +2378,7 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro
}
else
{
int
nonzero_cols
=
src
.
cols
/
2
+
1
;
// : src.cols;
int
nonzero_cols
=
src
.
cols
/
2
+
1
;
if
(
!
ocl_dft_C2C_cols
(
src
,
output
,
nonzero_cols
,
flags
,
fftType
))
return
false
;
...
...
modules/core/src/ocl.cpp
View file @
1d2cf0e2
...
...
@@ -3002,7 +3002,8 @@ bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
sync
?
0
:
&
p
->
e
);
if
(
sync
||
retval
!=
CL_SUCCESS
)
{
CV_OclDbgAssert
(
clFinish
(
qq
)
==
CL_SUCCESS
);
int
a
=
clFinish
(
qq
);
CV_OclDbgAssert
(
a
==
CL_SUCCESS
);
p
->
cleanupUMats
();
}
else
...
...
@@ -3898,8 +3899,9 @@ public:
if
(
(
accessFlags
&
ACCESS_READ
)
!=
0
&&
u
->
hostCopyObsolete
()
)
{
AlignedDataPtr
<
false
,
true
>
alignedPtr
(
u
->
data
,
u
->
size
,
CV_OPENCL_DATA_PTR_ALIGNMENT
);
CV_Assert
(
clEnqueueReadBuffer
(
q
,
(
cl_mem
)
u
->
handle
,
CL_TRUE
,
0
,
u
->
size
,
alignedPtr
.
getAlignedPtr
(),
0
,
0
,
0
)
==
CL_SUCCESS
);
int
a
=
clEnqueueReadBuffer
(
q
,
(
cl_mem
)
u
->
handle
,
CL_TRUE
,
0
,
u
->
size
,
alignedPtr
.
getAlignedPtr
(),
0
,
0
,
0
);
CV_Assert
(
a
==
CL_SUCCESS
);
u
->
markHostCopyObsolete
(
false
);
}
}
...
...
modules/core/src/opencl/fft.cl
View file @
1d2cf0e2
...
...
@@ -16,7 +16,7 @@ float2 twiddle(float2 a) {
}
__attribute__
((
always_inline
))
void
butterfly2
(
float2
a0,
float2
a1,
__local
float2*
smem,
__
constant
const
float2*
twiddles,
void
butterfly2
(
float2
a0,
float2
a1,
__local
float2*
smem,
__
global
const
float2*
twiddles,
const
int
x,
const
int
block_size
)
{
const
int
k
=
x
&
(
block_size
-
1
)
;
...
...
@@ -28,7 +28,7 @@ void butterfly2(float2 a0, float2 a1, __local float2* smem, __constant const flo
}
__attribute__
((
always_inline
))
void
butterfly4
(
float2
a0,
float2
a1,
float2
a2,
float2
a3,
__local
float2*
smem,
__
constant
const
float2*
twiddles,
void
butterfly4
(
float2
a0,
float2
a1,
float2
a2,
float2
a3,
__local
float2*
smem,
__
global
const
float2*
twiddles,
const
int
x,
const
int
block_size
)
{
const
int
k
=
x
&
(
block_size
-
1
)
;
...
...
@@ -50,10 +50,10 @@ void butterfly4(float2 a0, float2 a1, float2 a2, float2 a3, __local float2* smem
}
__attribute__
((
always_inline
))
void
butterfly3
(
float2
a0,
float2
a1,
float2
a2,
__local
float2*
smem,
__
constant
const
float2*
twiddles,
void
butterfly3
(
float2
a0,
float2
a1,
float2
a2,
__local
float2*
smem,
__
global
const
float2*
twiddles,
const
int
x,
const
int
block_size
)
{
const
int
k
=
x
&
(
block_size
-
1
)
;
const
int
k
=
x
%
block_size
;
a1
=
mul_float2
(
twiddles[k],
a1
)
;
a2
=
mul_float2
(
twiddles[k+block_size],
a2
)
;
const
int
dst_ind
=
((
x
-
k
)
*
3
)
+
k
;
...
...
@@ -68,10 +68,10 @@ void butterfly3(float2 a0, float2 a1, float2 a2, __local float2* smem, __constan
}
__attribute__
((
always_inline
))
void
butterfly5
(
float2
a0,
float2
a1,
float2
a2,
float2
a3,
float2
a4,
__local
float2*
smem,
__
constant
const
float2*
twiddles,
void
butterfly5
(
float2
a0,
float2
a1,
float2
a2,
float2
a3,
float2
a4,
__local
float2*
smem,
__
global
const
float2*
twiddles,
const
int
x,
const
int
block_size
)
{
const
int
k
=
x
&
(
block_size
-
1
)
;
const
int
k
=
x
%
block_size
;
a1
=
mul_float2
(
twiddles[k],
a1
)
;
a2
=
mul_float2
(
twiddles[k
+
block_size],
a2
)
;
a3
=
mul_float2
(
twiddles[k+2*block_size],
a3
)
;
...
...
@@ -109,7 +109,7 @@ void butterfly5(float2 a0, float2 a1, float2 a2, float2 a3, float2 a4, __local f
}
__attribute__
((
always_inline
))
void
fft_radix2
(
__local
float2*
smem,
__
constant
const
float2*
twiddles,
const
int
x,
const
int
block_size,
const
int
t
)
void
fft_radix2
(
__local
float2*
smem,
__
global
const
float2*
twiddles,
const
int
x,
const
int
block_size,
const
int
t
)
{
float2
a0,
a1
;
...
...
@@ -128,7 +128,7 @@ void fft_radix2(__local float2* smem, __constant const float2* twiddles, const i
}
__attribute__
((
always_inline
))
void
fft_radix2_B2
(
__local
float2*
smem,
__
constant
const
float2*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
void
fft_radix2_B2
(
__local
float2*
smem,
__
global
const
float2*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
{
const
int
x2
=
x1
+
t/2
;
float2
a0,
a1,
a2,
a3
;
...
...
@@ -151,7 +151,7 @@ void fft_radix2_B2(__local float2* smem, __constant const float2* twiddles, cons
}
__attribute__
((
always_inline
))
void
fft_radix2_B3
(
__local
float2*
smem,
__
constant
const
float2*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
void
fft_radix2_B3
(
__local
float2*
smem,
__
global
const
float2*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
{
const
int
x2
=
x1
+
t/3
;
const
int
x3
=
x1
+
2*t/3
;
...
...
@@ -177,7 +177,7 @@ void fft_radix2_B3(__local float2* smem, __constant const float2* twiddles, cons
}
__attribute__
((
always_inline
))
void
fft_radix2_B4
(
__local
float2*
smem,
__
constant
const
float2*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
void
fft_radix2_B4
(
__local
float2*
smem,
__
global
const
float2*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
{
const
int
thread_block
=
t/4
;
const
int
x2
=
x1
+
thread_block
;
...
...
@@ -207,7 +207,7 @@ void fft_radix2_B4(__local float2* smem, __constant const float2* twiddles, cons
}
__attribute__
((
always_inline
))
void
fft_radix2_B5
(
__local
float2*
smem,
__
constant
const
float2*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
void
fft_radix2_B5
(
__local
float2*
smem,
__
global
const
float2*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
{
const
int
thread_block
=
t/5
;
const
int
x2
=
x1
+
thread_block
;
...
...
@@ -240,7 +240,7 @@ void fft_radix2_B5(__local float2* smem, __constant const float2* twiddles, cons
}
__attribute__
((
always_inline
))
void
fft_radix4
(
__local
float2*
smem,
__
constant
const
float2*
twiddles,
const
int
x,
const
int
block_size,
const
int
t
)
void
fft_radix4
(
__local
float2*
smem,
__
global
const
float2*
twiddles,
const
int
x,
const
int
block_size,
const
int
t
)
{
float2
a0,
a1,
a2,
a3
;
...
...
@@ -258,7 +258,7 @@ void fft_radix4(__local float2* smem, __constant const float2* twiddles, const i
}
__attribute__
((
always_inline
))
void
fft_radix4_B2
(
__local
float2*
smem,
__
constant
const
float2*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
void
fft_radix4_B2
(
__local
float2*
smem,
__
global
const
float2*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
{
const
int
x2
=
x1
+
t/2
;
float2
a0,
a1,
a2,
a3,
a4,
a5,
a6,
a7
;
...
...
@@ -281,7 +281,7 @@ void fft_radix4_B2(__local float2* smem, __constant const float2* twiddles, cons
}
__attribute__
((
always_inline
))
void
fft_radix4_B3
(
__local
float2*
smem,
__
constant
const
float2*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
void
fft_radix4_B3
(
__local
float2*
smem,
__
global
const
float2*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
{
const
int
x2
=
x1
+
t/3
;
const
int
x3
=
x2
+
t/3
;
...
...
@@ -307,7 +307,7 @@ void fft_radix4_B3(__local float2* smem, __constant const float2* twiddles, cons
}
__attribute__
((
always_inline
))
void
fft_radix8
(
__local
float2*
smem,
__
constant
const
float2*
twiddles,
const
int
x,
const
int
block_size,
const
int
t
)
void
fft_radix8
(
__local
float2*
smem,
__
global
const
float2*
twiddles,
const
int
x,
const
int
block_size,
const
int
t
)
{
const
int
k
=
x
%
block_size
;
float2
a0,
a1,
a2,
a3,
a4,
a5,
a6,
a7
;
...
...
@@ -370,7 +370,7 @@ void fft_radix8(__local float2* smem, __constant const float2* twiddles, const i
}
__attribute__
((
always_inline
))
void
fft_radix3
(
__local
float2*
smem,
__
constant
const
float2*
twiddles,
const
int
x,
const
int
block_size,
const
int
t
)
void
fft_radix3
(
__local
float2*
smem,
__
global
const
float2*
twiddles,
const
int
x,
const
int
block_size,
const
int
t
)
{
float2
a0,
a1,
a2
;
...
...
@@ -388,7 +388,7 @@ void fft_radix3(__local float2* smem, __constant const float2* twiddles, const i
}
__attribute__
((
always_inline
))
void
fft_radix3_B2
(
__local
float2*
smem,
__
constant
const
float2*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
void
fft_radix3_B2
(
__local
float2*
smem,
__
global
const
float2*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
{
const
int
x2
=
x1
+
t/2
;
float2
a0,
a1,
a2,
a3,
a4,
a5
;
...
...
@@ -411,7 +411,7 @@ void fft_radix3_B2(__local float2* smem, __constant const float2* twiddles, cons
}
__attribute__
((
always_inline
))
void
fft_radix3_B3
(
__local
float2*
smem,
__
constant
const
float2*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
void
fft_radix3_B3
(
__local
float2*
smem,
__
global
const
float2*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
{
const
int
x2
=
x1
+
t/3
;
const
int
x3
=
x2
+
t/3
;
...
...
@@ -437,7 +437,7 @@ void fft_radix3_B3(__local float2* smem, __constant const float2* twiddles, cons
}
__attribute__
((
always_inline
))
void
fft_radix3_B4
(
__local
float2*
smem,
__
constant
const
float2*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
void
fft_radix3_B4
(
__local
float2*
smem,
__
global
const
float2*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
{
const
int
thread_block
=
t/4
;
const
int
x2
=
x1
+
thread_block
;
...
...
@@ -467,7 +467,7 @@ void fft_radix3_B4(__local float2* smem, __constant const float2* twiddles, cons
}
__attribute__
((
always_inline
))
void
fft_radix5
(
__local
float2*
smem,
__
constant
const
float2*
twiddles,
const
int
x,
const
int
block_size,
const
int
t
)
void
fft_radix5
(
__local
float2*
smem,
__
global
const
float2*
twiddles,
const
int
x,
const
int
block_size,
const
int
t
)
{
const
int
k
=
x
%
block_size
;
float2
a0,
a1,
a2,
a3,
a4
;
...
...
@@ -486,7 +486,7 @@ void fft_radix5(__local float2* smem, __constant const float2* twiddles, const i
}
__attribute__
((
always_inline
))
void
fft_radix5_B2
(
__local
float2*
smem,
__
constant
const
float2*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
void
fft_radix5_B2
(
__local
float2*
smem,
__
global
const
float2*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
{
const
int
x2
=
x1+t/2
;
float2
a0,
a1,
a2,
a3,
a4,
a5,
a6,
a7,
a8,
a9
;
...
...
@@ -516,24 +516,23 @@ void fft_radix5_B2(__local float2* smem, __constant const float2* twiddles, cons
__kernel
void
fft_multi_radix_rows
(
__global
const
uchar*
src_ptr,
int
src_step,
int
src_offset,
int
src_rows,
int
src_cols,
__global
uchar*
dst_ptr,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols,
__
constant
float2
*
twiddles_ptr,
const
int
t
,
const
int
nz
)
__
global
float2
*
twiddles_ptr,
const
int
t
,
const
int
nz
)
{
const
int
x
=
get_global_id
(
0
)
;
const
int
y
=
get_group_id
(
1
)
;
const
int
block_size
=
LOCAL_SIZE/kercn
;
if
(
y
<
nz
)
{
__local
float2
smem[LOCAL_SIZE]
;
__
constant
const
float2*
twiddles
=
(
__constant
float2*
)
twiddles_ptr
;
__
global
const
float2*
twiddles
=
(
__global
float2*
)
twiddles_ptr
;
const
int
ind
=
x
;
const
int
block_size
=
LOCAL_SIZE/kercn
;
#
ifdef
IS_1D
float
scale
=
1.f/dst_cols
;
#
else
float
scale
=
1.f/
(
dst_cols*dst_rows
)
;
#
endif
#
if
ndef
REAL
_INPUT
#
if
def
COMPLEX
_INPUT
__global
const
float2*
src
=
(
__global
const
float2*
)(
src_ptr
+
mad24
(
y,
src_step,
mad24
(
x,
(
int
)(
sizeof
(
float
)
*2
)
,
src_offset
)))
;
#
pragma
unroll
for
(
int
i=0
; i<kercn; i++)
...
...
@@ -548,7 +547,7 @@ __kernel void fft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
RADIX_PROCESS
;
#
if
ndef
REAL
_OUTPUT
#
if
def
COMPLEX
_OUTPUT
#
ifdef
NO_CONJUGATE
//
copy
result
without
complex
conjugate
const
int
cols
=
dst_cols/2
+
1
;
...
...
@@ -570,11 +569,18 @@ __kernel void fft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
dst[0]
=
VAL
(
smem_1cn[0],
scale
)
;
#
endif
}
else
{
__global
float2*
dst
=
(
__global
float2*
)(
dst_ptr
+
mad24
(
y,
dst_step,
dst_offset
))
;
#
pragma
unroll
for
(
int
i=x
; i<dst_cols; i+=block_size)
dst[i]
=
(
float2
)
0.f
;
}
}
__kernel
void
fft_multi_radix_cols
(
__global
const
uchar*
src_ptr,
int
src_step,
int
src_offset,
int
src_rows,
int
src_cols,
__global
uchar*
dst_ptr,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols,
__
constant
float2
*
twiddles_ptr,
const
int
t
,
const
int
nz
)
__
global
float2
*
twiddles_ptr,
const
int
t
,
const
int
nz
)
{
const
int
x
=
get_group_id
(
0
)
;
const
int
y
=
get_global_id
(
1
)
;
...
...
@@ -583,7 +589,7 @@ __kernel void fft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
{
__local
float2
smem[LOCAL_SIZE]
;
__global
const
uchar*
src
=
src_ptr
+
mad24
(
y,
src_step,
mad24
(
x,
(
int
)(
sizeof
(
float
)
*2
)
,
src_offset
))
;
__
constant
const
float2*
twiddles
=
(
__constant
float2*
)
twiddles_ptr
;
__
global
const
float2*
twiddles
=
(
__global
float2*
)
twiddles_ptr
;
const
int
ind
=
y
;
const
int
block_size
=
LOCAL_SIZE/kercn
;
float
scale
=
1.f/
(
dst_rows*dst_cols
)
;
...
...
@@ -596,7 +602,7 @@ __kernel void fft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
RADIX_PROCESS
;
#
if
ndef
REAL
_OUTPUT
#
if
def
COMPLEX
_OUTPUT
__global
uchar*
dst
=
dst_ptr
+
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)(
sizeof
(
float
)
*2
)
,
dst_offset
))
;
#
pragma
unroll
for
(
int
i=0
; i<kercn; i++)
...
...
@@ -633,21 +639,26 @@ __kernel void fft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
}
}
__kernel
void
ifft_multi_radix_rows
(
__global
const
uchar*
src_ptr,
int
src_step,
int
src_offset,
__global
uchar*
dst_ptr,
int
dst_step,
int
dst_offset,
__
constant
float2
*
twiddles_ptr,
const
int
t
,
const
int
nz
)
__kernel
void
ifft_multi_radix_rows
(
__global
const
uchar*
src_ptr,
int
src_step,
int
src_offset,
int
src_rows,
int
src_cols,
__global
uchar*
dst_ptr,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols,
__
global
float2
*
twiddles_ptr,
const
int
t
,
const
int
nz
)
{
const
int
x
=
get_global_id
(
0
)
;
const
int
y
=
get_group_id
(
1
)
;
const
int
block_size
=
LOCAL_SIZE/kercn
;
#
ifdef
IS_1D
const
float
scale
=
1.f/dst_cols
;
#
else
const
float
scale
=
1.f/
(
dst_cols*dst_rows
)
;
#
endif
if
(
y
<
nz
)
{
__local
float2
smem[LOCAL_SIZE]
;
__
constant
const
float2*
twiddles
=
(
__constant
float2*
)
twiddles_ptr
;
__
global
const
float2*
twiddles
=
(
__global
float2*
)
twiddles_ptr
;
const
int
ind
=
x
;
const
int
block_size
=
LOCAL_SIZE/kercn
;
#
if
ndef
REAL
#
if
defined
(
COMPLEX_INPUT
)
&&
!defined
(
NO_CONJUGATE
)
__global
const
float2*
src
=
(
__global
const
float2*
)(
src_ptr
+
mad24
(
y,
src_step,
mad24
(
x,
(
int
)(
sizeof
(
float
)
*2
)
,
src_offset
)))
;
#
pragma
unroll
for
(
int
i=0
; i<kercn; i++)
...
...
@@ -657,10 +668,10 @@ __kernel void ifft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
}
#
else
__global
const
float2*
src
;
#
if
def
COMPLEX_INPUT
src
=
(
__global
const
float2*
)(
src_ptr
+
mad24
(
y,
src_step,
mad24
(
2
,
(
int
)
sizeof
(
float
)
,
src_offset
)))
;
#
if
!defined
(
REAL_INPUT
)
&&
defined
(
NO_CONJUGATE
)
src
=
(
__global
const
float2*
)(
src_ptr
+
mad24
(
y,
src_step,
mad24
(
2
,
(
int
)
sizeof
(
float
)
,
src_offset
)))
;
#
else
src
=
(
__global
const
float2*
)(
src_ptr
+
mad24
(
y,
src_step,
mad24
(
1
,
(
int
)
sizeof
(
float
)
,
src_offset
)))
;
src
=
(
__global
const
float2*
)(
src_ptr
+
mad24
(
y,
src_step,
mad24
(
1
,
(
int
)
sizeof
(
float
)
,
src_offset
)))
;
#
endif
#
pragma
unroll
...
...
@@ -688,39 +699,46 @@ __kernel void ifft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
RADIX_PROCESS
;
//
copy
data
to
dst
#
if
ndef
REAL
#
if
def
COMPLEX_OUTPUT
__global
float2*
dst
=
(
__global
float*
)(
dst_ptr
+
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)(
sizeof
(
float
)
*2
)
,
dst_offset
)))
;
#
pragma
unroll
for
(
int
i=0
; i<kercn; i++)
{
dst[i*block_size].x
=
smem[x
+
i*block_size].x
;
dst[i*block_size].y
=
-smem[x
+
i*block_size].y
;
dst[i*block_size].x
=
VAL
(
smem[x
+
i*block_size].x,
scale
)
;
dst[i*block_size].y
=
VAL
(
-smem[x
+
i*block_size].y,
scale
)
;
}
#
else
__global
float*
dst
=
(
__global
float*
)(
dst_ptr
+
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)(
sizeof
(
float
))
,
dst_offset
)))
;
#
pragma
unroll
for
(
int
i=0
; i<kercn; i++)
{
dst[i*block_size]
=
smem[x
+
i*block_size].x
;
dst[i*block_size]
=
VAL
(
smem[x
+
i*block_size].x,
scale
)
;
}
#
endif
}
else
{
__global
float2*
dst
=
(
__global
float*
)(
dst_ptr
+
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)(
sizeof
(
float
)
*2
)
,
dst_offset
)))
;
#
pragma
unroll
for
(
int
i=0
; i<kercn; i++)
dst[i*block_size]
=
(
float2
)
0.f
;
}
}
__kernel
void
ifft_multi_radix_cols
(
__global
const
uchar*
src_ptr,
int
src_step,
int
src_offset,
__global
uchar*
dst_ptr,
int
dst_step,
int
dst_offset,
__
constant
float2
*
twiddles_ptr,
const
int
t
,
const
int
nz
)
__kernel
void
ifft_multi_radix_cols
(
__global
const
uchar*
src_ptr,
int
src_step,
int
src_offset,
int
src_rows,
int
src_cols,
__global
uchar*
dst_ptr,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols,
__
global
float2
*
twiddles_ptr,
const
int
t
,
const
int
nz
)
{
const
int
x
=
get_group_id
(
0
)
;
const
int
y
=
get_global_id
(
1
)
;
#
if
ndef
REAL
#
if
def
COMPLEX_INPUT
if
(
x
<
nz
)
{
__local
float2
smem[LOCAL_SIZE]
;
__global
const
uchar*
src
=
src_ptr
+
mad24
(
y,
src_step,
mad24
(
x,
(
int
)(
sizeof
(
float
)
*2
)
,
src_offset
))
;
__global
uchar*
dst
=
dst_ptr
+
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)(
sizeof
(
float
)
*2
)
,
dst_offset
))
;
__
constant
const
float2*
twiddles
=
(
__constant
float2*
)
twiddles_ptr
;
__
global
const
float2*
twiddles
=
(
__global
float2*
)
twiddles_ptr
;
const
int
ind
=
y
;
const
int
block_size
=
LOCAL_SIZE/kercn
;
...
...
@@ -748,7 +766,7 @@ __kernel void ifft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
#
else
if
(
x
<
nz
)
{
__
constant
const
float2*
twiddles
=
(
__constant
float2*
)
twiddles_ptr
;
__
global
const
float2*
twiddles
=
(
__global
float2*
)
twiddles_ptr
;
const
int
ind
=
y
;
const
int
block_size
=
LOCAL_SIZE/kercn
;
...
...
@@ -756,7 +774,7 @@ __kernel void ifft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
#
ifdef
EVEN
if
(
x!=0
&&
(
x!=
(
nz-1
)))
#
else
if
(
x!=0
)
if
(
x!=0
)
#
endif
{
__global
const
uchar*
src
=
src_ptr
+
mad24
(
y,
src_step,
mad24
(
2*x-1,
(
int
)
sizeof
(
float
)
,
src_offset
))
;
...
...
@@ -800,7 +818,7 @@ __kernel void ifft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
RADIX_PROCESS
;
//
copy
data
to
dst
__global
uchar*
dst
=
dst_ptr
+
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)(
sizeof
(
float
))
,
dst_offset
))
;
__global
uchar*
dst
=
dst_ptr
+
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)(
sizeof
(
float
2
))
,
dst_offset
))
;
#
pragma
unroll
for
(
int
i=0
; i<kercn; i++)
...
...
modules/core/test/ocl/test_dft.cpp
View file @
1d2cf0e2
...
...
@@ -66,7 +66,7 @@ PARAM_TEST_CASE(Dft, cv::Size, OCL_FFT_TYPE, bool, bool, bool, bool)
{
cv
::
Size
dft_size
;
int
dft_flags
,
depth
,
cn
,
dft_type
;
bool
inplace
;
bool
hint
;
bool
is1d
;
TEST_DECLARE_INPUT_PARAMETER
(
src
);
...
...
@@ -93,9 +93,7 @@ PARAM_TEST_CASE(Dft, cv::Size, OCL_FFT_TYPE, bool, bool, bool, bool)
dft_flags
|=
cv
::
DFT_ROWS
;
if
(
GET_PARAM
(
4
))
dft_flags
|=
cv
::
DFT_SCALE
;
inplace
=
GET_PARAM
(
5
);
hint
=
GET_PARAM
(
5
);
is1d
=
(
dft_flags
&
DFT_ROWS
)
!=
0
||
dft_size
.
height
==
1
;
}
...
...
@@ -103,9 +101,6 @@ PARAM_TEST_CASE(Dft, cv::Size, OCL_FFT_TYPE, bool, bool, bool, bool)
{
src
=
randomMat
(
dft_size
,
CV_MAKE_TYPE
(
depth
,
cn
),
0.0
,
100.0
);
usrc
=
src
.
getUMat
(
ACCESS_READ
);
if
(
inplace
)
dst
=
src
,
udst
=
usrc
;
}
};
...
...
@@ -113,8 +108,9 @@ OCL_TEST_P(Dft, Mat)
{
generateTestData
();
OCL_OFF
(
cv
::
dft
(
src
,
dst
,
dft_flags
));
OCL_ON
(
cv
::
dft
(
usrc
,
udst
,
dft_flags
));
int
nonzero_rows
=
hint
?
src
.
cols
-
randomInt
(
1
,
src
.
rows
-
1
)
:
0
;
OCL_OFF
(
cv
::
dft
(
src
,
dst
,
dft_flags
,
nonzero_rows
));
OCL_ON
(
cv
::
dft
(
usrc
,
udst
,
dft_flags
,
nonzero_rows
));
if
(
dft_type
==
R2C
&&
is1d
&&
(
dft_flags
&
cv
::
DFT_INVERSE
)
==
0
)
{
...
...
@@ -122,15 +118,16 @@ OCL_TEST_P(Dft, Mat)
udst
=
udst
(
cv
::
Range
(
0
,
udst
.
rows
),
cv
::
Range
(
0
,
udst
.
cols
/
2
+
1
));
}
Mat
gpu
=
udst
.
getMat
(
ACCESS_READ
);
std
::
cout
<<
src
<<
std
::
endl
;
std
::
cout
<<
dst
<<
std
::
endl
;
std
::
cout
<<
gpu
<<
std
::
endl
;
//Mat gpu = udst.getMat(ACCESS_READ);
//std::cout << dst << std::endl;
//std::cout << gpu << std::endl;
//int cn = udst.channels();
//
//Mat dst1ch = dst.reshape(1);
//Mat gpu1ch = gpu.reshape(1);
//Mat df;
//absdiff(dst
, gpu
, df);
//absdiff(dst
1ch, gpu1ch
, df);
//std::cout << Mat_<int>(df) << std::endl;
double
eps
=
src
.
size
().
area
()
*
1e-4
;
...
...
@@ -188,13 +185,12 @@ OCL_TEST_P(MulSpectrums, Mat)
OCL_INSTANTIATE_TEST_CASE_P
(
OCL_ImgProc
,
MulSpectrums
,
testing
::
Combine
(
Bool
(),
Bool
()));
OCL_INSTANTIATE_TEST_CASE_P
(
Core
,
Dft
,
Combine
(
Values
(
cv
::
Size
(
4
,
1
),
cv
::
Size
(
5
,
8
),
cv
::
Size
(
6
,
6
),
cv
::
Size
(
512
,
1
),
cv
::
Size
(
1280
,
768
)),
Values
((
OCL_FFT_TYPE
)
R2C
,
(
OCL_FFT_TYPE
)
C2C
,
(
OCL_FFT_TYPE
)
R2R
,
(
OCL_FFT_TYPE
)
C2R
),
OCL_INSTANTIATE_TEST_CASE_P
(
Core
,
Dft
,
Combine
(
Values
(
cv
::
Size
(
10
,
10
),
cv
::
Size
(
36
,
36
),
cv
::
Size
(
512
,
1
),
cv
::
Size
(
1280
,
768
)),
Values
((
OCL_FFT_TYPE
)
R2C
,
(
OCL_FFT_TYPE
)
C2C
,
(
OCL_FFT_TYPE
)
R2R
,
(
OCL_FFT_TYPE
)
C2R
),
Bool
(),
// DFT_INVERSE
Bool
(),
// DFT_ROWS
Bool
(),
// DFT_SCALE
Bool
()
//
inplace
Bool
()
//
hint
)
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment