Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
5dd92638
Commit
5dd92638
authored
Jun 26, 2014
by
Alexander Karsakov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Multi-radix with kernel generation
parent
8f845079
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
668 additions
and
36 deletions
+668
-36
perf_dxt.cpp
modules/core/perf/opencl/perf_dxt.cpp
+3
-3
dxt.cpp
modules/core/src/dxt.cpp
+252
-5
fft.cl
modules/core/src/opencl/fft.cl
+298
-0
test_dft.cpp
modules/core/test/ocl/test_dft.cpp
+52
-22
dft.cpp
samples/cpp/dft.cpp
+63
-6
No files found.
modules/core/perf/opencl/perf_dxt.cpp
View file @
5dd92638
...
...
@@ -57,9 +57,9 @@ namespace ocl {
typedef
tuple
<
Size
,
int
>
DftParams
;
typedef
TestBaseWithParam
<
DftParams
>
DftFixture
;
OCL_PERF_TEST_P
(
DftFixture
,
Dft
,
::
testing
::
Combine
(
Values
(
OCL_SIZE_1
,
OCL_SIZE_2
,
OCL_SIZE_3
),
Values
((
int
)
DFT_ROWS
,
(
int
)
DFT_SCALE
,
(
int
)
DFT_INVERSE
,
(
int
)
DFT_INVERSE
|
DFT_SCALE
,
(
int
)
DFT_ROWS
|
DFT_INVERSE
)))
OCL_PERF_TEST_P
(
DftFixture
,
Dft
,
::
testing
::
Combine
(
Values
(
/*OCL_SIZE_1, OCL_SIZE_2, OCL_SIZE_3, */
Size
(
1024
,
1024
),
Size
(
1024
,
2048
),
Size
(
512
,
512
),
Size
(
2048
,
2048
)
),
Values
((
int
)
DFT_ROWS
/*, (int) 0/*
, (int)DFT_SCALE, (int)DFT_INVERSE,
(int)DFT_INVERSE | DFT_SCALE, (int)DFT_ROWS | DFT_INVERSE
*/
)))
{
const
DftParams
params
=
GetParam
();
const
Size
srcSize
=
get
<
0
>
(
params
);
...
...
modules/core/src/dxt.cpp
View file @
5dd92638
...
...
@@ -1960,7 +1960,7 @@ static void CL_CALLBACK oclCleanupCallback(cl_event e, cl_int, void *p)
}
static
bool
ocl_dft
(
InputArray
_src
,
OutputArray
_dst
,
int
flags
)
static
bool
ocl_dft
_amdfft
(
InputArray
_src
,
OutputArray
_dst
,
int
flags
)
{
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
Size
ssize
=
_src
.
size
();
...
...
@@ -2029,12 +2029,257 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags)
#endif // HAVE_CLAMDFFT
namespace
cv
{
#ifdef HAVE_OPENCL
static
bool
fft_radixN
(
InputArray
_src
,
OutputArray
_dst
,
int
radix
,
int
block_size
,
int
nonzero_rows
,
int
flags
)
{
int
N
=
_src
.
size
().
width
;
if
(
N
%
radix
)
return
false
;
UMat
src
=
_src
.
getUMat
();
UMat
dst
=
_dst
.
getUMat
();
int
thread_count
=
N
/
radix
;
size_t
globalsize
[
2
]
=
{
thread_count
,
nonzero_rows
};
String
kernel_name
=
format
(
"fft_radix%d"
,
radix
);
ocl
::
Kernel
k
(
kernel_name
.
c_str
(),
ocl
::
core
::
fft_oclsrc
,
(
flags
&
DFT_INVERSE
)
!=
0
?
"-D INVERSE"
:
""
);
if
(
k
.
empty
())
return
false
;
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
ocl
::
KernelArg
::
WriteOnlyNoSize
(
dst
),
block_size
,
thread_count
,
nonzero_rows
);
return
k
.
run
(
2
,
globalsize
,
NULL
,
false
);
}
static
bool
ocl_packToCCS
(
InputArray
_buffer
,
OutputArray
_dst
,
int
flags
)
{
UMat
buffer
=
_buffer
.
getUMat
();
UMat
dst
=
_dst
.
getUMat
();
buffer
=
buffer
.
reshape
(
1
);
if
((
flags
&
DFT_ROWS
)
==
0
&&
buffer
.
rows
>
1
)
{
// pack to CCS by rows
if
(
dst
.
cols
>
2
)
buffer
.
colRange
(
2
,
dst
.
cols
+
(
dst
.
cols
%
2
)).
copyTo
(
dst
.
colRange
(
1
,
dst
.
cols
-
1
+
(
dst
.
cols
%
2
)));
Mat
dst_mat
=
dst
.
getMat
(
ACCESS_WRITE
);
Mat
buffer_mat
=
buffer
.
getMat
(
ACCESS_READ
);
dst_mat
.
at
<
float
>
(
0
,
0
)
=
buffer_mat
.
at
<
float
>
(
0
,
0
);
dst_mat
.
at
<
float
>
(
dst_mat
.
rows
-
1
,
0
)
=
buffer_mat
.
at
<
float
>
(
buffer
.
rows
/
2
,
0
);
for
(
int
i
=
1
;
i
<
dst_mat
.
rows
-
1
;
i
+=
2
)
{
dst_mat
.
at
<
float
>
(
i
,
0
)
=
buffer_mat
.
at
<
float
>
((
i
+
1
)
/
2
,
0
);
dst_mat
.
at
<
float
>
(
i
+
1
,
0
)
=
buffer_mat
.
at
<
float
>
((
i
+
1
)
/
2
,
1
);
}
if
(
dst_mat
.
cols
%
2
==
0
)
{
dst_mat
.
at
<
float
>
(
0
,
dst_mat
.
cols
-
1
)
=
buffer_mat
.
at
<
float
>
(
0
,
buffer
.
cols
/
2
);
dst_mat
.
at
<
float
>
(
dst_mat
.
rows
-
1
,
dst_mat
.
cols
-
1
)
=
buffer_mat
.
at
<
float
>
(
buffer
.
rows
/
2
,
buffer
.
cols
/
2
);
for
(
int
i
=
1
;
i
<
dst_mat
.
rows
-
1
;
i
+=
2
)
{
dst_mat
.
at
<
float
>
(
i
,
dst_mat
.
cols
-
1
)
=
buffer_mat
.
at
<
float
>
((
i
+
1
)
/
2
,
buffer
.
cols
/
2
);
dst_mat
.
at
<
float
>
(
i
+
1
,
dst_mat
.
cols
-
1
)
=
buffer_mat
.
at
<
float
>
((
i
+
1
)
/
2
,
buffer
.
cols
/
2
+
1
);
}
}
}
else
{
// pack to CCS each row
buffer
.
colRange
(
0
,
1
).
copyTo
(
dst
.
colRange
(
0
,
1
));
buffer
.
colRange
(
2
,
(
dst
.
cols
+
1
)).
copyTo
(
dst
.
colRange
(
1
,
dst
.
cols
));
}
return
true
;
}
static
bool
ocl_dft_C2C_row
(
InputArray
_src
,
OutputArray
_dst
,
int
nonzero_rows
,
int
flags
)
{
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
channels
=
CV_MAT_CN
(
type
);
UMat
src
=
_src
.
getUMat
();
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
if
(
depth
==
CV_64F
&&
!
doubleSupport
)
return
false
;
int
factors
[
34
];
int
nf
=
DFTFactorize
(
src
.
cols
,
factors
);
int
n
=
1
;
int
factor_index
=
0
;
String
radix_processing
;
int
min_radix
=
INT_MAX
;
// 1. 2^n transforms
if
(
(
factors
[
factor_index
]
&
1
)
==
0
)
{
for
(
;
n
<
factors
[
factor_index
];
)
{
int
radix
=
2
;
if
(
8
*
n
<=
factors
[
0
])
radix
=
8
;
else
if
(
4
*
n
<=
factors
[
0
])
radix
=
4
;
radix_processing
+=
format
(
"fft_radix%d(smem,x,%d,%d);"
,
radix
,
n
,
src
.
cols
/
radix
);
min_radix
=
min
(
radix
,
min_radix
);
n
*=
radix
;
}
factor_index
++
;
}
// 2. all the other transforms
for
(
;
factor_index
<
nf
;
factor_index
++
)
{
int
radix
=
factors
[
factor_index
];
radix_processing
+=
format
(
"fft_radix%d(smem,x,%d,%d);"
,
radix
,
n
,
src
.
cols
/
radix
);
min_radix
=
min
(
radix
,
min_radix
);
n
*=
radix
;
}
UMat
dst
=
_dst
.
getUMat
();
int
thread_count
=
src
.
cols
/
min_radix
;
size_t
globalsize
[
2
]
=
{
thread_count
,
nonzero_rows
};
size_t
localsize
[
2
]
=
{
thread_count
,
1
};
String
buildOptions
=
format
(
"-D LOCAL_SIZE=%d -D kercn=%d -D RADIX_PROCESS=%s"
,
src
.
cols
,
src
.
cols
/
thread_count
,
radix_processing
.
c_str
());
ocl
::
Kernel
k
(
"fft_multi_radix"
,
ocl
::
core
::
fft_oclsrc
,
buildOptions
);
if
(
k
.
empty
())
return
false
;
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
ocl
::
KernelArg
::
WriteOnlyNoSize
(
dst
),
thread_count
,
nonzero_rows
);
return
k
.
run
(
2
,
globalsize
,
localsize
,
false
);
}
static
bool
ocl_dft
(
InputArray
_src
,
OutputArray
_dst
,
int
flags
,
int
nonzero_rows
)
{
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
Size
ssize
=
_src
.
size
();
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
if
(
(
!
doubleSupport
&&
depth
==
CV_64F
)
||
!
(
type
==
CV_32FC1
||
type
==
CV_32FC2
||
type
==
CV_64FC1
||
type
==
CV_64FC2
))
return
false
;
// if is not a multiplication of prime numbers { 2, 3, 5 }
if
(
ssize
.
area
()
!=
getOptimalDFTSize
(
ssize
.
area
()))
return
false
;
UMat
src
=
_src
.
getUMat
();
int
complex_input
=
cn
==
2
?
1
:
0
;
int
complex_output
=
(
flags
&
DFT_COMPLEX_OUTPUT
)
!=
0
;
int
real_input
=
cn
==
1
?
1
:
0
;
int
real_output
=
(
flags
&
DFT_REAL_OUTPUT
)
!=
0
;
bool
inv
=
(
flags
&
DFT_INVERSE
)
!=
0
?
1
:
0
;
bool
is1d
=
(
flags
&
DFT_ROWS
)
!=
0
||
src
.
rows
==
1
;
// if output format is not specified
if
(
complex_output
+
real_output
==
0
)
{
if
(
!
inv
)
{
if
(
real_input
)
real_output
=
1
;
else
complex_output
=
1
;
}
}
if
(
complex_output
)
{
//if (is1d)
// _dst.create(Size(src.cols/2+1, src.rows), CV_MAKE_TYPE(depth, 2));
//else
_dst
.
create
(
src
.
size
(),
CV_MAKE_TYPE
(
depth
,
2
));
}
else
_dst
.
create
(
src
.
size
(),
CV_MAKE_TYPE
(
depth
,
1
));
UMat
dst
=
_dst
.
getUMat
();
bool
inplace
=
src
.
u
==
dst
.
u
;
//UMat buffer;
//if (complex_input)
//{
// if (inplace)
// buffer = src;
// else
// src.copyTo(buffer);
//}
//else
//{
// if (!inv)
// {
// // in case real input convert it to complex
// buffer.create(src.size(), CV_MAKE_TYPE(depth, 2));
// std::vector<UMat> planes;
// planes.push_back(src);
// planes.push_back(UMat::zeros(src.size(), CV_32F));
// merge(planes, buffer);
// }
// else
// {
// // TODO: unpack from CCS format
// }
//}
if
(
nonzero_rows
<=
0
||
nonzero_rows
>
_src
.
rows
()
)
nonzero_rows
=
_src
.
rows
();
if
(
!
ocl_dft_C2C_row
(
src
,
dst
,
nonzero_rows
,
flags
))
return
false
;
if
((
flags
&
DFT_ROWS
)
==
0
&&
nonzero_rows
>
1
)
{
transpose
(
dst
,
dst
);
if
(
!
ocl_dft_C2C_row
(
dst
,
dst
,
dst
.
rows
,
flags
))
return
false
;
transpose
(
dst
,
dst
);
}
//if (complex_output)
//{
// if (real_input && is1d)
// _dst.assign(buffer.colRange(0, buffer.cols/2+1));
// else
// _dst.assign(buffer);
//}
//else
//{
// if (!inv)
// ocl_packToCCS(buffer, _dst, flags);
// else
// {
// // copy real part to dst
// }
//}
return
true
;
}
#endif
}
// namespace cv;
void
cv
::
dft
(
InputArray
_src0
,
OutputArray
_dst
,
int
flags
,
int
nonzero_rows
)
{
#ifdef HAVE_CLAMDFFT
CV_OCL_RUN
(
ocl
::
haveAmdFft
()
&&
ocl
::
Device
::
getDefault
().
type
()
!=
ocl
::
Device
::
TYPE_CPU
&&
_dst
.
isUMat
()
&&
_src0
.
dims
()
<=
2
&&
nonzero_rows
==
0
,
ocl_dft
(
_src0
,
_dst
,
flags
))
ocl_dft_amdfft
(
_src0
,
_dst
,
flags
))
#endif
#ifdef HAVE_OPENCL
CV_OCL_RUN
(
_dst
.
isUMat
()
&&
_src0
.
dims
()
<=
2
,
ocl_dft
(
_src0
,
_dst
,
flags
,
nonzero_rows
))
#endif
static
DFTFunc
dft_tbl
[
6
]
=
...
...
@@ -2046,10 +2291,8 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
(
DFTFunc
)
RealDFT_64f
,
(
DFTFunc
)
CCSIDFT_64f
};
AutoBuffer
<
uchar
>
buf
;
void
*
spec
=
0
;
Mat
src0
=
_src0
.
getMat
(),
src
=
src0
;
int
prev_len
=
0
,
stage
=
0
;
bool
inv
=
(
flags
&
DFT_INVERSE
)
!=
0
;
...
...
@@ -2058,6 +2301,7 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
int
elem_size
=
(
int
)
src
.
elemSize1
(),
complex_elem_size
=
elem_size
*
2
;
int
factors
[
34
];
bool
inplace_transform
=
false
;
bool
is1d
=
(
flags
&
DFT_ROWS
)
!=
0
||
src
.
rows
==
1
;
#ifdef USE_IPP_DFT
AutoBuffer
<
uchar
>
ippbuf
;
int
ipp_norm_flag
=
!
(
flags
&
DFT_SCALE
)
?
8
:
inv
?
2
:
1
;
...
...
@@ -2066,7 +2310,10 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
CV_Assert
(
type
==
CV_32FC1
||
type
==
CV_32FC2
||
type
==
CV_64FC1
||
type
==
CV_64FC2
);
if
(
!
inv
&&
src
.
channels
()
==
1
&&
(
flags
&
DFT_COMPLEX_OUTPUT
)
)
_dst
.
create
(
src
.
size
(),
CV_MAKETYPE
(
depth
,
2
)
);
if
(
!
is1d
)
_dst
.
create
(
src
.
size
(),
CV_MAKETYPE
(
depth
,
2
)
);
else
_dst
.
create
(
Size
(
src
.
cols
/
2
+
1
,
src
.
rows
),
CV_MAKETYPE
(
depth
,
2
)
);
else
if
(
inv
&&
src
.
channels
()
==
2
&&
(
flags
&
DFT_REAL_OUTPUT
)
)
_dst
.
create
(
src
.
size
(),
depth
);
else
...
...
modules/core/src/opencl/fft.cl
0 → 100644
View file @
5dd92638
__constant
float
PI
=
3.14159265f
;
__constant
float
SQRT_2
=
0.707106781188f
;
__constant
float
sin_120
=
0.866025403784f
;
__constant
float
fft5_2
=
0.559016994374f
;
__constant
float
fft5_3
=
-0.951056516295f
;
__constant
float
fft5_4
=
-1.538841768587f
;
__constant
float
fft5_5
=
0.363271264002f
;
inline
float2
mul_float2
(
float2
a,
float2
b
)
{
float2
res
;
res.x
=
a.x
*
b.x
-
a.y
*
b.y
;
res.y
=
a.x
*
b.y
+
a.y
*
b.x
;
return
res
;
}
inline
float2
sincos_float2
(
float
alpha
)
{
float
cs,
sn
;
sn
=
sincos
(
alpha,
&cs
)
; // sincos
return
(
float2
)(
cs,
sn
)
;
}
inline
float2
twiddle
(
float2
a
)
{
return
(
float2
)(
a.y,
-a.x
)
;
}
inline
float2
square
(
float2
a
)
{
return
(
float2
)(
a.x
*
a.x
-
a.y
*
a.y,
2.0f
*
a.x
*
a.y
)
;
}
inline
float2
square3
(
float2
a
)
{
return
(
float2
)(
a.x
*
a.x
-
a.y
*
a.y,
3.0f
*
a.x
*
a.y
)
;
}
inline
float2
mul_p1q4
(
float2
a
)
{
return
(
float2
)(
SQRT_2
)
*
(
float2
)(
a.x
+
a.y,
-a.x
+
a.y
)
;
}
inline
float2
mul_p3q4
(
float2
a
)
{
return
(
float2
)(
SQRT_2
)
*
(
float2
)(
-a.x
+
a.y,
-a.x
-
a.y
)
;
}
__attribute__
((
always_inline
))
void
fft_radix2
(
__local
float2*
smem,
const
int
x,
const
int
block_size,
const
int
t
)
{
const
int
k
=
x
&
(
block_size
-
1
)
;
float2
in1,
temp
;
if
(
x
<
t
)
{
in1
=
smem[x]
;
float2
in2
=
smem[x+t]
;
float
theta
=
-PI
*
k
/
block_size
;
float
cs
;
float
sn
=
sincos
(
theta,
&cs
)
;
temp
=
(
float2
)
(
in2.x
*
cs
-
in2.y
*
sn,
in2.y
*
cs
+
in2.x
*
sn
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
x
<
t
)
{
const
int
dst_ind
=
(
x
<<
1
)
-
k
;
smem[dst_ind]
=
in1
+
temp
;
smem[dst_ind+block_size]
=
in1
-
temp
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
__attribute__
((
always_inline
))
void
fft_radix4
(
__local
float2*
smem,
const
int
x,
const
int
block_size,
const
int
t
)
{
const
int
k
=
x
&
(
block_size
-
1
)
;
float2
b0,
b1,
b2,
b3
;
if
(
x
<
t
)
{
float
theta
=
-PI
*
k
/
(
2
*
block_size
)
;
float2
tw
=
sincos_float2
(
theta
)
;
float2
a0
=
smem[x]
;
float2
a1
=
mul_float2
(
tw,
smem[x+t]
)
;
float2
a2
=
smem[x
+
2*t]
;
float2
a3
=
mul_float2
(
tw,
smem[x
+
3*t]
)
;
tw
=
square
(
tw
)
;
a2
=
mul_float2
(
tw,
a2
)
;
a3
=
mul_float2
(
tw,
a3
)
;
b0
=
a0
+
a2
;
b1
=
a0
-
a2
;
b2
=
a1
+
a3
;
b3
=
twiddle
(
a1
-
a3
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
x
<
t
)
{
const
int
dst_ind
=
((
x
-
k
)
<<
2
)
+
k
;
smem[dst_ind]
=
b0
+
b2
;
smem[dst_ind
+
block_size]
=
b1
+
b3
;
smem[dst_ind
+
2*block_size]
=
b0
-
b2
;
smem[dst_ind
+
3*block_size]
=
b1
-
b3
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
__attribute__
((
always_inline
))
void
fft_radix8
(
__local
float2*
smem,
const
int
x,
const
int
block_size,
const
int
t
)
{
const
int
k
=
x
%
block_size
;
float2
a0,
a1,
a2,
a3,
a4,
a5,
a6,
a7
;
if
(
x
<
t
)
{
float
theta
=
-PI
*
k
/
(
4
*
block_size
)
;
float2
tw
=
sincos_float2
(
theta
)
; // W
a0
=
smem[x]
;
a1
=
mul_float2
(
tw,
smem[x
+
t]
)
;
a2
=
smem[x
+
2
*
t]
;
a3
=
mul_float2
(
tw,
smem[x
+
3
*
t]
)
;
a4
=
smem[x
+
4
*
t]
;
a5
=
mul_float2
(
tw,
smem[x
+
5
*
t]
)
;
a6
=
smem[x
+
6
*
t]
;
a7
=
mul_float2
(
tw,
smem[x
+
7
*
t]
)
;
tw
=
square
(
tw
)
; // W^2
a2
=
mul_float2
(
tw,
a2
)
;
a3
=
mul_float2
(
tw,
a3
)
;
a6
=
mul_float2
(
tw,
a6
)
;
a7
=
mul_float2
(
tw,
a7
)
;
tw
=
square
(
tw
)
; // W^4
a4
=
mul_float2
(
tw,
a4
)
;
a5
=
mul_float2
(
tw,
a5
)
;
a6
=
mul_float2
(
tw,
a6
)
;
a7
=
mul_float2
(
tw,
a7
)
;
float2
b0
=
a0
+
a4
;
float2
b4
=
a0
-
a4
;
float2
b1
=
a1
+
a5
;
float2
b5
=
mul_p1q4
(
a1
-
a5
)
;
float2
b2
=
a2
+
a6
;
float2
b6
=
twiddle
(
a2
-
a6
)
;
float2
b3
=
a3
+
a7
;
float2
b7
=
mul_p3q4
(
a3
-
a7
)
;
a0
=
b0
+
b2
;
a2
=
b0
-
b2
;
a1
=
b1
+
b3
;
a3
=
twiddle
(
b1
-
b3
)
;
a4
=
b4
+
b6
;
a6
=
b4
-
b6
;
a5
=
b5
+
b7
;
a7
=
twiddle
(
b5
-
b7
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
x
<
t
)
{
const
int
dst_ind
=
((
x
-
k
)
<<
3
)
+
k
;
__local
float2*
dst
=
smem
+
dst_ind
;
dst[0]
=
a0
+
a1
;
dst[block_size]
=
a4
+
a5
;
dst[2
*
block_size]
=
a2
+
a3
;
dst[3
*
block_size]
=
a6
+
a7
;
dst[4
*
block_size]
=
a0
-
a1
;
dst[5
*
block_size]
=
a4
-
a5
;
dst[6
*
block_size]
=
a2
-
a3
;
dst[7
*
block_size]
=
a6
-
a7
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
__attribute__
((
always_inline
))
void
fft_radix3
(
__local
float2*
smem,
const
int
x,
const
int
block_size,
const
int
t
)
{
const
int
k
=
x
%
block_size
;
float2
a0,
a1,
a2,
b0,
b1
;
if
(
x
<
t
)
{
const
float
theta
=
-PI
*
k
*
2
/
(
3
*
block_size
)
;
a0
=
smem[x]
;
a1
=
mul_float2
(
sincos_float2
(
theta
)
,
smem[x+t]
)
;
a2
=
mul_float2
(
sincos_float2
(
2
*
theta
)
,
smem[x+2*t]
)
;
b1
=
a1
+
a2
;
a2
=
twiddle
((
float2
)
sin_120*
(
a1
-
a2
))
;
b0
=
a0
-
(
float2
)(
0.5f
)
*b1
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
x
<
t
)
{
const
int
dst_ind
=
((
x
-
k
)
*
3
)
+
k
;
smem[dst_ind]
=
a0
+
b1
;
smem[dst_ind
+
block_size]
=
b0
+
a2
;
smem[dst_ind
+
2*block_size]
=
b0
-
a2
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
__attribute__
((
always_inline
))
void
fft_radix5
(
__local
float2*
smem,
const
int
x,
const
int
block_size,
const
int
t
)
{
const
int
k
=
x
%
block_size
;
float2
a0,
a1,
a2,
a3,
a4,
b0,
b1,
b2,
b5
;
if
(
x
<
t
)
{
const
float
theta
=
-PI
*
k
*
2
/
(
5
*
block_size
)
;
a0
=
smem[x]
;
a1
=
mul_float2
(
sincos_float2
(
theta
)
,
smem[x
+
t]
)
;
a2
=
mul_float2
(
sincos_float2
(
theta*2
)
,
smem[x+2*t]
)
;
a3
=
mul_float2
(
sincos_float2
(
theta*3
)
,
smem[x+3*t]
)
;
a4
=
mul_float2
(
sincos_float2
(
theta*4
)
,
smem[x+4*t]
)
;
b1
=
a1
+
a4
;
a1
-=
a4
;
a4
=
a3
+
a2
;
a3
-=
a2
;
b2
=
b1
+
a4
;
b0
=
a0
-
(
float2
)
0.25f
*
b2
;
b1
=
(
float2
)
fft5_2
*
(
b1
-
a4
)
;
a4
=
-
(
float2
)
fft5_3
*
(
a1
+
a3
)
;
a4
=
twiddle
(
a4
)
;
b5
=
(
float2
)(
a4.x
-
fft5_5
*
a1.y,
a4.y
+
fft5_5
*
a1.x
)
;
a4.x
+=
fft5_4
*
a3.y
;
a4.y
-=
fft5_4
*
a3.x
;
a1
=
b0
+
b1
;
b0
-=
b1
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
x
<
t
)
{
const
int
dst_ind
=
((
x
-
k
)
*
5
)
+
k
;
__local
float2*
dst
=
smem
+
dst_ind
;
dst[0]
=
a0
+
b2
;
dst[block_size]
=
a1
+
a4
;
dst[2
*
block_size]
=
b0
+
b5
;
dst[3
*
block_size]
=
b0
-
b5
;
dst[4
*
block_size]
=
a1
-
a4
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
__kernel
void
fft_multi_radix
(
__global
const
uchar*
srcptr,
int
src_step,
int
src_offset,
__global
uchar*
dstptr,
int
dst_step,
int
dst_offset,
const
int
t
,
const
int
nz
)
{
const
int
x
=
get_global_id
(
0
)
;
const
int
y
=
get_group_id
(
1
)
;
if
(
y
<
nz
)
{
__local
float2
smem[LOCAL_SIZE]
;
__global
const
float2*
src
=
(
__global
const
float2*
)(
srcptr
+
mad24
(
y,
src_step,
mad24
(
x,
(
int
)(
sizeof
(
float
)
*2
)
,
src_offset
)))
;
__global
float2*
dst
=
(
__global
float2*
)(
dstptr
+
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)(
sizeof
(
float
)
*2
)
,
dst_offset
)))
;
const
int
block_size
=
LOCAL_SIZE/kercn
;
#
pragma
unroll
for
(
int
i=0
; i<kercn; i++)
smem[x+i*block_size]
=
src[i*block_size]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
RADIX_PROCESS
;
//
copy
data
to
dst
#
pragma
unroll
for
(
int
i=0
; i<kercn; i++)
dst[i*block_size]
=
smem[x
+
i*block_size]
;
}
}
\ No newline at end of file
modules/core/test/ocl/test_dft.cpp
View file @
5dd92638
...
...
@@ -48,16 +48,24 @@
#ifdef HAVE_OPENCL
enum
OCL_FFT_TYPE
{
R2R
=
0
,
// real to real (CCS)
C2R
=
1
,
// complex to real (CCS)
R2C
=
2
,
// real (CCS) to complex
C2C
=
3
// complex to complex
};
namespace
cvtest
{
namespace
ocl
{
////////////////////////////////////////////////////////////////////////////
// Dft
PARAM_TEST_CASE
(
Dft
,
cv
::
Size
,
MatDepth
,
bool
,
bool
,
bool
,
bool
)
PARAM_TEST_CASE
(
Dft
,
cv
::
Size
,
OCL_FFT_TYPE
,
bool
)
{
cv
::
Size
dft_size
;
int
dft_flags
,
depth
;
int
dft_flags
,
depth
,
cn
,
dft_type
;
bool
inplace
;
TEST_DECLARE_INPUT_PARAMETER
(
src
);
...
...
@@ -66,19 +74,31 @@ PARAM_TEST_CASE(Dft, cv::Size, MatDepth, bool, bool, bool, bool)
virtual
void
SetUp
()
{
dft_size
=
GET_PARAM
(
0
);
d
epth
=
GET_PARAM
(
1
);
inplace
=
GET_PARAM
(
2
)
;
d
ft_type
=
GET_PARAM
(
1
);
depth
=
CV_32F
;
dft_flags
=
0
;
if
(
GET_PARAM
(
3
))
dft_flags
|=
cv
::
DFT_ROWS
;
if
(
GET_PARAM
(
4
))
dft_flags
|=
cv
::
DFT_SCALE
;
if
(
GET_PARAM
(
5
))
dft_flags
|=
cv
::
DFT_INVERSE
;
switch
(
dft_type
)
{
case
R2R
:
dft_flags
|=
cv
::
DFT_REAL_OUTPUT
;
cn
=
1
;
break
;
case
C2R
:
dft_flags
|=
cv
::
DFT_REAL_OUTPUT
;
cn
=
2
;
break
;
case
R2C
:
dft_flags
|=
cv
::
DFT_COMPLEX_OUTPUT
;
cn
=
1
;
break
;
case
C2C
:
dft_flags
|=
cv
::
DFT_COMPLEX_OUTPUT
;
cn
=
2
;
break
;
}
inplace
=
false
;
if
(
GET_PARAM
(
2
))
dft_flags
|=
cv
::
DFT_ROWS
;
// (DFT_COMPLEX_OUTPUT | DFT_ROWS) works incorrect
//if (GET_PARAM(3))
// if (dft_type == C2C) dft_flags |= cv::DFT_INVERSE;
//if (GET_PARAM(3))
// dft_flags |= cv::DFT_SCALE;
}
void
generateTestData
(
int
cn
=
2
)
void
generateTestData
()
{
src
=
randomMat
(
dft_size
,
CV_MAKE_TYPE
(
depth
,
cn
),
0.0
,
100.0
);
usrc
=
src
.
getUMat
(
ACCESS_READ
);
...
...
@@ -88,12 +108,23 @@ PARAM_TEST_CASE(Dft, cv::Size, MatDepth, bool, bool, bool, bool)
}
};
OCL_TEST_P
(
Dft
,
C2C
)
OCL_TEST_P
(
Dft
,
Mat
)
{
generateTestData
();
OCL_OFF
(
cv
::
dft
(
src
,
dst
,
dft_flags
|
cv
::
DFT_COMPLEX_OUTPUT
));
OCL_ON
(
cv
::
dft
(
usrc
,
udst
,
dft_flags
|
cv
::
DFT_COMPLEX_OUTPUT
));
OCL_OFF
(
cv
::
dft
(
src
,
dst
,
dft_flags
));
OCL_ON
(
cv
::
dft
(
usrc
,
udst
,
dft_flags
));
Mat
gpu
=
udst
.
getMat
(
ACCESS_READ
);
std
::
cout
<<
src
<<
std
::
endl
;
std
::
cout
<<
dst
<<
std
::
endl
;
std
::
cout
<<
gpu
<<
std
::
endl
;
//int cn = udst.channels();
//Mat df;
//absdiff(dst, gpu, df);
//std::cout << df << std::endl;
double
eps
=
src
.
size
().
area
()
*
1e-4
;
EXPECT_MAT_NEAR
(
dst
,
udst
,
eps
);
...
...
@@ -150,15 +181,13 @@ OCL_TEST_P(MulSpectrums, Mat)
OCL_INSTANTIATE_TEST_CASE_P
(
OCL_ImgProc
,
MulSpectrums
,
testing
::
Combine
(
Bool
(),
Bool
()));
OCL_INSTANTIATE_TEST_CASE_P
(
Core
,
Dft
,
Combine
(
Values
(
cv
::
Size
(
2
,
3
),
cv
::
Size
(
5
,
4
),
cv
::
Size
(
25
,
20
),
cv
::
Size
(
512
,
1
),
cv
::
Size
(
1024
,
768
)),
Values
(
CV_32F
,
CV_64F
),
Bool
(),
// inplace
Bool
(),
// DFT_ROWS
Bool
(),
// DFT_SCALE
Bool
())
// DFT_INVERSE
OCL_INSTANTIATE_TEST_CASE_P
(
Core
,
Dft
,
Combine
(
Values
(
cv
::
Size
(
2
,
3
),
cv
::
Size
(
5
,
4
),
cv
::
Size
(
30
,
20
),
cv
::
Size
(
512
,
1
),
cv
::
Size
(
1024
,
1024
)),
Values
((
OCL_FFT_TYPE
)
C2C
/*, (OCL_FFT_TYPE) R2R, (OCL_FFT_TYPE) R2C/*, (OCL_FFT_TYPE) C2R*/
),
Bool
()
// DFT_ROWS
)
);
}
}
// namespace cvtest::ocl
#endif // HAVE_OPENCL
#endif // HAVE_OPENCL
\ No newline at end of file
samples/cpp/dft.cpp
View file @
5dd92638
...
...
@@ -5,6 +5,8 @@
#include "opencv2/highgui.hpp"
#include <stdio.h>
#include <iostream>
#include <chrono>
using
namespace
cv
;
using
namespace
std
;
...
...
@@ -24,6 +26,31 @@ const char* keys =
int
main
(
int
argc
,
const
char
**
argv
)
{
//int cols = 4;
//int rows = 768;
//srand(0);
//Mat input(Size(cols, rows), CV_32FC2);
//for (int i=0; i<cols; i++)
// for (int j=0; j<rows; j++)
// input.at<Vec2f>(j,i) = Vec2f((float) rand() / RAND_MAX, (float) rand() / RAND_MAX);
//Mat dst;
//
//UMat gpu_input, gpu_dst;
//input.copyTo(gpu_input);
//auto start = std::chrono::system_clock::now();
//dft(input, dst, DFT_ROWS);
//auto cpu_duration = chrono::duration_cast<chrono::milliseconds>(chrono::system_clock::now() - start);
//
//start = std::chrono::system_clock::now();
//dft(gpu_input, gpu_dst, DFT_ROWS);
//auto gpu_duration = chrono::duration_cast<chrono::milliseconds>(chrono::system_clock::now() - start);
//double n = norm(dst, gpu_dst);
//cout << "norm = " << n << endl;
//cout << "CPU time: " << cpu_duration.count() << "ms" << endl;
//cout << "GPU time: " << gpu_duration.count() << "ms" << endl;
help
();
CommandLineParser
parser
(
argc
,
argv
,
keys
);
string
filename
=
parser
.
get
<
string
>
(
0
);
...
...
@@ -35,16 +62,46 @@ int main(int argc, const char ** argv)
printf
(
"Cannot read image file: %s
\n
"
,
filename
.
c_str
());
return
-
1
;
}
int
M
=
getOptimalDFTSize
(
img
.
rows
);
int
N
=
getOptimalDFTSize
(
img
.
cols
);
Mat
small_img
=
img
(
Rect
(
0
,
0
,
6
,
6
));
int
M
=
getOptimalDFTSize
(
small_img
.
rows
);
int
N
=
getOptimalDFTSize
(
small_img
.
cols
);
Mat
padded
;
copyMakeBorder
(
img
,
padded
,
0
,
M
-
img
.
rows
,
0
,
N
-
img
.
cols
,
BORDER_CONSTANT
,
Scalar
::
all
(
0
));
copyMakeBorder
(
small_img
,
padded
,
0
,
M
-
small_img
.
rows
,
0
,
N
-
small_
img
.
cols
,
BORDER_CONSTANT
,
Scalar
::
all
(
0
));
Mat
planes
[]
=
{
Mat_
<
float
>
(
padded
),
Mat
::
zero
s
(
padded
.
size
(),
CV_32F
)};
Mat
complexImg
;
Mat
planes
[]
=
{
Mat_
<
float
>
(
padded
),
Mat
::
one
s
(
padded
.
size
(),
CV_32F
)};
Mat
complexImg
,
complexImg1
,
complexInput
;
merge
(
planes
,
2
,
complexImg
);
dft
(
complexImg
,
complexImg
);
Mat
realInput
;
padded
.
convertTo
(
realInput
,
CV_32F
);
complexInput
=
complexImg
;
//cout << complexImg << endl;
//dft(complexImg, complexImg, DFT_REAL_OUTPUT);
//cout << "Complex to Complex" << endl;
//cout << complexImg << endl;
cout
<<
"Complex input"
<<
endl
<<
complexInput
<<
endl
;
cout
<<
"Real input"
<<
endl
<<
realInput
<<
endl
;
dft
(
complexInput
,
complexImg1
,
DFT_COMPLEX_OUTPUT
);
cout
<<
"Complex to Complex image: "
<<
endl
;
cout
<<
endl
<<
complexImg1
<<
endl
;
Mat
realImg1
;
dft
(
complexInput
,
realImg1
,
DFT_REAL_OUTPUT
);
cout
<<
"Complex to Real image: "
<<
endl
;
cout
<<
endl
<<
realImg1
<<
endl
;
Mat
realOut
;
dft
(
complexImg1
,
realOut
,
DFT_INVERSE
|
DFT_COMPLEX_OUTPUT
);
cout
<<
"Complex to Complex (inverse):"
<<
endl
;
cout
<<
realOut
<<
endl
;
Mat
complexOut
;
dft
(
realImg1
,
complexOut
,
DFT_INVERSE
|
DFT_REAL_OUTPUT
|
DFT_SCALE
);
cout
<<
"Complex to Real (inverse):"
<<
endl
;
cout
<<
complexOut
<<
endl
;
// compute log(1 + sqrt(Re(DFT(img))**2 + Im(DFT(img))**2))
split
(
complexImg
,
planes
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment