Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
e5a3ab3c
Commit
e5a3ab3c
authored
Jul 11, 2014
by
Alexander Karsakov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Added fftplan cache
parent
0318d277
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
226 additions
and
165 deletions
+226
-165
dxt.cpp
modules/core/src/dxt.cpp
+193
-139
fft.cl
modules/core/src/opencl/fft.cl
+31
-24
test_dft.cpp
modules/core/test/ocl/test_dft.cpp
+2
-2
No files found.
modules/core/src/dxt.cpp
View file @
e5a3ab3c
...
...
@@ -2034,50 +2034,6 @@ namespace cv
#ifdef HAVE_OPENCL
static
bool
ocl_packToCCS
(
InputArray
_buffer
,
OutputArray
_dst
,
int
flags
)
{
UMat
buffer
=
_buffer
.
getUMat
();
UMat
dst
=
_dst
.
getUMat
();
buffer
=
buffer
.
reshape
(
1
);
if
((
flags
&
DFT_ROWS
)
==
0
&&
buffer
.
rows
>
1
)
{
// pack to CCS by rows
if
(
dst
.
cols
>
2
)
buffer
.
colRange
(
2
,
dst
.
cols
+
(
dst
.
cols
%
2
)).
copyTo
(
dst
.
colRange
(
1
,
dst
.
cols
-
1
+
(
dst
.
cols
%
2
)));
Mat
dst_mat
=
dst
.
getMat
(
ACCESS_WRITE
);
Mat
buffer_mat
=
buffer
.
getMat
(
ACCESS_READ
);
dst_mat
.
at
<
float
>
(
0
,
0
)
=
buffer_mat
.
at
<
float
>
(
0
,
0
);
dst_mat
.
at
<
float
>
(
dst_mat
.
rows
-
1
,
0
)
=
buffer_mat
.
at
<
float
>
(
buffer
.
rows
/
2
,
0
);
for
(
int
i
=
1
;
i
<
dst_mat
.
rows
-
1
;
i
+=
2
)
{
dst_mat
.
at
<
float
>
(
i
,
0
)
=
buffer_mat
.
at
<
float
>
((
i
+
1
)
/
2
,
0
);
dst_mat
.
at
<
float
>
(
i
+
1
,
0
)
=
buffer_mat
.
at
<
float
>
((
i
+
1
)
/
2
,
1
);
}
if
(
dst_mat
.
cols
%
2
==
0
)
{
dst_mat
.
at
<
float
>
(
0
,
dst_mat
.
cols
-
1
)
=
buffer_mat
.
at
<
float
>
(
0
,
buffer
.
cols
/
2
);
dst_mat
.
at
<
float
>
(
dst_mat
.
rows
-
1
,
dst_mat
.
cols
-
1
)
=
buffer_mat
.
at
<
float
>
(
buffer
.
rows
/
2
,
buffer
.
cols
/
2
);
for
(
int
i
=
1
;
i
<
dst_mat
.
rows
-
1
;
i
+=
2
)
{
dst_mat
.
at
<
float
>
(
i
,
dst_mat
.
cols
-
1
)
=
buffer_mat
.
at
<
float
>
((
i
+
1
)
/
2
,
buffer
.
cols
/
2
);
dst_mat
.
at
<
float
>
(
i
+
1
,
dst_mat
.
cols
-
1
)
=
buffer_mat
.
at
<
float
>
((
i
+
1
)
/
2
,
buffer
.
cols
/
2
+
1
);
}
}
}
else
{
// pack to CCS each row
buffer
.
colRange
(
0
,
1
).
copyTo
(
dst
.
colRange
(
0
,
1
));
buffer
.
colRange
(
2
,
(
dst
.
cols
+
1
)).
copyTo
(
dst
.
colRange
(
1
,
dst
.
cols
));
}
return
true
;
}
static
std
::
vector
<
int
>
ocl_getRadixes
(
int
cols
,
int
&
min_radix
)
{
int
factors
[
34
];
...
...
@@ -2116,72 +2072,175 @@ static std::vector<int> ocl_getRadixes(int cols, int& min_radix)
return
radixes
;
}
st
atic
bool
ocl_dft_C2C_row
(
InputArray
_src
,
OutputArray
_dst
,
InputOutputArray
_twiddles
,
int
nonzero_rows
,
int
flags
)
st
ruct
OCL_FftPlan
{
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
channels
=
CV_MAT_CN
(
type
);
UMat
src
=
_src
.
getUMat
();
UMat
twiddles
;
String
buildOptions
;
int
thread_count
;
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
if
(
depth
==
CV_64F
&&
!
doubleSupport
)
return
false
;
int
min_radix
=
INT_MAX
;
std
::
vector
<
int
>
radixes
=
ocl_getRadixes
(
src
.
cols
,
min_radix
);
// generate string with radix calls
String
radix_processing
;
int
n
=
1
,
twiddle_index
=
0
;
for
(
size_t
i
=
0
;
i
<
radixes
.
size
();
i
++
)
{
int
radix
=
radixes
[
i
];
radix_processing
+=
format
(
"fft_radix%d(smem,twiddles+%d,x,%d,%d);"
,
radix
,
twiddle_index
,
n
,
src
.
cols
/
radix
);
twiddle_index
+=
(
radix
-
1
)
*
n
;
n
*=
radix
;
}
int
dft_size
;
int
flags
;
UMat
twiddles
=
_twiddles
.
getUMat
();
if
(
twiddles
.
cols
!=
twiddle_index
)
OCL_FftPlan
(
int
_size
,
int
_flags
)
:
dft_size
(
_size
),
flags
(
_flags
)
{
// need to create/update tweedle table
int
buffer_size
=
twiddle_index
;
twiddles
.
create
(
1
,
buffer_size
,
CV_32FC2
);
int
min_radix
=
INT_MAX
;
std
::
vector
<
int
>
radixes
=
ocl_getRadixes
(
dft_size
,
min_radix
);
thread_count
=
dft_size
/
min_radix
;
// generate string with radix calls
String
radix_processing
;
int
n
=
1
,
twiddle_size
=
0
;
for
(
size_t
i
=
0
;
i
<
radixes
.
size
();
i
++
)
{
int
radix
=
radixes
[
i
];
radix_processing
+=
format
(
"fft_radix%d(smem,twiddles+%d,x,%d,%d);"
,
radix
,
twiddle_size
,
n
,
dft_size
/
radix
);
twiddle_size
+=
(
radix
-
1
)
*
n
;
n
*=
radix
;
}
twiddles
.
create
(
1
,
twiddle_size
,
CV_32FC2
);
Mat
tw
=
twiddles
.
getMat
(
ACCESS_WRITE
);
float
*
ptr
=
tw
.
ptr
<
float
>
();
int
ptr_index
=
0
;
int
n
=
1
;
n
=
1
;
for
(
size_t
i
=
0
;
i
<
radixes
.
size
();
i
++
)
{
int
radix
=
radixes
[
i
];
n
*=
radix
;
for
(
int
k
=
0
;
k
<
(
n
/
radix
);
k
++
)
for
(
int
j
=
1
;
j
<
radix
;
j
++
)
{
double
theta
=
-
CV_TWO_PI
*
k
/
n
;
double
theta
=
-
CV_TWO_PI
*
j
/
n
;
for
(
int
j
=
1
;
j
<
radix
;
j
++
)
for
(
int
k
=
0
;
k
<
(
n
/
radix
);
k
++
)
{
ptr
[
ptr_index
++
]
=
cos
(
j
*
theta
);
ptr
[
ptr_index
++
]
=
sin
(
j
*
theta
);
ptr
[
ptr_index
++
]
=
cos
(
k
*
theta
);
ptr
[
ptr_index
++
]
=
sin
(
k
*
theta
);
}
}
}
buildOptions
=
format
(
"-D LOCAL_SIZE=%d -D kercn=%d -D RADIX_PROCESS=%s"
,
dft_size
,
dft_size
/
thread_count
,
radix_processing
.
c_str
());
}
bool
enqueueTransform
(
InputArray
_src
,
OutputArray
_dst
,
int
nonzero_rows
)
const
{
UMat
src
=
_src
.
getUMat
();
_dst
.
create
(
src
.
size
(),
src
.
type
());
UMat
dst
=
_dst
.
getUMat
();
size_t
globalsize
[
2
]
=
{
thread_count
,
nonzero_rows
};
size_t
localsize
[
2
]
=
{
thread_count
,
1
};
ocl
::
Kernel
k
(
"fft_multi_radix"
,
ocl
::
core
::
fft_oclsrc
,
buildOptions
);
if
(
k
.
empty
())
return
false
;
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
ocl
::
KernelArg
::
WriteOnlyNoSize
(
dst
),
ocl
::
KernelArg
::
PtrReadOnly
(
twiddles
),
thread_count
,
nonzero_rows
);
return
k
.
run
(
2
,
globalsize
,
localsize
,
false
);
}
};
class
OCL_FftPlanCache
{
public
:
static
OCL_FftPlanCache
&
getInstance
()
{
static
OCL_FftPlanCache
planCache
;
return
planCache
;
}
//Mat buf = twiddles.getMat(ACCESS_READ);
UMat
dst
=
_dst
.
getUMat
();
int
thread_count
=
src
.
cols
/
min_radix
;
size_t
globalsize
[
2
]
=
{
thread_count
,
nonzero_rows
};
size_t
localsize
[
2
]
=
{
thread_count
,
1
};
OCL_FftPlan
*
getFftPlan
(
int
dft_size
,
int
flags
)
{
for
(
size_t
i
=
0
,
size
=
planStorage
.
size
();
i
<
size
;
++
i
)
{
OCL_FftPlan
*
const
plan
=
planStorage
[
i
];
String
buildOptions
=
format
(
"-D LOCAL_SIZE=%d -D kercn=%d -D RADIX_PROCESS=%s"
,
src
.
cols
,
src
.
cols
/
thread_count
,
radix_processing
.
c_str
());
ocl
::
Kernel
k
(
"fft_multi_radix"
,
ocl
::
core
::
fft_oclsrc
,
buildOptions
)
;
if
(
k
.
empty
())
return
false
;
if
(
plan
->
dft_size
==
dft_size
)
{
return
plan
;
}
}
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
ocl
::
KernelArg
::
WriteOnlyNoSize
(
dst
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
twiddles
),
thread_count
,
nonzero_rows
);
return
k
.
run
(
2
,
globalsize
,
localsize
,
false
);
OCL_FftPlan
*
newPlan
=
new
OCL_FftPlan
(
dft_size
,
flags
);
planStorage
.
push_back
(
newPlan
);
return
newPlan
;
}
~
OCL_FftPlanCache
()
{
for
(
std
::
vector
<
OCL_FftPlan
*>::
iterator
i
=
planStorage
.
begin
(),
end
=
planStorage
.
end
();
i
!=
end
;
++
i
)
delete
(
*
i
);
planStorage
.
clear
();
}
protected
:
OCL_FftPlanCache
()
:
planStorage
()
{
}
std
::
vector
<
OCL_FftPlan
*>
planStorage
;
};
static
bool
ocl_packToCCS
(
InputArray
_src
,
OutputArray
_dst
,
int
flags
)
{
UMat
src
=
_src
.
getUMat
();
_dst
.
create
(
src
.
size
(),
CV_32F
);
UMat
dst
=
_dst
.
getUMat
();
src
=
src
.
reshape
(
1
);
if
((
flags
&
DFT_ROWS
)
==
0
&&
src
.
rows
>
1
)
{
// pack to CCS by rows
if
(
dst
.
cols
>
2
)
src
.
colRange
(
2
,
dst
.
cols
+
(
dst
.
cols
%
2
)).
copyTo
(
dst
.
colRange
(
1
,
dst
.
cols
-
1
+
(
dst
.
cols
%
2
)));
Mat
dst_mat
=
dst
.
getMat
(
ACCESS_WRITE
);
Mat
buffer_mat
=
src
.
getMat
(
ACCESS_READ
);
dst_mat
.
at
<
float
>
(
0
,
0
)
=
buffer_mat
.
at
<
float
>
(
0
,
0
);
dst_mat
.
at
<
float
>
(
dst_mat
.
rows
-
1
,
0
)
=
buffer_mat
.
at
<
float
>
(
src
.
rows
/
2
,
0
);
for
(
int
i
=
1
;
i
<
dst_mat
.
rows
-
1
;
i
+=
2
)
{
dst_mat
.
at
<
float
>
(
i
,
0
)
=
buffer_mat
.
at
<
float
>
((
i
+
1
)
/
2
,
0
);
dst_mat
.
at
<
float
>
(
i
+
1
,
0
)
=
buffer_mat
.
at
<
float
>
((
i
+
1
)
/
2
,
1
);
}
if
(
dst_mat
.
cols
%
2
==
0
)
{
dst_mat
.
at
<
float
>
(
0
,
dst_mat
.
cols
-
1
)
=
buffer_mat
.
at
<
float
>
(
0
,
src
.
cols
/
2
);
dst_mat
.
at
<
float
>
(
dst_mat
.
rows
-
1
,
dst_mat
.
cols
-
1
)
=
buffer_mat
.
at
<
float
>
(
src
.
rows
/
2
,
src
.
cols
/
2
);
for
(
int
i
=
1
;
i
<
dst_mat
.
rows
-
1
;
i
+=
2
)
{
dst_mat
.
at
<
float
>
(
i
,
dst_mat
.
cols
-
1
)
=
buffer_mat
.
at
<
float
>
((
i
+
1
)
/
2
,
src
.
cols
/
2
);
dst_mat
.
at
<
float
>
(
i
+
1
,
dst_mat
.
cols
-
1
)
=
buffer_mat
.
at
<
float
>
((
i
+
1
)
/
2
,
src
.
cols
/
2
+
1
);
}
}
}
else
{
// pack to CCS each row
src
.
colRange
(
0
,
1
).
copyTo
(
dst
.
colRange
(
0
,
1
));
src
.
colRange
(
2
,
(
dst
.
cols
+
1
)).
copyTo
(
dst
.
colRange
(
1
,
dst
.
cols
));
}
return
true
;
}
static
bool
ocl_dft_C2C_row
(
InputArray
_src
,
OutputArray
_dst
,
int
nonzero_rows
,
int
flags
)
{
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
channels
=
CV_MAT_CN
(
type
);
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
if
(
depth
==
CV_64F
&&
!
doubleSupport
)
return
false
;
const
OCL_FftPlan
*
plan
=
OCL_FftPlanCache
::
getInstance
().
getFftPlan
(
_src
.
cols
(),
flags
);
return
plan
->
enqueueTransform
(
_src
,
_dst
,
nonzero_rows
);
}
static
bool
ocl_dft
(
InputArray
_src
,
OutputArray
_dst
,
int
flags
,
int
nonzero_rows
)
...
...
@@ -2217,76 +2276,71 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro
}
}
if
(
complex_output
)
UMat
input
,
output
;
if
(
complex_input
)
{
//if (is1d)
// _dst.create(Size(src.cols/2+1, src.rows), CV_MAKE_TYPE(depth, 2));
//else
_dst
.
create
(
src
.
size
(),
CV_MAKE_TYPE
(
depth
,
2
));
input
=
src
;
}
else
_dst
.
create
(
src
.
size
(),
CV_MAKE_TYPE
(
depth
,
1
));
UMat
dst
=
_dst
.
getUMat
();
{
if
(
!
inv
)
{
// in case real input convert it to complex
input
.
create
(
src
.
size
(),
CV_MAKE_TYPE
(
depth
,
2
));
std
::
vector
<
UMat
>
planes
;
planes
.
push_back
(
src
);
planes
.
push_back
(
UMat
::
zeros
(
src
.
size
(),
CV_32F
));
merge
(
planes
,
input
);
}
else
{
// TODO: unpack from CCS format
}
}
bool
inplace
=
src
.
u
==
dst
.
u
;
//UMat buffer;
//if (complex_input)
//{
// if (inplace)
// buffer = src;
// else
// src.copyTo(buffer);
//}
//else
//{
// if (!inv)
// {
// // in case real input convert it to complex
// buffer.create(src.size(), CV_MAKE_TYPE(depth, 2));
// std::vector<UMat> planes;
// planes.push_back(src);
// planes.push_back(UMat::zeros(src.size(), CV_32F));
// merge(planes, buffer);
// }
// else
// {
// // TODO: unpack from CCS format
// }
//}
UMat
dst
=
_dst
.
getUMat
();
if
(
complex_output
)
{
if
(
real_input
&&
is1d
&&
!
inv
)
output
.
create
(
src
.
size
(),
CV_32FC2
);
else
output
=
dst
;
}
else
{
output
.
create
(
src
.
size
(),
CV_32FC2
);
}
if
(
nonzero_rows
<=
0
||
nonzero_rows
>
_src
.
rows
()
)
nonzero_rows
=
_src
.
rows
();
UMat
buffer
;
if
(
!
ocl_dft_C2C_row
(
src
,
dst
,
buffer
,
nonzero_rows
,
flags
))
if
(
!
ocl_dft_C2C_row
(
input
,
output
,
nonzero_rows
,
flags
))
return
false
;
if
((
flags
&
DFT_ROWS
)
==
0
&&
nonzero_rows
>
1
)
{
transpose
(
dst
,
ds
t
);
if
(
!
ocl_dft_C2C_row
(
dst
,
dst
,
buffer
,
ds
t
.
rows
,
flags
))
transpose
(
output
,
outpu
t
);
if
(
!
ocl_dft_C2C_row
(
output
,
output
,
outpu
t
.
rows
,
flags
))
return
false
;
transpose
(
dst
,
ds
t
);
transpose
(
output
,
outpu
t
);
}
if
(
complex_output
)
{
if
(
real_input
&&
is1d
)
_dst
.
assign
(
dst
.
colRange
(
0
,
ds
t
.
cols
/
2
+
1
));
if
(
real_input
&&
is1d
&&
!
inv
)
_dst
.
assign
(
output
.
colRange
(
0
,
outpu
t
.
cols
/
2
+
1
));
else
_dst
.
assign
(
ds
t
);
}
//
else
//
{
//
if (!inv)
// ocl_packToCCS(buffer
, _dst, flags);
//
else
//
{
//
// copy real part to dst
//
}
//
}
_dst
.
assign
(
outpu
t
);
}
else
{
if
(
!
inv
)
ocl_packToCCS
(
output
,
_dst
,
flags
);
else
{
// copy real part to dst
}
}
return
true
;
}
...
...
modules/core/src/opencl/fft.cl
View file @
e5a3ab3c
...
...
@@ -28,7 +28,7 @@ float2 twiddle(float2 a) {
}
__attribute__
((
always_inline
))
void
fft_radix2
(
__local
float2*
smem,
__
global
const
float2*
twiddles,
const
int
x,
const
int
block_size,
const
int
t
)
void
fft_radix2
(
__local
float2*
smem,
__
constant
const
float2*
twiddles,
const
int
x,
const
int
block_size,
const
int
t
)
{
const
int
k
=
x
&
(
block_size
-
1
)
;
float2
a0,
a1
;
...
...
@@ -53,17 +53,18 @@ void fft_radix2(__local float2* smem, __global const float2* twiddles, const int
}
__attribute__
((
always_inline
))
void
fft_radix4
(
__local
float2*
smem,
__
global
const
float2*
twiddles,
const
int
x,
const
int
block_size,
const
int
t
)
void
fft_radix4
(
__local
float2*
smem,
__
constant
const
float2*
twiddles,
const
int
x,
const
int
block_size,
const
int
t
)
{
const
int
k
=
x
&
(
block_size
-
1
)
;
float2
a0,
a1,
a2,
a3
;
if
(
x
<
t
)
{
const
int
twiddle_block
=
block_size
/
4
;
a0
=
smem[x]
;
a1
=
mul_float2
(
twiddles[
3*
k],smem[x+t]
)
;
a2
=
mul_float2
(
twiddles[
3*k
+
1
],smem[x+2*t]
)
;
a3
=
mul_float2
(
twiddles[
3*k
+
2
],smem[x+3*t]
)
;
a1
=
mul_float2
(
twiddles[k],smem[x+t]
)
;
a2
=
mul_float2
(
twiddles[
k
+
block_size
],smem[x+2*t]
)
;
a3
=
mul_float2
(
twiddles[
k
+
2*block_size
],smem[x+3*t]
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
...
...
@@ -87,7 +88,7 @@ void fft_radix4(__local float2* smem, __global const float2* twiddles, const int
}
__attribute__
((
always_inline
))
void
fft_radix8
(
__local
float2*
smem,
__
global
const
float2*
twiddles,
const
int
x,
const
int
block_size,
const
int
t
)
void
fft_radix8
(
__local
float2*
smem,
__
constant
const
float2*
twiddles,
const
int
x,
const
int
block_size,
const
int
t
)
{
const
int
k
=
x
%
block_size
;
float2
a0,
a1,
a2,
a3,
a4,
a5,
a6,
a7
;
...
...
@@ -97,13 +98,13 @@ void fft_radix8(__local float2* smem, __global const float2* twiddles, const int
int
tw_ind
=
block_size
/
8
;
a0
=
smem[x]
;
a1
=
mul_float2
(
twiddles[
7*
k],
smem[x
+
t]
)
;
a2
=
mul_float2
(
twiddles[
7*k+1
],smem[x+2*t]
)
;
a3
=
mul_float2
(
twiddles[
7*k+2
],smem[x+3*t]
)
;
a4
=
mul_float2
(
twiddles[
7*k+3
],smem[x+4*t]
)
;
a5
=
mul_float2
(
twiddles[
7*k+4
],smem[x+5*t]
)
;
a6
=
mul_float2
(
twiddles[
7*k+5
],smem[x+6*t]
)
;
a7
=
mul_float2
(
twiddles[
7*k+6
],smem[x+7*t]
)
;
a1
=
mul_float2
(
twiddles[k],
smem[x
+
t]
)
;
a2
=
mul_float2
(
twiddles[
k
+
block_size
],smem[x+2*t]
)
;
a3
=
mul_float2
(
twiddles[
k+2*block_size
],smem[x+3*t]
)
;
a4
=
mul_float2
(
twiddles[
k+3*block_size
],smem[x+4*t]
)
;
a5
=
mul_float2
(
twiddles[
k+4*block_size
],smem[x+5*t]
)
;
a6
=
mul_float2
(
twiddles[
k+5*block_size
],smem[x+6*t]
)
;
a7
=
mul_float2
(
twiddles[
k+6*block_size
],smem[x+7*t]
)
;
float2
b0,
b1,
b6,
b7
;
...
...
@@ -150,16 +151,23 @@ void fft_radix8(__local float2* smem, __global const float2* twiddles, const int
}
__attribute__
((
always_inline
))
void
fft_radix3
(
__local
float2*
smem,
__
global
const
float2*
twiddles,
const
int
x,
const
int
block_size,
const
int
t
)
void
fft_radix3
(
__local
float2*
smem,
__
constant
const
float2*
twiddles,
const
int
x,
const
int
block_size,
const
int
t
)
{
const
int
k
=
x
%
block_size
;
float2
a0,
a1,
a2
;
if
(
x
<
t
)
{
//const
int
twiddle_block
=
block_size
/
3
;
//const
float
theta
=
-PI
*
k
*
2
/
(
3
*
block_size
)
;
//float2
tw
=
sincos_float2
(
theta
)
;
//printf
(
"radix3 %d (%f,%f)(%f,%f)\n"
,
k,
tw.x,
tw.y,
twiddles[k].x,
twiddles[k].y
)
;
//tw
=
sincos_float2
(
2*theta
)
;
//printf
(
"radix3- %d %d (%f,%f)(%f,%f)\n"
,
k,
twiddle_block,
tw.x,
tw.y,
twiddles[k+block_size].x,
twiddles[k+block_size].y
)
;
a0
=
smem[x]
;
a1
=
mul_float2
(
twiddles[
2*
k],
smem[x+t]
)
;
a2
=
mul_float2
(
twiddles[
2*k+1
],
smem[x+2*t]
)
;
a1
=
mul_float2
(
twiddles[k],
smem[x+t]
)
;
a2
=
mul_float2
(
twiddles[
k+block_size
],
smem[x+2*t]
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
...
...
@@ -181,7 +189,7 @@ void fft_radix3(__local float2* smem, __global const float2* twiddles, const int
}
__attribute__
((
always_inline
))
void
fft_radix5
(
__local
float2*
smem,
__
global
const
float2*
twiddles,
const
int
x,
const
int
block_size,
const
int
t
)
void
fft_radix5
(
__local
float2*
smem,
__
constant
const
float2*
twiddles,
const
int
x,
const
int
block_size,
const
int
t
)
{
const
int
k
=
x
%
block_size
;
float2
a0,
a1,
a2,
a3,
a4
;
...
...
@@ -191,10 +199,10 @@ void fft_radix5(__local float2* smem, __global const float2* twiddles, const int
int
tw_ind
=
block_size
/
5
;
a0
=
smem[x]
;
a1
=
mul_float2
(
twiddles[
4*
k],
smem[x
+
t]
)
;
a2
=
mul_float2
(
twiddles[
4*k+1
],smem[x+2*t]
)
;
a3
=
mul_float2
(
twiddles[
4*k+2
],smem[x+3*t]
)
;
a4
=
mul_float2
(
twiddles[
4*k+3
],smem[x+4*t]
)
;
a1
=
mul_float2
(
twiddles[k],
smem[x
+
t]
)
;
a2
=
mul_float2
(
twiddles[
k
+
block_size
],smem[x+2*t]
)
;
a3
=
mul_float2
(
twiddles[
k+2*block_size
],smem[x+3*t]
)
;
a4
=
mul_float2
(
twiddles[
k+3*block_size
],smem[x+4*t]
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
...
...
@@ -237,8 +245,7 @@ void fft_radix5(__local float2* smem, __global const float2* twiddles, const int
__kernel
void
fft_multi_radix
(
__global
const
uchar*
src_ptr,
int
src_step,
int
src_offset,
__global
uchar*
dst_ptr,
int
dst_step,
int
dst_offset,
__global
const
uchar*
twiddles_ptr,
int
twiddles_step,
int
twiddles_offset,
const
int
t
,
const
int
nz
)
__constant
float2
*
twiddles_ptr,
const
int
t
,
const
int
nz
)
{
const
int
x
=
get_global_id
(
0
)
;
const
int
y
=
get_group_id
(
1
)
;
...
...
@@ -248,7 +255,7 @@ __kernel void fft_multi_radix(__global const uchar* src_ptr, int src_step, int s
__local
float2
smem[LOCAL_SIZE]
;
__global
const
float2*
src
=
(
__global
const
float2*
)(
src_ptr
+
mad24
(
y,
src_step,
mad24
(
x,
(
int
)(
sizeof
(
float
)
*2
)
,
src_offset
)))
;
__global
float2*
dst
=
(
__global
float2*
)(
dst_ptr
+
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)(
sizeof
(
float
)
*2
)
,
dst_offset
)))
;
__
global
const
float2*
twiddles
=
(
__global
float2*
)
twiddles_ptr
;
__
constant
const
float2*
twiddles
=
(
__constant
float2*
)
twiddles_ptr
;
const
int
block_size
=
LOCAL_SIZE/kercn
;
#
pragma
unroll
...
...
modules/core/test/ocl/test_dft.cpp
View file @
e5a3ab3c
...
...
@@ -181,9 +181,9 @@ OCL_TEST_P(MulSpectrums, Mat)
OCL_INSTANTIATE_TEST_CASE_P
(
OCL_ImgProc
,
MulSpectrums
,
testing
::
Combine
(
Bool
(),
Bool
()));
OCL_INSTANTIATE_TEST_CASE_P
(
Core
,
Dft
,
Combine
(
Values
(
cv
::
Size
(
2
,
3
),
cv
::
Size
(
5
,
4
),
cv
::
Size
(
30
,
20
),
OCL_INSTANTIATE_TEST_CASE_P
(
Core
,
Dft
,
Combine
(
Values
(
cv
::
Size
(
1920
,
1
),
cv
::
Size
(
5
,
4
),
cv
::
Size
(
30
,
20
),
cv
::
Size
(
512
,
1
),
cv
::
Size
(
1024
,
1024
)),
Values
(
(
OCL_FFT_TYPE
)
C2C
/*, (OCL_FFT_TYPE) R2R, (OCL_FFT_TYPE) R2C
/*, (OCL_FFT_TYPE) C2R*/
),
Values
(
/*(OCL_FFT_TYPE) C2C, (OCL_FFT_TYPE) R2C,*/
(
OCL_FFT_TYPE
)
R2R
/*, (OCL_FFT_TYPE) C2R*/
),
Bool
()
// DFT_ROWS
)
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment