Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
a89ff402
Commit
a89ff402
authored
Aug 26, 2014
by
Alexander Karsakov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Refactoring of OCL_FftPlan class
parent
3ae95150
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
134 additions
and
121 deletions
+134
-121
dxt.cpp
modules/core/src/dxt.cpp
+50
-38
fft.cl
modules/core/src/opencl/fft.cl
+77
-77
test_dft.cpp
modules/core/test/ocl/test_dft.cpp
+7
-6
No files found.
modules/core/src/dxt.cpp
View file @
a89ff402
...
@@ -1802,11 +1802,14 @@ private:
...
@@ -1802,11 +1802,14 @@ private:
String
buildOptions
;
String
buildOptions
;
int
thread_count
;
int
thread_count
;
int
dft_size
;
int
dft_size
;
int
dft_depth
;
bool
status
;
bool
status
;
public
:
public
:
OCL_FftPlan
(
int
_size
)
:
dft_size
(
_size
),
status
(
true
)
OCL_FftPlan
(
int
_size
,
int
_depth
)
:
dft_size
(
_size
),
dft_depth
(
_depth
),
status
(
true
)
{
{
CV_Assert
(
dft_depth
==
CV_32F
||
dft_depth
==
CV_64F
);
int
min_radix
;
int
min_radix
;
std
::
vector
<
int
>
radixes
,
blocks
;
std
::
vector
<
int
>
radixes
,
blocks
;
ocl_getRadixes
(
dft_size
,
radixes
,
blocks
,
min_radix
);
ocl_getRadixes
(
dft_size
,
radixes
,
blocks
,
min_radix
);
...
@@ -1832,31 +1835,15 @@ public:
...
@@ -1832,31 +1835,15 @@ public:
n
*=
radix
;
n
*=
radix
;
}
}
twiddles
.
create
(
1
,
twiddle_size
,
CV_32FC2
);
twiddles
.
create
(
1
,
twiddle_size
,
CV_MAKE_TYPE
(
dft_depth
,
2
));
Mat
tw
=
twiddles
.
getMat
(
ACCESS_WRITE
);
if
(
dft_depth
==
CV_32F
)
float
*
ptr
=
tw
.
ptr
<
float
>
();
fillRadixTable
<
float
>
(
twiddles
,
radixes
);
int
ptr_index
=
0
;
else
fillRadixTable
<
double
>
(
twiddles
,
radixes
);
n
=
1
;
for
(
size_t
i
=
0
;
i
<
radixes
.
size
();
i
++
)
{
int
radix
=
radixes
[
i
];
n
*=
radix
;
for
(
int
j
=
1
;
j
<
radix
;
j
++
)
{
double
theta
=
-
CV_2PI
*
j
/
n
;
for
(
int
k
=
0
;
k
<
(
n
/
radix
);
k
++
)
{
ptr
[
ptr_index
++
]
=
(
float
)
cos
(
k
*
theta
);
ptr
[
ptr_index
++
]
=
(
float
)
sin
(
k
*
theta
);
}
}
}
buildOptions
=
format
(
"-D LOCAL_SIZE=%d -D kercn=%d -D RADIX_PROCESS=%s"
,
buildOptions
=
format
(
"-D LOCAL_SIZE=%d -D kercn=%d -D FT=%s -D CT=%s%s -D RADIX_PROCESS=%s"
,
dft_size
,
min_radix
,
radix_processing
.
c_str
());
dft_size
,
min_radix
,
ocl
::
typeToStr
(
dft_depth
),
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
dft_depth
,
2
)),
dft_depth
==
CV_64F
?
" -D DOUBLE_SUPPORT"
:
""
,
radix_processing
.
c_str
());
}
}
bool
enqueueTransform
(
InputArray
_src
,
OutputArray
_dst
,
int
num_dfts
,
int
flags
,
int
fftType
,
bool
rows
=
true
)
const
bool
enqueueTransform
(
InputArray
_src
,
OutputArray
_dst
,
int
num_dfts
,
int
flags
,
int
fftType
,
bool
rows
=
true
)
const
...
@@ -1867,17 +1854,13 @@ public:
...
@@ -1867,17 +1854,13 @@ public:
UMat
src
=
_src
.
getUMat
();
UMat
src
=
_src
.
getUMat
();
UMat
dst
=
_dst
.
getUMat
();
UMat
dst
=
_dst
.
getUMat
();
int
type
=
src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
);
size_t
globalsize
[
2
];
size_t
globalsize
[
2
];
size_t
localsize
[
2
];
size_t
localsize
[
2
];
String
kernel_name
;
String
kernel_name
;
bool
is1d
=
(
flags
&
DFT_ROWS
)
!=
0
||
num_dfts
==
1
;
bool
is1d
=
(
flags
&
DFT_ROWS
)
!=
0
||
num_dfts
==
1
;
bool
inv
=
(
flags
&
DFT_INVERSE
)
!=
0
;
bool
inv
=
(
flags
&
DFT_INVERSE
)
!=
0
;
String
options
=
buildOptions
+
format
(
" -D FT=%s CT=%s%s"
,
ocl
::
typeToStr
(
depth
),
String
options
=
buildOptions
;
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
depth
,
2
)),
depth
==
CV_64F
?
" -D DOUBLE_SUPPORT"
:
""
);
if
(
rows
)
if
(
rows
)
{
{
...
@@ -1917,7 +1900,7 @@ public:
...
@@ -1917,7 +1900,7 @@ public:
if
(
k
.
empty
())
if
(
k
.
empty
())
return
false
;
return
false
;
k
.
args
(
ocl
::
KernelArg
::
ReadOnly
(
src
),
ocl
::
KernelArg
::
WriteOnly
(
dst
),
ocl
::
KernelArg
::
PtrReadOnly
(
twiddles
),
thread_count
,
num_dfts
);
k
.
args
(
ocl
::
KernelArg
::
ReadOnly
(
src
),
ocl
::
KernelArg
::
WriteOnly
(
dst
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
twiddles
),
thread_count
,
num_dfts
);
return
k
.
run
(
2
,
globalsize
,
localsize
,
false
);
return
k
.
run
(
2
,
globalsize
,
localsize
,
false
);
}
}
...
@@ -1990,6 +1973,32 @@ private:
...
@@ -1990,6 +1973,32 @@ private:
min_radix
=
min
(
min_radix
,
block
*
radix
);
min_radix
=
min
(
min_radix
,
block
*
radix
);
}
}
}
}
template
<
typename
T
>
static
void
fillRadixTable
(
UMat
twiddles
,
const
std
::
vector
<
int
>&
radixes
)
{
Mat
tw
=
twiddles
.
getMat
(
ACCESS_WRITE
);
T
*
ptr
=
tw
.
ptr
<
T
>
();
int
ptr_index
=
0
;
int
n
=
1
;
for
(
size_t
i
=
0
;
i
<
radixes
.
size
();
i
++
)
{
int
radix
=
radixes
[
i
];
n
*=
radix
;
for
(
int
j
=
1
;
j
<
radix
;
j
++
)
{
double
theta
=
-
CV_2PI
*
j
/
n
;
for
(
int
k
=
0
;
k
<
(
n
/
radix
);
k
++
)
{
ptr
[
ptr_index
++
]
=
(
T
)
cos
(
k
*
theta
);
ptr
[
ptr_index
++
]
=
(
T
)
sin
(
k
*
theta
);
}
}
}
}
};
};
class
OCL_FftPlanCache
class
OCL_FftPlanCache
...
@@ -2001,17 +2010,18 @@ public:
...
@@ -2001,17 +2010,18 @@ public:
return
planCache
;
return
planCache
;
}
}
Ptr
<
OCL_FftPlan
>
getFftPlan
(
int
dft_size
)
Ptr
<
OCL_FftPlan
>
getFftPlan
(
int
dft_size
,
int
depth
)
{
{
std
::
map
<
int
,
Ptr
<
OCL_FftPlan
>
>::
iterator
f
=
planStorage
.
find
(
dft_size
);
int
key
=
(
dft_size
<<
16
)
|
(
depth
&
0xFFFF
);
std
::
map
<
int
,
Ptr
<
OCL_FftPlan
>
>::
iterator
f
=
planStorage
.
find
(
key
);
if
(
f
!=
planStorage
.
end
())
if
(
f
!=
planStorage
.
end
())
{
{
return
f
->
second
;
return
f
->
second
;
}
}
else
else
{
{
Ptr
<
OCL_FftPlan
>
newPlan
=
Ptr
<
OCL_FftPlan
>
(
new
OCL_FftPlan
(
dft_size
));
Ptr
<
OCL_FftPlan
>
newPlan
=
Ptr
<
OCL_FftPlan
>
(
new
OCL_FftPlan
(
dft_size
,
depth
));
planStorage
[
dft_size
]
=
newPlan
;
planStorage
[
key
]
=
newPlan
;
return
newPlan
;
return
newPlan
;
}
}
}
}
...
@@ -2031,13 +2041,15 @@ protected:
...
@@ -2031,13 +2041,15 @@ protected:
static
bool
ocl_dft_rows
(
InputArray
_src
,
OutputArray
_dst
,
int
nonzero_rows
,
int
flags
,
int
fftType
)
static
bool
ocl_dft_rows
(
InputArray
_src
,
OutputArray
_dst
,
int
nonzero_rows
,
int
flags
,
int
fftType
)
{
{
Ptr
<
OCL_FftPlan
>
plan
=
OCL_FftPlanCache
::
getInstance
().
getFftPlan
(
_src
.
cols
());
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
);
Ptr
<
OCL_FftPlan
>
plan
=
OCL_FftPlanCache
::
getInstance
().
getFftPlan
(
_src
.
cols
(),
depth
);
return
plan
->
enqueueTransform
(
_src
,
_dst
,
nonzero_rows
,
flags
,
fftType
,
true
);
return
plan
->
enqueueTransform
(
_src
,
_dst
,
nonzero_rows
,
flags
,
fftType
,
true
);
}
}
static
bool
ocl_dft_cols
(
InputArray
_src
,
OutputArray
_dst
,
int
nonzero_cols
,
int
flags
,
int
fftType
)
static
bool
ocl_dft_cols
(
InputArray
_src
,
OutputArray
_dst
,
int
nonzero_cols
,
int
flags
,
int
fftType
)
{
{
Ptr
<
OCL_FftPlan
>
plan
=
OCL_FftPlanCache
::
getInstance
().
getFftPlan
(
_src
.
rows
());
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
);
Ptr
<
OCL_FftPlan
>
plan
=
OCL_FftPlanCache
::
getInstance
().
getFftPlan
(
_src
.
rows
(),
depth
);
return
plan
->
enqueueTransform
(
_src
,
_dst
,
nonzero_cols
,
flags
,
fftType
,
false
);
return
plan
->
enqueueTransform
(
_src
,
_dst
,
nonzero_cols
,
flags
,
fftType
,
false
);
}
}
...
@@ -2045,7 +2057,7 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro
...
@@ -2045,7 +2057,7 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro
{
{
int
type
=
_src
.
type
(),
cn
=
CV_MAT_CN
(
type
),
depth
=
CV_MAT_DEPTH
(
type
);
int
type
=
_src
.
type
(),
cn
=
CV_MAT_CN
(
type
),
depth
=
CV_MAT_DEPTH
(
type
);
Size
ssize
=
_src
.
size
();
Size
ssize
=
_src
.
size
();
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
();
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
if
(
!
((
cn
==
1
||
cn
==
2
)
&&
(
depth
==
CV_32F
||
(
depth
==
CV_64F
&&
doubleSupport
)))
)
if
(
!
((
cn
==
1
||
cn
==
2
)
&&
(
depth
==
CV_32F
||
(
depth
==
CV_64F
&&
doubleSupport
)))
)
return
false
;
return
false
;
...
...
modules/core/src/opencl/fft.cl
View file @
a89ff402
...
@@ -21,21 +21,21 @@
...
@@ -21,21 +21,21 @@
#
endif
#
endif
__attribute__
((
always_inline
))
__attribute__
((
always_inline
))
float2
mul_float2
(
float2
a,
float2
b
)
{
CT
mul_complex
(
CT
a,
CT
b
)
{
return
(
float2
)(
fma
(
a.x,
b.x,
-a.y
*
b.y
)
,
fma
(
a.x,
b.y,
a.y
*
b.x
))
;
return
(
CT
)(
fma
(
a.x,
b.x,
-a.y
*
b.y
)
,
fma
(
a.x,
b.y,
a.y
*
b.x
))
;
}
}
__attribute__
((
always_inline
))
__attribute__
((
always_inline
))
float2
twiddle
(
float2
a
)
{
CT
twiddle
(
CT
a
)
{
return
(
float2
)(
a.y,
-a.x
)
;
return
(
CT
)(
a.y,
-a.x
)
;
}
}
__attribute__
((
always_inline
))
__attribute__
((
always_inline
))
void
butterfly2
(
float2
a0,
float2
a1,
__local
float2*
smem,
__global
const
float2
*
twiddles,
void
butterfly2
(
CT
a0,
CT
a1,
__local
CT*
smem,
__global
const
CT
*
twiddles,
const
int
x,
const
int
block_size
)
const
int
x,
const
int
block_size
)
{
{
const
int
k
=
x
&
(
block_size
-
1
)
;
const
int
k
=
x
&
(
block_size
-
1
)
;
a1
=
mul_
float2
(
twiddles[k],
a1
)
;
a1
=
mul_
complex
(
twiddles[k],
a1
)
;
const
int
dst_ind
=
(
x
<<
1
)
-
k
;
const
int
dst_ind
=
(
x
<<
1
)
-
k
;
smem[dst_ind]
=
a0
+
a1
;
smem[dst_ind]
=
a0
+
a1
;
...
@@ -43,19 +43,19 @@ void butterfly2(float2 a0, float2 a1, __local float2* smem, __global const float
...
@@ -43,19 +43,19 @@ void butterfly2(float2 a0, float2 a1, __local float2* smem, __global const float
}
}
__attribute__
((
always_inline
))
__attribute__
((
always_inline
))
void
butterfly4
(
float2
a0,
float2
a1,
float2
a2,
float2
a3,
__local
float2*
smem,
__global
const
float2
*
twiddles,
void
butterfly4
(
CT
a0,
CT
a1,
CT
a2,
CT
a3,
__local
CT*
smem,
__global
const
CT
*
twiddles,
const
int
x,
const
int
block_size
)
const
int
x,
const
int
block_size
)
{
{
const
int
k
=
x
&
(
block_size
-
1
)
;
const
int
k
=
x
&
(
block_size
-
1
)
;
a1
=
mul_
float2
(
twiddles[k],
a1
)
;
a1
=
mul_
complex
(
twiddles[k],
a1
)
;
a2
=
mul_
float2
(
twiddles[k
+
block_size],
a2
)
;
a2
=
mul_
complex
(
twiddles[k
+
block_size],
a2
)
;
a3
=
mul_
float2
(
twiddles[k
+
2*block_size],
a3
)
;
a3
=
mul_
complex
(
twiddles[k
+
2*block_size],
a3
)
;
const
int
dst_ind
=
((
x
-
k
)
<<
2
)
+
k
;
const
int
dst_ind
=
((
x
-
k
)
<<
2
)
+
k
;
float2
b0
=
a0
+
a2
;
CT
b0
=
a0
+
a2
;
a2
=
a0
-
a2
;
a2
=
a0
-
a2
;
float2
b1
=
a1
+
a3
;
CT
b1
=
a1
+
a3
;
a3
=
twiddle
(
a1
-
a3
)
;
a3
=
twiddle
(
a1
-
a3
)
;
smem[dst_ind]
=
b0
+
b1
;
smem[dst_ind]
=
b0
+
b1
;
...
@@ -65,17 +65,17 @@ void butterfly4(float2 a0, float2 a1, float2 a2, float2 a3, __local float2* smem
...
@@ -65,17 +65,17 @@ void butterfly4(float2 a0, float2 a1, float2 a2, float2 a3, __local float2* smem
}
}
__attribute__
((
always_inline
))
__attribute__
((
always_inline
))
void
butterfly3
(
float2
a0,
float2
a1,
float2
a2,
__local
float2*
smem,
__global
const
float2
*
twiddles,
void
butterfly3
(
CT
a0,
CT
a1,
CT
a2,
__local
CT*
smem,
__global
const
CT
*
twiddles,
const
int
x,
const
int
block_size
)
const
int
x,
const
int
block_size
)
{
{
const
int
k
=
x
%
block_size
;
const
int
k
=
x
%
block_size
;
a1
=
mul_
float2
(
twiddles[k],
a1
)
;
a1
=
mul_
complex
(
twiddles[k],
a1
)
;
a2
=
mul_
float2
(
twiddles[k+block_size],
a2
)
;
a2
=
mul_
complex
(
twiddles[k+block_size],
a2
)
;
const
int
dst_ind
=
((
x
-
k
)
*
3
)
+
k
;
const
int
dst_ind
=
((
x
-
k
)
*
3
)
+
k
;
float2
b1
=
a1
+
a2
;
CT
b1
=
a1
+
a2
;
a2
=
twiddle
(
sin_120*
(
a1
-
a2
))
;
a2
=
twiddle
(
sin_120*
(
a1
-
a2
))
;
float2
b0
=
a0
-
(
float2
)(
0.5f
)
*b1
;
CT
b0
=
a0
-
(
CT
)(
0.5f
)
*b1
;
smem[dst_ind]
=
a0
+
b1
;
smem[dst_ind]
=
a0
+
b1
;
smem[dst_ind
+
block_size]
=
b0
+
a2
;
smem[dst_ind
+
block_size]
=
b0
+
a2
;
...
@@ -83,19 +83,19 @@ void butterfly3(float2 a0, float2 a1, float2 a2, __local float2* smem, __global
...
@@ -83,19 +83,19 @@ void butterfly3(float2 a0, float2 a1, float2 a2, __local float2* smem, __global
}
}
__attribute__
((
always_inline
))
__attribute__
((
always_inline
))
void
butterfly5
(
float2
a0,
float2
a1,
float2
a2,
float2
a3,
float2
a4,
__local
float2*
smem,
__global
const
float2
*
twiddles,
void
butterfly5
(
CT
a0,
CT
a1,
CT
a2,
CT
a3,
CT
a4,
__local
CT*
smem,
__global
const
CT
*
twiddles,
const
int
x,
const
int
block_size
)
const
int
x,
const
int
block_size
)
{
{
const
int
k
=
x
%
block_size
;
const
int
k
=
x
%
block_size
;
a1
=
mul_
float2
(
twiddles[k],
a1
)
;
a1
=
mul_
complex
(
twiddles[k],
a1
)
;
a2
=
mul_
float2
(
twiddles[k
+
block_size],
a2
)
;
a2
=
mul_
complex
(
twiddles[k
+
block_size],
a2
)
;
a3
=
mul_
float2
(
twiddles[k+2*block_size],
a3
)
;
a3
=
mul_
complex
(
twiddles[k+2*block_size],
a3
)
;
a4
=
mul_
float2
(
twiddles[k+3*block_size],
a4
)
;
a4
=
mul_
complex
(
twiddles[k+3*block_size],
a4
)
;
const
int
dst_ind
=
((
x
-
k
)
*
5
)
+
k
;
const
int
dst_ind
=
((
x
-
k
)
*
5
)
+
k
;
__local
float2
*
dst
=
smem
+
dst_ind
;
__local
CT
*
dst
=
smem
+
dst_ind
;
float2
b0,
b1,
b5
;
CT
b0,
b1,
b5
;
b1
=
a1
+
a4
;
b1
=
a1
+
a4
;
a1
-=
a4
;
a1
-=
a4
;
...
@@ -104,11 +104,11 @@ void butterfly5(float2 a0, float2 a1, float2 a2, float2 a3, float2 a4, __local f
...
@@ -104,11 +104,11 @@ void butterfly5(float2 a0, float2 a1, float2 a2, float2 a3, float2 a4, __local f
a3
-=
a2
;
a3
-=
a2
;
a2
=
b1
+
a4
;
a2
=
b1
+
a4
;
b0
=
a0
-
(
float2
)
0.25f
*
a2
;
b0
=
a0
-
(
CT
)
0.25f
*
a2
;
b1
=
fft5_2
*
(
b1
-
a4
)
;
b1
=
fft5_2
*
(
b1
-
a4
)
;
a4
=
fft5_3
*
(
float2
)(
-a1.y
-
a3.y,
a1.x
+
a3.x
)
;
a4
=
fft5_3
*
(
CT
)(
-a1.y
-
a3.y,
a1.x
+
a3.x
)
;
b5
=
(
float2
)(
a4.x
-
fft5_5
*
a1.y,
a4.y
+
fft5_5
*
a1.x
)
;
b5
=
(
CT
)(
a4.x
-
fft5_5
*
a1.y,
a4.y
+
fft5_5
*
a1.x
)
;
a4.x
+=
fft5_4
*
a3.y
;
a4.x
+=
fft5_4
*
a3.y
;
a4.y
-=
fft5_4
*
a3.x
;
a4.y
-=
fft5_4
*
a3.x
;
...
@@ -124,9 +124,9 @@ void butterfly5(float2 a0, float2 a1, float2 a2, float2 a3, float2 a4, __local f
...
@@ -124,9 +124,9 @@ void butterfly5(float2 a0, float2 a1, float2 a2, float2 a3, float2 a4, __local f
}
}
__attribute__
((
always_inline
))
__attribute__
((
always_inline
))
void
fft_radix2
(
__local
float2*
smem,
__global
const
float2
*
twiddles,
const
int
x,
const
int
block_size,
const
int
t
)
void
fft_radix2
(
__local
CT*
smem,
__global
const
CT
*
twiddles,
const
int
x,
const
int
block_size,
const
int
t
)
{
{
float2
a0,
a1
;
CT
a0,
a1
;
if
(
x
<
t
)
if
(
x
<
t
)
{
{
...
@@ -143,10 +143,10 @@ void fft_radix2(__local float2* smem, __global const float2* twiddles, const int
...
@@ -143,10 +143,10 @@ void fft_radix2(__local float2* smem, __global const float2* twiddles, const int
}
}
__attribute__
((
always_inline
))
__attribute__
((
always_inline
))
void
fft_radix2_B2
(
__local
float2*
smem,
__global
const
float2
*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
void
fft_radix2_B2
(
__local
CT*
smem,
__global
const
CT
*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
{
{
const
int
x2
=
x1
+
t/2
;
const
int
x2
=
x1
+
t/2
;
float2
a0,
a1,
a2,
a3
;
CT
a0,
a1,
a2,
a3
;
if
(
x1
<
t/2
)
if
(
x1
<
t/2
)
{
{
...
@@ -166,11 +166,11 @@ void fft_radix2_B2(__local float2* smem, __global const float2* twiddles, const
...
@@ -166,11 +166,11 @@ void fft_radix2_B2(__local float2* smem, __global const float2* twiddles, const
}
}
__attribute__
((
always_inline
))
__attribute__
((
always_inline
))
void
fft_radix2_B3
(
__local
float2*
smem,
__global
const
float2
*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
void
fft_radix2_B3
(
__local
CT*
smem,
__global
const
CT
*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
{
{
const
int
x2
=
x1
+
t/3
;
const
int
x2
=
x1
+
t/3
;
const
int
x3
=
x1
+
2*t/3
;
const
int
x3
=
x1
+
2*t/3
;
float2
a0,
a1,
a2,
a3,
a4,
a5
;
CT
a0,
a1,
a2,
a3,
a4,
a5
;
if
(
x1
<
t/3
)
if
(
x1
<
t/3
)
{
{
...
@@ -192,13 +192,13 @@ void fft_radix2_B3(__local float2* smem, __global const float2* twiddles, const
...
@@ -192,13 +192,13 @@ void fft_radix2_B3(__local float2* smem, __global const float2* twiddles, const
}
}
__attribute__
((
always_inline
))
__attribute__
((
always_inline
))
void
fft_radix2_B4
(
__local
float2*
smem,
__global
const
float2
*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
void
fft_radix2_B4
(
__local
CT*
smem,
__global
const
CT
*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
{
{
const
int
thread_block
=
t/4
;
const
int
thread_block
=
t/4
;
const
int
x2
=
x1
+
thread_block
;
const
int
x2
=
x1
+
thread_block
;
const
int
x3
=
x1
+
2*thread_block
;
const
int
x3
=
x1
+
2*thread_block
;
const
int
x4
=
x1
+
3*thread_block
;
const
int
x4
=
x1
+
3*thread_block
;
float2
a0,
a1,
a2,
a3,
a4,
a5,
a6,
a7
;
CT
a0,
a1,
a2,
a3,
a4,
a5,
a6,
a7
;
if
(
x1
<
t/4
)
if
(
x1
<
t/4
)
{
{
...
@@ -222,14 +222,14 @@ void fft_radix2_B4(__local float2* smem, __global const float2* twiddles, const
...
@@ -222,14 +222,14 @@ void fft_radix2_B4(__local float2* smem, __global const float2* twiddles, const
}
}
__attribute__
((
always_inline
))
__attribute__
((
always_inline
))
void
fft_radix2_B5
(
__local
float2*
smem,
__global
const
float2
*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
void
fft_radix2_B5
(
__local
CT*
smem,
__global
const
CT
*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
{
{
const
int
thread_block
=
t/5
;
const
int
thread_block
=
t/5
;
const
int
x2
=
x1
+
thread_block
;
const
int
x2
=
x1
+
thread_block
;
const
int
x3
=
x1
+
2*thread_block
;
const
int
x3
=
x1
+
2*thread_block
;
const
int
x4
=
x1
+
3*thread_block
;
const
int
x4
=
x1
+
3*thread_block
;
const
int
x5
=
x1
+
4*thread_block
;
const
int
x5
=
x1
+
4*thread_block
;
float2
a0,
a1,
a2,
a3,
a4,
a5,
a6,
a7,
a8,
a9
;
CT
a0,
a1,
a2,
a3,
a4,
a5,
a6,
a7,
a8,
a9
;
if
(
x1
<
t/5
)
if
(
x1
<
t/5
)
{
{
...
@@ -255,9 +255,9 @@ void fft_radix2_B5(__local float2* smem, __global const float2* twiddles, const
...
@@ -255,9 +255,9 @@ void fft_radix2_B5(__local float2* smem, __global const float2* twiddles, const
}
}
__attribute__
((
always_inline
))
__attribute__
((
always_inline
))
void
fft_radix4
(
__local
float2*
smem,
__global
const
float2
*
twiddles,
const
int
x,
const
int
block_size,
const
int
t
)
void
fft_radix4
(
__local
CT*
smem,
__global
const
CT
*
twiddles,
const
int
x,
const
int
block_size,
const
int
t
)
{
{
float2
a0,
a1,
a2,
a3
;
CT
a0,
a1,
a2,
a3
;
if
(
x
<
t
)
if
(
x
<
t
)
{
{
...
@@ -273,10 +273,10 @@ void fft_radix4(__local float2* smem, __global const float2* twiddles, const int
...
@@ -273,10 +273,10 @@ void fft_radix4(__local float2* smem, __global const float2* twiddles, const int
}
}
__attribute__
((
always_inline
))
__attribute__
((
always_inline
))
void
fft_radix4_B2
(
__local
float2*
smem,
__global
const
float2
*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
void
fft_radix4_B2
(
__local
CT*
smem,
__global
const
CT
*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
{
{
const
int
x2
=
x1
+
t/2
;
const
int
x2
=
x1
+
t/2
;
float2
a0,
a1,
a2,
a3,
a4,
a5,
a6,
a7
;
CT
a0,
a1,
a2,
a3,
a4,
a5,
a6,
a7
;
if
(
x1
<
t/2
)
if
(
x1
<
t/2
)
{
{
...
@@ -296,11 +296,11 @@ void fft_radix4_B2(__local float2* smem, __global const float2* twiddles, const
...
@@ -296,11 +296,11 @@ void fft_radix4_B2(__local float2* smem, __global const float2* twiddles, const
}
}
__attribute__
((
always_inline
))
__attribute__
((
always_inline
))
void
fft_radix4_B3
(
__local
float2*
smem,
__global
const
float2
*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
void
fft_radix4_B3
(
__local
CT*
smem,
__global
const
CT
*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
{
{
const
int
x2
=
x1
+
t/3
;
const
int
x2
=
x1
+
t/3
;
const
int
x3
=
x2
+
t/3
;
const
int
x3
=
x2
+
t/3
;
float2
a0,
a1,
a2,
a3,
a4,
a5,
a6,
a7,
a8,
a9,
a10,
a11
;
CT
a0,
a1,
a2,
a3,
a4,
a5,
a6,
a7,
a8,
a9,
a10,
a11
;
if
(
x1
<
t/3
)
if
(
x1
<
t/3
)
{
{
...
@@ -322,35 +322,35 @@ void fft_radix4_B3(__local float2* smem, __global const float2* twiddles, const
...
@@ -322,35 +322,35 @@ void fft_radix4_B3(__local float2* smem, __global const float2* twiddles, const
}
}
__attribute__
((
always_inline
))
__attribute__
((
always_inline
))
void
fft_radix8
(
__local
float2*
smem,
__global
const
float2
*
twiddles,
const
int
x,
const
int
block_size,
const
int
t
)
void
fft_radix8
(
__local
CT*
smem,
__global
const
CT
*
twiddles,
const
int
x,
const
int
block_size,
const
int
t
)
{
{
const
int
k
=
x
%
block_size
;
const
int
k
=
x
%
block_size
;
float2
a0,
a1,
a2,
a3,
a4,
a5,
a6,
a7
;
CT
a0,
a1,
a2,
a3,
a4,
a5,
a6,
a7
;
if
(
x
<
t
)
if
(
x
<
t
)
{
{
int
tw_ind
=
block_size
/
8
;
int
tw_ind
=
block_size
/
8
;
a0
=
smem[x]
;
a0
=
smem[x]
;
a1
=
mul_
float2
(
twiddles[k],
smem[x
+
t]
)
;
a1
=
mul_
complex
(
twiddles[k],
smem[x
+
t]
)
;
a2
=
mul_
float2
(
twiddles[k
+
block_size],smem[x+2*t]
)
;
a2
=
mul_
complex
(
twiddles[k
+
block_size],smem[x+2*t]
)
;
a3
=
mul_
float2
(
twiddles[k+2*block_size],smem[x+3*t]
)
;
a3
=
mul_
complex
(
twiddles[k+2*block_size],smem[x+3*t]
)
;
a4
=
mul_
float2
(
twiddles[k+3*block_size],smem[x+4*t]
)
;
a4
=
mul_
complex
(
twiddles[k+3*block_size],smem[x+4*t]
)
;
a5
=
mul_
float2
(
twiddles[k+4*block_size],smem[x+5*t]
)
;
a5
=
mul_
complex
(
twiddles[k+4*block_size],smem[x+5*t]
)
;
a6
=
mul_
float2
(
twiddles[k+5*block_size],smem[x+6*t]
)
;
a6
=
mul_
complex
(
twiddles[k+5*block_size],smem[x+6*t]
)
;
a7
=
mul_
float2
(
twiddles[k+6*block_size],smem[x+7*t]
)
;
a7
=
mul_
complex
(
twiddles[k+6*block_size],smem[x+7*t]
)
;
float2
b0,
b1,
b6,
b7
;
CT
b0,
b1,
b6,
b7
;
b0
=
a0
+
a4
;
b0
=
a0
+
a4
;
a4
=
a0
-
a4
;
a4
=
a0
-
a4
;
b1
=
a1
+
a5
;
b1
=
a1
+
a5
;
a5
=
a1
-
a5
;
a5
=
a1
-
a5
;
a5
=
(
float2
)(
SQRT_2
)
*
(
float2
)(
a5.x
+
a5.y,
-a5.x
+
a5.y
)
;
a5
=
(
CT
)(
SQRT_2
)
*
(
CT
)(
a5.x
+
a5.y,
-a5.x
+
a5.y
)
;
b6
=
twiddle
(
a2
-
a6
)
;
b6
=
twiddle
(
a2
-
a6
)
;
a2
=
a2
+
a6
;
a2
=
a2
+
a6
;
b7
=
a3
-
a7
;
b7
=
a3
-
a7
;
b7
=
(
float2
)(
SQRT_2
)
*
(
float2
)(
-b7.x
+
b7.y,
-b7.x
-
b7.y
)
;
b7
=
(
CT
)(
SQRT_2
)
*
(
CT
)(
-b7.x
+
b7.y,
-b7.x
-
b7.y
)
;
a3
=
a3
+
a7
;
a3
=
a3
+
a7
;
a0
=
b0
+
a2
;
a0
=
b0
+
a2
;
...
@@ -369,7 +369,7 @@ void fft_radix8(__local float2* smem, __global const float2* twiddles, const int
...
@@ -369,7 +369,7 @@ void fft_radix8(__local float2* smem, __global const float2* twiddles, const int
if
(
x
<
t
)
if
(
x
<
t
)
{
{
const
int
dst_ind
=
((
x
-
k
)
<<
3
)
+
k
;
const
int
dst_ind
=
((
x
-
k
)
<<
3
)
+
k
;
__local
float2
*
dst
=
smem
+
dst_ind
;
__local
CT
*
dst
=
smem
+
dst_ind
;
dst[0]
=
a0
+
a1
;
dst[0]
=
a0
+
a1
;
dst[block_size]
=
a4
+
a5
;
dst[block_size]
=
a4
+
a5
;
...
@@ -385,9 +385,9 @@ void fft_radix8(__local float2* smem, __global const float2* twiddles, const int
...
@@ -385,9 +385,9 @@ void fft_radix8(__local float2* smem, __global const float2* twiddles, const int
}
}
__attribute__
((
always_inline
))
__attribute__
((
always_inline
))
void
fft_radix3
(
__local
float2*
smem,
__global
const
float2
*
twiddles,
const
int
x,
const
int
block_size,
const
int
t
)
void
fft_radix3
(
__local
CT*
smem,
__global
const
CT
*
twiddles,
const
int
x,
const
int
block_size,
const
int
t
)
{
{
float2
a0,
a1,
a2
;
CT
a0,
a1,
a2
;
if
(
x
<
t
)
if
(
x
<
t
)
{
{
...
@@ -403,10 +403,10 @@ void fft_radix3(__local float2* smem, __global const float2* twiddles, const int
...
@@ -403,10 +403,10 @@ void fft_radix3(__local float2* smem, __global const float2* twiddles, const int
}
}
__attribute__
((
always_inline
))
__attribute__
((
always_inline
))
void
fft_radix3_B2
(
__local
float2*
smem,
__global
const
float2
*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
void
fft_radix3_B2
(
__local
CT*
smem,
__global
const
CT
*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
{
{
const
int
x2
=
x1
+
t/2
;
const
int
x2
=
x1
+
t/2
;
float2
a0,
a1,
a2,
a3,
a4,
a5
;
CT
a0,
a1,
a2,
a3,
a4,
a5
;
if
(
x1
<
t/2
)
if
(
x1
<
t/2
)
{
{
...
@@ -426,11 +426,11 @@ void fft_radix3_B2(__local float2* smem, __global const float2* twiddles, const
...
@@ -426,11 +426,11 @@ void fft_radix3_B2(__local float2* smem, __global const float2* twiddles, const
}
}
__attribute__
((
always_inline
))
__attribute__
((
always_inline
))
void
fft_radix3_B3
(
__local
float2*
smem,
__global
const
float2
*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
void
fft_radix3_B3
(
__local
CT*
smem,
__global
const
CT
*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
{
{
const
int
x2
=
x1
+
t/3
;
const
int
x2
=
x1
+
t/3
;
const
int
x3
=
x2
+
t/3
;
const
int
x3
=
x2
+
t/3
;
float2
a0,
a1,
a2,
a3,
a4,
a5,
a6,
a7,
a8
;
CT
a0,
a1,
a2,
a3,
a4,
a5,
a6,
a7,
a8
;
if
(
x1
<
t/3
)
if
(
x1
<
t/3
)
{
{
...
@@ -452,13 +452,13 @@ void fft_radix3_B3(__local float2* smem, __global const float2* twiddles, const
...
@@ -452,13 +452,13 @@ void fft_radix3_B3(__local float2* smem, __global const float2* twiddles, const
}
}
__attribute__
((
always_inline
))
__attribute__
((
always_inline
))
void
fft_radix3_B4
(
__local
float2*
smem,
__global
const
float2
*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
void
fft_radix3_B4
(
__local
CT*
smem,
__global
const
CT
*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
{
{
const
int
thread_block
=
t/4
;
const
int
thread_block
=
t/4
;
const
int
x2
=
x1
+
thread_block
;
const
int
x2
=
x1
+
thread_block
;
const
int
x3
=
x1
+
2*thread_block
;
const
int
x3
=
x1
+
2*thread_block
;
const
int
x4
=
x1
+
3*thread_block
;
const
int
x4
=
x1
+
3*thread_block
;
float2
a0,
a1,
a2,
a3,
a4,
a5,
a6,
a7,
a8,
a9,
a10,
a11
;
CT
a0,
a1,
a2,
a3,
a4,
a5,
a6,
a7,
a8,
a9,
a10,
a11
;
if
(
x1
<
t/4
)
if
(
x1
<
t/4
)
{
{
...
@@ -482,10 +482,10 @@ void fft_radix3_B4(__local float2* smem, __global const float2* twiddles, const
...
@@ -482,10 +482,10 @@ void fft_radix3_B4(__local float2* smem, __global const float2* twiddles, const
}
}
__attribute__
((
always_inline
))
__attribute__
((
always_inline
))
void
fft_radix5
(
__local
float2*
smem,
__global
const
float2
*
twiddles,
const
int
x,
const
int
block_size,
const
int
t
)
void
fft_radix5
(
__local
CT*
smem,
__global
const
CT
*
twiddles,
const
int
x,
const
int
block_size,
const
int
t
)
{
{
const
int
k
=
x
%
block_size
;
const
int
k
=
x
%
block_size
;
float2
a0,
a1,
a2,
a3,
a4
;
CT
a0,
a1,
a2,
a3,
a4
;
if
(
x
<
t
)
if
(
x
<
t
)
{
{
...
@@ -501,10 +501,10 @@ void fft_radix5(__local float2* smem, __global const float2* twiddles, const int
...
@@ -501,10 +501,10 @@ void fft_radix5(__local float2* smem, __global const float2* twiddles, const int
}
}
__attribute__
((
always_inline
))
__attribute__
((
always_inline
))
void
fft_radix5_B2
(
__local
float2*
smem,
__global
const
float2
*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
void
fft_radix5_B2
(
__local
CT*
smem,
__global
const
CT
*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
{
{
const
int
x2
=
x1+t/2
;
const
int
x2
=
x1+t/2
;
float2
a0,
a1,
a2,
a3,
a4,
a5,
a6,
a7,
a8,
a9
;
CT
a0,
a1,
a2,
a3,
a4,
a5,
a6,
a7,
a8,
a9
;
if
(
x1
<
t/2
)
if
(
x1
<
t/2
)
{
{
...
@@ -531,7 +531,7 @@ void fft_radix5_B2(__local float2* smem, __global const float2* twiddles, const
...
@@ -531,7 +531,7 @@ void fft_radix5_B2(__local float2* smem, __global const float2* twiddles, const
__kernel
void
fft_multi_radix_rows
(
__global
const
uchar*
src_ptr,
int
src_step,
int
src_offset,
int
src_rows,
int
src_cols,
__kernel
void
fft_multi_radix_rows
(
__global
const
uchar*
src_ptr,
int
src_step,
int
src_offset,
int
src_rows,
int
src_cols,
__global
uchar*
dst_ptr,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols,
__global
uchar*
dst_ptr,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols,
__global
float2*
twiddles_ptr
,
const
int
t
,
const
int
nz
)
__global
CT*
twiddles_ptr,
int
twiddles_step,
int
twiddles_offset
,
const
int
t
,
const
int
nz
)
{
{
const
int
x
=
get_global_id
(
0
)
;
const
int
x
=
get_global_id
(
0
)
;
const
int
y
=
get_group_id
(
1
)
;
const
int
y
=
get_group_id
(
1
)
;
...
@@ -539,7 +539,7 @@ __kernel void fft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
...
@@ -539,7 +539,7 @@ __kernel void fft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
if
(
y
<
nz
)
if
(
y
<
nz
)
{
{
__local
CT
smem[LOCAL_SIZE]
;
__local
CT
smem[LOCAL_SIZE]
;
__global
const
float2*
twiddles
=
(
__global
float2*
)
twiddles_ptr
;
__global
const
CT*
twiddles
=
(
__global
const
CT*
)(
twiddles_ptr
+
twiddles_offset
)
;
const
int
ind
=
x
;
const
int
ind
=
x
;
#
ifdef
IS_1D
#
ifdef
IS_1D
FT
scale
=
(
FT
)
1/dst_cols
;
FT
scale
=
(
FT
)
1/dst_cols
;
...
@@ -600,7 +600,7 @@ __kernel void fft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
...
@@ -600,7 +600,7 @@ __kernel void fft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
__kernel
void
fft_multi_radix_cols
(
__global
const
uchar*
src_ptr,
int
src_step,
int
src_offset,
int
src_rows,
int
src_cols,
__kernel
void
fft_multi_radix_cols
(
__global
const
uchar*
src_ptr,
int
src_step,
int
src_offset,
int
src_rows,
int
src_cols,
__global
uchar*
dst_ptr,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols,
__global
uchar*
dst_ptr,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols,
__global
float2*
twiddles_ptr
,
const
int
t
,
const
int
nz
)
__global
CT*
twiddles_ptr,
int
twiddles_step,
int
twiddles_offset
,
const
int
t
,
const
int
nz
)
{
{
const
int
x
=
get_group_id
(
0
)
;
const
int
x
=
get_group_id
(
0
)
;
const
int
y
=
get_global_id
(
1
)
;
const
int
y
=
get_global_id
(
1
)
;
...
@@ -609,7 +609,7 @@ __kernel void fft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
...
@@ -609,7 +609,7 @@ __kernel void fft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
{
{
__local
CT
smem[LOCAL_SIZE]
;
__local
CT
smem[LOCAL_SIZE]
;
__global
const
uchar*
src
=
src_ptr
+
mad24
(
y,
src_step,
mad24
(
x,
(
int
)(
sizeof
(
CT
))
,
src_offset
))
;
__global
const
uchar*
src
=
src_ptr
+
mad24
(
y,
src_step,
mad24
(
x,
(
int
)(
sizeof
(
CT
))
,
src_offset
))
;
__global
const
float2*
twiddles
=
(
__global
float2*
)
twiddles_ptr
;
__global
const
CT*
twiddles
=
(
__global
const
CT*
)(
twiddles_ptr
+
twiddles_offset
)
;
const
int
ind
=
y
;
const
int
ind
=
y
;
const
int
block_size
=
LOCAL_SIZE/kercn
;
const
int
block_size
=
LOCAL_SIZE/kercn
;
FT
scale
=
1.f/
(
dst_rows*dst_cols
)
;
FT
scale
=
1.f/
(
dst_rows*dst_cols
)
;
...
@@ -661,7 +661,7 @@ __kernel void fft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
...
@@ -661,7 +661,7 @@ __kernel void fft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
__kernel
void
ifft_multi_radix_rows
(
__global
const
uchar*
src_ptr,
int
src_step,
int
src_offset,
int
src_rows,
int
src_cols,
__kernel
void
ifft_multi_radix_rows
(
__global
const
uchar*
src_ptr,
int
src_step,
int
src_offset,
int
src_rows,
int
src_cols,
__global
uchar*
dst_ptr,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols,
__global
uchar*
dst_ptr,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols,
__global
float2*
twiddles_ptr
,
const
int
t
,
const
int
nz
)
__global
CT*
twiddles_ptr,
int
twiddles_step,
int
twiddles_offset
,
const
int
t
,
const
int
nz
)
{
{
const
int
x
=
get_global_id
(
0
)
;
const
int
x
=
get_global_id
(
0
)
;
const
int
y
=
get_group_id
(
1
)
;
const
int
y
=
get_group_id
(
1
)
;
...
@@ -675,7 +675,7 @@ __kernel void ifft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
...
@@ -675,7 +675,7 @@ __kernel void ifft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
if
(
y
<
nz
)
if
(
y
<
nz
)
{
{
__local
CT
smem[LOCAL_SIZE]
;
__local
CT
smem[LOCAL_SIZE]
;
__global
const
float2*
twiddles
=
(
__global
float2*
)
twiddles_ptr
;
__global
const
CT*
twiddles
=
(
__global
const
CT*
)(
twiddles_ptr
+
twiddles_offset
)
;
const
int
ind
=
x
;
const
int
ind
=
x
;
#
if
defined
(
COMPLEX_INPUT
)
&&
!defined
(
NO_CONJUGATE
)
#
if
defined
(
COMPLEX_INPUT
)
&&
!defined
(
NO_CONJUGATE
)
...
@@ -767,7 +767,7 @@ __kernel void ifft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
...
@@ -767,7 +767,7 @@ __kernel void ifft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
__kernel
void
ifft_multi_radix_cols
(
__global
const
uchar*
src_ptr,
int
src_step,
int
src_offset,
int
src_rows,
int
src_cols,
__kernel
void
ifft_multi_radix_cols
(
__global
const
uchar*
src_ptr,
int
src_step,
int
src_offset,
int
src_rows,
int
src_cols,
__global
uchar*
dst_ptr,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols,
__global
uchar*
dst_ptr,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols,
__global
float2*
twiddles_ptr
,
const
int
t
,
const
int
nz
)
__global
CT*
twiddles_ptr,
int
twiddles_step,
int
twiddles_offset
,
const
int
t
,
const
int
nz
)
{
{
const
int
x
=
get_group_id
(
0
)
;
const
int
x
=
get_group_id
(
0
)
;
const
int
y
=
get_global_id
(
1
)
;
const
int
y
=
get_global_id
(
1
)
;
...
@@ -778,7 +778,7 @@ __kernel void ifft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
...
@@ -778,7 +778,7 @@ __kernel void ifft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
__local
CT
smem[LOCAL_SIZE]
;
__local
CT
smem[LOCAL_SIZE]
;
__global
const
uchar*
src
=
src_ptr
+
mad24
(
y,
src_step,
mad24
(
x,
(
int
)(
sizeof
(
CT
))
,
src_offset
))
;
__global
const
uchar*
src
=
src_ptr
+
mad24
(
y,
src_step,
mad24
(
x,
(
int
)(
sizeof
(
CT
))
,
src_offset
))
;
__global
uchar*
dst
=
dst_ptr
+
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)(
sizeof
(
CT
))
,
dst_offset
))
;
__global
uchar*
dst
=
dst_ptr
+
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)(
sizeof
(
CT
))
,
dst_offset
))
;
__global
const
float2*
twiddles
=
(
__global
float2*
)
twiddles_ptr
;
__global
const
CT*
twiddles
=
(
__global
const
CT*
)(
twiddles_ptr
+
twiddles_offset
)
;
const
int
ind
=
y
;
const
int
ind
=
y
;
const
int
block_size
=
LOCAL_SIZE/kercn
;
const
int
block_size
=
LOCAL_SIZE/kercn
;
...
@@ -806,7 +806,7 @@ __kernel void ifft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
...
@@ -806,7 +806,7 @@ __kernel void ifft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
#
else
#
else
if
(
x
<
nz
)
if
(
x
<
nz
)
{
{
__global
const
CT*
twiddles
=
(
__global
CT*
)
twiddles_ptr
;
__global
const
CT*
twiddles
=
(
__global
const
CT*
)(
twiddles_ptr
+
twiddles_offset
)
;
const
int
ind
=
y
;
const
int
ind
=
y
;
const
int
block_size
=
LOCAL_SIZE/kercn
;
const
int
block_size
=
LOCAL_SIZE/kercn
;
...
...
modules/core/test/ocl/test_dft.cpp
View file @
a89ff402
...
@@ -62,7 +62,7 @@ namespace ocl {
...
@@ -62,7 +62,7 @@ namespace ocl {
////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////
// Dft
// Dft
PARAM_TEST_CASE
(
Dft
,
cv
::
Size
,
OCL_FFT_TYPE
,
bool
,
bool
,
bool
,
bool
)
PARAM_TEST_CASE
(
Dft
,
cv
::
Size
,
OCL_FFT_TYPE
,
MatDepth
,
bool
,
bool
,
bool
,
bool
)
{
{
cv
::
Size
dft_size
;
cv
::
Size
dft_size
;
int
dft_flags
,
depth
,
cn
,
dft_type
;
int
dft_flags
,
depth
,
cn
,
dft_type
;
...
@@ -76,7 +76,7 @@ PARAM_TEST_CASE(Dft, cv::Size, OCL_FFT_TYPE, bool, bool, bool, bool)
...
@@ -76,7 +76,7 @@ PARAM_TEST_CASE(Dft, cv::Size, OCL_FFT_TYPE, bool, bool, bool, bool)
{
{
dft_size
=
GET_PARAM
(
0
);
dft_size
=
GET_PARAM
(
0
);
dft_type
=
GET_PARAM
(
1
);
dft_type
=
GET_PARAM
(
1
);
depth
=
CV_32F
;
depth
=
GET_PARAM
(
2
)
;
dft_flags
=
0
;
dft_flags
=
0
;
switch
(
dft_type
)
switch
(
dft_type
)
...
@@ -87,13 +87,13 @@ PARAM_TEST_CASE(Dft, cv::Size, OCL_FFT_TYPE, bool, bool, bool, bool)
...
@@ -87,13 +87,13 @@ PARAM_TEST_CASE(Dft, cv::Size, OCL_FFT_TYPE, bool, bool, bool, bool)
case
C2C
:
dft_flags
|=
cv
::
DFT_COMPLEX_OUTPUT
;
cn
=
2
;
break
;
case
C2C
:
dft_flags
|=
cv
::
DFT_COMPLEX_OUTPUT
;
cn
=
2
;
break
;
}
}
if
(
GET_PARAM
(
2
))
dft_flags
|=
cv
::
DFT_INVERSE
;
if
(
GET_PARAM
(
3
))
if
(
GET_PARAM
(
3
))
dft_flags
|=
cv
::
DFT_
ROWS
;
dft_flags
|=
cv
::
DFT_
INVERSE
;
if
(
GET_PARAM
(
4
))
if
(
GET_PARAM
(
4
))
dft_flags
|=
cv
::
DFT_ROWS
;
if
(
GET_PARAM
(
5
))
dft_flags
|=
cv
::
DFT_SCALE
;
dft_flags
|=
cv
::
DFT_SCALE
;
hint
=
GET_PARAM
(
5
);
hint
=
GET_PARAM
(
6
);
is1d
=
(
dft_flags
&
DFT_ROWS
)
!=
0
||
dft_size
.
height
==
1
;
is1d
=
(
dft_flags
&
DFT_ROWS
)
!=
0
||
dft_size
.
height
==
1
;
}
}
...
@@ -177,6 +177,7 @@ OCL_INSTANTIATE_TEST_CASE_P(OCL_ImgProc, MulSpectrums, testing::Combine(Bool(),
...
@@ -177,6 +177,7 @@ OCL_INSTANTIATE_TEST_CASE_P(OCL_ImgProc, MulSpectrums, testing::Combine(Bool(),
OCL_INSTANTIATE_TEST_CASE_P
(
Core
,
Dft
,
Combine
(
Values
(
cv
::
Size
(
45
,
72
),
cv
::
Size
(
36
,
36
),
cv
::
Size
(
512
,
1
),
cv
::
Size
(
1280
,
768
)),
OCL_INSTANTIATE_TEST_CASE_P
(
Core
,
Dft
,
Combine
(
Values
(
cv
::
Size
(
45
,
72
),
cv
::
Size
(
36
,
36
),
cv
::
Size
(
512
,
1
),
cv
::
Size
(
1280
,
768
)),
Values
((
OCL_FFT_TYPE
)
R2C
,
(
OCL_FFT_TYPE
)
C2C
,
(
OCL_FFT_TYPE
)
R2R
,
(
OCL_FFT_TYPE
)
C2R
),
Values
((
OCL_FFT_TYPE
)
R2C
,
(
OCL_FFT_TYPE
)
C2C
,
(
OCL_FFT_TYPE
)
R2R
,
(
OCL_FFT_TYPE
)
C2R
),
Values
(
CV_32F
,
CV_64F
),
Bool
(),
// DFT_INVERSE
Bool
(),
// DFT_INVERSE
Bool
(),
// DFT_ROWS
Bool
(),
// DFT_ROWS
Bool
(),
// DFT_SCALE
Bool
(),
// DFT_SCALE
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment