Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
ed07241f
Commit
ed07241f
authored
Jul 15, 2014
by
Alexander Karsakov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Completed all forward transforms.
parent
e5a3ab3c
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
271 additions
and
95 deletions
+271
-95
perf_arithm.cpp
modules/core/perf/opencl/perf_arithm.cpp
+1
-1
perf_dxt.cpp
modules/core/perf/opencl/perf_dxt.cpp
+26
-7
dxt.cpp
modules/core/src/dxt.cpp
+92
-39
fft.cl
modules/core/src/opencl/fft.cl
+142
-39
test_dft.cpp
modules/core/test/ocl/test_dft.cpp
+10
-9
No files found.
modules/core/perf/opencl/perf_arithm.cpp
View file @
ed07241f
...
...
@@ -292,7 +292,7 @@ OCL_PERF_TEST_P(MagnitudeFixture, Magnitude, ::testing::Combine(
typedef
Size_MatType
TransposeFixture
;
OCL_PERF_TEST_P
(
TransposeFixture
,
Transpose
,
::
testing
::
Combine
(
OCL_TEST_SIZES
,
OCL_TEST_TYPES_134
))
OCL_TEST_SIZES
,
Values
(
CV_8UC1
,
CV_32FC1
,
CV_8UC2
,
CV_32FC2
,
CV_8UC4
,
CV_32FC4
)
))
{
const
Size_MatType_t
params
=
GetParam
();
const
Size
srcSize
=
get
<
0
>
(
params
);
...
...
modules/core/perf/opencl/perf_dxt.cpp
View file @
ed07241f
...
...
@@ -54,21 +54,40 @@ namespace ocl {
///////////// dft ////////////////////////
typedef
tuple
<
Size
,
int
>
DftParams
;
enum
OCL_FFT_TYPE
{
R2R
=
0
,
// real to real (CCS)
C2R
=
1
,
// complex to real
R2C
=
2
,
// real to complex
C2C
=
3
// complex to complex
};
typedef
tuple
<
OCL_FFT_TYPE
,
Size
,
int
>
DftParams
;
typedef
TestBaseWithParam
<
DftParams
>
DftFixture
;
OCL_PERF_TEST_P
(
DftFixture
,
Dft
,
::
testing
::
Combine
(
Values
(
OCL_SIZE_1
,
OCL_SIZE_2
,
OCL_SIZE_3
,
Size
(
1024
,
1024
),
Size
(
1024
,
2048
),
Size
(
512
,
512
),
Size
(
2048
,
2048
)),
OCL_PERF_TEST_P
(
DftFixture
,
Dft
,
::
testing
::
Combine
(
Values
(
C2C
,
R2R
,
C2R
,
R2C
),
Values
(
OCL_SIZE_1
,
OCL_SIZE_2
,
OCL_SIZE_3
,
Size
(
1024
,
1024
),
Size
(
512
,
512
),
Size
(
2048
,
2048
)),
Values
((
int
)
DFT_ROWS
,
(
int
)
0
/*, (int)DFT_SCALE, (int)DFT_INVERSE,
(int)DFT_INVERSE | DFT_SCALE, (int)DFT_ROWS | DFT_INVERSE*/
)))
{
const
DftParams
params
=
GetParam
();
const
Size
srcSize
=
get
<
0
>
(
params
);
const
int
flags
=
get
<
1
>
(
params
);
UMat
src
(
srcSize
,
CV_32FC2
),
dst
(
srcSize
,
CV_32FC2
);
const
int
dft_type
=
get
<
0
>
(
params
);
const
Size
srcSize
=
get
<
1
>
(
params
);
int
flags
=
get
<
2
>
(
params
);
int
in_cn
,
out_cn
;
switch
(
dft_type
)
{
case
R2R
:
flags
|=
cv
::
DFT_REAL_OUTPUT
;
in_cn
=
1
;
out_cn
=
1
;
break
;
case
C2R
:
flags
|=
cv
::
DFT_REAL_OUTPUT
;
in_cn
=
2
;
out_cn
=
2
;
break
;
case
R2C
:
flags
|=
cv
::
DFT_COMPLEX_OUTPUT
;
in_cn
=
1
;
out_cn
=
2
;
break
;
case
C2C
:
flags
|=
cv
::
DFT_COMPLEX_OUTPUT
;
in_cn
=
2
;
out_cn
=
2
;
break
;
}
UMat
src
(
srcSize
,
CV_MAKE_TYPE
(
CV_32F
,
in_cn
)),
dst
(
srcSize
,
CV_MAKE_TYPE
(
CV_32F
,
out_cn
));
declare
.
in
(
src
,
WARMUP_RNG
).
out
(
dst
);
OCL_TEST_CYCLE
()
cv
::
dft
(
src
,
dst
,
flags
|
DFT_COMPLEX_OUTPUT
);
OCL_TEST_CYCLE
()
cv
::
dft
(
src
,
dst
,
flags
);
SANITY_CHECK
(
dst
,
1e-3
);
}
...
...
modules/core/src/dxt.cpp
View file @
ed07241f
...
...
@@ -2034,7 +2034,7 @@ namespace cv
#ifdef HAVE_OPENCL
static
std
::
vector
<
int
>
ocl_getRadixes
(
int
cols
,
int
&
min_radix
)
static
std
::
vector
<
int
>
ocl_getRadixes
(
int
cols
,
std
::
vector
<
int
>&
radixes
,
std
::
vector
<
int
>&
blocks
,
int
&
min_radix
)
{
int
factors
[
34
];
int
nf
=
DFTFactorize
(
cols
,
factors
);
...
...
@@ -2042,9 +2042,6 @@ static std::vector<int> ocl_getRadixes(int cols, int& min_radix)
int
n
=
1
;
int
factor_index
=
0
;
// choose radix order
std
::
vector
<
int
>
radixes
;
// 2^n transforms
if
(
(
factors
[
factor_index
]
&
1
)
==
0
)
{
...
...
@@ -2057,6 +2054,9 @@ static std::vector<int> ocl_getRadixes(int cols, int& min_radix)
radix
=
4
;
radixes
.
push_back
(
radix
);
if
(
radix
==
2
&&
cols
%
4
==
0
)
min_radix
=
min
(
min_radix
,
2
*
radix
);
else
min_radix
=
min
(
min_radix
,
radix
);
n
*=
radix
;
}
...
...
@@ -2067,6 +2067,9 @@ static std::vector<int> ocl_getRadixes(int cols, int& min_radix)
for
(
;
factor_index
<
nf
;
factor_index
++
)
{
radixes
.
push_back
(
factors
[
factor_index
]);
if
(
factors
[
factor_index
]
==
3
&&
cols
%
6
==
0
)
min_radix
=
min
(
min_radix
,
2
*
factors
[
factor_index
]);
else
min_radix
=
min
(
min_radix
,
factors
[
factor_index
]);
}
return
radixes
;
...
...
@@ -2084,8 +2087,16 @@ struct OCL_FftPlan
OCL_FftPlan
(
int
_size
,
int
_flags
)
:
dft_size
(
_size
),
flags
(
_flags
)
{
int
min_radix
=
INT_MAX
;
std
::
vector
<
int
>
radixes
=
ocl_getRadixes
(
dft_size
,
min_radix
);
thread_count
=
dft_size
/
min_radix
;
std
::
vector
<
int
>
radixes
,
blocks
;
ocl_getRadixes
(
dft_size
,
radixes
,
blocks
,
min_radix
);
thread_count
=
(
dft_size
+
min_radix
-
1
)
/
min_radix
;
printf
(
"cols: %d - "
,
dft_size
);
for
(
int
i
=
0
;
i
<
radixes
.
size
();
i
++
)
{
printf
(
"%d "
,
radixes
[
i
]);
}
printf
(
"min radix - %d
\n
"
,
min_radix
);
// generate string with radix calls
String
radix_processing
;
...
...
@@ -2093,7 +2104,10 @@ struct OCL_FftPlan
for
(
size_t
i
=
0
;
i
<
radixes
.
size
();
i
++
)
{
int
radix
=
radixes
[
i
];
radix_processing
+=
format
(
"fft_radix%d(smem,twiddles+%d,x,%d,%d);"
,
radix
,
twiddle_size
,
n
,
dft_size
/
radix
);
if
((
radix
==
2
&&
dft_size
%
4
==
0
)
||
(
radix
==
3
&&
dft_size
%
6
==
0
))
radix_processing
+=
format
(
"fft_radix%d_B2(smem,twiddles+%d,ind,%d,%d);"
,
radix
,
twiddle_size
,
n
,
dft_size
/
radix
);
else
radix_processing
+=
format
(
"fft_radix%d(smem,twiddles+%d,ind,%d,%d);"
,
radix
,
twiddle_size
,
n
,
dft_size
/
radix
);
twiddle_size
+=
(
radix
-
1
)
*
n
;
n
*=
radix
;
}
...
...
@@ -2126,20 +2140,39 @@ struct OCL_FftPlan
dft_size
,
dft_size
/
thread_count
,
radix_processing
.
c_str
());
}
bool
enqueueTransform
(
InputArray
_src
,
OutputArray
_dst
,
int
nonzero_rows
)
const
bool
enqueueTransform
(
InputArray
_src
,
OutputArray
_dst
,
int
dft_size
,
int
flags
,
bool
rows
=
true
)
const
{
UMat
src
=
_src
.
getUMat
();
_dst
.
create
(
src
.
size
(),
src
.
type
());
UMat
dst
=
_dst
.
getUMat
();
size_t
globalsize
[
2
]
=
{
thread_count
,
nonzero_rows
};
size_t
localsize
[
2
]
=
{
thread_count
,
1
};
size_t
globalsize
[
2
];
size_t
localsize
[
2
];
String
kernel_name
;
if
(
rows
)
{
globalsize
[
0
]
=
thread_count
;
globalsize
[
1
]
=
dft_size
;
localsize
[
0
]
=
thread_count
;
localsize
[
1
]
=
1
;
kernel_name
=
"fft_multi_radix_rows"
;
}
else
{
globalsize
[
0
]
=
dft_size
;
globalsize
[
1
]
=
thread_count
;
localsize
[
0
]
=
1
;
localsize
[
1
]
=
thread_count
;
kernel_name
=
"fft_multi_radix_cols"
;
}
ocl
::
Kernel
k
(
"fft_multi_radix"
,
ocl
::
core
::
fft_oclsrc
,
buildOptions
);
String
options
=
buildOptions
;
if
(
src
.
channels
()
==
1
)
options
+=
" -D REAL_INPUT"
;
if
(
dst
.
channels
()
==
1
)
options
+=
" -D CCS_OUTPUT"
;
ocl
::
Kernel
k
(
kernel_name
.
c_str
(),
ocl
::
core
::
fft_oclsrc
,
options
);
if
(
k
.
empty
())
return
false
;
k
.
args
(
ocl
::
KernelArg
::
ReadOnly
NoSize
(
src
),
ocl
::
KernelArg
::
WriteOnlyNoSize
(
dst
),
ocl
::
KernelArg
::
PtrReadOnly
(
twiddles
),
thread_count
,
nonzero_rows
);
k
.
args
(
ocl
::
KernelArg
::
ReadOnly
(
src
),
ocl
::
KernelArg
::
WriteOnly
(
dst
),
ocl
::
KernelArg
::
PtrReadOnly
(
twiddles
),
thread_count
,
dft_size
);
return
k
.
run
(
2
,
globalsize
,
localsize
,
false
);
}
};
...
...
@@ -2231,16 +2264,16 @@ static bool ocl_packToCCS(InputArray _src, OutputArray _dst, int flags)
return
true
;
}
static
bool
ocl_dft_C2C_row
(
InputArray
_src
,
OutputArray
_dst
,
int
nonzero_rows
,
int
flags
)
static
bool
ocl_dft_C2C_row
s
(
InputArray
_src
,
OutputArray
_dst
,
int
nonzero_rows
,
int
flags
)
{
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
channels
=
CV_MAT_CN
(
type
);
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
if
(
depth
==
CV_64F
&&
!
doubleSupport
)
return
false
;
const
OCL_FftPlan
*
plan
=
OCL_FftPlanCache
::
getInstance
().
getFftPlan
(
_src
.
cols
(),
flags
);
return
plan
->
enqueueTransform
(
_src
,
_dst
,
nonzero_rows
);
return
plan
->
enqueueTransform
(
_src
,
_dst
,
nonzero_rows
,
flags
,
true
);
}
static
bool
ocl_dft_C2C_cols
(
InputArray
_src
,
OutputArray
_dst
,
int
flags
)
{
const
OCL_FftPlan
*
plan
=
OCL_FftPlanCache
::
getInstance
().
getFftPlan
(
_src
.
rows
(),
flags
);
return
plan
->
enqueueTransform
(
_src
,
_dst
,
_src
.
cols
(),
flags
,
false
);
}
static
bool
ocl_dft
(
InputArray
_src
,
OutputArray
_dst
,
int
flags
,
int
nonzero_rows
)
...
...
@@ -2262,7 +2295,10 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro
int
real_input
=
cn
==
1
?
1
:
0
;
int
real_output
=
(
flags
&
DFT_REAL_OUTPUT
)
!=
0
;
bool
inv
=
(
flags
&
DFT_INVERSE
)
!=
0
?
1
:
0
;
bool
is1d
=
(
flags
&
DFT_ROWS
)
!=
0
||
src
.
rows
==
1
;
if
(
nonzero_rows
<=
0
||
nonzero_rows
>
_src
.
rows
()
)
nonzero_rows
=
_src
.
rows
();
bool
is1d
=
(
flags
&
DFT_ROWS
)
!=
0
||
nonzero_rows
==
1
;
// if output format is not specified
if
(
complex_output
+
real_output
==
0
)
...
...
@@ -2276,6 +2312,19 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro
}
}
// Forward Complex to CCS not supported
if
(
complex_input
&&
real_output
&&
!
inv
)
{
real_output
=
0
;
complex_output
=
1
;
}
// Inverse CCS to Complex not supported
if
(
real_input
&&
complex_output
&&
inv
)
{
complex_output
=
0
;
real_output
=
1
;
}
UMat
input
,
output
;
if
(
complex_input
)
{
...
...
@@ -2285,12 +2334,7 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro
{
if
(
!
inv
)
{
// in case real input convert it to complex
input
.
create
(
src
.
size
(),
CV_MAKE_TYPE
(
depth
,
2
));
std
::
vector
<
UMat
>
planes
;
planes
.
push_back
(
src
);
planes
.
push_back
(
UMat
::
zeros
(
src
.
size
(),
CV_32F
));
merge
(
planes
,
input
);
input
=
src
;
}
else
{
...
...
@@ -2298,31 +2342,34 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro
}
}
UMat
dst
=
_dst
.
getUMat
();
if
(
complex_output
)
{
if
(
real_input
&&
is1d
&&
!
inv
)
output
.
create
(
src
.
size
(),
CV_32FC2
);
else
output
=
dst
;
{
_dst
.
create
(
src
.
size
(),
CV_32FC2
);
output
=
_dst
.
getUMat
();
}
}
else
{
// CCS
if
(
is1d
)
{
_dst
.
create
(
src
.
size
(),
CV_32FC1
);
output
=
_dst
.
getUMat
();
}
else
output
.
create
(
src
.
size
(),
CV_32FC2
);
}
if
(
nonzero_rows
<=
0
||
nonzero_rows
>
_src
.
rows
()
)
nonzero_rows
=
_src
.
rows
();
if
(
!
ocl_dft_C2C_row
(
input
,
output
,
nonzero_rows
,
flags
))
if
(
!
ocl_dft_C2C_rows
(
input
,
output
,
nonzero_rows
,
flags
))
return
false
;
if
(
(
flags
&
DFT_ROWS
)
==
0
&&
nonzero_rows
>
1
)
if
(
!
is1d
)
{
transpose
(
output
,
output
);
if
(
!
ocl_dft_C2C_row
(
output
,
output
,
output
.
rows
,
flags
))
if
(
!
ocl_dft_C2C_cols
(
output
,
output
,
flags
))
return
false
;
transpose
(
output
,
output
);
}
if
(
complex_output
)
...
...
@@ -2335,12 +2382,18 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro
else
{
if
(
!
inv
)
{
if
(
!
is1d
)
ocl_packToCCS
(
output
,
_dst
,
flags
);
else
_dst
.
assign
(
output
);
}
else
{
// copy real part to dst
}
}
//printf("OCL!\n");
return
true
;
}
...
...
modules/core/src/opencl/fft.cl
View file @
ed07241f
__constant
float
PI
=
3.14159265f
;
__constant
float
SQRT_2
=
0.707106781188f
;
__constant
float
sin_120
=
0.866025403784f
;
__constant
float
fft5_2
=
0.559016994374f
;
__constant
float
fft5_3
=
-0.951056516295f
;
__constant
float
fft5_4
=
-1.538841768587f
;
__constant
float
fft5_5
=
0.363271264002f
;
__attribute__
((
always_inline
))
float2
mul_float2
(
float2
a,
float2
b
)
{
float2
res
;
res.x
=
a.x
*
b.x
-
a.y
*
b.y
;
res.y
=
a.x
*
b.y
+
a.y
*
b.x
;
return
res
;
}
#
define
SQRT_2
0.707106781188f
#
define
sin_120
0.866025403784f
#
define
fft5_2
0.559016994374f
#
define
fft5_3
-0.951056516295f
#
define
fft5_4
-1.538841768587f
#
define
fft5_5
0.363271264002f
__attribute__
((
always_inline
))
float2
sincos_float2
(
float
alpha
)
{
float
cs,
sn
;
sn
=
sincos
(
alpha,
&cs
)
; // sincos
return
(
float2
)(
cs,
sn
)
;
float2
mul_float2
(
float2
a,
float2
b
)
{
return
(
float2
)(
fma
(
a.x,
b.x,
-a.y
*
b.y
)
,
fma
(
a.x,
b.y,
a.y
*
b.x
))
;
}
__attribute__
((
always_inline
))
...
...
@@ -52,6 +40,38 @@ void fft_radix2(__local float2* smem, __constant const float2* twiddles, const i
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
__attribute__
((
always_inline
))
void
fft_radix2_B2
(
__local
float2*
smem,
__constant
const
float2*
twiddles,
const
int
x,
const
int
block_size,
const
int
t
)
{
const
int
k1
=
x
&
(
block_size
-
1
)
;
const
int
x2
=
x
+
(
t+1
)
/2
;
const
int
k2
=
x2
&
(
block_size
-
1
)
;
float2
a0,
a1,
a2,
a3
;
if
(
x
<
(
t+1
)
/2
)
{
a0
=
smem[x]
;
a1
=
mul_float2
(
twiddles[k1],smem[x+t]
)
;
a2
=
smem[x2]
;
a3
=
mul_float2
(
twiddles[k2],smem[x2+t]
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
x
<
(
t+1
)
/2
)
{
int
dst_ind
=
(
x
<<
1
)
-
k1
;
smem[dst_ind]
=
a0
+
a1
;
smem[dst_ind+block_size]
=
a0
-
a1
;
dst_ind
=
(
x2
<<
1
)
-
k2
;
smem[dst_ind]
=
a2
+
a3
;
smem[dst_ind+block_size]
=
a2
-
a3
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
__attribute__
((
always_inline
))
void
fft_radix4
(
__local
float2*
smem,
__constant
const
float2*
twiddles,
const
int
x,
const
int
block_size,
const
int
t
)
{
...
...
@@ -158,13 +178,6 @@ void fft_radix3(__local float2* smem, __constant const float2* twiddles, const i
if
(
x
<
t
)
{
//const
int
twiddle_block
=
block_size
/
3
;
//const
float
theta
=
-PI
*
k
*
2
/
(
3
*
block_size
)
;
//float2
tw
=
sincos_float2
(
theta
)
;
//printf
(
"radix3 %d (%f,%f)(%f,%f)\n"
,
k,
tw.x,
tw.y,
twiddles[k].x,
twiddles[k].y
)
;
//tw
=
sincos_float2
(
2*theta
)
;
//printf
(
"radix3- %d %d (%f,%f)(%f,%f)\n"
,
k,
twiddle_block,
tw.x,
tw.y,
twiddles[k+block_size].x,
twiddles[k+block_size].y
)
;
a0
=
smem[x]
;
a1
=
mul_float2
(
twiddles[k],
smem[x+t]
)
;
a2
=
mul_float2
(
twiddles[k+block_size],
smem[x+2*t]
)
;
...
...
@@ -177,12 +190,59 @@ void fft_radix3(__local float2* smem, __constant const float2* twiddles, const i
const
int
dst_ind
=
((
x
-
k
)
*
3
)
+
k
;
float2
b1
=
a1
+
a2
;
a2
=
twiddle
((
float2
)
sin_120*
(
a1
-
a2
))
;
a2
=
twiddle
(
sin_120*
(
a1
-
a2
))
;
float2
b0
=
a0
-
(
float2
)(
0.5f
)
*b1
;
smem[dst_ind]
=
a0
+
b1
;
smem[dst_ind
+
block_size]
=
b0
+
a2
;
smem[dst_ind
+
2*block_size]
=
b0
-
a2
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
__attribute__
((
always_inline
))
void
fft_radix3_B2
(
__local
float2*
smem,
__constant
const
float2*
twiddles,
const
int
x,
const
int
block_size,
const
int
t
)
{
const
int
k
=
x
%
block_size
;
const
int
x2
=
x
+
(
t+1
)
/2
;
const
int
k2
=
x2
%
block_size
;
float2
a0,
a1,
a2,
a3,
a4,
a5
;
if
(
x
<
(
t+1
)
/2
)
{
a0
=
smem[x]
;
a1
=
mul_float2
(
twiddles[k],
smem[x+t]
)
;
a2
=
mul_float2
(
twiddles[k+block_size],
smem[x+2*t]
)
;
a3
=
smem[x2]
;
a4
=
mul_float2
(
twiddles[k2],
smem[x2+t]
)
;
a5
=
mul_float2
(
twiddles[k2+block_size],
smem[x2+2*t]
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
x
<
(
t+1
)
/2
)
{
int
dst_ind
=
((
x
-
k
)
*
3
)
+
k
;
float2
b1
=
a1
+
a2
;
a2
=
twiddle
(
sin_120*
(
a1
-
a2
))
;
float2
b0
=
a0
-
(
float2
)(
0.5f
)
*b1
;
smem[dst_ind]
=
a0
+
b1
;
smem[dst_ind
+
block_size]
=
b0
+
a2
;
smem[dst_ind
+
2*block_size]
=
b0
-
a2
;
dst_ind
=
((
x2
-
k2
)
*
3
)
+
k2
;
b1
=
a4
+
a5
;
a5
=
twiddle
(
sin_120*
(
a4
-
a5
))
;
b0
=
a3
-
(
float2
)(
0.5f
)
*b1
;
smem[dst_ind]
=
a3
+
b1
;
smem[dst_ind
+
block_size]
=
b0
+
a5
;
smem[dst_ind
+
2*block_size]
=
b0
-
a5
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
...
...
@@ -196,8 +256,6 @@ void fft_radix5(__local float2* smem, __constant const float2* twiddles, const i
if
(
x
<
t
)
{
int
tw_ind
=
block_size
/
5
;
a0
=
smem[x]
;
a1
=
mul_float2
(
twiddles[k],
smem[x
+
t]
)
;
a2
=
mul_float2
(
twiddles[k
+
block_size],smem[x+2*t]
)
;
...
...
@@ -223,8 +281,8 @@ void fft_radix5(__local float2* smem, __constant const float2* twiddles, const i
a2
=
b1
+
a4
;
b0
=
a0
-
(
float2
)
0.25f
*
a2
;
b1
=
(
float2
)
fft5_2
*
(
b1
-
a4
)
;
a4
=
(
float2
)
fft5_3
*
(
float2
)(
-a1.y
-
a3.y,
a1.x
+
a3.x
)
;
b1
=
fft5_2
*
(
b1
-
a4
)
;
a4
=
fft5_3
*
(
float2
)(
-a1.y
-
a3.y,
a1.x
+
a3.x
)
;
b5
=
(
float2
)(
a4.x
-
fft5_5
*
a1.y,
a4.y
+
fft5_5
*
a1.x
)
;
a4.x
+=
fft5_4
*
a3.y
;
...
...
@@ -243,8 +301,8 @@ void fft_radix5(__local float2* smem, __constant const float2* twiddles, const i
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
__kernel
void
fft_multi_radix
(
__global
const
uchar*
src_ptr,
int
src_step,
int
src_offset
,
__global
uchar*
dst_ptr,
int
dst_step,
int
dst_offset
,
__kernel
void
fft_multi_radix
_rows
(
__global
const
uchar*
src_ptr,
int
src_step,
int
src_offset,
int
src_rows,
int
src_cols
,
__global
uchar*
dst_ptr,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols
,
__constant
float2
*
twiddles_ptr,
const
int
t
,
const
int
nz
)
{
const
int
x
=
get_global_id
(
0
)
;
...
...
@@ -253,14 +311,60 @@ __kernel void fft_multi_radix(__global const uchar* src_ptr, int src_step, int s
if
(
y
<
nz
)
{
__local
float2
smem[LOCAL_SIZE]
;
__constant
const
float2*
twiddles
=
(
__constant
float2*
)
twiddles_ptr
;
const
int
ind
=
x
;
const
int
block_size
=
LOCAL_SIZE/kercn
;
#
ifndef
REAL_INPUT
__global
const
float2*
src
=
(
__global
const
float2*
)(
src_ptr
+
mad24
(
y,
src_step,
mad24
(
x,
(
int
)(
sizeof
(
float
)
*2
)
,
src_offset
)))
;
#
pragma
unroll
for
(
int
i=0
; i<kercn; i++)
smem[x+i*block_size]
=
src[i*block_size]
;
#
else
__global
const
float*
src
=
(
__global
const
float*
)(
src_ptr
+
mad24
(
y,
src_step,
mad24
(
x,
(
int
)
sizeof
(
float
)
,
src_offset
)))
;
#
pragma
unroll
for
(
int
i=0
; i<kercn; i++)
smem[x+i*block_size]
=
(
float2
)(
src[i*block_size],
0.f
)
;
#
endif
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
RADIX_PROCESS
;
#
ifndef
CCS_OUTPUT
__global
float2*
dst
=
(
__global
float2*
)(
dst_ptr
+
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)(
sizeof
(
float
)
*2
)
,
dst_offset
)))
;
__constant
const
float2*
twiddles
=
(
__constant
float2*
)
twiddles_ptr
;
#
pragma
unroll
for
(
int
i=0
; i<kercn; i++)
dst[i*block_size]
=
smem[x
+
i*block_size]
;
#
else
//
pack
row
to
CCS
__local
float*
smem_1cn
=
(
__local
float*
)
smem
;
__global
float*
dst
=
(
__global
float*
)(
dst_ptr
+
mad24
(
y,
dst_step,
dst_offset
))
;
for
(
int
i=x
; i<dst_cols-1; i+=block_size)
dst[i+1]
=
smem_1cn[i+2]
;
if
(
x
==
0
)
dst[0]
=
smem_1cn[0]
;
#
endif
}
}
__kernel
void
fft_multi_radix_cols
(
__global
const
uchar*
src_ptr,
int
src_step,
int
src_offset,
int
src_rows,
int
src_cols,
__global
uchar*
dst_ptr,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols,
__constant
float2
*
twiddles_ptr,
const
int
t
,
const
int
nz
)
{
const
int
x
=
get_group_id
(
0
)
;
const
int
y
=
get_global_id
(
1
)
;
if
(
x
<
nz
)
{
__local
float2
smem[LOCAL_SIZE]
;
__global
const
uchar*
src
=
src_ptr
+
mad24
(
y,
src_step,
mad24
(
x,
(
int
)(
sizeof
(
float
)
*2
)
,
src_offset
))
;
__global
uchar*
dst
=
dst_ptr
+
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)(
sizeof
(
float
)
*2
)
,
dst_offset
))
;
__constant
const
float2*
twiddles
=
(
__constant
float2*
)
twiddles_ptr
;
const
int
ind
=
y
;
const
int
block_size
=
LOCAL_SIZE/kercn
;
#
pragma
unroll
for
(
int
i=0
; i<kercn; i++)
smem[
x+i*block_size]
=
src[i*block_size]
;
smem[
y+i*block_size]
=
*
((
__global
const
float2*
)(
src
+
i*block_size*src_step
))
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
...
...
@@ -269,8 +373,6 @@ __kernel void fft_multi_radix(__global const uchar* src_ptr, int src_step, int s
//
copy
data
to
dst
#
pragma
unroll
for
(
int
i=0
; i<kercn; i++)
{
dst[i*block_size]
=
smem[x
+
i*block_size]
;
}
*
((
__global
float2*
)(
dst
+
i*block_size*src_step
))
=
smem[y
+
i*block_size]
;
}
}
\ No newline at end of file
modules/core/test/ocl/test_dft.cpp
View file @
ed07241f
...
...
@@ -62,7 +62,7 @@ namespace ocl {
////////////////////////////////////////////////////////////////////////////
// Dft
PARAM_TEST_CASE
(
Dft
,
cv
::
Size
,
OCL_FFT_TYPE
,
bool
)
PARAM_TEST_CASE
(
Dft
,
cv
::
Size
,
OCL_FFT_TYPE
,
bool
,
bool
)
{
cv
::
Size
dft_size
;
int
dft_flags
,
depth
,
cn
,
dft_type
;
...
...
@@ -86,16 +86,16 @@ PARAM_TEST_CASE(Dft, cv::Size, OCL_FFT_TYPE, bool)
case
C2C
:
dft_flags
|=
cv
::
DFT_COMPLEX_OUTPUT
;
cn
=
2
;
break
;
}
inplace
=
false
;
if
(
GET_PARAM
(
2
))
dft_flags
|=
cv
::
DFT_ROWS
;
// (DFT_COMPLEX_OUTPUT | DFT_ROWS) works incorrect
dft_flags
|=
cv
::
DFT_ROWS
;
//if (GET_PARAM(3))
// if (dft_type == C2C) dft_flags |= cv::DFT_INVERSE;
//if (GET_PARAM(3))
// dft_flags |= cv::DFT_SCALE;
inplace
=
GET_PARAM
(
3
);
if
(
inplace
&&
dft_type
==
0
)
inplace
=
0
;
}
void
generateTestData
()
...
...
@@ -181,10 +181,11 @@ OCL_TEST_P(MulSpectrums, Mat)
OCL_INSTANTIATE_TEST_CASE_P
(
OCL_ImgProc
,
MulSpectrums
,
testing
::
Combine
(
Bool
(),
Bool
()));
OCL_INSTANTIATE_TEST_CASE_P
(
Core
,
Dft
,
Combine
(
Values
(
cv
::
Size
(
1920
,
1
),
cv
::
Size
(
5
,
4
),
cv
::
Size
(
30
,
20
),
cv
::
Size
(
512
,
1
),
cv
::
Size
(
1024
,
1024
)),
Values
(
/*(OCL_FFT_TYPE) C2C, (OCL_FFT_TYPE) R2C,*/
(
OCL_FFT_TYPE
)
R2R
/*, (OCL_FFT_TYPE) C2R*/
),
Bool
()
// DFT_ROWS
OCL_INSTANTIATE_TEST_CASE_P
(
Core
,
Dft
,
Combine
(
Values
(
cv
::
Size
(
6
,
1
),
cv
::
Size
(
5
,
8
),
cv
::
Size
(
30
,
20
),
cv
::
Size
(
512
,
1
),
cv
::
Size
(
1280
,
768
)),
Values
((
OCL_FFT_TYPE
)
R2C
,
(
OCL_FFT_TYPE
)
C2C
,
(
OCL_FFT_TYPE
)
R2R
/*, (OCL_FFT_TYPE) C2R*/
),
Bool
(),
// DFT_ROWS
Bool
()
// inplace
)
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment