Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
66ac4621
Commit
66ac4621
authored
Jul 23, 2014
by
Alexander Karsakov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Final refactoring, fixes
parent
1d2cf0e2
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
353 additions
and
389 deletions
+353
-389
perf_arithm.cpp
modules/core/perf/opencl/perf_arithm.cpp
+1
-1
perf_dxt.cpp
modules/core/perf/opencl/perf_dxt.cpp
+9
-28
dxt.cpp
modules/core/src/dxt.cpp
+254
-259
ocl.cpp
modules/core/src/ocl.cpp
+3
-5
fft.cl
modules/core/src/opencl/fft.cl
+66
-40
test_dft.cpp
modules/core/test/ocl/test_dft.cpp
+20
-56
No files found.
modules/core/perf/opencl/perf_arithm.cpp
View file @
66ac4621
...
@@ -292,7 +292,7 @@ OCL_PERF_TEST_P(MagnitudeFixture, Magnitude, ::testing::Combine(
...
@@ -292,7 +292,7 @@ OCL_PERF_TEST_P(MagnitudeFixture, Magnitude, ::testing::Combine(
typedef
Size_MatType
TransposeFixture
;
typedef
Size_MatType
TransposeFixture
;
OCL_PERF_TEST_P
(
TransposeFixture
,
Transpose
,
::
testing
::
Combine
(
OCL_PERF_TEST_P
(
TransposeFixture
,
Transpose
,
::
testing
::
Combine
(
OCL_TEST_SIZES
,
Values
(
CV_8UC1
,
CV_32FC1
,
CV_8UC2
,
CV_32FC2
,
CV_8UC4
,
CV_32FC4
)
))
OCL_TEST_SIZES
,
OCL_TEST_TYPES_134
))
{
{
const
Size_MatType_t
params
=
GetParam
();
const
Size_MatType_t
params
=
GetParam
();
const
Size
srcSize
=
get
<
0
>
(
params
);
const
Size
srcSize
=
get
<
0
>
(
params
);
...
...
modules/core/perf/opencl/perf_dxt.cpp
View file @
66ac4621
...
@@ -54,40 +54,21 @@ namespace ocl {
...
@@ -54,40 +54,21 @@ namespace ocl {
///////////// dft ////////////////////////
///////////// dft ////////////////////////
enum
OCL_FFT_TYPE
typedef
tuple
<
Size
,
int
>
DftParams
;
{
R2R
=
0
,
// real to real (CCS)
C2R
=
1
,
// complex to real
R2C
=
2
,
// real to complex
C2C
=
3
// complex to complex
};
typedef
tuple
<
OCL_FFT_TYPE
,
Size
,
int
>
DftParams
;
typedef
TestBaseWithParam
<
DftParams
>
DftFixture
;
typedef
TestBaseWithParam
<
DftParams
>
DftFixture
;
OCL_PERF_TEST_P
(
DftFixture
,
Dft
,
::
testing
::
Combine
(
Values
(
C2C
,
R2R
,
C2R
,
R2C
),
OCL_PERF_TEST_P
(
DftFixture
,
Dft
,
::
testing
::
Combine
(
Values
(
OCL_SIZE_1
,
OCL_SIZE_2
,
OCL_SIZE_3
),
Values
(
OCL_SIZE_1
,
OCL_SIZE_2
,
OCL_SIZE_3
,
Size
(
1024
,
1024
),
Size
(
512
,
512
),
Size
(
2048
,
2048
)),
Values
((
int
)
DFT_ROWS
,
(
int
)
DFT_SCALE
,
(
int
)
DFT_INVERSE
,
Values
((
int
)
0
,
(
int
)
DFT_ROWS
,
(
int
)
DFT_SCALE
/*, (int)DFT_INVERSE,
(
int
)
DFT_INVERSE
|
DFT_SCALE
,
(
int
)
DFT_ROWS
|
DFT_INVERSE
)))
(int)DFT_INVERSE | DFT_SCALE, (int)DFT_ROWS | DFT_INVERSE*/
)))
{
{
const
DftParams
params
=
GetParam
();
const
DftParams
params
=
GetParam
();
const
int
dft_type
=
get
<
0
>
(
params
);
const
Size
srcSize
=
get
<
0
>
(
params
);
const
Size
srcSize
=
get
<
1
>
(
params
);
const
int
flags
=
get
<
1
>
(
params
);
int
flags
=
get
<
2
>
(
params
);
UMat
src
(
srcSize
,
CV_32FC2
),
dst
(
srcSize
,
CV_32FC2
);
int
in_cn
,
out_cn
;
switch
(
dft_type
)
{
case
R2R
:
flags
|=
cv
::
DFT_REAL_OUTPUT
;
in_cn
=
1
;
out_cn
=
1
;
break
;
case
C2R
:
flags
|=
cv
::
DFT_REAL_OUTPUT
;
in_cn
=
2
;
out_cn
=
2
;
break
;
case
R2C
:
flags
|=
cv
::
DFT_COMPLEX_OUTPUT
;
in_cn
=
1
;
out_cn
=
2
;
break
;
case
C2C
:
flags
|=
cv
::
DFT_COMPLEX_OUTPUT
;
in_cn
=
2
;
out_cn
=
2
;
break
;
}
UMat
src
(
srcSize
,
CV_MAKE_TYPE
(
CV_32F
,
in_cn
)),
dst
(
srcSize
,
CV_MAKE_TYPE
(
CV_32F
,
out_cn
));
declare
.
in
(
src
,
WARMUP_RNG
).
out
(
dst
);
declare
.
in
(
src
,
WARMUP_RNG
).
out
(
dst
);
OCL_TEST_CYCLE
()
cv
::
dft
(
src
,
dst
,
flags
);
OCL_TEST_CYCLE
()
cv
::
dft
(
src
,
dst
,
flags
|
DFT_COMPLEX_OUTPUT
);
SANITY_CHECK
(
dst
,
1e-3
);
SANITY_CHECK
(
dst
,
1e-3
);
}
}
...
...
modules/core/src/dxt.cpp
View file @
66ac4621
...
@@ -1781,251 +1781,11 @@ static bool ippi_DFT_R_32F(const Mat& src, Mat& dst, bool inv, int norm_flag)
...
@@ -1781,251 +1781,11 @@ static bool ippi_DFT_R_32F(const Mat& src, Mat& dst, bool inv, int norm_flag)
#endif
#endif
}
}
#ifdef HAVE_CLAMDFFT
#ifdef HAVE_OPENCL
namespace
cv
{
#define CLAMDDFT_Assert(func) \
{ \
clAmdFftStatus s = (func); \
CV_Assert(s == CLFFT_SUCCESS); \
}
class
PlanCache
{
struct
FftPlan
{
FftPlan
(
const
Size
&
_dft_size
,
int
_src_step
,
int
_dst_step
,
bool
_doubleFP
,
bool
_inplace
,
int
_flags
,
FftType
_fftType
)
:
dft_size
(
_dft_size
),
src_step
(
_src_step
),
dst_step
(
_dst_step
),
doubleFP
(
_doubleFP
),
inplace
(
_inplace
),
flags
(
_flags
),
fftType
(
_fftType
),
context
((
cl_context
)
ocl
::
Context
::
getDefault
().
ptr
()),
plHandle
(
0
)
{
bool
dft_inverse
=
(
flags
&
DFT_INVERSE
)
!=
0
;
bool
dft_scale
=
(
flags
&
DFT_SCALE
)
!=
0
;
bool
dft_rows
=
(
flags
&
DFT_ROWS
)
!=
0
;
clAmdFftLayout
inLayout
=
CLFFT_REAL
,
outLayout
=
CLFFT_REAL
;
clAmdFftDim
dim
=
dft_size
.
height
==
1
||
dft_rows
?
CLFFT_1D
:
CLFFT_2D
;
size_t
batchSize
=
dft_rows
?
dft_size
.
height
:
1
;
size_t
clLengthsIn
[
3
]
=
{
dft_size
.
width
,
dft_rows
?
1
:
dft_size
.
height
,
1
};
size_t
clStridesIn
[
3
]
=
{
1
,
1
,
1
};
size_t
clStridesOut
[
3
]
=
{
1
,
1
,
1
};
int
elemSize
=
doubleFP
?
sizeof
(
double
)
:
sizeof
(
float
);
switch
(
fftType
)
{
case
C2C
:
inLayout
=
CLFFT_COMPLEX_INTERLEAVED
;
outLayout
=
CLFFT_COMPLEX_INTERLEAVED
;
clStridesIn
[
1
]
=
src_step
/
(
elemSize
<<
1
);
clStridesOut
[
1
]
=
dst_step
/
(
elemSize
<<
1
);
break
;
case
R2C
:
inLayout
=
CLFFT_REAL
;
outLayout
=
CLFFT_HERMITIAN_INTERLEAVED
;
clStridesIn
[
1
]
=
src_step
/
elemSize
;
clStridesOut
[
1
]
=
dst_step
/
(
elemSize
<<
1
);
break
;
case
C2R
:
inLayout
=
CLFFT_HERMITIAN_INTERLEAVED
;
outLayout
=
CLFFT_REAL
;
clStridesIn
[
1
]
=
src_step
/
(
elemSize
<<
1
);
clStridesOut
[
1
]
=
dst_step
/
elemSize
;
break
;
case
R2R
:
default
:
CV_Error
(
Error
::
StsNotImplemented
,
"AMD Fft does not support this type"
);
break
;
}
clStridesIn
[
2
]
=
dft_rows
?
clStridesIn
[
1
]
:
dft_size
.
width
*
clStridesIn
[
1
];
clStridesOut
[
2
]
=
dft_rows
?
clStridesOut
[
1
]
:
dft_size
.
width
*
clStridesOut
[
1
];
CLAMDDFT_Assert
(
clAmdFftCreateDefaultPlan
(
&
plHandle
,
(
cl_context
)
ocl
::
Context
::
getDefault
().
ptr
(),
dim
,
clLengthsIn
))
// setting plan properties
CLAMDDFT_Assert
(
clAmdFftSetPlanPrecision
(
plHandle
,
doubleFP
?
CLFFT_DOUBLE
:
CLFFT_SINGLE
));
CLAMDDFT_Assert
(
clAmdFftSetResultLocation
(
plHandle
,
inplace
?
CLFFT_INPLACE
:
CLFFT_OUTOFPLACE
))
CLAMDDFT_Assert
(
clAmdFftSetLayout
(
plHandle
,
inLayout
,
outLayout
))
CLAMDDFT_Assert
(
clAmdFftSetPlanBatchSize
(
plHandle
,
batchSize
))
CLAMDDFT_Assert
(
clAmdFftSetPlanInStride
(
plHandle
,
dim
,
clStridesIn
))
CLAMDDFT_Assert
(
clAmdFftSetPlanOutStride
(
plHandle
,
dim
,
clStridesOut
))
CLAMDDFT_Assert
(
clAmdFftSetPlanDistance
(
plHandle
,
clStridesIn
[
dim
],
clStridesOut
[
dim
]))
float
scale
=
dft_scale
?
1.0
f
/
(
dft_rows
?
dft_size
.
width
:
dft_size
.
area
())
:
1.0
f
;
CLAMDDFT_Assert
(
clAmdFftSetPlanScale
(
plHandle
,
dft_inverse
?
CLFFT_BACKWARD
:
CLFFT_FORWARD
,
scale
))
// ready to bake
cl_command_queue
queue
=
(
cl_command_queue
)
ocl
::
Queue
::
getDefault
().
ptr
();
CLAMDDFT_Assert
(
clAmdFftBakePlan
(
plHandle
,
1
,
&
queue
,
NULL
,
NULL
))
}
~
FftPlan
()
{
// clAmdFftDestroyPlan(&plHandle);
}
friend
class
PlanCache
;
private
:
Size
dft_size
;
int
src_step
,
dst_step
;
bool
doubleFP
;
bool
inplace
;
int
flags
;
FftType
fftType
;
cl_context
context
;
clAmdFftPlanHandle
plHandle
;
};
public
:
static
PlanCache
&
getInstance
()
{
static
PlanCache
planCache
;
return
planCache
;
}
clAmdFftPlanHandle
getPlanHandle
(
const
Size
&
dft_size
,
int
src_step
,
int
dst_step
,
bool
doubleFP
,
bool
inplace
,
int
flags
,
FftType
fftType
)
{
cl_context
currentContext
=
(
cl_context
)
ocl
::
Context
::
getDefault
().
ptr
();
for
(
size_t
i
=
0
,
size
=
planStorage
.
size
();
i
<
size
;
++
i
)
{
const
FftPlan
*
const
plan
=
planStorage
[
i
];
if
(
plan
->
dft_size
==
dft_size
&&
plan
->
flags
==
flags
&&
plan
->
src_step
==
src_step
&&
plan
->
dst_step
==
dst_step
&&
plan
->
doubleFP
==
doubleFP
&&
plan
->
fftType
==
fftType
&&
plan
->
inplace
==
inplace
)
{
if
(
plan
->
context
!=
currentContext
)
{
planStorage
.
erase
(
planStorage
.
begin
()
+
i
);
break
;
}
return
plan
->
plHandle
;
}
}
// no baked plan is found, so let's create a new one
FftPlan
*
newPlan
=
new
FftPlan
(
dft_size
,
src_step
,
dst_step
,
doubleFP
,
inplace
,
flags
,
fftType
);
planStorage
.
push_back
(
newPlan
);
return
newPlan
->
plHandle
;
}
~
PlanCache
()
{
for
(
std
::
vector
<
FftPlan
*>::
iterator
i
=
planStorage
.
begin
(),
end
=
planStorage
.
end
();
i
!=
end
;
++
i
)
delete
(
*
i
);
planStorage
.
clear
();
}
protected
:
PlanCache
()
:
planStorage
()
{
}
std
::
vector
<
FftPlan
*>
planStorage
;
};
extern
"C"
{
static
void
CL_CALLBACK
oclCleanupCallback
(
cl_event
e
,
cl_int
,
void
*
p
)
{
UMatData
*
u
=
(
UMatData
*
)
p
;
if
(
u
&&
CV_XADD
(
&
u
->
urefcount
,
-
1
)
==
1
)
u
->
currAllocator
->
deallocate
(
u
);
u
=
0
;
clReleaseEvent
(
e
),
e
=
0
;
}
}
static
bool
ocl_dft_amdfft
(
InputArray
_src
,
OutputArray
_dst
,
int
flags
)
{
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
Size
ssize
=
_src
.
size
();
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
if
(
(
!
doubleSupport
&&
depth
==
CV_64F
)
||
!
(
type
==
CV_32FC1
||
type
==
CV_32FC2
||
type
==
CV_64FC1
||
type
==
CV_64FC2
)
||
_src
.
offset
()
!=
0
)
return
false
;
// if is not a multiplication of prime numbers { 2, 3, 5 }
if
(
ssize
.
area
()
!=
getOptimalDFTSize
(
ssize
.
area
()))
return
false
;
int
dst_complex_input
=
cn
==
2
?
1
:
0
;
bool
dft_inverse
=
(
flags
&
DFT_INVERSE
)
!=
0
?
1
:
0
;
int
dft_complex_output
=
(
flags
&
DFT_COMPLEX_OUTPUT
)
!=
0
;
bool
dft_real_output
=
(
flags
&
DFT_REAL_OUTPUT
)
!=
0
;
CV_Assert
(
dft_complex_output
+
dft_real_output
<
2
);
FftType
fftType
=
(
FftType
)(
dst_complex_input
<<
0
|
dft_complex_output
<<
1
);
switch
(
fftType
)
{
case
C2C
:
_dst
.
create
(
ssize
.
height
,
ssize
.
width
,
CV_MAKE_TYPE
(
depth
,
2
));
break
;
case
R2C
:
// TODO implement it if possible
case
C2R
:
// TODO implement it if possible
case
R2R
:
// AMD Fft does not support this type
default
:
return
false
;
}
UMat
src
=
_src
.
getUMat
(),
dst
=
_dst
.
getUMat
();
bool
inplace
=
src
.
u
==
dst
.
u
;
clAmdFftPlanHandle
plHandle
=
PlanCache
::
getInstance
().
getPlanHandle
(
ssize
,
(
int
)
src
.
step
,
(
int
)
dst
.
step
,
depth
==
CV_64F
,
inplace
,
flags
,
fftType
);
// get the bufferSize
size_t
bufferSize
=
0
;
CLAMDDFT_Assert
(
clAmdFftGetTmpBufSize
(
plHandle
,
&
bufferSize
))
UMat
tmpBuffer
(
1
,
(
int
)
bufferSize
,
CV_8UC1
);
cl_mem
srcarg
=
(
cl_mem
)
src
.
handle
(
ACCESS_READ
);
cl_mem
dstarg
=
(
cl_mem
)
dst
.
handle
(
ACCESS_RW
);
cl_command_queue
queue
=
(
cl_command_queue
)
ocl
::
Queue
::
getDefault
().
ptr
();
cl_event
e
=
0
;
CLAMDDFT_Assert
(
clAmdFftEnqueueTransform
(
plHandle
,
dft_inverse
?
CLFFT_BACKWARD
:
CLFFT_FORWARD
,
1
,
&
queue
,
0
,
NULL
,
&
e
,
&
srcarg
,
&
dstarg
,
(
cl_mem
)
tmpBuffer
.
handle
(
ACCESS_RW
)))
tmpBuffer
.
addref
();
clSetEventCallback
(
e
,
CL_COMPLETE
,
oclCleanupCallback
,
tmpBuffer
.
u
);
return
true
;
}
#undef DFT_ASSERT
}
#endif // HAVE_CLAMDFFT
namespace
cv
namespace
cv
{
{
#ifdef HAVE_OPENCL
enum
FftType
enum
FftType
{
{
R2R
=
0
,
R2R
=
0
,
...
@@ -2038,7 +1798,7 @@ static void ocl_getRadixes(int cols, std::vector<int>& radixes, std::vector<int>
...
@@ -2038,7 +1798,7 @@ static void ocl_getRadixes(int cols, std::vector<int>& radixes, std::vector<int>
{
{
int
factors
[
34
];
int
factors
[
34
];
int
nf
=
DFTFactorize
(
cols
,
factors
);
int
nf
=
DFTFactorize
(
cols
,
factors
);
int
n
=
1
;
int
n
=
1
;
int
factor_index
=
0
;
int
factor_index
=
0
;
min_radix
=
INT_MAX
;
min_radix
=
INT_MAX
;
...
@@ -2118,7 +1878,7 @@ struct OCL_FftPlan
...
@@ -2118,7 +1878,7 @@ struct OCL_FftPlan
ocl_getRadixes
(
dft_size
,
radixes
,
blocks
,
min_radix
);
ocl_getRadixes
(
dft_size
,
radixes
,
blocks
,
min_radix
);
thread_count
=
dft_size
/
min_radix
;
thread_count
=
dft_size
/
min_radix
;
if
(
thread_count
>
ocl
::
Device
::
getDefault
().
maxWorkGroupSize
())
if
(
thread_count
>
(
int
)
ocl
::
Device
::
getDefault
().
maxWorkGroupSize
())
{
{
status
=
false
;
status
=
false
;
return
;
return
;
...
@@ -2141,13 +1901,13 @@ struct OCL_FftPlan
...
@@ -2141,13 +1901,13 @@ struct OCL_FftPlan
Mat
tw
(
1
,
twiddle_size
,
CV_32FC2
);
Mat
tw
(
1
,
twiddle_size
,
CV_32FC2
);
float
*
ptr
=
tw
.
ptr
<
float
>
();
float
*
ptr
=
tw
.
ptr
<
float
>
();
int
ptr_index
=
0
;
int
ptr_index
=
0
;
n
=
1
;
n
=
1
;
for
(
size_t
i
=
0
;
i
<
radixes
.
size
();
i
++
)
for
(
size_t
i
=
0
;
i
<
radixes
.
size
();
i
++
)
{
{
int
radix
=
radixes
[
i
];
int
radix
=
radixes
[
i
];
n
*=
radix
;
n
*=
radix
;
for
(
int
j
=
1
;
j
<
radix
;
j
++
)
for
(
int
j
=
1
;
j
<
radix
;
j
++
)
{
{
double
theta
=
-
CV_TWO_PI
*
j
/
n
;
double
theta
=
-
CV_TWO_PI
*
j
/
n
;
...
@@ -2157,7 +1917,7 @@ struct OCL_FftPlan
...
@@ -2157,7 +1917,7 @@ struct OCL_FftPlan
ptr
[
ptr_index
++
]
=
(
float
)
cos
(
k
*
theta
);
ptr
[
ptr_index
++
]
=
(
float
)
cos
(
k
*
theta
);
ptr
[
ptr_index
++
]
=
(
float
)
sin
(
k
*
theta
);
ptr
[
ptr_index
++
]
=
(
float
)
sin
(
k
*
theta
);
}
}
}
}
}
}
twiddles
=
tw
.
getUMat
(
ACCESS_READ
);
twiddles
=
tw
.
getUMat
(
ACCESS_READ
);
...
@@ -2165,7 +1925,7 @@ struct OCL_FftPlan
...
@@ -2165,7 +1925,7 @@ struct OCL_FftPlan
dft_size
,
dft_size
/
thread_count
,
radix_processing
.
c_str
());
dft_size
,
dft_size
/
thread_count
,
radix_processing
.
c_str
());
}
}
bool
enqueueTransform
(
InputArray
_src
,
OutputArray
_dst
,
int
dft_size
,
int
flags
,
int
fftType
,
bool
rows
=
true
)
const
bool
enqueueTransform
(
InputArray
_src
,
OutputArray
_dst
,
int
num_dfts
,
int
flags
,
int
fftType
,
bool
rows
=
true
)
const
{
{
if
(
!
status
)
if
(
!
status
)
return
false
;
return
false
;
...
@@ -2177,7 +1937,7 @@ struct OCL_FftPlan
...
@@ -2177,7 +1937,7 @@ struct OCL_FftPlan
size_t
localsize
[
2
];
size_t
localsize
[
2
];
String
kernel_name
;
String
kernel_name
;
bool
is1d
=
(
flags
&
DFT_ROWS
)
!=
0
||
dft_size
==
1
;
bool
is1d
=
(
flags
&
DFT_ROWS
)
!=
0
||
num_dfts
==
1
;
bool
inv
=
(
flags
&
DFT_INVERSE
)
!=
0
;
bool
inv
=
(
flags
&
DFT_INVERSE
)
!=
0
;
String
options
=
buildOptions
;
String
options
=
buildOptions
;
...
@@ -2191,7 +1951,7 @@ struct OCL_FftPlan
...
@@ -2191,7 +1951,7 @@ struct OCL_FftPlan
}
}
else
else
{
{
globalsize
[
0
]
=
dft_size
;
globalsize
[
1
]
=
thread_count
;
globalsize
[
0
]
=
num_dfts
;
globalsize
[
1
]
=
thread_count
;
localsize
[
0
]
=
1
;
localsize
[
1
]
=
thread_count
;
localsize
[
0
]
=
1
;
localsize
[
1
]
=
thread_count
;
kernel_name
=
!
inv
?
"fft_multi_radix_cols"
:
"ifft_multi_radix_cols"
;
kernel_name
=
!
inv
?
"fft_multi_radix_cols"
:
"ifft_multi_radix_cols"
;
if
(
flags
&
DFT_SCALE
)
if
(
flags
&
DFT_SCALE
)
...
@@ -2201,7 +1961,7 @@ struct OCL_FftPlan
...
@@ -2201,7 +1961,7 @@ struct OCL_FftPlan
options
+=
src
.
channels
()
==
1
?
" -D REAL_INPUT"
:
" -D COMPLEX_INPUT"
;
options
+=
src
.
channels
()
==
1
?
" -D REAL_INPUT"
:
" -D COMPLEX_INPUT"
;
options
+=
dst
.
channels
()
==
1
?
" -D REAL_OUTPUT"
:
" -D COMPLEX_OUTPUT"
;
options
+=
dst
.
channels
()
==
1
?
" -D REAL_OUTPUT"
:
" -D COMPLEX_OUTPUT"
;
options
+=
is1d
?
" -D IS_1D"
:
""
;
options
+=
is1d
?
" -D IS_1D"
:
""
;
if
(
!
inv
)
if
(
!
inv
)
{
{
if
((
is1d
&&
src
.
channels
()
==
1
)
||
(
rows
&&
(
fftType
==
R2R
)))
if
((
is1d
&&
src
.
channels
()
==
1
)
||
(
rows
&&
(
fftType
==
R2R
)))
...
@@ -2219,7 +1979,7 @@ struct OCL_FftPlan
...
@@ -2219,7 +1979,7 @@ struct OCL_FftPlan
if
(
k
.
empty
())
if
(
k
.
empty
())
return
false
;
return
false
;
k
.
args
(
ocl
::
KernelArg
::
ReadOnly
(
src
),
ocl
::
KernelArg
::
WriteOnly
(
dst
),
ocl
::
KernelArg
::
PtrReadOnly
(
twiddles
),
thread_count
,
dft_size
);
k
.
args
(
ocl
::
KernelArg
::
ReadOnly
(
src
),
ocl
::
KernelArg
::
WriteOnly
(
dst
),
ocl
::
KernelArg
::
PtrReadOnly
(
twiddles
),
thread_count
,
num_dfts
);
return
k
.
run
(
2
,
globalsize
,
localsize
,
false
);
return
k
.
run
(
2
,
globalsize
,
localsize
,
false
);
}
}
};
};
...
@@ -2232,7 +1992,7 @@ public:
...
@@ -2232,7 +1992,7 @@ public:
static
OCL_FftPlanCache
planCache
;
static
OCL_FftPlanCache
planCache
;
return
planCache
;
return
planCache
;
}
}
OCL_FftPlan
*
getFftPlan
(
int
dft_size
)
OCL_FftPlan
*
getFftPlan
(
int
dft_size
)
{
{
for
(
size_t
i
=
0
,
size
=
planStorage
.
size
();
i
<
size
;
++
i
)
for
(
size_t
i
=
0
,
size
=
planStorage
.
size
();
i
<
size
;
++
i
)
...
@@ -2280,11 +2040,9 @@ static bool ocl_dft_C2C_cols(InputArray _src, OutputArray _dst, int nonzero_cols
...
@@ -2280,11 +2040,9 @@ static bool ocl_dft_C2C_cols(InputArray _src, OutputArray _dst, int nonzero_cols
static
bool
ocl_dft
(
InputArray
_src
,
OutputArray
_dst
,
int
flags
,
int
nonzero_rows
)
static
bool
ocl_dft
(
InputArray
_src
,
OutputArray
_dst
,
int
flags
,
int
nonzero_rows
)
{
{
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
int
type
=
_src
.
type
(),
cn
=
CV_MAT_CN
(
type
);
Size
ssize
=
_src
.
size
();
Size
ssize
=
_src
.
size
();
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
if
(
!
(
type
==
CV_32FC1
||
type
==
CV_32FC2
)
)
if
(
(
!
doubleSupport
&&
depth
==
CV_64F
)
||
!
(
type
==
CV_32FC1
||
type
==
CV_32FC2
||
type
==
CV_64FC1
||
type
==
CV_64FC2
))
return
false
;
return
false
;
// if is not a multiplication of prime numbers { 2, 3, 5 }
// if is not a multiplication of prime numbers { 2, 3, 5 }
...
@@ -2325,7 +2083,7 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro
...
@@ -2325,7 +2083,7 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro
if
(
fftType
==
C2C
||
fftType
==
R2C
)
if
(
fftType
==
C2C
||
fftType
==
R2C
)
{
{
// complex output
// complex output
_dst
.
create
(
src
.
size
(),
CV_32FC2
);
_dst
.
create
(
src
.
size
(),
CV_32FC2
);
output
=
_dst
.
getUMat
();
output
=
_dst
.
getUMat
();
}
}
else
else
...
@@ -2381,7 +2139,7 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro
...
@@ -2381,7 +2139,7 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro
int
nonzero_cols
=
src
.
cols
/
2
+
1
;
int
nonzero_cols
=
src
.
cols
/
2
+
1
;
if
(
!
ocl_dft_C2C_cols
(
src
,
output
,
nonzero_cols
,
flags
,
fftType
))
if
(
!
ocl_dft_C2C_cols
(
src
,
output
,
nonzero_cols
,
flags
,
fftType
))
return
false
;
return
false
;
if
(
!
ocl_dft_C2C_rows
(
output
,
_dst
,
nonzero_rows
,
flags
,
fftType
))
if
(
!
ocl_dft_C2C_rows
(
output
,
_dst
,
nonzero_rows
,
flags
,
fftType
))
return
false
;
return
false
;
}
}
...
@@ -2390,11 +2148,248 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro
...
@@ -2390,11 +2148,248 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro
return
true
;
return
true
;
}
}
}
// namespace cv;
#endif
#endif
}
// namespace cv;
#ifdef HAVE_CLAMDFFT
namespace
cv
{
#define CLAMDDFT_Assert(func) \
{ \
clAmdFftStatus s = (func); \
CV_Assert(s == CLFFT_SUCCESS); \
}
class
PlanCache
{
struct
FftPlan
{
FftPlan
(
const
Size
&
_dft_size
,
int
_src_step
,
int
_dst_step
,
bool
_doubleFP
,
bool
_inplace
,
int
_flags
,
FftType
_fftType
)
:
dft_size
(
_dft_size
),
src_step
(
_src_step
),
dst_step
(
_dst_step
),
doubleFP
(
_doubleFP
),
inplace
(
_inplace
),
flags
(
_flags
),
fftType
(
_fftType
),
context
((
cl_context
)
ocl
::
Context
::
getDefault
().
ptr
()),
plHandle
(
0
)
{
bool
dft_inverse
=
(
flags
&
DFT_INVERSE
)
!=
0
;
bool
dft_scale
=
(
flags
&
DFT_SCALE
)
!=
0
;
bool
dft_rows
=
(
flags
&
DFT_ROWS
)
!=
0
;
clAmdFftLayout
inLayout
=
CLFFT_REAL
,
outLayout
=
CLFFT_REAL
;
clAmdFftDim
dim
=
dft_size
.
height
==
1
||
dft_rows
?
CLFFT_1D
:
CLFFT_2D
;
size_t
batchSize
=
dft_rows
?
dft_size
.
height
:
1
;
size_t
clLengthsIn
[
3
]
=
{
dft_size
.
width
,
dft_rows
?
1
:
dft_size
.
height
,
1
};
size_t
clStridesIn
[
3
]
=
{
1
,
1
,
1
};
size_t
clStridesOut
[
3
]
=
{
1
,
1
,
1
};
int
elemSize
=
doubleFP
?
sizeof
(
double
)
:
sizeof
(
float
);
switch
(
fftType
)
{
case
C2C
:
inLayout
=
CLFFT_COMPLEX_INTERLEAVED
;
outLayout
=
CLFFT_COMPLEX_INTERLEAVED
;
clStridesIn
[
1
]
=
src_step
/
(
elemSize
<<
1
);
clStridesOut
[
1
]
=
dst_step
/
(
elemSize
<<
1
);
break
;
case
R2C
:
inLayout
=
CLFFT_REAL
;
outLayout
=
CLFFT_HERMITIAN_INTERLEAVED
;
clStridesIn
[
1
]
=
src_step
/
elemSize
;
clStridesOut
[
1
]
=
dst_step
/
(
elemSize
<<
1
);
break
;
case
C2R
:
inLayout
=
CLFFT_HERMITIAN_INTERLEAVED
;
outLayout
=
CLFFT_REAL
;
clStridesIn
[
1
]
=
src_step
/
(
elemSize
<<
1
);
clStridesOut
[
1
]
=
dst_step
/
elemSize
;
break
;
case
R2R
:
default
:
CV_Error
(
Error
::
StsNotImplemented
,
"AMD Fft does not support this type"
);
break
;
}
clStridesIn
[
2
]
=
dft_rows
?
clStridesIn
[
1
]
:
dft_size
.
width
*
clStridesIn
[
1
];
clStridesOut
[
2
]
=
dft_rows
?
clStridesOut
[
1
]
:
dft_size
.
width
*
clStridesOut
[
1
];
CLAMDDFT_Assert
(
clAmdFftCreateDefaultPlan
(
&
plHandle
,
(
cl_context
)
ocl
::
Context
::
getDefault
().
ptr
(),
dim
,
clLengthsIn
))
// setting plan properties
CLAMDDFT_Assert
(
clAmdFftSetPlanPrecision
(
plHandle
,
doubleFP
?
CLFFT_DOUBLE
:
CLFFT_SINGLE
));
CLAMDDFT_Assert
(
clAmdFftSetResultLocation
(
plHandle
,
inplace
?
CLFFT_INPLACE
:
CLFFT_OUTOFPLACE
))
CLAMDDFT_Assert
(
clAmdFftSetLayout
(
plHandle
,
inLayout
,
outLayout
))
CLAMDDFT_Assert
(
clAmdFftSetPlanBatchSize
(
plHandle
,
batchSize
))
CLAMDDFT_Assert
(
clAmdFftSetPlanInStride
(
plHandle
,
dim
,
clStridesIn
))
CLAMDDFT_Assert
(
clAmdFftSetPlanOutStride
(
plHandle
,
dim
,
clStridesOut
))
CLAMDDFT_Assert
(
clAmdFftSetPlanDistance
(
plHandle
,
clStridesIn
[
dim
],
clStridesOut
[
dim
]))
float
scale
=
dft_scale
?
1.0
f
/
(
dft_rows
?
dft_size
.
width
:
dft_size
.
area
())
:
1.0
f
;
CLAMDDFT_Assert
(
clAmdFftSetPlanScale
(
plHandle
,
dft_inverse
?
CLFFT_BACKWARD
:
CLFFT_FORWARD
,
scale
))
// ready to bake
cl_command_queue
queue
=
(
cl_command_queue
)
ocl
::
Queue
::
getDefault
().
ptr
();
CLAMDDFT_Assert
(
clAmdFftBakePlan
(
plHandle
,
1
,
&
queue
,
NULL
,
NULL
))
}
~
FftPlan
()
{
// clAmdFftDestroyPlan(&plHandle);
}
friend
class
PlanCache
;
private
:
Size
dft_size
;
int
src_step
,
dst_step
;
bool
doubleFP
;
bool
inplace
;
int
flags
;
FftType
fftType
;
cl_context
context
;
clAmdFftPlanHandle
plHandle
;
};
public
:
static
PlanCache
&
getInstance
()
{
static
PlanCache
planCache
;
return
planCache
;
}
clAmdFftPlanHandle
getPlanHandle
(
const
Size
&
dft_size
,
int
src_step
,
int
dst_step
,
bool
doubleFP
,
bool
inplace
,
int
flags
,
FftType
fftType
)
{
cl_context
currentContext
=
(
cl_context
)
ocl
::
Context
::
getDefault
().
ptr
();
for
(
size_t
i
=
0
,
size
=
planStorage
.
size
();
i
<
size
;
++
i
)
{
const
FftPlan
*
const
plan
=
planStorage
[
i
];
if
(
plan
->
dft_size
==
dft_size
&&
plan
->
flags
==
flags
&&
plan
->
src_step
==
src_step
&&
plan
->
dst_step
==
dst_step
&&
plan
->
doubleFP
==
doubleFP
&&
plan
->
fftType
==
fftType
&&
plan
->
inplace
==
inplace
)
{
if
(
plan
->
context
!=
currentContext
)
{
planStorage
.
erase
(
planStorage
.
begin
()
+
i
);
break
;
}
return
plan
->
plHandle
;
}
}
// no baked plan is found, so let's create a new one
FftPlan
*
newPlan
=
new
FftPlan
(
dft_size
,
src_step
,
dst_step
,
doubleFP
,
inplace
,
flags
,
fftType
);
planStorage
.
push_back
(
newPlan
);
return
newPlan
->
plHandle
;
}
~
PlanCache
()
{
for
(
std
::
vector
<
FftPlan
*>::
iterator
i
=
planStorage
.
begin
(),
end
=
planStorage
.
end
();
i
!=
end
;
++
i
)
delete
(
*
i
);
planStorage
.
clear
();
}
protected
:
PlanCache
()
:
planStorage
()
{
}
std
::
vector
<
FftPlan
*>
planStorage
;
};
extern
"C"
{
static
void
CL_CALLBACK
oclCleanupCallback
(
cl_event
e
,
cl_int
,
void
*
p
)
{
UMatData
*
u
=
(
UMatData
*
)
p
;
if
(
u
&&
CV_XADD
(
&
u
->
urefcount
,
-
1
)
==
1
)
u
->
currAllocator
->
deallocate
(
u
);
u
=
0
;
clReleaseEvent
(
e
),
e
=
0
;
}
}
static
bool
ocl_dft_amdfft
(
InputArray
_src
,
OutputArray
_dst
,
int
flags
)
{
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
Size
ssize
=
_src
.
size
();
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
if
(
(
!
doubleSupport
&&
depth
==
CV_64F
)
||
!
(
type
==
CV_32FC1
||
type
==
CV_32FC2
||
type
==
CV_64FC1
||
type
==
CV_64FC2
)
||
_src
.
offset
()
!=
0
)
return
false
;
// if is not a multiplication of prime numbers { 2, 3, 5 }
if
(
ssize
.
area
()
!=
getOptimalDFTSize
(
ssize
.
area
()))
return
false
;
int
dst_complex_input
=
cn
==
2
?
1
:
0
;
bool
dft_inverse
=
(
flags
&
DFT_INVERSE
)
!=
0
?
1
:
0
;
int
dft_complex_output
=
(
flags
&
DFT_COMPLEX_OUTPUT
)
!=
0
;
bool
dft_real_output
=
(
flags
&
DFT_REAL_OUTPUT
)
!=
0
;
CV_Assert
(
dft_complex_output
+
dft_real_output
<
2
);
FftType
fftType
=
(
FftType
)(
dst_complex_input
<<
0
|
dft_complex_output
<<
1
);
switch
(
fftType
)
{
case
C2C
:
_dst
.
create
(
ssize
.
height
,
ssize
.
width
,
CV_MAKE_TYPE
(
depth
,
2
));
break
;
case
R2C
:
// TODO implement it if possible
case
C2R
:
// TODO implement it if possible
case
R2R
:
// AMD Fft does not support this type
default
:
return
false
;
}
UMat
src
=
_src
.
getUMat
(),
dst
=
_dst
.
getUMat
();
bool
inplace
=
src
.
u
==
dst
.
u
;
clAmdFftPlanHandle
plHandle
=
PlanCache
::
getInstance
().
getPlanHandle
(
ssize
,
(
int
)
src
.
step
,
(
int
)
dst
.
step
,
depth
==
CV_64F
,
inplace
,
flags
,
fftType
);
// get the bufferSize
size_t
bufferSize
=
0
;
CLAMDDFT_Assert
(
clAmdFftGetTmpBufSize
(
plHandle
,
&
bufferSize
))
UMat
tmpBuffer
(
1
,
(
int
)
bufferSize
,
CV_8UC1
);
cl_mem
srcarg
=
(
cl_mem
)
src
.
handle
(
ACCESS_READ
);
cl_mem
dstarg
=
(
cl_mem
)
dst
.
handle
(
ACCESS_RW
);
cl_command_queue
queue
=
(
cl_command_queue
)
ocl
::
Queue
::
getDefault
().
ptr
();
cl_event
e
=
0
;
CLAMDDFT_Assert
(
clAmdFftEnqueueTransform
(
plHandle
,
dft_inverse
?
CLFFT_BACKWARD
:
CLFFT_FORWARD
,
1
,
&
queue
,
0
,
NULL
,
&
e
,
&
srcarg
,
&
dstarg
,
(
cl_mem
)
tmpBuffer
.
handle
(
ACCESS_RW
)))
tmpBuffer
.
addref
();
clSetEventCallback
(
e
,
CL_COMPLETE
,
oclCleanupCallback
,
tmpBuffer
.
u
);
return
true
;
}
#undef DFT_ASSERT
}
#endif // HAVE_CLAMDFFT
void
cv
::
dft
(
InputArray
_src0
,
OutputArray
_dst
,
int
flags
,
int
nonzero_rows
)
void
cv
::
dft
(
InputArray
_src0
,
OutputArray
_dst
,
int
flags
,
int
nonzero_rows
)
{
{
...
...
modules/core/src/ocl.cpp
View file @
66ac4621
...
@@ -3002,8 +3002,7 @@ bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
...
@@ -3002,8 +3002,7 @@ bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
sync
?
0
:
&
p
->
e
);
sync
?
0
:
&
p
->
e
);
if
(
sync
||
retval
!=
CL_SUCCESS
)
if
(
sync
||
retval
!=
CL_SUCCESS
)
{
{
int
a
=
clFinish
(
qq
);
CV_OclDbgAssert
(
clFinish
(
qq
)
==
CL_SUCCESS
);
CV_OclDbgAssert
(
a
==
CL_SUCCESS
);
p
->
cleanupUMats
();
p
->
cleanupUMats
();
}
}
else
else
...
@@ -3899,9 +3898,8 @@ public:
...
@@ -3899,9 +3898,8 @@ public:
if
(
(
accessFlags
&
ACCESS_READ
)
!=
0
&&
u
->
hostCopyObsolete
()
)
if
(
(
accessFlags
&
ACCESS_READ
)
!=
0
&&
u
->
hostCopyObsolete
()
)
{
{
AlignedDataPtr
<
false
,
true
>
alignedPtr
(
u
->
data
,
u
->
size
,
CV_OPENCL_DATA_PTR_ALIGNMENT
);
AlignedDataPtr
<
false
,
true
>
alignedPtr
(
u
->
data
,
u
->
size
,
CV_OPENCL_DATA_PTR_ALIGNMENT
);
int
a
=
clEnqueueReadBuffer
(
q
,
(
cl_mem
)
u
->
handle
,
CL_TRUE
,
0
,
CV_Assert
(
clEnqueueReadBuffer
(
q
,
(
cl_mem
)
u
->
handle
,
CL_TRUE
,
0
,
u
->
size
,
alignedPtr
.
getAlignedPtr
(),
0
,
0
,
0
);
u
->
size
,
alignedPtr
.
getAlignedPtr
(),
0
,
0
,
0
)
==
CL_SUCCESS
);
CV_Assert
(
a
==
CL_SUCCESS
);
u
->
markHostCopyObsolete
(
false
);
u
->
markHostCopyObsolete
(
false
);
}
}
}
}
...
...
modules/core/src/opencl/fft.cl
View file @
66ac4621
...
@@ -6,36 +6,36 @@
...
@@ -6,36 +6,36 @@
#
define
fft5_5
0.363271264002f
#
define
fft5_5
0.363271264002f
__attribute__
((
always_inline
))
__attribute__
((
always_inline
))
float2
mul_float2
(
float2
a,
float2
b
)
{
float2
mul_float2
(
float2
a,
float2
b
)
{
return
(
float2
)(
fma
(
a.x,
b.x,
-a.y
*
b.y
)
,
fma
(
a.x,
b.y,
a.y
*
b.x
))
;
return
(
float2
)(
fma
(
a.x,
b.x,
-a.y
*
b.y
)
,
fma
(
a.x,
b.y,
a.y
*
b.x
))
;
}
}
__attribute__
((
always_inline
))
__attribute__
((
always_inline
))
float2
twiddle
(
float2
a
)
{
float2
twiddle
(
float2
a
)
{
return
(
float2
)(
a.y,
-a.x
)
;
return
(
float2
)(
a.y,
-a.x
)
;
}
}
__attribute__
((
always_inline
))
__attribute__
((
always_inline
))
void
butterfly2
(
float2
a0,
float2
a1,
__local
float2*
smem,
__global
const
float2*
twiddles,
void
butterfly2
(
float2
a0,
float2
a1,
__local
float2*
smem,
__global
const
float2*
twiddles,
const
int
x,
const
int
block_size
)
const
int
x,
const
int
block_size
)
{
{
const
int
k
=
x
&
(
block_size
-
1
)
;
const
int
k
=
x
&
(
block_size
-
1
)
;
a1
=
mul_float2
(
twiddles[k],
a1
)
;
a1
=
mul_float2
(
twiddles[k],
a1
)
;
const
int
dst_ind
=
(
x
<<
1
)
-
k
;
const
int
dst_ind
=
(
x
<<
1
)
-
k
;
smem[dst_ind]
=
a0
+
a1
;
smem[dst_ind]
=
a0
+
a1
;
smem[dst_ind+block_size]
=
a0
-
a1
;
smem[dst_ind+block_size]
=
a0
-
a1
;
}
}
__attribute__
((
always_inline
))
__attribute__
((
always_inline
))
void
butterfly4
(
float2
a0,
float2
a1,
float2
a2,
float2
a3,
__local
float2*
smem,
__global
const
float2*
twiddles,
void
butterfly4
(
float2
a0,
float2
a1,
float2
a2,
float2
a3,
__local
float2*
smem,
__global
const
float2*
twiddles,
const
int
x,
const
int
block_size
)
const
int
x,
const
int
block_size
)
{
{
const
int
k
=
x
&
(
block_size
-
1
)
;
const
int
k
=
x
&
(
block_size
-
1
)
;
a1
=
mul_float2
(
twiddles[k],
a1
)
;
a1
=
mul_float2
(
twiddles[k],
a1
)
;
a2
=
mul_float2
(
twiddles[k
+
block_size],
a2
)
;
a2
=
mul_float2
(
twiddles[k
+
block_size],
a2
)
;
a3
=
mul_float2
(
twiddles[k
+
2*block_size],
a3
)
;
a3
=
mul_float2
(
twiddles[k
+
2*block_size],
a3
)
;
const
int
dst_ind
=
((
x
-
k
)
<<
2
)
+
k
;
const
int
dst_ind
=
((
x
-
k
)
<<
2
)
+
k
;
float2
b0
=
a0
+
a2
;
float2
b0
=
a0
+
a2
;
...
@@ -50,9 +50,9 @@ void butterfly4(float2 a0, float2 a1, float2 a2, float2 a3, __local float2* smem
...
@@ -50,9 +50,9 @@ void butterfly4(float2 a0, float2 a1, float2 a2, float2 a3, __local float2* smem
}
}
__attribute__
((
always_inline
))
__attribute__
((
always_inline
))
void
butterfly3
(
float2
a0,
float2
a1,
float2
a2,
__local
float2*
smem,
__global
const
float2*
twiddles,
void
butterfly3
(
float2
a0,
float2
a1,
float2
a2,
__local
float2*
smem,
__global
const
float2*
twiddles,
const
int
x,
const
int
block_size
)
const
int
x,
const
int
block_size
)
{
{
const
int
k
=
x
%
block_size
;
const
int
k
=
x
%
block_size
;
a1
=
mul_float2
(
twiddles[k],
a1
)
;
a1
=
mul_float2
(
twiddles[k],
a1
)
;
a2
=
mul_float2
(
twiddles[k+block_size],
a2
)
;
a2
=
mul_float2
(
twiddles[k+block_size],
a2
)
;
...
@@ -69,8 +69,8 @@ void butterfly3(float2 a0, float2 a1, float2 a2, __local float2* smem, __global
...
@@ -69,8 +69,8 @@ void butterfly3(float2 a0, float2 a1, float2 a2, __local float2* smem, __global
__attribute__
((
always_inline
))
__attribute__
((
always_inline
))
void
butterfly5
(
float2
a0,
float2
a1,
float2
a2,
float2
a3,
float2
a4,
__local
float2*
smem,
__global
const
float2*
twiddles,
void
butterfly5
(
float2
a0,
float2
a1,
float2
a2,
float2
a3,
float2
a4,
__local
float2*
smem,
__global
const
float2*
twiddles,
const
int
x,
const
int
block_size
)
const
int
x,
const
int
block_size
)
{
{
const
int
k
=
x
%
block_size
;
const
int
k
=
x
%
block_size
;
a1
=
mul_float2
(
twiddles[k],
a1
)
;
a1
=
mul_float2
(
twiddles[k],
a1
)
;
a2
=
mul_float2
(
twiddles[k
+
block_size],
a2
)
;
a2
=
mul_float2
(
twiddles[k
+
block_size],
a2
)
;
...
@@ -95,7 +95,7 @@ void butterfly5(float2 a0, float2 a1, float2 a2, float2 a3, float2 a4, __local f
...
@@ -95,7 +95,7 @@ void butterfly5(float2 a0, float2 a1, float2 a2, float2 a3, float2 a4, __local f
a4
=
fft5_3
*
(
float2
)(
-a1.y
-
a3.y,
a1.x
+
a3.x
)
;
a4
=
fft5_3
*
(
float2
)(
-a1.y
-
a3.y,
a1.x
+
a3.x
)
;
b5
=
(
float2
)(
a4.x
-
fft5_5
*
a1.y,
a4.y
+
fft5_5
*
a1.x
)
;
b5
=
(
float2
)(
a4.x
-
fft5_5
*
a1.y,
a4.y
+
fft5_5
*
a1.x
)
;
a4.x
+=
fft5_4
*
a3.y
;
a4.x
+=
fft5_4
*
a3.y
;
a4.y
-=
fft5_4
*
a3.x
;
a4.y
-=
fft5_4
*
a3.x
;
a1
=
b0
+
b1
;
a1
=
b0
+
b1
;
...
@@ -109,7 +109,7 @@ void butterfly5(float2 a0, float2 a1, float2 a2, float2 a3, float2 a4, __local f
...
@@ -109,7 +109,7 @@ void butterfly5(float2 a0, float2 a1, float2 a2, float2 a3, float2 a4, __local f
}
}
__attribute__
((
always_inline
))
__attribute__
((
always_inline
))
void
fft_radix2
(
__local
float2*
smem,
__global
const
float2*
twiddles,
const
int
x,
const
int
block_size,
const
int
t
)
void
fft_radix2
(
__local
float2*
smem,
__global
const
float2*
twiddles,
const
int
x,
const
int
block_size,
const
int
t
)
{
{
float2
a0,
a1
;
float2
a0,
a1
;
...
@@ -122,13 +122,13 @@ void fft_radix2(__local float2* smem, __global const float2* twiddles, const int
...
@@ -122,13 +122,13 @@ void fft_radix2(__local float2* smem, __global const float2* twiddles, const int
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
x
<
t
)
if
(
x
<
t
)
butterfly2
(
a0,
a1,
smem,
twiddles,
x,
block_size
)
;
butterfly2
(
a0,
a1,
smem,
twiddles,
x,
block_size
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
}
__attribute__
((
always_inline
))
__attribute__
((
always_inline
))
void
fft_radix2_B2
(
__local
float2*
smem,
__global
const
float2*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
void
fft_radix2_B2
(
__local
float2*
smem,
__global
const
float2*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
{
{
const
int
x2
=
x1
+
t/2
;
const
int
x2
=
x1
+
t/2
;
float2
a0,
a1,
a2,
a3
;
float2
a0,
a1,
a2,
a3
;
...
@@ -151,7 +151,7 @@ void fft_radix2_B2(__local float2* smem, __global const float2* twiddles, const
...
@@ -151,7 +151,7 @@ void fft_radix2_B2(__local float2* smem, __global const float2* twiddles, const
}
}
__attribute__
((
always_inline
))
__attribute__
((
always_inline
))
void
fft_radix2_B3
(
__local
float2*
smem,
__global
const
float2*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
void
fft_radix2_B3
(
__local
float2*
smem,
__global
const
float2*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
{
{
const
int
x2
=
x1
+
t/3
;
const
int
x2
=
x1
+
t/3
;
const
int
x3
=
x1
+
2*t/3
;
const
int
x3
=
x1
+
2*t/3
;
...
@@ -177,7 +177,7 @@ void fft_radix2_B3(__local float2* smem, __global const float2* twiddles, const
...
@@ -177,7 +177,7 @@ void fft_radix2_B3(__local float2* smem, __global const float2* twiddles, const
}
}
__attribute__
((
always_inline
))
__attribute__
((
always_inline
))
void
fft_radix2_B4
(
__local
float2*
smem,
__global
const
float2*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
void
fft_radix2_B4
(
__local
float2*
smem,
__global
const
float2*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
{
{
const
int
thread_block
=
t/4
;
const
int
thread_block
=
t/4
;
const
int
x2
=
x1
+
thread_block
;
const
int
x2
=
x1
+
thread_block
;
...
@@ -207,7 +207,7 @@ void fft_radix2_B4(__local float2* smem, __global const float2* twiddles, const
...
@@ -207,7 +207,7 @@ void fft_radix2_B4(__local float2* smem, __global const float2* twiddles, const
}
}
__attribute__
((
always_inline
))
__attribute__
((
always_inline
))
void
fft_radix2_B5
(
__local
float2*
smem,
__global
const
float2*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
void
fft_radix2_B5
(
__local
float2*
smem,
__global
const
float2*
twiddles,
const
int
x1,
const
int
block_size,
const
int
t
)
{
{
const
int
thread_block
=
t/5
;
const
int
thread_block
=
t/5
;
const
int
x2
=
x1
+
thread_block
;
const
int
x2
=
x1
+
thread_block
;
...
@@ -326,7 +326,7 @@ void fft_radix8(__local float2* smem, __global const float2* twiddles, const int
...
@@ -326,7 +326,7 @@ void fft_radix8(__local float2* smem, __global const float2* twiddles, const int
a7
=
mul_float2
(
twiddles[k+6*block_size],smem[x+7*t]
)
;
a7
=
mul_float2
(
twiddles[k+6*block_size],smem[x+7*t]
)
;
float2
b0,
b1,
b6,
b7
;
float2
b0,
b1,
b6,
b7
;
b0
=
a0
+
a4
;
b0
=
a0
+
a4
;
a4
=
a0
-
a4
;
a4
=
a0
-
a4
;
b1
=
a1
+
a5
;
b1
=
a1
+
a5
;
...
@@ -335,7 +335,7 @@ void fft_radix8(__local float2* smem, __global const float2* twiddles, const int
...
@@ -335,7 +335,7 @@ void fft_radix8(__local float2* smem, __global const float2* twiddles, const int
b6
=
twiddle
(
a2
-
a6
)
;
b6
=
twiddle
(
a2
-
a6
)
;
a2
=
a2
+
a6
;
a2
=
a2
+
a6
;
b7
=
a3
-
a7
;
b7
=
a3
-
a7
;
b7
=
(
float2
)(
SQRT_2
)
*
(
float2
)(
-b7.x
+
b7.y,
-b7.x
-
b7.y
)
;
b7
=
(
float2
)(
SQRT_2
)
*
(
float2
)(
-b7.x
+
b7.y,
-b7.x
-
b7.y
)
;
a3
=
a3
+
a7
;
a3
=
a3
+
a7
;
a0
=
b0
+
a2
;
a0
=
b0
+
a2
;
...
@@ -571,10 +571,15 @@ __kernel void fft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
...
@@ -571,10 +571,15 @@ __kernel void fft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
}
}
else
else
{
{
//
fill
with
zero
other
rows
#
ifdef
COMPLEX_OUTPUT
__global
float2*
dst
=
(
__global
float2*
)(
dst_ptr
+
mad24
(
y,
dst_step,
dst_offset
))
;
__global
float2*
dst
=
(
__global
float2*
)(
dst_ptr
+
mad24
(
y,
dst_step,
dst_offset
))
;
#
else
__global
float*
dst
=
(
__global
float*
)(
dst_ptr
+
mad24
(
y,
dst_step,
dst_offset
))
;
#
endif
#
pragma
unroll
#
pragma
unroll
for
(
int
i=x
; i<dst_cols; i+=block_size)
for
(
int
i=x
; i<dst_cols; i+=block_size)
dst[i]
=
(
float2
)
0.f
;
dst[i]
=
0.f
;
}
}
}
}
...
@@ -658,7 +663,7 @@ __kernel void ifft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
...
@@ -658,7 +663,7 @@ __kernel void ifft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
__global
const
float2*
twiddles
=
(
__global
float2*
)
twiddles_ptr
;
__global
const
float2*
twiddles
=
(
__global
float2*
)
twiddles_ptr
;
const
int
ind
=
x
;
const
int
ind
=
x
;
#
if
defined
(
COMPLEX_INPUT
)
&&
!defined
(
NO_CONJUGATE
)
#
if
defined
(
COMPLEX_INPUT
)
&&
!defined
(
NO_CONJUGATE
)
__global
const
float2*
src
=
(
__global
const
float2*
)(
src_ptr
+
mad24
(
y,
src_step,
mad24
(
x,
(
int
)(
sizeof
(
float
)
*2
)
,
src_offset
)))
;
__global
const
float2*
src
=
(
__global
const
float2*
)(
src_ptr
+
mad24
(
y,
src_step,
mad24
(
x,
(
int
)(
sizeof
(
float
)
*2
)
,
src_offset
)))
;
#
pragma
unroll
#
pragma
unroll
for
(
int
i=0
; i<kercn; i++)
for
(
int
i=0
; i<kercn; i++)
...
@@ -667,12 +672,9 @@ __kernel void ifft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
...
@@ -667,12 +672,9 @@ __kernel void ifft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
smem[x+i*block_size].y
=
-src[i*block_size].y
;
smem[x+i*block_size].y
=
-src[i*block_size].y
;
}
}
#
else
#
else
__global
const
float2*
src
;
#
if
!defined
(
REAL_INPUT
)
&&
defined
(
NO_CONJUGATE
)
#
if
!defined
(
REAL_INPUT
)
&&
defined
(
NO_CONJUGATE
)
src
=
(
__global
const
float2*
)(
src_ptr
+
mad24
(
y,
src_step,
mad24
(
2
,
(
int
)
sizeof
(
float
)
,
src_offset
)))
;
__global
const
float2*
src
=
(
__global
const
float2*
)(
src_ptr
+
mad24
(
y,
src_step,
mad24
(
2
,
(
int
)
sizeof
(
float
)
,
src_offset
)))
;
#
else
src
=
(
__global
const
float2*
)(
src_ptr
+
mad24
(
y,
src_step,
mad24
(
1
,
(
int
)
sizeof
(
float
)
,
src_offset
)))
;
#
endif
#
pragma
unroll
#
pragma
unroll
for
(
int
i=x
; i<(LOCAL_SIZE-1)/2; i+=block_size)
for
(
int
i=x
; i<(LOCAL_SIZE-1)/2; i+=block_size)
...
@@ -681,6 +683,20 @@ __kernel void ifft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
...
@@ -681,6 +683,20 @@ __kernel void ifft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
smem[i+1].y
=
-src[i].y
;
smem[i+1].y
=
-src[i].y
;
smem[LOCAL_SIZE-i-1]
=
src[i]
;
smem[LOCAL_SIZE-i-1]
=
src[i]
;
}
}
#
else
#
pragma
unroll
for
(
int
i=x
; i<(LOCAL_SIZE-1)/2; i+=block_size)
{
float2
src
=
vload2
(
0
,
(
__global
const
float*
)(
src_ptr
+
mad24
(
y,
src_step,
mad24
(
2*i+1,
(
int
)
sizeof
(
float
)
,
src_offset
))))
;
smem[i+1].x
=
src.x
;
smem[i+1].y
=
-src.y
;
smem[LOCAL_SIZE-i-1]
=
src
;
}
#
endif
if
(
x==0
)
if
(
x==0
)
{
{
smem[0].x
=
*
(
__global
const
float*
)(
src_ptr
+
mad24
(
y,
src_step,
src_offset
))
;
smem[0].x
=
*
(
__global
const
float*
)(
src_ptr
+
mad24
(
y,
src_step,
src_offset
))
;
...
@@ -688,7 +704,11 @@ __kernel void ifft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
...
@@ -688,7 +704,11 @@ __kernel void ifft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
if
(
LOCAL_SIZE
%
2
==0
)
if
(
LOCAL_SIZE
%
2
==0
)
{
{
#
if
!defined
(
REAL_INPUT
)
&&
defined
(
NO_CONJUGATE
)
smem[LOCAL_SIZE/2].x
=
src[LOCAL_SIZE/2-1].x
;
smem[LOCAL_SIZE/2].x
=
src[LOCAL_SIZE/2-1].x
;
#
else
smem[LOCAL_SIZE/2].x
=
*
(
__global
const
float*
)(
src_ptr
+
mad24
(
y,
src_step,
mad24
(
LOCAL_SIZE-1,
(
int
)
sizeof
(
float
)
,
src_offset
)))
;
#
endif
smem[LOCAL_SIZE/2].y
=
0.f
;
smem[LOCAL_SIZE/2].y
=
0.f
;
}
}
}
}
...
@@ -718,10 +738,15 @@ __kernel void ifft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
...
@@ -718,10 +738,15 @@ __kernel void ifft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
}
}
else
else
{
{
__global
float2*
dst
=
(
__global
float*
)(
dst_ptr
+
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)(
sizeof
(
float
)
*2
)
,
dst_offset
)))
;
//
fill
with
zero
other
rows
#
ifdef
COMPLEX_OUTPUT
__global
float2*
dst
=
(
__global
float2*
)(
dst_ptr
+
mad24
(
y,
dst_step,
dst_offset
))
;
#
else
__global
float*
dst
=
(
__global
float*
)(
dst_ptr
+
mad24
(
y,
dst_step,
dst_offset
))
;
#
endif
#
pragma
unroll
#
pragma
unroll
for
(
int
i=
0
; i<kercn; i++
)
for
(
int
i=
x
; i<dst_cols; i+=block_size
)
dst[i
*block_size]
=
(
float2
)
0.f
;
dst[i
]
=
0.f
;
}
}
}
}
...
@@ -763,13 +788,13 @@ __kernel void ifft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
...
@@ -763,13 +788,13 @@ __kernel void ifft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
rez[0].y
=
-smem[y
+
i*block_size].y
;
rez[0].y
=
-smem[y
+
i*block_size].y
;
}
}
}
}
#
else
#
else
if
(
x
<
nz
)
if
(
x
<
nz
)
{
{
__global
const
float2*
twiddles
=
(
__global
float2*
)
twiddles_ptr
;
__global
const
float2*
twiddles
=
(
__global
float2*
)
twiddles_ptr
;
const
int
ind
=
y
;
const
int
ind
=
y
;
const
int
block_size
=
LOCAL_SIZE/kercn
;
const
int
block_size
=
LOCAL_SIZE/kercn
;
__local
float2
smem[LOCAL_SIZE]
;
__local
float2
smem[LOCAL_SIZE]
;
#
ifdef
EVEN
#
ifdef
EVEN
if
(
x!=0
&&
(
x!=
(
nz-1
)))
if
(
x!=0
&&
(
x!=
(
nz-1
)))
...
@@ -781,7 +806,7 @@ __kernel void ifft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
...
@@ -781,7 +806,7 @@ __kernel void ifft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
#
pragma
unroll
#
pragma
unroll
for
(
int
i=0
; i<kercn; i++)
for
(
int
i=0
; i<kercn; i++)
{
{
float2
temp
=
*
((
__global
const
float2
*
)(
src
+
i*block_size*src_step
))
;
float2
temp
=
vload2
(
0
,
(
__global
const
float
*
)(
src
+
i*block_size*src_step
))
;
smem[y+i*block_size].x
=
temp.x
;
smem[y+i*block_size].x
=
temp.x
;
smem[y+i*block_size].y
=
-temp.y
;
smem[y+i*block_size].y
=
-temp.y
;
}
}
...
@@ -819,7 +844,7 @@ __kernel void ifft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
...
@@ -819,7 +844,7 @@ __kernel void ifft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
//
copy
data
to
dst
//
copy
data
to
dst
__global
uchar*
dst
=
dst_ptr
+
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)(
sizeof
(
float2
))
,
dst_offset
))
;
__global
uchar*
dst
=
dst_ptr
+
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)(
sizeof
(
float2
))
,
dst_offset
))
;
#
pragma
unroll
#
pragma
unroll
for
(
int
i=0
; i<kercn; i++)
for
(
int
i=0
; i<kercn; i++)
{
{
...
@@ -827,6 +852,6 @@ __kernel void ifft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
...
@@ -827,6 +852,6 @@ __kernel void ifft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
rez[0].x
=
smem[y
+
i*block_size].x
;
rez[0].x
=
smem[y
+
i*block_size].x
;
rez[0].y
=
-smem[y
+
i*block_size].y
;
rez[0].y
=
-smem[y
+
i*block_size].y
;
}
}
}
}
#
endif
#
endif
}
}
\ No newline at end of file
modules/core/test/ocl/test_dft.cpp
View file @
66ac4621
...
@@ -48,26 +48,17 @@
...
@@ -48,26 +48,17 @@
#ifdef HAVE_OPENCL
#ifdef HAVE_OPENCL
enum
OCL_FFT_TYPE
{
R2R
=
0
,
C2R
=
1
,
R2C
=
2
,
C2C
=
3
};
namespace
cvtest
{
namespace
cvtest
{
namespace
ocl
{
namespace
ocl
{
////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////
// Dft
// Dft
PARAM_TEST_CASE
(
Dft
,
cv
::
Size
,
OCL_FFT_TYPE
,
bool
,
bool
,
bool
,
bool
)
PARAM_TEST_CASE
(
Dft
,
cv
::
Size
,
MatDepth
,
bool
,
bool
,
bool
,
bool
)
{
{
cv
::
Size
dft_size
;
cv
::
Size
dft_size
;
int
dft_flags
,
depth
,
cn
,
dft_type
;
int
dft_flags
,
depth
;
bool
hint
;
bool
inplace
;
bool
is1d
;
TEST_DECLARE_INPUT_PARAMETER
(
src
);
TEST_DECLARE_INPUT_PARAMETER
(
src
);
TEST_DECLARE_OUTPUT_PARAMETER
(
dst
);
TEST_DECLARE_OUTPUT_PARAMETER
(
dst
);
...
@@ -75,60 +66,34 @@ PARAM_TEST_CASE(Dft, cv::Size, OCL_FFT_TYPE, bool, bool, bool, bool)
...
@@ -75,60 +66,34 @@ PARAM_TEST_CASE(Dft, cv::Size, OCL_FFT_TYPE, bool, bool, bool, bool)
virtual
void
SetUp
()
virtual
void
SetUp
()
{
{
dft_size
=
GET_PARAM
(
0
);
dft_size
=
GET_PARAM
(
0
);
d
ft_type
=
GET_PARAM
(
1
);
d
epth
=
GET_PARAM
(
1
);
depth
=
CV_32F
;
inplace
=
GET_PARAM
(
2
)
;
dft_flags
=
0
;
dft_flags
=
0
;
switch
(
dft_type
)
{
case
R2R
:
dft_flags
|=
cv
::
DFT_REAL_OUTPUT
;
cn
=
1
;
break
;
case
C2R
:
dft_flags
|=
cv
::
DFT_REAL_OUTPUT
;
cn
=
2
;
break
;
case
R2C
:
dft_flags
|=
cv
::
DFT_COMPLEX_OUTPUT
;
cn
=
1
;
break
;
case
C2C
:
dft_flags
|=
cv
::
DFT_COMPLEX_OUTPUT
;
cn
=
2
;
break
;
}
if
(
GET_PARAM
(
2
))
dft_flags
|=
cv
::
DFT_INVERSE
;
if
(
GET_PARAM
(
3
))
if
(
GET_PARAM
(
3
))
dft_flags
|=
cv
::
DFT_ROWS
;
dft_flags
|=
cv
::
DFT_ROWS
;
if
(
GET_PARAM
(
4
))
if
(
GET_PARAM
(
4
))
dft_flags
|=
cv
::
DFT_SCALE
;
dft_flags
|=
cv
::
DFT_SCALE
;
hint
=
GET_PARAM
(
5
);
if
(
GET_PARAM
(
5
))
is1d
=
(
dft_flags
&
DFT_ROWS
)
!=
0
||
dft_size
.
height
==
1
;
dft_flags
|=
cv
::
DFT_INVERSE
;
}
}
void
generateTestData
()
void
generateTestData
(
int
cn
=
2
)
{
{
src
=
randomMat
(
dft_size
,
CV_MAKE_TYPE
(
depth
,
cn
),
0.0
,
100.0
);
src
=
randomMat
(
dft_size
,
CV_MAKE_TYPE
(
depth
,
cn
),
0.0
,
100.0
);
usrc
=
src
.
getUMat
(
ACCESS_READ
);
usrc
=
src
.
getUMat
(
ACCESS_READ
);
if
(
inplace
)
dst
=
src
,
udst
=
usrc
;
}
}
};
};
OCL_TEST_P
(
Dft
,
Mat
)
OCL_TEST_P
(
Dft
,
C2C
)
{
{
generateTestData
();
generateTestData
();
int
nonzero_rows
=
hint
?
src
.
cols
-
randomInt
(
1
,
src
.
rows
-
1
)
:
0
;
OCL_OFF
(
cv
::
dft
(
src
,
dst
,
dft_flags
|
cv
::
DFT_COMPLEX_OUTPUT
));
OCL_OFF
(
cv
::
dft
(
src
,
dst
,
dft_flags
,
nonzero_rows
));
OCL_ON
(
cv
::
dft
(
usrc
,
udst
,
dft_flags
|
cv
::
DFT_COMPLEX_OUTPUT
));
OCL_ON
(
cv
::
dft
(
usrc
,
udst
,
dft_flags
,
nonzero_rows
));
if
(
dft_type
==
R2C
&&
is1d
&&
(
dft_flags
&
cv
::
DFT_INVERSE
)
==
0
)
{
dst
=
dst
(
cv
::
Range
(
0
,
dst
.
rows
),
cv
::
Range
(
0
,
dst
.
cols
/
2
+
1
));
udst
=
udst
(
cv
::
Range
(
0
,
udst
.
rows
),
cv
::
Range
(
0
,
udst
.
cols
/
2
+
1
));
}
//Mat gpu = udst.getMat(ACCESS_READ);
//std::cout << dst << std::endl;
//std::cout << gpu << std::endl;
//int cn = udst.channels();
//
//Mat dst1ch = dst.reshape(1);
//Mat gpu1ch = gpu.reshape(1);
//Mat df;
//absdiff(dst1ch, gpu1ch, df);
//std::cout << Mat_<int>(df) << std::endl;
double
eps
=
src
.
size
().
area
()
*
1e-4
;
double
eps
=
src
.
size
().
area
()
*
1e-4
;
EXPECT_MAT_NEAR
(
dst
,
udst
,
eps
);
EXPECT_MAT_NEAR
(
dst
,
udst
,
eps
);
...
@@ -185,15 +150,15 @@ OCL_TEST_P(MulSpectrums, Mat)
...
@@ -185,15 +150,15 @@ OCL_TEST_P(MulSpectrums, Mat)
OCL_INSTANTIATE_TEST_CASE_P
(
OCL_ImgProc
,
MulSpectrums
,
testing
::
Combine
(
Bool
(),
Bool
()));
OCL_INSTANTIATE_TEST_CASE_P
(
OCL_ImgProc
,
MulSpectrums
,
testing
::
Combine
(
Bool
(),
Bool
()));
OCL_INSTANTIATE_TEST_CASE_P
(
Core
,
Dft
,
Combine
(
Values
(
cv
::
Size
(
10
,
10
),
cv
::
Size
(
36
,
36
),
cv
::
Size
(
512
,
1
),
cv
::
Size
(
1280
,
768
)),
OCL_INSTANTIATE_TEST_CASE_P
(
Core
,
Dft
,
Combine
(
Values
(
cv
::
Size
(
2
,
3
),
cv
::
Size
(
5
,
4
),
cv
::
Size
(
25
,
20
),
Values
((
OCL_FFT_TYPE
)
R2C
,
(
OCL_FFT_TYPE
)
C2C
,
(
OCL_FFT_TYPE
)
R2R
,
(
OCL_FFT_TYPE
)
C2R
),
cv
::
Size
(
512
,
1
),
cv
::
Size
(
1024
,
768
)),
Bool
(),
// DFT_INVERSE
Values
(
CV_32F
,
CV_64F
),
Bool
(),
// inplace
Bool
(),
// DFT_ROWS
Bool
(),
// DFT_ROWS
Bool
(),
// DFT_SCALE
Bool
(),
// DFT_SCALE
Bool
()
// hint
Bool
())
// DFT_INVERSE
)
);
);
}
}
// namespace cvtest::ocl
}
}
// namespace cvtest::ocl
#endif // HAVE_OPENCL
#endif // HAVE_OPENCL
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment