Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
66ac4621
Commit
66ac4621
authored
Jul 23, 2014
by
Alexander Karsakov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Final refactoring, fixes
parent
1d2cf0e2
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
313 additions
and
349 deletions
+313
-349
perf_arithm.cpp
modules/core/perf/opencl/perf_arithm.cpp
+1
-1
perf_dxt.cpp
modules/core/perf/opencl/perf_dxt.cpp
+9
-28
dxt.cpp
modules/core/src/dxt.cpp
+246
-251
ocl.cpp
modules/core/src/ocl.cpp
+3
-5
fft.cl
modules/core/src/opencl/fft.cl
+35
-10
test_dft.cpp
modules/core/test/ocl/test_dft.cpp
+19
-54
No files found.
modules/core/perf/opencl/perf_arithm.cpp
View file @
66ac4621
...
@@ -292,7 +292,7 @@ OCL_PERF_TEST_P(MagnitudeFixture, Magnitude, ::testing::Combine(
...
@@ -292,7 +292,7 @@ OCL_PERF_TEST_P(MagnitudeFixture, Magnitude, ::testing::Combine(
typedef
Size_MatType
TransposeFixture
;
typedef
Size_MatType
TransposeFixture
;
OCL_PERF_TEST_P
(
TransposeFixture
,
Transpose
,
::
testing
::
Combine
(
OCL_PERF_TEST_P
(
TransposeFixture
,
Transpose
,
::
testing
::
Combine
(
OCL_TEST_SIZES
,
Values
(
CV_8UC1
,
CV_32FC1
,
CV_8UC2
,
CV_32FC2
,
CV_8UC4
,
CV_32FC4
)
))
OCL_TEST_SIZES
,
OCL_TEST_TYPES_134
))
{
{
const
Size_MatType_t
params
=
GetParam
();
const
Size_MatType_t
params
=
GetParam
();
const
Size
srcSize
=
get
<
0
>
(
params
);
const
Size
srcSize
=
get
<
0
>
(
params
);
...
...
modules/core/perf/opencl/perf_dxt.cpp
View file @
66ac4621
...
@@ -54,40 +54,21 @@ namespace ocl {
...
@@ -54,40 +54,21 @@ namespace ocl {
///////////// dft ////////////////////////
///////////// dft ////////////////////////
enum
OCL_FFT_TYPE
typedef
tuple
<
Size
,
int
>
DftParams
;
{
R2R
=
0
,
// real to real (CCS)
C2R
=
1
,
// complex to real
R2C
=
2
,
// real to complex
C2C
=
3
// complex to complex
};
typedef
tuple
<
OCL_FFT_TYPE
,
Size
,
int
>
DftParams
;
typedef
TestBaseWithParam
<
DftParams
>
DftFixture
;
typedef
TestBaseWithParam
<
DftParams
>
DftFixture
;
OCL_PERF_TEST_P
(
DftFixture
,
Dft
,
::
testing
::
Combine
(
Values
(
C2C
,
R2R
,
C2R
,
R2C
),
OCL_PERF_TEST_P
(
DftFixture
,
Dft
,
::
testing
::
Combine
(
Values
(
OCL_SIZE_1
,
OCL_SIZE_2
,
OCL_SIZE_3
),
Values
(
OCL_SIZE_1
,
OCL_SIZE_2
,
OCL_SIZE_3
,
Size
(
1024
,
1024
),
Size
(
512
,
512
),
Size
(
2048
,
2048
)),
Values
((
int
)
DFT_ROWS
,
(
int
)
DFT_SCALE
,
(
int
)
DFT_INVERSE
,
Values
((
int
)
0
,
(
int
)
DFT_ROWS
,
(
int
)
DFT_SCALE
/*, (int)DFT_INVERSE,
(
int
)
DFT_INVERSE
|
DFT_SCALE
,
(
int
)
DFT_ROWS
|
DFT_INVERSE
)))
(int)DFT_INVERSE | DFT_SCALE, (int)DFT_ROWS | DFT_INVERSE*/
)))
{
{
const
DftParams
params
=
GetParam
();
const
DftParams
params
=
GetParam
();
const
int
dft_type
=
get
<
0
>
(
params
);
const
Size
srcSize
=
get
<
0
>
(
params
);
const
Size
srcSize
=
get
<
1
>
(
params
);
const
int
flags
=
get
<
1
>
(
params
);
int
flags
=
get
<
2
>
(
params
);
UMat
src
(
srcSize
,
CV_32FC2
),
dst
(
srcSize
,
CV_32FC2
);
int
in_cn
,
out_cn
;
switch
(
dft_type
)
{
case
R2R
:
flags
|=
cv
::
DFT_REAL_OUTPUT
;
in_cn
=
1
;
out_cn
=
1
;
break
;
case
C2R
:
flags
|=
cv
::
DFT_REAL_OUTPUT
;
in_cn
=
2
;
out_cn
=
2
;
break
;
case
R2C
:
flags
|=
cv
::
DFT_COMPLEX_OUTPUT
;
in_cn
=
1
;
out_cn
=
2
;
break
;
case
C2C
:
flags
|=
cv
::
DFT_COMPLEX_OUTPUT
;
in_cn
=
2
;
out_cn
=
2
;
break
;
}
UMat
src
(
srcSize
,
CV_MAKE_TYPE
(
CV_32F
,
in_cn
)),
dst
(
srcSize
,
CV_MAKE_TYPE
(
CV_32F
,
out_cn
));
declare
.
in
(
src
,
WARMUP_RNG
).
out
(
dst
);
declare
.
in
(
src
,
WARMUP_RNG
).
out
(
dst
);
OCL_TEST_CYCLE
()
cv
::
dft
(
src
,
dst
,
flags
);
OCL_TEST_CYCLE
()
cv
::
dft
(
src
,
dst
,
flags
|
DFT_COMPLEX_OUTPUT
);
SANITY_CHECK
(
dst
,
1e-3
);
SANITY_CHECK
(
dst
,
1e-3
);
}
}
...
...
modules/core/src/dxt.cpp
View file @
66ac4621
...
@@ -1781,251 +1781,11 @@ static bool ippi_DFT_R_32F(const Mat& src, Mat& dst, bool inv, int norm_flag)
...
@@ -1781,251 +1781,11 @@ static bool ippi_DFT_R_32F(const Mat& src, Mat& dst, bool inv, int norm_flag)
#endif
#endif
}
}
#ifdef HAVE_CLAMDFFT
#ifdef HAVE_OPENCL
namespace
cv
{
#define CLAMDDFT_Assert(func) \
{ \
clAmdFftStatus s = (func); \
CV_Assert(s == CLFFT_SUCCESS); \
}
class
PlanCache
{
struct
FftPlan
{
FftPlan
(
const
Size
&
_dft_size
,
int
_src_step
,
int
_dst_step
,
bool
_doubleFP
,
bool
_inplace
,
int
_flags
,
FftType
_fftType
)
:
dft_size
(
_dft_size
),
src_step
(
_src_step
),
dst_step
(
_dst_step
),
doubleFP
(
_doubleFP
),
inplace
(
_inplace
),
flags
(
_flags
),
fftType
(
_fftType
),
context
((
cl_context
)
ocl
::
Context
::
getDefault
().
ptr
()),
plHandle
(
0
)
{
bool
dft_inverse
=
(
flags
&
DFT_INVERSE
)
!=
0
;
bool
dft_scale
=
(
flags
&
DFT_SCALE
)
!=
0
;
bool
dft_rows
=
(
flags
&
DFT_ROWS
)
!=
0
;
clAmdFftLayout
inLayout
=
CLFFT_REAL
,
outLayout
=
CLFFT_REAL
;
clAmdFftDim
dim
=
dft_size
.
height
==
1
||
dft_rows
?
CLFFT_1D
:
CLFFT_2D
;
size_t
batchSize
=
dft_rows
?
dft_size
.
height
:
1
;
size_t
clLengthsIn
[
3
]
=
{
dft_size
.
width
,
dft_rows
?
1
:
dft_size
.
height
,
1
};
size_t
clStridesIn
[
3
]
=
{
1
,
1
,
1
};
size_t
clStridesOut
[
3
]
=
{
1
,
1
,
1
};
int
elemSize
=
doubleFP
?
sizeof
(
double
)
:
sizeof
(
float
);
switch
(
fftType
)
{
case
C2C
:
inLayout
=
CLFFT_COMPLEX_INTERLEAVED
;
outLayout
=
CLFFT_COMPLEX_INTERLEAVED
;
clStridesIn
[
1
]
=
src_step
/
(
elemSize
<<
1
);
clStridesOut
[
1
]
=
dst_step
/
(
elemSize
<<
1
);
break
;
case
R2C
:
inLayout
=
CLFFT_REAL
;
outLayout
=
CLFFT_HERMITIAN_INTERLEAVED
;
clStridesIn
[
1
]
=
src_step
/
elemSize
;
clStridesOut
[
1
]
=
dst_step
/
(
elemSize
<<
1
);
break
;
case
C2R
:
inLayout
=
CLFFT_HERMITIAN_INTERLEAVED
;
outLayout
=
CLFFT_REAL
;
clStridesIn
[
1
]
=
src_step
/
(
elemSize
<<
1
);
clStridesOut
[
1
]
=
dst_step
/
elemSize
;
break
;
case
R2R
:
default
:
CV_Error
(
Error
::
StsNotImplemented
,
"AMD Fft does not support this type"
);
break
;
}
clStridesIn
[
2
]
=
dft_rows
?
clStridesIn
[
1
]
:
dft_size
.
width
*
clStridesIn
[
1
];
clStridesOut
[
2
]
=
dft_rows
?
clStridesOut
[
1
]
:
dft_size
.
width
*
clStridesOut
[
1
];
CLAMDDFT_Assert
(
clAmdFftCreateDefaultPlan
(
&
plHandle
,
(
cl_context
)
ocl
::
Context
::
getDefault
().
ptr
(),
dim
,
clLengthsIn
))
// setting plan properties
CLAMDDFT_Assert
(
clAmdFftSetPlanPrecision
(
plHandle
,
doubleFP
?
CLFFT_DOUBLE
:
CLFFT_SINGLE
));
CLAMDDFT_Assert
(
clAmdFftSetResultLocation
(
plHandle
,
inplace
?
CLFFT_INPLACE
:
CLFFT_OUTOFPLACE
))
CLAMDDFT_Assert
(
clAmdFftSetLayout
(
plHandle
,
inLayout
,
outLayout
))
CLAMDDFT_Assert
(
clAmdFftSetPlanBatchSize
(
plHandle
,
batchSize
))
CLAMDDFT_Assert
(
clAmdFftSetPlanInStride
(
plHandle
,
dim
,
clStridesIn
))
CLAMDDFT_Assert
(
clAmdFftSetPlanOutStride
(
plHandle
,
dim
,
clStridesOut
))
CLAMDDFT_Assert
(
clAmdFftSetPlanDistance
(
plHandle
,
clStridesIn
[
dim
],
clStridesOut
[
dim
]))
float
scale
=
dft_scale
?
1.0
f
/
(
dft_rows
?
dft_size
.
width
:
dft_size
.
area
())
:
1.0
f
;
CLAMDDFT_Assert
(
clAmdFftSetPlanScale
(
plHandle
,
dft_inverse
?
CLFFT_BACKWARD
:
CLFFT_FORWARD
,
scale
))
// ready to bake
cl_command_queue
queue
=
(
cl_command_queue
)
ocl
::
Queue
::
getDefault
().
ptr
();
CLAMDDFT_Assert
(
clAmdFftBakePlan
(
plHandle
,
1
,
&
queue
,
NULL
,
NULL
))
}
~
FftPlan
()
{
// clAmdFftDestroyPlan(&plHandle);
}
friend
class
PlanCache
;
private
:
Size
dft_size
;
int
src_step
,
dst_step
;
bool
doubleFP
;
bool
inplace
;
int
flags
;
FftType
fftType
;
cl_context
context
;
clAmdFftPlanHandle
plHandle
;
};
public
:
static
PlanCache
&
getInstance
()
{
static
PlanCache
planCache
;
return
planCache
;
}
clAmdFftPlanHandle
getPlanHandle
(
const
Size
&
dft_size
,
int
src_step
,
int
dst_step
,
bool
doubleFP
,
bool
inplace
,
int
flags
,
FftType
fftType
)
{
cl_context
currentContext
=
(
cl_context
)
ocl
::
Context
::
getDefault
().
ptr
();
for
(
size_t
i
=
0
,
size
=
planStorage
.
size
();
i
<
size
;
++
i
)
{
const
FftPlan
*
const
plan
=
planStorage
[
i
];
if
(
plan
->
dft_size
==
dft_size
&&
plan
->
flags
==
flags
&&
plan
->
src_step
==
src_step
&&
plan
->
dst_step
==
dst_step
&&
plan
->
doubleFP
==
doubleFP
&&
plan
->
fftType
==
fftType
&&
plan
->
inplace
==
inplace
)
{
if
(
plan
->
context
!=
currentContext
)
{
planStorage
.
erase
(
planStorage
.
begin
()
+
i
);
break
;
}
return
plan
->
plHandle
;
}
}
// no baked plan is found, so let's create a new one
FftPlan
*
newPlan
=
new
FftPlan
(
dft_size
,
src_step
,
dst_step
,
doubleFP
,
inplace
,
flags
,
fftType
);
planStorage
.
push_back
(
newPlan
);
return
newPlan
->
plHandle
;
}
~
PlanCache
()
{
for
(
std
::
vector
<
FftPlan
*>::
iterator
i
=
planStorage
.
begin
(),
end
=
planStorage
.
end
();
i
!=
end
;
++
i
)
delete
(
*
i
);
planStorage
.
clear
();
}
protected
:
PlanCache
()
:
planStorage
()
{
}
std
::
vector
<
FftPlan
*>
planStorage
;
};
extern
"C"
{
static
void
CL_CALLBACK
oclCleanupCallback
(
cl_event
e
,
cl_int
,
void
*
p
)
{
UMatData
*
u
=
(
UMatData
*
)
p
;
if
(
u
&&
CV_XADD
(
&
u
->
urefcount
,
-
1
)
==
1
)
u
->
currAllocator
->
deallocate
(
u
);
u
=
0
;
clReleaseEvent
(
e
),
e
=
0
;
}
}
static
bool
ocl_dft_amdfft
(
InputArray
_src
,
OutputArray
_dst
,
int
flags
)
{
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
Size
ssize
=
_src
.
size
();
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
if
(
(
!
doubleSupport
&&
depth
==
CV_64F
)
||
!
(
type
==
CV_32FC1
||
type
==
CV_32FC2
||
type
==
CV_64FC1
||
type
==
CV_64FC2
)
||
_src
.
offset
()
!=
0
)
return
false
;
// if is not a multiplication of prime numbers { 2, 3, 5 }
if
(
ssize
.
area
()
!=
getOptimalDFTSize
(
ssize
.
area
()))
return
false
;
int
dst_complex_input
=
cn
==
2
?
1
:
0
;
bool
dft_inverse
=
(
flags
&
DFT_INVERSE
)
!=
0
?
1
:
0
;
int
dft_complex_output
=
(
flags
&
DFT_COMPLEX_OUTPUT
)
!=
0
;
bool
dft_real_output
=
(
flags
&
DFT_REAL_OUTPUT
)
!=
0
;
CV_Assert
(
dft_complex_output
+
dft_real_output
<
2
);
FftType
fftType
=
(
FftType
)(
dst_complex_input
<<
0
|
dft_complex_output
<<
1
);
switch
(
fftType
)
{
case
C2C
:
_dst
.
create
(
ssize
.
height
,
ssize
.
width
,
CV_MAKE_TYPE
(
depth
,
2
));
break
;
case
R2C
:
// TODO implement it if possible
case
C2R
:
// TODO implement it if possible
case
R2R
:
// AMD Fft does not support this type
default
:
return
false
;
}
UMat
src
=
_src
.
getUMat
(),
dst
=
_dst
.
getUMat
();
bool
inplace
=
src
.
u
==
dst
.
u
;
clAmdFftPlanHandle
plHandle
=
PlanCache
::
getInstance
().
getPlanHandle
(
ssize
,
(
int
)
src
.
step
,
(
int
)
dst
.
step
,
depth
==
CV_64F
,
inplace
,
flags
,
fftType
);
// get the bufferSize
size_t
bufferSize
=
0
;
CLAMDDFT_Assert
(
clAmdFftGetTmpBufSize
(
plHandle
,
&
bufferSize
))
UMat
tmpBuffer
(
1
,
(
int
)
bufferSize
,
CV_8UC1
);
cl_mem
srcarg
=
(
cl_mem
)
src
.
handle
(
ACCESS_READ
);
cl_mem
dstarg
=
(
cl_mem
)
dst
.
handle
(
ACCESS_RW
);
cl_command_queue
queue
=
(
cl_command_queue
)
ocl
::
Queue
::
getDefault
().
ptr
();
cl_event
e
=
0
;
CLAMDDFT_Assert
(
clAmdFftEnqueueTransform
(
plHandle
,
dft_inverse
?
CLFFT_BACKWARD
:
CLFFT_FORWARD
,
1
,
&
queue
,
0
,
NULL
,
&
e
,
&
srcarg
,
&
dstarg
,
(
cl_mem
)
tmpBuffer
.
handle
(
ACCESS_RW
)))
tmpBuffer
.
addref
();
clSetEventCallback
(
e
,
CL_COMPLETE
,
oclCleanupCallback
,
tmpBuffer
.
u
);
return
true
;
}
#undef DFT_ASSERT
}
#endif // HAVE_CLAMDFFT
namespace
cv
namespace
cv
{
{
#ifdef HAVE_OPENCL
enum
FftType
enum
FftType
{
{
R2R
=
0
,
R2R
=
0
,
...
@@ -2118,7 +1878,7 @@ struct OCL_FftPlan
...
@@ -2118,7 +1878,7 @@ struct OCL_FftPlan
ocl_getRadixes
(
dft_size
,
radixes
,
blocks
,
min_radix
);
ocl_getRadixes
(
dft_size
,
radixes
,
blocks
,
min_radix
);
thread_count
=
dft_size
/
min_radix
;
thread_count
=
dft_size
/
min_radix
;
if
(
thread_count
>
ocl
::
Device
::
getDefault
().
maxWorkGroupSize
())
if
(
thread_count
>
(
int
)
ocl
::
Device
::
getDefault
().
maxWorkGroupSize
())
{
{
status
=
false
;
status
=
false
;
return
;
return
;
...
@@ -2165,7 +1925,7 @@ struct OCL_FftPlan
...
@@ -2165,7 +1925,7 @@ struct OCL_FftPlan
dft_size
,
dft_size
/
thread_count
,
radix_processing
.
c_str
());
dft_size
,
dft_size
/
thread_count
,
radix_processing
.
c_str
());
}
}
bool
enqueueTransform
(
InputArray
_src
,
OutputArray
_dst
,
int
dft_size
,
int
flags
,
int
fftType
,
bool
rows
=
true
)
const
bool
enqueueTransform
(
InputArray
_src
,
OutputArray
_dst
,
int
num_dfts
,
int
flags
,
int
fftType
,
bool
rows
=
true
)
const
{
{
if
(
!
status
)
if
(
!
status
)
return
false
;
return
false
;
...
@@ -2177,7 +1937,7 @@ struct OCL_FftPlan
...
@@ -2177,7 +1937,7 @@ struct OCL_FftPlan
size_t
localsize
[
2
];
size_t
localsize
[
2
];
String
kernel_name
;
String
kernel_name
;
bool
is1d
=
(
flags
&
DFT_ROWS
)
!=
0
||
dft_size
==
1
;
bool
is1d
=
(
flags
&
DFT_ROWS
)
!=
0
||
num_dfts
==
1
;
bool
inv
=
(
flags
&
DFT_INVERSE
)
!=
0
;
bool
inv
=
(
flags
&
DFT_INVERSE
)
!=
0
;
String
options
=
buildOptions
;
String
options
=
buildOptions
;
...
@@ -2191,7 +1951,7 @@ struct OCL_FftPlan
...
@@ -2191,7 +1951,7 @@ struct OCL_FftPlan
}
}
else
else
{
{
globalsize
[
0
]
=
dft_size
;
globalsize
[
1
]
=
thread_count
;
globalsize
[
0
]
=
num_dfts
;
globalsize
[
1
]
=
thread_count
;
localsize
[
0
]
=
1
;
localsize
[
1
]
=
thread_count
;
localsize
[
0
]
=
1
;
localsize
[
1
]
=
thread_count
;
kernel_name
=
!
inv
?
"fft_multi_radix_cols"
:
"ifft_multi_radix_cols"
;
kernel_name
=
!
inv
?
"fft_multi_radix_cols"
:
"ifft_multi_radix_cols"
;
if
(
flags
&
DFT_SCALE
)
if
(
flags
&
DFT_SCALE
)
...
@@ -2219,7 +1979,7 @@ struct OCL_FftPlan
...
@@ -2219,7 +1979,7 @@ struct OCL_FftPlan
if
(
k
.
empty
())
if
(
k
.
empty
())
return
false
;
return
false
;
k
.
args
(
ocl
::
KernelArg
::
ReadOnly
(
src
),
ocl
::
KernelArg
::
WriteOnly
(
dst
),
ocl
::
KernelArg
::
PtrReadOnly
(
twiddles
),
thread_count
,
dft_size
);
k
.
args
(
ocl
::
KernelArg
::
ReadOnly
(
src
),
ocl
::
KernelArg
::
WriteOnly
(
dst
),
ocl
::
KernelArg
::
PtrReadOnly
(
twiddles
),
thread_count
,
num_dfts
);
return
k
.
run
(
2
,
globalsize
,
localsize
,
false
);
return
k
.
run
(
2
,
globalsize
,
localsize
,
false
);
}
}
};
};
...
@@ -2280,11 +2040,9 @@ static bool ocl_dft_C2C_cols(InputArray _src, OutputArray _dst, int nonzero_cols
...
@@ -2280,11 +2040,9 @@ static bool ocl_dft_C2C_cols(InputArray _src, OutputArray _dst, int nonzero_cols
static
bool
ocl_dft
(
InputArray
_src
,
OutputArray
_dst
,
int
flags
,
int
nonzero_rows
)
static
bool
ocl_dft
(
InputArray
_src
,
OutputArray
_dst
,
int
flags
,
int
nonzero_rows
)
{
{
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
int
type
=
_src
.
type
(),
cn
=
CV_MAT_CN
(
type
);
Size
ssize
=
_src
.
size
();
Size
ssize
=
_src
.
size
();
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
if
(
!
(
type
==
CV_32FC1
||
type
==
CV_32FC2
)
)
if
(
(
!
doubleSupport
&&
depth
==
CV_64F
)
||
!
(
type
==
CV_32FC1
||
type
==
CV_32FC2
||
type
==
CV_64FC1
||
type
==
CV_64FC2
))
return
false
;
return
false
;
// if is not a multiplication of prime numbers { 2, 3, 5 }
// if is not a multiplication of prime numbers { 2, 3, 5 }
...
@@ -2390,11 +2148,248 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro
...
@@ -2390,11 +2148,248 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro
return
true
;
return
true
;
}
}
}
// namespace cv;
#endif
#endif
}
// namespace cv;
#ifdef HAVE_CLAMDFFT
namespace
cv
{
#define CLAMDDFT_Assert(func) \
{ \
clAmdFftStatus s = (func); \
CV_Assert(s == CLFFT_SUCCESS); \
}
class
PlanCache
{
struct
FftPlan
{
FftPlan
(
const
Size
&
_dft_size
,
int
_src_step
,
int
_dst_step
,
bool
_doubleFP
,
bool
_inplace
,
int
_flags
,
FftType
_fftType
)
:
dft_size
(
_dft_size
),
src_step
(
_src_step
),
dst_step
(
_dst_step
),
doubleFP
(
_doubleFP
),
inplace
(
_inplace
),
flags
(
_flags
),
fftType
(
_fftType
),
context
((
cl_context
)
ocl
::
Context
::
getDefault
().
ptr
()),
plHandle
(
0
)
{
bool
dft_inverse
=
(
flags
&
DFT_INVERSE
)
!=
0
;
bool
dft_scale
=
(
flags
&
DFT_SCALE
)
!=
0
;
bool
dft_rows
=
(
flags
&
DFT_ROWS
)
!=
0
;
clAmdFftLayout
inLayout
=
CLFFT_REAL
,
outLayout
=
CLFFT_REAL
;
clAmdFftDim
dim
=
dft_size
.
height
==
1
||
dft_rows
?
CLFFT_1D
:
CLFFT_2D
;
size_t
batchSize
=
dft_rows
?
dft_size
.
height
:
1
;
size_t
clLengthsIn
[
3
]
=
{
dft_size
.
width
,
dft_rows
?
1
:
dft_size
.
height
,
1
};
size_t
clStridesIn
[
3
]
=
{
1
,
1
,
1
};
size_t
clStridesOut
[
3
]
=
{
1
,
1
,
1
};
int
elemSize
=
doubleFP
?
sizeof
(
double
)
:
sizeof
(
float
);
switch
(
fftType
)
{
case
C2C
:
inLayout
=
CLFFT_COMPLEX_INTERLEAVED
;
outLayout
=
CLFFT_COMPLEX_INTERLEAVED
;
clStridesIn
[
1
]
=
src_step
/
(
elemSize
<<
1
);
clStridesOut
[
1
]
=
dst_step
/
(
elemSize
<<
1
);
break
;
case
R2C
:
inLayout
=
CLFFT_REAL
;
outLayout
=
CLFFT_HERMITIAN_INTERLEAVED
;
clStridesIn
[
1
]
=
src_step
/
elemSize
;
clStridesOut
[
1
]
=
dst_step
/
(
elemSize
<<
1
);
break
;
case
C2R
:
inLayout
=
CLFFT_HERMITIAN_INTERLEAVED
;
outLayout
=
CLFFT_REAL
;
clStridesIn
[
1
]
=
src_step
/
(
elemSize
<<
1
);
clStridesOut
[
1
]
=
dst_step
/
elemSize
;
break
;
case
R2R
:
default
:
CV_Error
(
Error
::
StsNotImplemented
,
"AMD Fft does not support this type"
);
break
;
}
clStridesIn
[
2
]
=
dft_rows
?
clStridesIn
[
1
]
:
dft_size
.
width
*
clStridesIn
[
1
];
clStridesOut
[
2
]
=
dft_rows
?
clStridesOut
[
1
]
:
dft_size
.
width
*
clStridesOut
[
1
];
CLAMDDFT_Assert
(
clAmdFftCreateDefaultPlan
(
&
plHandle
,
(
cl_context
)
ocl
::
Context
::
getDefault
().
ptr
(),
dim
,
clLengthsIn
))
// setting plan properties
CLAMDDFT_Assert
(
clAmdFftSetPlanPrecision
(
plHandle
,
doubleFP
?
CLFFT_DOUBLE
:
CLFFT_SINGLE
));
CLAMDDFT_Assert
(
clAmdFftSetResultLocation
(
plHandle
,
inplace
?
CLFFT_INPLACE
:
CLFFT_OUTOFPLACE
))
CLAMDDFT_Assert
(
clAmdFftSetLayout
(
plHandle
,
inLayout
,
outLayout
))
CLAMDDFT_Assert
(
clAmdFftSetPlanBatchSize
(
plHandle
,
batchSize
))
CLAMDDFT_Assert
(
clAmdFftSetPlanInStride
(
plHandle
,
dim
,
clStridesIn
))
CLAMDDFT_Assert
(
clAmdFftSetPlanOutStride
(
plHandle
,
dim
,
clStridesOut
))
CLAMDDFT_Assert
(
clAmdFftSetPlanDistance
(
plHandle
,
clStridesIn
[
dim
],
clStridesOut
[
dim
]))
float
scale
=
dft_scale
?
1.0
f
/
(
dft_rows
?
dft_size
.
width
:
dft_size
.
area
())
:
1.0
f
;
CLAMDDFT_Assert
(
clAmdFftSetPlanScale
(
plHandle
,
dft_inverse
?
CLFFT_BACKWARD
:
CLFFT_FORWARD
,
scale
))
// ready to bake
cl_command_queue
queue
=
(
cl_command_queue
)
ocl
::
Queue
::
getDefault
().
ptr
();
CLAMDDFT_Assert
(
clAmdFftBakePlan
(
plHandle
,
1
,
&
queue
,
NULL
,
NULL
))
}
~
FftPlan
()
{
// clAmdFftDestroyPlan(&plHandle);
}
friend
class
PlanCache
;
private
:
Size
dft_size
;
int
src_step
,
dst_step
;
bool
doubleFP
;
bool
inplace
;
int
flags
;
FftType
fftType
;
cl_context
context
;
clAmdFftPlanHandle
plHandle
;
};
public
:
static
PlanCache
&
getInstance
()
{
static
PlanCache
planCache
;
return
planCache
;
}
clAmdFftPlanHandle
getPlanHandle
(
const
Size
&
dft_size
,
int
src_step
,
int
dst_step
,
bool
doubleFP
,
bool
inplace
,
int
flags
,
FftType
fftType
)
{
cl_context
currentContext
=
(
cl_context
)
ocl
::
Context
::
getDefault
().
ptr
();
for
(
size_t
i
=
0
,
size
=
planStorage
.
size
();
i
<
size
;
++
i
)
{
const
FftPlan
*
const
plan
=
planStorage
[
i
];
if
(
plan
->
dft_size
==
dft_size
&&
plan
->
flags
==
flags
&&
plan
->
src_step
==
src_step
&&
plan
->
dst_step
==
dst_step
&&
plan
->
doubleFP
==
doubleFP
&&
plan
->
fftType
==
fftType
&&
plan
->
inplace
==
inplace
)
{
if
(
plan
->
context
!=
currentContext
)
{
planStorage
.
erase
(
planStorage
.
begin
()
+
i
);
break
;
}
return
plan
->
plHandle
;
}
}
// no baked plan is found, so let's create a new one
FftPlan
*
newPlan
=
new
FftPlan
(
dft_size
,
src_step
,
dst_step
,
doubleFP
,
inplace
,
flags
,
fftType
);
planStorage
.
push_back
(
newPlan
);
return
newPlan
->
plHandle
;
}
~
PlanCache
()
{
for
(
std
::
vector
<
FftPlan
*>::
iterator
i
=
planStorage
.
begin
(),
end
=
planStorage
.
end
();
i
!=
end
;
++
i
)
delete
(
*
i
);
planStorage
.
clear
();
}
protected
:
PlanCache
()
:
planStorage
()
{
}
std
::
vector
<
FftPlan
*>
planStorage
;
};
extern
"C"
{
static
void
CL_CALLBACK
oclCleanupCallback
(
cl_event
e
,
cl_int
,
void
*
p
)
{
UMatData
*
u
=
(
UMatData
*
)
p
;
if
(
u
&&
CV_XADD
(
&
u
->
urefcount
,
-
1
)
==
1
)
u
->
currAllocator
->
deallocate
(
u
);
u
=
0
;
clReleaseEvent
(
e
),
e
=
0
;
}
}
static
bool
ocl_dft_amdfft
(
InputArray
_src
,
OutputArray
_dst
,
int
flags
)
{
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
Size
ssize
=
_src
.
size
();
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
if
(
(
!
doubleSupport
&&
depth
==
CV_64F
)
||
!
(
type
==
CV_32FC1
||
type
==
CV_32FC2
||
type
==
CV_64FC1
||
type
==
CV_64FC2
)
||
_src
.
offset
()
!=
0
)
return
false
;
// if is not a multiplication of prime numbers { 2, 3, 5 }
if
(
ssize
.
area
()
!=
getOptimalDFTSize
(
ssize
.
area
()))
return
false
;
int
dst_complex_input
=
cn
==
2
?
1
:
0
;
bool
dft_inverse
=
(
flags
&
DFT_INVERSE
)
!=
0
?
1
:
0
;
int
dft_complex_output
=
(
flags
&
DFT_COMPLEX_OUTPUT
)
!=
0
;
bool
dft_real_output
=
(
flags
&
DFT_REAL_OUTPUT
)
!=
0
;
CV_Assert
(
dft_complex_output
+
dft_real_output
<
2
);
FftType
fftType
=
(
FftType
)(
dst_complex_input
<<
0
|
dft_complex_output
<<
1
);
switch
(
fftType
)
{
case
C2C
:
_dst
.
create
(
ssize
.
height
,
ssize
.
width
,
CV_MAKE_TYPE
(
depth
,
2
));
break
;
case
R2C
:
// TODO implement it if possible
case
C2R
:
// TODO implement it if possible
case
R2R
:
// AMD Fft does not support this type
default
:
return
false
;
}
UMat
src
=
_src
.
getUMat
(),
dst
=
_dst
.
getUMat
();
bool
inplace
=
src
.
u
==
dst
.
u
;
clAmdFftPlanHandle
plHandle
=
PlanCache
::
getInstance
().
getPlanHandle
(
ssize
,
(
int
)
src
.
step
,
(
int
)
dst
.
step
,
depth
==
CV_64F
,
inplace
,
flags
,
fftType
);
// get the bufferSize
size_t
bufferSize
=
0
;
CLAMDDFT_Assert
(
clAmdFftGetTmpBufSize
(
plHandle
,
&
bufferSize
))
UMat
tmpBuffer
(
1
,
(
int
)
bufferSize
,
CV_8UC1
);
cl_mem
srcarg
=
(
cl_mem
)
src
.
handle
(
ACCESS_READ
);
cl_mem
dstarg
=
(
cl_mem
)
dst
.
handle
(
ACCESS_RW
);
cl_command_queue
queue
=
(
cl_command_queue
)
ocl
::
Queue
::
getDefault
().
ptr
();
cl_event
e
=
0
;
CLAMDDFT_Assert
(
clAmdFftEnqueueTransform
(
plHandle
,
dft_inverse
?
CLFFT_BACKWARD
:
CLFFT_FORWARD
,
1
,
&
queue
,
0
,
NULL
,
&
e
,
&
srcarg
,
&
dstarg
,
(
cl_mem
)
tmpBuffer
.
handle
(
ACCESS_RW
)))
tmpBuffer
.
addref
();
clSetEventCallback
(
e
,
CL_COMPLETE
,
oclCleanupCallback
,
tmpBuffer
.
u
);
return
true
;
}
#undef DFT_ASSERT
}
#endif // HAVE_CLAMDFFT
void
cv
::
dft
(
InputArray
_src0
,
OutputArray
_dst
,
int
flags
,
int
nonzero_rows
)
void
cv
::
dft
(
InputArray
_src0
,
OutputArray
_dst
,
int
flags
,
int
nonzero_rows
)
{
{
...
...
modules/core/src/ocl.cpp
View file @
66ac4621
...
@@ -3002,8 +3002,7 @@ bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
...
@@ -3002,8 +3002,7 @@ bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
sync
?
0
:
&
p
->
e
);
sync
?
0
:
&
p
->
e
);
if
(
sync
||
retval
!=
CL_SUCCESS
)
if
(
sync
||
retval
!=
CL_SUCCESS
)
{
{
int
a
=
clFinish
(
qq
);
CV_OclDbgAssert
(
clFinish
(
qq
)
==
CL_SUCCESS
);
CV_OclDbgAssert
(
a
==
CL_SUCCESS
);
p
->
cleanupUMats
();
p
->
cleanupUMats
();
}
}
else
else
...
@@ -3899,9 +3898,8 @@ public:
...
@@ -3899,9 +3898,8 @@ public:
if
(
(
accessFlags
&
ACCESS_READ
)
!=
0
&&
u
->
hostCopyObsolete
()
)
if
(
(
accessFlags
&
ACCESS_READ
)
!=
0
&&
u
->
hostCopyObsolete
()
)
{
{
AlignedDataPtr
<
false
,
true
>
alignedPtr
(
u
->
data
,
u
->
size
,
CV_OPENCL_DATA_PTR_ALIGNMENT
);
AlignedDataPtr
<
false
,
true
>
alignedPtr
(
u
->
data
,
u
->
size
,
CV_OPENCL_DATA_PTR_ALIGNMENT
);
int
a
=
clEnqueueReadBuffer
(
q
,
(
cl_mem
)
u
->
handle
,
CL_TRUE
,
0
,
CV_Assert
(
clEnqueueReadBuffer
(
q
,
(
cl_mem
)
u
->
handle
,
CL_TRUE
,
0
,
u
->
size
,
alignedPtr
.
getAlignedPtr
(),
0
,
0
,
0
);
u
->
size
,
alignedPtr
.
getAlignedPtr
(),
0
,
0
,
0
)
==
CL_SUCCESS
);
CV_Assert
(
a
==
CL_SUCCESS
);
u
->
markHostCopyObsolete
(
false
);
u
->
markHostCopyObsolete
(
false
);
}
}
}
}
...
...
modules/core/src/opencl/fft.cl
View file @
66ac4621
...
@@ -571,10 +571,15 @@ __kernel void fft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
...
@@ -571,10 +571,15 @@ __kernel void fft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
}
}
else
else
{
{
//
fill
with
zero
other
rows
#
ifdef
COMPLEX_OUTPUT
__global
float2*
dst
=
(
__global
float2*
)(
dst_ptr
+
mad24
(
y,
dst_step,
dst_offset
))
;
__global
float2*
dst
=
(
__global
float2*
)(
dst_ptr
+
mad24
(
y,
dst_step,
dst_offset
))
;
#
else
__global
float*
dst
=
(
__global
float*
)(
dst_ptr
+
mad24
(
y,
dst_step,
dst_offset
))
;
#
endif
#
pragma
unroll
#
pragma
unroll
for
(
int
i=x
; i<dst_cols; i+=block_size)
for
(
int
i=x
; i<dst_cols; i+=block_size)
dst[i]
=
(
float2
)
0.f
;
dst[i]
=
0.f
;
}
}
}
}
...
@@ -667,12 +672,9 @@ __kernel void ifft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
...
@@ -667,12 +672,9 @@ __kernel void ifft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
smem[x+i*block_size].y
=
-src[i*block_size].y
;
smem[x+i*block_size].y
=
-src[i*block_size].y
;
}
}
#
else
#
else
__global
const
float2*
src
;
#
if
!defined
(
REAL_INPUT
)
&&
defined
(
NO_CONJUGATE
)
#
if
!defined
(
REAL_INPUT
)
&&
defined
(
NO_CONJUGATE
)
src
=
(
__global
const
float2*
)(
src_ptr
+
mad24
(
y,
src_step,
mad24
(
2
,
(
int
)
sizeof
(
float
)
,
src_offset
)))
;
__global
const
float2*
src
=
(
__global
const
float2*
)(
src_ptr
+
mad24
(
y,
src_step,
mad24
(
2
,
(
int
)
sizeof
(
float
)
,
src_offset
)))
;
#
else
src
=
(
__global
const
float2*
)(
src_ptr
+
mad24
(
y,
src_step,
mad24
(
1
,
(
int
)
sizeof
(
float
)
,
src_offset
)))
;
#
endif
#
pragma
unroll
#
pragma
unroll
for
(
int
i=x
; i<(LOCAL_SIZE-1)/2; i+=block_size)
for
(
int
i=x
; i<(LOCAL_SIZE-1)/2; i+=block_size)
...
@@ -681,6 +683,20 @@ __kernel void ifft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
...
@@ -681,6 +683,20 @@ __kernel void ifft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
smem[i+1].y
=
-src[i].y
;
smem[i+1].y
=
-src[i].y
;
smem[LOCAL_SIZE-i-1]
=
src[i]
;
smem[LOCAL_SIZE-i-1]
=
src[i]
;
}
}
#
else
#
pragma
unroll
for
(
int
i=x
; i<(LOCAL_SIZE-1)/2; i+=block_size)
{
float2
src
=
vload2
(
0
,
(
__global
const
float*
)(
src_ptr
+
mad24
(
y,
src_step,
mad24
(
2*i+1,
(
int
)
sizeof
(
float
)
,
src_offset
))))
;
smem[i+1].x
=
src.x
;
smem[i+1].y
=
-src.y
;
smem[LOCAL_SIZE-i-1]
=
src
;
}
#
endif
if
(
x==0
)
if
(
x==0
)
{
{
smem[0].x
=
*
(
__global
const
float*
)(
src_ptr
+
mad24
(
y,
src_step,
src_offset
))
;
smem[0].x
=
*
(
__global
const
float*
)(
src_ptr
+
mad24
(
y,
src_step,
src_offset
))
;
...
@@ -688,7 +704,11 @@ __kernel void ifft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
...
@@ -688,7 +704,11 @@ __kernel void ifft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
if
(
LOCAL_SIZE
%
2
==0
)
if
(
LOCAL_SIZE
%
2
==0
)
{
{
#
if
!defined
(
REAL_INPUT
)
&&
defined
(
NO_CONJUGATE
)
smem[LOCAL_SIZE/2].x
=
src[LOCAL_SIZE/2-1].x
;
smem[LOCAL_SIZE/2].x
=
src[LOCAL_SIZE/2-1].x
;
#
else
smem[LOCAL_SIZE/2].x
=
*
(
__global
const
float*
)(
src_ptr
+
mad24
(
y,
src_step,
mad24
(
LOCAL_SIZE-1,
(
int
)
sizeof
(
float
)
,
src_offset
)))
;
#
endif
smem[LOCAL_SIZE/2].y
=
0.f
;
smem[LOCAL_SIZE/2].y
=
0.f
;
}
}
}
}
...
@@ -718,10 +738,15 @@ __kernel void ifft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
...
@@ -718,10 +738,15 @@ __kernel void ifft_multi_radix_rows(__global const uchar* src_ptr, int src_step,
}
}
else
else
{
{
__global
float2*
dst
=
(
__global
float*
)(
dst_ptr
+
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)(
sizeof
(
float
)
*2
)
,
dst_offset
)))
;
//
fill
with
zero
other
rows
#
ifdef
COMPLEX_OUTPUT
__global
float2*
dst
=
(
__global
float2*
)(
dst_ptr
+
mad24
(
y,
dst_step,
dst_offset
))
;
#
else
__global
float*
dst
=
(
__global
float*
)(
dst_ptr
+
mad24
(
y,
dst_step,
dst_offset
))
;
#
endif
#
pragma
unroll
#
pragma
unroll
for
(
int
i=
0
; i<kercn; i++
)
for
(
int
i=
x
; i<dst_cols; i+=block_size
)
dst[i
*block_size]
=
(
float2
)
0.f
;
dst[i
]
=
0.f
;
}
}
}
}
...
@@ -781,7 +806,7 @@ __kernel void ifft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
...
@@ -781,7 +806,7 @@ __kernel void ifft_multi_radix_cols(__global const uchar* src_ptr, int src_step,
#
pragma
unroll
#
pragma
unroll
for
(
int
i=0
; i<kercn; i++)
for
(
int
i=0
; i<kercn; i++)
{
{
float2
temp
=
*
((
__global
const
float2
*
)(
src
+
i*block_size*src_step
))
;
float2
temp
=
vload2
(
0
,
(
__global
const
float
*
)(
src
+
i*block_size*src_step
))
;
smem[y+i*block_size].x
=
temp.x
;
smem[y+i*block_size].x
=
temp.x
;
smem[y+i*block_size].y
=
-temp.y
;
smem[y+i*block_size].y
=
-temp.y
;
}
}
...
...
modules/core/test/ocl/test_dft.cpp
View file @
66ac4621
...
@@ -48,26 +48,17 @@
...
@@ -48,26 +48,17 @@
#ifdef HAVE_OPENCL
#ifdef HAVE_OPENCL
enum
OCL_FFT_TYPE
{
R2R
=
0
,
C2R
=
1
,
R2C
=
2
,
C2C
=
3
};
namespace
cvtest
{
namespace
cvtest
{
namespace
ocl
{
namespace
ocl
{
////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////
// Dft
// Dft
PARAM_TEST_CASE
(
Dft
,
cv
::
Size
,
OCL_FFT_TYPE
,
bool
,
bool
,
bool
,
bool
)
PARAM_TEST_CASE
(
Dft
,
cv
::
Size
,
MatDepth
,
bool
,
bool
,
bool
,
bool
)
{
{
cv
::
Size
dft_size
;
cv
::
Size
dft_size
;
int
dft_flags
,
depth
,
cn
,
dft_type
;
int
dft_flags
,
depth
;
bool
hint
;
bool
inplace
;
bool
is1d
;
TEST_DECLARE_INPUT_PARAMETER
(
src
);
TEST_DECLARE_INPUT_PARAMETER
(
src
);
TEST_DECLARE_OUTPUT_PARAMETER
(
dst
);
TEST_DECLARE_OUTPUT_PARAMETER
(
dst
);
...
@@ -75,60 +66,34 @@ PARAM_TEST_CASE(Dft, cv::Size, OCL_FFT_TYPE, bool, bool, bool, bool)
...
@@ -75,60 +66,34 @@ PARAM_TEST_CASE(Dft, cv::Size, OCL_FFT_TYPE, bool, bool, bool, bool)
virtual
void
SetUp
()
virtual
void
SetUp
()
{
{
dft_size
=
GET_PARAM
(
0
);
dft_size
=
GET_PARAM
(
0
);
d
ft_type
=
GET_PARAM
(
1
);
d
epth
=
GET_PARAM
(
1
);
depth
=
CV_32F
;
inplace
=
GET_PARAM
(
2
)
;
dft_flags
=
0
;
dft_flags
=
0
;
switch
(
dft_type
)
{
case
R2R
:
dft_flags
|=
cv
::
DFT_REAL_OUTPUT
;
cn
=
1
;
break
;
case
C2R
:
dft_flags
|=
cv
::
DFT_REAL_OUTPUT
;
cn
=
2
;
break
;
case
R2C
:
dft_flags
|=
cv
::
DFT_COMPLEX_OUTPUT
;
cn
=
1
;
break
;
case
C2C
:
dft_flags
|=
cv
::
DFT_COMPLEX_OUTPUT
;
cn
=
2
;
break
;
}
if
(
GET_PARAM
(
2
))
dft_flags
|=
cv
::
DFT_INVERSE
;
if
(
GET_PARAM
(
3
))
if
(
GET_PARAM
(
3
))
dft_flags
|=
cv
::
DFT_ROWS
;
dft_flags
|=
cv
::
DFT_ROWS
;
if
(
GET_PARAM
(
4
))
if
(
GET_PARAM
(
4
))
dft_flags
|=
cv
::
DFT_SCALE
;
dft_flags
|=
cv
::
DFT_SCALE
;
hint
=
GET_PARAM
(
5
);
if
(
GET_PARAM
(
5
))
is1d
=
(
dft_flags
&
DFT_ROWS
)
!=
0
||
dft_size
.
height
==
1
;
dft_flags
|=
cv
::
DFT_INVERSE
;
}
}
void
generateTestData
()
void
generateTestData
(
int
cn
=
2
)
{
{
src
=
randomMat
(
dft_size
,
CV_MAKE_TYPE
(
depth
,
cn
),
0.0
,
100.0
);
src
=
randomMat
(
dft_size
,
CV_MAKE_TYPE
(
depth
,
cn
),
0.0
,
100.0
);
usrc
=
src
.
getUMat
(
ACCESS_READ
);
usrc
=
src
.
getUMat
(
ACCESS_READ
);
if
(
inplace
)
dst
=
src
,
udst
=
usrc
;
}
}
};
};
OCL_TEST_P
(
Dft
,
Mat
)
OCL_TEST_P
(
Dft
,
C2C
)
{
{
generateTestData
();
generateTestData
();
int
nonzero_rows
=
hint
?
src
.
cols
-
randomInt
(
1
,
src
.
rows
-
1
)
:
0
;
OCL_OFF
(
cv
::
dft
(
src
,
dst
,
dft_flags
|
cv
::
DFT_COMPLEX_OUTPUT
));
OCL_OFF
(
cv
::
dft
(
src
,
dst
,
dft_flags
,
nonzero_rows
));
OCL_ON
(
cv
::
dft
(
usrc
,
udst
,
dft_flags
|
cv
::
DFT_COMPLEX_OUTPUT
));
OCL_ON
(
cv
::
dft
(
usrc
,
udst
,
dft_flags
,
nonzero_rows
));
if
(
dft_type
==
R2C
&&
is1d
&&
(
dft_flags
&
cv
::
DFT_INVERSE
)
==
0
)
{
dst
=
dst
(
cv
::
Range
(
0
,
dst
.
rows
),
cv
::
Range
(
0
,
dst
.
cols
/
2
+
1
));
udst
=
udst
(
cv
::
Range
(
0
,
udst
.
rows
),
cv
::
Range
(
0
,
udst
.
cols
/
2
+
1
));
}
//Mat gpu = udst.getMat(ACCESS_READ);
//std::cout << dst << std::endl;
//std::cout << gpu << std::endl;
//int cn = udst.channels();
//
//Mat dst1ch = dst.reshape(1);
//Mat gpu1ch = gpu.reshape(1);
//Mat df;
//absdiff(dst1ch, gpu1ch, df);
//std::cout << Mat_<int>(df) << std::endl;
double
eps
=
src
.
size
().
area
()
*
1e-4
;
double
eps
=
src
.
size
().
area
()
*
1e-4
;
EXPECT_MAT_NEAR
(
dst
,
udst
,
eps
);
EXPECT_MAT_NEAR
(
dst
,
udst
,
eps
);
...
@@ -185,13 +150,13 @@ OCL_TEST_P(MulSpectrums, Mat)
...
@@ -185,13 +150,13 @@ OCL_TEST_P(MulSpectrums, Mat)
OCL_INSTANTIATE_TEST_CASE_P
(
OCL_ImgProc
,
MulSpectrums
,
testing
::
Combine
(
Bool
(),
Bool
()));
OCL_INSTANTIATE_TEST_CASE_P
(
OCL_ImgProc
,
MulSpectrums
,
testing
::
Combine
(
Bool
(),
Bool
()));
OCL_INSTANTIATE_TEST_CASE_P
(
Core
,
Dft
,
Combine
(
Values
(
cv
::
Size
(
10
,
10
),
cv
::
Size
(
36
,
36
),
cv
::
Size
(
512
,
1
),
cv
::
Size
(
1280
,
768
)),
OCL_INSTANTIATE_TEST_CASE_P
(
Core
,
Dft
,
Combine
(
Values
(
cv
::
Size
(
2
,
3
),
cv
::
Size
(
5
,
4
),
cv
::
Size
(
25
,
20
),
Values
((
OCL_FFT_TYPE
)
R2C
,
(
OCL_FFT_TYPE
)
C2C
,
(
OCL_FFT_TYPE
)
R2R
,
(
OCL_FFT_TYPE
)
C2R
),
cv
::
Size
(
512
,
1
),
cv
::
Size
(
1024
,
768
)),
Bool
(),
// DFT_INVERSE
Values
(
CV_32F
,
CV_64F
),
Bool
(),
// inplace
Bool
(),
// DFT_ROWS
Bool
(),
// DFT_ROWS
Bool
(),
// DFT_SCALE
Bool
(),
// DFT_SCALE
Bool
()
// hint
Bool
())
// DFT_INVERSE
)
);
);
}
}
// namespace cvtest::ocl
}
}
// namespace cvtest::ocl
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment