Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
c03ac12f
Commit
c03ac12f
authored
Aug 07, 2012
by
yao
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add dft and gemm to ocl module, using AMD's clAmdFft and clAmdBlas libraries
parent
7741d585
Show whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
710 additions
and
3 deletions
+710
-3
CMakeLists.txt
CMakeLists.txt
+9
-0
OpenCVDetectOpenCL.cmake
cmake/OpenCVDetectOpenCL.cmake
+13
-2
cvconfig.h.cmake
cmake/templates/cvconfig.h.cmake
+6
-0
CMakeLists.txt
modules/ocl/CMakeLists.txt
+8
-0
ocl.hpp
modules/ocl/include/opencv2/ocl/ocl.hpp
+29
-1
fft.cpp
modules/ocl/src/fft.cpp
+302
-0
gemm.cpp
modules/ocl/src/gemm.cpp
+161
-0
test_fft.cpp
modules/ocl/test/test_fft.cpp
+97
-0
test_gemm.cpp
modules/ocl/test/test_gemm.cpp
+85
-0
No files found.
CMakeLists.txt
View file @
c03ac12f
...
...
@@ -140,6 +140,9 @@ OCV_OPTION(WITH_XIMEA "Include XIMEA cameras support" OFF
OCV_OPTION
(
WITH_XINE
"Include Xine support (GPL)"
OFF
IF
(
UNIX AND NOT APPLE AND NOT ANDROID
)
)
OCV_OPTION
(
WITH_CLP
"Include Clp support (EPL)"
OFF
)
OCV_OPTION
(
WITH_OPENCL
"Include OpenCL Runtime support"
OFF
IF
(
NOT ANDROID AND NOT IOS
)
)
OCV_OPTION
(
WITH_OPENCLAMDFFT
"Include AMD OpenCL FFT library support"
OFF
IF
(
NOT ANDROID AND NOT IOS
)
)
OCV_OPTION
(
WITH_OPENCLAMDBLAS
"Include AMD OpenCL BLAS library support"
OFF
IF
(
NOT ANDROID AND NOT IOS
)
)
# OpenCV build components
# ===================================================
...
...
@@ -396,6 +399,12 @@ if(WITH_OPENCL)
if
(
OPENCL_FOUND
)
set
(
HAVE_OPENCL 1
)
endif
()
# Enable the clAmdFft-backed code only when the option is on AND the header
# and library were actually located (CLAMDFFT_INCLUDE_DIR / CLAMDFFT_LIBRARIES
# are filled in by find_path()/find_library() in OpenCVDetectOpenCL.cmake).
# A *-NOTFOUND or unset value evaluates to false here, so a missing SDK no
# longer defines HAVE_CLAMDFFT and breaks the build of the ocl module.
if(WITH_OPENCLAMDFFT AND CLAMDFFT_INCLUDE_DIR AND CLAMDFFT_LIBRARIES)
  set(HAVE_CLAMDFFT 1)
endif()
# Enable the clAmdBlas-backed code only when the option is on AND the header
# and library were actually located (CLAMDBLAS_INCLUDE_DIR / CLAMDBLAS_LIBRARIES
# are filled in by find_path()/find_library() in OpenCVDetectOpenCL.cmake).
# A *-NOTFOUND or unset value evaluates to false here, so a missing SDK no
# longer defines HAVE_CLAMDBLAS and breaks the build of the ocl module.
if(WITH_OPENCLAMDBLAS AND CLAMDBLAS_INCLUDE_DIR AND CLAMDBLAS_LIBRARIES)
  set(HAVE_CLAMDBLAS 1)
endif()
endif
()
# ----------------------------------------------------------------------------
...
...
cmake/OpenCVDetectOpenCL.cmake
View file @
c03ac12f
...
...
@@ -2,8 +2,19 @@ if(APPLE)
set
(
OPENCL_FOUND YES
)
set
(
OPENCL_LIBRARIES
"-framework OpenCL"
)
else
()
find_package
(
OpenCL QUIET
)
#find_package(OpenCL QUIET)
if
(
WITH_OPENCLAMDFFT
)
find_path
(
CLAMDFFT_INCLUDE_DIR
NAMES clAmdFft.h
)
find_library
(
CLAMDFFT_LIBRARIES
NAMES clAmdFft.Runtime
)
endif
()
if
(
WITH_OPENCLAMDBLAS
)
find_path
(
CLAMDBLAS_INCLUDE_DIR
NAMES clAmdBlas.h
)
find_library
(
CLAMDBLAS_LIBRARIES
NAMES clAmdBlas
)
endif
()
# Try AMD/ATI Stream SDK
if
(
NOT OPENCL_FOUND
)
set
(
ENV_AMDSTREAMSDKROOT $ENV{AMDAPPSDKROOT}
)
...
...
cmake/templates/cvconfig.h.cmake
View file @
c03ac12f
...
...
@@ -175,6 +175,12 @@
/* OpenCL Support */
#cmakedefine HAVE_OPENCL
/* AMD's OpenCL Fast Fourier Transform Library*/
#cmakedefine HAVE_CLAMDFFT
/* AMD's Basic Linear Algebra Subprograms Library*/
#cmakedefine HAVE_CLAMDBLAS
/* NVidia Cuda Fast Fourier
Transform
(
FFT
)
API*/
#cmakedefine HAVE_CUFFT
...
...
modules/ocl/CMakeLists.txt
View file @
c03ac12f
...
...
@@ -29,6 +29,14 @@ if (HAVE_OPENCL)
if
(
OPENCL_INCLUDE_DIR
)
ocv_include_directories
(
${
OPENCL_INCLUDE_DIR
}
)
endif
()
if
(
HAVE_CLAMDFFT
)
set
(
ocl_link_libs
${
ocl_link_libs
}
${
CLAMDFFT_LIBRARIES
}
)
ocv_include_directories
(
${
CLAMDFFT_INCLUDE_DIR
}
)
endif
()
if
(
HAVE_CLAMDBLAS
)
set
(
ocl_link_libs
${
ocl_link_libs
}
${
CLAMDBLAS_LIBRARIES
}
)
ocv_include_directories
(
${
CLAMDBLAS_INCLUDE_DIR
}
)
endif
()
endif
()
ocv_set_module_sources
(
...
...
modules/ocl/include/opencv2/ocl/ocl.hpp
View file @
c03ac12f
...
...
@@ -894,7 +894,35 @@ namespace cv
// Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4
CV_EXPORTS
void
matchTemplate
(
const
oclMat
&
image
,
const
oclMat
&
templ
,
oclMat
&
result
,
int
method
,
MatchTemplateBuf
&
buf
);
#ifdef HAVE_CLAMDFFT
///////////////////////////////////////// clAmdFft related /////////////////////////////////////////
// These two functions must be called before/after running any fft library functions.
CV_EXPORTS
void
fft_setup
();
// this will be implicitly invoked
CV_EXPORTS
void
fft_teardown
();
// you need to teardown fft library manually
/////////////////////////////////////// DFT /////////////////////////////////////////////////////
//! Performs a forward or inverse discrete Fourier transform (1D or 2D) of floating point matrix.
//! Param dft_size is the size of DFT transform.
//!
//! For complex-to-real transform it is assumed that the source matrix is packed in CLFFT's format.
// supported src types: CV_32FC1, CV_32FC2
// supported flags: DFT_INVERSE, DFT_REAL_OUTPUT, DFT_COMPLEX_OUTPUT, DFT_ROWS
// dft_size is the size of the original input, which is used for the transformation from complex to real.
// dft_size must be a product of powers of 2, 3 and 5
// real to complex dft requires at least v1.8 of clAmdFft
// real to complex dft output is not the same as the cpu version
// real to complex and complex to real do not support DFT_ROWS
CV_EXPORTS
void
dft
(
const
oclMat
&
src
,
oclMat
&
dst
,
Size
dft_size
=
Size
(
0
,
0
),
int
flags
=
0
);
#endif // HAVE_CLAMDFFT
#ifdef HAVE_CLAMDBLAS
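//! implements generalized matrix product algorithm GEMM from BLAS
// The functionality requires the clAmdBlas library
// the implementation dispatches on CV_32FC1, CV_64FC1, CV_32FC2 and CV_64FC2; only CV_32FC1 and CV_32FC2 are covered by the tests
// flag GEMM_3_T is not supported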
CV_EXPORTS
void
gemm
(
const
oclMat
&
src1
,
const
oclMat
&
src2
,
double
alpha
,
const
oclMat
&
src3
,
double
beta
,
oclMat
&
dst
,
int
flags
=
0
);
#endif
}
}
...
...
modules/ocl/src/fft.cpp
0 → 100644
View file @
c03ac12f
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Peng Xiao, pengxiao@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include <iomanip>
#include "precomp.hpp"
#ifdef HAVE_CLAMDFFT
using
namespace
cv
;
using
namespace
cv
::
ocl
;
using
namespace
std
;
#if !defined (HAVE_OPENCL)
void
cv
::
ocl
::
dft
(
const
oclMat
&
src
,
oclMat
&
dst
,
int
flags
)
{
throw_nogpu
();
}
#else
#include <clAmdFft.h>
namespace
cv
{
namespace
ocl
{
// Kind of transform. dft() builds the value as
// (complex input ? 1 : 0) | (complex output ? 2 : 0), so the numeric values
// below must not be changed independently of that expression.
enum
FftType
{
C2R
=
1
,
// opencl HERMITIAN_INTERLEAVED to real
R2C
=
2
,
// real to opencl HERMITIAN_INTERLEAVED
C2C
=
3
// complex to complex
};
// A baked clAmdFft plan together with the parameters it was baked for.
//
// Instances are created only through getPlan(), which keeps them in planStore
// so that a later request with the same size, steps, flags and type reuses the
// existing handle. The destructor destroys the clAmdFft handle and removes the
// plan from planStore; fft_teardown() deletes every stored plan.
//
// The object owns plHandle, so copying is disabled: a copy would destroy the
// same handle a second time.
struct FftPlan
{
    friend void fft_setup();
    friend void fft_teardown();

    ~FftPlan();

protected:
    // Creates and bakes the plan; see the definition for the layout rules.
    FftPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type);

    // Parameters the plan was baked for; getPlan() matches on all of them.
    const Size dft_size;
    const int src_step, dst_step;
    const int flags;
    const FftType type;

    // Handle of the baked clAmdFft plan.
    clAmdFftPlanHandle plHandle;

    // Every plan created so far (owning pointers).
    static vector<FftPlan *> planStore;
    // True between fft_setup() and fft_teardown().
    static bool started;
    // Allocated by fft_setup(), released by fft_teardown().
    static clAmdFftSetupData *setupData;

public:
    // return a baked plan->
    // if there is one matched plan, return it
    // if not, bake a new one, put it into the planStore and return it.
    static clAmdFftPlanHandle getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type);

private:
    // Not copyable (declared, intentionally not defined).
    FftPlan(const FftPlan &);
    FftPlan &operator=(const FftPlan &);
};
}}
// Definitions of FftPlan's static state: the library starts out "not set up",
// with no cached plans and no setup data allocated.
bool cv::ocl::FftPlan::started = false;
vector<cv::ocl::FftPlan *> cv::ocl::FftPlan::planStore;
clAmdFftSetupData *cv::ocl::FftPlan::setupData = NULL;
// Prepares the clAmdFft setup data. Calling it again while the library is
// already marked as started is a no-op. The FftPlan constructor invokes it
// implicitly when needed.
void cv::ocl::fft_setup()
{
    if (!FftPlan::started)
    {
        FftPlan::setupData = new clAmdFftSetupData;
        openCLSafeCall(clAmdFftInitSetupData(FftPlan::setupData));
        FftPlan::started = true;
    }
}
void
cv
::
ocl
::
fft_teardown
()
{
if
(
!
FftPlan
::
started
)
{
return
;
}
delete
FftPlan
::
setupData
;
for
(
int
i
=
0
;
i
<
FftPlan
::
planStore
.
size
();
i
++
)
{
delete
FftPlan
::
planStore
[
i
];
}
FftPlan
::
planStore
.
clear
();
openCLSafeCall
(
clAmdFftTeardown
(
)
);
FftPlan
::
started
=
false
;
}
// Bake a new plan.
//
// _dft_size  transform size in elements (width x height)
// _src_step  row step of the source matrix, in bytes
// _dst_step  row step of the destination matrix, in bytes
// _flags     cv DFT flags; only DFT_ROWS is examined here
// _type      C2C, R2C or C2R; R2C and C2R assert that DFT_ROWS is not set
//
// Calls fft_setup() when the library has not been started yet. The plan is
// always out-of-place and is baked for the command queue of the current
// Context. Any other _type prints a message and throws std::exception.
cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type)
    : dft_size(_dft_size), src_step(_src_step), dst_step(_dst_step), flags(_flags), type(_type), plHandle(0)
{
    if (!FftPlan::started)
    {
        // implicitly do fft setup
        fft_setup();
    }

    const bool is_1d_input = (_dft_size.height == 1);
    const int is_row_dft = flags & DFT_ROWS;

    clAmdFftLayout inLayout;
    clAmdFftLayout outLayout;

    // A single row, or a row-by-row transform, is a (batched) 1D transform.
    const clAmdFftDim dim = (is_1d_input || is_row_dft) ? CLFFT_1D : CLFFT_2D;
    const size_t batchSize = is_row_dft ? dft_size.height : 1;

    size_t clLengthsIn[3] = {1, 1, 1};
    size_t clStridesIn[3] = {1, 1, 1};
    size_t clStridesOut[3] = {1, 1, 1};

    clLengthsIn[0] = dft_size.width;
    clLengthsIn[1] = is_row_dft ? 1 : dft_size.height;
    clStridesIn[0] = 1;
    clStridesOut[0] = 1;

    // Row strides are expressed in elements, so the byte steps are divided by
    // the element size of the corresponding layout.
    switch (_type)
    {
    case C2C:
        inLayout = CLFFT_COMPLEX_INTERLEAVED;
        outLayout = CLFFT_COMPLEX_INTERLEAVED;
        clStridesIn[1] = src_step / sizeof(std::complex<float>);
        clStridesOut[1] = clStridesIn[1];
        break;
    case R2C:
        CV_Assert(!is_row_dft); // this is not supported yet
        inLayout = CLFFT_REAL;
        outLayout = CLFFT_HERMITIAN_INTERLEAVED;
        clStridesIn[1] = src_step / sizeof(float);
        clStridesOut[1] = dst_step / sizeof(std::complex<float>);
        break;
    case C2R:
        CV_Assert(!is_row_dft); // this is not supported yet
        inLayout = CLFFT_HERMITIAN_INTERLEAVED;
        outLayout = CLFFT_REAL;
        clStridesIn[1] = src_step / sizeof(std::complex<float>);
        clStridesOut[1] = dst_step / sizeof(float);
        break;
    default:
        //std::runtime_error("does not support this conversion!");
        cout << "Does not support this convertion!" << endl;
        throw exception();
        break;
    }

    clStridesIn[2] = is_row_dft ? clStridesIn[1] : dft_size.width * clStridesIn[1];
    clStridesOut[2] = is_row_dft ? clStridesOut[1] : dft_size.width * clStridesOut[1];

    openCLSafeCall(clAmdFftCreateDefaultPlan(&plHandle, Context::getContext()->impl->clContext, dim, clLengthsIn));
    openCLSafeCall(clAmdFftSetResultLocation(plHandle, CLFFT_OUTOFPLACE));
    openCLSafeCall(clAmdFftSetLayout(plHandle, inLayout, outLayout));
    openCLSafeCall(clAmdFftSetPlanBatchSize(plHandle, batchSize));
    openCLSafeCall(clAmdFftSetPlanInStride(plHandle, dim, clStridesIn));
    openCLSafeCall(clAmdFftSetPlanOutStride(plHandle, dim, clStridesOut));
    // The second distance describes the OUTPUT buffer, so it has to come from
    // the output strides (it used to be taken from the input strides).
    openCLSafeCall(clAmdFftSetPlanDistance(plHandle, clStridesIn[dim], clStridesOut[dim]));
    openCLSafeCall(clAmdFftBakePlan(plHandle, 1, &(Context::getContext()->impl->clCmdQueue), NULL, NULL));
}
// Unregisters this plan from planStore (matched by plan handle) and destroys
// the underlying clAmdFft plan.
cv::ocl::FftPlan::~FftPlan()
{
    for (size_t i = 0; i < planStore.size(); ++i)
    {
        if (planStore[i]->plHandle == plHandle)
        {
            planStore.erase(planStore.begin() + i);
            // Stop here: after erase() the index refers to the next element,
            // which the old loop then stepped over.
            break;
        }
    }
    openCLSafeCall(clAmdFftDestroyPlan(&plHandle));
}
// Returns the handle of a baked plan for the given parameters.
// A stored plan is reused when its size, flags, source step, destination step
// and type all match; otherwise a new plan is baked, added to planStore and
// its handle is returned.
clAmdFftPlanHandle cv::ocl::FftPlan::getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type)
{
    // look for an already baked plan first
    for (vector<FftPlan *>::const_iterator it = planStore.begin(); it != planStore.end(); ++it)
    {
        const FftPlan *cached = *it;

        const bool sameSize = cached->dft_size.width == _dft_size.width &&
                              cached->dft_size.height == _dft_size.height;
        const bool sameSteps = cached->src_step == _src_step &&
                               cached->dst_step == _dst_step;
        const bool sameKind = cached->flags == _flags &&
                              cached->type == _type;

        if (sameSize && sameSteps && sameKind)
        {
            return cached->plHandle;
        }
    }

    // nothing suitable is cached: bake, remember and hand out a new plan
    FftPlan *fresh = new FftPlan(_dft_size, _src_step, _dst_step, _flags, _type);
    planStore.push_back(fresh);
    return fresh->plHandle;
}
// Forward or inverse DFT of src into dst using clAmdFft.
//
// src       CV_32F (real) or CV_32FC2 (complex) matrix
// dst       (re)created here: CV_32FC2 of src's size for C2C,
//           src.rows x (src.cols / 2 + 1) CV_32FC2 for R2C,
//           src.rows x dft_size.width CV_32FC1 for C2R
// dft_size  transform size; Size(0, 0) means src.size(). Its area must be
//           equal to getOptimalDFTSize() of that area.
// flags     DFT_INVERSE selects the backward transform, DFT_REAL_OUTPUT
//           selects a real result, DFT_ROWS is accepted for C2C only,
//           DFT_SCALE is rejected.
//
// The call blocks until the transform has finished (clFinish on the queue).
void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags)
{
    if (dft_size == Size(0, 0))
    {
        dft_size = src.size();
    }
    // check if the given dft size is of optimal dft size
    CV_Assert(dft_size.area() == getOptimalDFTSize(dft_size.area()));

    // similar assertions with cuda module
    CV_Assert(src.type() == CV_32F || src.type() == CV_32FC2);

    // we don't support DFT_SCALE flag
    CV_Assert(!(DFT_SCALE & flags));

    const int is_row_dft = flags & DFT_ROWS;
    const int is_inverse = flags & DFT_INVERSE;
    const bool is_complex_input = src.channels() == 2;
    const bool is_complex_output = !(flags & DFT_REAL_OUTPUT);

    // We don't support real-to-real transform
    CV_Assert(is_complex_input || is_complex_output);

    // bit 0: complex input, bit 1: complex output (matches the FftType values)
    FftType type = (FftType)(is_complex_input << 0 | is_complex_output << 1);

    switch (type)
    {
    case C2C:
        dst.create(src.rows, src.cols, CV_32FC2);
        break;
    case R2C:
        CV_Assert(!is_row_dft); // this is not supported yet
        dst.create(src.rows, src.cols / 2 + 1, CV_32FC2);
        break;
    case C2R:
        CV_Assert(dft_size.width / 2 + 1 == src.cols && dft_size.height == src.rows);
        CV_Assert(!is_row_dft); // this is not supported yet
        dst.create(src.rows, dft_size.width, CV_32FC1);
        break;
    default:
        //std::runtime_error("does not support this conversion!");
        cout << "Does not support this convertion!" << endl;
        throw exception();
        break;
    }

    clAmdFftPlanHandle plHandle = FftPlan::getPlan(dft_size, src.step, dst.step, flags, type);

    //get the buffersize
    size_t buffersize = 0;
    openCLSafeCall(clAmdFftGetTmpBufSize(plHandle, &buffersize));

    //allocate the intermediate buffer
    cl_mem clMedBuffer = NULL;
    if (buffersize)
    {
        cl_int medstatus;
        clMedBuffer = clCreateBuffer(src.clCxt->impl->clContext, CL_MEM_READ_WRITE, buffersize, 0, &medstatus);
        openCLSafeCall(medstatus);
    }

    try
    {
        openCLSafeCall(clAmdFftEnqueueTransform(plHandle,
                                                is_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD,
                                                1,
                                                &src.clCxt->impl->clCmdQueue,
                                                0, NULL, NULL,
                                                (cl_mem *)&src.data, (cl_mem *)&dst.data, clMedBuffer));
        openCLSafeCall(clFinish(src.clCxt->impl->clCmdQueue));
    }
    catch (...)
    {
        // do not leak the temporary buffer when the transform fails
        if (clMedBuffer)
        {
            openCLFree(clMedBuffer);
        }
        throw;
    }

    if (clMedBuffer)
    {
        openCLFree(clMedBuffer);
    }
}
#endif
#endif //HAVE_CLAMDFFT
modules/ocl/src/gemm.cpp
0 → 100644
View file @
c03ac12f
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Peng Xiao, pengxiao@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include <iomanip>
#include "precomp.hpp"
#ifdef HAVE_CLAMDBLAS
#include "clAmdBlas.h"
#if !defined (HAVE_OPENCL)
// Fallback used when the module is built without OpenCL: the call only
// reports the missing backend through throw_nogpu().
//
// This file implements gemm, so the stub must define cv::ocl::gemm with the
// parameter list declared in ocl.hpp (it used to define cv::ocl::dft, a
// copy-paste leftover from fft.cpp). The parameters are left unnamed because
// the stub never reads them.
void cv::ocl::gemm(const oclMat &, const oclMat &, double, const oclMat &, double, oclMat &, int)
{
    throw_nogpu();
}
#else
using
namespace
cv
;
void
cv
::
ocl
::
gemm
(
const
oclMat
&
src1
,
const
oclMat
&
src2
,
double
alpha
,
const
oclMat
&
src3
,
double
beta
,
oclMat
&
dst
,
int
flags
)
{
CV_Assert
(
src1
.
cols
==
src2
.
rows
&&
(
src3
.
empty
()
||
src1
.
rows
==
src3
.
rows
&&
src2
.
cols
==
src3
.
cols
));
CV_Assert
(
!
(
cv
::
GEMM_3_T
&
flags
));
// cv::GEMM_3_T is not supported
if
(
!
src3
.
empty
())
{
src3
.
copyTo
(
dst
);
}
else
{
dst
.
create
(
src1
.
rows
,
src2
.
cols
,
src1
.
type
());
dst
.
setTo
(
Scalar
::
all
(
0
));
}
openCLSafeCall
(
clAmdBlasSetup
()
);
const
clAmdBlasTranspose
transA
=
(
cv
::
GEMM_1_T
&
flags
)
?
clAmdBlasTrans
:
clAmdBlasNoTrans
;
const
clAmdBlasTranspose
transB
=
(
cv
::
GEMM_2_T
&
flags
)
?
clAmdBlasTrans
:
clAmdBlasNoTrans
;
const
clAmdBlasOrder
order
=
clAmdBlasRowMajor
;
const
int
M
=
src1
.
rows
;
const
int
N
=
src2
.
cols
;
const
int
K
=
src1
.
cols
;
int
lda
=
src1
.
step
;
int
ldb
=
src2
.
step
;
int
ldc
=
dst
.
step
;
int
offa
=
src1
.
offset
;
int
offb
=
src2
.
offset
;
int
offc
=
dst
.
offset
;
switch
(
src1
.
type
())
{
case
CV_32FC1
:
lda
/=
sizeof
(
float
);
ldb
/=
sizeof
(
float
);
ldc
/=
sizeof
(
float
);
offa
/=
sizeof
(
float
);
offb
/=
sizeof
(
float
);
offc
/=
sizeof
(
float
);
openCLSafeCall
(
clAmdBlasSgemmEx
(
order
,
transA
,
transB
,
M
,
N
,
K
,
alpha
,
(
const
cl_mem
)
src1
.
data
,
offa
,
lda
,
(
const
cl_mem
)
src2
.
data
,
offb
,
ldb
,
beta
,
(
cl_mem
)
dst
.
data
,
offc
,
ldc
,
1
,
&
src1
.
clCxt
->
impl
->
clCmdQueue
,
0
,
NULL
,
NULL
)
);
break
;
case
CV_64FC1
:
lda
/=
sizeof
(
double
);
ldb
/=
sizeof
(
double
);
ldc
/=
sizeof
(
double
);
offa
/=
sizeof
(
double
);
offb
/=
sizeof
(
double
);
offc
/=
sizeof
(
double
);
openCLSafeCall
(
clAmdBlasDgemmEx
(
order
,
transA
,
transB
,
M
,
N
,
K
,
alpha
,
(
const
cl_mem
)
src1
.
data
,
offa
,
lda
,
(
const
cl_mem
)
src2
.
data
,
offb
,
ldb
,
beta
,
(
cl_mem
)
dst
.
data
,
offc
,
ldc
,
1
,
&
src1
.
clCxt
->
impl
->
clCmdQueue
,
0
,
NULL
,
NULL
)
);
break
;
case
CV_32FC2
:
{
lda
/=
sizeof
(
std
::
complex
<
float
>
);
ldb
/=
sizeof
(
std
::
complex
<
float
>
);
ldc
/=
sizeof
(
std
::
complex
<
float
>
);
offa
/=
sizeof
(
std
::
complex
<
float
>
);
offb
/=
sizeof
(
std
::
complex
<
float
>
);
offc
/=
sizeof
(
std
::
complex
<
float
>
);
cl_float2
alpha_2
=
{{
alpha
,
0
}};
cl_float2
beta_2
=
{{
beta
,
0
}};
openCLSafeCall
(
clAmdBlasCgemmEx
(
order
,
transA
,
transB
,
M
,
N
,
K
,
alpha_2
,
(
const
cl_mem
)
src1
.
data
,
offa
,
lda
,
(
const
cl_mem
)
src2
.
data
,
offb
,
ldb
,
beta_2
,
(
cl_mem
)
dst
.
data
,
offc
,
ldc
,
1
,
&
src1
.
clCxt
->
impl
->
clCmdQueue
,
0
,
NULL
,
NULL
)
);
}
break
;
case
CV_64FC2
:
{
lda
/=
sizeof
(
std
::
complex
<
double
>
);
ldb
/=
sizeof
(
std
::
complex
<
double
>
);
ldc
/=
sizeof
(
std
::
complex
<
double
>
);
offa
/=
sizeof
(
std
::
complex
<
double
>
);
offb
/=
sizeof
(
std
::
complex
<
double
>
);
offc
/=
sizeof
(
std
::
complex
<
double
>
);
cl_double2
alpha_2
=
{{
alpha
,
0
}};
cl_double2
beta_2
=
{{
beta
,
0
}};
openCLSafeCall
(
clAmdBlasZgemmEx
(
order
,
transA
,
transB
,
M
,
N
,
K
,
alpha_2
,
(
const
cl_mem
)
src1
.
data
,
offa
,
lda
,
(
const
cl_mem
)
src2
.
data
,
offb
,
ldb
,
beta_2
,
(
cl_mem
)
dst
.
data
,
offc
,
ldc
,
1
,
&
src1
.
clCxt
->
impl
->
clCmdQueue
,
0
,
NULL
,
NULL
)
);
}
break
;
}
clAmdBlasTeardown
();
}
#endif
#endif
modules/ocl/test/test_fft.cpp
0 → 100644
View file @
c03ac12f
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Peng Xiao, pengxiao@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using
namespace
std
;
#ifdef HAVE_CLAMDFFT
////////////////////////////////////////////////////////////////////////////
// Dft
// Fixture for the ocl::dft tests.
// Parameter 0 is the transform size, parameter 1 tells whether the transform
// is requested row by row (DFT_ROWS).
PARAM_TEST_CASE(Dft, cv::Size, bool)
{
    cv::Size dft_size;
    bool dft_rows;
    std::vector<cv::ocl::Info> oclinfo;

    virtual void SetUp()
    {
        // the tests need at least one OpenCL device
        const int deviceCount = getDevice(oclinfo);
        CV_Assert(deviceCount > 0);

        dft_size = GET_PARAM(0);
        dft_rows = GET_PARAM(1);
    }
};
// Complex-to-complex forward transform: the device result must match cv::dft
// within a tolerance proportional to the number of elements.
TEST_P(Dft, C2C)
{
    cv::Mat input = randomMat(dft_size, CV_32FC2, 0.0, 10.0);
    const int flags = dft_rows ? cv::DFT_ROWS : 0;

    // reference result computed on the host
    cv::Mat expected;
    cv::dft(input, expected, flags);

    // result computed on the device
    cv::ocl::oclMat d_result;
    cv::ocl::dft(cv::ocl::oclMat(input), d_result, input.size(), flags);

    EXPECT_MAT_NEAR(expected, cv::Mat(d_result), input.size().area() * 1e-4, "");
}
// Round trip: a real matrix is transformed to the complex domain and back
// with DFT_INVERSE + DFT_REAL_OUTPUT; the result is compared with the input.
TEST_P(Dft, R2CthenC2R)
{
    cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 10.0);

    // DFT_ROWS is not supported for real <-> complex transforms yet, so the
    // dft_rows parameter is deliberately not applied here.
    const int forward_flags = 0;
    const int backward_flags = forward_flags + cv::DFT_INVERSE + cv::DFT_REAL_OUTPUT;

    cv::ocl::oclMat d_spectrum;
    cv::ocl::oclMat d_restored;

    cv::ocl::dft(cv::ocl::oclMat(a), d_spectrum, a.size(), forward_flags);
    cv::ocl::dft(d_spectrum, d_restored, a.size(), backward_flags);

    EXPECT_MAT_NEAR(a, d_restored, a.size().area() * 1e-4, "");
}
// Runs every Dft test for each combination of transform size (5x4, 20x20)
// and DFT_ROWS setting (off, on).
INSTANTIATE_TEST_CASE_P(ocl_DFT, Dft,
                        testing::Combine(
                            testing::Values(cv::Size(5, 4), cv::Size(20, 20)),
                            testing::Values(false, true)));
#endif // HAVE_CLAMDFFT
modules/ocl/test/test_gemm.cpp
0 → 100644
View file @
c03ac12f
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Peng Xiao, pengxiao@multicorewareinc.com
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using
namespace
std
;
#ifdef HAVE_CLAMDBLAS
////////////////////////////////////////////////////////////////////////////
// GEMM
// Fixture for the ocl::gemm tests.
// Parameter 0 is the matrix type, parameter 1 the size shared by all three
// operands, parameter 2 the GEMM_*_T flag combination.
PARAM_TEST_CASE(Gemm, int, cv::Size, int)
{
    int type;
    cv::Size mat_size;
    int flags;
    vector<cv::ocl::Info> info;

    virtual void SetUp()
    {
        type = GET_PARAM(0);
        mat_size = GET_PARAM(1);
        flags = GET_PARAM(2);

        // Fail right here when no OpenCL device is available (same check as
        // the Dft fixture) instead of failing later inside the first ocl call.
        const int devnums = cv::ocl::getDevice(info);
        CV_Assert(devnums > 0);
    }
};
// Computes 1.0 * op(a) * op(b) + 1.0 * c on the host and on the device and
// expects both results to agree within a tolerance proportional to the
// number of elements.
TEST_P(Gemm, Accuracy)
{
    cv::Mat a = randomMat(mat_size, type, 0.0, 10.0);
    cv::Mat b = randomMat(mat_size, type, 0.0, 10.0);
    cv::Mat c = randomMat(mat_size, type, 0.0, 10.0);

    // reference result computed on the host
    cv::Mat dst;
    cv::gemm(a, b, 1.0, c, 1.0, dst, flags);

    // result computed on the device
    cv::ocl::oclMat ocl_dst;
    cv::ocl::gemm(cv::ocl::oclMat(a), cv::ocl::oclMat(b), 1.0,
                  cv::ocl::oclMat(c), 1.0, ocl_dst, flags);

    EXPECT_MAT_NEAR(dst, ocl_dst, mat_size.area() * 1e-4, "");
}
// Runs the Gemm tests for single-precision real and complex matrices
// (the double-precision types are left disabled), two square sizes and every
// combination of the GEMM_1_T / GEMM_2_T transposition flags.
INSTANTIATE_TEST_CASE_P(ocl_gemm, Gemm,
                        testing::Combine(
                            testing::Values(CV_32FC1, CV_32FC2 /*, CV_64FC1, CV_64FC2*/),
                            testing::Values(cv::Size(20, 20), cv::Size(300, 300)),
                            testing::Values(0,
                                            cv::GEMM_1_T,
                                            cv::GEMM_2_T,
                                            cv::GEMM_1_T + cv::GEMM_2_T)));
#endif
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment