Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
d78bc3c3
Commit
d78bc3c3
authored
Aug 22, 2014
by
Elena Gvozdeva
Committed by
ElenaGvozdeva
Oct 27, 2014
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
naive implementation
parent
dee56598
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
140 additions
and
2 deletions
+140
-2
matmul.cpp
modules/core/src/matmul.cpp
+80
-2
gemm.cl
modules/core/src/opencl/gemm.cl
+60
-0
No files found.
modules/core/src/matmul.cpp
View file @
d78bc3c3
...
...
@@ -693,7 +693,7 @@ static void GEMMStore_64fc( const Complexd* c_data, size_t c_step,
#ifdef HAVE_CLAMDBLAS
static
bool
ocl_gemm
(
InputArray
matA
,
InputArray
matB
,
double
alpha
,
static
bool
ocl_gemm
_amdblas
(
InputArray
matA
,
InputArray
matB
,
double
alpha
,
InputArray
matC
,
double
beta
,
OutputArray
matD
,
int
flags
)
{
int
type
=
matA
.
type
(),
esz
=
CV_ELEM_SIZE
(
type
);
...
...
@@ -775,6 +775,79 @@ static bool ocl_gemm( InputArray matA, InputArray matB, double alpha,
#endif
#ifdef HAVE_OPENCL
static
bool
ocl_gemm
(
InputArray
matA
,
InputArray
matB
,
double
alpha
,
InputArray
matC
,
double
beta
,
OutputArray
matD
,
int
flags
)
{
int
depth
=
matA
.
depth
(),
cn
=
matA
.
channels
();
int
type
=
CV_MAKETYPE
(
depth
,
cn
);
CV_Assert
(
type
==
matB
.
type
()
&&
(
type
==
CV_32FC1
||
type
==
CV_64FC1
||
type
==
CV_32FC2
||
type
==
CV_64FC2
)
);
const
ocl
::
Device
&
dev
=
ocl
::
Device
::
getDefault
();
bool
doubleSupport
=
dev
.
doubleFPConfig
()
>
0
;
if
((
!
doubleSupport
&&
depth
==
CV_64F
))
return
false
;
bool
haveC
=
matC
.
kind
()
!=
cv
::
_InputArray
::
NONE
;
Size
sizeA
=
matA
.
size
(),
sizeB
=
matB
.
size
(),
sizeC
=
haveC
?
matC
.
size
()
:
Size
(
0
,
0
);
bool
atrans
=
(
flags
&
GEMM_1_T
)
!=
0
,
btrans
=
(
flags
&
GEMM_2_T
)
!=
0
,
ctrans
=
(
flags
&
GEMM_3_T
)
!=
0
;
if
(
atrans
)
sizeA
=
Size
(
sizeA
.
height
,
sizeA
.
width
);
if
(
btrans
)
sizeB
=
Size
(
sizeB
.
height
,
sizeB
.
width
);
if
(
haveC
&&
ctrans
)
sizeC
=
Size
(
sizeC
.
height
,
sizeC
.
width
);
Size
sizeD
(
sizeB
.
width
,
sizeA
.
height
);
CV_Assert
(
matB
.
type
()
==
type
&&
(
!
haveC
||
matC
.
type
()
==
type
)
);
CV_Assert
(
sizeA
.
width
==
sizeB
.
height
&&
(
!
haveC
||
sizeC
==
sizeD
)
);
String
opts
=
format
(
"-D T=%s -D T1=%s -D cn=%d %s %s"
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
depth
),
cn
,
haveC
?
"-D HAVE_C"
:
""
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
);
ocl
::
Kernel
k
(
"gemm"
,
cv
::
ocl
::
core
::
gemm_oclsrc
,
opts
);
if
(
k
.
empty
())
return
false
;
matD
.
create
(
sizeD
,
type
);
UMat
A
=
matA
.
getUMat
(),
B
=
matB
.
getUMat
(),
D
=
matD
.
getUMat
();
if
(
atrans
)
A
=
A
.
t
();
if
(
btrans
)
B
=
B
.
t
();
if
(
haveC
)
ctrans
?
transpose
(
matC
,
D
)
:
matC
.
copyTo
(
D
);
else
D
.
setTo
(
Scalar
::
all
(
0
));
if
(
depth
==
CV_64F
)
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
A
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
B
),
ocl
::
KernelArg
::
ReadWrite
(
D
),
sizeA
.
width
,
alpha
,
beta
);
else
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
A
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
B
),
ocl
::
KernelArg
::
ReadWrite
(
D
),
sizeA
.
width
,
(
float
)
alpha
,
(
float
)
beta
);
size_t
globalsize
[
2
]
=
{
sizeD
.
width
,
sizeD
.
height
};
return
k
.
run
(
2
,
globalsize
,
NULL
,
false
);
}
#endif
}
void
cv
::
gemm
(
InputArray
matA
,
InputArray
matB
,
double
alpha
,
...
...
@@ -783,7 +856,12 @@ void cv::gemm( InputArray matA, InputArray matB, double alpha,
#ifdef HAVE_CLAMDBLAS
CV_OCL_RUN
(
ocl
::
haveAmdBlas
()
&&
matA
.
dims
()
<=
2
&&
matB
.
dims
()
<=
2
&&
matC
.
dims
()
<=
2
&&
_matD
.
isUMat
()
&&
matA
.
cols
()
>
20
&&
matA
.
rows
()
>
20
&&
matB
.
cols
()
>
20
,
// since it works incorrect for small sizes
ocl_gemm
(
matA
,
matB
,
alpha
,
matC
,
beta
,
_matD
,
flags
))
ocl_gemm_amdblas
(
matA
,
matB
,
alpha
,
matC
,
beta
,
_matD
,
flags
))
#endif
#ifdef HAVE_OPENCL
CV_OCL_RUN
(
_matD
.
isUMat
()
&&
matA
.
dims
()
<=
2
&&
matB
.
dims
()
<=
2
&&
matC
.
dims
()
<=
2
,
ocl_gemm
(
matA
,
matB
,
alpha
,
matC
,
beta
,
_matD
,
flags
))
#endif
const
int
block_lin_size
=
128
;
...
...
modules/core/src/opencl/gemm.cl
0 → 100644
View file @
d78bc3c3
//
This
file
is
part
of
OpenCV
project.
//
It
is
subject
to
the
license
terms
in
the
LICENSE
file
found
in
the
top-level
directory
//
of
this
distribution
and
at
http://opencv.org/license.html.
//
Copyright
(
C
)
2014
,
Itseez,
Inc.,
all
rights
reserved.
//
Third
party
copyrights
are
property
of
their
respective
owners.
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
define
TSIZE
(
int
)
sizeof
(
T
)
#
define
IND_A
mad24
(
y,
A_step,
A_offset
)
#
define
STEP_A
1
#
define
IND_B
mad24
(
x,
TSIZE,
B_offset
)
#
define
STEP_B
B_step
/
TSIZE
#
if
cn==2
#
define
MUL
(
i,
a,
b
)
\
{
\
sum.x
+=
fma
(
a.x,
b.x,
-
a.y
*
b.y
)
;\
sum.y
+=
fma
(
a.x,
b.y,
a.y
*
b.x
)
;\
}
#
else
#
define
MUL
(
i,
a,
b
)
sum
+=
a
*
b
#
endif
__kernel
void
gemm
(
__global
const
uchar
*
A_ptr,
int
A_step,
int
A_offset,
__global
const
uchar
*
B_ptr,
int
B_step,
int
B_offset,
__global
uchar
*
D_ptr,
int
D_step,
int
D_offset,
int
D_rows,
int
D_cols,
int
n,
T1
alpha,
T1
beta
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
x
<
D_cols
&&
y
<
D_rows
)
{
__global
const
T*
A
=
(
__global
const
T*
)(
A_ptr
+
IND_A
)
;
__global
const
T*
B
=
(
__global
const
T*
)(
B_ptr
+
IND_B
)
;
T
sum
=
(
T
)(
0
)
;
for
(
int
i
=
0
; i < n; ++i)
MUL
(
i,
A[i*STEP_A],
B[i*STEP_B]
)
;
__global
T*
D
=
(
__global
T*
)(
D_ptr
+
mad24
(
y,
D_step,
mad24
(
x,
TSIZE,
D_offset
)))
;
#
if
HAVE_C
D[0]
=
mad
(
alpha,
sum,
D[0]*beta
)
;
#
else
D[0]
=
alpha
*
sum
;
#
endif
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment