Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
efebd83b
Commit
efebd83b
authored
Oct 29, 2014
by
Alexander Alekhin
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #3159 from ElenaGvozdeva:ocl_gemm
parents
4a9cfbf8
d88fdd03
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
198 additions
and
2 deletions
+198
-2
matmul.cpp
modules/core/src/matmul.cpp
+85
-2
gemm.cl
modules/core/src/opencl/gemm.cl
+113
-0
No files found.
modules/core/src/matmul.cpp
View file @
efebd83b
...
...
@@ -693,7 +693,7 @@ static void GEMMStore_64fc( const Complexd* c_data, size_t c_step,
#ifdef HAVE_CLAMDBLAS
static
bool
ocl_gemm
(
InputArray
matA
,
InputArray
matB
,
double
alpha
,
static
bool
ocl_gemm
_amdblas
(
InputArray
matA
,
InputArray
matB
,
double
alpha
,
InputArray
matC
,
double
beta
,
OutputArray
matD
,
int
flags
)
{
int
type
=
matA
.
type
(),
esz
=
CV_ELEM_SIZE
(
type
);
...
...
@@ -775,6 +775,84 @@ static bool ocl_gemm( InputArray matA, InputArray matB, double alpha,
#endif
#ifdef HAVE_OPENCL
static
bool
ocl_gemm
(
InputArray
matA
,
InputArray
matB
,
double
alpha
,
InputArray
matC
,
double
beta
,
OutputArray
matD
,
int
flags
)
{
int
depth
=
matA
.
depth
(),
cn
=
matA
.
channels
();
int
type
=
CV_MAKETYPE
(
depth
,
cn
);
CV_Assert
(
type
==
matB
.
type
()
&&
(
type
==
CV_32FC1
||
type
==
CV_64FC1
||
type
==
CV_32FC2
||
type
==
CV_64FC2
)
);
const
ocl
::
Device
&
dev
=
ocl
::
Device
::
getDefault
();
bool
doubleSupport
=
dev
.
doubleFPConfig
()
>
0
;
if
(
!
doubleSupport
&&
depth
==
CV_64F
)
return
false
;
bool
haveC
=
matC
.
kind
()
!=
cv
::
_InputArray
::
NONE
;
Size
sizeA
=
matA
.
size
(),
sizeB
=
matB
.
size
(),
sizeC
=
haveC
?
matC
.
size
()
:
Size
(
0
,
0
);
bool
atrans
=
(
flags
&
GEMM_1_T
)
!=
0
,
btrans
=
(
flags
&
GEMM_2_T
)
!=
0
,
ctrans
=
(
flags
&
GEMM_3_T
)
!=
0
;
if
(
atrans
)
sizeA
=
Size
(
sizeA
.
height
,
sizeA
.
width
);
if
(
btrans
)
sizeB
=
Size
(
sizeB
.
height
,
sizeB
.
width
);
if
(
haveC
&&
ctrans
)
sizeC
=
Size
(
sizeC
.
height
,
sizeC
.
width
);
Size
sizeD
(
sizeB
.
width
,
sizeA
.
height
);
CV_Assert
(
!
haveC
||
matC
.
type
()
==
type
);
CV_Assert
(
sizeA
.
width
==
sizeB
.
height
&&
(
!
haveC
||
sizeC
==
sizeD
)
);
int
max_wg_size
=
(
int
)
dev
.
maxWorkGroupSize
();
int
block_size
=
(
max_wg_size
/
(
32
*
cn
)
<
32
)
?
(
max_wg_size
/
(
16
*
cn
)
<
16
)
?
(
max_wg_size
/
(
8
*
cn
)
<
8
)
?
1
:
8
:
16
:
32
;
matD
.
create
(
sizeD
,
type
);
UMat
A
=
matA
.
getUMat
(),
B
=
matB
.
getUMat
(),
D
=
matD
.
getUMat
();
if
(
atrans
)
A
=
A
.
t
();
if
(
btrans
)
B
=
B
.
t
();
if
(
haveC
)
ctrans
?
transpose
(
matC
,
D
)
:
matC
.
copyTo
(
D
);
int
vectorWidths
[]
=
{
4
,
4
,
2
,
2
,
1
,
4
,
cn
,
-
1
};
int
kercn
=
ocl
::
checkOptimalVectorWidth
(
vectorWidths
,
B
,
D
);
String
opts
=
format
(
"-D T=%s -D T1=%s -D WT=%s -D cn=%d -D kercn=%d -D LOCAL_SIZE=%d %s %s %s"
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
depth
),
ocl
::
typeToStr
(
CV_MAKETYPE
(
depth
,
kercn
)),
cn
,
kercn
,
block_size
,
(
sizeA
.
width
%
block_size
!=
0
)
?
"-D NO_MULT"
:
""
,
haveC
?
"-D HAVE_C"
:
""
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
);
ocl
::
Kernel
k
(
"gemm"
,
cv
::
ocl
::
core
::
gemm_oclsrc
,
opts
);
if
(
k
.
empty
())
return
false
;
if
(
depth
==
CV_64F
)
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
A
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
B
,
cn
,
kercn
),
ocl
::
KernelArg
::
ReadWrite
(
D
,
cn
,
kercn
),
sizeA
.
width
,
alpha
,
beta
);
else
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
A
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
B
,
cn
,
kercn
),
ocl
::
KernelArg
::
ReadWrite
(
D
,
cn
,
kercn
),
sizeA
.
width
,
(
float
)
alpha
,
(
float
)
beta
);
size_t
globalsize
[
2
]
=
{
sizeD
.
width
*
cn
/
kercn
,
sizeD
.
height
};
size_t
localsize
[
2
]
=
{
block_size
,
block_size
};
return
k
.
run
(
2
,
globalsize
,
block_size
!=
1
?
localsize
:
NULL
,
false
);
}
#endif
}
void
cv
::
gemm
(
InputArray
matA
,
InputArray
matB
,
double
alpha
,
...
...
@@ -783,7 +861,12 @@ void cv::gemm( InputArray matA, InputArray matB, double alpha,
#ifdef HAVE_CLAMDBLAS
CV_OCL_RUN
(
ocl
::
haveAmdBlas
()
&&
matA
.
dims
()
<=
2
&&
matB
.
dims
()
<=
2
&&
matC
.
dims
()
<=
2
&&
_matD
.
isUMat
()
&&
matA
.
cols
()
>
20
&&
matA
.
rows
()
>
20
&&
matB
.
cols
()
>
20
,
// since it works incorrect for small sizes
ocl_gemm
(
matA
,
matB
,
alpha
,
matC
,
beta
,
_matD
,
flags
))
ocl_gemm_amdblas
(
matA
,
matB
,
alpha
,
matC
,
beta
,
_matD
,
flags
))
#endif
#ifdef HAVE_OPENCL
CV_OCL_RUN
(
_matD
.
isUMat
()
&&
matA
.
dims
()
<=
2
&&
matB
.
dims
()
<=
2
&&
matC
.
dims
()
<=
2
,
ocl_gemm
(
matA
,
matB
,
alpha
,
matC
,
beta
,
_matD
,
flags
))
#endif
const
int
block_lin_size
=
128
;
...
...
modules/core/src/opencl/gemm.cl
0 → 100644
View file @
efebd83b
//
This
file
is
part
of
OpenCV
project.
//
It
is
subject
to
the
license
terms
in
the
LICENSE
file
found
in
the
top-level
directory
//
of
this
distribution
and
at
http://opencv.org/license.html.
//
Copyright
(
C
)
2014
,
Itseez,
Inc.,
all
rights
reserved.
//
Third
party
copyrights
are
property
of
their
respective
owners.
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
define
TSIZE
(
int
)
sizeof
(
T
)
#
define
WTSIZE
(
int
)
sizeof
(
WT
)
#
define
IND_A
mad24
(
y,
A_step,
A_offset
)
#
define
IND_B
mad24
(
x,
WTSIZE,
B_offset
)
#
define
STEP_B
B_step
/
WTSIZE
#
define
LOCAL_SIZE_ODD
(
LOCAL_SIZE
+
1
)
#
if
cn==2
#
if
kercn==2
#
define
MUL
(
a,
b
)
\
{
\
sum.x
+=
fma
(
a.x,
b.x,
-
a.y
*
b.y
)
;\
sum.y
+=
fma
(
a.x,
b.y,
a.y
*
b.x
)
;\
}
#
else
#
define
MUL
(
a,
b
)
\
{
\
sum.x
+=
fma
(
a.x,
b.x,
-
a.y
*
b.y
)
;\
sum.y
+=
fma
(
a.x,
b.y,
a.y
*
b.x
)
;\
sum.z
+=
fma
(
a.x,
b.z,
-
a.y
*
b.w
)
;\
sum.w
+=
fma
(
a.x,
b.w,
a.y
*
b.z
)
;\
}
#
endif
#
else
#
define
MUL
(
a,
b
)
sum
=
fma
(
a,
b,
sum
)
;
#
endif
__kernel
void
gemm
(
__global
const
uchar
*
A_ptr,
int
A_step,
int
A_offset,
__global
const
uchar
*
B_ptr,
int
B_step,
int
B_offset,
__global
uchar
*
D_ptr,
int
D_step,
int
D_offset,
int
D_rows,
int
D_cols,
int
n,
T1
alpha,
T1
beta
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
lidx
=
get_local_id
(
0
)
;
int
lidy
=
get_local_id
(
1
)
;
__global
const
T*
A
=
(
__global
const
T*
)(
A_ptr
+
IND_A
)
;
__global
const
WT*
B
=
(
__global
const
WT*
)(
B_ptr
+
IND_B
)
;
WT
sum
=
(
WT
)(
0
)
;
#
if
LOCAL_SIZE
==
1
if
(
x
<
D_cols
&&
y
<
D_rows
)
{
for
(
int
i
=
0
; i < n; ++i)
MUL
(
A[i],
B[i*STEP_B]
)
;
#
else
__local
T
a_local[LOCAL_SIZE_ODD*LOCAL_SIZE]
;
__local
WT
b_local[LOCAL_SIZE_ODD*LOCAL_SIZE]
;
int
reps
;
#
if
NO_MULT
reps
=
(
n
+
LOCAL_SIZE-1
)
/LOCAL_SIZE
;
#
else
reps
=
n/LOCAL_SIZE
;
#
endif
for
(
int
p
=
0
; p < reps; ++p)
{
if
(
p
*
LOCAL_SIZE
+
lidx
<
n
&&
y
<
D_rows
)
a_local[mad24
(
lidy,
LOCAL_SIZE_ODD,
lidx
)
]
=
A[mad24
(
p,
LOCAL_SIZE,
lidx
)
]
;
if
(
p
*
LOCAL_SIZE
+
lidy
<
n
&&
x
<
D_cols
)
b_local[mad24
(
lidy,
LOCAL_SIZE_ODD,
lidx
)
]
=
B[mad24
(
p,
LOCAL_SIZE,
lidy
)
*STEP_B]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
x
<
D_cols
&&
y
<
D_rows
)
{
#
if
NO_MULT
int
ie
=
min
(
LOCAL_SIZE,
n
-
p
*
LOCAL_SIZE
)
;
for
(
int
i
=
0
; i < ie; ++i)
#
else
for
(
int
i
=
0
; i < LOCAL_SIZE; ++i)
#
endif
MUL
(
a_local[mad24
(
lidy,
LOCAL_SIZE_ODD,
i
)
],
b_local[mad24
(
i,
LOCAL_SIZE_ODD,
lidx
)
]
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
if
(
x
<
D_cols
&&
y
<
D_rows
)
{
#
endif
__global
WT*
D
=
(
__global
WT*
)(
D_ptr
+
mad24
(
y,
D_step,
mad24
(
x,
WTSIZE,
D_offset
)))
;
#
if
HAVE_C
D[0]
=
mad
(
alpha,
sum,
D[0]*beta
)
;
#
else
D[0]
=
alpha
*
sum
;
#
endif
}
}
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment