Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
b332152b
Commit
b332152b
authored
Aug 28, 2014
by
Alexander Alekhin
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #2956 from ilya-lavrenov:tapi_accumulate
parents
d6681597
7017b125
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
99 additions
and
52 deletions
+99
-52
ocl.hpp
modules/core/include/opencv2/core/ocl.hpp
+21
-1
ocl.cpp
modules/core/src/ocl.cpp
+26
-25
accum.cpp
modules/imgproc/src/accum.cpp
+11
-10
accumulate.cl
modules/imgproc/src/opencl/accumulate.cl
+41
-16
No files found.
modules/core/include/opencv2/core/ocl.hpp
View file @
b332152b
...
@@ -598,9 +598,29 @@ CV_EXPORTS const char* typeToStr(int t);
...
@@ -598,9 +598,29 @@ CV_EXPORTS const char* typeToStr(int t);
CV_EXPORTS
const
char
*
memopTypeToStr
(
int
t
);
CV_EXPORTS
const
char
*
memopTypeToStr
(
int
t
);
CV_EXPORTS
String
kernelToStr
(
InputArray
_kernel
,
int
ddepth
=
-
1
,
const
char
*
name
=
NULL
);
CV_EXPORTS
String
kernelToStr
(
InputArray
_kernel
,
int
ddepth
=
-
1
,
const
char
*
name
=
NULL
);
CV_EXPORTS
void
getPlatfomsInfo
(
std
::
vector
<
PlatformInfo
>&
platform_info
);
CV_EXPORTS
void
getPlatfomsInfo
(
std
::
vector
<
PlatformInfo
>&
platform_info
);
enum
OclVectorStrategy
{
// all matrices have its own vector width
OCL_VECTOR_OWN
=
0
,
// all matrices have maximal vector width among all matrices
// (useful for cases when matrices have different data types)
OCL_VECTOR_MAX
=
1
,
// default strategy
OCL_VECTOR_DEFAULT
=
OCL_VECTOR_OWN
};
CV_EXPORTS
int
predictOptimalVectorWidth
(
InputArray
src1
,
InputArray
src2
=
noArray
(),
InputArray
src3
=
noArray
(),
CV_EXPORTS
int
predictOptimalVectorWidth
(
InputArray
src1
,
InputArray
src2
=
noArray
(),
InputArray
src3
=
noArray
(),
InputArray
src4
=
noArray
(),
InputArray
src5
=
noArray
(),
InputArray
src6
=
noArray
(),
InputArray
src4
=
noArray
(),
InputArray
src5
=
noArray
(),
InputArray
src6
=
noArray
(),
InputArray
src7
=
noArray
(),
InputArray
src8
=
noArray
(),
InputArray
src9
=
noArray
());
InputArray
src7
=
noArray
(),
InputArray
src8
=
noArray
(),
InputArray
src9
=
noArray
(),
OclVectorStrategy
strat
=
OCL_VECTOR_DEFAULT
);
// with OCL_VECTOR_MAX strategy
CV_EXPORTS
int
predictOptimalVectorWidthMax
(
InputArray
src1
,
InputArray
src2
=
noArray
(),
InputArray
src3
=
noArray
(),
InputArray
src4
=
noArray
(),
InputArray
src5
=
noArray
(),
InputArray
src6
=
noArray
(),
InputArray
src7
=
noArray
(),
InputArray
src8
=
noArray
(),
InputArray
src9
=
noArray
());
CV_EXPORTS
void
buildOptionsAddMatrixDescription
(
String
&
buildOptions
,
const
String
&
name
,
InputArray
_m
);
CV_EXPORTS
void
buildOptionsAddMatrixDescription
(
String
&
buildOptions
,
const
String
&
name
,
InputArray
_m
);
...
...
modules/core/src/ocl.cpp
View file @
b332152b
...
@@ -4451,42 +4451,46 @@ String kernelToStr(InputArray _kernel, int ddepth, const char * name)
...
@@ -4451,42 +4451,46 @@ String kernelToStr(InputArray _kernel, int ddepth, const char * name)
if (!src.empty()) \
if (!src.empty()) \
{ \
{ \
CV_Assert(src.isMat() || src.isUMat()); \
CV_Assert(src.isMat() || src.isUMat()); \
int ctype = src.type(), ccn = CV_MAT_CN(ctype); \
Size csize = src.size(); \
Size csize = src.size(); \
cols.push_back(ccn * csize.width); \
int ctype = src.type(), ccn = CV_MAT_CN(ctype), cdepth = CV_MAT_DEPTH(ctype), \
if (ctype != type) \
ckercn = vectorWidths[cdepth], cwidth = ccn * csize.width; \
if (cwidth < ckercn || ckercn <= 0) \
return 1; \
cols.push_back(cwidth); \
if (strat == OCL_VECTOR_OWN && ctype != ref_type) \
return 1; \
return 1; \
offsets.push_back(src.offset()); \
offsets.push_back(src.offset()); \
steps.push_back(src.step()); \
steps.push_back(src.step()); \
dividers.push_back(ckercn * CV_ELEM_SIZE1(ctype)); \
kercns.push_back(ckercn); \
} \
} \
} \
} \
while ((void)0, 0)
while ((void)0, 0)
int
predictOptimalVectorWidth
(
InputArray
src1
,
InputArray
src2
,
InputArray
src3
,
int
predictOptimalVectorWidth
(
InputArray
src1
,
InputArray
src2
,
InputArray
src3
,
InputArray
src4
,
InputArray
src5
,
InputArray
src6
,
InputArray
src4
,
InputArray
src5
,
InputArray
src6
,
InputArray
src7
,
InputArray
src8
,
InputArray
src9
)
InputArray
src7
,
InputArray
src8
,
InputArray
src9
,
OclVectorStrategy
strat
)
{
{
int
type
=
src1
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
),
esz1
=
CV_ELEM_SIZE1
(
depth
);
Size
ssize
=
src1
.
size
();
const
ocl
::
Device
&
d
=
ocl
::
Device
::
getDefault
();
const
ocl
::
Device
&
d
=
ocl
::
Device
::
getDefault
();
int
ref_type
=
src1
.
type
();
int
vectorWidths
[]
=
{
d
.
preferredVectorWidthChar
(),
d
.
preferredVectorWidthChar
(),
int
vectorWidths
[]
=
{
d
.
preferredVectorWidthChar
(),
d
.
preferredVectorWidthChar
(),
d
.
preferredVectorWidthShort
(),
d
.
preferredVectorWidthShort
(),
d
.
preferredVectorWidthShort
(),
d
.
preferredVectorWidthShort
(),
d
.
preferredVectorWidthInt
(),
d
.
preferredVectorWidthFloat
(),
d
.
preferredVectorWidthInt
(),
d
.
preferredVectorWidthFloat
(),
d
.
preferredVectorWidthDouble
(),
-
1
}
,
kercn
=
vectorWidths
[
depth
]
;
d
.
preferredVectorWidthDouble
(),
-
1
};
// if the device says don't use vectors
// if the device says don't use vectors
if
(
vectorWidths
[
0
]
==
1
)
if
(
vectorWidths
[
0
]
==
1
)
{
{
// it's heuristic
// it's heuristic
int
vectorWidthsOthers
[]
=
{
16
,
16
,
8
,
8
,
1
,
1
,
1
,
-
1
};
vectorWidths
[
CV_8U
]
=
vectorWidths
[
CV_8S
]
=
16
;
kercn
=
vectorWidthsOthers
[
depth
];
vectorWidths
[
CV_16U
]
=
vectorWidths
[
CV_16S
]
=
8
;
vectorWidths
[
CV_32S
]
=
vectorWidths
[
CV_32F
]
=
vectorWidths
[
CV_64F
]
=
1
;
}
}
if
(
ssize
.
width
*
cn
<
kercn
||
kercn
<=
0
)
return
1
;
std
::
vector
<
size_t
>
offsets
,
steps
,
cols
;
std
::
vector
<
size_t
>
offsets
,
steps
,
cols
;
std
::
vector
<
int
>
dividers
,
kercns
;
PROCESS_SRC
(
src1
);
PROCESS_SRC
(
src1
);
PROCESS_SRC
(
src2
);
PROCESS_SRC
(
src2
);
PROCESS_SRC
(
src3
);
PROCESS_SRC
(
src3
);
...
@@ -4498,27 +4502,24 @@ int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3,
...
@@ -4498,27 +4502,24 @@ int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3,
PROCESS_SRC
(
src9
);
PROCESS_SRC
(
src9
);
size_t
size
=
offsets
.
size
();
size_t
size
=
offsets
.
size
();
int
wsz
=
kercn
*
esz1
;
std
::
vector
<
int
>
dividers
(
size
,
wsz
);
for
(
size_t
i
=
0
;
i
<
size
;
++
i
)
for
(
size_t
i
=
0
;
i
<
size
;
++
i
)
while
(
offsets
[
i
]
%
dividers
[
i
]
!=
0
||
steps
[
i
]
%
dividers
[
i
]
!=
0
||
cols
[
i
]
%
divider
s
[
i
]
!=
0
)
while
(
offsets
[
i
]
%
dividers
[
i
]
!=
0
||
steps
[
i
]
%
dividers
[
i
]
!=
0
||
cols
[
i
]
%
kercn
s
[
i
]
!=
0
)
dividers
[
i
]
>>=
1
;
dividers
[
i
]
>>=
1
,
kercns
[
i
]
>>=
1
;
// default strategy
// default strategy
for
(
size_t
i
=
0
;
i
<
size
;
++
i
)
int
kercn
=
*
std
::
min_element
(
kercns
.
begin
(),
kercns
.
end
());
if
(
dividers
[
i
]
!=
wsz
)
{
kercn
=
1
;
break
;
}
// another strategy
// width = *std::min_element(dividers.begin(), dividers.end());
return
kercn
;
return
kercn
;
}
}
int
predictOptimalVectorWidthMax
(
InputArray
src1
,
InputArray
src2
,
InputArray
src3
,
InputArray
src4
,
InputArray
src5
,
InputArray
src6
,
InputArray
src7
,
InputArray
src8
,
InputArray
src9
)
{
return
predictOptimalVectorWidth
(
src1
,
src2
,
src3
,
src4
,
src5
,
src6
,
src7
,
src8
,
src9
,
OCL_VECTOR_MAX
);
}
#undef PROCESS_SRC
#undef PROCESS_SRC
...
...
modules/imgproc/src/accum.cpp
View file @
b332152b
...
@@ -369,11 +369,10 @@ static bool ocl_accumulate( InputArray _src, InputArray _src2, InputOutputArray
...
@@ -369,11 +369,10 @@ static bool ocl_accumulate( InputArray _src, InputArray _src2, InputOutputArray
CV_Assert
(
op_type
==
ACCUMULATE
||
op_type
==
ACCUMULATE_SQUARE
||
CV_Assert
(
op_type
==
ACCUMULATE
||
op_type
==
ACCUMULATE_SQUARE
||
op_type
==
ACCUMULATE_PRODUCT
||
op_type
==
ACCUMULATE_WEIGHTED
);
op_type
==
ACCUMULATE_PRODUCT
||
op_type
==
ACCUMULATE_WEIGHTED
);
int
stype
=
_src
.
type
(),
cn
=
CV_MAT_CN
(
stype
);
const
ocl
::
Device
&
dev
=
ocl
::
Device
::
getDefault
();
int
sdepth
=
CV_MAT_DEPTH
(
stype
),
ddepth
=
_dst
.
depth
();
bool
haveMask
=
!
_mask
.
empty
(),
doubleSupport
=
dev
.
doubleFPConfig
()
>
0
;
int
stype
=
_src
.
type
(),
sdepth
=
CV_MAT_DEPTH
(
stype
),
cn
=
CV_MAT_CN
(
stype
),
ddepth
=
_dst
.
depth
();
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
,
int
kercn
=
haveMask
?
cn
:
ocl
::
predictOptimalVectorWidthMax
(
_src
,
_src2
,
_dst
),
rowsPerWI
=
dev
.
isIntel
()
?
4
:
1
;
haveMask
=
!
_mask
.
empty
();
if
(
!
doubleSupport
&&
(
sdepth
==
CV_64F
||
ddepth
==
CV_64F
))
if
(
!
doubleSupport
&&
(
sdepth
==
CV_64F
||
ddepth
==
CV_64F
))
return
false
;
return
false
;
...
@@ -381,11 +380,13 @@ static bool ocl_accumulate( InputArray _src, InputArray _src2, InputOutputArray
...
@@ -381,11 +380,13 @@ static bool ocl_accumulate( InputArray _src, InputArray _src2, InputOutputArray
const
char
*
const
opMap
[
4
]
=
{
"ACCUMULATE"
,
"ACCUMULATE_SQUARE"
,
"ACCUMULATE_PRODUCT"
,
const
char
*
const
opMap
[
4
]
=
{
"ACCUMULATE"
,
"ACCUMULATE_SQUARE"
,
"ACCUMULATE_PRODUCT"
,
"ACCUMULATE_WEIGHTED"
};
"ACCUMULATE_WEIGHTED"
};
char
cvt
[
40
];
ocl
::
Kernel
k
(
"accumulate"
,
ocl
::
imgproc
::
accumulate_oclsrc
,
ocl
::
Kernel
k
(
"accumulate"
,
ocl
::
imgproc
::
accumulate_oclsrc
,
format
(
"-D %s%s -D srcT
=%s -D cn=%d -D dstT=%s
%s"
,
format
(
"-D %s%s -D srcT
1=%s -D cn=%d -D dstT1=%s%s -D rowsPerWI=%d -D convertToDT=
%s"
,
opMap
[
op_type
],
haveMask
?
" -D HAVE_MASK"
:
""
,
opMap
[
op_type
],
haveMask
?
" -D HAVE_MASK"
:
""
,
ocl
::
typeToStr
(
sdepth
),
cn
,
ocl
::
typeToStr
(
ddepth
),
ocl
::
typeToStr
(
sdepth
),
kercn
,
ocl
::
typeToStr
(
ddepth
),
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
));
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
,
rowsPerWI
,
ocl
::
convertTypeStr
(
sdepth
,
ddepth
,
1
,
cvt
)));
if
(
k
.
empty
())
if
(
k
.
empty
())
return
false
;
return
false
;
...
@@ -393,7 +394,7 @@ static bool ocl_accumulate( InputArray _src, InputArray _src2, InputOutputArray
...
@@ -393,7 +394,7 @@ static bool ocl_accumulate( InputArray _src, InputArray _src2, InputOutputArray
ocl
::
KernelArg
srcarg
=
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
ocl
::
KernelArg
srcarg
=
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
src2arg
=
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src2
),
src2arg
=
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src2
),
dstarg
=
ocl
::
KernelArg
::
ReadWrite
(
dst
),
dstarg
=
ocl
::
KernelArg
::
ReadWrite
(
dst
,
cn
,
kercn
),
maskarg
=
ocl
::
KernelArg
::
ReadOnlyNoSize
(
mask
);
maskarg
=
ocl
::
KernelArg
::
ReadOnlyNoSize
(
mask
);
int
argidx
=
k
.
set
(
0
,
srcarg
);
int
argidx
=
k
.
set
(
0
,
srcarg
);
...
@@ -410,7 +411,7 @@ static bool ocl_accumulate( InputArray _src, InputArray _src2, InputOutputArray
...
@@ -410,7 +411,7 @@ static bool ocl_accumulate( InputArray _src, InputArray _src2, InputOutputArray
if
(
haveMask
)
if
(
haveMask
)
k
.
set
(
argidx
,
maskarg
);
k
.
set
(
argidx
,
maskarg
);
size_t
globalsize
[
2
]
=
{
src
.
cols
,
src
.
rows
};
size_t
globalsize
[
2
]
=
{
src
.
cols
*
cn
/
kercn
,
(
src
.
rows
+
rowsPerWI
-
1
)
/
rowsPerWI
};
return
k
.
run
(
2
,
globalsize
,
NULL
,
false
);
return
k
.
run
(
2
,
globalsize
,
NULL
,
false
);
}
}
...
...
modules/imgproc/src/opencl/accumulate.cl
View file @
b332152b
...
@@ -13,13 +13,18 @@
...
@@ -13,13 +13,18 @@
#
endif
#
endif
#
endif
#
endif
#
define
SRC_TSIZE
cn
*
(
int
)
sizeof
(
srcT1
)
#
define
DST_TSIZE
cn
*
(
int
)
sizeof
(
dstT1
)
#
define
noconvert
__kernel
void
accumulate
(
__global
const
uchar
*
srcptr,
int
src_step,
int
src_offset,
__kernel
void
accumulate
(
__global
const
uchar
*
srcptr,
int
src_step,
int
src_offset,
#
ifdef
ACCUMULATE_PRODUCT
#
ifdef
ACCUMULATE_PRODUCT
__global
const
uchar
*
src2ptr,
int
src2_step,
int
src2_offset,
__global
const
uchar
*
src2ptr,
int
src2_step,
int
src2_offset,
#
endif
#
endif
__global
uchar
*
dstptr,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols
__global
uchar
*
dstptr,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols
#
ifdef
ACCUMULATE_WEIGHTED
#
ifdef
ACCUMULATE_WEIGHTED
,
dstT
alpha
,
dstT
1
alpha
#
endif
#
endif
#
ifdef
HAVE_MASK
#
ifdef
HAVE_MASK
,
__global
const
uchar
*
mask,
int
mask_step,
int
mask_offset
,
__global
const
uchar
*
mask,
int
mask_step,
int
mask_offset
...
@@ -27,39 +32,59 @@ __kernel void accumulate(__global const uchar * srcptr, int src_step, int src_of
...
@@ -27,39 +32,59 @@ __kernel void accumulate(__global const uchar * srcptr, int src_step, int src_of
)
)
{
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
*
rowsPerWI
;
if
(
x
<
dst_cols
&&
y
<
dst_rows
)
if
(
x
<
dst_cols
)
{
{
int
src_index
=
mad24
(
y,
src_step,
src_offset
+
x
*
cn
*
(
int
)
sizeof
(
srcT
))
;
int
src_index
=
mad24
(
y,
src_step,
mad24
(
x,
SRC_TSIZE,
src_offset
))
;
#
ifdef
HAVE_MASK
#
ifdef
HAVE_MASK
int
mask_index
=
mad24
(
y,
mask_step,
mask_offset
+
x
)
;
int
mask_index
=
mad24
(
y,
mask_step,
mask_offset
+
x
)
;
mask
+=
mask_index
;
mask
+=
mask_index
;
#
endif
#
endif
int
dst_index
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
cn
*
(
int
)
sizeof
(
dstT
))
;
__global
const
srcT
*
src
=
(
__global
const
srcT
*
)(
srcptr
+
src_index
)
;
#
ifdef
ACCUMULATE_PRODUCT
#
ifdef
ACCUMULATE_PRODUCT
int
src2_index
=
mad24
(
y,
src2_step,
src2_offset
+
x
*
cn
*
(
int
)
sizeof
(
srcT
))
;
int
src2_index
=
mad24
(
y,
src2_step,
mad24
(
x,
SRC_TSIZE,
src2_offset
))
;
__global
const
srcT
*
src2
=
(
__global
const
srcT
*
)(
src2ptr
+
src2_index
)
;
#
endif
#
endif
__global
dstT
*
dst
=
(
__global
dstT
*
)(
dstptr
+
dst_index
)
;
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
x,
DST_TSIZE,
dst_offset
)
)
;
#
pragma
unroll
#
pragma
unroll
for
(
int
c
=
0
; c < cn; ++c)
for
(
int
i
=
0
; i < rowsPerWI; ++i)
if
(
y
<
dst_rows
)
{
__global
const
srcT1
*
src
=
(
__global
const
srcT1
*
)(
srcptr
+
src_index
)
;
#
ifdef
ACCUMULATE_PRODUCT
__global
const
srcT1
*
src2
=
(
__global
const
srcT1
*
)(
src2ptr
+
src2_index
)
;
#
endif
__global
dstT1
*
dst
=
(
__global
dstT1
*
)(
dstptr
+
dst_index
)
;
#
ifdef
HAVE_MASK
#
ifdef
HAVE_MASK
if
(
mask[0]
)
if
(
mask[0]
)
#
endif
#
endif
#
pragma
unroll
for
(
int
c
=
0
; c < cn; ++c)
{
#
ifdef
ACCUMULATE
#
ifdef
ACCUMULATE
dst[c]
+=
src[c]
;
dst[c]
+=
convertToDT
(
src[c]
)
;
#
elif
defined
ACCUMULATE_SQUARE
#
elif
defined
ACCUMULATE_SQUARE
dst[c]
+=
src[c]
*
src[c]
;
dstT1
val
=
convertToDT
(
src[c]
)
;
dst[c]
=
fma
(
val,
val,
dst[c]
)
;
#
elif
defined
ACCUMULATE_PRODUCT
#
elif
defined
ACCUMULATE_PRODUCT
dst[c]
+=
src[c]
*
src2[c]
;
dst[c]
=
fma
(
convertToDT
(
src[c]
)
,
convertToDT
(
src2[c]
)
,
dst[c]
)
;
#
elif
defined
ACCUMULATE_WEIGHTED
#
elif
defined
ACCUMULATE_WEIGHTED
dst[c]
=
(
1
-
alpha
)
*
dst[c]
+
src[c]
*
alpha
;
dst[c]
=
fma
(
1
-
alpha,
dst[c],
src[c]
*
alpha
)
;
#
else
#
else
#
error
"Unknown accumulation type"
#
error
"Unknown accumulation type"
#
endif
#
endif
}
src_index
+=
src_step
;
#
ifdef
ACCUMULATE_PRODUCT
src2_index
+=
src2_step
;
#
endif
#
ifdef
HAVE_MASK
mask
+=
mask_step
;
#
endif
dst_index
+=
dst_step
;
++y
;
}
}
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment