Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
78badcd6
Commit
78badcd6
authored
May 31, 2014
by
Alexander Alekhin
Committed by
OpenCV Buildbot
May 31, 2014
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #2781 from ilya-lavrenov:tapi_meanstddev
parents
89e4118b
33173d90
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
205 additions
and
14 deletions
+205
-14
ocl.cpp
modules/core/src/ocl.cpp
+7
-7
meanstddev.cl
modules/core/src/opencl/meanstddev.cl
+129
-0
stat.cpp
modules/core/src/stat.cpp
+69
-7
No files found.
modules/core/src/ocl.cpp
View file @
78badcd6
...
...
@@ -4419,22 +4419,22 @@ int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3,
InputArray
src4
,
InputArray
src5
,
InputArray
src6
,
InputArray
src7
,
InputArray
src8
,
InputArray
src9
)
{
int
type
=
src1
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
),
esz
=
CV_ELEM_SIZE
(
depth
);
int
type
=
src1
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
),
esz
1
=
CV_ELEM_SIZE1
(
depth
);
Size
ssize
=
src1
.
size
();
const
ocl
::
Device
&
d
=
ocl
::
Device
::
getDefault
();
int
vectorWidths
[]
=
{
d
.
preferredVectorWidthChar
(),
d
.
preferredVectorWidthChar
(),
d
.
preferredVectorWidthShort
(),
d
.
preferredVectorWidthShort
(),
d
.
preferredVectorWidthInt
(),
d
.
preferredVectorWidthFloat
(),
d
.
preferredVectorWidthDouble
(),
-
1
},
width
=
vectorWidths
[
depth
];
d
.
preferredVectorWidthDouble
(),
-
1
},
kercn
=
vectorWidths
[
depth
];
if
(
d
.
isIntel
())
{
// it's heuristic
int
vectorWidthsIntel
[]
=
{
16
,
16
,
8
,
8
,
1
,
1
,
1
,
-
1
};
width
=
vectorWidthsIntel
[
depth
];
kercn
=
vectorWidthsIntel
[
depth
];
}
if
(
ssize
.
width
*
cn
<
width
||
width
<=
0
)
if
(
ssize
.
width
*
cn
<
kercn
||
kercn
<=
0
)
return
1
;
std
::
vector
<
size_t
>
offsets
,
steps
,
cols
;
...
...
@@ -4449,7 +4449,7 @@ int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3,
PROCESS_SRC
(
src9
);
size_t
size
=
offsets
.
size
();
int
wsz
=
width
*
esz
;
int
wsz
=
kercn
*
esz1
;
std
::
vector
<
int
>
dividers
(
size
,
wsz
);
for
(
size_t
i
=
0
;
i
<
size
;
++
i
)
...
...
@@ -4460,14 +4460,14 @@ int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3,
for
(
size_t
i
=
0
;
i
<
size
;
++
i
)
if
(
dividers
[
i
]
!=
wsz
)
{
width
=
1
;
kercn
=
1
;
break
;
}
// another strategy
// width = *std::min_element(dividers.begin(), dividers.end());
return
width
;
return
kercn
;
}
#undef PROCESS_SRC
...
...
modules/core/src/opencl/meanstddev.cl
0 → 100644
View file @
78badcd6
//
This
file
is
part
of
OpenCV
project.
//
It
is
subject
to
the
license
terms
in
the
LICENSE
file
found
in
the
top-level
directory
//
of
this
distribution
and
at
http://opencv.org/license.html.
//
Copyright
(
C
)
2014
,
Itseez,
Inc.,
all
rights
reserved.
//
Third
party
copyrights
are
property
of
their
respective
owners.
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
define
noconvert
#
if
cn
!=
3
#
define
loadpix
(
addr
)
*
(
__global
const
srcT
*
)(
addr
)
#
define
storepix
(
val,
addr
)
*
(
__global
dstT
*
)(
addr
)
=
val
#
define
storesqpix
(
val,
addr
)
*
(
__global
sqdstT
*
)(
addr
)
=
val
#
define
srcTSIZE
(
int
)
sizeof
(
srcT
)
#
define
dstTSIZE
(
int
)
sizeof
(
dstT
)
#
define
sqdstTSIZE
(
int
)
sizeof
(
sqdstT
)
#
else
#
define
loadpix
(
addr
)
vload3
(
0
,
(
__global
const
srcT1
*
)(
addr
))
#
define
storepix
(
val,
addr
)
vstore3
(
val,
0
,
(
__global
dstT1
*
)(
addr
))
#
define
storesqpix
(
val,
addr
)
vstore3
(
val,
0
,
(
__global
sqdstT1
*
)(
addr
))
#
define
srcTSIZE
((
int
)
sizeof
(
srcT1
)
*3
)
#
define
dstTSIZE
((
int
)
sizeof
(
dstT1
)
*3
)
#
define
sqdstTSIZE
((
int
)
sizeof
(
sqdstT1
)
*3
)
#
endif
__kernel
void
meanStdDev
(
__global
const
uchar
*
srcptr,
int
src_step,
int
src_offset,
int
cols,
int
total,
int
groups,
__global
uchar
*
dstptr
#
ifdef
HAVE_MASK
,
__global
const
uchar
*
mask,
int
mask_step,
int
mask_offset
#
endif
)
{
int
lid
=
get_local_id
(
0
)
;
int
gid
=
get_group_id
(
0
)
;
int
id
=
get_global_id
(
0
)
;
__local
dstT
localMemSum[WGS2_ALIGNED]
;
__local
sqdstT
localMemSqSum[WGS2_ALIGNED]
;
#
ifdef
HAVE_MASK
__local
int
localMemNonZero[WGS2_ALIGNED]
;
#
endif
dstT
accSum
=
(
dstT
)(
0
)
;
sqdstT
accSqSum
=
(
sqdstT
)(
0
)
;
#
ifdef
HAVE_MASK
int
accNonZero
=
0
;
mask
+=
mask_offset
;
#
endif
srcptr
+=
src_offset
;
for
(
int
grain
=
groups
*
WGS
; id < total; id += grain)
{
#
ifdef
HAVE_MASK
#
ifdef
HAVE_SRC_CONT
int
mask_index
=
id
;
#
else
int
mask_index
=
mad24
(
id
/
cols,
mask_step,
id
%
cols
)
;
#
endif
if
(
mask[mask_index]
)
#
endif
{
#
ifdef
HAVE_SRC_CONT
int
src_index
=
mul24
(
id,
srcTSIZE
)
;
#
else
int
src_index
=
mad24
(
id
/
cols,
src_step,
mul24
(
id
%
cols,
srcTSIZE
))
;
#
endif
srcT
value
=
loadpix
(
srcptr
+
src_index
)
;
accSum
+=
convertToDT
(
value
)
;
sqdstT
dvalue
=
convertToSDT
(
value
)
;
accSqSum
=
fma
(
dvalue,
dvalue,
accSqSum
)
;
#
ifdef
HAVE_MASK
++accNonZero
;
#
endif
}
}
if
(
lid
<
WGS2_ALIGNED
)
{
localMemSum[lid]
=
accSum
;
localMemSqSum[lid]
=
accSqSum
;
#
ifdef
HAVE_MASK
localMemNonZero[lid]
=
accNonZero
;
#
endif
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
lid
>=
WGS2_ALIGNED
&&
total
>=
WGS2_ALIGNED
)
{
localMemSum[lid
-
WGS2_ALIGNED]
+=
accSum
;
localMemSqSum[lid
-
WGS2_ALIGNED]
+=
accSqSum
;
#
ifdef
HAVE_MASK
localMemNonZero[lid
-
WGS2_ALIGNED]
+=
accNonZero
;
#
endif
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
lsize
=
WGS2_ALIGNED
>>
1
; lsize > 0; lsize >>= 1)
{
if
(
lid
<
lsize
)
{
int
lid2
=
lsize
+
lid
;
localMemSum[lid]
+=
localMemSum[lid2]
;
localMemSqSum[lid]
+=
localMemSqSum[lid2]
;
#
ifdef
HAVE_MASK
localMemNonZero[lid]
+=
localMemNonZero[lid2]
;
#
endif
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
if
(
lid
==
0
)
{
storepix
(
localMemSum[0],
dstptr
+
dstTSIZE
*
gid
)
;
storesqpix
(
localMemSqSum[0],
dstptr
+
mad24
(
dstTSIZE,
groups,
sqdstTSIZE
*
gid
))
;
#
ifdef
HAVE_MASK
*
(
__global
int
*
)(
dstptr
+
mad24
(
dstTSIZE
+
sqdstTSIZE,
groups,
(
int
)
sizeof
(
int
)
*
gid
))
=
localMemNonZero[0]
;
#
endif
}
}
modules/core/src/stat.cpp
View file @
78badcd6
...
...
@@ -878,14 +878,76 @@ namespace cv {
static
bool
ocl_meanStdDev
(
InputArray
_src
,
OutputArray
_mean
,
OutputArray
_sdv
,
InputArray
_mask
)
{
bool
haveMask
=
_mask
.
kind
()
!=
_InputArray
::
NONE
;
int
nz
=
haveMask
?
-
1
:
(
int
)
_src
.
total
();
Scalar
mean
,
stddev
;
if
(
!
ocl_sum
(
_src
,
mean
,
OCL_OP_SUM
,
_mask
))
return
false
;
if
(
!
ocl_sum
(
_src
,
stddev
,
OCL_OP_SUM_SQR
,
_mask
))
return
false
;
int
nz
=
haveMask
?
countNonZero
(
_mask
)
:
(
int
)
_src
.
total
();
{
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
,
isContinuous
=
_src
.
isContinuous
();
int
groups
=
ocl
::
Device
::
getDefault
().
maxComputeUnits
();
size_t
wgs
=
ocl
::
Device
::
getDefault
().
maxWorkGroupSize
();
int
ddepth
=
std
::
max
(
CV_32S
,
depth
),
sqddepth
=
std
::
max
(
CV_32F
,
depth
),
dtype
=
CV_MAKE_TYPE
(
ddepth
,
cn
),
sqdtype
=
CV_MAKETYPE
(
sqddepth
,
cn
);
CV_Assert
(
!
haveMask
||
_mask
.
type
()
==
CV_8UC1
);
int
wgs2_aligned
=
1
;
while
(
wgs2_aligned
<
(
int
)
wgs
)
wgs2_aligned
<<=
1
;
wgs2_aligned
>>=
1
;
if
(
(
!
doubleSupport
&&
depth
==
CV_64F
)
||
cn
>
4
)
return
false
;
char
cvt
[
2
][
40
];
String
opts
=
format
(
"-D srcT=%s -D srcT1=%s -D dstT=%s -D dstT1=%s -D sqddepth=%d"
" -D sqdstT=%s -D sqdstT1=%s -D convertToSDT=%s -D cn=%d%s"
" -D convertToDT=%s -D WGS=%d -D WGS2_ALIGNED=%d%s%s"
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
depth
),
ocl
::
typeToStr
(
dtype
),
ocl
::
typeToStr
(
ddepth
),
sqddepth
,
ocl
::
typeToStr
(
sqdtype
),
ocl
::
typeToStr
(
sqddepth
),
ocl
::
convertTypeStr
(
depth
,
sqddepth
,
cn
,
cvt
[
0
]),
cn
,
isContinuous
?
" -D HAVE_SRC_CONT"
:
""
,
ocl
::
convertTypeStr
(
depth
,
ddepth
,
cn
,
cvt
[
1
]),
(
int
)
wgs
,
wgs2_aligned
,
haveMask
?
" -D HAVE_MASK"
:
""
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
);
ocl
::
Kernel
k
(
"meanStdDev"
,
ocl
::
core
::
meanstddev_oclsrc
,
opts
);
if
(
k
.
empty
())
return
false
;
int
dbsize
=
groups
*
((
haveMask
?
CV_ELEM_SIZE1
(
CV_32S
)
:
0
)
+
CV_ELEM_SIZE
(
sqdtype
)
+
CV_ELEM_SIZE
(
dtype
));
UMat
src
=
_src
.
getUMat
(),
db
(
1
,
dbsize
,
CV_8UC1
),
mask
=
_mask
.
getUMat
();
ocl
::
KernelArg
srcarg
=
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
dbarg
=
ocl
::
KernelArg
::
PtrWriteOnly
(
db
),
maskarg
=
ocl
::
KernelArg
::
ReadOnlyNoSize
(
mask
);
if
(
haveMask
)
k
.
args
(
srcarg
,
src
.
cols
,
(
int
)
src
.
total
(),
groups
,
dbarg
,
maskarg
);
else
k
.
args
(
srcarg
,
src
.
cols
,
(
int
)
src
.
total
(),
groups
,
dbarg
);
size_t
globalsize
=
groups
*
wgs
;
if
(
!
k
.
run
(
1
,
&
globalsize
,
&
wgs
,
false
))
return
false
;
typedef
Scalar
(
*
part_sum
)(
Mat
m
);
part_sum
funcs
[
3
]
=
{
ocl_part_sum
<
int
>
,
ocl_part_sum
<
float
>
,
ocl_part_sum
<
double
>
};
Mat
dbm
=
db
.
getMat
(
ACCESS_READ
);
mean
=
funcs
[
ddepth
-
CV_32S
](
Mat
(
1
,
groups
,
dtype
,
dbm
.
data
));
stddev
=
funcs
[
sqddepth
-
CV_32S
](
Mat
(
1
,
groups
,
sqdtype
,
dbm
.
data
+
groups
*
CV_ELEM_SIZE
(
dtype
)));
if
(
haveMask
)
nz
=
saturate_cast
<
int
>
(
funcs
[
0
](
Mat
(
1
,
groups
,
CV_32SC1
,
dbm
.
data
+
groups
*
(
CV_ELEM_SIZE
(
dtype
)
+
CV_ELEM_SIZE
(
sqdtype
))))[
0
]);
}
double
total
=
nz
!=
0
?
1.0
/
nz
:
0
;
int
k
,
j
,
cn
=
_src
.
channels
();
for
(
int
i
=
0
;
i
<
cn
;
++
i
)
...
...
@@ -927,7 +989,7 @@ void cv::meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, Input
ocl_meanStdDev
(
_src
,
_mean
,
_sdv
,
_mask
))
Mat
src
=
_src
.
getMat
(),
mask
=
_mask
.
getMat
();
CV_Assert
(
mask
.
empty
()
||
mask
.
type
()
==
CV_8U
);
CV_Assert
(
mask
.
empty
()
||
mask
.
type
()
==
CV_8U
C1
);
int
k
,
cn
=
src
.
channels
(),
depth
=
src
.
depth
();
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment