Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
9932cf41
Commit
9932cf41
authored
Mar 11, 2014
by
Ilya Lavrenov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
3-channel reduction operations
parent
fd0ab8eb
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
47 additions
and
30 deletions
+47
-30
perf_arithm.cpp
modules/core/perf/opencl/perf_arithm.cpp
+12
-8
reduce.cl
modules/core/src/opencl/reduce.cl
+29
-19
stat.cpp
modules/core/src/stat.cpp
+6
-3
No files found.
modules/core/perf/opencl/perf_arithm.cpp
View file @
9932cf41
...
...
@@ -344,7 +344,7 @@ OCL_PERF_TEST_P(FlipFixture, Flip,
typedef
Size_MatType
MinMaxLocFixture
;
OCL_PERF_TEST_P
(
MinMaxLocFixture
,
MinMaxLoc
,
::
testing
::
Combine
(
OCL_TEST_SIZES
,
OCL_TEST_TYPES
))
::
testing
::
Combine
(
OCL_TEST_SIZES
,
OCL_TEST_TYPES
_134
))
{
const
Size_MatType_t
params
=
GetParam
();
const
Size
srcSize
=
get
<
0
>
(
params
);
...
...
@@ -380,7 +380,7 @@ typedef Size_MatType SumFixture;
OCL_PERF_TEST_P
(
SumFixture
,
Sum
,
::
testing
::
Combine
(
OCL_TEST_SIZES
,
OCL_TEST_TYPES
))
OCL_TEST_TYPES
_134
))
{
const
Size_MatType_t
params
=
GetParam
();
const
Size
srcSize
=
get
<
0
>
(
params
);
...
...
@@ -447,7 +447,7 @@ OCL_PERF_TEST_P(PhaseFixture, Phase, ::testing::Combine(
SANITY_CHECK
(
dst
,
1e-2
);
}
///////////// bitwise_and////////////////////////
///////////// bitwise_and
////////////////////////
typedef
Size_MatType
BitwiseAndFixture
;
...
...
@@ -531,7 +531,7 @@ OCL_PERF_TEST_P(BitwiseNotFixture, Bitwise_not,
SANITY_CHECK
(
dst
);
}
///////////// compare////////////////////////
///////////// compare
////////////////////////
CV_ENUM
(
CmpCode
,
CMP_LT
,
CMP_LE
,
CMP_EQ
,
CMP_NE
,
CMP_GE
,
CMP_GT
)
...
...
@@ -652,7 +652,8 @@ OCL_PERF_TEST_P(SetIdentityFixture, SetIdentity,
typedef
Size_MatType
MeanStdDevFixture
;
OCL_PERF_TEST_P
(
MeanStdDevFixture
,
MeanStdDev
,
::
testing
::
Combine
(
OCL_PERF_ENUM
(
OCL_SIZE_1
,
OCL_SIZE_2
,
OCL_SIZE_3
),
OCL_TEST_TYPES
))
::
testing
::
Combine
(
OCL_PERF_ENUM
(
OCL_SIZE_1
,
OCL_SIZE_2
,
OCL_SIZE_3
),
OCL_TEST_TYPES_134
))
{
const
Size_MatType_t
params
=
GetParam
();
const
Size
srcSize
=
get
<
0
>
(
params
);
...
...
@@ -688,7 +689,8 @@ typedef std::tr1::tuple<Size, MatType, NormType> NormParams;
typedef
TestBaseWithParam
<
NormParams
>
NormFixture
;
OCL_PERF_TEST_P
(
NormFixture
,
Norm
,
::
testing
::
Combine
(
OCL_PERF_ENUM
(
OCL_SIZE_1
,
OCL_SIZE_2
,
OCL_SIZE_3
),
OCL_TEST_TYPES
,
NormType
::
all
()))
::
testing
::
Combine
(
OCL_PERF_ENUM
(
OCL_SIZE_1
,
OCL_SIZE_2
,
OCL_SIZE_3
),
OCL_TEST_TYPES_134
,
NormType
::
all
()))
{
const
NormParams
params
=
GetParam
();
const
Size
srcSize
=
get
<
0
>
(
params
);
...
...
@@ -711,7 +713,8 @@ OCL_PERF_TEST_P(NormFixture, Norm,
typedef
Size_MatType
UMatDotFixture
;
OCL_PERF_TEST_P
(
UMatDotFixture
,
UMatDot
,
::
testing
::
Combine
(
OCL_PERF_ENUM
(
OCL_SIZE_1
,
OCL_SIZE_2
,
OCL_SIZE_3
),
OCL_TEST_TYPES
))
::
testing
::
Combine
(
OCL_PERF_ENUM
(
OCL_SIZE_1
,
OCL_SIZE_2
,
OCL_SIZE_3
),
OCL_TEST_TYPES_134
))
{
const
Size_MatType_t
params
=
GetParam
();
const
Size
srcSize
=
get
<
0
>
(
params
);
...
...
@@ -820,7 +823,8 @@ typedef tuple<Size, MatType, NormalizeModes> NormalizeParams;
typedef
TestBaseWithParam
<
NormalizeParams
>
NormalizeFixture
;
OCL_PERF_TEST_P
(
NormalizeFixture
,
Normalize
,
::
testing
::
Combine
(
OCL_TEST_SIZES
,
OCL_TEST_TYPES
,
NormalizeModes
::
all
()))
::
testing
::
Combine
(
OCL_TEST_SIZES
,
OCL_TEST_TYPES_134
,
NormalizeModes
::
all
()))
{
const
NormalizeParams
params
=
GetParam
();
const
Size
srcSize
=
get
<
0
>
(
params
);
...
...
modules/core/src/opencl/reduce.cl
View file @
9932cf41
...
...
@@ -52,6 +52,18 @@
#
define
noconvert
#
if
cn
!=
3
#
define
loadpix
(
addr
)
*
(
__global
const
srcT
*
)(
addr
)
#
define
storepix
(
val,
addr
)
*
(
__global
dstT
*
)(
addr
)
=
val
#
define
srcTSIZE
(
int
)
sizeof
(
srcT
)
#
define
dstTSIZE
(
int
)
sizeof
(
dstT
)
#
else
#
define
loadpix
(
addr
)
vload3
(
0
,
(
__global
const
srcT1
*
)(
addr
))
#
define
storepix
(
val,
addr
)
vstore3
(
val,
0
,
(
__global
dstT1
*
)(
addr
))
#
define
srcTSIZE
((
int
)
sizeof
(
srcT1
)
*3
)
#
define
dstTSIZE
((
int
)
sizeof
(
dstT1
)
*3
)
#
endif
#
ifdef
HAVE_MASK
#
define
EXTRA_PARAMS
,
__global
const
uchar
*
mask,
int
mask_step,
int
mask_offset
#
else
...
...
@@ -88,19 +100,20 @@
#ifdef HAVE_MASK
#define REDUCE_GLOBAL \
dstT temp = convertToDT(src[0]); \
int mask_index = mad24(id / cols, mask_step, mask_offset + (id % cols)); \
if (mask[mask_index]) \
FUNC(accumulator, temp)
{ \
dstT temp = convertToDT(loadpix(srcptr + src_index)); \
FUNC(accumulator, temp); \
}
#elif defined OP_DOT
#define REDUCE_GLOBAL \
int src2_index = mad24(id / cols, src2_step, mad24(id % cols, (int)sizeof(srcT), src2_offset)); \
__global const srcT * src2 = (__global const srcT *)(src2ptr + src2_index); \
dstT temp = convertToDT(src[0]), temp2 = convertToDT(src2[0]); \
int src2_index = mad24(id / cols, src2_step, mad24(id % cols, srcTSIZE, src2_offset)); \
dstT temp = convertToDT(loadpix(srcptr + src_index)), temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
FUNC(accumulator, temp, temp2)
#else
#define REDUCE_GLOBAL \
dstT temp = convertToDT(
src[0]
); \
dstT temp = convertToDT(
loadpix(srcptr + src_index)
); \
FUNC(accumulator, temp)
#endif
...
...
@@ -111,8 +124,7 @@
#define REDUCE_LOCAL_2 \
localmem[lid] += localmem[lid2]
#define CALC_RESULT \
__global dstT * dst = (__global dstT *)(dstptr + (int)sizeof(dstT) * gid); \
dst[0] = localmem[0]
storepix(localmem[0], dstptr + dstTSIZE * gid)
// countNonZero stuff
#elif defined OP_COUNT_NON_ZERO
...
...
@@ -123,7 +135,7 @@
dstT accumulator = (dstT)(0); \
srcT zero = (srcT)(0), one = (srcT)(1)
#define REDUCE_GLOBAL \
accumulator +=
src[0]
== zero ? zero : one
accumulator +=
loadpix(srcptr + src_index)
== zero ? zero : one
#define SET_LOCAL_1 \
localmem[lid] = accumulator
#define REDUCE_LOCAL_1 \
...
...
@@ -131,8 +143,7 @@
#define REDUCE_LOCAL_2 \
localmem[lid] += localmem[lid2]
#define CALC_RESULT \
__global dstT * dst = (__global dstT *)(dstptr + (int)sizeof(dstT) * gid); \
dst[0] = localmem[0]
storepix(localmem[0], dstptr + dstTSIZE * gid)
// minMaxLoc stuff
#elif defined OP_MIN_MAX_LOC |
|
defined
OP_MIN_MAX_LOC_MASK
...
...
@@ -167,6 +178,8 @@
#
define
MAX_VAL
DBL_MAX
#
endif
#
define
dstT
srcT
#
define
DECLARE_LOCAL_MEM
\
__local
srcT
localmem_min[WGS2_ALIGNED]
; \
__local
srcT
localmem_max[WGS2_ALIGNED]
; \
...
...
@@ -181,7 +194,7 @@
srcT
temp
; \
int
temploc
#
define
REDUCE_GLOBAL
\
temp
=
src[0]
; \
temp
=
loadpix
(
srcptr
+
src_index
)
; \
temploc
=
id
; \
srcT
temp_minval
=
minval,
temp_maxval
=
maxval
; \
minval
=
min
(
minval,
temp
)
; \
...
...
@@ -217,10 +230,8 @@
localmem_maxloc[lid]
:
(
max1
==
max2
)
?
(
max1
==
oldmax
)
?
min
(
localmem_maxloc[lid2],localmem_maxloc[lid]
)
:
\
localmem_maxloc[lid2]
:
localmem_maxloc[lid]
#
define
CALC_RESULT
\
__global
srcT
*
dstminval
=
(
__global
srcT
*
)(
dstptr
+
(
int
)
sizeof
(
srcT
)
*
gid
)
; \
__global
srcT
*
dstmaxval
=
(
__global
srcT
*
)(
dstptr2
+
(
int
)
sizeof
(
srcT
)
*
gid
)
; \
dstminval[0]
=
localmem_min[0]
; \
dstmaxval[0]
=
localmem_max[0]
; \
storepix
(
localmem_min[0],
dstptr
+
dstTSIZE
*
gid
)
; \
storepix
(
localmem_max[0],
dstptr2
+
dstTSIZE
*
gid
)
; \
dstlocptr[gid]
=
localmem_minloc[0]
; \
dstlocptr2[gid]
=
localmem_maxloc[0]
...
...
@@ -236,7 +247,7 @@
int
temploc
#
undef
REDUCE_GLOBAL
#
define
REDUCE_GLOBAL
\
temp
=
src[0]
; \
temp
=
loadpix
(
srcptr
+
src_index
)
; \
temploc
=
id
; \
int
mask_index
=
mad24
(
id
/
cols,
mask_step,
mask_offset
+
(
id
%
cols
)
*
(
int
)
sizeof
(
uchar
))
; \
__global
const
uchar
*
mask
=
(
__global
const
uchar
*
)(
maskptr
+
mask_index
)
; \
...
...
@@ -278,8 +289,7 @@ __kernel void reduce(__global const uchar * srcptr, int src_step, int src_offset
for
(
int
grain
=
groupnum
*
WGS
; id < total; id += grain)
{
int
src_index
=
mad24
(
id
/
cols,
src_step,
mad24
(
id
%
cols,
(
int
)
sizeof
(
srcT
)
,
src_offset
))
;
__global
const
srcT
*
src
=
(
__global
const
srcT
*
)(
srcptr
+
src_index
)
;
int
src_index
=
mad24
(
id
/
cols,
src_step,
mad24
(
id
%
cols,
srcTSIZE,
src_offset
))
;
REDUCE_GLOBAL
;
}
...
...
modules/core/src/stat.cpp
View file @
9932cf41
...
...
@@ -475,7 +475,7 @@ static bool ocl_sum( InputArray _src, Scalar & res, int sum_op, InputArray _mask
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
if
(
(
!
doubleSupport
&&
depth
==
CV_64F
)
||
cn
>
4
||
cn
==
3
)
if
(
(
!
doubleSupport
&&
depth
==
CV_64F
)
||
cn
>
4
)
return
false
;
int
dbsize
=
ocl
::
Device
::
getDefault
().
maxComputeUnits
();
...
...
@@ -494,8 +494,11 @@ static bool ocl_sum( InputArray _src, Scalar & res, int sum_op, InputArray _mask
static
const
char
*
const
opMap
[
3
]
=
{
"OP_SUM"
,
"OP_SUM_ABS"
,
"OP_SUM_SQR"
};
char
cvt
[
40
];
ocl
::
Kernel
k
(
"reduce"
,
ocl
::
core
::
reduce_oclsrc
,
format
(
"-D srcT=%s -D dstT=%s -D ddepth=%d -D convertToDT=%s -D %s -D WGS=%d -D WGS2_ALIGNED=%d%s%s"
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
dtype
),
ddepth
,
ocl
::
convertTypeStr
(
depth
,
ddepth
,
cn
,
cvt
),
format
(
"-D srcT=%s -D srcT1=%s -D dstT=%s -D dstT1=%s -D ddepth=%d -D cn=%d"
" -D convertToDT=%s -D %s -D WGS=%d -D WGS2_ALIGNED=%d%s%s"
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
depth
),
ocl
::
typeToStr
(
dtype
),
ocl
::
typeToStr
(
ddepth
),
ddepth
,
cn
,
ocl
::
convertTypeStr
(
depth
,
ddepth
,
cn
,
cvt
),
opMap
[
sum_op
],
(
int
)
wgs
,
wgs2_aligned
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
,
haveMask
?
" -D HAVE_MASK"
:
""
));
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment