Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
ae56eaa5
Commit
ae56eaa5
authored
Oct 14, 2013
by
Andrey Pavlenko
Committed by
OpenCV Buildbot
Oct 14, 2013
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #1609 from ilya-lavrenov:ocl_some_optimization
parents
0870d3d7
b18101b1
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
124 additions
and
43 deletions
+124
-43
arithm.cpp
modules/ocl/src/arithm.cpp
+38
-16
arithm_add.cl
modules/ocl/src/opencl/arithm_add.cl
+3
-3
arithm_add_scalar.cl
modules/ocl/src/opencl/arithm_add_scalar.cl
+6
-6
arithm_add_scalar_mask.cl
modules/ocl/src/opencl/arithm_add_scalar_mask.cl
+4
-4
arithm_bitwise_binary.cl
modules/ocl/src/opencl/arithm_bitwise_binary.cl
+18
-3
arithm_bitwise_binary_mask.cl
modules/ocl/src/opencl/arithm_bitwise_binary_mask.cl
+17
-2
arithm_bitwise_binary_scalar.cl
modules/ocl/src/opencl/arithm_bitwise_binary_scalar.cl
+19
-5
arithm_bitwise_binary_scalar_mask.cl
modules/ocl/src/opencl/arithm_bitwise_binary_scalar_mask.cl
+19
-4
No files found.
modules/ocl/src/arithm.cpp
View file @
ae56eaa5
...
@@ -56,6 +56,23 @@
...
@@ -56,6 +56,23 @@
using
namespace
cv
;
using
namespace
cv
;
using
namespace
cv
::
ocl
;
using
namespace
cv
::
ocl
;
static
std
::
vector
<
uchar
>
scalarToVector
(
const
cv
::
Scalar
&
sc
,
int
depth
,
int
ocn
,
int
cn
)
{
CV_Assert
(
ocn
==
cn
||
(
ocn
==
4
&&
cn
==
3
));
static
const
int
sizeMap
[]
=
{
sizeof
(
uchar
),
sizeof
(
char
),
sizeof
(
ushort
),
sizeof
(
short
),
sizeof
(
int
),
sizeof
(
float
),
sizeof
(
double
)
};
int
elemSize1
=
sizeMap
[
depth
];
int
bufSize
=
elemSize1
*
ocn
;
std
::
vector
<
uchar
>
_buf
(
bufSize
);
uchar
*
buf
=
&
_buf
[
0
];
scalarToRawData
(
sc
,
buf
,
CV_MAKE_TYPE
(
depth
,
cn
));
memset
(
buf
+
elemSize1
*
cn
,
0
,
(
ocn
-
cn
)
*
elemSize1
);
return
_buf
;
}
//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
/////////////// add subtract multiply divide min max /////////////////////////
/////////////// add subtract multiply divide min max /////////////////////////
//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
...
@@ -84,7 +101,7 @@ static void arithmetic_run_generic(const oclMat &src1, const oclMat &src2, const
...
@@ -84,7 +101,7 @@ static void arithmetic_run_generic(const oclMat &src1, const oclMat &src2, const
int
src2step1
=
src2
.
step
/
src2
.
elemSize
(),
src2offset1
=
src2
.
offset
/
src2
.
elemSize
();
int
src2step1
=
src2
.
step
/
src2
.
elemSize
(),
src2offset1
=
src2
.
offset
/
src2
.
elemSize
();
int
maskstep1
=
mask
.
step
,
maskoffset1
=
mask
.
offset
/
mask
.
elemSize
();
int
maskstep1
=
mask
.
step
,
maskoffset1
=
mask
.
offset
/
mask
.
elemSize
();
int
dststep1
=
dst
.
step
/
dst
.
elemSize
(),
dstoffset1
=
dst
.
offset
/
dst
.
elemSize
();
int
dststep1
=
dst
.
step
/
dst
.
elemSize
(),
dstoffset1
=
dst
.
offset
/
dst
.
elemSize
();
oclMat
m
;
std
::
vector
<
uchar
>
m
;
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
size_t
globalThreads
[
3
]
=
{
dst
.
cols
,
dst
.
rows
,
1
};
size_t
globalThreads
[
3
]
=
{
dst
.
cols
,
dst
.
rows
,
1
};
...
@@ -132,10 +149,9 @@ static void arithmetic_run_generic(const oclMat &src1, const oclMat &src2, const
...
@@ -132,10 +149,9 @@ static void arithmetic_run_generic(const oclMat &src1, const oclMat &src2, const
if
(
haveScalar
)
if
(
haveScalar
)
{
{
const
int
WDepthMap
[]
=
{
CV_16S
,
CV_16S
,
CV_32S
,
CV_32S
,
CV_32S
,
CV_32F
,
CV_64F
};
const
int
WDepthMap
[]
=
{
CV_16S
,
CV_16S
,
CV_32S
,
CV_32S
,
CV_32S
,
CV_32F
,
CV_64F
};
m
.
create
(
1
,
1
,
CV_MAKE_TYPE
(
WDepthMap
[
WDepth
],
oclChannels
));
m
=
scalarToVector
(
scalar
,
WDepthMap
[
WDepth
],
oclChannels
,
src1
.
channels
());
m
.
setTo
(
scalar
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
m
.
data
));
args
.
push_back
(
make_pair
(
m
.
size
(),
(
void
*
)
&
m
[
0
]
));
kernelName
+=
"_scalar"
;
kernelName
+=
"_scalar"
;
}
}
...
@@ -1329,6 +1345,13 @@ static void bitwise_unary_run(const oclMat &src1, oclMat &dst, string kernelName
...
@@ -1329,6 +1345,13 @@ static void bitwise_unary_run(const oclMat &src1, oclMat &dst, string kernelName
enum
{
AND
=
0
,
OR
,
XOR
};
enum
{
AND
=
0
,
OR
,
XOR
};
static
std
::
string
to_string
(
int
value
)
{
std
::
ostringstream
stream
;
stream
<<
value
;
return
stream
.
str
();
}
static
void
bitwise_binary_run
(
const
oclMat
&
src1
,
const
oclMat
&
src2
,
const
Scalar
&
src3
,
const
oclMat
&
mask
,
static
void
bitwise_binary_run
(
const
oclMat
&
src1
,
const
oclMat
&
src2
,
const
Scalar
&
src3
,
const
oclMat
&
mask
,
oclMat
&
dst
,
int
operationType
)
oclMat
&
dst
,
int
operationType
)
{
{
...
@@ -1337,17 +1360,20 @@ static void bitwise_binary_run(const oclMat &src1, const oclMat &src2, const Sca
...
@@ -1337,17 +1360,20 @@ static void bitwise_binary_run(const oclMat &src1, const oclMat &src2, const Sca
CV_Assert
(
mask
.
empty
()
||
(
!
mask
.
empty
()
&&
mask
.
type
()
==
CV_8UC1
&&
mask
.
size
()
==
src1
.
size
()));
CV_Assert
(
mask
.
empty
()
||
(
!
mask
.
empty
()
&&
mask
.
type
()
==
CV_8UC1
&&
mask
.
size
()
==
src1
.
size
()));
dst
.
create
(
src1
.
size
(),
src1
.
type
());
dst
.
create
(
src1
.
size
(),
src1
.
type
());
int
elemSize
=
dst
.
elemSize
();
int
cols1
=
dst
.
cols
*
elemSize
;
oclMat
m
;
oclMat
m
;
const
char
operationMap
[]
=
{
'&'
,
'|'
,
'^'
};
const
char
operationMap
[]
=
{
'&'
,
'|'
,
'^'
};
std
::
string
kernelName
(
"arithm_bitwise_binary"
);
std
::
string
kernelName
(
"arithm_bitwise_binary"
);
std
::
string
buildOptions
=
format
(
"-D Operation=%c"
,
operationMap
[
operationType
]);
int
vlen
=
std
::
min
<
int
>
(
8
,
src1
.
elemSize1
()
*
src1
.
oclchannels
());
std
::
string
vlenstr
=
vlen
>
1
?
to_string
(
vlen
)
:
""
;
std
::
string
buildOptions
=
format
(
"-D Operation=%c -D vloadn=vload%s -D vstoren=vstore%s -D elemSize=%d -D vlen=%d"
" -D ucharv=uchar%s"
,
operationMap
[
operationType
],
vlenstr
.
c_str
(),
vlenstr
.
c_str
(),
(
int
)
src1
.
elemSize
(),
vlen
,
vlenstr
.
c_str
());
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
size_t
globalThreads
[
3
]
=
{
cols1
,
dst
.
rows
,
1
};
size_t
globalThreads
[
3
]
=
{
dst
.
cols
,
dst
.
rows
,
1
};
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src1
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src1
.
data
));
...
@@ -1360,7 +1386,6 @@ static void bitwise_binary_run(const oclMat &src1, const oclMat &src2, const Sca
...
@@ -1360,7 +1386,6 @@ static void bitwise_binary_run(const oclMat &src1, const oclMat &src2, const Sca
m
.
setTo
(
src3
);
m
.
setTo
(
src3
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
m
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
m
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
elemSize
)
);
kernelName
+=
"_scalar"
;
kernelName
+=
"_scalar"
;
}
}
...
@@ -1377,9 +1402,6 @@ static void bitwise_binary_run(const oclMat &src1, const oclMat &src2, const Sca
...
@@ -1377,9 +1402,6 @@ static void bitwise_binary_run(const oclMat &src1, const oclMat &src2, const Sca
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
mask
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
mask
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
mask
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
mask
.
offset
));
if
(
!
src2
.
empty
())
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
elemSize
));
kernelName
+=
"_mask"
;
kernelName
+=
"_mask"
;
}
}
...
@@ -1387,7 +1409,7 @@ static void bitwise_binary_run(const oclMat &src1, const oclMat &src2, const Sca
...
@@ -1387,7 +1409,7 @@ static void bitwise_binary_run(const oclMat &src1, const oclMat &src2, const Sca
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
cols1
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src1
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src1
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src1
.
rows
));
openCLExecuteKernel
(
src1
.
clCxt
,
mask
.
empty
()
?
(
!
src2
.
empty
()
?
&
arithm_bitwise_binary
:
&
arithm_bitwise_binary_scalar
)
:
openCLExecuteKernel
(
src1
.
clCxt
,
mask
.
empty
()
?
(
!
src2
.
empty
()
?
&
arithm_bitwise_binary
:
&
arithm_bitwise_binary_scalar
)
:
...
@@ -1400,12 +1422,12 @@ void cv::ocl::bitwise_not(const oclMat &src, oclMat &dst)
...
@@ -1400,12 +1422,12 @@ void cv::ocl::bitwise_not(const oclMat &src, oclMat &dst)
{
{
if
(
!
src
.
clCxt
->
supportsFeature
(
FEATURE_CL_DOUBLE
)
&&
src
.
depth
()
==
CV_64F
)
if
(
!
src
.
clCxt
->
supportsFeature
(
FEATURE_CL_DOUBLE
)
&&
src
.
depth
()
==
CV_64F
)
{
{
CV_Error
(
CV_OpenCLDoubleNotSupported
,
"
s
elected device doesn't support double"
);
CV_Error
(
CV_OpenCLDoubleNotSupported
,
"
S
elected device doesn't support double"
);
return
;
return
;
}
}
dst
.
create
(
src
.
size
(),
src
.
type
());
dst
.
create
(
src
.
size
(),
src
.
type
());
bitwise_unary_run
(
src
,
dst
,
"arithm_bitwise_not"
,
&
arithm_bitwise_not
);
bitwise_unary_run
(
src
,
dst
,
"arithm_bitwise_not"
,
&
arithm_bitwise_not
);
}
}
void
cv
::
ocl
::
bitwise_or
(
const
oclMat
&
src1
,
const
oclMat
&
src2
,
oclMat
&
dst
,
const
oclMat
&
mask
)
void
cv
::
ocl
::
bitwise_or
(
const
oclMat
&
src1
,
const
oclMat
&
src2
,
oclMat
&
dst
,
const
oclMat
&
mask
)
...
...
modules/ocl/src/opencl/arithm_add.cl
View file @
ae56eaa5
...
@@ -62,7 +62,7 @@
...
@@ -62,7 +62,7 @@
#
if
defined
(
FUNC_MUL
)
#
if
defined
(
FUNC_MUL
)
#
if
defined
(
HAVE_SCALAR
)
#
if
defined
(
HAVE_SCALAR
)
#
define
EXPRESSION
dst[dst_index]
=
convertToT
(
convertToWT
(
src1[src1_index]
)
*
scalar
[0]
*
convertToWT
(
src2[src2_index]
))
;
#
define
EXPRESSION
dst[dst_index]
=
convertToT
(
convertToWT
(
src1[src1_index]
)
*
scalar
*
convertToWT
(
src2[src2_index]
))
;
#
else
#
else
#
define
EXPRESSION
dst[dst_index]
=
convertToT
(
convertToWT
(
src1[src1_index]
)
*
convertToWT
(
src2[src2_index]
))
;
#
define
EXPRESSION
dst[dst_index]
=
convertToT
(
convertToWT
(
src1[src1_index]
)
*
convertToWT
(
src2[src2_index]
))
;
#
endif
#
endif
...
@@ -72,7 +72,7 @@
...
@@ -72,7 +72,7 @@
#
if
defined
(
HAVE_SCALAR
)
#
if
defined
(
HAVE_SCALAR
)
#
define
EXPRESSION
T
zero
=
(
T
)(
0
)
; \
#
define
EXPRESSION
T
zero
=
(
T
)(
0
)
; \
dst[dst_index]
=
src2[src2_index]
==
zero
?
zero
:
\
dst[dst_index]
=
src2[src2_index]
==
zero
?
zero
:
\
convertToT
(
convertToWT
(
src1[src1_index]
)
*
scalar
[0]
/
convertToWT
(
src2[src2_index]
))
;
convertToT
(
convertToWT
(
src1[src1_index]
)
*
scalar
/
convertToWT
(
src2[src2_index]
))
;
#
else
#
else
#
define
EXPRESSION
T
zero
=
(
T
)(
0
)
; \
#
define
EXPRESSION
T
zero
=
(
T
)(
0
)
; \
dst[dst_index]
=
src2[src2_index]
==
zero
?
zero
:
\
dst[dst_index]
=
src2[src2_index]
==
zero
?
zero
:
\
...
@@ -123,7 +123,7 @@ __kernel void arithm_binary_op_mat(__global T *src1, int src1_step, int src1_off
...
@@ -123,7 +123,7 @@ __kernel void arithm_binary_op_mat(__global T *src1, int src1_step, int src1_off
//
add
mat
with
scale
//
add
mat
with
scale
__kernel
void
arithm_binary_op_mat_scalar
(
__global
T
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_binary_op_mat_scalar
(
__global
T
*src1,
int
src1_step,
int
src1_offset,
__global
T
*src2,
int
src2_step,
int
src2_offset,
__global
T
*src2,
int
src2_step,
int
src2_offset,
__global
WT
*
scalar,
WT
scalar,
__global
T
*dst,
int
dst_step,
int
dst_offset,
__global
T
*dst,
int
dst_step,
int
dst_offset,
int
cols,
int
rows
)
int
cols,
int
rows
)
{
{
...
...
modules/ocl/src/opencl/arithm_add_scalar.cl
View file @
ae56eaa5
...
@@ -52,20 +52,20 @@
...
@@ -52,20 +52,20 @@
#
endif
#
endif
#
if
defined
(
FUNC_ADD
)
#
if
defined
(
FUNC_ADD
)
#
define
EXPRESSION
dst[dst_index]
=
convertToT
(
convertToWT
(
src1[src1_index]
)
+
scalar
[0]
)
;
#
define
EXPRESSION
dst[dst_index]
=
convertToT
(
convertToWT
(
src1[src1_index]
)
+
scalar
)
;
#
endif
#
endif
#
if
defined
(
FUNC_SUB
)
#
if
defined
(
FUNC_SUB
)
#
define
EXPRESSION
dst[dst_index]
=
convertToT
(
convertToWT
(
src1[src1_index]
)
-
scalar
[0]
)
;
#
define
EXPRESSION
dst[dst_index]
=
convertToT
(
convertToWT
(
src1[src1_index]
)
-
scalar
)
;
#
endif
#
endif
#
if
defined
(
FUNC_MUL
)
#
if
defined
(
FUNC_MUL
)
#
define
EXPRESSION
dst[dst_index]
=
convertToT
(
convertToWT
(
src1[src1_index]
)
*
scalar
[0]
)
;
#
define
EXPRESSION
dst[dst_index]
=
convertToT
(
convertToWT
(
src1[src1_index]
)
*
scalar
)
;
#
endif
#
endif
#
if
defined
(
FUNC_DIV
)
#
if
defined
(
FUNC_DIV
)
#
define
EXPRESSION
T
zero
=
(
T
)(
0
)
; \
#
define
EXPRESSION
T
zero
=
(
T
)(
0
)
; \
dst[dst_index]
=
src1[src1_index]
==
zero
?
zero
:
convertToT
(
scalar
[0]
/
convertToWT
(
src1[src1_index]
))
;
dst[dst_index]
=
src1[src1_index]
==
zero
?
zero
:
convertToT
(
scalar
/
convertToWT
(
src1[src1_index]
))
;
#
endif
#
endif
#
if
defined
(
FUNC_ABS
)
#
if
defined
(
FUNC_ABS
)
...
@@ -75,7 +75,7 @@
...
@@ -75,7 +75,7 @@
#
endif
#
endif
#
if
defined
(
FUNC_ABS_DIFF
)
#
if
defined
(
FUNC_ABS_DIFF
)
#
define
EXPRESSION
WT
value
=
convertToWT
(
src1[src1_index]
)
-
scalar
[0]
; \
#
define
EXPRESSION
WT
value
=
convertToWT
(
src1[src1_index]
)
-
scalar
; \
value
=
value
>
(
WT
)(
0
)
?
value
:
-value
; \
value
=
value
>
(
WT
)(
0
)
?
value
:
-value
; \
dst[dst_index]
=
convertToT
(
value
)
;
dst[dst_index]
=
convertToT
(
value
)
;
#
endif
#
endif
...
@@ -85,7 +85,7 @@
...
@@ -85,7 +85,7 @@
///////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////
__kernel
void
arithm_binary_op_scalar
(
__global
T
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_binary_op_scalar
(
__global
T
*src1,
int
src1_step,
int
src1_offset,
__global
WT
*
scalar,
WT
scalar,
__global
T
*dst,
int
dst_step,
int
dst_offset,
__global
T
*dst,
int
dst_step,
int
dst_offset,
int
cols,
int
rows
)
int
cols,
int
rows
)
{
{
...
...
modules/ocl/src/opencl/arithm_add_scalar_mask.cl
View file @
ae56eaa5
...
@@ -52,15 +52,15 @@
...
@@ -52,15 +52,15 @@
#
endif
#
endif
#
if
defined
(
FUNC_ADD
)
#
if
defined
(
FUNC_ADD
)
#
define
EXPRESSION
dst[dst_index]
=
convertToT
(
convertToWT
(
src1[src1_index]
)
+
scalar
[0]
)
;
#
define
EXPRESSION
dst[dst_index]
=
convertToT
(
convertToWT
(
src1[src1_index]
)
+
scalar
)
;
#
endif
#
endif
#
if
defined
(
FUNC_SUB
)
#
if
defined
(
FUNC_SUB
)
#
define
EXPRESSION
dst[dst_index]
=
convertToT
(
convertToWT
(
src1[src1_index]
)
-
scalar
[0]
)
;
#
define
EXPRESSION
dst[dst_index]
=
convertToT
(
convertToWT
(
src1[src1_index]
)
-
scalar
)
;
#
endif
#
endif
#
if
defined
(
FUNC_MUL
)
#
if
defined
(
FUNC_MUL
)
#
define
EXPRESSION
dst[dst_index]
=
convertToT
(
convertToWT
(
src1[src1_index]
)
*
scalar
[0]
)
;
#
define
EXPRESSION
dst[dst_index]
=
convertToT
(
convertToWT
(
src1[src1_index]
)
*
scalar
)
;
#
endif
#
endif
#
if
defined
(
FUNC_DIV
)
#
if
defined
(
FUNC_DIV
)
...
@@ -74,7 +74,7 @@
...
@@ -74,7 +74,7 @@
///////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////
__kernel
void
arithm_binary_op_scalar_mask
(
__global
T
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_binary_op_scalar_mask
(
__global
T
*src1,
int
src1_step,
int
src1_offset,
__global
WT
*
scalar,
WT
scalar,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
T
*dst,
int
dst_step,
int
dst_offset,
__global
T
*dst,
int
dst_step,
int
dst_offset,
int
cols,
int
rows
)
int
cols,
int
rows
)
...
...
modules/ocl/src/opencl/arithm_bitwise_binary.cl
View file @
ae56eaa5
...
@@ -51,17 +51,32 @@
...
@@ -51,17 +51,32 @@
__kernel
void
arithm_bitwise_binary
(
__global
uchar
*
src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_binary
(
__global
uchar
*
src1,
int
src1_step,
int
src1_offset,
__global
uchar
*
src2,
int
src2_step,
int
src2_offset,
__global
uchar
*
src2,
int
src2_step,
int
src2_offset,
__global
uchar
*
dst,
int
dst_step,
int
dst_offset,
__global
uchar
*
dst,
int
dst_step,
int
dst_offset,
int
cols
1
,
int
rows
)
int
cols,
int
rows
)
{
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
;
if
(
x
<
cols
1
&&
y
<
rows
)
if
(
x
<
cols
&&
y
<
rows
)
{
{
#
if
elemSize
>
1
x
*=
elemSize
;
#
endif
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
)
;
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
)
;
int
src2_index
=
mad24
(
y,
src2_step,
x
+
src2_offset
)
;
int
src2_index
=
mad24
(
y,
src2_step,
x
+
src2_offset
)
;
int
dst_index
=
mad24
(
y,
dst_step,
dst_offset
+
x
)
;
int
dst_index
=
mad24
(
y,
dst_step,
x
+
dst_offset
)
;
#
if
elemSize
>
1
#
pragma
unroll
for
(
int
i
=
0
; i < elemSize; i += vlen)
{
ucharv
t0
=
vloadn
(
0
,
src1
+
src1_index
+
i
)
;
ucharv
t1
=
vloadn
(
0
,
src2
+
src2_index
+
i
)
;
ucharv
t2
=
t0
Operation
t1
;
vstoren
(
t2,
0
,
dst
+
dst_index
+
i
)
;
}
#
else
dst[dst_index]
=
src1[src1_index]
Operation
src2[src2_index]
;
dst[dst_index]
=
src1[src1_index]
Operation
src2[src2_index]
;
#
endif
}
}
}
}
modules/ocl/src/opencl/arithm_bitwise_binary_mask.cl
View file @
ae56eaa5
...
@@ -50,7 +50,7 @@
...
@@ -50,7 +50,7 @@
__kernel
void
arithm_bitwise_binary_mask
(
__global
uchar
*
src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_binary_mask
(
__global
uchar
*
src1,
int
src1_step,
int
src1_offset,
__global
uchar
*
src2,
int
src2_step,
int
src2_offset,
__global
uchar
*
src2,
int
src2_step,
int
src2_offset,
__global
uchar
*
mask,
int
mask_step,
int
mask_offset,
int
elemSize,
__global
uchar
*
mask,
int
mask_step,
int
mask_offset,
__global
uchar
*
dst,
int
dst_step,
int
dst_offset,
__global
uchar
*
dst,
int
dst_step,
int
dst_offset,
int
cols1,
int
rows
)
int
cols1,
int
rows
)
{
{
...
@@ -59,15 +59,30 @@ __kernel void arithm_bitwise_binary_mask(__global uchar * src1, int src1_step, i
...
@@ -59,15 +59,30 @@ __kernel void arithm_bitwise_binary_mask(__global uchar * src1, int src1_step, i
if
(
x
<
cols1
&&
y
<
rows
)
if
(
x
<
cols1
&&
y
<
rows
)
{
{
int
mask_index
=
mad24
(
y,
mask_step,
mask_offset
+
(
x
/
elemSize
)
)
;
int
mask_index
=
mad24
(
y,
mask_step,
mask_offset
+
x
)
;
if
(
mask[mask_index]
)
if
(
mask[mask_index]
)
{
{
#
if
elemSize
>
1
x
*=
elemSize
;
#
endif
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
)
;
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
)
;
int
src2_index
=
mad24
(
y,
src2_step,
x
+
src2_offset
)
;
int
src2_index
=
mad24
(
y,
src2_step,
x
+
src2_offset
)
;
int
dst_index
=
mad24
(
y,
dst_step,
x
+
dst_offset
)
;
int
dst_index
=
mad24
(
y,
dst_step,
x
+
dst_offset
)
;
#
if
elemSize
>
1
#
pragma
unroll
for
(
int
i
=
0
; i < elemSize; i += vlen)
{
ucharv
t0
=
vloadn
(
0
,
src1
+
src1_index
+
i
)
;
ucharv
t1
=
vloadn
(
0
,
src2
+
src2_index
+
i
)
;
ucharv
t2
=
t0
Operation
t1
;
vstoren
(
t2,
0
,
dst
+
dst_index
+
i
)
;
}
#
else
dst[dst_index]
=
src1[src1_index]
Operation
src2[src2_index]
;
dst[dst_index]
=
src1[src1_index]
Operation
src2[src2_index]
;
#
endif
}
}
}
}
}
}
modules/ocl/src/opencl/arithm_bitwise_binary_scalar.cl
View file @
ae56eaa5
...
@@ -50,19 +50,33 @@
...
@@ -50,19 +50,33 @@
__kernel
void
arithm_bitwise_binary_scalar
(
__kernel
void
arithm_bitwise_binary_scalar
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__global
uchar
*src2,
int
elemSize,
__global
uchar
*src2,
__global
uchar
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*dst,
int
dst_step,
int
dst_offset,
int
cols
1
,
int
rows
)
int
cols,
int
rows
)
{
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
;
if
(
x
<
cols
1
&&
y
<
rows
)
if
(
x
<
cols
&&
y
<
rows
)
{
{
#
if
elemSize
>
1
x
*=
elemSize
;
#
endif
int
src1_index
=
mad24
(
y,
src1_step,
src1_offset
+
x
)
;
int
src1_index
=
mad24
(
y,
src1_step,
src1_offset
+
x
)
;
int
src2_index
=
x
%
elemSize
;
int
dst_index
=
mad24
(
y,
dst_step,
dst_offset
+
x
)
;
int
dst_index
=
mad24
(
y,
dst_step,
dst_offset
+
x
)
;
dst[dst_index]
=
src1[src1_index]
Operation
src2[src2_index]
;
#
if
elemSize
>
1
#
pragma
unroll
for
(
int
i
=
0
; i < elemSize; i += vlen)
{
ucharv
t0
=
vloadn
(
0
,
src1
+
src1_index
+
i
)
;
ucharv
t1
=
vloadn
(
0
,
src2
+
i
)
;
ucharv
t2
=
t0
Operation
t1
;
vstoren
(
t2,
0
,
dst
+
dst_index
+
i
)
;
}
#
else
dst[dst_index]
=
src1[src1_index]
Operation
src2[0]
;
#
endif
}
}
}
}
modules/ocl/src/opencl/arithm_bitwise_binary_scalar_mask.cl
View file @
ae56eaa5
...
@@ -56,7 +56,7 @@
...
@@ -56,7 +56,7 @@
//////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////
__kernel
void
arithm_bitwise_binary_scalar_mask
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_binary_scalar_mask
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__global
uchar
*src2,
int
elemSize,
__global
uchar
*src2,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
uchar
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*dst,
int
dst_step,
int
dst_offset,
int
cols,
int
rows
)
int
cols,
int
rows
)
...
@@ -66,14 +66,29 @@ __kernel void arithm_bitwise_binary_scalar_mask(__global uchar *src1, int src1_s
...
@@ -66,14 +66,29 @@ __kernel void arithm_bitwise_binary_scalar_mask(__global uchar *src1, int src1_s
if
(
x
<
cols
&&
y
<
rows
)
if
(
x
<
cols
&&
y
<
rows
)
{
{
int
mask_index
=
mad24
(
y,
mask_step,
(
x
/
elemSize
)
+
mask_offset
)
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
)
;
if
(
mask[mask_index]
)
if
(
mask[mask_index]
)
{
{
#
if
elemSize
>
1
x
*=
elemSize
;
#
endif
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
)
;
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
)
;
int
src2_index
=
x
%
elemSize
;
int
dst_index
=
mad24
(
y,
dst_step,
x
+
dst_offset
)
;
int
dst_index
=
mad24
(
y,
dst_step,
x
+
dst_offset
)
;
dst[dst_index]
=
src1[src1_index]
Operation
src2[src2_index]
;
#
if
elemSize
>
1
#
pragma
unroll
for
(
int
i
=
0
; i < elemSize; i += vlen)
{
ucharv
t0
=
vloadn
(
0
,
src1
+
src1_index
+
i
)
;
ucharv
t1
=
vloadn
(
0
,
src2
+
i
)
;
ucharv
t2
=
t0
Operation
t1
;
vstoren
(
t2,
0
,
dst
+
dst_index
+
i
)
;
}
#
else
dst[dst_index]
=
src1[src1_index]
Operation
src2[0]
;
#
endif
}
}
}
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment