Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
a2d27429
Commit
a2d27429
authored
12 years ago
by
Andrey Kamaev
Committed by
OpenCV Buildbot
12 years ago
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #775 from bitwangyaoyao:2.4_fixerr
parents
977562b6
bcc086ba
Show whitespace changes
Inline
Side-by-side
Showing
23 changed files
with
1433 additions
and
537 deletions
+1433
-537
arithm_absdiff.cl
modules/ocl/src/opencl/arithm_absdiff.cl
+43
-9
arithm_add.cl
modules/ocl/src/opencl/arithm_add.cl
+44
-10
arithm_addWeighted.cl
modules/ocl/src/opencl/arithm_addWeighted.cl
+30
-8
arithm_add_scalar.cl
modules/ocl/src/opencl/arithm_add_scalar.cl
+32
-7
arithm_add_scalar_mask.cl
modules/ocl/src/opencl/arithm_add_scalar_mask.cl
+32
-7
arithm_bitwise_and.cl
modules/ocl/src/opencl/arithm_bitwise_and.cl
+20
-4
arithm_bitwise_and_mask.cl
modules/ocl/src/opencl/arithm_bitwise_and_mask.cl
+101
-44
arithm_bitwise_and_scalar.cl
modules/ocl/src/opencl/arithm_bitwise_and_scalar.cl
+101
-42
arithm_bitwise_and_scalar_mask.cl
modules/ocl/src/opencl/arithm_bitwise_and_scalar_mask.cl
+102
-42
arithm_bitwise_not.cl
modules/ocl/src/opencl/arithm_bitwise_not.cl
+20
-5
arithm_bitwise_or.cl
modules/ocl/src/opencl/arithm_bitwise_or.cl
+20
-4
arithm_bitwise_or_mask.cl
modules/ocl/src/opencl/arithm_bitwise_or_mask.cl
+100
-43
arithm_bitwise_or_scalar.cl
modules/ocl/src/opencl/arithm_bitwise_or_scalar.cl
+98
-38
arithm_bitwise_or_scalar_mask.cl
modules/ocl/src/opencl/arithm_bitwise_or_scalar_mask.cl
+100
-40
arithm_bitwise_xor.cl
modules/ocl/src/opencl/arithm_bitwise_xor.cl
+20
-6
arithm_bitwise_xor_mask.cl
modules/ocl/src/opencl/arithm_bitwise_xor_mask.cl
+100
-43
arithm_bitwise_xor_scalar.cl
modules/ocl/src/opencl/arithm_bitwise_xor_scalar.cl
+101
-42
arithm_bitwise_xor_scalar_mask.cl
modules/ocl/src/opencl/arithm_bitwise_xor_scalar_mask.cl
+102
-42
arithm_compare_eq.cl
modules/ocl/src/opencl/arithm_compare_eq.cl
+81
-20
arithm_compare_ne.cl
modules/ocl/src/opencl/arithm_compare_ne.cl
+80
-19
arithm_div.cl
modules/ocl/src/opencl/arithm_div.cl
+63
-50
arithm_flip.cl
modules/ocl/src/opencl/arithm_flip.cl
+20
-4
arithm_mul.cl
modules/ocl/src/opencl/arithm_mul.cl
+23
-8
No files found.
modules/ocl/src/opencl/arithm_absdiff.cl
View file @
a2d27429
...
...
@@ -44,7 +44,11 @@
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
endif
#
endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
...
...
@@ -63,6 +67,9 @@ __kernel void arithm_absdiff_D0 (__global uchar *src1, int src1_step, int src1_o
{
x
=
x
<<
2
;
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(
dst_offset
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
-
dst_align
)
;
int
src2_index
=
mad24
(
y,
src2_step,
x
+
src2_offset
-
dst_align
)
;
...
...
@@ -111,7 +118,10 @@ __kernel void arithm_absdiff_D2 (__global ushort *src1, int src1_step, int src1_
{
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
1
)
+
src2_offset
-
(
dst_align
<<
1
))
;
...
...
@@ -145,7 +155,10 @@ __kernel void arithm_absdiff_D3 (__global short *src1, int src1_step, int src1_o
{
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
1
)
+
src2_offset
-
(
dst_align
<<
1
))
;
...
...
@@ -249,7 +262,10 @@ __kernel void arithm_s_absdiff_C1_D0 (__global uchar *src1, int src1_step, int
{
x
=
x
<<
2
;
#
define
dst_align
(
dst_offset
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(
dst_offset
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
-
dst_align
)
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
...
...
@@ -288,7 +304,10 @@ __kernel void arithm_s_absdiff_C1_D2 (__global ushort *src1, int src1_step, in
{
x
=
x
<<
1
;
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
...
...
@@ -319,7 +338,10 @@ __kernel void arithm_s_absdiff_C1_D3 (__global short *src1, int src1_step, int
{
x
=
x
<<
1
;
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
...
...
@@ -422,7 +444,10 @@ __kernel void arithm_s_absdiff_C2_D0 (__global uchar *src1, int src1_step, int
{
x
=
x
<<
1
;
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
...
...
@@ -564,7 +589,10 @@ __kernel void arithm_s_absdiff_C3_D0 (__global uchar *src1, int src1_step, int
{
x
=
x
<<
2
;
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
3
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
3
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
*
3
)
+
src1_offset
-
(
dst_align
*
3
))
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
...
...
@@ -618,7 +646,10 @@ __kernel void arithm_s_absdiff_C3_D2 (__global ushort *src1, int src1_step, in
{
x
=
x
<<
1
;
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
*
6
)
+
src1_offset
-
(
dst_align
*
6
))
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
...
...
@@ -668,7 +699,10 @@ __kernel void arithm_s_absdiff_C3_D3 (__global short *src1, int src1_step, int
{
x
=
x
<<
1
;
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
*
6
)
+
src1_offset
-
(
dst_align
*
6
))
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
...
...
This diff is collapsed.
Click to expand it.
modules/ocl/src/opencl/arithm_add.cl
View file @
a2d27429
...
...
@@ -45,7 +45,11 @@
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
endif
#
endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
...
...
@@ -64,7 +68,10 @@ __kernel void arithm_add_D0 (__global uchar *src1, int src1_step, int src1_offse
{
x
=
x
<<
2
;
#
define
dst_align
(
dst_offset
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(
dst_offset
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
-
dst_align
)
;
int
src2_index
=
mad24
(
y,
src2_step,
x
+
src2_offset
-
dst_align
)
;
...
...
@@ -112,7 +119,10 @@ __kernel void arithm_add_D2 (__global ushort *src1, int src1_step, int src1_offs
{
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
1
)
+
src2_offset
-
(
dst_align
<<
1
))
;
...
...
@@ -147,7 +157,10 @@ __kernel void arithm_add_D3 (__global short *src1, int src1_step, int src1_offse
{
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
1
)
+
src2_offset
-
(
dst_align
<<
1
))
;
...
...
@@ -252,7 +265,10 @@ __kernel void arithm_add_with_mask_C1_D0 (__global uchar *src1, int src1_step, i
{
x
=
x
<<
2
;
#
define
dst_align
(
dst_offset
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(
dst_offset
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
-
dst_align
)
;
int
src2_index
=
mad24
(
y,
src2_step,
x
+
src2_offset
-
dst_align
)
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -311,7 +327,10 @@ __kernel void arithm_add_with_mask_C1_D2 (__global ushort *src1, int src1_step,
{
x
=
x
<<
1
;
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
1
)
+
src2_offset
-
(
dst_align
<<
1
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -348,7 +367,10 @@ __kernel void arithm_add_with_mask_C1_D3 (__global short *src1, int src1_step, i
{
x
=
x
<<
1
;
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
1
)
+
src2_offset
-
(
dst_align
<<
1
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -477,7 +499,10 @@ __kernel void arithm_add_with_mask_C2_D0 (__global uchar *src1, int src1_step, i
{
x
=
x
<<
1
;
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
1
)
+
src2_offset
-
(
dst_align
<<
1
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -664,7 +689,10 @@ __kernel void arithm_add_with_mask_C3_D0 (__global uchar *src1, int src1_step, i
{
x
=
x
<<
2
;
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
3
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
3
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
*
3
)
+
src1_offset
-
(
dst_align
*
3
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
*
3
)
+
src2_offset
-
(
dst_align
*
3
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -724,7 +752,10 @@ __kernel void arithm_add_with_mask_C3_D2 (__global ushort *src1, int src1_step,
{
x
=
x
<<
1
;
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
*
6
)
+
src1_offset
-
(
dst_align
*
6
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
*
6
)
+
src2_offset
-
(
dst_align
*
6
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -780,7 +811,10 @@ __kernel void arithm_add_with_mask_C3_D3 (__global short *src1, int src1_step, i
{
x
=
x
<<
1
;
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
*
6
)
+
src1_offset
-
(
dst_align
*
6
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
*
6
)
+
src2_offset
-
(
dst_align
*
6
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
This diff is collapsed.
Click to expand it.
modules/ocl/src/opencl/arithm_addWeighted.cl
View file @
a2d27429
...
...
@@ -42,8 +42,12 @@
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
//M*/
#
if
defined
DOUBLE_SUPPORT
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
endif
typedef
double
F
;
#
else
typedef
float
F
;
...
...
@@ -65,7 +69,10 @@ __kernel void addWeighted_D0 (__global uchar *src1,int src1_step,int src1_offset
{
x
=
x
<<
2
;
#
define
dst_align
(
dst_offset
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(
dst_offset
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
-
dst_align
)
;
int
src2_index
=
mad24
(
y,
src2_step,
x
+
src2_offset
-
dst_align
)
;
...
...
@@ -122,7 +129,10 @@ __kernel void addWeighted_D2 (__global ushort *src1, int src1_step,int src1_offs
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
1
)
+
src2_offset
-
(
dst_align
<<
1
))
;
...
...
@@ -182,7 +192,10 @@ __kernel void addWeighted_D3 (__global short *src1, int src1_step,int src1_offse
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
1
)
+
src2_offset
-
(
dst_align
<<
1
))
;
...
...
@@ -241,9 +254,12 @@ __kernel void addWeighted_D4 (__global int *src1, int src1_step,int src1_offset,
x
=
x
<<
2
;
#
define
bitOfInt
(
sizeof
(
int
)
==
4
?
2:
3
)
#
define
bitOfInt
(
sizeof
(
int
)
==
4
?
2:
3
)
#
define
dst_align
((
dst_offset
>>
bitOfInt
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
bitOfInt
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
bitOfInt
)
+
src1_offset
-
(
dst_align
<<
bitOfInt
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
bitOfInt
)
+
src2_offset
-
(
dst_align
<<
bitOfInt
))
;
...
...
@@ -304,7 +320,10 @@ __kernel void addWeighted_D5 (__global float *src1,int src1_step,int src1_offset
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
2
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
2
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
2
)
+
src1_offset
-
(
dst_align
<<
2
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
2
)
+
src2_offset
-
(
dst_align
<<
2
))
;
...
...
@@ -366,7 +385,10 @@ __kernel void addWeighted_D6 (__global double *src1, int src1_step,int src1_offs
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
3
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
3
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
3
)
+
src1_offset
-
(
dst_align
<<
3
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
3
)
+
src2_offset
-
(
dst_align
<<
3
))
;
...
...
This diff is collapsed.
Click to expand it.
modules/ocl/src/opencl/arithm_add_scalar.cl
View file @
a2d27429
...
...
@@ -44,9 +44,13 @@
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
endif
#
endif
/**************************************add
with
scalar
without
mask**************************************/
__kernel
void
arithm_s_add_C1_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__global
uchar
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -59,7 +63,10 @@ __kernel void arithm_s_add_C1_D0 (__global uchar *src1, int src1_step, int src
{
x
=
x
<<
2
;
#
define
dst_align
(
dst_offset
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(
dst_offset
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
-
dst_align
)
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
...
...
@@ -99,7 +106,10 @@ __kernel void arithm_s_add_C1_D2 (__global ushort *src1, int src1_step, int sr
{
x
=
x
<<
1
;
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
...
...
@@ -131,7 +141,10 @@ __kernel void arithm_s_add_C1_D3 (__global short *src1, int src1_step, int src
{
x
=
x
<<
1
;
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
...
...
@@ -233,7 +246,10 @@ __kernel void arithm_s_add_C2_D0 (__global uchar *src1, int src1_step, int src
{
x
=
x
<<
1
;
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
...
...
@@ -378,7 +394,10 @@ __kernel void arithm_s_add_C3_D0 (__global uchar *src1, int src1_step, int src
{
x
=
x
<<
2
;
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
3
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
3
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
*
3
)
+
src1_offset
-
(
dst_align
*
3
))
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
...
...
@@ -432,7 +451,10 @@ __kernel void arithm_s_add_C3_D2 (__global ushort *src1, int src1_step, int sr
{
x
=
x
<<
1
;
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
*
6
)
+
src1_offset
-
(
dst_align
*
6
))
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
...
...
@@ -482,7 +504,10 @@ __kernel void arithm_s_add_C3_D3 (__global short *src1, int src1_step, int src
{
x
=
x
<<
1
;
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
*
6
)
+
src1_offset
-
(
dst_align
*
6
))
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
...
...
This diff is collapsed.
Click to expand it.
modules/ocl/src/opencl/arithm_add_scalar_mask.cl
View file @
a2d27429
...
...
@@ -44,7 +44,11 @@
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
endif
#
endif
/**************************************add
with
scalar
with
mask**************************************/
...
...
@@ -61,7 +65,10 @@ __kernel void arithm_s_add_with_mask_C1_D0 (__global uchar *src1, int src1_ste
{
x
=
x
<<
2
;
#
define
dst_align
(
dst_offset
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(
dst_offset
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
-
dst_align
)
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -111,7 +118,10 @@ __kernel void arithm_s_add_with_mask_C1_D2 (__global ushort *src1, int src1_st
{
x
=
x
<<
1
;
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -146,7 +156,10 @@ __kernel void arithm_s_add_with_mask_C1_D3 (__global short *src1, int src1_ste
{
x
=
x
<<
1
;
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -267,7 +280,10 @@ __kernel void arithm_s_add_with_mask_C2_D0 (__global uchar *src1, int src1_ste
{
x
=
x
<<
1
;
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -443,7 +459,10 @@ __kernel void arithm_s_add_with_mask_C3_D0 (__global uchar *src1, int src1_ste
{
x
=
x
<<
2
;
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
3
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
3
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
*
3
)
+
src1_offset
-
(
dst_align
*
3
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -501,7 +520,10 @@ __kernel void arithm_s_add_with_mask_C3_D2 (__global ushort *src1, int src1_st
{
x
=
x
<<
1
;
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
*
6
)
+
src1_offset
-
(
dst_align
*
6
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -555,7 +577,10 @@ __kernel void arithm_s_add_with_mask_C3_D3 (__global short *src1, int src1_ste
{
x
=
x
<<
1
;
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
*
6
)
+
src1_offset
-
(
dst_align
*
6
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
This diff is collapsed.
Click to expand it.
modules/ocl/src/opencl/arithm_bitwise_and.cl
View file @
a2d27429
...
...
@@ -43,7 +43,11 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
endif
#
endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
...
...
@@ -62,7 +66,10 @@ __kernel void arithm_bitwise_and_D0 (__global uchar *src1, int src1_step, int sr
{
x
=
x
<<
2
;
#
define
dst_align
(
dst_offset
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(
dst_offset
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
-
dst_align
)
;
int
src2_index
=
mad24
(
y,
src2_step,
x
+
src2_offset
-
dst_align
)
;
...
...
@@ -112,7 +119,10 @@ __kernel void arithm_bitwise_and_D1 (__global char *src1, int src1_step, int src
{
x
=
x
<<
2
;
#
define
dst_align
(
dst_offset
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(
dst_offset
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
-
dst_align
)
;
int
src2_index
=
mad24
(
y,
src2_step,
x
+
src2_offset
-
dst_align
)
;
...
...
@@ -163,7 +173,10 @@ __kernel void arithm_bitwise_and_D2 (__global ushort *src1, int src1_step, int s
{
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
1
)
+
src2_offset
-
(
dst_align
<<
1
))
;
...
...
@@ -215,7 +228,10 @@ __kernel void arithm_bitwise_and_D3 (__global short *src1, int src1_step, int sr
{
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
1
)
+
src2_offset
-
(
dst_align
<<
1
))
;
...
...
This diff is collapsed.
Click to expand it.
modules/ocl/src/opencl/arithm_bitwise_and_mask.cl
View file @
a2d27429
...
...
@@ -43,14 +43,18 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
endif
#
endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_AND////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
/**************************************bitwise_and
with
mask**************************************/
__kernel
void
arithm_bitwise_and_with_mask_C1_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_and_with_mask_C1_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__global
uchar
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
uchar
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -64,7 +68,10 @@ __kernel void arithm_bitwise_and_with_mask_C1_D0 (__global uchar *src1, int src1
{
x
=
x
<<
2
;
#
define
dst_align
(
dst_offset
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(
dst_offset
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
-
dst_align
)
;
int
src2_index
=
mad24
(
y,
src2_step,
x
+
src2_offset
-
dst_align
)
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -91,7 +98,8 @@ __kernel void arithm_bitwise_and_with_mask_C1_D0 (__global uchar *src1, int src1
__kernel
void
arithm_bitwise_and_with_mask_C1_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_and_with_mask_C1_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -105,7 +113,10 @@ __kernel void arithm_bitwise_and_with_mask_C1_D1 (__global char *src1, int src1_
{
x
=
x
<<
2
;
#
define
dst_align
(
dst_offset
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(
dst_offset
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
-
dst_align
)
;
int
src2_index
=
mad24
(
y,
src2_step,
x
+
src2_offset
-
dst_align
)
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -132,7 +143,8 @@ __kernel void arithm_bitwise_and_with_mask_C1_D1 (__global char *src1, int src1_
__kernel
void
arithm_bitwise_and_with_mask_C1_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_and_with_mask_C1_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__global
ushort
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
ushort
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -146,7 +158,10 @@ __kernel void arithm_bitwise_and_with_mask_C1_D2 (__global ushort *src1, int src
{
x
=
x
<<
1
;
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
1
)
+
src2_offset
-
(
dst_align
<<
1
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -171,7 +186,8 @@ __kernel void arithm_bitwise_and_with_mask_C1_D2 (__global ushort *src1, int src
__kernel
void
arithm_bitwise_and_with_mask_C1_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_and_with_mask_C1_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -185,7 +201,10 @@ __kernel void arithm_bitwise_and_with_mask_C1_D3 (__global short *src1, int src1
{
x
=
x
<<
1
;
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
1
)
+
src2_offset
-
(
dst_align
<<
1
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -210,7 +229,8 @@ __kernel void arithm_bitwise_and_with_mask_C1_D3 (__global short *src1, int src1
__kernel
void
arithm_bitwise_and_with_mask_C1_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_and_with_mask_C1_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__global
int
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
int
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -242,7 +262,8 @@ __kernel void arithm_bitwise_and_with_mask_C1_D4 (__global int *src1, int src1
__kernel
void
arithm_bitwise_and_with_mask_C1_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_and_with_mask_C1_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -274,8 +295,8 @@ __kernel void arithm_bitwise_and_with_mask_C1_D5 (__global char *src1, int src1_
#
if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
arithm_bitwise_and_with_mask_C1_D6
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_and_with_mask_C1_D6
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -305,11 +326,11 @@ __kernel void arithm_bitwise_and_with_mask_C1_D6 (__global char *src1, int src1_
}
}
#
endif
__kernel
void
arithm_bitwise_and_with_mask_C2_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_and_with_mask_C2_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__global
uchar
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
uchar
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -323,7 +344,10 @@ __kernel void arithm_bitwise_and_with_mask_C2_D0 (__global uchar *src1, int src1
{
x
=
x
<<
1
;
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
1
)
+
src2_offset
-
(
dst_align
<<
1
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -347,7 +371,8 @@ __kernel void arithm_bitwise_and_with_mask_C2_D0 (__global uchar *src1, int src1
}
__kernel
void
arithm_bitwise_and_with_mask_C2_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_and_with_mask_C2_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -361,7 +386,10 @@ __kernel void arithm_bitwise_and_with_mask_C2_D1 (__global char *src1, int src1_
{
x
=
x
<<
1
;
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
1
)
+
src2_offset
-
(
dst_align
<<
1
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -384,7 +412,8 @@ __kernel void arithm_bitwise_and_with_mask_C2_D1 (__global char *src1, int src1_
}
}
__kernel
void
arithm_bitwise_and_with_mask_C2_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_and_with_mask_C2_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__global
ushort
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
ushort
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -413,7 +442,8 @@ __kernel void arithm_bitwise_and_with_mask_C2_D2 (__global ushort *src1, int src
*
((
__global
ushort2
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_bitwise_and_with_mask_C2_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_and_with_mask_C2_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -442,7 +472,8 @@ __kernel void arithm_bitwise_and_with_mask_C2_D3 (__global short *src1, int src1
*
((
__global
short2
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_bitwise_and_with_mask_C2_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_and_with_mask_C2_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__global
int
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
int
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -471,7 +502,8 @@ __kernel void arithm_bitwise_and_with_mask_C2_D4 (__global int *src1, int src1
*
((
__global
int2
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_bitwise_and_with_mask_C2_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_and_with_mask_C2_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -500,8 +532,9 @@ __kernel void arithm_bitwise_and_with_mask_C2_D5 (__global char *src1, int src1_
*
((
__global
char8
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
#
if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
arithm_bitwise_and_with_mask_C2_D6
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_and_with_mask_C2_D6
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -530,11 +563,11 @@ __kernel void arithm_bitwise_and_with_mask_C2_D6 (__global char *src1, int src1_
*
((
__global
char16
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
#
endif
__kernel
void
arithm_bitwise_and_with_mask_C3_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_and_with_mask_C3_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__global
uchar
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
uchar
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -548,7 +581,10 @@ __kernel void arithm_bitwise_and_with_mask_C3_D0 (__global uchar *src1, int src1
{
x
=
x
<<
2
;
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
3
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
3
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
*
3
)
+
src1_offset
-
(
dst_align
*
3
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
*
3
)
+
src2_offset
-
(
dst_align
*
3
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -596,7 +632,8 @@ __kernel void arithm_bitwise_and_with_mask_C3_D0 (__global uchar *src1, int src1
}
__kernel
void
arithm_bitwise_and_with_mask_C3_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_and_with_mask_C3_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -610,7 +647,10 @@ __kernel void arithm_bitwise_and_with_mask_C3_D1 (__global char *src1, int src1_
{
x
=
x
<<
2
;
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
3
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
3
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
*
3
)
+
src1_offset
-
(
dst_align
*
3
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
*
3
)
+
src2_offset
-
(
dst_align
*
3
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -657,7 +697,8 @@ __kernel void arithm_bitwise_and_with_mask_C3_D1 (__global char *src1, int src1_
}
}
__kernel
void
arithm_bitwise_and_with_mask_C3_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_and_with_mask_C3_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__global
ushort
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
ushort
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -671,7 +712,10 @@ __kernel void arithm_bitwise_and_with_mask_C3_D2 (__global ushort *src1, int src
{
x
=
x
<<
1
;
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
*
6
)
+
src1_offset
-
(
dst_align
*
6
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
*
6
)
+
src2_offset
-
(
dst_align
*
6
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -713,7 +757,8 @@ __kernel void arithm_bitwise_and_with_mask_C3_D2 (__global ushort *src1, int src
*
((
__global
ushort2
*
)((
__global
char
*
)
dst
+
dst_index
+
8
))
=
data_2
;
}
}
__kernel
void
arithm_bitwise_and_with_mask_C3_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_and_with_mask_C3_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -727,7 +772,10 @@ __kernel void arithm_bitwise_and_with_mask_C3_D3 (__global short *src1, int src1
{
x
=
x
<<
1
;
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
*
6
)
+
src1_offset
-
(
dst_align
*
6
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
*
6
)
+
src2_offset
-
(
dst_align
*
6
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -769,7 +817,8 @@ __kernel void arithm_bitwise_and_with_mask_C3_D3 (__global short *src1, int src1
*
((
__global
short2
*
)((
__global
char
*
)
dst
+
dst_index
+
8
))
=
data_2
;
}
}
__kernel
void
arithm_bitwise_and_with_mask_C3_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_and_with_mask_C3_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__global
int
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
int
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -813,7 +862,8 @@ __kernel void arithm_bitwise_and_with_mask_C3_D4 (__global int *src1, int src1
*
((
__global
int
*
)((
__global
char
*
)
dst
+
dst_index
+
8
))
=
data_2
;
}
}
__kernel
void
arithm_bitwise_and_with_mask_C3_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_and_with_mask_C3_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -858,7 +908,8 @@ __kernel void arithm_bitwise_and_with_mask_C3_D5 (__global char *src1, int src1_
}
}
#
if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
arithm_bitwise_and_with_mask_C3_D6
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_and_with_mask_C3_D6
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -905,8 +956,8 @@ __kernel void arithm_bitwise_and_with_mask_C3_D6 (__global char *src1, int src1_
#
endif
__kernel
void
arithm_bitwise_and_with_mask_C4_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_and_with_mask_C4_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__global
uchar
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
uchar
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -937,7 +988,8 @@ __kernel void arithm_bitwise_and_with_mask_C4_D0 (__global uchar *src1, int src1
}
__kernel
void
arithm_bitwise_and_with_mask_C4_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_and_with_mask_C4_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -967,7 +1019,8 @@ __kernel void arithm_bitwise_and_with_mask_C4_D1 (__global char *src1, int src1_
}
}
__kernel
void
arithm_bitwise_and_with_mask_C4_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_and_with_mask_C4_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__global
ushort
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
ushort
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -996,7 +1049,8 @@ __kernel void arithm_bitwise_and_with_mask_C4_D2 (__global ushort *src1, int src
*
((
__global
ushort4
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_bitwise_and_with_mask_C4_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_and_with_mask_C4_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -1025,7 +1079,8 @@ __kernel void arithm_bitwise_and_with_mask_C4_D3 (__global short *src1, int src1
*
((
__global
short4
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_bitwise_and_with_mask_C4_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_and_with_mask_C4_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__global
int
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
int
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -1054,7 +1109,8 @@ __kernel void arithm_bitwise_and_with_mask_C4_D4 (__global int *src1, int src1
*
((
__global
int4
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_bitwise_and_with_mask_C4_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_and_with_mask_C4_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -1084,7 +1140,8 @@ __kernel void arithm_bitwise_and_with_mask_C4_D5 (__global char *src1, int src1_
}
}
#
if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
arithm_bitwise_and_with_mask_C4_D6
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_and_with_mask_C4_D6
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
...
...
This diff is collapsed.
Click to expand it.
modules/ocl/src/opencl/arithm_bitwise_and_scalar.cl
View file @
a2d27429
...
...
@@ -42,17 +42,20 @@
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
//
#
if
defined
(
__ATI__
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
__NVIDIA__
)
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
endif
#
endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_AND////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
/**************************************and
with
scalar
without
mask**************************************/
__kernel
void
arithm_s_bitwise_and_C1_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_C1_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__global
uchar
*dst,
int
dst_step,
int
dst_offset,
uchar4
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -63,7 +66,10 @@ __kernel void arithm_s_bitwise_and_C1_D0 (__global uchar *src1, int src1_step,
{
x
=
x
<<
2
;
#
define
dst_align
(
dst_offset
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(
dst_offset
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
-
dst_align
)
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
...
...
@@ -86,7 +92,8 @@ __kernel void arithm_s_bitwise_and_C1_D0 (__global uchar *src1, int src1_step,
}
__kernel
void
arithm_s_bitwise_and_C1_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_C1_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
char4
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -97,7 +104,10 @@ __kernel void arithm_s_bitwise_and_C1_D1 (__global char *src1, int src1_step,
{
x
=
x
<<
2
;
#
define
dst_align
(
dst_offset
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(
dst_offset
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
-
dst_align
)
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
...
...
@@ -119,7 +129,8 @@ __kernel void arithm_s_bitwise_and_C1_D1 (__global char *src1, int src1_step,
}
}
__kernel
void
arithm_s_bitwise_and_C1_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_C1_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__global
ushort
*dst,
int
dst_step,
int
dst_offset,
ushort4
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -131,7 +142,10 @@ __kernel void arithm_s_bitwise_and_C1_D2 (__global ushort *src1, int src1_step
{
x
=
x
<<
1
;
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
...
...
@@ -150,7 +164,8 @@ __kernel void arithm_s_bitwise_and_C1_D2 (__global ushort *src1, int src1_step
*
((
__global
ushort2
*
)((
__global
uchar
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_s_bitwise_and_C1_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_C1_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
short4
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -162,7 +177,10 @@ __kernel void arithm_s_bitwise_and_C1_D3 (__global short *src1, int src1_step,
{
x
=
x
<<
1
;
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
...
...
@@ -181,7 +199,8 @@ __kernel void arithm_s_bitwise_and_C1_D3 (__global short *src1, int src1_step,
*
((
__global
short2
*
)((
__global
uchar
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_s_bitwise_and_C1_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_C1_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__global
int
*dst,
int
dst_step,
int
dst_offset,
int4
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -202,7 +221,8 @@ __kernel void arithm_s_bitwise_and_C1_D4 (__global int *src1, int src1_step, i
*
((
__global
int
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_s_bitwise_and_C1_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_C1_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
char16
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -232,9 +252,9 @@ __kernel void arithm_s_bitwise_and_C1_D5 (__global char *src1, int src1_step,
*
((
__global
char4
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
#
if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
arithm_s_bitwise_and_C1_D6
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_C1_D6
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
short16
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -256,7 +276,8 @@ __kernel void arithm_s_bitwise_and_C1_D6 (__global short *src1, int src1_step, i
}
}
#
endif
__kernel
void
arithm_s_bitwise_and_C2_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_C2_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__global
uchar
*dst,
int
dst_step,
int
dst_offset,
uchar4
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -268,7 +289,10 @@ __kernel void arithm_s_bitwise_and_C2_D0 (__global uchar *src1, int src1_step,
{
x
=
x
<<
1
;
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
...
...
@@ -290,7 +314,8 @@ __kernel void arithm_s_bitwise_and_C2_D0 (__global uchar *src1, int src1_step,
}
__kernel
void
arithm_s_bitwise_and_C2_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_C2_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
char4
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -302,7 +327,10 @@ __kernel void arithm_s_bitwise_and_C2_D1 (__global char *src1, int src1_step,
{
x
=
x
<<
1
;
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
...
...
@@ -322,7 +350,8 @@ __kernel void arithm_s_bitwise_and_C2_D1 (__global char *src1, int src1_step,
}
}
__kernel
void
arithm_s_bitwise_and_C2_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_C2_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__global
ushort
*dst,
int
dst_step,
int
dst_offset,
ushort4
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -343,7 +372,8 @@ __kernel void arithm_s_bitwise_and_C2_D2 (__global ushort *src1, int src1_step
*
((
__global
ushort2
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_s_bitwise_and_C2_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_C2_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
short4
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -364,7 +394,8 @@ __kernel void arithm_s_bitwise_and_C2_D3 (__global short *src1, int src1_step,
*
((
__global
short2
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_s_bitwise_and_C2_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_C2_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__global
int
*dst,
int
dst_step,
int
dst_offset,
int4
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -384,7 +415,8 @@ __kernel void arithm_s_bitwise_and_C2_D4 (__global int *src1, int src1_step, i
*
((
__global
int2
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_s_bitwise_and_C2_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_C2_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
char16
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -406,7 +438,8 @@ __kernel void arithm_s_bitwise_and_C2_D5 (__global char *src1, int src1_step,
}
}
#
if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
arithm_s_bitwise_and_C2_D6
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_C2_D6
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
short16
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -428,7 +461,8 @@ __kernel void arithm_s_bitwise_and_C2_D6 (__global short *src1, int src1_step, i
}
}
#
endif
__kernel
void
arithm_s_bitwise_and_C3_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_C3_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__global
uchar
*dst,
int
dst_step,
int
dst_offset,
uchar4
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -440,7 +474,10 @@ __kernel void arithm_s_bitwise_and_C3_D0 (__global uchar *src1, int src1_step,
{
x
=
x
<<
2
;
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
3
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
3
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
*
3
)
+
src1_offset
-
(
dst_align
*
3
))
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
...
...
@@ -484,7 +521,8 @@ __kernel void arithm_s_bitwise_and_C3_D0 (__global uchar *src1, int src1_step,
}
__kernel
void
arithm_s_bitwise_and_C3_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_C3_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
char4
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -496,7 +534,10 @@ __kernel void arithm_s_bitwise_and_C3_D1 (__global char *src1, int src1_step,
{
x
=
x
<<
2
;
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
3
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
3
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
*
3
)
+
src1_offset
-
(
dst_align
*
3
))
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
...
...
@@ -539,7 +580,8 @@ __kernel void arithm_s_bitwise_and_C3_D1 (__global char *src1, int src1_step,
}
}
__kernel
void
arithm_s_bitwise_and_C3_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_C3_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__global
ushort
*dst,
int
dst_step,
int
dst_offset,
ushort4
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -551,7 +593,10 @@ __kernel void arithm_s_bitwise_and_C3_D2 (__global ushort *src1, int src1_step
{
x
=
x
<<
1
;
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
*
6
)
+
src1_offset
-
(
dst_align
*
6
))
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
...
...
@@ -589,7 +634,8 @@ __kernel void arithm_s_bitwise_and_C3_D2 (__global ushort *src1, int src1_step
*
((
__global
ushort2
*
)((
__global
char
*
)
dst
+
dst_index
+
8
))
=
data_2
;
}
}
__kernel
void
arithm_s_bitwise_and_C3_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_C3_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
short4
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -601,7 +647,10 @@ __kernel void arithm_s_bitwise_and_C3_D3 (__global short *src1, int src1_step,
{
x
=
x
<<
1
;
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
*
6
)
+
src1_offset
-
(
dst_align
*
6
))
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
...
...
@@ -639,7 +688,8 @@ __kernel void arithm_s_bitwise_and_C3_D3 (__global short *src1, int src1_step,
*
((
__global
short2
*
)((
__global
char
*
)
dst
+
dst_index
+
8
))
=
data_2
;
}
}
__kernel
void
arithm_s_bitwise_and_C3_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_C3_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__global
int
*dst,
int
dst_step,
int
dst_offset,
int4
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -673,7 +723,8 @@ __kernel void arithm_s_bitwise_and_C3_D4 (__global int *src1, int src1_step, i
*
((
__global
int
*
)((
__global
char
*
)
dst
+
dst_index
+
8
))
=
tmp_data_2
;
}
}
__kernel
void
arithm_s_bitwise_and_C3_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_C3_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
char16
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -708,7 +759,8 @@ __kernel void arithm_s_bitwise_and_C3_D5 (__global char *src1, int src1_step,
}
}
#
if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
arithm_s_bitwise_and_C3_D6
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_C3_D6
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
short16
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -743,7 +795,8 @@ __kernel void arithm_s_bitwise_and_C3_D6 (__global short *src1, int src1_step, i
}
}
#
endif
__kernel
void
arithm_s_bitwise_and_C4_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_C4_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__global
uchar
*dst,
int
dst_step,
int
dst_offset,
uchar4
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -765,7 +818,8 @@ __kernel void arithm_s_bitwise_and_C4_D0 (__global uchar *src1, int src1_step,
}
__kernel
void
arithm_s_bitwise_and_C4_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_C4_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
char4
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -786,7 +840,8 @@ __kernel void arithm_s_bitwise_and_C4_D1 (__global char *src1, int src1_step,
}
}
__kernel
void
arithm_s_bitwise_and_C4_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_C4_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__global
ushort
*dst,
int
dst_step,
int
dst_offset,
ushort4
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -806,7 +861,8 @@ __kernel void arithm_s_bitwise_and_C4_D2 (__global ushort *src1, int src1_step
*
((
__global
ushort4
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_s_bitwise_and_C4_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_C4_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
short4
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -826,7 +882,8 @@ __kernel void arithm_s_bitwise_and_C4_D3 (__global short *src1, int src1_step,
*
((
__global
short4
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_s_bitwise_and_C4_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_C4_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__global
int
*dst,
int
dst_step,
int
dst_offset,
int4
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -846,7 +903,8 @@ __kernel void arithm_s_bitwise_and_C4_D4 (__global int *src1, int src1_step, i
*
((
__global
int4
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_s_bitwise_and_C4_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_C4_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
char16
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -869,7 +927,8 @@ __kernel void arithm_s_bitwise_and_C4_D5 (__global char *src1, int src1_step,
}
}
#
if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
arithm_s_bitwise_and_C4_D6
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_C4_D6
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
short16
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
This diff is collapsed.
Click to expand it.
modules/ocl/src/opencl/arithm_bitwise_and_scalar_mask.cl
View file @
a2d27429
...
...
@@ -42,17 +42,19 @@
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
//M*/
#
if
defined
(
__ATI__
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
__NVIDIA__
)
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
endif
#
endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_AND////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
/**************************************bitwise_and
with
scalar
with
mask**************************************/
__kernel
void
arithm_s_bitwise_and_with_mask_C1_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_with_mask_C1_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__global
uchar
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
uchar4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -65,7 +67,10 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D0 (__global uchar *src1, int
{
x
=
x
<<
2
;
#
define
dst_align
(
dst_offset
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(
dst_offset
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
-
dst_align
)
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -90,7 +95,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D0 (__global uchar *src1, int
}
__kernel
void
arithm_s_bitwise_and_with_mask_C1_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_with_mask_C1_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
char4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -103,7 +109,10 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D1 (__global char *src1, int s
{
x
=
x
<<
2
;
#
define
dst_align
(
dst_offset
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(
dst_offset
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
-
dst_align
)
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -127,7 +136,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D1 (__global char *src1, int s
}
}
__kernel
void
arithm_s_bitwise_and_with_mask_C1_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_with_mask_C1_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__global
ushort
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
ushort4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -140,7 +150,10 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D2 (__global ushort *src1, int
{
x
=
x
<<
1
;
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -161,7 +174,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D2 (__global ushort *src1, int
*
((
__global
ushort2
*
)((
__global
uchar
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_s_bitwise_and_with_mask_C1_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_with_mask_C1_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
short4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -174,7 +188,10 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D3 (__global short *src1, int
{
x
=
x
<<
1
;
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -195,7 +212,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D3 (__global short *src1, int
*
((
__global
short2
*
)((
__global
uchar
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_s_bitwise_and_with_mask_C1_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_with_mask_C1_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__global
int
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
int4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -223,7 +241,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D4 (__global int *src1, int
}
}
__kernel
void
arithm_s_bitwise_and_with_mask_C1_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_with_mask_C1_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
char16
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -252,7 +271,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D5 (__global char *src1, int src
}
#
if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
arithm_s_bitwise_and_with_mask_C1_D6
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_with_mask_C1_D6
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
short16
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -280,7 +300,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D6 (__global short *src1, int sr
}
}
#
endif
__kernel
void
arithm_s_bitwise_and_with_mask_C2_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_with_mask_C2_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__global
uchar
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
uchar4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -293,7 +314,10 @@ __kernel void arithm_s_bitwise_and_with_mask_C2_D0 (__global uchar *src1, int
{
x
=
x
<<
1
;
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -316,7 +340,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C2_D0 (__global uchar *src1, int
}
__kernel
void
arithm_s_bitwise_and_with_mask_C2_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_with_mask_C2_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
char4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -329,7 +354,10 @@ __kernel void arithm_s_bitwise_and_with_mask_C2_D1 (__global char *src1, int s
{
x
=
x
<<
1
;
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -351,7 +379,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C2_D1 (__global char *src1, int s
}
}
__kernel
void
arithm_s_bitwise_and_with_mask_C2_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_with_mask_C2_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__global
ushort
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
ushort4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -378,7 +407,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C2_D2 (__global ushort *src1, int
*
((
__global
ushort2
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_s_bitwise_and_with_mask_C2_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_with_mask_C2_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
short4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -405,7 +435,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C2_D3 (__global short *src1, int
*
((
__global
short2
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_s_bitwise_and_with_mask_C2_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_with_mask_C2_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__global
int
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
int4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -432,7 +463,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C2_D4 (__global int *src1, int sr
*
((
__global
int2
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_s_bitwise_and_with_mask_C2_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_with_mask_C2_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
char16
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -461,7 +493,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C2_D5 (__global char *src1, int s
}
}
#
if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
arithm_s_bitwise_and_with_mask_C2_D6
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_with_mask_C2_D6
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
short16
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -489,7 +522,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C2_D6 (__global short *src1, int sr
}
}
#
endif
__kernel
void
arithm_s_bitwise_and_with_mask_C3_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_with_mask_C3_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__global
uchar
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
uchar4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -502,7 +536,10 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D0 (__global uchar *src1, int
{
x
=
x
<<
2
;
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
3
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
3
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
*
3
)
+
src1_offset
-
(
dst_align
*
3
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -549,7 +586,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D0 (__global uchar *src1, int
}
__kernel
void
arithm_s_bitwise_and_with_mask_C3_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_with_mask_C3_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
char4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -562,7 +600,10 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D1 (__global char *src1, int s
{
x
=
x
<<
2
;
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
3
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
3
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
*
3
)
+
src1_offset
-
(
dst_align
*
3
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -608,7 +649,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D1 (__global char *src1, int s
}
}
__kernel
void
arithm_s_bitwise_and_with_mask_C3_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_with_mask_C3_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__global
ushort
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
ushort4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -621,7 +663,10 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D2 (__global ushort *src1, int
{
x
=
x
<<
1
;
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
*
6
)
+
src1_offset
-
(
dst_align
*
6
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -662,7 +707,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D2 (__global ushort *src1, int
*
((
__global
ushort2
*
)((
__global
char
*
)
dst
+
dst_index
+
8
))
=
data_2
;
}
}
__kernel
void
arithm_s_bitwise_and_with_mask_C3_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_with_mask_C3_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
short4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -675,7 +721,10 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D3 (__global short *src1, int
{
x
=
x
<<
1
;
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
*
6
)
+
src1_offset
-
(
dst_align
*
6
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -716,7 +765,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D3 (__global short *src1, int
*
((
__global
short2
*
)((
__global
char
*
)
dst
+
dst_index
+
8
))
=
data_2
;
}
}
__kernel
void
arithm_s_bitwise_and_with_mask_C3_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_with_mask_C3_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__global
int
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
int4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -758,7 +808,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D4 (__global int *src1, int sr
*
((
__global
int
*
)((
__global
char
*
)
dst
+
dst_index
+
8
))
=
data_2
;
}
}
__kernel
void
arithm_s_bitwise_and_with_mask_C3_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_with_mask_C3_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
char16
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -801,7 +852,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D5 (__global char *src1, int s
}
}
#
if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
arithm_s_bitwise_and_with_mask_C3_D6
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_with_mask_C3_D6
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
short16
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -844,7 +896,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D6 (__global short *src1, int sr
}
}
#
endif
__kernel
void
arithm_s_bitwise_and_with_mask_C4_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_with_mask_C4_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__global
uchar
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
uchar4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -872,7 +925,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C4_D0 (__global uchar *src1, int
}
__kernel
void
arithm_s_bitwise_and_with_mask_C4_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_with_mask_C4_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
char4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -899,7 +953,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C4_D1 (__global char *src1, int s
}
}
__kernel
void
arithm_s_bitwise_and_with_mask_C4_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_with_mask_C4_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__global
ushort
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
ushort4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -925,7 +980,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C4_D2 (__global ushort *src1, int
*
((
__global
ushort4
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_s_bitwise_and_with_mask_C4_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_with_mask_C4_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
short4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -951,7 +1007,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C4_D3 (__global short *src1, int
*
((
__global
short4
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_s_bitwise_and_with_mask_C4_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_with_mask_C4_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__global
int
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
int4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -977,7 +1034,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C4_D4 (__global int *src1, int sr
*
((
__global
int4
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_s_bitwise_and_with_mask_C4_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_with_mask_C4_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
char16
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -1006,7 +1064,8 @@ __kernel void arithm_s_bitwise_and_with_mask_C4_D5 (__global char *src1, int s
}
}
#
if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
arithm_s_bitwise_and_with_mask_C4_D6
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_and_with_mask_C4_D6
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
short16
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -1055,3 +1114,4 @@ __kernel void arithm_s_bitwise_and_with_mask_C4_D6 (__global short *src1, int sr
}
}
#
endif
This diff is collapsed.
Click to expand it.
modules/ocl/src/opencl/arithm_bitwise_not.cl
View file @
a2d27429
...
...
@@ -43,9 +43,12 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
endif
#
endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_NOT////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
...
...
@@ -61,7 +64,10 @@ __kernel void arithm_bitwise_not_D0 (__global uchar *src1, int src1_step, int sr
{
x
=
x
<<
2
;
#
define
dst_align
(
dst_offset
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(
dst_offset
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
-
dst_align
)
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
...
...
@@ -101,7 +107,10 @@ __kernel void arithm_bitwise_not_D1 (__global char *src1, int src1_step, int src
{
x
=
x
<<
2
;
#
define
dst_align
(
dst_offset
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(
dst_offset
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
-
dst_align
)
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
...
...
@@ -135,7 +144,10 @@ __kernel void arithm_bitwise_not_D2 (__global ushort *src1, int src1_step, int s
{
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
...
...
@@ -170,7 +182,10 @@ __kernel void arithm_bitwise_not_D3 (__global short *src1, int src1_step, int sr
{
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
...
...
This diff is collapsed.
Click to expand it.
modules/ocl/src/opencl/arithm_bitwise_or.cl
View file @
a2d27429
...
...
@@ -43,7 +43,11 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
endif
#
endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
...
...
@@ -62,7 +66,10 @@ __kernel void arithm_bitwise_or_D0 (__global uchar *src1, int src1_step, int src
{
x
=
x
<<
2
;
#
define
dst_align
(
dst_offset
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(
dst_offset
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
-
dst_align
)
;
int
src2_index
=
mad24
(
y,
src2_step,
x
+
src2_offset
-
dst_align
)
;
...
...
@@ -110,7 +117,10 @@ __kernel void arithm_bitwise_or_D1 (__global char *src1, int src1_step, int src1
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
...
...
@@ -147,7 +157,10 @@ __kernel void arithm_bitwise_or_D2 (__global ushort *src1, int src1_step, int sr
{
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
1
)
+
src2_offset
-
(
dst_align
<<
1
))
;
...
...
@@ -185,7 +198,10 @@ __kernel void arithm_bitwise_or_D3 (__global short *src1, int src1_step, int src
{
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
...
...
This diff is collapsed.
Click to expand it.
modules/ocl/src/opencl/arithm_bitwise_or_mask.cl
View file @
a2d27429
...
...
@@ -43,14 +43,18 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
endif
#
endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_OR////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
/**************************************bitwise_or
with
mask**************************************/
__kernel
void
arithm_bitwise_or_with_mask_C1_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_or_with_mask_C1_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__global
uchar
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
uchar
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -64,7 +68,10 @@ __kernel void arithm_bitwise_or_with_mask_C1_D0 (__global uchar *src1, int src1_
{
x
=
x
<<
2
;
#
define
dst_align
(
dst_offset
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(
dst_offset
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
-
dst_align
)
;
int
src2_index
=
mad24
(
y,
src2_step,
x
+
src2_offset
-
dst_align
)
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -91,7 +98,8 @@ __kernel void arithm_bitwise_or_with_mask_C1_D0 (__global uchar *src1, int src1_
__kernel void arithm_bitwise_or_with_mask_C1_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_or_with_mask_C1_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
...
...
@@ -105,7 +113,10 @@ __kernel void arithm_bitwise_or_with_mask_C1_D1 (__global char *src1, int src1_s
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
...
...
@@ -132,7 +143,8 @@ __kernel void arithm_bitwise_or_with_mask_C1_D1 (__global char *src1, int src1_s
__kernel
void
arithm_bitwise_or_with_mask_C1_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_or_with_mask_C1_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__global
ushort
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
ushort
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -146,7 +158,10 @@ __kernel void arithm_bitwise_or_with_mask_C1_D2 (__global ushort *src1, int src1
{
x
=
x
<<
1
;
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
1
)
+
src2_offset
-
(
dst_align
<<
1
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -171,7 +186,8 @@ __kernel void arithm_bitwise_or_with_mask_C1_D2 (__global ushort *src1, int src1
__kernel void arithm_bitwise_or_with_mask_C1_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_or_with_mask_C1_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global short *dst, int dst_step, int dst_offset,
...
...
@@ -185,7 +201,10 @@ __kernel void arithm_bitwise_or_with_mask_C1_D3 (__global short *src1, int src1_
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
...
...
@@ -210,7 +229,8 @@ __kernel void arithm_bitwise_or_with_mask_C1_D3 (__global short *src1, int src1_
__kernel
void
arithm_bitwise_or_with_mask_C1_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_or_with_mask_C1_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__global
int
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
int
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -242,7 +262,8 @@ __kernel void arithm_bitwise_or_with_mask_C1_D4 (__global int *src1, int src1_
__kernel void arithm_bitwise_or_with_mask_C1_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_or_with_mask_C1_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
...
...
@@ -273,9 +294,9 @@ __kernel void arithm_bitwise_or_with_mask_C1_D5 (__global char *src1, int src1_s
}
#
if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
arithm_bitwise_or_with_mask_C1_D6
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_or_with_mask_C1_D6
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -308,8 +329,8 @@ __kernel void arithm_bitwise_or_with_mask_C1_D6 (__global char *src1, int src1_s
#endif
__kernel void arithm_bitwise_or_with_mask_C2_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_or_with_mask_C2_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global uchar *dst, int dst_step, int dst_offset,
...
...
@@ -323,7 +344,10 @@ __kernel void arithm_bitwise_or_with_mask_C2_D0 (__global uchar *src1, int src1_
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
...
...
@@ -347,7 +371,8 @@ __kernel void arithm_bitwise_or_with_mask_C2_D0 (__global uchar *src1, int src1_
}
__kernel
void
arithm_bitwise_or_with_mask_C2_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_or_with_mask_C2_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -361,7 +386,10 @@ __kernel void arithm_bitwise_or_with_mask_C2_D1 (__global char *src1, int src1_s
{
x
=
x
<<
1
;
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
1
)
+
src2_offset
-
(
dst_align
<<
1
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -384,7 +412,8 @@ __kernel void arithm_bitwise_or_with_mask_C2_D1 (__global char *src1, int src1_s
}
}
__kernel void arithm_bitwise_or_with_mask_C2_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_or_with_mask_C2_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global ushort *dst, int dst_step, int dst_offset,
...
...
@@ -413,7 +442,8 @@ __kernel void arithm_bitwise_or_with_mask_C2_D2 (__global ushort *src1, int src1
*
((
__global
ushort2
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_bitwise_or_with_mask_C2_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_or_with_mask_C2_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -442,7 +472,8 @@ __kernel void arithm_bitwise_or_with_mask_C2_D3 (__global short *src1, int src1_
*((__global short2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_bitwise_or_with_mask_C2_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_or_with_mask_C2_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global int *dst, int dst_step, int dst_offset,
...
...
@@ -471,7 +502,8 @@ __kernel void arithm_bitwise_or_with_mask_C2_D4 (__global int *src1, int src1_
*
((
__global
int2
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_bitwise_or_with_mask_C2_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_or_with_mask_C2_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -501,7 +533,8 @@ __kernel void arithm_bitwise_or_with_mask_C2_D5 (__global char *src1, int src1_s
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_bitwise_or_with_mask_C2_D6 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_or_with_mask_C2_D6 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
...
...
@@ -533,8 +566,8 @@ __kernel void arithm_bitwise_or_with_mask_C2_D6 (__global char *src1, int src1_s
#
endif
__kernel
void
arithm_bitwise_or_with_mask_C3_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_or_with_mask_C3_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__global
uchar
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
uchar
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -548,7 +581,10 @@ __kernel void arithm_bitwise_or_with_mask_C3_D0 (__global uchar *src1, int src1_
{
x
=
x
<<
2
;
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
3
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
3
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
*
3
)
+
src1_offset
-
(
dst_align
*
3
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
*
3
)
+
src2_offset
-
(
dst_align
*
3
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -596,7 +632,8 @@ __kernel void arithm_bitwise_or_with_mask_C3_D0 (__global uchar *src1, int src1_
}
__kernel void arithm_bitwise_or_with_mask_C3_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_or_with_mask_C3_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
...
...
@@ -610,7 +647,10 @@ __kernel void arithm_bitwise_or_with_mask_C3_D1 (__global char *src1, int src1_s
{
x = x << 2;
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int src2_index = mad24(y, src2_step, (x * 3) + src2_offset - (dst_align * 3));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
...
...
@@ -657,7 +697,8 @@ __kernel void arithm_bitwise_or_with_mask_C3_D1 (__global char *src1, int src1_s
}
}
__kernel
void
arithm_bitwise_or_with_mask_C3_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_or_with_mask_C3_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__global
ushort
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
ushort
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -671,7 +712,10 @@ __kernel void arithm_bitwise_or_with_mask_C3_D2 (__global ushort *src1, int src1
{
x
=
x
<<
1
;
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
*
6
)
+
src1_offset
-
(
dst_align
*
6
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
*
6
)
+
src2_offset
-
(
dst_align
*
6
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -713,7 +757,8 @@ __kernel void arithm_bitwise_or_with_mask_C3_D2 (__global ushort *src1, int src1
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_bitwise_or_with_mask_C3_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_or_with_mask_C3_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global short *dst, int dst_step, int dst_offset,
...
...
@@ -727,7 +772,10 @@ __kernel void arithm_bitwise_or_with_mask_C3_D3 (__global short *src1, int src1_
{
x = x << 1;
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
...
...
@@ -769,7 +817,8 @@ __kernel void arithm_bitwise_or_with_mask_C3_D3 (__global short *src1, int src1_
*
((
__global
short2
*
)((
__global
char
*
)
dst
+
dst_index
+
8
))
=
data_2
;
}
}
__kernel
void
arithm_bitwise_or_with_mask_C3_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_or_with_mask_C3_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__global
int
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
int
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -813,7 +862,8 @@ __kernel void arithm_bitwise_or_with_mask_C3_D4 (__global int *src1, int src1_
*((__global int *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_bitwise_or_with_mask_C3_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_or_with_mask_C3_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
...
...
@@ -858,7 +908,8 @@ __kernel void arithm_bitwise_or_with_mask_C3_D5 (__global char *src1, int src1_s
}
}
#
if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
arithm_bitwise_or_with_mask_C3_D6
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_or_with_mask_C3_D6
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -905,8 +956,8 @@ __kernel void arithm_bitwise_or_with_mask_C3_D6 (__global char *src1, int src1_s
#endif
__kernel void arithm_bitwise_or_with_mask_C4_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_or_with_mask_C4_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global uchar *dst, int dst_step, int dst_offset,
...
...
@@ -937,7 +988,8 @@ __kernel void arithm_bitwise_or_with_mask_C4_D0 (__global uchar *src1, int src1_
}
__kernel
void
arithm_bitwise_or_with_mask_C4_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_or_with_mask_C4_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -967,7 +1019,8 @@ __kernel void arithm_bitwise_or_with_mask_C4_D1 (__global char *src1, int src1_s
}
}
__kernel void arithm_bitwise_or_with_mask_C4_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_or_with_mask_C4_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global ushort *dst, int dst_step, int dst_offset,
...
...
@@ -996,7 +1049,8 @@ __kernel void arithm_bitwise_or_with_mask_C4_D2 (__global ushort *src1, int src1
*
((
__global
ushort4
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_bitwise_or_with_mask_C4_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_or_with_mask_C4_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -1025,7 +1079,8 @@ __kernel void arithm_bitwise_or_with_mask_C4_D3 (__global short *src1, int src1_
*((__global short4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_bitwise_or_with_mask_C4_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_or_with_mask_C4_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global int *dst, int dst_step, int dst_offset,
...
...
@@ -1054,7 +1109,8 @@ __kernel void arithm_bitwise_or_with_mask_C4_D4 (__global int *src1, int src1_
*
((
__global
int4
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_bitwise_or_with_mask_C4_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_or_with_mask_C4_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -1084,7 +1140,8 @@ __kernel void arithm_bitwise_or_with_mask_C4_D5 (__global char *src1, int src1_s
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_bitwise_or_with_mask_C4_D6 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_bitwise_or_with_mask_C4_D6 (
__global char *src1, int src1_step, int src1_offset,
__global char *src2, int src2_step, int src2_offset,
__global uchar *mask, int mask_step, int mask_offset,
__global char *dst, int dst_step, int dst_offset,
...
...
This diff is collapsed.
Click to expand it.
modules/ocl/src/opencl/arithm_bitwise_or_scalar.cl
View file @
a2d27429
...
...
@@ -43,14 +43,19 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
endif
#
endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_OR////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
/**************************************bitwise_or
with
scalar
without
mask**************************************/
__kernel
void
arithm_s_bitwise_or_C1_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_or_C1_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__global
uchar
*dst,
int
dst_step,
int
dst_offset,
uchar4
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -61,7 +66,10 @@ __kernel void arithm_s_bitwise_or_C1_D0 (__global uchar *src1, int src1_step,
{
x
=
x
<<
2
;
#
define
dst_align
(
dst_offset
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(
dst_offset
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
-
dst_align
)
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
...
...
@@ -84,7 +92,8 @@ __kernel void arithm_s_bitwise_or_C1_D0 (__global uchar *src1, int src1_step,
}
__kernel void arithm_s_bitwise_or_C1_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C1_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
{
...
...
@@ -95,7 +104,10 @@ __kernel void arithm_s_bitwise_or_C1_D1 (__global char *src1, int src1_step, i
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
...
...
@@ -117,7 +129,8 @@ __kernel void arithm_s_bitwise_or_C1_D1 (__global char *src1, int src1_step, i
}
}
__kernel
void
arithm_s_bitwise_or_C1_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_or_C1_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__global
ushort
*dst,
int
dst_step,
int
dst_offset,
ushort4
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -129,7 +142,10 @@ __kernel void arithm_s_bitwise_or_C1_D2 (__global ushort *src1, int src1_step,
{
x
=
x
<<
1
;
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
...
...
@@ -148,7 +164,8 @@ __kernel void arithm_s_bitwise_or_C1_D2 (__global ushort *src1, int src1_step,
*((__global ushort2 *)((__global uchar *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_C1_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C1_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
{
...
...
@@ -160,7 +177,10 @@ __kernel void arithm_s_bitwise_or_C1_D3 (__global short *src1, int src1_step,
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
...
...
@@ -179,7 +199,8 @@ __kernel void arithm_s_bitwise_or_C1_D3 (__global short *src1, int src1_step,
*
((
__global
short2
*
)((
__global
uchar
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_s_bitwise_or_C1_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_or_C1_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__global
int
*dst,
int
dst_step,
int
dst_offset,
int4
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -200,7 +221,8 @@ __kernel void arithm_s_bitwise_or_C1_D4 (__global int *src1, int src1_step, in
*((__global int *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_C1_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C1_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
{
...
...
@@ -222,7 +244,8 @@ __kernel void arithm_s_bitwise_or_C1_D5 (__global char *src1, int src1_step, i
}
}
#
if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
arithm_s_bitwise_or_C1_D6
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_or_C1_D6
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
short16
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -245,8 +268,8 @@ __kernel void arithm_s_bitwise_or_C1_D6 (__global short *src1, int src1_step, in
}
}
#endif
__kernel void arithm_s_bitwise_or_C2_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C2_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
...
...
@@ -259,7 +282,10 @@ __kernel void arithm_s_bitwise_or_C2_D0 (__global uchar *src1, int src1_step,
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
...
...
@@ -280,7 +306,8 @@ __kernel void arithm_s_bitwise_or_C2_D0 (__global uchar *src1, int src1_step,
}
__kernel
void
arithm_s_bitwise_or_C2_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_or_C2_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
char4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -293,7 +320,10 @@ __kernel void arithm_s_bitwise_or_C2_D1 (__global char *src1, int src1_step, i
{
x
=
x
<<
1
;
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
...
...
@@ -313,7 +343,8 @@ __kernel void arithm_s_bitwise_or_C2_D1 (__global char *src1, int src1_step, i
}
}
__kernel void arithm_s_bitwise_or_C2_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C2_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
...
...
@@ -335,7 +366,8 @@ __kernel void arithm_s_bitwise_or_C2_D2 (__global ushort *src1, int src1_step,
*
((
__global
ushort2
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_s_bitwise_or_C2_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_or_C2_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
short4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -378,7 +410,8 @@ __kernel void arithm_s_bitwise_or_C2_D4 (__global int *src1, int src1_step, in
*
((
__global
int2
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_s_bitwise_or_C2_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_or_C2_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
char16
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -400,7 +433,8 @@ __kernel void arithm_s_bitwise_or_C2_D5 (__global char *src1, int src1_step, i
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_or_C2_D6 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C2_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
...
...
@@ -423,7 +457,8 @@ __kernel void arithm_s_bitwise_or_C2_D6 (__global short *src1, int src1_step, in
}
}
#endif
__kernel void arithm_s_bitwise_or_C3_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C3_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
uchar4 src2, int rows, int cols, int dst_step1)
...
...
@@ -436,7 +471,10 @@ __kernel void arithm_s_bitwise_or_C3_D0 (__global uchar *src1, int src1_step,
{
x = x << 2;
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int dst_start = mad24(y, dst_step, dst_offset);
...
...
@@ -480,7 +518,8 @@ __kernel void arithm_s_bitwise_or_C3_D0 (__global uchar *src1, int src1_step,
}
__kernel
void
arithm_s_bitwise_or_C3_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_or_C3_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
char4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -493,7 +532,10 @@ __kernel void arithm_s_bitwise_or_C3_D1 (__global char *src1, int src1_step, i
{
x
=
x
<<
2
;
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
3
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
3
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
*
3
)
+
src1_offset
-
(
dst_align
*
3
))
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
...
...
@@ -536,7 +578,8 @@ __kernel void arithm_s_bitwise_or_C3_D1 (__global char *src1, int src1_step, i
}
}
__kernel void arithm_s_bitwise_or_C3_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C3_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
ushort4 src2, int rows, int cols, int dst_step1)
...
...
@@ -549,7 +592,10 @@ __kernel void arithm_s_bitwise_or_C3_D2 (__global ushort *src1, int src1_step,
{
x = x << 1;
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int dst_start = mad24(y, dst_step, dst_offset);
...
...
@@ -587,7 +633,8 @@ __kernel void arithm_s_bitwise_or_C3_D2 (__global ushort *src1, int src1_step,
*
((
__global
ushort2
*
)((
__global
char
*
)
dst
+
dst_index
+
8
))
=
data_2
;
}
}
__kernel
void
arithm_s_bitwise_or_C3_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_or_C3_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
short4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -600,7 +647,10 @@ __kernel void arithm_s_bitwise_or_C3_D3 (__global short *src1, int src1_step,
{
x
=
x
<<
1
;
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
*
6
)
+
src1_offset
-
(
dst_align
*
6
))
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
...
...
@@ -638,7 +688,8 @@ __kernel void arithm_s_bitwise_or_C3_D3 (__global short *src1, int src1_step,
*((__global short2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_bitwise_or_C3_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C3_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int4 src2, int rows, int cols, int dst_step1)
...
...
@@ -673,7 +724,8 @@ __kernel void arithm_s_bitwise_or_C3_D4 (__global int *src1, int src1_step, in
*
((
__global
int
*
)((
__global
char
*
)
dst
+
dst_index
+
8
))
=
tmp_data_2
;
}
}
__kernel
void
arithm_s_bitwise_or_C3_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_or_C3_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
char16
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -706,7 +758,8 @@ __kernel void arithm_s_bitwise_or_C3_D5 (__global char *src1, int src1_step, i
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_or_C3_D6 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C3_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short16 src2, int rows, int cols, int dst_step1)
...
...
@@ -742,7 +795,8 @@ __kernel void arithm_s_bitwise_or_C3_D6 (__global short *src1, int src1_step, in
}
}
#
endif
__kernel
void
arithm_s_bitwise_or_C4_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_or_C4_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__global
uchar
*dst,
int
dst_step,
int
dst_offset,
uchar4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -765,7 +819,8 @@ __kernel void arithm_s_bitwise_or_C4_D0 (__global uchar *src1, int src1_step,
}
__kernel void arithm_s_bitwise_or_C4_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C4_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char4 src2, int rows, int cols, int dst_step1)
...
...
@@ -787,7 +842,8 @@ __kernel void arithm_s_bitwise_or_C4_D1 (__global char *src1, int src1_step, i
}
}
__kernel
void
arithm_s_bitwise_or_C4_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_or_C4_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__global
ushort
*dst,
int
dst_step,
int
dst_offset,
ushort4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -808,7 +864,8 @@ __kernel void arithm_s_bitwise_or_C4_D2 (__global ushort *src1, int src1_step,
*((__global ushort4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_C4_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C4_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
short4 src2, int rows, int cols, int dst_step1)
...
...
@@ -829,7 +886,8 @@ __kernel void arithm_s_bitwise_or_C4_D3 (__global short *src1, int src1_step,
*
((
__global
short4
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_s_bitwise_or_C4_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_or_C4_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__global
int
*dst,
int
dst_step,
int
dst_offset,
int4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -850,7 +908,8 @@ __kernel void arithm_s_bitwise_or_C4_D4 (__global int *src1, int src1_step, in
*((__global int4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_C4_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_C4_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
char16 src2, int rows, int cols, int dst_step1)
...
...
@@ -874,7 +933,8 @@ __kernel void arithm_s_bitwise_or_C4_D5 (__global char *src1, int src1_step, i
}
}
#
if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
arithm_s_bitwise_or_C4_D6
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_or_C4_D6
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
short16
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
This diff is collapsed.
Click to expand it.
modules/ocl/src/opencl/arithm_bitwise_or_scalar_mask.cl
View file @
a2d27429
...
...
@@ -43,14 +43,18 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
endif
#
endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_OR////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
/**************************************bitwise_or
with
scalar
with
mask**************************************/
__kernel
void
arithm_s_bitwise_or_with_mask_C1_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_or_with_mask_C1_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__global
uchar
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
uchar4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -64,7 +68,10 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D0 (__global uchar *src1, int s
{
x
=
x
<<
2
;
#
define
dst_align
(
dst_offset
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(
dst_offset
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
-
dst_align
)
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -89,7 +96,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D0 (__global uchar *src1, int s
}
__kernel void arithm_s_bitwise_or_with_mask_C1_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_with_mask_C1_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char4 src2, int rows, int cols, int dst_step1)
...
...
@@ -103,7 +111,10 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D1 (__global char *src1, int sr
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
...
...
@@ -127,7 +138,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D1 (__global char *src1, int sr
}
}
__kernel
void
arithm_s_bitwise_or_with_mask_C1_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_or_with_mask_C1_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__global
ushort
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
ushort4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -141,7 +153,10 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D2 (__global ushort *src1, int
{
x
=
x
<<
1
;
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -162,7 +177,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D2 (__global ushort *src1, int
*((__global ushort2 *)((__global uchar *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_with_mask_C1_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_with_mask_C1_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short4 src2, int rows, int cols, int dst_step1)
...
...
@@ -176,7 +192,10 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D3 (__global short *src1, int s
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
...
...
@@ -197,7 +216,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D3 (__global short *src1, int s
*
((
__global
short2
*
)((
__global
uchar
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_s_bitwise_or_with_mask_C1_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_or_with_mask_C1_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__global
int
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
int4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -226,7 +246,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D4 (__global int *src1, int s
}
}
__kernel void arithm_s_bitwise_or_with_mask_C1_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_with_mask_C1_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char16 src2, int rows, int cols, int dst_step1)
...
...
@@ -254,9 +275,9 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D5 (__global char *src1, int
*
((
__global
char4
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
#
if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
arithm_s_bitwise_or_with_mask_C1_D6
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_or_with_mask_C1_D6
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
short16
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -285,7 +306,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D6 (__global short *src1, int src
}
}
#endif
__kernel void arithm_s_bitwise_or_with_mask_C2_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_with_mask_C2_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
uchar4 src2, int rows, int cols, int dst_step1)
...
...
@@ -299,7 +321,10 @@ __kernel void arithm_s_bitwise_or_with_mask_C2_D0 (__global uchar *src1, int s
{
x = x << 1;
#define dst_align ((dst_offset >> 1) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 1)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
...
...
@@ -322,7 +347,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C2_D0 (__global uchar *src1, int s
}
__kernel
void
arithm_s_bitwise_or_with_mask_C2_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_or_with_mask_C2_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
char4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -336,7 +362,10 @@ __kernel void arithm_s_bitwise_or_with_mask_C2_D1 (__global char *src1, int sr
{
x
=
x
<<
1
;
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -358,7 +387,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C2_D1 (__global char *src1, int sr
}
}
__kernel void arithm_s_bitwise_or_with_mask_C2_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_with_mask_C2_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
ushort4 src2, int rows, int cols, int dst_step1)
...
...
@@ -386,7 +416,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C2_D2 (__global ushort *src1, int
*
((
__global
ushort2
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_s_bitwise_or_with_mask_C2_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_or_with_mask_C2_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
short4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -414,7 +445,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C2_D3 (__global short *src1, int s
*((__global short2 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_with_mask_C2_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_with_mask_C2_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
int4 src2, int rows, int cols, int dst_step1)
...
...
@@ -442,7 +474,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C2_D4 (__global int *src1, int src
*
((
__global
int2
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_s_bitwise_or_with_mask_C2_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_or_with_mask_C2_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
char16
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -470,7 +503,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C2_D5 (__global char *src1, int sr
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_or_with_mask_C2_D6 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_with_mask_C2_D6 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short16 src2, int rows, int cols, int dst_step1)
...
...
@@ -499,7 +533,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C2_D6 (__global char *src1, int sr
}
}
#
endif
__kernel
void
arithm_s_bitwise_or_with_mask_C3_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_or_with_mask_C3_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__global
uchar
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
uchar4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -513,7 +548,10 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D0 (__global uchar *src1, int s
{
x
=
x
<<
2
;
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
3
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
3
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
*
3
)
+
src1_offset
-
(
dst_align
*
3
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -560,7 +598,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D0 (__global uchar *src1, int s
}
__kernel void arithm_s_bitwise_or_with_mask_C3_D1 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_with_mask_C3_D1 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char4 src2, int rows, int cols, int dst_step1)
...
...
@@ -574,7 +613,10 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D1 (__global char *src1, int sr
{
x = x << 2;
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 3 ) & 3)
int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
...
...
@@ -620,7 +662,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D1 (__global char *src1, int sr
}
}
__kernel
void
arithm_s_bitwise_or_with_mask_C3_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_or_with_mask_C3_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__global
ushort
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
ushort4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -634,7 +677,10 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D2 (__global ushort *src1, int
{
x
=
x
<<
1
;
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
*
6
)
+
src1_offset
-
(
dst_align
*
6
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -675,7 +721,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D2 (__global ushort *src1, int
*((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_bitwise_or_with_mask_C3_D3 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_with_mask_C3_D3 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short4 src2, int rows, int cols, int dst_step1)
...
...
@@ -689,7 +736,10 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D3 (__global short *src1, int s
{
x = x << 1;
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (((dst_offset % dst_step) / 6 ) & 1)
int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6));
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
...
...
@@ -730,7 +780,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D3 (__global short *src1, int s
*
((
__global
short2
*
)((
__global
char
*
)
dst
+
dst_index
+
8
))
=
data_2
;
}
}
__kernel
void
arithm_s_bitwise_or_with_mask_C3_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_or_with_mask_C3_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__global
int
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
int4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -773,7 +824,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D4 (__global int *src1, int src
*((__global int *)((__global char *)dst + dst_index + 8))= data_2;
}
}
__kernel void arithm_s_bitwise_or_with_mask_C3_D5 (__global char *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_with_mask_C3_D5 (
__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
char16 src2, int rows, int cols, int dst_step1)
...
...
@@ -818,7 +870,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D5 (__global char *src1, int sr
}
}
#
if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
arithm_s_bitwise_or_with_mask_C3_D6
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_or_with_mask_C3_D6
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
short16
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -861,7 +914,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D6 (__global short *src1, int src
}
}
#endif
__kernel void arithm_s_bitwise_or_with_mask_C4_D0 (__global uchar *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_with_mask_C4_D0 (
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
uchar4 src2, int rows, int cols, int dst_step1)
...
...
@@ -890,7 +944,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C4_D0 (__global uchar *src1, int s
}
__kernel
void
arithm_s_bitwise_or_with_mask_C4_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_or_with_mask_C4_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
char4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -918,7 +973,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C4_D1 (__global char *src1, int sr
}
}
__kernel void arithm_s_bitwise_or_with_mask_C4_D2 (__global ushort *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_with_mask_C4_D2 (
__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
ushort4 src2, int rows, int cols, int dst_step1)
...
...
@@ -945,7 +1001,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C4_D2 (__global ushort *src1, int
*
((
__global
ushort4
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_s_bitwise_or_with_mask_C4_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_or_with_mask_C4_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
short4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -972,7 +1029,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C4_D3 (__global short *src1, int s
*((__global short4 *)((__global char *)dst + dst_index)) = data;
}
}
__kernel void arithm_s_bitwise_or_with_mask_C4_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_with_mask_C4_D4 (
__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
int4 src2, int rows, int cols, int dst_step1)
...
...
@@ -999,7 +1057,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C4_D4 (__global int *src1, int src
*
((
__global
int4
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_s_bitwise_or_with_mask_C4_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_or_with_mask_C4_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
char16
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -1029,7 +1088,8 @@ __kernel void arithm_s_bitwise_or_with_mask_C4_D5 (__global char *src1, int sr
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_bitwise_or_with_mask_C4_D6 (__global short *src1, int src1_step, int src1_offset,
__kernel void arithm_s_bitwise_or_with_mask_C4_D6 (
__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
__global uchar *mask, int mask_step, int mask_offset,
short16 src2, int rows, int cols, int dst_step1)
...
...
This diff is collapsed.
Click to expand it.
modules/ocl/src/opencl/arithm_bitwise_xor.cl
View file @
a2d27429
...
...
@@ -43,9 +43,12 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
endif
#
endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_XOR////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
...
...
@@ -62,7 +65,10 @@ __kernel void arithm_bitwise_xor_D0 (__global uchar *src1, int src1_step, int sr
{
x
=
x
<<
2
;
#
define
dst_align
(
dst_offset
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(
dst_offset
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
-
dst_align
)
;
int
src2_index
=
mad24
(
y,
src2_step,
x
+
src2_offset
-
dst_align
)
;
...
...
@@ -112,7 +118,10 @@ __kernel void arithm_bitwise_xor_D1 (__global char *src1, int src1_step, int src
{
x
=
x
<<
2
;
#
define
dst_align
(
dst_offset
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(
dst_offset
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
-
dst_align
)
;
int
src2_index
=
mad24
(
y,
src2_step,
x
+
src2_offset
-
dst_align
)
;
...
...
@@ -163,7 +172,10 @@ __kernel void arithm_bitwise_xor_D2 (__global ushort *src1, int src1_step, int s
{
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
1
)
+
src2_offset
-
(
dst_align
<<
1
))
;
...
...
@@ -215,7 +227,10 @@ __kernel void arithm_bitwise_xor_D3 (__global short *src1, int src1_step, int sr
{
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
1
)
+
src2_offset
-
(
dst_align
<<
1
))
;
...
...
@@ -301,7 +316,6 @@ __kernel void arithm_bitwise_xor_D5 (__global char *src1, int src1_step, int src
*
((
__global
char4
*
)((
__global
char
*
)
dst
+
dst_index
))
=
tmp
;
}
}
#
if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
arithm_bitwise_xor_D6
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*src2,
int
src2_step,
int
src2_offset,
...
...
This diff is collapsed.
Click to expand it.
modules/ocl/src/opencl/arithm_bitwise_xor_mask.cl
View file @
a2d27429
...
...
@@ -43,14 +43,18 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
endif
#
endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_XOR////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
/**************************************bitwise_xor
with
mask**************************************/
__kernel
void
arithm_bitwise_xor_with_mask_C1_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_xor_with_mask_C1_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__global
uchar
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
uchar
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -64,7 +68,10 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D0 (__global uchar *src1, int src1
{
x
=
x
<<
2
;
#
define
dst_align
(
dst_offset
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(
dst_offset
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
-
dst_align
)
;
int
src2_index
=
mad24
(
y,
src2_step,
x
+
src2_offset
-
dst_align
)
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -91,7 +98,8 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D0 (__global uchar *src1, int src1
__kernel
void
arithm_bitwise_xor_with_mask_C1_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_xor_with_mask_C1_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -105,7 +113,10 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D1 (__global char *src1, int src1_
{
x
=
x
<<
2
;
#
define
dst_align
(
dst_offset
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(
dst_offset
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
-
dst_align
)
;
int
src2_index
=
mad24
(
y,
src2_step,
x
+
src2_offset
-
dst_align
)
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -132,7 +143,8 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D1 (__global char *src1, int src1_
__kernel
void
arithm_bitwise_xor_with_mask_C1_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_xor_with_mask_C1_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__global
ushort
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
ushort
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -146,7 +158,10 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D2 (__global ushort *src1, int src
{
x
=
x
<<
1
;
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
1
)
+
src2_offset
-
(
dst_align
<<
1
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -171,7 +186,8 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D2 (__global ushort *src1, int src
__kernel
void
arithm_bitwise_xor_with_mask_C1_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_xor_with_mask_C1_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -185,7 +201,10 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D3 (__global short *src1, int src1
{
x
=
x
<<
1
;
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
1
)
+
src2_offset
-
(
dst_align
<<
1
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -210,7 +229,8 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D3 (__global short *src1, int src1
__kernel
void
arithm_bitwise_xor_with_mask_C1_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_xor_with_mask_C1_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__global
int
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
int
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -242,7 +262,8 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D4 (__global int *src1, int src1
__kernel
void
arithm_bitwise_xor_with_mask_C1_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_xor_with_mask_C1_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -273,9 +294,9 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D5 (__global char *src1, int src1_
}
#
if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
arithm_bitwise_xor_with_mask_C1_D6
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_xor_with_mask_C1_D6
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -308,8 +329,8 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D6 (__global char *src1, int src1_
__kernel
void
arithm_bitwise_xor_with_mask_C2_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_xor_with_mask_C2_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__global
uchar
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
uchar
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -323,7 +344,10 @@ __kernel void arithm_bitwise_xor_with_mask_C2_D0 (__global uchar *src1, int src1
{
x
=
x
<<
1
;
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
1
)
+
src2_offset
-
(
dst_align
<<
1
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -347,7 +371,8 @@ __kernel void arithm_bitwise_xor_with_mask_C2_D0 (__global uchar *src1, int src1
}
__kernel
void
arithm_bitwise_xor_with_mask_C2_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_xor_with_mask_C2_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -361,7 +386,10 @@ __kernel void arithm_bitwise_xor_with_mask_C2_D1 (__global char *src1, int src1_
{
x
=
x
<<
1
;
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
1
)
+
src2_offset
-
(
dst_align
<<
1
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -384,7 +412,8 @@ __kernel void arithm_bitwise_xor_with_mask_C2_D1 (__global char *src1, int src1_
}
}
__kernel
void
arithm_bitwise_xor_with_mask_C2_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_xor_with_mask_C2_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__global
ushort
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
ushort
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -413,7 +442,8 @@ __kernel void arithm_bitwise_xor_with_mask_C2_D2 (__global ushort *src1, int src
*
((
__global
ushort2
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_bitwise_xor_with_mask_C2_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_xor_with_mask_C2_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -442,7 +472,8 @@ __kernel void arithm_bitwise_xor_with_mask_C2_D3 (__global short *src1, int src1
*
((
__global
short2
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_bitwise_xor_with_mask_C2_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_xor_with_mask_C2_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__global
int
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
int
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -471,7 +502,8 @@ __kernel void arithm_bitwise_xor_with_mask_C2_D4 (__global int *src1, int src1
*
((
__global
int2
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_bitwise_xor_with_mask_C2_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_xor_with_mask_C2_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -501,7 +533,8 @@ __kernel void arithm_bitwise_xor_with_mask_C2_D5 (__global char *src1, int src1_
}
}
#
if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
arithm_bitwise_xor_with_mask_C2_D6
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_xor_with_mask_C2_D6
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -533,8 +566,8 @@ __kernel void arithm_bitwise_xor_with_mask_C2_D6 (__global char *src1, int src1_
#
endif
__kernel
void
arithm_bitwise_xor_with_mask_C3_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_xor_with_mask_C3_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__global
uchar
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
uchar
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -548,7 +581,10 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D0 (__global uchar *src1, int src1
{
x
=
x
<<
2
;
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
3
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
3
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
*
3
)
+
src1_offset
-
(
dst_align
*
3
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
*
3
)
+
src2_offset
-
(
dst_align
*
3
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -596,7 +632,8 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D0 (__global uchar *src1, int src1
}
__kernel
void
arithm_bitwise_xor_with_mask_C3_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_xor_with_mask_C3_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -610,7 +647,10 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D1 (__global char *src1, int src1_
{
x
=
x
<<
2
;
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
3
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
3
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
*
3
)
+
src1_offset
-
(
dst_align
*
3
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
*
3
)
+
src2_offset
-
(
dst_align
*
3
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -657,7 +697,8 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D1 (__global char *src1, int src1_
}
}
__kernel
void
arithm_bitwise_xor_with_mask_C3_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_xor_with_mask_C3_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__global
ushort
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
ushort
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -671,7 +712,10 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D2 (__global ushort *src1, int src
{
x
=
x
<<
1
;
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
*
6
)
+
src1_offset
-
(
dst_align
*
6
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
*
6
)
+
src2_offset
-
(
dst_align
*
6
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -713,7 +757,8 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D2 (__global ushort *src1, int src
*
((
__global
ushort2
*
)((
__global
char
*
)
dst
+
dst_index
+
8
))
=
data_2
;
}
}
__kernel
void
arithm_bitwise_xor_with_mask_C3_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_xor_with_mask_C3_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -727,7 +772,10 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D3 (__global short *src1, int src1
{
x
=
x
<<
1
;
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
*
6
)
+
src1_offset
-
(
dst_align
*
6
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
*
6
)
+
src2_offset
-
(
dst_align
*
6
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -769,7 +817,8 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D3 (__global short *src1, int src1
*
((
__global
short2
*
)((
__global
char
*
)
dst
+
dst_index
+
8
))
=
data_2
;
}
}
__kernel
void
arithm_bitwise_xor_with_mask_C3_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_xor_with_mask_C3_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__global
int
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
int
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -813,7 +862,8 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D4 (__global int *src1, int src1
*
((
__global
int
*
)((
__global
char
*
)
dst
+
dst_index
+
8
))
=
data_2
;
}
}
__kernel
void
arithm_bitwise_xor_with_mask_C3_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_xor_with_mask_C3_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -858,7 +908,8 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D5 (__global char *src1, int src1_
}
}
#
if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
arithm_bitwise_xor_with_mask_C3_D6
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_xor_with_mask_C3_D6
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -905,8 +956,8 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D6 (__global char *src1, int src1_
#
endif
__kernel
void
arithm_bitwise_xor_with_mask_C4_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_xor_with_mask_C4_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__global
uchar
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
uchar
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -937,7 +988,8 @@ __kernel void arithm_bitwise_xor_with_mask_C4_D0 (__global uchar *src1, int src1
}
__kernel
void
arithm_bitwise_xor_with_mask_C4_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_xor_with_mask_C4_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -967,7 +1019,8 @@ __kernel void arithm_bitwise_xor_with_mask_C4_D1 (__global char *src1, int src1_
}
}
__kernel
void
arithm_bitwise_xor_with_mask_C4_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_xor_with_mask_C4_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__global
ushort
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
ushort
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -996,7 +1049,8 @@ __kernel void arithm_bitwise_xor_with_mask_C4_D2 (__global ushort *src1, int src
*
((
__global
ushort4
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_bitwise_xor_with_mask_C4_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_xor_with_mask_C4_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -1025,7 +1079,8 @@ __kernel void arithm_bitwise_xor_with_mask_C4_D3 (__global short *src1, int src1
*
((
__global
short4
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_bitwise_xor_with_mask_C4_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_xor_with_mask_C4_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__global
int
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
int
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -1054,7 +1109,8 @@ __kernel void arithm_bitwise_xor_with_mask_C4_D4 (__global int *src1, int src1
*
((
__global
int4
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_bitwise_xor_with_mask_C4_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_xor_with_mask_C4_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
...
...
@@ -1084,7 +1140,8 @@ __kernel void arithm_bitwise_xor_with_mask_C4_D5 (__global char *src1, int src1_
}
}
#
if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
arithm_bitwise_xor_with_mask_C4_D6
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise_xor_with_mask_C4_D6
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
...
...
This diff is collapsed.
Click to expand it.
modules/ocl/src/opencl/arithm_bitwise_xor_scalar.cl
View file @
a2d27429
...
...
@@ -42,17 +42,19 @@
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
//
#
if
defined
(
__ATI__
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
__NVIDIA__
)
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
endif
#
endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_XOR////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
/**************************************xor
with
scalar
without
mask**************************************/
__kernel
void
arithm_s_bitwise_xor_C1_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_C1_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__global
uchar
*dst,
int
dst_step,
int
dst_offset,
uchar4
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -63,7 +65,10 @@ __kernel void arithm_s_bitwise_xor_C1_D0 (__global uchar *src1, int src1_step,
{
x
=
x
<<
2
;
#
define
dst_align
(
dst_offset
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(
dst_offset
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
-
dst_align
)
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
...
...
@@ -86,7 +91,8 @@ __kernel void arithm_s_bitwise_xor_C1_D0 (__global uchar *src1, int src1_step,
}
__kernel
void
arithm_s_bitwise_xor_C1_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_C1_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
char4
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -97,7 +103,10 @@ __kernel void arithm_s_bitwise_xor_C1_D1 (__global char *src1, int src1_step,
{
x
=
x
<<
2
;
#
define
dst_align
(
dst_offset
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(
dst_offset
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
-
dst_align
)
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
...
...
@@ -119,7 +128,8 @@ __kernel void arithm_s_bitwise_xor_C1_D1 (__global char *src1, int src1_step,
}
}
__kernel
void
arithm_s_bitwise_xor_C1_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_C1_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__global
ushort
*dst,
int
dst_step,
int
dst_offset,
ushort4
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -131,7 +141,10 @@ __kernel void arithm_s_bitwise_xor_C1_D2 (__global ushort *src1, int src1_step
{
x
=
x
<<
1
;
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
...
...
@@ -150,7 +163,8 @@ __kernel void arithm_s_bitwise_xor_C1_D2 (__global ushort *src1, int src1_step
*
((
__global
ushort2
*
)((
__global
uchar
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_s_bitwise_xor_C1_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_C1_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
short4
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -162,7 +176,10 @@ __kernel void arithm_s_bitwise_xor_C1_D3 (__global short *src1, int src1_step,
{
x
=
x
<<
1
;
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
...
...
@@ -181,7 +198,8 @@ __kernel void arithm_s_bitwise_xor_C1_D3 (__global short *src1, int src1_step,
*
((
__global
short2
*
)((
__global
uchar
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_s_bitwise_xor_C1_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_C1_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__global
int
*dst,
int
dst_step,
int
dst_offset,
int4
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -202,7 +220,8 @@ __kernel void arithm_s_bitwise_xor_C1_D4 (__global int *src1, int src1_step, i
*
((
__global
int
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_s_bitwise_xor_C1_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_C1_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
char16
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -234,7 +253,8 @@ __kernel void arithm_s_bitwise_xor_C1_D5 (__global char *src1, int src1_step,
}
#
if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
arithm_s_bitwise_xor_C1_D6
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_C1_D6
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
short16
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -256,7 +276,8 @@ __kernel void arithm_s_bitwise_xor_C1_D6 (__global short *src1, int src1_step, i
}
}
#
endif
__kernel
void
arithm_s_bitwise_xor_C2_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_C2_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__global
uchar
*dst,
int
dst_step,
int
dst_offset,
uchar4
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -268,7 +289,10 @@ __kernel void arithm_s_bitwise_xor_C2_D0 (__global uchar *src1, int src1_step,
{
x
=
x
<<
1
;
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
...
...
@@ -290,7 +314,8 @@ __kernel void arithm_s_bitwise_xor_C2_D0 (__global uchar *src1, int src1_step,
}
__kernel
void
arithm_s_bitwise_xor_C2_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_C2_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
char4
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -302,7 +327,10 @@ __kernel void arithm_s_bitwise_xor_C2_D1 (__global char *src1, int src1_step,
{
x
=
x
<<
1
;
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
...
...
@@ -322,7 +350,8 @@ __kernel void arithm_s_bitwise_xor_C2_D1 (__global char *src1, int src1_step,
}
}
__kernel
void
arithm_s_bitwise_xor_C2_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_C2_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__global
ushort
*dst,
int
dst_step,
int
dst_offset,
ushort4
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -343,7 +372,8 @@ __kernel void arithm_s_bitwise_xor_C2_D2 (__global ushort *src1, int src1_step
*
((
__global
ushort2
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_s_bitwise_xor_C2_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_C2_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
short4
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -364,7 +394,8 @@ __kernel void arithm_s_bitwise_xor_C2_D3 (__global short *src1, int src1_step,
*
((
__global
short2
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_s_bitwise_xor_C2_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_C2_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__global
int
*dst,
int
dst_step,
int
dst_offset,
int4
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -384,7 +415,8 @@ __kernel void arithm_s_bitwise_xor_C2_D4 (__global int *src1, int src1_step, i
*
((
__global
int2
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_s_bitwise_xor_C2_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_C2_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
char16
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -406,7 +438,8 @@ __kernel void arithm_s_bitwise_xor_C2_D5 (__global char *src1, int src1_step,
}
}
#
if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
arithm_s_bitwise_xor_C2_D6
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_C2_D6
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
short16
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -428,7 +461,8 @@ __kernel void arithm_s_bitwise_xor_C2_D6 (__global short *src1, int src1_step, i
}
}
#
endif
__kernel
void
arithm_s_bitwise_xor_C3_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_C3_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__global
uchar
*dst,
int
dst_step,
int
dst_offset,
uchar4
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -440,7 +474,10 @@ __kernel void arithm_s_bitwise_xor_C3_D0 (__global uchar *src1, int src1_step,
{
x
=
x
<<
2
;
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
3
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
3
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
*
3
)
+
src1_offset
-
(
dst_align
*
3
))
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
...
...
@@ -484,7 +521,8 @@ __kernel void arithm_s_bitwise_xor_C3_D0 (__global uchar *src1, int src1_step,
}
__kernel
void
arithm_s_bitwise_xor_C3_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_C3_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
char4
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -496,7 +534,10 @@ __kernel void arithm_s_bitwise_xor_C3_D1 (__global char *src1, int src1_step,
{
x
=
x
<<
2
;
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
3
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
3
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
*
3
)
+
src1_offset
-
(
dst_align
*
3
))
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
...
...
@@ -539,7 +580,8 @@ __kernel void arithm_s_bitwise_xor_C3_D1 (__global char *src1, int src1_step,
}
}
__kernel
void
arithm_s_bitwise_xor_C3_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_C3_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__global
ushort
*dst,
int
dst_step,
int
dst_offset,
ushort4
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -551,7 +593,10 @@ __kernel void arithm_s_bitwise_xor_C3_D2 (__global ushort *src1, int src1_step
{
x
=
x
<<
1
;
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
*
6
)
+
src1_offset
-
(
dst_align
*
6
))
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
...
...
@@ -589,7 +634,8 @@ __kernel void arithm_s_bitwise_xor_C3_D2 (__global ushort *src1, int src1_step
*
((
__global
ushort2
*
)((
__global
char
*
)
dst
+
dst_index
+
8
))
=
data_2
;
}
}
__kernel
void
arithm_s_bitwise_xor_C3_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_C3_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
short4
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -601,7 +647,10 @@ __kernel void arithm_s_bitwise_xor_C3_D3 (__global short *src1, int src1_step,
{
x
=
x
<<
1
;
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
*
6
)
+
src1_offset
-
(
dst_align
*
6
))
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
...
...
@@ -639,7 +688,8 @@ __kernel void arithm_s_bitwise_xor_C3_D3 (__global short *src1, int src1_step,
*
((
__global
short2
*
)((
__global
char
*
)
dst
+
dst_index
+
8
))
=
data_2
;
}
}
__kernel
void
arithm_s_bitwise_xor_C3_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_C3_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__global
int
*dst,
int
dst_step,
int
dst_offset,
int4
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -673,7 +723,8 @@ __kernel void arithm_s_bitwise_xor_C3_D4 (__global int *src1, int src1_step, i
*
((
__global
int
*
)((
__global
char
*
)
dst
+
dst_index
+
8
))
=
tmp_data_2
;
}
}
__kernel
void
arithm_s_bitwise_xor_C3_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_C3_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
char16
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -708,7 +759,8 @@ __kernel void arithm_s_bitwise_xor_C3_D5 (__global char *src1, int src1_step,
}
}
#
if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
arithm_s_bitwise_xor_C3_D6
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_C3_D6
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
short16
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -743,7 +795,8 @@ __kernel void arithm_s_bitwise_xor_C3_D6 (__global short *src1, int src1_step, i
}
}
#
endif
__kernel
void
arithm_s_bitwise_xor_C4_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_C4_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__global
uchar
*dst,
int
dst_step,
int
dst_offset,
uchar4
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -765,7 +818,8 @@ __kernel void arithm_s_bitwise_xor_C4_D0 (__global uchar *src1, int src1_step,
}
__kernel
void
arithm_s_bitwise_xor_C4_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_C4_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
char4
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -786,7 +840,8 @@ __kernel void arithm_s_bitwise_xor_C4_D1 (__global char *src1, int src1_step,
}
}
__kernel
void
arithm_s_bitwise_xor_C4_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_C4_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__global
ushort
*dst,
int
dst_step,
int
dst_offset,
ushort4
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -806,7 +861,8 @@ __kernel void arithm_s_bitwise_xor_C4_D2 (__global ushort *src1, int src1_step
*
((
__global
ushort4
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_s_bitwise_xor_C4_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_C4_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
short4
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -826,7 +882,8 @@ __kernel void arithm_s_bitwise_xor_C4_D3 (__global short *src1, int src1_step,
*
((
__global
short4
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_s_bitwise_xor_C4_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_C4_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__global
int
*dst,
int
dst_step,
int
dst_offset,
int4
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -846,7 +903,8 @@ __kernel void arithm_s_bitwise_xor_C4_D4 (__global int *src1, int src1_step, i
*
((
__global
int4
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_s_bitwise_xor_C4_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_C4_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
char16
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
@@ -869,7 +927,8 @@ __kernel void arithm_s_bitwise_xor_C4_D5 (__global char *src1, int src1_step,
}
}
#
if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
arithm_s_bitwise_xor_C4_D6
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_C4_D6
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
short16
src2,
int
rows,
int
cols,
int
dst_step1
)
{
...
...
This diff is collapsed.
Click to expand it.
modules/ocl/src/opencl/arithm_bitwise_xor_scalar_mask.cl
View file @
a2d27429
...
...
@@ -42,17 +42,20 @@
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
//M*/
#
if
defined
(
__ATI__
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
__NVIDIA__
)
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
endif
#
endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_XOR////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
/**************************************bitwise_xor
with
scalar
with
mask**************************************/
__kernel
void
arithm_s_bitwise_xor_with_mask_C1_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_with_mask_C1_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__global
uchar
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
uchar4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -65,7 +68,10 @@ __kernel void arithm_s_bitwise_xor_with_mask_C1_D0 (__global uchar *src1, int
{
x
=
x
<<
2
;
#
define
dst_align
(
dst_offset
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(
dst_offset
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
-
dst_align
)
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -90,7 +96,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C1_D0 (__global uchar *src1, int
}
__kernel
void
arithm_s_bitwise_xor_with_mask_C1_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_with_mask_C1_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
char4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -103,7 +110,10 @@ __kernel void arithm_s_bitwise_xor_with_mask_C1_D1 (__global char *src1, int s
{
x
=
x
<<
2
;
#
define
dst_align
(
dst_offset
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(
dst_offset
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
-
dst_align
)
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -127,7 +137,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C1_D1 (__global char *src1, int s
}
}
__kernel
void
arithm_s_bitwise_xor_with_mask_C1_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_with_mask_C1_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__global
ushort
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
ushort4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -140,7 +151,10 @@ __kernel void arithm_s_bitwise_xor_with_mask_C1_D2 (__global ushort *src1, int
{
x
=
x
<<
1
;
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -161,7 +175,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C1_D2 (__global ushort *src1, int
*
((
__global
ushort2
*
)((
__global
uchar
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_s_bitwise_xor_with_mask_C1_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_with_mask_C1_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
short4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -174,7 +189,10 @@ __kernel void arithm_s_bitwise_xor_with_mask_C1_D3 (__global short *src1, int
{
x
=
x
<<
1
;
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -195,7 +213,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C1_D3 (__global short *src1, int
*
((
__global
short2
*
)((
__global
uchar
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_s_bitwise_xor_with_mask_C1_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_with_mask_C1_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__global
int
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
int4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -223,7 +242,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C1_D4 (__global int *src1, int
}
}
__kernel
void
arithm_s_bitwise_xor_with_mask_C1_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_with_mask_C1_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
char16
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -252,7 +272,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C1_D5 (__global char *src1, int src
}
#
if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
arithm_s_bitwise_xor_with_mask_C1_D6
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_with_mask_C1_D6
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
short16
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -280,7 +301,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C1_D6 (__global short *src1, int sr
}
}
#
endif
__kernel
void
arithm_s_bitwise_xor_with_mask_C2_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_with_mask_C2_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__global
uchar
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
uchar4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -293,7 +315,10 @@ __kernel void arithm_s_bitwise_xor_with_mask_C2_D0 (__global uchar *src1, int
{
x
=
x
<<
1
;
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -316,7 +341,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C2_D0 (__global uchar *src1, int
}
__kernel
void
arithm_s_bitwise_xor_with_mask_C2_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_with_mask_C2_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
char4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -329,7 +355,10 @@ __kernel void arithm_s_bitwise_xor_with_mask_C2_D1 (__global char *src1, int s
{
x
=
x
<<
1
;
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -351,7 +380,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C2_D1 (__global char *src1, int s
}
}
__kernel
void
arithm_s_bitwise_xor_with_mask_C2_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_with_mask_C2_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__global
ushort
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
ushort4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -378,7 +408,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C2_D2 (__global ushort *src1, int
*
((
__global
ushort2
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_s_bitwise_xor_with_mask_C2_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_with_mask_C2_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
short4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -405,7 +436,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C2_D3 (__global short *src1, int
*
((
__global
short2
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_s_bitwise_xor_with_mask_C2_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_with_mask_C2_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__global
int
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
int4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -432,7 +464,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C2_D4 (__global int *src1, int sr
*
((
__global
int2
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_s_bitwise_xor_with_mask_C2_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_with_mask_C2_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
char16
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -461,7 +494,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C2_D5 (__global char *src1, int s
}
}
#
if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
arithm_s_bitwise_xor_with_mask_C2_D6
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_with_mask_C2_D6
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
short16
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -489,7 +523,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C2_D6 (__global short *src1, int sr
}
}
#
endif
__kernel
void
arithm_s_bitwise_xor_with_mask_C3_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_with_mask_C3_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__global
uchar
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
uchar4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -502,7 +537,10 @@ __kernel void arithm_s_bitwise_xor_with_mask_C3_D0 (__global uchar *src1, int
{
x
=
x
<<
2
;
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
3
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
3
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
*
3
)
+
src1_offset
-
(
dst_align
*
3
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -549,7 +587,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C3_D0 (__global uchar *src1, int
}
__kernel
void
arithm_s_bitwise_xor_with_mask_C3_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_with_mask_C3_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
char4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -562,7 +601,10 @@ __kernel void arithm_s_bitwise_xor_with_mask_C3_D1 (__global char *src1, int s
{
x
=
x
<<
2
;
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
3
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
3
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
*
3
)
+
src1_offset
-
(
dst_align
*
3
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -608,7 +650,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C3_D1 (__global char *src1, int s
}
}
__kernel
void
arithm_s_bitwise_xor_with_mask_C3_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_with_mask_C3_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__global
ushort
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
ushort4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -621,7 +664,10 @@ __kernel void arithm_s_bitwise_xor_with_mask_C3_D2 (__global ushort *src1, int
{
x
=
x
<<
1
;
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
*
6
)
+
src1_offset
-
(
dst_align
*
6
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -662,7 +708,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C3_D2 (__global ushort *src1, int
*
((
__global
ushort2
*
)((
__global
char
*
)
dst
+
dst_index
+
8
))
=
data_2
;
}
}
__kernel
void
arithm_s_bitwise_xor_with_mask_C3_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_with_mask_C3_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
short4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -675,7 +722,10 @@ __kernel void arithm_s_bitwise_xor_with_mask_C3_D3 (__global short *src1, int
{
x
=
x
<<
1
;
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(((
dst_offset
%
dst_step
)
/
6
)
&
1
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
*
6
)
+
src1_offset
-
(
dst_align
*
6
))
;
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
-
dst_align
)
;
...
...
@@ -716,7 +766,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C3_D3 (__global short *src1, int
*
((
__global
short2
*
)((
__global
char
*
)
dst
+
dst_index
+
8
))
=
data_2
;
}
}
__kernel
void
arithm_s_bitwise_xor_with_mask_C3_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_with_mask_C3_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__global
int
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
int4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -758,7 +809,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C3_D4 (__global int *src1, int sr
*
((
__global
int
*
)((
__global
char
*
)
dst
+
dst_index
+
8
))
=
data_2
;
}
}
__kernel
void
arithm_s_bitwise_xor_with_mask_C3_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_with_mask_C3_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
char16
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -801,7 +853,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C3_D5 (__global char *src1, int s
}
}
#
if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
arithm_s_bitwise_xor_with_mask_C3_D6
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_with_mask_C3_D6
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
short16
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -844,7 +897,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C3_D6 (__global short *src1, int sr
}
}
#
endif
__kernel
void
arithm_s_bitwise_xor_with_mask_C4_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_with_mask_C4_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__global
uchar
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
uchar4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -872,7 +926,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C4_D0 (__global uchar *src1, int
}
__kernel
void
arithm_s_bitwise_xor_with_mask_C4_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_with_mask_C4_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
char4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -899,7 +954,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C4_D1 (__global char *src1, int s
}
}
__kernel
void
arithm_s_bitwise_xor_with_mask_C4_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_with_mask_C4_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__global
ushort
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
ushort4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -925,7 +981,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C4_D2 (__global ushort *src1, int
*
((
__global
ushort4
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_s_bitwise_xor_with_mask_C4_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_with_mask_C4_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
short4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -951,7 +1008,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C4_D3 (__global short *src1, int
*
((
__global
short4
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_s_bitwise_xor_with_mask_C4_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_with_mask_C4_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__global
int
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
int4
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -977,7 +1035,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C4_D4 (__global int *src1, int sr
*
((
__global
int4
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
__kernel
void
arithm_s_bitwise_xor_with_mask_C4_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_with_mask_C4_D5
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
char16
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
@@ -1006,7 +1065,8 @@ __kernel void arithm_s_bitwise_xor_with_mask_C4_D5 (__global char *src1, int s
}
}
#
if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
arithm_s_bitwise_xor_with_mask_C4_D6
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_s_bitwise_xor_with_mask_C4_D6
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
short16
src2,
int
rows,
int
cols,
int
dst_step1
)
...
...
This diff is collapsed.
Click to expand it.
modules/ocl/src/opencl/arithm_compare_eq.cl
View file @
a2d27429
...
...
@@ -43,7 +43,11 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
endif
#
endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
...
...
@@ -62,7 +66,10 @@ __kernel void arithm_compare_eq_D0 (__global uchar *src1, int src1_step, int src
{
x
=
x
<<
2
;
#
define
dst_align
(
dst_offset
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(
dst_offset
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
-
dst_align
)
;
int
src2_index
=
mad24
(
y,
src2_step,
x
+
src2_offset
-
dst_align
)
;
...
...
@@ -114,7 +121,10 @@ __kernel void arithm_compare_ne_D2 (__global ushort *src1, int src1_step, int sr
{
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
1
)
+
src2_offset
-
(
dst_align
<<
1
))
;
...
...
@@ -165,7 +175,10 @@ __kernel void arithm_compare_eq_D3 (__global short *src1, int src1_step, int src
{
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
1
)
+
src2_offset
-
(
dst_align
<<
1
))
;
...
...
@@ -217,7 +230,10 @@ __kernel void arithm_compare_eq_D4 (__global int *src1, int src1_step, int src1_
if
(
x
<
cols
&&
y
<
rows
)
{
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
2
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
2
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
2
)
+
src1_offset
-
(
dst_align
<<
2
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
2
)
+
src2_offset
-
(
dst_align
<<
2
))
;
...
...
@@ -265,7 +281,10 @@ __kernel void arithm_compare_eq_D5 (__global float *src1, int src1_step, int src
if
(
x
<
cols
&&
y
<
rows
)
{
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
2
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
2
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
2
)
+
src1_offset
-
(
dst_align
<<
2
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
2
)
+
src2_offset
-
(
dst_align
<<
2
))
;
...
...
@@ -275,7 +294,8 @@ __kernel void arithm_compare_eq_D5 (__global float *src1, int src1_step, int src
int
src1_index_fix
=
src1_index
<
0
?
0
:
src1_index
;
int
src2_index_fix
=
src2_index
<
0
?
0
:
src2_index
;
float4
src1_data
=
vload4
(
0
,
(
__global
float
*
)((
__global
char
*
)
src1
+
src1_index_fix
))
;
float4
src2_data
=
vload4
(
0
,
(
__global
float
*
)((
__global
char
*
)
src2
+
src2_index_fix
))
; if(src2_index < 0)
float4
src2_data
=
vload4
(
0
,
(
__global
float
*
)((
__global
char
*
)
src2
+
src2_index_fix
))
;
if
(
src2_index
<
0
)
{
float4
tmp
;
tmp.xyzw
=
(
src2_index
==
-2
)
?
src2_data.zwxy:src2_data.yzwx
;
...
...
@@ -307,7 +327,10 @@ __kernel void arithm_compare_eq_D6 (__global double *src1, int src1_step, int sr
if
(
x
<
cols
&&
y
<
rows
)
{
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
3
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
3
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
3
)
+
src1_offset
-
(
dst_align
<<
3
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
3
)
+
src2_offset
-
(
dst_align
<<
3
))
;
...
...
@@ -358,7 +381,10 @@ __kernel void arithm_compare_gt_D0 (__global uchar *src1, int src1_step, int src
{
x
=
x
<<
2
;
#
define
dst_align
(
dst_offset
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(
dst_offset
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
-
dst_align
)
;
int
src2_index
=
mad24
(
y,
src2_step,
x
+
src2_offset
-
dst_align
)
;
...
...
@@ -409,7 +435,10 @@ __kernel void arithm_compare_gt_D2 (__global ushort *src1, int src1_step, int sr
{
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
1
)
+
src2_offset
-
(
dst_align
<<
1
))
;
...
...
@@ -462,7 +491,10 @@ __kernel void arithm_compare_gt_D3 (__global short *src1, int src1_step, int src
{
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
1
)
+
src2_offset
-
(
dst_align
<<
1
))
;
...
...
@@ -511,7 +543,10 @@ __kernel void arithm_compare_gt_D4 (__global int *src1, int src1_step, int src1_
if
(
x
<
cols
&&
y
<
rows
)
{
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
2
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
2
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
2
)
+
src1_offset
-
(
dst_align
<<
2
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
2
)
+
src2_offset
-
(
dst_align
<<
2
))
;
...
...
@@ -560,7 +595,10 @@ __kernel void arithm_compare_gt_D5 (__global float *src1, int src1_step, int src
if
(
x
<
cols
&&
y
<
rows
)
{
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
2
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
2
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
2
)
+
src1_offset
-
(
dst_align
<<
2
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
2
)
+
src2_offset
-
(
dst_align
<<
2
))
;
...
...
@@ -609,7 +647,10 @@ __kernel void arithm_compare_gt_D6 (__global double *src1, int src1_step, int sr
if
(
x
<
cols
&&
y
<
rows
)
{
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
3
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
3
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
3
)
+
src1_offset
-
(
dst_align
<<
3
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
3
)
+
src2_offset
-
(
dst_align
<<
3
))
;
...
...
@@ -660,7 +701,10 @@ __kernel void arithm_compare_ge_D0 (__global uchar *src1, int src1_step, int src
{
x
=
x
<<
2
;
#
define
dst_align
(
dst_offset
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(
dst_offset
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
-
dst_align
)
;
int
src2_index
=
mad24
(
y,
src2_step,
x
+
src2_offset
-
dst_align
)
;
...
...
@@ -714,7 +758,10 @@ __kernel void arithm_compare_ge_D2 (__global ushort *src1, int src1_step, int sr
{
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
1
)
+
src2_offset
-
(
dst_align
<<
1
))
;
...
...
@@ -769,7 +816,10 @@ __kernel void arithm_compare_ge_D3 (__global short *src1, int src1_step, int src
{
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
1
)
+
src2_offset
-
(
dst_align
<<
1
))
;
...
...
@@ -820,7 +870,10 @@ __kernel void arithm_compare_ge_D4 (__global int *src1, int src1_step, int src1_
{
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
2
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
2
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
2
)
+
src1_offset
-
(
dst_align
<<
2
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
2
)
+
src2_offset
-
(
dst_align
<<
2
))
;
...
...
@@ -869,7 +922,10 @@ __kernel void arithm_compare_ge_D5 (__global float *src1, int src1_step, int src
{
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
2
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
2
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
2
)
+
src1_offset
-
(
dst_align
<<
2
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
2
)
+
src2_offset
-
(
dst_align
<<
2
))
;
...
...
@@ -920,7 +976,10 @@ __kernel void arithm_compare_ge_D6 (__global double *src1, int src1_step, int sr
{
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
3
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
3
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
3
)
+
src1_offset
-
(
dst_align
<<
3
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
3
)
+
src2_offset
-
(
dst_align
<<
3
))
;
...
...
@@ -942,7 +1001,8 @@ __kernel void arithm_compare_ge_D6 (__global double *src1, int src1_step, int sr
double4
tmp
;
tmp.xyzw
=
(
src2_index
==
-2
)
?
src2_data.zwxy:src2_data.yzwx
;
src2_data.xyzw
=
(
src2_index
==
-1
)
?
src2_data.wxyz:tmp.xyzw
;
}
uchar4
dst_data
=
*
((
__global
uchar4
*
)(
dst
+
dst_index
))
;
}
uchar4
dst_data
=
*
((
__global
uchar4
*
)(
dst
+
dst_index
))
;
uchar4
tmp_data
=
convert_uchar4
((
src1_data
>=
src2_data
))
;
dst_data.x
=
((
dst_index
+
0
>=
dst_start
)
&&
(
dst_index
+
0
<
dst_end
))
?
tmp_data.x
:
dst_data.x
;
...
...
@@ -954,3 +1014,4 @@ __kernel void arithm_compare_ge_D6 (__global double *src1, int src1_step, int sr
}
}
#
endif
This diff is collapsed.
Click to expand it.
modules/ocl/src/opencl/arithm_compare_ne.cl
View file @
a2d27429
...
...
@@ -43,7 +43,11 @@
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
endif
#
endif
/***********************************Compare
NE*******************************/
__kernel
void
arithm_compare_ne_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
...
...
@@ -58,7 +62,10 @@ __kernel void arithm_compare_ne_D0 (__global uchar *src1, int src1_step, int src
{
x
=
x
<<
2
;
#
define
dst_align
(
dst_offset
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(
dst_offset
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
-
dst_align
)
;
int
src2_index
=
mad24
(
y,
src2_step,
x
+
src2_offset
-
dst_align
)
;
...
...
@@ -110,7 +117,10 @@ __kernel void arithm_compare_ne_D2 (__global ushort *src1, int src1_step, int sr
{
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
1
)
+
src2_offset
-
(
dst_align
<<
1
))
;
...
...
@@ -162,7 +172,10 @@ __kernel void arithm_compare_ne_D3 (__global short *src1, int src1_step, int src
{
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
1
)
+
src2_offset
-
(
dst_align
<<
1
))
;
...
...
@@ -210,7 +223,10 @@ __kernel void arithm_compare_ne_D4 (__global int *src1, int src1_step, int src1_
if
(
x
<
cols
&&
y
<
rows
)
{
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
2
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
2
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
2
)
+
src1_offset
-
(
dst_align
<<
2
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
2
)
+
src2_offset
-
(
dst_align
<<
2
))
;
...
...
@@ -259,7 +275,10 @@ __kernel void arithm_compare_ne_D5 (__global float *src1, int src1_step, int src
if
(
x
<
cols
&&
y
<
rows
)
{
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
2
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
2
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
2
)
+
src1_offset
-
(
dst_align
<<
2
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
2
)
+
src2_offset
-
(
dst_align
<<
2
))
;
...
...
@@ -269,7 +288,8 @@ __kernel void arithm_compare_ne_D5 (__global float *src1, int src1_step, int src
int
src1_index_fix
=
src1_index
<
0
?
0
:
src1_index
;
int
src2_index_fix
=
src2_index
<
0
?
0
:
src2_index
;
float4
src1_data
=
vload4
(
0
,
(
__global
float
*
)((
__global
char
*
)
src1
+
src1_index_fix
))
;
float4
src2_data
=
vload4
(
0
,
(
__global
float
*
)((
__global
char
*
)
src2
+
src2_index_fix
))
; if(src1_index < 0)
float4
src2_data
=
vload4
(
0
,
(
__global
float
*
)((
__global
char
*
)
src2
+
src2_index_fix
))
;
if
(
src1_index
<
0
)
{
float4
tmp
;
tmp.xyzw
=
(
src1_index
==
-2
)
?
src1_data.zwxy:src1_data.yzwx
;
...
...
@@ -306,7 +326,10 @@ __kernel void arithm_compare_ne_D6 (__global double *src1, int src1_step, int sr
if
(
x
<
cols
&&
y
<
rows
)
{
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
3
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
3
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
3
)
+
src1_offset
-
(
dst_align
<<
3
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
3
)
+
src2_offset
-
(
dst_align
<<
3
))
;
...
...
@@ -358,7 +381,10 @@ __kernel void arithm_compare_lt_D0 (__global uchar *src1, int src1_step, int src
{
x
=
x
<<
2
;
#
define
dst_align
(
dst_offset
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(
dst_offset
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
-
dst_align
)
;
int
src2_index
=
mad24
(
y,
src2_step,
x
+
src2_offset
-
dst_align
)
;
...
...
@@ -410,7 +436,10 @@ __kernel void arithm_compare_lt_D2 (__global ushort *src1, int src1_step, int sr
{
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
1
)
+
src2_offset
-
(
dst_align
<<
1
))
;
...
...
@@ -463,7 +492,10 @@ __kernel void arithm_compare_lt_D3 (__global short *src1, int src1_step, int src
{
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
1
)
+
src2_offset
-
(
dst_align
<<
1
))
;
...
...
@@ -512,7 +544,10 @@ __kernel void arithm_compare_lt_D4 (__global int *src1, int src1_step, int src1_
if
(
x
<
cols
&&
y
<
rows
)
{
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
2
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
2
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
2
)
+
src1_offset
-
(
dst_align
<<
2
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
2
)
+
src2_offset
-
(
dst_align
<<
2
))
;
...
...
@@ -564,7 +599,10 @@ __kernel void arithm_compare_lt_D5 (__global float *src1, int src1_step, int src
if
(
x
<
cols
&&
y
<
rows
)
{
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
2
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
2
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
2
)
+
src1_offset
-
(
dst_align
<<
2
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
2
)
+
src2_offset
-
(
dst_align
<<
2
))
;
...
...
@@ -613,7 +651,10 @@ __kernel void arithm_compare_lt_D6 (__global double *src1, int src1_step, int sr
if
(
x
<
cols
&&
y
<
rows
)
{
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
3
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
3
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
3
)
+
src1_offset
-
(
dst_align
<<
3
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
3
)
+
src2_offset
-
(
dst_align
<<
3
))
;
...
...
@@ -664,7 +705,10 @@ __kernel void arithm_compare_le_D0 (__global uchar *src1, int src1_step, int src
{
x
=
x
<<
2
;
#
define
dst_align
(
dst_offset
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(
dst_offset
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
-
dst_align
)
;
int
src2_index
=
mad24
(
y,
src2_step,
x
+
src2_offset
-
dst_align
)
;
...
...
@@ -717,7 +761,10 @@ __kernel void arithm_compare_le_D2 (__global ushort *src1, int src1_step, int sr
{
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
1
)
+
src2_offset
-
(
dst_align
<<
1
))
;
...
...
@@ -770,7 +817,10 @@ __kernel void arithm_compare_le_D3 (__global short *src1, int src1_step, int src
{
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
1
)
+
src2_offset
-
(
dst_align
<<
1
))
;
...
...
@@ -819,7 +869,10 @@ __kernel void arithm_compare_le_D4 (__global int *src1, int src1_step, int src1_
if
(
x
<
cols
&&
y
<
rows
)
{
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
2
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
2
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
2
)
+
src1_offset
-
(
dst_align
<<
2
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
2
)
+
src2_offset
-
(
dst_align
<<
2
))
;
...
...
@@ -867,7 +920,10 @@ __kernel void arithm_compare_le_D5 (__global float *src1, int src1_step, int src
if
(
x
<
cols
&&
y
<
rows
)
{
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
2
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
2
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
2
)
+
src1_offset
-
(
dst_align
<<
2
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
2
)
+
src2_offset
-
(
dst_align
<<
2
))
;
...
...
@@ -915,7 +971,10 @@ __kernel void arithm_compare_le_D6 (__global double *src1, int src1_step, int sr
if
(
x
<
cols
&&
y
<
rows
)
{
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
3
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
3
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
3
)
+
src1_offset
-
(
dst_align
<<
3
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
3
)
+
src2_offset
-
(
dst_align
<<
3
))
;
...
...
@@ -952,3 +1011,5 @@ __kernel void arithm_compare_le_D6 (__global double *src1, int src1_step, int sr
}
}
#
endif
This diff is collapsed.
Click to expand it.
modules/ocl/src/opencl/arithm_div.cl
View file @
a2d27429
...
...
@@ -44,7 +44,11 @@
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
endif
typedef
double
F
;
typedef
double4
F4
;
#
define
convert_F4
convert_double4
...
...
@@ -56,34 +60,24 @@ typedef float4 F4;
#
define
convert_F
float
#
endif
uchar
round2_uchar
(
F
v
)
{
uchar
v1
=
convert_uchar_sat
(
round
(
v
))
;
//uchar
v2
=
convert_uchar_sat
(
v+
(
v>=0
?
0.5
:
-0.5
))
;
return
v1
;//(((v-v1)==0.5) && (v1%2==0)) ? v1 : v2;
inline
uchar
round2_uchar
(
F
v
)
{
return
convert_uchar_sat
(
round
(
v
))
;
}
ushort
round2_ushort
(
F
v
)
{
ushort
v1
=
convert_ushort_sat
(
round
(
v
))
;
//ushort
v2
=
convert_ushort_sat
(
v+
(
v>=0
?
0.5
:
-0.5
))
;
return
v1
;//(((v-v1)==0.5) && (v1%2==0)) ? v1 : v2;
inline
ushort
round2_ushort
(
F
v
)
{
return
convert_ushort_sat
(
round
(
v
))
;
}
short
round2_short
(
F
v
)
{
short
v1
=
convert_short_sat
(
round
(
v
))
;
//short
v2
=
convert_short_sat
(
v+
(
v>=0
?
0.5
:
-0.5
))
;
return
v1
;//(((v-v1)==0.5) && (v1%2==0)) ? v1 : v2;
inline
short
round2_short
(
F
v
)
{
return
convert_short_sat
(
round
(
v
))
;
}
int
round2_int
(
F
v
)
{
int
v1
=
convert_int_sat
(
round
(
v
))
;
//int
v2
=
convert_int_sat
(
v+
(
v>=0
?
0.5
:
-0.5
))
;
return
v1
;//(((v-v1)==0.5) && (v1%2==0)) ? v1 : v2;
inline
int
round2_int
(
F
v
)
{
return
convert_int_sat
(
round
(
v
))
;
}
///////////////////////////////////////////////////////////////////////////////////////
////////////////////////////divide///////////////////////////////////////////////////
...
...
@@ -94,39 +88,41 @@ __kernel void arithm_div_D0 (__global uchar *src1, int src1_step, int src1_offse
__global
uchar
*dst,
int
dst_step,
int
dst_offset,
int
rows,
int
cols,
int
dst_step1,
F
scalar
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int2
coor
=
(
int2
)(
get_global_id
(
0
)
,
get_global_id
(
1
))
;
if
(
x
<
cols
&&
y
<
rows
)
if
(
coor.x
<
cols
&&
coor.
y
<
rows
)
{
x
=
x
<<
2
;
coor.x
=
coor.
x
<<
2
;
#
define
dst_align
(
dst_offset
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
-
dst_align
)
;
int
src2_index
=
mad24
(
y,
src2_step,
x
+
src2_offset
-
dst_align
)
;
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(
dst_offset
&
3
)
int2
src_index
=
(
int2
)(
mad24
(
coor.y,
src1_step,
coor.x
+
src1_offset
-
dst_align
)
,
mad24
(
coor.y,
src2_step,
coor.x
+
src2_offset
-
dst_align
))
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
int
dst_end
=
mad24
(
y,
dst_step,
dst_offset
+
dst_step1
)
;
int
dst_index
=
mad24
(
y,
dst_step,
dst_offset
+
x
&
(
int
)
0xfffffffc
)
;
int4
dst_args
=
(
int4
)(
mad24
(
coor.y,
dst_step,
dst_offset
)
,
mad24
(
coor.y,
dst_step,
dst_offset
+
dst_step1
)
,
mad24
(
coor.y,
dst_step,
dst_offset
+
coor.x
&
(
int
)
0xfffffffc
)
,
0
)
;
uchar4
src1_data
=
vload4
(
0
,
src1
+
src
1_inde
x
)
;
uchar4
src2_data
=
vload4
(
0
,
src2
+
src
2_index
)
;
uchar4
dst_data
=
*
((
__global
uchar4
*
)(
dst
+
dst_
index
))
;
uchar4
src1_data
=
vload4
(
0
,
src1
+
src
_index.
x
)
;
uchar4
src2_data
=
vload4
(
0
,
src2
+
src
_index.y
)
;
uchar4
dst_data
=
*
((
__global
uchar4
*
)(
dst
+
dst_
args.z
))
;
F4
tmp
=
convert_F4
(
src1_data
)
*
scalar
;
uchar4
tmp_data
;
tmp_data.x
=
((
tmp.x
==
0
)
|
| (src2_data.x == 0)) ? 0 : round2_uchar(tmp.x /
(F)
src2_data.x);
tmp_data.y = ((tmp.y == 0) || (src2_data.y == 0)) ? 0 : round2_uchar(tmp.y /
(F)
src2_data.y);
tmp_data.z = ((tmp.z == 0) || (src2_data.z == 0)) ? 0 : round2_uchar(tmp.z /
(F)
src2_data.z);
tmp_data.w = ((tmp.w == 0) || (src2_data.w == 0)) ? 0 : round2_uchar(tmp.w /
(F)
src2_data.w);
tmp_data.x
=
((
tmp.x
==
0
)
|
| (src2_data.x == 0)) ? 0 : round2_uchar(tmp.x / src2_data.x);
tmp_data.y = ((tmp.y == 0) || (src2_data.y == 0)) ? 0 : round2_uchar(tmp.y / src2_data.y);
tmp_data.z = ((tmp.z == 0) || (src2_data.z == 0)) ? 0 : round2_uchar(tmp.z / src2_data.z);
tmp_data.w = ((tmp.w == 0) || (src2_data.w == 0)) ? 0 : round2_uchar(tmp.w / src2_data.w);
dst_data.x = ((dst_
index + 0 >= dst_start) && (dst_index + 0 < dst_end
)) ? tmp_data.x : dst_data.x;
dst_data.y = ((dst_
index + 1 >= dst_start) && (dst_index + 1 < dst_end
)) ? tmp_data.y : dst_data.y;
dst_data.z = ((dst_
index + 2 >= dst_start) && (dst_index + 2 < dst_end
)) ? tmp_data.z : dst_data.z;
dst_data.w = ((dst_
index + 3 >= dst_start) && (dst_index + 3 < dst_end
)) ? tmp_data.w : dst_data.w;
dst_data.x = ((dst_
args.z + 0 >= dst_args.x) && (dst_args.z + 0 < dst_args.y
)) ? tmp_data.x : dst_data.x;
dst_data.y = ((dst_
args.z + 1 >= dst_args.x) && (dst_args.z + 1 < dst_args.y
)) ? tmp_data.y : dst_data.y;
dst_data.z = ((dst_
args.z + 2 >= dst_args.x) && (dst_args.z + 2 < dst_args.y
)) ? tmp_data.z : dst_data.z;
dst_data.w = ((dst_
args.z + 3 >= dst_args.x) && (dst_args.z + 3 < dst_args.y
)) ? tmp_data.w : dst_data.w;
*((__global uchar4 *)(dst + dst_
index
)) = dst_data;
*((__global uchar4 *)(dst + dst_
args.z
)) = dst_data;
}
}
...
...
@@ -142,7 +138,10 @@ __kernel void arithm_div_D2 (__global ushort *src1, int src1_step, int src1_offs
{
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
...
...
@@ -182,7 +181,10 @@ __kernel void arithm_div_D3 (__global short *src1, int src1_step, int src1_offse
{
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
...
...
@@ -297,7 +299,10 @@ __kernel void arithm_s_div_D0 (__global uchar *src, int src_step, int src_offset
{
x = x << 2;
#define dst_align (dst_offset & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align (dst_offset & 3)
int src_index = mad24(y, src_step, x + src_offset - dst_align);
int dst_start = mad24(y, dst_step, dst_offset);
...
...
@@ -333,7 +338,10 @@ __kernel void arithm_s_div_D2 (__global ushort *src, int src_step, int src_offse
{
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src_index = mad24(y, src_step, (x << 1) + src_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
...
...
@@ -368,7 +376,10 @@ __kernel void arithm_s_div_D3 (__global short *src, int src_step, int src_offset
{
x = x << 2;
#define dst_align ((dst_offset >> 1) & 3)
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)
int src_index = mad24(y, src_step, (x << 1) + src_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
...
...
@@ -455,3 +466,5 @@ __kernel void arithm_s_div_D6 (__global double *src, int src_step, int src_offse
}
}
#
endif
This diff is collapsed.
Click to expand it.
modules/ocl/src/opencl/arithm_flip.cl
View file @
a2d27429
...
...
@@ -44,7 +44,11 @@
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
endif
#
endif
//////////////////////////////////////////////////////////////////////////////////////////////////////
...
...
@@ -61,7 +65,10 @@ __kernel void arithm_flip_rows_D0 (__global uchar *src, int src_step, int src_of
{
x
=
x
<<
2
;
#
define
dst_align
(
dst_offset
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(
dst_offset
&
3
)
int
src_index_0
=
mad24
(
y,
src_step,
x
+
src_offset
-
dst_align
)
;
int
src_index_1
=
mad24
(
rows
-
y
-
1
,
src_step,
x
+
src_offset
-
dst_align
)
;
...
...
@@ -116,7 +123,10 @@ __kernel void arithm_flip_rows_D1 (__global char *src, int src_step, int src_off
{
x
=
x
<<
2
;
#
define
dst_align
(
dst_offset
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(
dst_offset
&
3
)
int
src_index_0
=
mad24
(
y,
src_step,
x
+
src_offset
-
dst_align
)
;
int
src_index_1
=
mad24
(
rows
-
y
-
1
,
src_step,
x
+
src_offset
-
dst_align
)
;
...
...
@@ -158,7 +168,10 @@ __kernel void arithm_flip_rows_D2 (__global ushort *src, int src_step, int src_o
{
x
=
x
<<
2
;
#
define
dst_align
(((
dst_offset
>>
1
)
&
3
)
<<
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(((
dst_offset
>>
1
)
&
3
)
<<
1
)
int
src_index_0
=
mad24
(
y,
src_step,
(
x
<<
1
)
+
src_offset
-
dst_align
)
;
int
src_index_1
=
mad24
(
rows
-
y
-
1
,
src_step,
(
x
<<
1
)
+
src_offset
-
dst_align
)
;
...
...
@@ -200,7 +213,10 @@ __kernel void arithm_flip_rows_D3 (__global short *src, int src_step, int src_of
{
x
=
x
<<
2
;
#
define
dst_align
(((
dst_offset
>>
1
)
&
3
)
<<
1
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(((
dst_offset
>>
1
)
&
3
)
<<
1
)
int
src_index_0
=
mad24
(
y,
src_step,
(
x
<<
1
)
+
src_offset
-
dst_align
)
;
int
src_index_1
=
mad24
(
rows
-
y
-
1
,
src_step,
(
x
<<
1
)
+
src_offset
-
dst_align
)
;
...
...
This diff is collapsed.
Click to expand it.
modules/ocl/src/opencl/arithm_mul.cl
View file @
a2d27429
...
...
@@ -16,7 +16,6 @@
//
//
@Authors
//
Jia
Haipeng,
jiahaipeng95@gmail.com
//
Dachuan
Zhao,
dachuan@multicorewareinc.com
//
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
//
are
permitted
provided
that
the
following
conditions
are
met:
...
...
@@ -44,11 +43,16 @@
//
//M*/
#
if
defined
DOUBLE_SUPPORT
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
endif
#
endif
int4
round_int4
(
float4
v
)
{
int4
round_int4
(
float4
v
)
{
v.s0
=
v.s0
+
(
v.s0
>
0
?
0.5
:
-0.5
)
;
v.s1
=
v.s1
+
(
v.s1
>
0
?
0.5
:
-0.5
)
;
v.s2
=
v.s2
+
(
v.s2
>
0
?
0.5
:
-0.5
)
;
...
...
@@ -56,7 +60,8 @@ int4 round_int4(float4 v){
return
convert_int4_sat
(
v
)
;
}
uint4
round_uint4
(
float4
v
)
{
uint4
round_uint4
(
float4
v
)
{
v.s0
=
v.s0
+
(
v.s0
>
0
?
0.5
:
-0.5
)
;
v.s1
=
v.s1
+
(
v.s1
>
0
?
0.5
:
-0.5
)
;
v.s2
=
v.s2
+
(
v.s2
>
0
?
0.5
:
-0.5
)
;
...
...
@@ -64,7 +69,8 @@ uint4 round_uint4(float4 v){
return
convert_uint4_sat
(
v
)
;
}
long
round_int
(
float
v
)
{
long
round_int
(
float
v
)
{
v
=
v
+
(
v
>
0
?
0.5
:
-0.5
)
;
return
convert_int_sat
(
v
)
;
...
...
@@ -85,7 +91,10 @@ __kernel void arithm_mul_D0 (__global uchar *src1, int src1_step, int src1_offse
{
x
=
x
<<
2
;
#
define
dst_align
(
dst_offset
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
(
dst_offset
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
-
dst_align
)
;
int
src2_index
=
mad24
(
y,
src2_step,
x
+
src2_offset
-
dst_align
)
;
...
...
@@ -130,7 +139,10 @@ __kernel void arithm_mul_D2 (__global ushort *src1, int src1_step, int src1_offs
{
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
1
)
+
src2_offset
-
(
dst_align
<<
1
))
;
...
...
@@ -166,7 +178,10 @@ __kernel void arithm_mul_D3 (__global short *src1, int src1_step, int src1_offse
{
x
=
x
<<
2
;
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
src2_index
=
mad24
(
y,
src2_step,
(
x
<<
1
)
+
src2_offset
-
(
dst_align
<<
1
))
;
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment