Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
bd5e4c6c
Commit
bd5e4c6c
authored
May 14, 2014
by
Ilya Lavrenov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
other kernels now use row scheme
parent
15738bf7
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
140 additions
and
91 deletions
+140
-91
arithm.cpp
modules/core/src/arithm.cpp
+6
-5
convert.cpp
modules/core/src/convert.cpp
+24
-18
dxt.cpp
modules/core/src/dxt.cpp
+4
-3
matrix.cpp
modules/core/src/matrix.cpp
+5
-3
inrange.cl
modules/core/src/opencl/inrange.cl
+30
-20
mixchannels.cl
modules/core/src/opencl/mixchannels.cl
+15
-7
mulspectrums.cl
modules/core/src/opencl/mulspectrums.cl
+16
-12
set_identity.cl
modules/core/src/opencl/set_identity.cl
+6
-5
split_merge.cl
modules/core/src/opencl/split_merge.cl
+34
-18
No files found.
modules/core/src/arithm.cpp
View file @
bd5e4c6c
...
...
@@ -3094,11 +3094,12 @@ static InRangeFunc getInRangeFunc(int depth)
static
bool
ocl_inRange
(
InputArray
_src
,
InputArray
_lowerb
,
InputArray
_upperb
,
OutputArray
_dst
)
{
const
ocl
::
Device
&
d
=
ocl
::
Device
::
getDefault
();
int
skind
=
_src
.
kind
(),
lkind
=
_lowerb
.
kind
(),
ukind
=
_upperb
.
kind
();
Size
ssize
=
_src
.
size
(),
lsize
=
_lowerb
.
size
(),
usize
=
_upperb
.
size
();
int
stype
=
_src
.
type
(),
ltype
=
_lowerb
.
type
(),
utype
=
_upperb
.
type
();
int
sdepth
=
CV_MAT_DEPTH
(
stype
),
ldepth
=
CV_MAT_DEPTH
(
ltype
),
udepth
=
CV_MAT_DEPTH
(
utype
);
int
cn
=
CV_MAT_CN
(
stype
);
int
cn
=
CV_MAT_CN
(
stype
)
,
rowsPerWI
=
d
.
isIntel
()
?
4
:
1
;
bool
lbScalar
=
false
,
ubScalar
=
false
;
if
(
(
lkind
==
_InputArray
::
MATX
&&
skind
!=
_InputArray
::
MATX
)
||
...
...
@@ -3122,7 +3123,7 @@ static bool ocl_inRange( InputArray _src, InputArray _lowerb,
if
(
lbScalar
!=
ubScalar
)
return
false
;
bool
doubleSupport
=
ocl
::
Device
::
getDefault
()
.
doubleFPConfig
()
>
0
,
bool
doubleSupport
=
d
.
doubleFPConfig
()
>
0
,
haveScalar
=
lbScalar
&&
ubScalar
;
if
(
(
!
doubleSupport
&&
sdepth
==
CV_64F
)
||
...
...
@@ -3187,13 +3188,13 @@ static bool ocl_inRange( InputArray _src, InputArray _lowerb,
uscalar
.
copyTo
(
uscalaru
);
ker
.
args
(
srcarg
,
dstarg
,
ocl
::
KernelArg
::
PtrReadOnly
(
lscalaru
),
ocl
::
KernelArg
::
PtrReadOnly
(
uscalaru
));
ocl
::
KernelArg
::
PtrReadOnly
(
uscalaru
)
,
rowsPerWI
);
}
else
ker
.
args
(
srcarg
,
dstarg
,
ocl
::
KernelArg
::
ReadOnlyNoSize
(
lscalaru
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
uscalaru
));
ocl
::
KernelArg
::
ReadOnlyNoSize
(
uscalaru
)
,
rowsPerWI
);
size_t
globalsize
[
2
]
=
{
ssize
.
width
,
ssize
.
height
};
size_t
globalsize
[
2
]
=
{
ssize
.
width
,
(
ssize
.
height
+
rowsPerWI
-
1
)
/
rowsPerWI
};
return
ker
.
run
(
2
,
globalsize
,
NULL
,
false
);
}
...
...
modules/core/src/convert.cpp
View file @
bd5e4c6c
...
...
@@ -270,21 +270,22 @@ namespace cv {
static
bool
ocl_split
(
InputArray
_m
,
OutputArrayOfArrays
_mv
)
{
int
type
=
_m
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
int
type
=
_m
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
),
rowsPerWI
=
ocl
::
Device
::
getDefault
().
isIntel
()
?
4
:
1
;
String
dstargs
,
dstdecl
,
processelem
;
String
dstargs
,
processelem
,
indexdecl
;
for
(
int
i
=
0
;
i
<
cn
;
++
i
)
{
dstargs
+=
format
(
"DECLARE_DST_PARAM(%d)"
,
i
);
dstdecl
+=
format
(
"DECLARE_DATA
(%d)"
,
i
);
indexdecl
+=
format
(
"DECLARE_INDEX
(%d)"
,
i
);
processelem
+=
format
(
"PROCESS_ELEM(%d)"
,
i
);
}
ocl
::
Kernel
k
(
"split"
,
ocl
::
core
::
split_merge_oclsrc
,
format
(
"-D T=%s -D OP_SPLIT -D cn=%d -D DECLARE_DST_PARAMS=%s
"
"
-D DECLARE_DATA_N=%s -D PROCESS_ELEMS
_N=%s"
,
format
(
"-D T=%s -D OP_SPLIT -D cn=%d -D DECLARE_DST_PARAMS=%s"
"
-D PROCESS_ELEMS_N=%s -D DECLARE_INDEX
_N=%s"
,
ocl
::
memopTypeToStr
(
depth
),
cn
,
dstargs
.
c_str
(),
dstdecl
.
c_str
(),
processelem
.
c_str
()));
processelem
.
c_str
(),
indexdecl
.
c_str
()));
if
(
k
.
empty
())
return
false
;
...
...
@@ -299,8 +300,9 @@ static bool ocl_split( InputArray _m, OutputArrayOfArrays _mv )
int
argidx
=
k
.
set
(
0
,
ocl
::
KernelArg
::
ReadOnly
(
_m
.
getUMat
()));
for
(
int
i
=
0
;
i
<
cn
;
++
i
)
argidx
=
k
.
set
(
argidx
,
ocl
::
KernelArg
::
WriteOnlyNoSize
(
dst
[
i
]));
k
.
set
(
argidx
,
rowsPerWI
);
size_t
globalsize
[
2
]
=
{
size
.
width
,
size
.
height
};
size_t
globalsize
[
2
]
=
{
size
.
width
,
(
size
.
height
+
rowsPerWI
-
1
)
/
rowsPerWI
};
return
k
.
run
(
2
,
globalsize
,
NULL
,
false
);
}
...
...
@@ -419,7 +421,8 @@ static bool ocl_merge( InputArrayOfArrays _mv, OutputArray _dst )
_mv
.
getUMatVector
(
src
);
CV_Assert
(
!
src
.
empty
());
int
type
=
src
[
0
].
type
(),
depth
=
CV_MAT_DEPTH
(
type
);
int
type
=
src
[
0
].
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
rowsPerWI
=
ocl
::
Device
::
getDefault
().
isIntel
()
?
4
:
1
;
Size
size
=
src
[
0
].
size
();
for
(
size_t
i
=
0
,
srcsize
=
src
.
size
();
i
<
srcsize
;
++
i
)
...
...
@@ -440,20 +443,20 @@ static bool ocl_merge( InputArrayOfArrays _mv, OutputArray _dst )
}
int
dcn
=
(
int
)
ksrc
.
size
();
String
srcargs
,
srcdecl
,
processelem
,
cn
decl
;
String
srcargs
,
processelem
,
cndecl
,
index
decl
;
for
(
int
i
=
0
;
i
<
dcn
;
++
i
)
{
srcargs
+=
format
(
"DECLARE_SRC_PARAM(%d)"
,
i
);
srcdecl
+=
format
(
"DECLARE_DATA(%d)"
,
i
);
processelem
+=
format
(
"PROCESS_ELEM(%d)"
,
i
);
indexdecl
+=
format
(
"DECLARE_INDEX(%d)"
,
i
);
cndecl
+=
format
(
" -D scn%d=%d"
,
i
,
ksrc
[
i
].
channels
());
}
ocl
::
Kernel
k
(
"merge"
,
ocl
::
core
::
split_merge_oclsrc
,
format
(
"-D OP_MERGE -D cn=%d -D T=%s -D DECLARE_SRC_PARAMS_N=%s"
" -D DECLARE_
DATA
_N=%s -D PROCESS_ELEMS_N=%s%s"
,
" -D DECLARE_
INDEX
_N=%s -D PROCESS_ELEMS_N=%s%s"
,
dcn
,
ocl
::
memopTypeToStr
(
depth
),
srcargs
.
c_str
(),
src
decl
.
c_str
(),
processelem
.
c_str
(),
cndecl
.
c_str
()));
index
decl
.
c_str
(),
processelem
.
c_str
(),
cndecl
.
c_str
()));
if
(
k
.
empty
())
return
false
;
...
...
@@ -463,9 +466,10 @@ static bool ocl_merge( InputArrayOfArrays _mv, OutputArray _dst )
int
argidx
=
0
;
for
(
int
i
=
0
;
i
<
dcn
;
++
i
)
argidx
=
k
.
set
(
argidx
,
ocl
::
KernelArg
::
ReadOnlyNoSize
(
ksrc
[
i
]));
k
.
set
(
argidx
,
ocl
::
KernelArg
::
WriteOnly
(
dst
));
argidx
=
k
.
set
(
argidx
,
ocl
::
KernelArg
::
WriteOnly
(
dst
));
k
.
set
(
argidx
,
rowsPerWI
);
size_t
globalsize
[
2
]
=
{
dst
.
cols
,
dst
.
rows
};
size_t
globalsize
[
2
]
=
{
dst
.
cols
,
(
dst
.
rows
+
rowsPerWI
-
1
)
/
rowsPerWI
};
return
k
.
run
(
2
,
globalsize
,
NULL
,
false
);
}
...
...
@@ -690,7 +694,7 @@ static bool ocl_mixChannels(InputArrayOfArrays _src, InputOutputArrayOfArrays _d
for
(
size_t
i
=
0
,
dsize
=
dst
.
size
();
i
<
dsize
;
++
i
)
CV_Assert
(
dst
[
i
].
size
()
==
size
&&
dst
[
i
].
depth
()
==
depth
);
String
declsrc
,
decldst
,
declproc
,
declcn
;
String
declsrc
,
decldst
,
declproc
,
declcn
,
indexdecl
;
std
::
vector
<
UMat
>
srcargs
(
npairs
),
dstargs
(
npairs
);
for
(
size_t
i
=
0
;
i
<
npairs
;
++
i
)
...
...
@@ -711,14 +715,16 @@ static bool ocl_mixChannels(InputArrayOfArrays _src, InputOutputArrayOfArrays _d
declsrc
+=
format
(
"DECLARE_INPUT_MAT(%d)"
,
i
);
decldst
+=
format
(
"DECLARE_OUTPUT_MAT(%d)"
,
i
);
indexdecl
+=
format
(
"DECLARE_INDEX(%d)"
,
i
);
declproc
+=
format
(
"PROCESS_ELEM(%d)"
,
i
);
declcn
+=
format
(
" -D scn%d=%d -D dcn%d=%d"
,
i
,
src
[
src_idx
].
channels
(),
i
,
dst
[
dst_idx
].
channels
());
}
ocl
::
Kernel
k
(
"mixChannels"
,
ocl
::
core
::
mixchannels_oclsrc
,
format
(
"-D T=%s -D DECLARE_INPUT_MATS=%s -D DECLARE_OUTPUT_MATS=%s"
" -D PROCESS_ELEMS=%s%s"
,
ocl
::
memopTypeToStr
(
depth
),
declsrc
.
c_str
(),
decldst
.
c_str
(),
declproc
.
c_str
(),
declcn
.
c_str
()));
format
(
"-D T=%s -D DECLARE_INPUT_MAT_N=%s -D DECLARE_OUTPUT_MAT_N=%s"
" -D PROCESS_ELEM_N=%s -D DECLARE_INDEX_N=%s%s"
,
ocl
::
memopTypeToStr
(
depth
),
declsrc
.
c_str
(),
decldst
.
c_str
(),
declproc
.
c_str
(),
indexdecl
.
c_str
(),
declcn
.
c_str
()));
if
(
k
.
empty
())
return
false
;
...
...
modules/core/src/dxt.cpp
View file @
bd5e4c6c
...
...
@@ -2489,7 +2489,8 @@ namespace cv {
static
bool
ocl_mulSpectrums
(
InputArray
_srcA
,
InputArray
_srcB
,
OutputArray
_dst
,
int
flags
,
bool
conjB
)
{
int
atype
=
_srcA
.
type
(),
btype
=
_srcB
.
type
();
int
atype
=
_srcA
.
type
(),
btype
=
_srcB
.
type
(),
rowsPerWI
=
ocl
::
Device
::
getDefault
().
isIntel
()
?
4
:
1
;
Size
asize
=
_srcA
.
size
(),
bsize
=
_srcB
.
size
();
CV_Assert
(
asize
==
bsize
);
...
...
@@ -2509,9 +2510,9 @@ static bool ocl_mulSpectrums( InputArray _srcA, InputArray _srcB,
return
false
;
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
A
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
B
),
ocl
::
KernelArg
::
WriteOnly
(
dst
));
ocl
::
KernelArg
::
WriteOnly
(
dst
)
,
rowsPerWI
);
size_t
globalsize
[
2
]
=
{
asize
.
width
,
asize
.
height
};
size_t
globalsize
[
2
]
=
{
asize
.
width
,
(
asize
.
height
+
rowsPerWI
-
1
)
/
rowsPerWI
};
return
k
.
run
(
2
,
globalsize
,
NULL
,
false
);
}
...
...
modules/core/src/matrix.cpp
View file @
bd5e4c6c
...
...
@@ -2742,7 +2742,8 @@ namespace cv {
static
bool
ocl_setIdentity
(
InputOutputArray
_m
,
const
Scalar
&
s
)
{
int
type
=
_m
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
),
sctype
=
CV_MAKE_TYPE
(
depth
,
cn
==
3
?
4
:
cn
);
sctype
=
CV_MAKE_TYPE
(
depth
,
cn
==
3
?
4
:
cn
),
rowsPerWI
=
ocl
::
Device
::
getDefault
().
isIntel
()
?
4
:
1
;
ocl
::
Kernel
k
(
"setIdentity"
,
ocl
::
core
::
set_identity_oclsrc
,
format
(
"-D T=%s -D T1=%s -D cn=%d -D ST=%s"
,
ocl
::
memopTypeToStr
(
type
),
...
...
@@ -2751,9 +2752,10 @@ static bool ocl_setIdentity( InputOutputArray _m, const Scalar& s )
return
false
;
UMat
m
=
_m
.
getUMat
();
k
.
args
(
ocl
::
KernelArg
::
WriteOnly
(
m
),
ocl
::
KernelArg
::
Constant
(
Mat
(
1
,
1
,
sctype
,
s
)));
k
.
args
(
ocl
::
KernelArg
::
WriteOnly
(
m
),
ocl
::
KernelArg
::
Constant
(
Mat
(
1
,
1
,
sctype
,
s
)),
rowsPerWI
);
size_t
globalsize
[
2
]
=
{
m
.
cols
,
m
.
rows
};
size_t
globalsize
[
2
]
=
{
m
.
cols
,
(
m
.
rows
+
rowsPerWI
-
1
)
/
rowsPerWI
};
return
k
.
run
(
2
,
globalsize
,
NULL
,
false
);
}
...
...
modules/core/src/opencl/inrange.cl
View file @
bd5e4c6c
...
...
@@ -52,37 +52,47 @@
__kernel
void
inrange
(
__global
const
uchar
*
src1ptr,
int
src1_step,
int
src1_offset,
__global
uchar
*
dstptr,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols,
#
ifdef
HAVE_SCALAR
__global
const
T
*
src2,
__global
const
T
*
src3
__global
const
T
*
src2,
__global
const
T
*
src3
,
#
else
__global
const
uchar
*
src2ptr,
int
src2_step,
int
src2_offset,
__global
const
uchar
*
src3ptr,
int
src3_step,
int
src3_offset
__global
const
uchar
*
src3ptr,
int
src3_step,
int
src3_offset
,
#
endif
)
int
rowsPerWI
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
0
=
get_global_id
(
1
)
*
rowsPerWI
;
if
(
x
<
dst_cols
&&
y
<
dst_rows
)
if
(
x
<
dst_cols
)
{
int
src1_index
=
mad24
(
y,
src1_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
*
cn,
src1_offset
))
;
int
dst_index
=
mad24
(
y,
dst_step,
x
+
dst_offset
)
;
__global
const
T
*
src1
=
(
__global
const
T
*
)(
src1ptr
+
src1_index
)
;
__global
uchar
*
dst
=
dstptr
+
dst_index
;
int
src1_index
=
mad24
(
y0,
src1_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
*
cn,
src1_offset
))
;
int
dst_index
=
mad24
(
y0,
dst_step,
x
+
dst_offset
)
;
#
ifndef
HAVE_SCALAR
int
src2_index
=
mad24
(
y0,
src2_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
*
cn,
src2_offset
))
;
int
src3_index
=
mad24
(
y0,
src3_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
*
cn,
src3_offset
))
;
#
endif
for
(
int
y
=
y0,
y1
=
min
(
dst_rows,
y0
+
rowsPerWI
)
; y < y1; ++y, src1_index += src1_step, dst_index += dst_step)
{
__global
const
T
*
src1
=
(
__global
const
T
*
)(
src1ptr
+
src1_index
)
;
__global
uchar
*
dst
=
dstptr
+
dst_index
;
#
ifndef
HAVE_SCALAR
int
src2_index
=
mad24
(
y,
src2_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
*
cn,
src2_offset
))
;
int
src3_index
=
mad24
(
y,
src3_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
*
cn,
src3_offset
))
;
__global
const
T
*
src2
=
(
__global
const
T
*
)(
src2ptr
+
src2_index
)
;
__global
const
T
*
src3
=
(
__global
const
T
*
)(
src3ptr
+
src3_index
)
;
__global
const
T
*
src2
=
(
__global
const
T
*
)(
src2ptr
+
src2_index
)
;
__global
const
T
*
src3
=
(
__global
const
T
*
)(
src3ptr
+
src3_index
)
;
#
endif
dst[0]
=
255
;
dst[0]
=
255
;
for
(
int
c
=
0
; c < cn; ++c)
if
(
src2[c]
>
src1[c]
||
src3[c]
<
src1[c]
)
{
dst[0]
=
0
;
break
;
}
for
(
int
c
=
0
; c < cn; ++c)
if
(
src2[c]
>
src1[c]
||
src3[c]
<
src1[c]
)
{
dst[0]
=
0
;
break
;
}
#
ifndef
HAVE_SCALAR
src2_index
+=
src2_step
;
src3_index
+=
src3_step
;
#
endif
}
}
}
modules/core/src/opencl/mixchannels.cl
View file @
bd5e4c6c
...
...
@@ -45,20 +45,28 @@
__global
const
uchar
*
src##i##ptr,
int
src##i##_step,
int
src##i##_offset,
#
define
DECLARE_OUTPUT_MAT
(
i
)
\
__global
uchar
*
dst##i##ptr,
int
dst##i##_step,
int
dst##i##_offset,
#
define
DECLARE_INDEX
(
i
)
\
int
src##i##_index
=
mad24
(
src##i##_step,
y0,
mad24
(
x,
(
int
)
sizeof
(
T
)
*
scn##i,
src##i##_offset
))
; \
int
dst##i##_index
=
mad24
(
dst##i##_step,
y0,
mad24
(
x,
(
int
)
sizeof
(
T
)
*
dcn##i,
dst##i##_offset
))
;
#
define
PROCESS_ELEM
(
i
)
\
int
src##i##_index
=
mad24
(
src##i##_step,
y,
mad24
(
x,
(
int
)
sizeof
(
T
)
*
scn##i,
src##i##_offset
))
; \
__global
const
T
*
src##i
=
(
__global
const
T
*
)(
src##i##ptr
+
src##i##_index
)
; \
int
dst##i##_index
=
mad24
(
dst##i##_step,
y,
mad24
(
x,
(
int
)
sizeof
(
T
)
*
dcn##i,
dst##i##_offset
))
; \
__global
T
*
dst##i
=
(
__global
T
*
)(
dst##i##ptr
+
dst##i##_index
)
; \
dst##i[0]
=
src##i[0]
;
dst##i[0]
=
src##i[0]
; \
src##i##_index
+=
src##i##_step
; \
dst##i##_index
+=
dst##i##_step
;
__kernel
void
mixChannels
(
DECLARE_INPUT_MAT
S
DECLARE_OUTPUT_MATS
int
rows,
int
cols
)
__kernel
void
mixChannels
(
DECLARE_INPUT_MAT
_N
DECLARE_OUTPUT_MAT_N
int
rows,
int
cols,
int
rowsPerWI
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
0
=
get_global_id
(
1
)
*
rowsPerWI
;
if
(
x
<
cols
&&
y
<
rows
)
if
(
x
<
cols
)
{
PROCESS_ELEMS
DECLARE_INDEX_N
for
(
int
y
=
y0,
y1
=
min
(
y0
+
rowsPerWI,
rows
)
; y < y1; ++y)
{
PROCESS_ELEM_N
}
}
}
modules/core/src/opencl/mulspectrums.cl
View file @
bd5e4c6c
...
...
@@ -56,26 +56,30 @@ inline float2 conjf(float2 a)
__kernel
void
mulAndScaleSpectrums
(
__global
const
uchar
*
src1ptr,
int
src1_step,
int
src1_offset,
__global
const
uchar
*
src2ptr,
int
src2_step,
int
src2_offset,
__global
uchar
*
dstptr,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols
)
int
dst_rows,
int
dst_cols
,
int
rowsPerWI
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
0
=
get_global_id
(
1
)
*
rowsPerWI
;
if
(
x
<
dst_cols
&&
y
<
dst_rows
)
if
(
x
<
dst_cols
)
{
int
src1_index
=
mad24
(
y,
src1_step,
mad24
(
x,
(
int
)
sizeof
(
float2
)
,
src1_offset
))
;
int
src2_index
=
mad24
(
y,
src2_step,
mad24
(
x,
(
int
)
sizeof
(
float2
)
,
src2_offset
))
;
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)
sizeof
(
float2
)
,
dst_offset
))
;
int
src1_index
=
mad24
(
y
0
,
src1_step,
mad24
(
x,
(
int
)
sizeof
(
float2
)
,
src1_offset
))
;
int
src2_index
=
mad24
(
y
0
,
src2_step,
mad24
(
x,
(
int
)
sizeof
(
float2
)
,
src2_offset
))
;
int
dst_index
=
mad24
(
y
0
,
dst_step,
mad24
(
x,
(
int
)
sizeof
(
float2
)
,
dst_offset
))
;
float2
src0
=
*
(
__global
const
float2
*
)(
src1ptr
+
src1_index
)
;
float2
src1
=
*
(
__global
const
float2
*
)(
src2ptr
+
src2_index
)
;
__global
float2
*
dst
=
(
__global
float2
*
)(
dstptr
+
dst_index
)
;
for
(
int
y
=
y0,
y1
=
min
(
dst_rows,
y0
+
rowsPerWI
)
; y < y1; ++y,
src1_index
+=
src1_step,
src2_index
+=
src2_step,
dst_index
+=
dst_step
)
{
float2
src0
=
*
(
__global
const
float2
*
)(
src1ptr
+
src1_index
)
;
float2
src1
=
*
(
__global
const
float2
*
)(
src2ptr
+
src2_index
)
;
__global
float2
*
dst
=
(
__global
float2
*
)(
dstptr
+
dst_index
)
;
#
ifdef
CONJ
float2
v
=
cmulf
(
src0,
conjf
(
src1
))
;
float2
v
=
cmulf
(
src0,
conjf
(
src1
))
;
#
else
float2
v
=
cmulf
(
src0,
src1
)
;
float2
v
=
cmulf
(
src0,
src1
)
;
#
endif
dst[0]
=
v
;
dst[0]
=
v
;
}
}
}
modules/core/src/opencl/set_identity.cl
View file @
bd5e4c6c
...
...
@@ -56,15 +56,16 @@
#
endif
__kernel
void
setIdentity
(
__global
uchar
*
srcptr,
int
src_step,
int
src_offset,
int
rows,
int
cols,
ST
scalar_
)
ST
scalar_
,
int
rowsPerWI
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
0
=
get_global_id
(
1
)
*
rowsPerWI
;
if
(
x
<
cols
&&
y
<
rows
)
if
(
x
<
cols
)
{
int
src_index
=
mad24
(
y,
src_step,
mad24
(
x,
TSIZE,
src_offset
))
;
int
src_index
=
mad24
(
y
0
,
src_step,
mad24
(
x,
TSIZE,
src_offset
))
;
storepix
(
x
==
y
?
scalar
:
(
T
)(
0
)
,
srcptr
+
src_index
)
;
for
(
int
y
=
y0,
y1
=
min
(
rows,
y0
+
rowsPerWI
)
; y < y1; ++y, src_index += src_step)
storepix
(
x
==
y
?
scalar
:
(
T
)(
0
)
,
srcptr
+
src_index
)
;
}
}
modules/core/src/opencl/split_merge.cl
View file @
bd5e4c6c
...
...
@@ -44,42 +44,58 @@
#
ifdef
OP_MERGE
#
define
DECLARE_SRC_PARAM
(
index
)
__global
const
uchar
*
src##index##ptr,
int
src##index##_step,
int
src##index##_offset,
#
define
DECLARE_DATA
(
index
)
__global
const
T
*
src##index
=
\
(
__global
T
*
)(
src##index##ptr
+
mad24
(
src##index##_step,
y,
mad24
(
x,
(
int
)
sizeof
(
T
)
*
scn##index,
src##index##_offset
)))
;
#
define
PROCESS_ELEM
(
index
)
dst[index]
=
src##index[0]
;
#
define
DECLARE_INDEX
(
index
)
int
src##index##_index
=
mad24
(
src##index##_step,
y0,
mad24
(
x,
(
int
)
sizeof
(
T
)
*
scn##index,
src##index##_offset
))
;
#
define
PROCESS_ELEM
(
index
)
\
__global
const
T
*
src##index
=
(
__global
const
T
*
)(
src##index##ptr
+
src##index##_index
)
; \
dst[index]
=
src##index[0]
; \
src##index##_index
+=
src##index##_step
;
__kernel
void
merge
(
DECLARE_SRC_PARAMS_N
__global
uchar
*
dstptr,
int
dst_step,
int
dst_offset,
int
rows,
int
cols
)
int
rows,
int
cols
,
int
rowsPerWI
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
0
=
get_global_id
(
1
)
*
rowsPerWI
;
if
(
x
<
cols
&&
y
<
rows
)
if
(
x
<
cols
)
{
DECLARE_DATA_N
__global
T
*
dst
=
(
__global
T
*
)(
dstptr
+
mad24
(
dst_step,
y,
mad24
(
x,
(
int
)
sizeof
(
T
)
*
cn,
dst_offset
)))
;
PROCESS_ELEMS_N
DECLARE_INDEX_N
int
dst_index
=
mad24
(
dst_step,
y0,
mad24
(
x,
(
int
)
sizeof
(
T
)
*
cn,
dst_offset
))
;
for
(
int
y
=
y0,
y1
=
min
(
rows,
y0
+
rowsPerWI
)
; y < y1; ++y, dst_index += dst_step)
{
__global
T
*
dst
=
(
__global
T
*
)(
dstptr
+
dst_index
)
;
PROCESS_ELEMS_N
}
}
}
#
elif
defined
OP_SPLIT
#
define
DECLARE_DST_PARAM
(
index
)
,
__global
uchar
*
dst##index##ptr,
int
dst##index##_step,
int
dst##index##_offset
#
define
DECLARE_DATA
(
index
)
__global
T
*
dst##index
=
\
(
__global
T
*
)(
dst##index##ptr
+
mad24
(
y,
dst##index##_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
,
dst##index##_offset
)))
;
#
define
PROCESS_ELEM
(
index
)
dst##index[0]
=
src[index]
;
#
define
DECLARE_INDEX
(
index
)
int
dst##index##_index
=
mad24
(
y0,
dst##index##_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
,
dst##index##_offset
))
;
#
define
PROCESS_ELEM
(
index
)
\
__global
T
*
dst##index
=
(
__global
T
*
)(
dst##index##ptr
+
dst##index##_index
)
; \
dst##index[0]
=
src[index]
; \
dst##index##_index
+=
dst##index##_step
;
__kernel
void
split
(
__global
uchar*
srcptr,
int
src_step,
int
src_offset,
int
rows,
int
cols
DECLARE_DST_PARAMS
)
__kernel
void
split
(
__global
uchar*
srcptr,
int
src_step,
int
src_offset,
int
rows,
int
cols
DECLARE_DST_PARAMS
,
int
rowsPerWI
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
0
=
get_global_id
(
1
)
*
rowsPerWI
;
if
(
x
<
cols
&&
y
<
rows
)
if
(
x
<
cols
)
{
DECLARE_DATA_N
__global
const
T
*
src
=
(
__global
const
T
*
)(
srcptr
+
mad24
(
y,
src_step,
mad24
(
x,
cn
*
(
int
)
sizeof
(
T
)
,
src_offset
)))
;
PROCESS_ELEMS_N
DECLARE_INDEX_N
int
src_index
=
mad24
(
y0,
src_step,
mad24
(
x,
cn
*
(
int
)
sizeof
(
T
)
,
src_offset
))
;
for
(
int
y
=
y0,
y1
=
min
(
rows,
y0
+
rowsPerWI
)
; y < y1; ++y, src_index += src_step)
{
__global
const
T
*
src
=
(
__global
const
T
*
)(
srcptr
+
src_index
)
;
PROCESS_ELEMS_N
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment