Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
0fc248fb
Commit
0fc248fb
authored
Feb 14, 2014
by
Andrey Pavlenko
Committed by
OpenCV Buildbot
Feb 14, 2014
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #2326 from vpisarev:ocl_ch3
parents
534bec3a
630bdbf4
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
295 additions
and
168 deletions
+295
-168
arithm.cpp
modules/core/src/arithm.cpp
+23
-11
arithm.cl
modules/core/src/opencl/arithm.cl
+106
-58
copyset.cl
modules/core/src/opencl/copyset.cl
+18
-6
umatrix.cpp
modules/core/src/umatrix.cpp
+7
-4
imgwarp.cpp
modules/imgproc/src/imgwarp.cpp
+39
-24
resize.cl
modules/imgproc/src/opencl/resize.cl
+26
-21
warp_affine.cl
modules/imgproc/src/opencl/warp_affine.cl
+38
-22
warp_perspective.cl
modules/imgproc/src/opencl/warp_perspective.cl
+38
-22
No files found.
modules/core/src/arithm.cpp
View file @
0fc248fb
...
...
@@ -934,16 +934,23 @@ static bool ocl_binary_op(InputArray _src1, InputArray _src2, OutputArray _dst,
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
if
(
oclop
<
0
||
((
haveMask
||
haveScalar
)
&&
(
cn
>
4
||
cn
==
3
)
)
||
if
(
oclop
<
0
||
((
haveMask
||
haveScalar
)
&&
cn
>
4
)
||
(
!
doubleSupport
&&
srcdepth
==
CV_64F
))
return
false
;
char
opts
[
1024
];
int
kercn
=
haveMask
||
haveScalar
?
cn
:
1
;
sprintf
(
opts
,
"-D %s%s -D %s -D dstT=%s%s"
,
int
scalarcn
=
kercn
==
3
?
4
:
kercn
;
sprintf
(
opts
,
"-D %s%s -D %s -D dstT=%s%s -D dstT_C1=%s -D workST=%s -D cn=%d"
,
(
haveMask
?
"MASK_"
:
""
),
(
haveScalar
?
"UNARY_OP"
:
"BINARY_OP"
),
oclop2str
[
oclop
],
bitwise
?
ocl
::
memopTypeToStr
(
CV_MAKETYPE
(
srcdepth
,
kercn
))
:
ocl
::
typeToStr
(
CV_MAKETYPE
(
srcdepth
,
kercn
)),
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
);
ocl
::
typeToStr
(
CV_MAKETYPE
(
srcdepth
,
kercn
)),
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
,
bitwise
?
ocl
::
memopTypeToStr
(
CV_MAKETYPE
(
srcdepth
,
1
))
:
ocl
::
typeToStr
(
CV_MAKETYPE
(
srcdepth
,
1
)),
bitwise
?
ocl
::
memopTypeToStr
(
CV_MAKETYPE
(
srcdepth
,
scalarcn
))
:
ocl
::
typeToStr
(
CV_MAKETYPE
(
srcdepth
,
scalarcn
)),
kercn
);
ocl
::
Kernel
k
(
"KF"
,
ocl
::
core
::
arithm_oclsrc
,
opts
);
if
(
k
.
empty
()
)
...
...
@@ -960,7 +967,7 @@ static bool ocl_binary_op(InputArray _src1, InputArray _src2, OutputArray _dst,
if
(
haveScalar
)
{
size_t
esz
=
CV_ELEM_SIZE
(
srctype
)
;
size_t
esz
=
CV_ELEM_SIZE
1
(
srctype
)
*
scalarcn
;
double
buf
[
4
]
=
{
0
,
0
,
0
,
0
};
if
(
oclop
!=
OCL_OP_NOT
)
...
...
@@ -1294,7 +1301,7 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
int
type1
=
_src1
.
type
(),
depth1
=
CV_MAT_DEPTH
(
type1
),
cn
=
CV_MAT_CN
(
type1
);
bool
haveMask
=
!
_mask
.
empty
();
if
(
((
haveMask
||
haveScalar
)
&&
(
cn
>
4
||
cn
==
3
)
)
)
if
(
((
haveMask
||
haveScalar
)
&&
cn
>
4
)
)
return
false
;
int
dtype
=
_dst
.
type
(),
ddepth
=
CV_MAT_DEPTH
(
dtype
),
wdepth
=
std
::
max
(
CV_32S
,
CV_MAT_DEPTH
(
wtype
));
...
...
@@ -1307,21 +1314,26 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
return
false
;
int
kercn
=
haveMask
||
haveScalar
?
cn
:
1
;
int
scalarcn
=
kercn
==
3
?
4
:
kercn
;
char
cvtstr
[
4
][
32
],
opts
[
1024
];
sprintf
(
opts
,
"-D %s%s -D %s -D srcT1=%s -D srcT
2
=%s "
"-D dstT=%s -D
work
T=%s -D scaleT=%s -D convertToWT1=%s "
"-D convertToWT2=%s -D convertToDT=%s%s"
,
sprintf
(
opts
,
"-D %s%s -D %s -D srcT1=%s -D srcT
1_C1=%s -D srcT2=%s -D srcT2_C1
=%s "
"-D dstT=%s -D
dstT_C1=%s -D workT=%s -D workS
T=%s -D scaleT=%s -D convertToWT1=%s "
"-D convertToWT2=%s -D convertToDT=%s%s
-D cn=%d
"
,
(
haveMask
?
"MASK_"
:
""
),
(
haveScalar
?
"UNARY_OP"
:
"BINARY_OP"
),
oclop2str
[
oclop
],
ocl
::
typeToStr
(
CV_MAKETYPE
(
depth1
,
kercn
)),
ocl
::
typeToStr
(
CV_MAKETYPE
(
depth1
,
1
)),
ocl
::
typeToStr
(
CV_MAKETYPE
(
depth2
,
kercn
)),
ocl
::
typeToStr
(
CV_MAKETYPE
(
depth2
,
1
)),
ocl
::
typeToStr
(
CV_MAKETYPE
(
ddepth
,
kercn
)),
ocl
::
typeToStr
(
CV_MAKETYPE
(
ddepth
,
1
)),
ocl
::
typeToStr
(
CV_MAKETYPE
(
wdepth
,
kercn
)),
ocl
::
typeToStr
(
CV_MAKETYPE
(
wdepth
,
scalarcn
)),
ocl
::
typeToStr
(
CV_MAKETYPE
(
wdepth
,
1
)),
ocl
::
convertTypeStr
(
depth1
,
wdepth
,
kercn
,
cvtstr
[
0
]),
ocl
::
convertTypeStr
(
depth2
,
wdepth
,
kercn
,
cvtstr
[
1
]),
ocl
::
convertTypeStr
(
wdepth
,
ddepth
,
kercn
,
cvtstr
[
2
]),
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
);
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
,
kercn
);
size_t
usrdata_esz
=
CV_ELEM_SIZE
(
wdepth
);
const
uchar
*
usrdata_p
=
(
const
uchar
*
)
usrdata
;
...
...
@@ -1352,7 +1364,7 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
if
(
haveScalar
)
{
size_t
esz
=
CV_ELEM_SIZE
(
wtype
);
size_t
esz
=
CV_ELEM_SIZE
(
wtype
)
*
scalarcn
;
double
buf
[
4
]
=
{
0
,
0
,
0
,
0
};
Mat
src2sc
=
_src2
.
getMat
();
...
...
@@ -2621,7 +2633,7 @@ static bool ocl_compare(InputArray _src1, InputArray _src2, OutputArray _dst, in
const
char
*
const
operationMap
[]
=
{
"=="
,
">"
,
">="
,
"<"
,
"<="
,
"!="
};
ocl
::
Kernel
k
(
"KF"
,
ocl
::
core
::
arithm_oclsrc
,
format
(
"-D BINARY_OP -D srcT1=%s -D workT=srcT1"
format
(
"-D BINARY_OP -D srcT1=%s -D workT=srcT1
-D cn=1
"
" -D OP_CMP -D CMP_OPERATOR=%s%s"
,
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
depth
,
1
)),
operationMap
[
op
],
...
...
modules/core/src/opencl/arithm.cl
View file @
0fc248fb
...
...
@@ -70,21 +70,47 @@
#define CV_PI M_PI_F
#endif
#define dstelem *(__global dstT*)(dstptr + dst_index)
#define dstelem2 *(__global dstT*)(dstptr2 + dst_index2)
#ifndef cn
#define cn 1
#endif
#if cn == 1
#undef srcT1_C1
#undef srcT2_C1
#undef dstT_C1
#define srcT1_C1 srcT1
#define srcT2_C1 srcT2
#define dstT_C1 dstT
#endif
#if cn != 3
#define storedst(val) *(__global dstT*)(dstptr + dst_index) = val
#define storedst2(val) *(__global dstT*)(dstptr2 + dst_index2) = val
#else
#define storedst(val) vstore3(val, 0, (__global dstT_C1*)(dstptr + dst_index))
#define storedst2(val) vstore3(val, 0, (__global dstT_C1*)(dstptr2 + dst_index2))
#endif
#define noconvert
#ifndef workT
#ifndef srcT1
#define srcT1 dstT
#define srcT1_C1 dstT_C1
#endif
#ifndef srcT2
#define srcT2 dstT
#define srcT2_C1 dstT_C1
#endif
#define workT dstT
#define srcelem1 *(__global srcT1*)(srcptr1 + src1_index)
#define srcelem2 *(__global srcT2*)(srcptr2 + src2_index)
#if cn != 3
#define srcelem1 *(__global srcT1*)(srcptr1 + src1_index)
#define srcelem2 *(__global srcT2*)(srcptr2 + src2_index)
#else
#define srcelem1 vload3(0, (__global srcT1_C1*)(srcptr1 + src1_index))
#define srcelem2 vload3(0, (__global srcT2_C1*)(srcptr2 + src2_index))
#endif
#ifndef convertToDT
#define convertToDT noconvert
#endif
...
...
@@ -94,153 +120,168 @@
#ifndef convertToWT2
#define convertToWT2 convertToWT1
#endif
#define srcelem1 convertToWT1(*(__global srcT1*)(srcptr1 + src1_index))
#define srcelem2 convertToWT2(*(__global srcT2*)(srcptr2 + src2_index))
#if cn != 3
#define srcelem1 convertToWT1(*(__global srcT1*)(srcptr1 + src1_index))
#define srcelem2 convertToWT2(*(__global srcT2*)(srcptr2 + src2_index))
#else
#define srcelem1 convertToWT1(vload3(0, (__global srcT1_C1*)(srcptr1 + src1_index)))
#define srcelem2 convertToWT2(vload3(0, (__global srcT2_C1*)(srcptr2 + src2_index)))
#endif
#endif
#ifndef workST
#define workST workT
#endif
#define EXTRA_PARAMS
#define EXTRA_INDEX
#if defined OP_ADD
#define PROCESS_ELEM
dstelem = convertToDT(srcelem1 + srcelem2
)
#define PROCESS_ELEM
storedst(convertToDT(srcelem1 + srcelem2)
)
#elif defined OP_SUB
#define PROCESS_ELEM
dstelem = convertToDT(srcelem1 - srcelem2
)
#define PROCESS_ELEM
storedst(convertToDT(srcelem1 - srcelem2)
)
#elif defined OP_RSUB
#define PROCESS_ELEM
dstelem = convertToDT(srcelem2 - srcelem1
)
#define PROCESS_ELEM
storedst(convertToDT(srcelem2 - srcelem1)
)
#elif defined OP_ABSDIFF
#define PROCESS_ELEM \
workT v = srcelem1 - srcelem2; \
dstelem = convertToDT(v >= (workT)(0) ? v : -v);
storedst(convertToDT(v >= (workT)(0) ? v : -v))
#elif defined OP_AND
#define PROCESS_ELEM
dstelem = srcelem1 & srcelem2
#define PROCESS_ELEM
storedst(srcelem1 & srcelem2)
#elif defined OP_OR
#define PROCESS_ELEM
dstelem = srcelem1 | srcelem2
#define PROCESS_ELEM
storedst(srcelem1 | srcelem2)
#elif defined OP_XOR
#define PROCESS_ELEM
dstelem = srcelem1 ^ srcelem2
#define PROCESS_ELEM
storedst(srcelem1 ^ srcelem2)
#elif defined OP_NOT
#define PROCESS_ELEM
dstelem = ~srcelem1
#define PROCESS_ELEM
storedst(~srcelem1)
#elif defined OP_MIN
#define PROCESS_ELEM
dstelem = min(srcelem1, srcelem2
)
#define PROCESS_ELEM
storedst(min(srcelem1, srcelem2)
)
#elif defined OP_MAX
#define PROCESS_ELEM
dstelem = max(srcelem1, srcelem2
)
#define PROCESS_ELEM
storedst(max(srcelem1, srcelem2)
)
#elif defined OP_MUL
#define PROCESS_ELEM
dstelem = convertToDT(srcelem1 * srcelem2
)
#define PROCESS_ELEM
storedst(convertToDT(srcelem1 * srcelem2)
)
#elif defined OP_MUL_SCALE
#undef EXTRA_PARAMS
#ifdef UNARY_OP
#define EXTRA_PARAMS , workT srcelem2, scaleT scale
#define EXTRA_PARAMS , workST srcelem2_, scaleT scale
#undef srcelem2
#define srcelem2 srcelem2_
#else
#define EXTRA_PARAMS , scaleT scale
#endif
#define PROCESS_ELEM
dstelem = convertToDT(srcelem1 * scale * srcelem2
)
#define PROCESS_ELEM
storedst(convertToDT(srcelem1 * scale * srcelem2)
)
#elif defined OP_DIV
#define PROCESS_ELEM \
workT e2 = srcelem2, zero = (workT)(0); \
dstelem = convertToDT(e2 != zero ? srcelem1 / e2 : zero
)
storedst(convertToDT(e2 != zero ? srcelem1 / e2 : zero)
)
#elif defined OP_DIV_SCALE
#undef EXTRA_PARAMS
#ifdef UNARY_OP
#define EXTRA_PARAMS , workT srcelem2, scaleT scale
#define EXTRA_PARAMS , workST srcelem2_, scaleT scale
#undef srcelem2
#define srcelem2 srcelem2_
#else
#define EXTRA_PARAMS , scaleT scale
#endif
#define PROCESS_ELEM \
workT e2 = srcelem2, zero = (workT)(0); \
dstelem = convertToDT(e2 == zero ? zero : (srcelem1 * (workT)(scale) / e2
))
storedst(convertToDT(e2 == zero ? zero : (srcelem1 * (workT)(scale) / e2)
))
#elif defined OP_RDIV_SCALE
#undef EXTRA_PARAMS
#ifdef UNARY_OP
#define EXTRA_PARAMS , workT srcelem2, scaleT scale
#define EXTRA_PARAMS , workST srcelem2_, scaleT scale
#undef srcelem2
#define srcelem2 srcelem2_
#else
#define EXTRA_PARAMS , scaleT scale
#endif
#define PROCESS_ELEM \
workT e1 = srcelem1, zero = (workT)(0); \
dstelem = convertToDT(e1 == zero ? zero : (srcelem2 * (workT)(scale) / e1
))
storedst(convertToDT(e1 == zero ? zero : (srcelem2 * (workT)(scale) / e1)
))
#elif defined OP_RECIP_SCALE
#undef EXTRA_PARAMS
#define EXTRA_PARAMS , scaleT scale
#define PROCESS_ELEM \
workT e1 = srcelem1, zero = (workT)(0); \
dstelem = convertToDT(e1 != zero ? scale / e1 : zero
)
storedst(convertToDT(e1 != zero ? scale / e1 : zero)
)
#elif defined OP_ADDW
#undef EXTRA_PARAMS
#define EXTRA_PARAMS , scaleT alpha, scaleT beta, scaleT gamma
#define PROCESS_ELEM
dstelem = convertToDT(srcelem1*alpha + srcelem2*beta + gamma
)
#define PROCESS_ELEM
storedst(convertToDT(srcelem1*alpha + srcelem2*beta + gamma)
)
#elif defined OP_MAG
#define PROCESS_ELEM
dstelem = hypot(srcelem1, srcelem2
)
#define PROCESS_ELEM
storedst(hypot(srcelem1, srcelem2)
)
#elif defined OP_ABS_NOSAT
#define PROCESS_ELEM \
dstT v = convertToDT(srcelem1); \
dstelem = v >= 0 ? v : -v
storedst(v >= 0 ? v : -v)
#elif defined OP_PHASE_RADIANS
#define PROCESS_ELEM \
workT tmp = atan2(srcelem2, srcelem1); \
if(tmp < 0) tmp += 6.283185307179586232f; \
dstelem = tmp
storedst(tmp)
#elif defined OP_PHASE_DEGREES
#define PROCESS_ELEM \
workT tmp = atan2(srcelem2, srcelem1)*57.29577951308232286465f; \
if(tmp < 0) tmp += 360; \
dstelem = tmp
storedst(tmp)
#elif defined OP_EXP
#define PROCESS_ELEM
dstelem = exp(srcelem1
)
#define PROCESS_ELEM
storedst(exp(srcelem1)
)
#elif defined OP_POW
#define PROCESS_ELEM
dstelem = pow(srcelem1, srcelem2
)
#define PROCESS_ELEM
storedst(pow(srcelem1, srcelem2)
)
#elif defined OP_POWN
#undef workT
#define workT int
#define PROCESS_ELEM
dstelem = pown(srcelem1, srcelem2
)
#define PROCESS_ELEM
storedst(pown(srcelem1, srcelem2)
)
#elif defined OP_SQRT
#define PROCESS_ELEM
dstelem = sqrt(srcelem1
)
#define PROCESS_ELEM
storedst(sqrt(srcelem1)
)
#elif defined OP_LOG
#define PROCESS_ELEM \
dstT v = (dstT)(srcelem1);\
dstelem = v > (dstT)(0) ? log(v) : log(-v
)
dstT v = (dstT)(srcelem1);\
storedst(v > (dstT)(0) ? log(v) : log(-v)
)
#elif defined OP_CMP
#define dstT uchar
#define srcT2 srcT1
#define convertToWT1
#define PROCESS_ELEM
dstelem = convert_uchar(srcelem1 CMP_OPERATOR srcelem2 ? 255 : 0
)
#define PROCESS_ELEM
storedst(convert_uchar(srcelem1 CMP_OPERATOR srcelem2 ? 255 : 0)
)
#elif defined OP_CONVERT_SCALE_ABS
#undef EXTRA_PARAMS
#define EXTRA_PARAMS , workT alpha, workT beta
#define PROCESS_ELEM \
workT value = srcelem1 * alpha + beta; \
dstelem = convertToDT(value >= 0 ? value : -value
)
storedst(convertToDT(value >= 0 ? value : -value)
)
#elif defined OP_SCALE_ADD
#undef EXTRA_PARAMS
#define EXTRA_PARAMS , workT alpha
#define PROCESS_ELEM
dstelem = convertToDT(srcelem1 * alpha + srcelem2
)
#define PROCESS_ELEM
storedst(convertToDT(srcelem1 * alpha + srcelem2)
)
#elif defined OP_CTP_AD || defined OP_CTP_AR
#ifdef OP_CTP_AD
...
...
@@ -257,8 +298,8 @@ dstelem = v > (dstT)(0) ? log(v) : log(-v)
dstT tmp1 = y >= 0 ? CV_PI * 0.5f : CV_PI * 1.5f; \
dstT cartToPolar = y2 <= x2 ? x * y / (x2 + 0.28f * y2 + CV_EPSILON) + tmp : (tmp1 - x * y / (y2 + 0.28f * x2 + CV_EPSILON)); \
TO_DEGREE \
dstelem = magnitude
; \
dstelem2 = cartToPolar
storedst(magnitude)
; \
storedst2(cartToPolar)
#elif defined OP_PTC_AD || defined OP_PTC_AR
#ifdef OP_PTC_AD
...
...
@@ -272,15 +313,15 @@ dstelem = v > (dstT)(0) ? log(v) : log(-v)
#define PROCESS_ELEM \
dstT x = srcelem1, y = srcelem2; \
FROM_DEGREE; \
dstelem = cos(alpha) * x
; \
dstelem2 = sin(alpha) * x
storedst(cos(alpha) * x)
; \
storedst2(sin(alpha) * x)
#elif defined OP_PATCH_NANS
#undef EXTRA_PARAMS
#define EXTRA_PARAMS , int val
#define PROCESS_ELEM \
if (( srcelem1 & 0x7fffffff) > 0x7f800000 ) \
dstelem = val
storedst(val)
#else
#error "unknown op type"
...
...
@@ -290,18 +331,26 @@ dstelem = v > (dstT)(0) ? log(v) : log(-v)
#undef EXTRA_PARAMS
#define EXTRA_PARAMS , __global uchar* dstptr2, int dststep2, int dstoffset2
#undef EXTRA_INDEX
#define EXTRA_INDEX int dst_index2 = mad24(y, dststep2, x*(int)sizeof(dstT
)
+ dstoffset2)
#define EXTRA_INDEX int dst_index2 = mad24(y, dststep2, x*(int)sizeof(dstT
_C1)*cn
+ dstoffset2)
#endif
#if defined UNARY_OP || defined MASK_UNARY_OP
#undef srcelem2
#if defined OP_AND || defined OP_OR || defined OP_XOR || defined OP_ADD || defined OP_SAT_ADD || \
defined OP_SUB || defined OP_SAT_SUB || defined OP_RSUB || defined OP_SAT_RSUB || \
defined OP_ABSDIFF || defined OP_CMP || defined OP_MIN || defined OP_MAX || defined OP_POW || \
defined OP_MUL || defined OP_DIV || defined OP_POWN
#undef EXTRA_PARAMS
#define EXTRA_PARAMS , workT srcelem2
#define EXTRA_PARAMS , workST srcelem2_
#undef srcelem2
#define srcelem2 srcelem2_
#endif
#if cn == 3
#undef srcelem2
#define srcelem2 (workT)(srcelem2_.x, srcelem2_.y, srcelem2_.z)
#endif
#endif
#if defined BINARY_OP
...
...
@@ -316,11 +365,11 @@ __kernel void KF(__global const uchar* srcptr1, int srcstep1, int srcoffset1,
if (x < cols && y < rows)
{
int src1_index = mad24(y, srcstep1, x*(int)sizeof(srcT1
)
+ srcoffset1);
int src1_index = mad24(y, srcstep1, x*(int)sizeof(srcT1
_C1)*cn
+ srcoffset1);
#if !(defined(OP_RECIP_SCALE) || defined(OP_NOT))
int src2_index = mad24(y, srcstep2, x*(int)sizeof(srcT2
)
+ srcoffset2);
int src2_index = mad24(y, srcstep2, x*(int)sizeof(srcT2
_C1)*cn
+ srcoffset2);
#endif
int dst_index = mad24(y, dststep, x*(int)sizeof(dstT
)
+ dstoffset);
int dst_index = mad24(y, dststep, x*(int)sizeof(dstT
_C1)*cn
+ dstoffset);
EXTRA_INDEX;
PROCESS_ELEM;
...
...
@@ -343,9 +392,9 @@ __kernel void KF(__global const uchar* srcptr1, int srcstep1, int srcoffset1,
int mask_index = mad24(y, maskstep, x + maskoffset);
if( mask[mask_index] )
{
int src1_index = mad24(y, srcstep1, x*(int)sizeof(srcT1
)
+ srcoffset1);
int src2_index = mad24(y, srcstep2, x*(int)sizeof(srcT2
)
+ srcoffset2);
int dst_index = mad24(y, dststep, x*(int)sizeof(dstT
)
+ dstoffset);
int src1_index = mad24(y, srcstep1, x*(int)sizeof(srcT1
_C1)*cn
+ srcoffset1);
int src2_index = mad24(y, srcstep2, x*(int)sizeof(srcT2
_C1)*cn
+ srcoffset2);
int dst_index = mad24(y, dststep, x*(int)sizeof(dstT
_C1)*cn
+ dstoffset);
PROCESS_ELEM;
}
...
...
@@ -363,9 +412,8 @@ __kernel void KF(__global const uchar* srcptr1, int srcstep1, int srcoffset1,
if (x < cols && y < rows)
{
int src1_index = mad24(y, srcstep1, x*(int)sizeof(srcT1) + srcoffset1);
int dst_index = mad24(y, dststep, x*(int)sizeof(dstT) + dstoffset);
EXTRA_INDEX;
int src1_index = mad24(y, srcstep1, x*(int)sizeof(srcT1_C1)*cn + srcoffset1);
int dst_index = mad24(y, dststep, x*(int)sizeof(dstT_C1)*cn + dstoffset);
PROCESS_ELEM;
}
...
...
@@ -386,8 +434,8 @@ __kernel void KF(__global const uchar* srcptr1, int srcstep1, int srcoffset1,
int mask_index = mad24(y, maskstep, x + maskoffset);
if( mask[mask_index] )
{
int src1_index = mad24(y, srcstep1, x*(int)sizeof(srcT1
)
+ srcoffset1);
int dst_index = mad24(y, dststep, x*(int)sizeof(dstT
)
+ dstoffset);
int src1_index = mad24(y, srcstep1, x*(int)sizeof(srcT1
_C1)*cn
+ srcoffset1);
int dst_index = mad24(y, dststep, x*(int)sizeof(dstT
_C1)*cn
+ dstoffset);
PROCESS_ELEM;
}
...
...
modules/core/src/opencl/copyset.cl
View file @
0fc248fb
...
...
@@ -87,9 +87,21 @@ __kernel void copyToMask(__global const uchar * srcptr, int src_step, int src_of
#
else
#
ifndef
dstST
#
define
dstST
dstT
#
endif
#
if
cn
!=
3
#
define
value
value_
#
define
storedst
(
val
)
*
(
__global
dstT*
)(
dstptr
+
dst_index
)
=
val
#
else
#
define
value
(
dstT
)(
value_.x,
value_.y,
value_.z
)
#
define
storedst
(
val
)
vstore3
(
val,
0
,
(
__global
dstT1*
)(
dstptr
+
dst_index
))
#
endif
__kernel
void
setMask
(
__global
const
uchar*
mask,
int
maskstep,
int
maskoffset,
__global
uchar*
dstptr,
int
dststep,
int
dstoffset,
int
rows,
int
cols,
dst
T
value
)
int
rows,
int
cols,
dst
ST
value_
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
...
...
@@ -99,22 +111,22 @@ __kernel void setMask(__global const uchar* mask, int maskstep, int maskoffset,
int
mask_index
=
mad24
(
y,
maskstep,
x
+
maskoffset
)
;
if
(
mask[mask_index]
)
{
int
dst_index
=
mad24
(
y,
dststep,
x*
(
int
)
sizeof
(
dstT
)
+
dstoffset
)
;
*
(
__global
dstT*
)(
dstptr
+
dst_index
)
=
value
;
int
dst_index
=
mad24
(
y,
dststep,
x*
(
int
)
sizeof
(
dstT
1
)
*cn
+
dstoffset
)
;
storedst
(
value
)
;
}
}
}
__kernel
void
set
(
__global
uchar*
dstptr,
int
dststep,
int
dstoffset,
int
rows,
int
cols,
dst
T
value
)
int
rows,
int
cols,
dst
ST
value_
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
x
<
cols
&&
y
<
rows
)
{
int
dst_index
=
mad24
(
y,
dststep,
x*
(
int
)
sizeof
(
dstT
)
+
dstoffset
)
;
*
(
__global
dstT*
)(
dstptr
+
dst_index
)
=
value
;
int
dst_index
=
mad24
(
y,
dststep,
x*
(
int
)
sizeof
(
dstT
1
)
*cn
+
dstoffset
)
;
storedst
(
value
)
;
}
}
...
...
modules/core/src/umatrix.cpp
View file @
0fc248fb
...
...
@@ -744,20 +744,23 @@ UMat& UMat::setTo(InputArray _value, InputArray _mask)
{
bool
haveMask
=
!
_mask
.
empty
();
int
tp
=
type
(),
cn
=
CV_MAT_CN
(
tp
);
if
(
dims
<=
2
&&
cn
<=
4
&&
cn
!=
3
&&
ocl
::
useOpenCL
()
)
if
(
dims
<=
2
&&
cn
<=
4
&&
CV_MAT_DEPTH
(
tp
)
<
CV_64F
&&
ocl
::
useOpenCL
()
)
{
Mat
value
=
_value
.
getMat
();
CV_Assert
(
checkScalar
(
value
,
type
(),
_value
.
kind
(),
_InputArray
::
UMAT
)
);
double
buf
[
4
];
double
buf
[
4
]
=
{
0
,
0
,
0
,
0
}
;
convertAndUnrollScalar
(
value
,
tp
,
(
uchar
*
)
buf
,
1
);
int
scalarcn
=
cn
==
3
?
4
:
cn
;
char
opts
[
1024
];
sprintf
(
opts
,
"-D dstT=%s"
,
ocl
::
memopTypeToStr
(
tp
));
sprintf
(
opts
,
"-D dstT=%s -D dstST=%s -D dstT1=%s -D cn=%d"
,
ocl
::
memopTypeToStr
(
tp
),
ocl
::
memopTypeToStr
(
CV_MAKETYPE
(
tp
,
scalarcn
)),
ocl
::
memopTypeToStr
(
CV_MAT_DEPTH
(
tp
)),
cn
);
ocl
::
Kernel
setK
(
haveMask
?
"setMask"
:
"set"
,
ocl
::
core
::
copyset_oclsrc
,
opts
);
if
(
!
setK
.
empty
()
)
{
ocl
::
KernelArg
scalararg
(
0
,
0
,
0
,
buf
,
CV_ELEM_SIZE
(
tp
)
);
ocl
::
KernelArg
scalararg
(
0
,
0
,
0
,
buf
,
CV_ELEM_SIZE
1
(
tp
)
*
scalarcn
);
UMat
mask
;
if
(
haveMask
)
...
...
modules/imgproc/src/imgwarp.cpp
View file @
0fc248fb
...
...
@@ -1957,7 +1957,7 @@ static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize,
double
inv_fx
=
1.
/
fx
,
inv_fy
=
1.
/
fy
;
float
inv_fxf
=
(
float
)
inv_fx
,
inv_fyf
=
(
float
)
inv_fy
;
if
(
cn
==
3
||
!
(
cn
<=
4
&&
if
(
!
(
cn
<=
4
&&
(
interpolation
==
INTER_NEAREST
||
interpolation
==
INTER_LINEAR
||
(
interpolation
==
INTER_AREA
&&
inv_fx
>=
1
&&
inv_fy
>=
1
)
))
)
return
false
;
...
...
@@ -1975,15 +1975,18 @@ static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize,
int
wtype
=
CV_MAKETYPE
(
wdepth
,
cn
);
char
buf
[
2
][
32
];
k
.
create
(
"resizeLN"
,
ocl
::
imgproc
::
resize_oclsrc
,
format
(
"-D INTER_LINEAR -D depth=%d -D PIXTYPE=%s -D WORKTYPE=%s -D convertToWT=%s -D convertToDT=%s"
,
depth
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
wtype
),
format
(
"-D INTER_LINEAR -D depth=%d -D PIXTYPE=%s -D PIXTYPE1=%s "
"-D WORKTYPE=%s -D convertToWT=%s -D convertToDT=%s -D cn=%d"
,
depth
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
depth
),
ocl
::
typeToStr
(
wtype
),
ocl
::
convertTypeStr
(
depth
,
wdepth
,
cn
,
buf
[
0
]),
ocl
::
convertTypeStr
(
wdepth
,
depth
,
cn
,
buf
[
1
])));
ocl
::
convertTypeStr
(
wdepth
,
depth
,
cn
,
buf
[
1
]),
cn
));
}
else
if
(
interpolation
==
INTER_NEAREST
)
{
k
.
create
(
"resizeNN"
,
ocl
::
imgproc
::
resize_oclsrc
,
format
(
"-D INTER_NEAREST -D PIXTYPE=%s -D cn"
,
ocl
::
memopTypeToStr
(
type
),
cn
));
format
(
"-D INTER_NEAREST -D PIXTYPE=%s -D PIXTYPE1=%s -D cn=%d"
,
ocl
::
memopTypeToStr
(
type
),
ocl
::
memopTypeToStr
(
depth
),
cn
));
}
else
if
(
interpolation
==
INTER_AREA
)
{
...
...
@@ -1995,9 +1998,9 @@ static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize,
int
wtype
=
CV_MAKE_TYPE
(
wdepth
,
cn
);
char
cvt
[
2
][
40
];
String
buildOption
=
format
(
"-D INTER_AREA -D
T=%s -D WTV=%s -D convertToWTV=%s
"
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
wtype
),
ocl
::
convertTypeStr
(
depth
,
wdepth
,
cn
,
cvt
[
0
]));
String
buildOption
=
format
(
"-D INTER_AREA -D
PIXTYPE=%s -D PIXTYPE1=%s -D WTV=%s -D convertToWTV=%s -D cn=%d
"
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
depth
),
ocl
::
typeToStr
(
wtype
),
ocl
::
convertTypeStr
(
depth
,
wdepth
,
cn
,
cvt
[
0
])
,
cn
);
UMat
alphaOcl
,
tabofsOcl
,
mapOcl
;
UMat
dmap
,
smap
;
...
...
@@ -2005,7 +2008,7 @@ static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize,
if
(
is_area_fast
)
{
int
wdepth2
=
std
::
max
(
CV_32F
,
depth
),
wtype2
=
CV_MAKE_TYPE
(
wdepth2
,
cn
);
buildOption
=
buildOption
+
format
(
" -D convertTo
T
=%s -D WT2V=%s -D convertToWT2V=%s -D INTER_AREA_FAST"
buildOption
=
buildOption
+
format
(
" -D convertTo
PIXTYPE
=%s -D WT2V=%s -D convertToWT2V=%s -D INTER_AREA_FAST"
" -D XSCALE=%d -D YSCALE=%d -D SCALE=%ff"
,
ocl
::
convertTypeStr
(
wdepth2
,
depth
,
cn
,
cvt
[
0
]),
ocl
::
typeToStr
(
wtype2
),
ocl
::
convertTypeStr
(
wdepth
,
wdepth2
,
cn
,
cvt
[
1
]),
...
...
@@ -2028,7 +2031,7 @@ static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize,
}
else
{
buildOption
=
buildOption
+
format
(
" -D convertTo
T
=%s"
,
ocl
::
convertTypeStr
(
wdepth
,
depth
,
cn
,
cvt
[
0
]));
buildOption
=
buildOption
+
format
(
" -D convertTo
PIXTYPE
=%s"
,
ocl
::
convertTypeStr
(
wdepth
,
depth
,
cn
,
cvt
[
0
]));
k
.
create
(
"resizeAREA"
,
ocl
::
imgproc
::
resize_oclsrc
,
buildOption
);
if
(
k
.
empty
())
return
false
;
...
...
@@ -3887,7 +3890,7 @@ static bool ocl_warpTransform(InputArray _src, OutputArray _dst, InputArray _M0,
{
CV_Assert
(
op_type
==
OCL_OP_AFFINE
||
op_type
==
OCL_OP_PERSPECTIVE
);
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
)
,
wdepth
=
depth
;
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
double
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
int
interpolation
=
flags
&
INTER_MAX
;
...
...
@@ -3896,7 +3899,7 @@ static bool ocl_warpTransform(InputArray _src, OutputArray _dst, InputArray _M0,
if
(
!
(
borderType
==
cv
::
BORDER_CONSTANT
&&
(
interpolation
==
cv
::
INTER_NEAREST
||
interpolation
==
cv
::
INTER_LINEAR
||
interpolation
==
cv
::
INTER_CUBIC
))
||
(
!
doubleSupport
&&
depth
==
CV_64F
)
||
cn
>
4
||
cn
==
3
)
(
!
doubleSupport
&&
depth
==
CV_64F
)
||
cn
>
4
)
return
false
;
const
char
*
const
interpolationMap
[
3
]
=
{
"NEAREST"
,
"LINEAR"
,
"CUBIC"
};
...
...
@@ -3904,28 +3907,40 @@ static bool ocl_warpTransform(InputArray _src, OutputArray _dst, InputArray _M0,
ocl
::
imgproc
::
warp_affine_oclsrc
:
ocl
::
imgproc
::
warp_perspective_oclsrc
;
const
char
*
const
kernelName
=
op_type
==
OCL_OP_AFFINE
?
"warpAffine"
:
"warpPerspective"
;
int
scalarcn
=
cn
==
3
?
4
:
cn
;
int
wdepth
=
interpolation
==
INTER_NEAREST
?
depth
:
std
::
max
(
CV_32S
,
depth
);
int
sctype
=
CV_MAKETYPE
(
wdepth
,
scalarcn
);
ocl
::
Kernel
k
;
String
opts
;
if
(
interpolation
==
INTER_NEAREST
)
{
k
.
create
(
kernelName
,
program
,
format
(
"-D INTER_NEAREST -D T=%s%s"
,
ocl
::
typeToStr
(
type
),
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
));
opts
=
format
(
"-D INTER_NEAREST -D T=%s%s -D T1=%s -D ST=%s -D cn=%d"
,
ocl
::
typeToStr
(
type
),
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
,
ocl
::
typeToStr
(
CV_MAT_DEPTH
(
type
)),
ocl
::
typeToStr
(
sctype
),
cn
);
}
else
{
char
cvt
[
2
][
50
];
wdepth
=
std
::
max
(
CV_32S
,
depth
);
k
.
create
(
kernelName
,
program
,
format
(
"-D INTER_%s -D T=%s -D WT=%s -D depth=%d -D convertToWT=%s -D convertToT=%s%s"
,
interpolationMap
[
interpolation
],
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
wdepth
,
cn
)),
depth
,
ocl
::
convertTypeStr
(
depth
,
wdepth
,
cn
,
cvt
[
0
]),
ocl
::
convertTypeStr
(
wdepth
,
depth
,
cn
,
cvt
[
1
]),
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
)
);
opts
=
format
(
"-D INTER_%s -D T=%s -D T1=%s -D ST=%s -D WT=%s -D depth=%d -D convertToWT=%s -D convertToT=%s%s -D cn=%d"
,
interpolationMap
[
interpolation
],
ocl
::
typeToStr
(
type
)
,
ocl
::
typeToStr
(
CV_MAT_DEPTH
(
type
))
,
ocl
::
typeToStr
(
sc
type
),
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
wdepth
,
cn
)),
depth
,
ocl
::
convertTypeStr
(
depth
,
wdepth
,
cn
,
cvt
[
0
]),
ocl
::
convertTypeStr
(
wdepth
,
depth
,
cn
,
cvt
[
1
]),
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
,
cn
);
}
k
.
create
(
kernelName
,
program
,
opts
);
if
(
k
.
empty
())
return
false
;
double
borderBuf
[]
=
{
0
,
0
,
0
,
0
};
scalarToRawData
(
borderValue
,
borderBuf
,
sctype
);
UMat
src
=
_src
.
getUMat
(),
M0
;
_dst
.
create
(
dsize
.
area
()
==
0
?
src
.
size
()
:
dsize
,
src
.
type
()
);
UMat
dst
=
_dst
.
getUMat
();
...
...
@@ -3956,7 +3971,7 @@ static bool ocl_warpTransform(InputArray _src, OutputArray _dst, InputArray _M0,
matM
.
convertTo
(
M0
,
doubleSupport
?
CV_64F
:
CV_32F
);
k
.
args
(
ocl
::
KernelArg
::
ReadOnly
(
src
),
ocl
::
KernelArg
::
WriteOnly
(
dst
),
ocl
::
KernelArg
::
PtrReadOnly
(
M0
),
ocl
::
KernelArg
::
Constant
(
Mat
(
1
,
1
,
CV_MAKE_TYPE
(
wdepth
,
cn
),
borderValu
e
)));
ocl
::
KernelArg
(
0
,
0
,
0
,
borderBuf
,
CV_ELEM_SIZE
(
sctyp
e
)));
size_t
globalThreads
[
2
]
=
{
dst
.
cols
,
dst
.
rows
};
return
k
.
run
(
2
,
globalThreads
,
NULL
,
false
);
...
...
modules/imgproc/src/opencl/resize.cl
View file @
0fc248fb
...
...
@@ -52,9 +52,19 @@
#
define
CAST_BITS
(
INTER_RESIZE_COEF_BITS
<<
1
)
#
define
INC
(
x,l
)
min
(
x+1,l-1
)
#
define
PIXSIZE
((
int
)
sizeof
(
PIXTYPE
))
#
define
noconvert
(
x
)
(
x
)
#
if
cn
!=
3
#
define
loadpix
(
addr
)
*
(
__global
const
PIXTYPE*
)(
addr
)
#
define
storepix
(
val,
addr
)
*
(
__global
PIXTYPE*
)(
addr
)
=
val
#
define
PIXSIZE
((
int
)
sizeof
(
PIXTYPE
))
#
else
#
define
loadpix
(
addr
)
vload3
(
0
,
(
__global
const
PIXTYPE1*
)(
addr
))
#
define
storepix
(
val,
addr
)
vstore3
(
val,
0
,
(
__global
PIXTYPE1*
)(
addr
))
#
define
PIXSIZE
((
int
)
sizeof
(
PIXTYPE1
)
*3
)
#
endif
#
if
defined
INTER_LINEAR
__kernel
void
resizeLN
(
__global
const
uchar*
srcptr,
int
srcstep,
int
srcoffset,
...
...
@@ -89,10 +99,10 @@ __kernel void resizeLN(__global const uchar* srcptr, int srcstep, int srcoffset,
int
U1
=
rint
(
INTER_RESIZE_COEF_SCALE
-
u
)
;
int
V1
=
rint
(
INTER_RESIZE_COEF_SCALE
-
v
)
;
WORKTYPE
data0
=
convertToWT
(
*
(
__global
const
PIXTYPE*
)
(
srcptr
+
mad24
(
y,
srcstep,
srcoffset
+
x*PIXSIZE
)))
;
WORKTYPE
data1
=
convertToWT
(
*
(
__global
const
PIXTYPE*
)
(
srcptr
+
mad24
(
y,
srcstep,
srcoffset
+
x_*PIXSIZE
)))
;
WORKTYPE
data2
=
convertToWT
(
*
(
__global
const
PIXTYPE*
)
(
srcptr
+
mad24
(
y_,
srcstep,
srcoffset
+
x*PIXSIZE
)))
;
WORKTYPE
data3
=
convertToWT
(
*
(
__global
const
PIXTYPE*
)
(
srcptr
+
mad24
(
y_,
srcstep,
srcoffset
+
x_*PIXSIZE
)))
;
WORKTYPE
data0
=
convertToWT
(
loadpix
(
srcptr
+
mad24
(
y,
srcstep,
srcoffset
+
x*PIXSIZE
)))
;
WORKTYPE
data1
=
convertToWT
(
loadpix
(
srcptr
+
mad24
(
y,
srcstep,
srcoffset
+
x_*PIXSIZE
)))
;
WORKTYPE
data2
=
convertToWT
(
loadpix
(
srcptr
+
mad24
(
y_,
srcstep,
srcoffset
+
x*PIXSIZE
)))
;
WORKTYPE
data3
=
convertToWT
(
loadpix
(
srcptr
+
mad24
(
y_,
srcstep,
srcoffset
+
x_*PIXSIZE
)))
;
WORKTYPE
val
=
mul24
((
WORKTYPE
)
mul24
(
U1,
V1
)
,
data0
)
+
mul24
((
WORKTYPE
)
mul24
(
U,
V1
)
,
data1
)
+
mul24
((
WORKTYPE
)
mul24
(
U1,
V
)
,
data2
)
+
mul24
((
WORKTYPE
)
mul24
(
U,
V
)
,
data3
)
;
...
...
@@ -102,10 +112,10 @@ __kernel void resizeLN(__global const uchar* srcptr, int srcstep, int srcoffset,
#
else
float
u1
=
1.f
-
u
;
float
v1
=
1.f
-
v
;
WORKTYPE
data0
=
convertToWT
(
*
(
__global
const
PIXTYPE*
)
(
srcptr
+
mad24
(
y,
srcstep,
srcoffset
+
x*PIXSIZE
)))
;
WORKTYPE
data1
=
convertToWT
(
*
(
__global
const
PIXTYPE*
)
(
srcptr
+
mad24
(
y,
srcstep,
srcoffset
+
x_*PIXSIZE
)))
;
WORKTYPE
data2
=
convertToWT
(
*
(
__global
const
PIXTYPE*
)
(
srcptr
+
mad24
(
y_,
srcstep,
srcoffset
+
x*PIXSIZE
)))
;
WORKTYPE
data3
=
convertToWT
(
*
(
__global
const
PIXTYPE*
)
(
srcptr
+
mad24
(
y_,
srcstep,
srcoffset
+
x_*PIXSIZE
)))
;
WORKTYPE
data0
=
convertToWT
(
loadpix
(
srcptr
+
mad24
(
y,
srcstep,
srcoffset
+
x*PIXSIZE
)))
;
WORKTYPE
data1
=
convertToWT
(
loadpix
(
srcptr
+
mad24
(
y,
srcstep,
srcoffset
+
x_*PIXSIZE
)))
;
WORKTYPE
data2
=
convertToWT
(
loadpix
(
srcptr
+
mad24
(
y_,
srcstep,
srcoffset
+
x*PIXSIZE
)))
;
WORKTYPE
data3
=
convertToWT
(
loadpix
(
srcptr
+
mad24
(
y_,
srcstep,
srcoffset
+
x_*PIXSIZE
)))
;
PIXTYPE
uval
=
u1
*
v1
*
data0
+
u
*
v1
*
data1
+
u1
*
v
*data2
+
u
*
v
*data3
;
...
...
@@ -113,8 +123,7 @@ __kernel void resizeLN(__global const uchar* srcptr, int srcstep, int srcoffset,
if
(
dx
<
dstcols
&&
dy
<
dstrows
)
{
__global
PIXTYPE*
dst
=
(
__global
PIXTYPE*
)(
dstptr
+
mad24
(
dy,
dststep,
dstoffset
+
dx*PIXSIZE
))
;
dst[0]
=
uval
;
storepix
(
uval,
dstptr
+
mad24
(
dy,
dststep,
dstoffset
+
dx*PIXSIZE
))
;
}
}
...
...
@@ -136,17 +145,13 @@ __kernel void resizeNN(__global const uchar* srcptr, int srcstep, int srcoffset,
int
sx
=
min
(
convert_int_rtz
(
s1
)
,
srccols-1
)
;
int
sy
=
min
(
convert_int_rtz
(
s2
)
,
srcrows-1
)
;
__global
PIXTYPE*
dst
=
(
__global
PIXTYPE*
)(
dstptr
+
mad24
(
dy,
dststep,
dstoffset
+
dx*PIXSIZE
))
;
__global
const
PIXTYPE*
src
=
(
__global
const
PIXTYPE*
)(
srcptr
+
mad24
(
sy,
srcstep,
srcoffset
+
sx*PIXSIZE
))
;
dst[0]
=
src[0]
;
storepix
(
loadpix
(
srcptr
+
mad24
(
sy,
srcstep,
srcoffset
+
sx*PIXSIZE
))
,
dstptr
+
mad24
(
dy,
dststep,
dstoffset
+
dx*PIXSIZE
))
;
}
}
#
elif
defined
INTER_AREA
#
define
TSIZE
((
int
)(
sizeof
(
T
)))
#
ifdef
INTER_AREA_FAST
__kernel
void
resizeAREA_FAST
(
__global
const
uchar
*
src,
int
src_step,
int
src_offset,
int
src_rows,
int
src_cols,
...
...
@@ -174,10 +179,10 @@ __kernel void resizeAREA_FAST(__global const uchar * src, int src_step, int src_
int
src_index
=
mad24
(
symap_tab[y
+
sy],
src_step,
src_offset
)
;
#
pragma
unroll
for
(
int
x
=
0
; x < XSCALE; ++x)
sum
+=
convertToWTV
(
((
__global
const
T*
)(
src
+
src_index
))
[sxmap_tab[sx
+
x]]
)
;
sum
+=
convertToWTV
(
loadpix
(
src
+
src_index
+
sxmap_tab[sx
+
x]*PIXSIZE
)
)
;
}
((
__global
T*
)(
dst
+
dst_index
))
[dx]
=
convertToT
(
convertToWT2V
(
sum
)
*
(
WT2V
)(
SCALE
)
)
;
storepix
(
convertToPIXTYPE
(
convertToWT2V
(
sum
)
*
(
WT2V
)(
SCALE
))
,
dst
+
dst_index
+
dx*PIXSIZE
)
;
}
}
...
...
@@ -219,12 +224,12 @@ __kernel void resizeAREA(__global const uchar * src, int src_step, int src_offse
for
(
int
sx
=
sx0,
xk
=
xk0
; sx <= sx1; ++sx, ++xk)
{
WTV
alpha
=
(
WTV
)(
xalpha_tab[xk]
)
;
buf
+=
convertToWTV
(
((
__global
const
T*
)(
src
+
src_index
))
[sx]
)
*
alpha
;
buf
+=
convertToWTV
(
loadpix
(
src
+
src_index
+
sx*PIXSIZE
)
)
*
alpha
;
}
sum
+=
buf
*
beta
;
}
((
__global
T*
)(
dst
+
dst_index
))
[dx]
=
convertToT
(
sum
)
;
storepix
(
convertToPIXTYPE
(
sum
)
,
dst
+
dst_index
+
dx*PIXSIZE
)
;
}
}
...
...
modules/imgproc/src/opencl/warp_affine.cl
View file @
0fc248fb
...
...
@@ -64,11 +64,31 @@
#
define
noconvert
#
ifndef
ST
#
define
ST
T
#
endif
#
if
cn
!=
3
#
define
loadpix
(
addr
)
*
(
__global
const
T*
)(
addr
)
#
define
storepix
(
val,
addr
)
*
(
__global
T*
)(
addr
)
=
val
#
define
scalar
scalar_
#
define
pixsize
(
int
)
sizeof
(
T
)
#
else
#
define
loadpix
(
addr
)
vload3
(
0
,
(
__global
const
T1*
)(
addr
))
#
define
storepix
(
val,
addr
)
vstore3
(
val,
0
,
(
__global
T1*
)(
addr
))
#
ifdef
INTER_NEAREST
#
define
scalar
(
T
)(
scalar_.x,
scalar_.y,
scalar_.z
)
#
else
#
define
scalar
(
WT
)(
scalar_.x,
scalar_.y,
scalar_.z
)
#
endif
#
define
pixsize
((
int
)
sizeof
(
T1
)
*3
)
#
endif
#
ifdef
INTER_NEAREST
__kernel
void
warpAffine
(
__global
const
uchar
*
srcptr,
int
src_step,
int
src_offset,
int
src_rows,
int
src_cols,
__global
uchar
*
dstptr,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols,
__constant
CT
*
M,
T
scalar
)
__constant
CT
*
M,
ST
scalar_
)
{
int
dx
=
get_global_id
(
0
)
;
int
dy
=
get_global_id
(
1
)
;
...
...
@@ -85,17 +105,15 @@ __kernel void warpAffine(__global const uchar * srcptr, int src_step, int src_of
short
sx
=
convert_short_sat
(
X0
>>
AB_BITS
)
;
short
sy
=
convert_short_sat
(
Y0
>>
AB_BITS
)
;
int
dst_index
=
mad24
(
dy,
dst_step,
dst_offset
+
dx
*
(
int
)
sizeof
(
T
))
;
__global
T
*
dst
=
(
__global
T
*
)(
dstptr
+
dst_index
)
;
int
dst_index
=
mad24
(
dy,
dst_step,
dst_offset
+
dx
*
pixsize
)
;
if
(
sx
>=
0
&&
sx
<
src_cols
&&
sy
>=
0
&&
sy
<
src_rows
)
{
int
src_index
=
mad24
(
sy,
src_step,
src_offset
+
sx
*
(
int
)
sizeof
(
T
))
;
__global
const
T
*
src
=
(
__global
const
T
*
)(
srcptr
+
src_index
)
;
dst[0]
=
src[0]
;
int
src_index
=
mad24
(
sy,
src_step,
src_offset
+
sx
*
pixsize
)
;
storepix
(
loadpix
(
srcptr
+
src_index
)
,
dstptr
+
dst_index
)
;
}
else
dst[0]
=
scalar
;
storepix
(
scalar,
dstptr
+
dst_index
)
;
}
}
...
...
@@ -103,7 +121,7 @@ __kernel void warpAffine(__global const uchar * srcptr, int src_step, int src_of
__kernel
void
warpAffine
(
__global
const
uchar
*
srcptr,
int
src_step,
int
src_offset,
int
src_rows,
int
src_cols,
__global
uchar
*
dstptr,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols,
__constant
CT
*
M,
WT
scalar
)
__constant
CT
*
M,
ST
scalar_
)
{
int
dx
=
get_global_id
(
0
)
;
int
dy
=
get_global_id
(
1
)
;
...
...
@@ -126,19 +144,18 @@ __kernel void warpAffine(__global const uchar * srcptr, int src_step, int src_of
short
ay
=
convert_short
(
Y0
&
(
INTER_TAB_SIZE-1
))
;
WT
v0
=
(
sx
>=
0
&&
sx
<
src_cols
&&
sy
>=
0
&&
sy
<
src_rows
)
?
convertToWT
(
*
(
__global
const
T
*
)(
srcptr
+
mad24
(
sy,
src_step,
src_offset
+
sx
*
(
int
)
sizeof
(
T
)
)))
:
scalar
;
convertToWT
(
loadpix
(
srcptr
+
mad24
(
sy,
src_step,
src_offset
+
sx
*
pixsize
)))
:
scalar
;
WT
v1
=
(
sx+1
>=
0
&&
sx+1
<
src_cols
&&
sy
>=
0
&&
sy
<
src_rows
)
?
convertToWT
(
*
(
__global
const
T
*
)(
srcptr
+
mad24
(
sy,
src_step,
src_offset
+
(
sx+1
)
*
(
int
)
sizeof
(
T
)
)))
:
scalar
;
convertToWT
(
loadpix
(
srcptr
+
mad24
(
sy,
src_step,
src_offset
+
(
sx+1
)
*
pixsize
)))
:
scalar
;
WT
v2
=
(
sx
>=
0
&&
sx
<
src_cols
&&
sy+1
>=
0
&&
sy+1
<
src_rows
)
?
convertToWT
(
*
(
__global
const
T
*
)(
srcptr
+
mad24
(
sy+1,
src_step,
src_offset
+
sx
*
(
int
)
sizeof
(
T
)
)))
:
scalar
;
convertToWT
(
loadpix
(
srcptr
+
mad24
(
sy+1,
src_step,
src_offset
+
sx
*
pixsize
)))
:
scalar
;
WT
v3
=
(
sx+1
>=
0
&&
sx+1
<
src_cols
&&
sy+1
>=
0
&&
sy+1
<
src_rows
)
?
convertToWT
(
*
(
__global
const
T
*
)(
srcptr
+
mad24
(
sy+1,
src_step,
src_offset
+
(
sx+1
)
*
(
int
)
sizeof
(
T
)
)))
:
scalar
;
convertToWT
(
loadpix
(
srcptr
+
mad24
(
sy+1,
src_step,
src_offset
+
(
sx+1
)
*
pixsize
)))
:
scalar
;
float
taby
=
1.f/INTER_TAB_SIZE*ay
;
float
tabx
=
1.f/INTER_TAB_SIZE*ax
;
int
dst_index
=
mad24
(
dy,
dst_step,
dst_offset
+
dx
*
(
int
)
sizeof
(
T
))
;
__global
T
*
dst
=
(
__global
T
*
)(
dstptr
+
dst_index
)
;
int
dst_index
=
mad24
(
dy,
dst_step,
dst_offset
+
dx
*
pixsize
)
;
#
if
depth
<=
4
int
itab0
=
convert_short_sat_rte
(
(
1.0f-taby
)
*
(
1.0f-tabx
)
*
INTER_REMAP_COEF_SCALE
)
;
...
...
@@ -147,11 +164,11 @@ __kernel void warpAffine(__global const uchar * srcptr, int src_step, int src_of
int
itab3
=
convert_short_sat_rte
(
taby*tabx
*
INTER_REMAP_COEF_SCALE
)
;
WT
val
=
v0
*
itab0
+
v1
*
itab1
+
v2
*
itab2
+
v3
*
itab3
;
dst[0]
=
convertToT
((
val
+
(
1
<<
(
INTER_REMAP_COEF_BITS-1
)))
>>
INTER_REMAP_COEF_BITS
)
;
storepix
(
convertToT
((
val
+
(
1
<<
(
INTER_REMAP_COEF_BITS-1
)))
>>
INTER_REMAP_COEF_BITS
)
,
dstptr
+
dst_index
)
;
#
else
float
tabx2
=
1.0f
-
tabx,
taby2
=
1.0f
-
taby
;
WT
val
=
v0
*
tabx2
*
taby2
+
v1
*
tabx
*
taby2
+
v2
*
tabx2
*
taby
+
v3
*
tabx
*
taby
;
dst[0]
=
convertToT
(
val
)
;
storepix
(
convertToT
(
val
)
,
dstptr
+
dst_index
)
;
#
endif
}
}
...
...
@@ -170,7 +187,7 @@ inline void interpolateCubic( float x, float* coeffs )
__kernel
void
warpAffine
(
__global
const
uchar
*
srcptr,
int
src_step,
int
src_offset,
int
src_rows,
int
src_cols,
__global
uchar
*
dstptr,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols,
__constant
CT
*
M,
WT
scalar
)
__constant
CT
*
M,
ST
scalar_
)
{
int
dx
=
get_global_id
(
0
)
;
int
dy
=
get_global_id
(
1
)
;
...
...
@@ -198,7 +215,7 @@ __kernel void warpAffine(__global const uchar * srcptr, int src_step, int src_of
#
pragma
unroll
for
(
int
x
=
0
; x < 4; x++)
v[mad24
(
y,
4
,
x
)
]
=
(
sx+x
>=
0
&&
sx+x
<
src_cols
&&
sy+y
>=
0
&&
sy+y
<
src_rows
)
?
convertToWT
(
*
(
__global
const
T
*
)(
srcptr
+
mad24
(
sy+y,
src_step,
src_offset
+
(
sx+x
)
*
(
int
)
sizeof
(
T
)
)))
:
scalar
;
convertToWT
(
loadpix
(
srcptr
+
mad24
(
sy+y,
src_step,
src_offset
+
(
sx+x
)
*
pixsize
)))
:
scalar
;
float
tab1y[4],
tab1x[4]
;
...
...
@@ -207,8 +224,7 @@ __kernel void warpAffine(__global const uchar * srcptr, int src_step, int src_of
interpolateCubic
(
ayy,
tab1y
)
;
interpolateCubic
(
axx,
tab1x
)
;
int
dst_index
=
mad24
(
dy,
dst_step,
dst_offset
+
dx
*
(
int
)
sizeof
(
T
))
;
__global
T
*
dst
=
(
__global
T
*
)(
dstptr
+
dst_index
)
;
int
dst_index
=
mad24
(
dy,
dst_step,
dst_offset
+
dx
*
pixsize
)
;
WT
sum
=
(
WT
)(
0
)
;
#
if
depth
<=
4
...
...
@@ -221,12 +237,12 @@ __kernel void warpAffine(__global const uchar * srcptr, int src_step, int src_of
#
pragma
unroll
for
(
int
i
=
0
; i < 16; i++)
sum
+=
v[i]
*
itab[i]
;
dst[0]
=
convertToT
(
(
sum
+
(
1
<<
(
INTER_REMAP_COEF_BITS-1
)))
>>
INTER_REMAP_COEF_BITS
)
;
storepix
(
convertToT
(
(
sum
+
(
1
<<
(
INTER_REMAP_COEF_BITS-1
)))
>>
INTER_REMAP_COEF_BITS
)
,
dstptr
+
dst_index
)
;
#
else
#
pragma
unroll
for
(
int
i
=
0
; i < 16; i++)
sum
+=
v[i]
*
tab1y[
(
i>>2
)
]
*
tab1x[
(
i&3
)
]
;
dst[0]
=
convertToT
(
sum
)
;
storepix
(
convertToT
(
sum
)
,
dstptr
+
dst_index
)
;
#
endif
}
}
...
...
modules/imgproc/src/opencl/warp_perspective.cl
View file @
0fc248fb
...
...
@@ -64,11 +64,31 @@
#
define
noconvert
#
ifndef
ST
#
define
ST
T
#
endif
#
if
cn
!=
3
#
define
loadpix
(
addr
)
*
(
__global
const
T*
)(
addr
)
#
define
storepix
(
val,
addr
)
*
(
__global
T*
)(
addr
)
=
val
#
define
scalar
scalar_
#
define
pixsize
(
int
)
sizeof
(
T
)
#
else
#
define
loadpix
(
addr
)
vload3
(
0
,
(
__global
const
T1*
)(
addr
))
#
define
storepix
(
val,
addr
)
vstore3
(
val,
0
,
(
__global
T1*
)(
addr
))
#
ifdef
INTER_NEAREST
#
define
scalar
(
T
)(
scalar_.x,
scalar_.y,
scalar_.z
)
#
else
#
define
scalar
(
WT
)(
scalar_.x,
scalar_.y,
scalar_.z
)
#
endif
#
define
pixsize
((
int
)
sizeof
(
T1
)
*3
)
#
endif
#
ifdef
INTER_NEAREST
__kernel
void
warpPerspective
(
__global
const
uchar
*
srcptr,
int
src_step,
int
src_offset,
int
src_rows,
int
src_cols,
__global
uchar
*
dstptr,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols,
__constant
CT
*
M,
T
scalar
)
__constant
CT
*
M,
ST
scalar_
)
{
int
dx
=
get_global_id
(
0
)
;
int
dy
=
get_global_id
(
1
)
;
...
...
@@ -82,17 +102,15 @@ __kernel void warpPerspective(__global const uchar * srcptr, int src_step, int s
short
sx
=
convert_short_sat_rte
(
X0*W
)
;
short
sy
=
convert_short_sat_rte
(
Y0*W
)
;
int
dst_index
=
mad24
(
dy,
dst_step,
dx
*
(
int
)
sizeof
(
T
)
+
dst_offset
)
;
__global
T
*
dst
=
(
__global
T
*
)(
dstptr
+
dst_index
)
;
int
dst_index
=
mad24
(
dy,
dst_step,
dx
*
pixsize
+
dst_offset
)
;
if
(
sx
>=
0
&&
sx
<
src_cols
&&
sy
>=
0
&&
sy
<
src_rows
)
{
int
src_index
=
mad24
(
sy,
src_step,
sx
*
(
int
)
sizeof
(
T
)
+
src_offset
)
;
__global
const
T
*
src
=
(
__global
const
T
*
)(
srcptr
+
src_index
)
;
dst[0]
=
src[0]
;
int
src_index
=
mad24
(
sy,
src_step,
sx
*
pixsize
+
src_offset
)
;
storepix
(
loadpix
(
srcptr
+
src_index
)
,
dstptr
+
dst_index
)
;
}
else
dst[0]
=
scalar
;
storepix
(
scalar,
dstptr
+
dst_index
)
;
}
}
...
...
@@ -100,7 +118,7 @@ __kernel void warpPerspective(__global const uchar * srcptr, int src_step, int s
__kernel
void
warpPerspective
(
__global
const
uchar
*
srcptr,
int
src_step,
int
src_offset,
int
src_rows,
int
src_cols,
__global
uchar
*
dstptr,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols,
__constant
CT
*
M,
WT
scalar
)
__constant
CT
*
M,
ST
scalar_
)
{
int
dx
=
get_global_id
(
0
)
;
int
dy
=
get_global_id
(
1
)
;
...
...
@@ -119,19 +137,18 @@ __kernel void warpPerspective(__global const uchar * srcptr, int src_step, int s
short
ax
=
(
short
)(
X
&
(
INTER_TAB_SIZE
-
1
))
;
WT
v0
=
(
sx
>=
0
&&
sx
<
src_cols
&&
sy
>=
0
&&
sy
<
src_rows
)
?
convertToWT
(
*
(
__global
const
T
*
)(
srcptr
+
mad24
(
sy,
src_step,
src_offset
+
sx
*
(
int
)
sizeof
(
T
)
)))
:
scalar
;
convertToWT
(
loadpix
(
srcptr
+
mad24
(
sy,
src_step,
src_offset
+
sx
*
pixsize
)))
:
scalar
;
WT
v1
=
(
sx+1
>=
0
&&
sx+1
<
src_cols
&&
sy
>=
0
&&
sy
<
src_rows
)
?
convertToWT
(
*
(
__global
const
T
*
)(
srcptr
+
mad24
(
sy,
src_step,
src_offset
+
(
sx+1
)
*
(
int
)
sizeof
(
T
)
)))
:
scalar
;
convertToWT
(
loadpix
(
srcptr
+
mad24
(
sy,
src_step,
src_offset
+
(
sx+1
)
*
pixsize
)))
:
scalar
;
WT
v2
=
(
sx
>=
0
&&
sx
<
src_cols
&&
sy+1
>=
0
&&
sy+1
<
src_rows
)
?
convertToWT
(
*
(
__global
const
T
*
)(
srcptr
+
mad24
(
sy+1,
src_step,
src_offset
+
sx
*
(
int
)
sizeof
(
T
)
)))
:
scalar
;
convertToWT
(
loadpix
(
srcptr
+
mad24
(
sy+1,
src_step,
src_offset
+
sx
*
pixsize
)))
:
scalar
;
WT
v3
=
(
sx+1
>=
0
&&
sx+1
<
src_cols
&&
sy+1
>=
0
&&
sy+1
<
src_rows
)
?
convertToWT
(
*
(
__global
const
T
*
)(
srcptr
+
mad24
(
sy+1,
src_step,
src_offset
+
(
sx+1
)
*
(
int
)
sizeof
(
T
)
)))
:
scalar
;
convertToWT
(
loadpix
(
srcptr
+
mad24
(
sy+1,
src_step,
src_offset
+
(
sx+1
)
*
pixsize
)))
:
scalar
;
float
taby
=
1.f/INTER_TAB_SIZE*ay
;
float
tabx
=
1.f/INTER_TAB_SIZE*ax
;
int
dst_index
=
mad24
(
dy,
dst_step,
dst_offset
+
dx
*
(
int
)
sizeof
(
T
))
;
__global
T
*
dst
=
(
__global
T
*
)(
dstptr
+
dst_index
)
;
int
dst_index
=
mad24
(
dy,
dst_step,
dst_offset
+
dx
*
pixsize
)
;
#
if
depth
<=
4
int
itab0
=
convert_short_sat_rte
(
(
1.0f-taby
)
*
(
1.0f-tabx
)
*
INTER_REMAP_COEF_SCALE
)
;
...
...
@@ -140,11 +157,11 @@ __kernel void warpPerspective(__global const uchar * srcptr, int src_step, int s
int
itab3
=
convert_short_sat_rte
(
taby*tabx
*
INTER_REMAP_COEF_SCALE
)
;
WT
val
=
v0
*
itab0
+
v1
*
itab1
+
v2
*
itab2
+
v3
*
itab3
;
dst[0]
=
convertToT
((
val
+
(
1
<<
(
INTER_REMAP_COEF_BITS-1
)))
>>
INTER_REMAP_COEF_BITS
)
;
storepix
(
convertToT
((
val
+
(
1
<<
(
INTER_REMAP_COEF_BITS-1
)))
>>
INTER_REMAP_COEF_BITS
)
,
dstptr
+
dst_index
)
;
#
else
float
tabx2
=
1.0f
-
tabx,
taby2
=
1.0f
-
taby
;
WT
val
=
v0
*
tabx2
*
taby2
+
v1
*
tabx
*
taby2
+
v2
*
tabx2
*
taby
+
v3
*
tabx
*
taby
;
dst[0]
=
convertToT
(
val
)
;
storepix
(
convertToT
(
val
)
,
dstptr
+
dst_index
)
;
#
endif
}
}
...
...
@@ -163,7 +180,7 @@ inline void interpolateCubic( float x, float* coeffs )
__kernel
void
warpPerspective
(
__global
const
uchar
*
srcptr,
int
src_step,
int
src_offset,
int
src_rows,
int
src_cols,
__global
uchar
*
dstptr,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols,
__constant
CT
*
M,
WT
scalar
)
__constant
CT
*
M,
ST
scalar_
)
{
int
dx
=
get_global_id
(
0
)
;
int
dy
=
get_global_id
(
1
)
;
...
...
@@ -187,7 +204,7 @@ __kernel void warpPerspective(__global const uchar * srcptr, int src_step, int s
#
pragma
unroll
for
(
int
x
=
0
; x < 4; x++)
v[mad24
(
y,
4
,
x
)
]
=
(
sx+x
>=
0
&&
sx+x
<
src_cols
&&
sy+y
>=
0
&&
sy+y
<
src_rows
)
?
convertToWT
(
*
(
__global
const
T
*
)(
srcptr
+
mad24
(
sy+y,
src_step,
src_offset
+
(
sx+x
)
*
(
int
)
sizeof
(
T
)
)))
:
scalar
;
convertToWT
(
loadpix
(
srcptr
+
mad24
(
sy+y,
src_step,
src_offset
+
(
sx+x
)
*
pixsize
)))
:
scalar
;
float
tab1y[4],
tab1x[4]
;
...
...
@@ -196,8 +213,7 @@ __kernel void warpPerspective(__global const uchar * srcptr, int src_step, int s
interpolateCubic
(
ayy,
tab1y
)
;
interpolateCubic
(
axx,
tab1x
)
;
int
dst_index
=
mad24
(
dy,
dst_step,
dst_offset
+
dx
*
(
int
)
sizeof
(
T
))
;
__global
T
*
dst
=
(
__global
T
*
)(
dstptr
+
dst_index
)
;
int
dst_index
=
mad24
(
dy,
dst_step,
dst_offset
+
dx
*
pixsize
)
;
WT
sum
=
(
WT
)(
0
)
;
#
if
depth
<=
4
...
...
@@ -210,12 +226,12 @@ __kernel void warpPerspective(__global const uchar * srcptr, int src_step, int s
#
pragma
unroll
for
(
int
i
=
0
; i < 16; i++)
sum
+=
v[i]
*
itab[i]
;
dst[0]
=
convertToT
(
(
sum
+
(
1
<<
(
INTER_REMAP_COEF_BITS-1
)))
>>
INTER_REMAP_COEF_BITS
)
;
storepix
(
convertToT
(
(
sum
+
(
1
<<
(
INTER_REMAP_COEF_BITS-1
)))
>>
INTER_REMAP_COEF_BITS
)
,
dstptr
+
dst_index
)
;
#
else
#
pragma
unroll
for
(
int
i
=
0
; i < 16; i++)
sum
+=
v[i]
*
tab1y[
(
i>>2
)
]
*
tab1x[
(
i&3
)
]
;
dst[0]
=
convertToT
(
sum
)
;
storepix
(
convertToT
(
sum
)
,
dstptr
+
dst_index
)
;
#
endif
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment