Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
fcec053d
Commit
fcec053d
authored
Feb 05, 2019
by
Alexander Alekhin
Browse files
Options
Browse Files
Download
Plain Diff
Merge remote-tracking branch 'upstream/3.4' into merge-3.4
parents
d073215f
a63f66c9
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
785 additions
and
261 deletions
+785
-261
ocl.cpp
modules/core/src/ocl.cpp
+13
-0
mvn_layer.cpp
modules/dnn/src/layers/mvn_layer.cpp
+10
-9
mvn.cl
modules/dnn/src/opencl/mvn.cl
+22
-10
color_yuv.cpp
modules/imgproc/src/color_yuv.cpp
+565
-131
core_bindings.cpp
modules/js/src/core_bindings.cpp
+10
-0
embindgen.py
modules/js/src/embindgen.py
+1
-1
test_features2d.js
modules/js/test/test_features2d.js
+82
-0
tests.html
modules/js/test/tests.html
+1
-6
tests.js
modules/js/test/tests.js
+2
-1
optflowgf.cpp
modules/video/src/optflowgf.cpp
+41
-46
build_js.py
platforms/js/build_js.py
+3
-0
object_detection.cpp
samples/dnn/object_detection.cpp
+27
-39
object_detection.py
samples/dnn/object_detection.py
+8
-18
No files found.
modules/core/src/ocl.cpp
View file @
fcec053d
...
...
@@ -2993,7 +2993,11 @@ int Kernel::set(int i, const KernelArg& arg)
if
(
!
p
||
!
p
->
handle
)
return
-
1
;
if
(
i
<
0
)
{
CV_LOG_ERROR
(
NULL
,
cv
::
format
(
"OpenCL: Kernel(%s)::set(arg_index=%d): negative arg_index"
,
p
->
name
.
c_str
(),
(
int
)
i
));
return
i
;
}
if
(
i
==
0
)
p
->
cleanupUMats
();
cl_int
status
=
0
;
...
...
@@ -3002,10 +3006,19 @@ int Kernel::set(int i, const KernelArg& arg)
AccessFlag
accessFlags
=
((
arg
.
flags
&
KernelArg
::
READ_ONLY
)
?
ACCESS_READ
:
static_cast
<
AccessFlag
>
(
0
))
|
((
arg
.
flags
&
KernelArg
::
WRITE_ONLY
)
?
ACCESS_WRITE
:
static_cast
<
AccessFlag
>
(
0
));
bool
ptronly
=
(
arg
.
flags
&
KernelArg
::
PTR_ONLY
)
!=
0
;
if
(
ptronly
&&
arg
.
m
->
empty
())
{
cl_mem
h_null
=
(
cl_mem
)
NULL
;
status
=
clSetKernelArg
(
p
->
handle
,
(
cl_uint
)
i
,
sizeof
(
h_null
),
&
h_null
);
CV_OCL_DBG_CHECK_RESULT
(
status
,
cv
::
format
(
"clSetKernelArg('%s', arg_index=%d, cl_mem=NULL)"
,
p
->
name
.
c_str
(),
(
int
)
i
).
c_str
());
return
i
+
1
;
}
cl_mem
h
=
(
cl_mem
)
arg
.
m
->
handle
(
accessFlags
);
if
(
!
h
)
{
CV_LOG_ERROR
(
NULL
,
cv
::
format
(
"OpenCL: Kernel(%s)::set(arg_index=%d, flags=%d): can't create cl_mem handle for passed UMat buffer (addr=%p)"
,
p
->
name
.
c_str
(),
(
int
)
i
,
(
int
)
arg
.
flags
,
arg
.
m
));
p
->
release
();
p
=
0
;
return
-
1
;
...
...
modules/dnn/src/layers/mvn_layer.cpp
View file @
fcec053d
...
...
@@ -138,9 +138,12 @@ public:
UMat
&
bnorm_weight
=
umat_scale
;
UMat
&
bnorm_bias
=
umat_shift
;
const
unsigned
LOCAL_SIZE
=
128
;
bool
use_half
=
(
inputs
[
0
].
depth
()
==
CV_16S
);
String
opts
=
format
(
" -DT=%s -DT4=%s -Dconvert_T=%s"
,
use_half
?
"half"
:
"float"
,
use_half
?
"half4"
:
"float4"
,
use_half
?
"convert_half4"
:
"convert_float4"
);
String
opts
=
format
(
" -DT=%s -DT4=%s -Dconvert_T=%s -DLOCAL_SIZE=%u"
,
use_half
?
"half"
:
"float"
,
use_half
?
"half4"
:
"float4"
,
use_half
?
"convert_half4"
:
"convert_float4"
,
LOCAL_SIZE
);
int
splitDim
=
(
acrossChannels
)
?
1
:
2
;
for
(
size_t
inpIdx
=
0
;
inpIdx
<
inputs
.
size
();
inpIdx
++
)
...
...
@@ -155,8 +158,8 @@ public:
float
alpha
=
1.0
f
/
s
[
1
];
String
buildopt
=
"-DNUM=4"
+
opts
;
ocl
::
Kernel
k
(
"mean_fuse4"
,
ocl
::
dnn
::
mvn_oclsrc
,
buildopt
);
size_t
localsize
[]
=
{
128
};
ocl
::
Kernel
k
(
"mean_fuse4"
,
ocl
::
dnn
::
mvn_oclsrc
,
buildopt
+
" -DKERNEL_MEAN_FUSE"
);
size_t
localsize
[]
=
{
LOCAL_SIZE
};
size_t
globalsize
[]
=
{
(
size_t
)
s
[
0
]
/
4
*
localsize
[
0
]
};
int
argId
=
0
;
...
...
@@ -165,7 +168,6 @@ public:
k
.
set
(
argId
++
,
alpha
);
k
.
set
(
argId
++
,
ocl
::
KernelArg
::
PtrWriteOnly
(
meanMat
));
k
.
set
(
argId
++
,
ocl
::
KernelArg
::
PtrWriteOnly
(
tmpMat
));
k
.
set
(
argId
++
,
NULL
,
localsize
[
0
]
*
sizeof
(
cl_float4
));
bool
ret
=
k
.
run
(
1
,
globalsize
,
localsize
,
false
);
if
(
!
ret
)
return
false
;
...
...
@@ -173,7 +175,7 @@ public:
buildopt
+=
format
(
" %s %s"
,
(
fuse_batch_norm
)
?
"-DFUSE_BATCH_NORM"
:
""
,
(
fuse_relu
)
?
"-DFUSE_RELU"
:
""
);
ocl
::
Kernel
k1
(
"mvn_fuse4"
,
ocl
::
dnn
::
mvn_oclsrc
,
buildopt
);
ocl
::
Kernel
k1
(
"mvn_fuse4"
,
ocl
::
dnn
::
mvn_oclsrc
,
buildopt
+
" -DKERNEL_MVN_FUSE"
);
argId
=
0
;
k1
.
set
(
argId
++
,
ocl
::
KernelArg
::
PtrReadOnly
(
tmpMat
));
k1
.
set
(
argId
++
,
ocl
::
KernelArg
::
PtrReadOnly
(
inpMat
));
...
...
@@ -185,7 +187,6 @@ public:
k1
.
set
(
argId
++
,
ocl
::
KernelArg
::
PtrReadOnly
(
bnorm_weight
));
k1
.
set
(
argId
++
,
ocl
::
KernelArg
::
PtrReadOnly
(
bnorm_bias
));
k1
.
set
(
argId
++
,
ocl
::
KernelArg
::
PtrWriteOnly
(
outMat
));
k1
.
set
(
argId
++
,
NULL
,
localsize
[
0
]
*
sizeof
(
cl_float4
));
ret
=
k1
.
run
(
1
,
globalsize
,
localsize
,
false
);
if
(
!
ret
)
return
false
;
...
...
@@ -243,7 +244,7 @@ public:
if
(
normVariance
)
{
String
kname
=
format
(
"calc_mean%d"
,
number
);
ocl
::
Kernel
kernel
(
kname
.
c_str
(),
ocl
::
dnn
::
mvn_oclsrc
,
buildopt
);
ocl
::
Kernel
kernel
(
kname
.
c_str
(),
ocl
::
dnn
::
mvn_oclsrc
,
buildopt
+
" -DKERNEL_MEAN"
);
if
(
kernel
.
empty
())
return
false
;
...
...
@@ -263,7 +264,7 @@ public:
}
String
kname
=
format
(
"mvn%d"
,
number
);
buildopt
+=
format
(
"%s%s%s"
,
(
normVariance
)
?
" -DNORM_VARIANCE"
:
""
,
buildopt
+=
format
(
"%s%s%s
-DKERNEL_MVN
"
,
(
normVariance
)
?
" -DNORM_VARIANCE"
:
""
,
(
fuse_batch_norm
)
?
" -DFUSE_BATCH_NORM"
:
""
,
(
fuse_relu
)
?
" -DFUSE_RELU"
:
""
);
ocl
::
Kernel
kernel1
(
kname
.
c_str
(),
ocl
::
dnn
::
mvn_oclsrc
,
buildopt
);
...
...
modules/dnn/src/opencl/mvn.cl
View file @
fcec053d
...
...
@@ -74,6 +74,8 @@
#
define
MVN_FUSE
mvn_fuse1
#
endif
#
ifdef
KERNEL_MEAN
__kernel
void
CALC_MEAN
(
__global
const
Dtype*
src,
const
int
rows,
const
int
cols,
...
...
@@ -94,6 +96,8 @@ __kernel void CALC_MEAN(__global const Dtype* src,
store(dst_vec, dst, index);
}
#elif defined KERNEL_MVN
__kernel void MVN(__global const Dtype* src,
const int rows,
const int cols,
...
...
@@ -140,12 +144,13 @@ __kernel void MVN(__global const Dtype* src,
store
(
dst_vec,
dst,
index
)
;
}
#
elif
defined
KERNEL_MEAN_FUSE
__kernel
void
MEAN_FUSE
(
__global
const
T
*
A,
unsigned
int
A_col_size,
float
alpha,
__global
T4
*
mean,
__global
Dtype
*
tmp,
__local
Dtype4
*
work
)
__global
Dtype
*
tmp
)
{
unsigned
int
row_gid
=
get_group_id
(
0
)
;
unsigned
int
lid
=
get_local_id
(
0
)
;
...
...
@@ -168,15 +173,16 @@ __kernel void MEAN_FUSE(__global const T * A,
dot2
+=
convert_float4
(
a2
)
;
dot3
+=
convert_float4
(
a3
)
;
i
+=
get_local_size
(
0
)
;
i
+=
LOCAL_SIZE
;
}
__local
Dtype4
work[LOCAL_SIZE]
;
work[lid].s0
=
dot
(
dot0,
b0
)
;
work[lid].s1
=
dot
(
dot1,
b0
)
;
work[lid].s2
=
dot
(
dot2,
b0
)
;
work[lid].s3
=
dot
(
dot3,
b0
)
;
for
(
unsigned
int
stride=
get_local_size
(
0
)
/2
; stride>0 ; stride>>=1)
for
(
unsigned
int
stride=
LOCAL_SIZE
/2
; stride>0 ; stride>>=1)
{
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
lid
<
stride
)
...
...
@@ -212,10 +218,12 @@ __kernel void MEAN_FUSE(__global const T * A,
vstore4
(
dot2,
i,
dst0_read
+
2
*
A_col_size
)
;
vstore4
(
dot3,
i,
dst0_read
+
3
*
A_col_size
)
;
i
+=
get_local_size
(
0
)
;
i
+=
LOCAL_SIZE
;
}
}
#
elif
defined
KERNEL_MVN_FUSE
__kernel
void
MVN_FUSE
(
__global
const
Dtype
*
tmp,
__global
const
T
*
A,
__global
const
T4
*
mean,
...
...
@@ -225,8 +233,7 @@ __kernel void MVN_FUSE(__global const Dtype * tmp,
const
float
relu_slope,
__global
const
Dtype4
*
bnorm_weight,
__global
const
Dtype4
*
bnorm_bias,
__global
T
*
B,
__local
Dtype4
*
work
)
__global
T
*
B
)
{
unsigned
int
row_gid
=
get_group_id
(
0
)
;
unsigned
int
lid
=
get_local_id
(
0
)
;
...
...
@@ -250,15 +257,16 @@ __kernel void MVN_FUSE(__global const Dtype * tmp,
dot2
+=
a2
;
dot3
+=
a3
;
i
+=
get_local_size
(
0
)
;
i
+=
LOCAL_SIZE
;
}
__local
Dtype4
work[LOCAL_SIZE]
;
work[lid].s0
=
dot
(
dot0,
b0
)
;
work[lid].s1
=
dot
(
dot1,
b0
)
;
work[lid].s2
=
dot
(
dot2,
b0
)
;
work[lid].s3
=
dot
(
dot3,
b0
)
;
for
(
unsigned
int
stride=
get_local_size
(
0
)
/2
; stride>0 ; stride>>=1)
for
(
unsigned
int
stride=
LOCAL_SIZE
/2
; stride>0 ; stride>>=1)
{
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
lid
<
stride
)
...
...
@@ -314,6 +322,10 @@ __kernel void MVN_FUSE(__global const Dtype * tmp,
vstore4
(
convert_T
(
dot2
)
,
i,
dst0_read
+
2
*
A_col_size
)
;
vstore4
(
convert_T
(
dot3
)
,
i,
dst0_read
+
3
*
A_col_size
)
;
i
+=
get_local_size
(
0
)
;
i
+=
LOCAL_SIZE
;
}
}
#
else
#
error
"Configuration error!"
#
endif
modules/imgproc/src/color_yuv.cpp
View file @
fcec053d
...
...
@@ -963,22 +963,22 @@ struct YCrCb2RGB_i<ushort>
///////////////////////////////////// YUV420 -> RGB /////////////////////////////////////
const
int
ITUR_BT_601_CY
=
1220542
;
const
int
ITUR_BT_601_CUB
=
2116026
;
const
int
ITUR_BT_601_CUG
=
-
409993
;
const
int
ITUR_BT_601_CVG
=
-
852492
;
const
int
ITUR_BT_601_CVR
=
1673527
;
const
int
ITUR_BT_601_SHIFT
=
20
;
static
const
int
ITUR_BT_601_CY
=
1220542
;
static
const
int
ITUR_BT_601_CUB
=
2116026
;
static
const
int
ITUR_BT_601_CUG
=
-
409993
;
static
const
int
ITUR_BT_601_CVG
=
-
852492
;
static
const
int
ITUR_BT_601_CVR
=
1673527
;
static
const
int
ITUR_BT_601_SHIFT
=
20
;
// Coefficients for RGB to YUV420p conversion
const
int
ITUR_BT_601_CRY
=
269484
;
const
int
ITUR_BT_601_CGY
=
528482
;
const
int
ITUR_BT_601_CBY
=
102760
;
const
int
ITUR_BT_601_CRU
=
-
155188
;
const
int
ITUR_BT_601_CGU
=
-
305135
;
const
int
ITUR_BT_601_CBU
=
460324
;
const
int
ITUR_BT_601_CGV
=
-
385875
;
const
int
ITUR_BT_601_CBV
=
-
74448
;
static
const
int
ITUR_BT_601_CRY
=
269484
;
static
const
int
ITUR_BT_601_CGY
=
528482
;
static
const
int
ITUR_BT_601_CBY
=
102760
;
static
const
int
ITUR_BT_601_CRU
=
-
155188
;
static
const
int
ITUR_BT_601_CGU
=
-
305135
;
static
const
int
ITUR_BT_601_CBU
=
460324
;
static
const
int
ITUR_BT_601_CGV
=
-
385875
;
static
const
int
ITUR_BT_601_CBV
=
-
74448
;
//R = 1.164(Y - 16) + 1.596(V - 128)
//G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
...
...
@@ -988,49 +988,146 @@ const int ITUR_BT_601_CBV = -74448;
//G = (1220542(Y - 16) - 852492(V - 128) - 409993(U - 128) + (1 << 19)) >> 20
//B = (1220542(Y - 16) + 2116026(U - 128) + (1 << 19)) >> 20
static
inline
void
uvToRGBuv
(
const
uchar
u
,
const
uchar
v
,
int
&
ruv
,
int
&
guv
,
int
&
buv
)
{
int
uu
,
vv
;
uu
=
int
(
u
)
-
128
;
vv
=
int
(
v
)
-
128
;
ruv
=
(
1
<<
(
ITUR_BT_601_SHIFT
-
1
))
+
ITUR_BT_601_CVR
*
vv
;
guv
=
(
1
<<
(
ITUR_BT_601_SHIFT
-
1
))
+
ITUR_BT_601_CVG
*
vv
+
ITUR_BT_601_CUG
*
uu
;
buv
=
(
1
<<
(
ITUR_BT_601_SHIFT
-
1
))
+
ITUR_BT_601_CUB
*
uu
;
}
static
inline
void
uvToRGBuv
(
const
v_uint8
&
u
,
const
v_uint8
&
v
,
v_int32
(
&
ruv
)[
4
],
v_int32
(
&
guv
)[
4
],
v_int32
(
&
buv
)[
4
])
{
v_uint8
v128
=
vx_setall_u8
(
128
);
v_int8
su
=
v_reinterpret_as_s8
(
v_sub_wrap
(
u
,
v128
));
v_int8
sv
=
v_reinterpret_as_s8
(
v_sub_wrap
(
v
,
v128
));
v_int16
uu0
,
uu1
,
vv0
,
vv1
;
v_expand
(
su
,
uu0
,
uu1
);
v_expand
(
sv
,
vv0
,
vv1
);
v_int32
uu
[
4
],
vv
[
4
];
v_expand
(
uu0
,
uu
[
0
],
uu
[
1
]);
v_expand
(
uu1
,
uu
[
2
],
uu
[
3
]);
v_expand
(
vv0
,
vv
[
0
],
vv
[
1
]);
v_expand
(
vv1
,
vv
[
2
],
vv
[
3
]);
v_int32
vshift
=
vx_setall_s32
(
1
<<
(
ITUR_BT_601_SHIFT
-
1
));
v_int32
vr
=
vx_setall_s32
(
ITUR_BT_601_CVR
);
v_int32
vg
=
vx_setall_s32
(
ITUR_BT_601_CVG
);
v_int32
ug
=
vx_setall_s32
(
ITUR_BT_601_CUG
);
v_int32
ub
=
vx_setall_s32
(
ITUR_BT_601_CUB
);
for
(
int
k
=
0
;
k
<
4
;
k
++
)
{
ruv
[
k
]
=
vshift
+
vr
*
vv
[
k
];
guv
[
k
]
=
vshift
+
vg
*
vv
[
k
]
+
ug
*
uu
[
k
];
buv
[
k
]
=
vshift
+
ub
*
uu
[
k
];
}
}
static
inline
void
yRGBuvToRGBA
(
const
uchar
vy
,
const
int
ruv
,
const
int
guv
,
const
int
buv
,
uchar
&
r
,
uchar
&
g
,
uchar
&
b
,
uchar
&
a
)
{
int
yy
=
int
(
vy
);
int
y
=
std
::
max
(
0
,
yy
-
16
)
*
ITUR_BT_601_CY
;
r
=
saturate_cast
<
uchar
>
((
y
+
ruv
)
>>
ITUR_BT_601_SHIFT
);
g
=
saturate_cast
<
uchar
>
((
y
+
guv
)
>>
ITUR_BT_601_SHIFT
);
b
=
saturate_cast
<
uchar
>
((
y
+
buv
)
>>
ITUR_BT_601_SHIFT
);
a
=
uchar
(
0xff
);
}
static
inline
void
yRGBuvToRGBA
(
const
v_uint8
&
vy
,
const
v_int32
(
&
ruv
)[
4
],
const
v_int32
(
&
guv
)[
4
],
const
v_int32
(
&
buv
)[
4
],
v_uint8
&
rr
,
v_uint8
&
gg
,
v_uint8
&
bb
)
{
v_uint8
v16
=
vx_setall_u8
(
16
);
v_uint8
posY
=
vy
-
v16
;
v_uint16
yy0
,
yy1
;
v_expand
(
posY
,
yy0
,
yy1
);
v_int32
yy
[
4
];
v_int32
yy00
,
yy01
,
yy10
,
yy11
;
v_expand
(
v_reinterpret_as_s16
(
yy0
),
yy
[
0
],
yy
[
1
]);
v_expand
(
v_reinterpret_as_s16
(
yy1
),
yy
[
2
],
yy
[
3
]);
v_int32
vcy
=
vx_setall_s32
(
ITUR_BT_601_CY
);
v_int32
y
[
4
],
r
[
4
],
g
[
4
],
b
[
4
];
for
(
int
k
=
0
;
k
<
4
;
k
++
)
{
y
[
k
]
=
yy
[
k
]
*
vcy
;
r
[
k
]
=
(
y
[
k
]
+
ruv
[
k
])
>>
ITUR_BT_601_SHIFT
;
g
[
k
]
=
(
y
[
k
]
+
guv
[
k
])
>>
ITUR_BT_601_SHIFT
;
b
[
k
]
=
(
y
[
k
]
+
buv
[
k
])
>>
ITUR_BT_601_SHIFT
;
}
v_int16
r0
,
r1
,
g0
,
g1
,
b0
,
b1
;
r0
=
v_pack
(
r
[
0
],
r
[
1
]);
r1
=
v_pack
(
r
[
2
],
r
[
3
]);
g0
=
v_pack
(
g
[
0
],
g
[
1
]);
g1
=
v_pack
(
g
[
2
],
g
[
3
]);
b0
=
v_pack
(
b
[
0
],
b
[
1
]);
b1
=
v_pack
(
b
[
2
],
b
[
3
]);
rr
=
v_pack_u
(
r0
,
r1
);
gg
=
v_pack_u
(
g0
,
g1
);
bb
=
v_pack_u
(
b0
,
b1
);
}
template
<
int
bIdx
,
int
dcn
,
bool
is420
>
static
inline
void
cvtYuv42xxp2RGB8
(
int
u
,
int
v
,
int
vy01
,
int
vy11
,
int
vy02
,
int
vy12
,
static
inline
void
cvtYuv42xxp2RGB8
(
const
uchar
u
,
const
uchar
v
,
const
uchar
vy01
,
const
uchar
vy11
,
const
uchar
vy02
,
const
uchar
vy12
,
uchar
*
row1
,
uchar
*
row2
)
{
u
=
u
-
128
;
v
=
v
-
128
;
int
ruv
,
guv
,
buv
;
uvToRGBuv
(
u
,
v
,
ruv
,
guv
,
buv
)
;
int
ruv
=
(
1
<<
(
ITUR_BT_601_SHIFT
-
1
))
+
ITUR_BT_601_CVR
*
v
;
int
guv
=
(
1
<<
(
ITUR_BT_601_SHIFT
-
1
))
+
ITUR_BT_601_CVG
*
v
+
ITUR_BT_601_CUG
*
u
;
int
buv
=
(
1
<<
(
ITUR_BT_601_SHIFT
-
1
))
+
ITUR_BT_601_CUB
*
u
;
uchar
r00
,
g00
,
b00
,
a00
;
uchar
r01
,
g01
,
b01
,
a01
;
int
y00
=
std
::
max
(
0
,
vy01
-
16
)
*
ITUR_BT_601_CY
;
row1
[
2
-
bIdx
]
=
saturate_cast
<
uchar
>
((
y00
+
ruv
)
>>
ITUR_BT_601_SHIFT
);
row1
[
1
]
=
saturate_cast
<
uchar
>
((
y00
+
guv
)
>>
ITUR_BT_601_SHIFT
);
row1
[
bIdx
]
=
saturate_cast
<
uchar
>
((
y00
+
buv
)
>>
ITUR_BT_601_SHIFT
);
yRGBuvToRGBA
(
vy01
,
ruv
,
guv
,
buv
,
r00
,
g00
,
b00
,
a00
);
yRGBuvToRGBA
(
vy11
,
ruv
,
guv
,
buv
,
r01
,
g01
,
b01
,
a01
);
row1
[
2
-
bIdx
]
=
r00
;
row1
[
1
]
=
g00
;
row1
[
bIdx
]
=
b00
;
if
(
dcn
==
4
)
row1
[
3
]
=
uchar
(
0xff
)
;
row1
[
3
]
=
a00
;
int
y01
=
std
::
max
(
0
,
vy11
-
16
)
*
ITUR_BT_601_CY
;
row1
[
dcn
+
2
-
bIdx
]
=
saturate_cast
<
uchar
>
((
y01
+
ruv
)
>>
ITUR_BT_601_SHIFT
);
row1
[
dcn
+
1
]
=
saturate_cast
<
uchar
>
((
y01
+
guv
)
>>
ITUR_BT_601_SHIFT
);
row1
[
dcn
+
0
+
bIdx
]
=
saturate_cast
<
uchar
>
((
y01
+
buv
)
>>
ITUR_BT_601_SHIFT
);
row1
[
dcn
+
2
-
bIdx
]
=
r01
;
row1
[
dcn
+
1
]
=
g01
;
row1
[
dcn
+
0
+
bIdx
]
=
b01
;
if
(
dcn
==
4
)
row1
[
7
]
=
uchar
(
0xff
)
;
row1
[
7
]
=
a01
;
if
(
is420
)
{
int
y10
=
std
::
max
(
0
,
vy02
-
16
)
*
ITUR_BT_601_CY
;
row2
[
2
-
bIdx
]
=
saturate_cast
<
uchar
>
((
y10
+
ruv
)
>>
ITUR_BT_601_SHIFT
);
row2
[
1
]
=
saturate_cast
<
uchar
>
((
y10
+
guv
)
>>
ITUR_BT_601_SHIFT
);
row2
[
bIdx
]
=
saturate_cast
<
uchar
>
((
y10
+
buv
)
>>
ITUR_BT_601_SHIFT
);
uchar
r10
,
g10
,
b10
,
a10
;
uchar
r11
,
g11
,
b11
,
a11
;
yRGBuvToRGBA
(
vy02
,
ruv
,
guv
,
buv
,
r10
,
g10
,
b10
,
a10
);
yRGBuvToRGBA
(
vy12
,
ruv
,
guv
,
buv
,
r11
,
g11
,
b11
,
a11
);
row2
[
2
-
bIdx
]
=
r10
;
row2
[
1
]
=
g10
;
row2
[
bIdx
]
=
b10
;
if
(
dcn
==
4
)
row2
[
3
]
=
uchar
(
0xff
)
;
row2
[
3
]
=
a10
;
int
y11
=
std
::
max
(
0
,
vy12
-
16
)
*
ITUR_BT_601_CY
;
row2
[
dcn
+
2
-
bIdx
]
=
saturate_cast
<
uchar
>
((
y11
+
ruv
)
>>
ITUR_BT_601_SHIFT
);
row2
[
dcn
+
1
]
=
saturate_cast
<
uchar
>
((
y11
+
guv
)
>>
ITUR_BT_601_SHIFT
);
row2
[
dcn
+
0
+
bIdx
]
=
saturate_cast
<
uchar
>
((
y11
+
buv
)
>>
ITUR_BT_601_SHIFT
);
row2
[
dcn
+
2
-
bIdx
]
=
r11
;
row2
[
dcn
+
1
]
=
g11
;
row2
[
dcn
+
0
+
bIdx
]
=
b11
;
if
(
dcn
==
4
)
row2
[
7
]
=
uchar
(
0xff
)
;
row2
[
7
]
=
a11
;
}
}
// bIdx is 0 or 2, uIdx is 0 or 1, dcn is 3 or 4
template
<
int
bIdx
,
int
uIdx
,
int
dcn
>
struct
YUV420sp2RGB8Invoker
:
ParallelLoopBody
{
...
...
@@ -1056,15 +1153,80 @@ struct YUV420sp2RGB8Invoker : ParallelLoopBody
uchar
*
row2
=
dst_data
+
dst_step
*
(
j
+
1
);
const
uchar
*
y2
=
y1
+
stride
;
for
(
int
i
=
0
;
i
<
width
;
i
+=
2
,
row1
+=
dcn
*
2
,
row2
+=
dcn
*
2
)
int
i
=
0
;
#if CV_SIMD
const
int
vsize
=
v_uint8
::
nlanes
;
v_uint8
a
=
vx_setall_u8
(
uchar
(
0xff
));
for
(
;
i
<=
width
-
2
*
vsize
;
i
+=
2
*
vsize
,
row1
+=
vsize
*
dcn
*
2
,
row2
+=
vsize
*
dcn
*
2
)
{
int
u
=
int
(
uv
[
i
+
0
+
uIdx
]);
int
v
=
int
(
uv
[
i
+
1
-
uIdx
]);
v_uint8
u
,
v
;
v_load_deinterleave
(
uv
+
i
,
u
,
v
);
if
(
uIdx
)
{
swap
(
u
,
v
);
}
v_uint8
vy
[
4
];
v_load_deinterleave
(
y1
+
i
,
vy
[
0
],
vy
[
1
]);
v_load_deinterleave
(
y2
+
i
,
vy
[
2
],
vy
[
3
]);
v_int32
ruv
[
4
],
guv
[
4
],
buv
[
4
];
uvToRGBuv
(
u
,
v
,
ruv
,
guv
,
buv
);
int
vy01
=
int
(
y1
[
i
]);
int
vy11
=
int
(
y1
[
i
+
1
]);
int
vy02
=
int
(
y2
[
i
]);
int
vy12
=
int
(
y2
[
i
+
1
]);
v_uint8
r
[
4
],
g
[
4
],
b
[
4
];
for
(
int
k
=
0
;
k
<
4
;
k
++
)
{
yRGBuvToRGBA
(
vy
[
k
],
ruv
,
guv
,
buv
,
r
[
k
],
g
[
k
],
b
[
k
]);
}
if
(
bIdx
)
{
for
(
int
k
=
0
;
k
<
4
;
k
++
)
swap
(
r
[
k
],
b
[
k
]);
}
// [r0...], [r1...] => [r0, r1, r0, r1...], [r0, r1, r0, r1...]
v_uint8
r0_0
,
r0_1
,
r1_0
,
r1_1
;
v_zip
(
r
[
0
],
r
[
1
],
r0_0
,
r0_1
);
v_zip
(
r
[
2
],
r
[
3
],
r1_0
,
r1_1
);
v_uint8
g0_0
,
g0_1
,
g1_0
,
g1_1
;
v_zip
(
g
[
0
],
g
[
1
],
g0_0
,
g0_1
);
v_zip
(
g
[
2
],
g
[
3
],
g1_0
,
g1_1
);
v_uint8
b0_0
,
b0_1
,
b1_0
,
b1_1
;
v_zip
(
b
[
0
],
b
[
1
],
b0_0
,
b0_1
);
v_zip
(
b
[
2
],
b
[
3
],
b1_0
,
b1_1
);
if
(
dcn
==
4
)
{
v_store_interleave
(
row1
+
0
*
vsize
,
b0_0
,
g0_0
,
r0_0
,
a
);
v_store_interleave
(
row1
+
4
*
vsize
,
b0_1
,
g0_1
,
r0_1
,
a
);
v_store_interleave
(
row2
+
0
*
vsize
,
b1_0
,
g1_0
,
r1_0
,
a
);
v_store_interleave
(
row2
+
4
*
vsize
,
b1_1
,
g1_1
,
r1_1
,
a
);
}
else
//dcn == 3
{
v_store_interleave
(
row1
+
0
*
vsize
,
b0_0
,
g0_0
,
r0_0
);
v_store_interleave
(
row1
+
3
*
vsize
,
b0_1
,
g0_1
,
r0_1
);
v_store_interleave
(
row2
+
0
*
vsize
,
b1_0
,
g1_0
,
r1_0
);
v_store_interleave
(
row2
+
3
*
vsize
,
b1_1
,
g1_1
,
r1_1
);
}
}
vx_cleanup
();
#endif
for
(
;
i
<
width
;
i
+=
2
,
row1
+=
dcn
*
2
,
row2
+=
dcn
*
2
)
{
uchar
u
=
uv
[
i
+
0
+
uIdx
];
uchar
v
=
uv
[
i
+
1
-
uIdx
];
uchar
vy01
=
y1
[
i
];
uchar
vy11
=
y1
[
i
+
1
];
uchar
vy02
=
y2
[
i
];
uchar
vy12
=
y2
[
i
+
1
];
cvtYuv42xxp2RGB8
<
bIdx
,
dcn
,
true
>
(
u
,
v
,
vy01
,
vy11
,
vy02
,
vy12
,
row1
,
row2
);
}
...
...
@@ -1108,16 +1270,77 @@ struct YUV420p2RGB8Invoker : ParallelLoopBody
uchar
*
row1
=
dst_data
+
dst_step
*
j
;
uchar
*
row2
=
dst_data
+
dst_step
*
(
j
+
1
);
const
uchar
*
y2
=
y1
+
stride
;
int
i
=
0
;
#if CV_SIMD
const
int
vsize
=
v_uint8
::
nlanes
;
v_uint8
a
=
vx_setall_u8
(
uchar
(
0xff
));
for
(
;
i
<=
width
/
2
-
vsize
;
i
+=
vsize
,
row1
+=
vsize
*
dcn
*
2
,
row2
+=
vsize
*
dcn
*
2
)
{
v_uint8
u
,
v
;
u
=
vx_load
(
u1
+
i
);
v
=
vx_load
(
v1
+
i
);
v_uint8
vy
[
4
];
v_load_deinterleave
(
y1
+
2
*
i
,
vy
[
0
],
vy
[
1
]);
v_load_deinterleave
(
y2
+
2
*
i
,
vy
[
2
],
vy
[
3
]);
v_int32
ruv
[
4
],
guv
[
4
],
buv
[
4
];
uvToRGBuv
(
u
,
v
,
ruv
,
guv
,
buv
);
v_uint8
r
[
4
],
g
[
4
],
b
[
4
];
for
(
int
k
=
0
;
k
<
4
;
k
++
)
{
yRGBuvToRGBA
(
vy
[
k
],
ruv
,
guv
,
buv
,
r
[
k
],
g
[
k
],
b
[
k
]);
}
if
(
bIdx
)
{
for
(
int
k
=
0
;
k
<
4
;
k
++
)
swap
(
r
[
k
],
b
[
k
]);
}
// [r0...], [r1...] => [r0, r1, r0, r1...], [r0, r1, r0, r1...]
v_uint8
r0_0
,
r0_1
,
r1_0
,
r1_1
;
v_zip
(
r
[
0
],
r
[
1
],
r0_0
,
r0_1
);
v_zip
(
r
[
2
],
r
[
3
],
r1_0
,
r1_1
);
v_uint8
g0_0
,
g0_1
,
g1_0
,
g1_1
;
v_zip
(
g
[
0
],
g
[
1
],
g0_0
,
g0_1
);
v_zip
(
g
[
2
],
g
[
3
],
g1_0
,
g1_1
);
v_uint8
b0_0
,
b0_1
,
b1_0
,
b1_1
;
v_zip
(
b
[
0
],
b
[
1
],
b0_0
,
b0_1
);
v_zip
(
b
[
2
],
b
[
3
],
b1_0
,
b1_1
);
if
(
dcn
==
4
)
{
v_store_interleave
(
row1
+
0
*
vsize
,
b0_0
,
g0_0
,
r0_0
,
a
);
v_store_interleave
(
row1
+
4
*
vsize
,
b0_1
,
g0_1
,
r0_1
,
a
);
v_store_interleave
(
row2
+
0
*
vsize
,
b1_0
,
g1_0
,
r1_0
,
a
);
v_store_interleave
(
row2
+
4
*
vsize
,
b1_1
,
g1_1
,
r1_1
,
a
);
}
else
//dcn == 3
{
v_store_interleave
(
row1
+
0
*
vsize
,
b0_0
,
g0_0
,
r0_0
);
v_store_interleave
(
row1
+
3
*
vsize
,
b0_1
,
g0_1
,
r0_1
);
for
(
int
i
=
0
;
i
<
width
/
2
;
i
+=
1
,
row1
+=
dcn
*
2
,
row2
+=
dcn
*
2
)
v_store_interleave
(
row2
+
0
*
vsize
,
b1_0
,
g1_0
,
r1_0
);
v_store_interleave
(
row2
+
3
*
vsize
,
b1_1
,
g1_1
,
r1_1
);
}
}
vx_cleanup
();
#endif
for
(;
i
<
width
/
2
;
i
+=
1
,
row1
+=
dcn
*
2
,
row2
+=
dcn
*
2
)
{
int
u
=
int
(
u1
[
i
])
;
int
v
=
int
(
v1
[
i
])
;
uchar
u
=
u1
[
i
]
;
uchar
v
=
v1
[
i
]
;
int
vy01
=
int
(
y1
[
2
*
i
])
;
int
vy11
=
int
(
y1
[
2
*
i
+
1
])
;
int
vy02
=
int
(
y2
[
2
*
i
])
;
int
vy12
=
int
(
y2
[
2
*
i
+
1
])
;
uchar
vy01
=
y1
[
2
*
i
]
;
uchar
vy11
=
y1
[
2
*
i
+
1
]
;
uchar
vy02
=
y2
[
2
*
i
]
;
uchar
vy12
=
y2
[
2
*
i
+
1
]
;
cvtYuv42xxp2RGB8
<
bIdx
,
dcn
,
true
>
(
u
,
v
,
vy01
,
vy11
,
vy02
,
vy12
,
row1
,
row2
);
}
...
...
@@ -1150,106 +1373,258 @@ inline void cvtYUV420p2RGB(uchar * dst_data, size_t dst_step, int dst_width, int
///////////////////////////////////// RGB -> YUV420p /////////////////////////////////////
static
inline
uchar
rgbToY42x
(
uchar
r
,
uchar
g
,
uchar
b
)
{
const
int
shifted16
=
(
16
<<
ITUR_BT_601_SHIFT
);
const
int
halfShift
=
(
1
<<
(
ITUR_BT_601_SHIFT
-
1
));
int
yy
=
ITUR_BT_601_CRY
*
r
+
ITUR_BT_601_CGY
*
g
+
ITUR_BT_601_CBY
*
b
+
halfShift
+
shifted16
;
return
saturate_cast
<
uchar
>
(
yy
>>
ITUR_BT_601_SHIFT
);
}
static
inline
v_uint8
rgbToY42x
(
const
v_uint8
&
r
,
const
v_uint8
&
g
,
const
v_uint8
&
b
)
{
const
int
shifted16
=
(
16
<<
ITUR_BT_601_SHIFT
);
const
int
halfShift
=
(
1
<<
(
ITUR_BT_601_SHIFT
-
1
));
v_uint16
r0
,
r1
,
g0
,
g1
,
b0
,
b1
;
v_expand
(
r
,
r0
,
r1
);
v_expand
(
g
,
g0
,
g1
);
v_expand
(
b
,
b0
,
b1
);
v_uint32
rq
[
4
],
gq
[
4
],
bq
[
4
];
v_expand
(
r0
,
rq
[
0
],
rq
[
1
]);
v_expand
(
r1
,
rq
[
2
],
rq
[
3
]);
v_expand
(
g0
,
gq
[
0
],
gq
[
1
]);
v_expand
(
g1
,
gq
[
2
],
gq
[
3
]);
v_expand
(
b0
,
bq
[
0
],
bq
[
1
]);
v_expand
(
b1
,
bq
[
2
],
bq
[
3
]);
v_uint32
ry
=
vx_setall_u32
(
ITUR_BT_601_CRY
),
gy
=
vx_setall_u32
(
ITUR_BT_601_CGY
);
v_uint32
by
=
vx_setall_u32
(
ITUR_BT_601_CBY
),
shift
=
vx_setall_u32
(
halfShift
+
shifted16
);
v_uint32
y
[
4
];
for
(
int
k
=
0
;
k
<
4
;
k
++
)
{
y
[
k
]
=
(
rq
[
k
]
*
ry
+
gq
[
k
]
*
gy
+
bq
[
k
]
*
by
+
shift
)
>>
ITUR_BT_601_SHIFT
;
}
v_uint16
y0
,
y1
;
y0
=
v_pack
(
y
[
0
],
y
[
1
]);
y1
=
v_pack
(
y
[
2
],
y
[
3
]);
return
v_pack
(
y0
,
y1
);
}
static
inline
void
rgbToUV42x
(
uchar
r
,
uchar
g
,
uchar
b
,
uchar
&
u
,
uchar
&
v
)
{
const
int
halfShift
=
(
1
<<
(
ITUR_BT_601_SHIFT
-
1
));
const
int
shifted128
=
(
128
<<
ITUR_BT_601_SHIFT
);
int
uu
=
ITUR_BT_601_CRU
*
r
+
ITUR_BT_601_CGU
*
g
+
ITUR_BT_601_CBU
*
b
+
halfShift
+
shifted128
;
int
vv
=
ITUR_BT_601_CBU
*
r
+
ITUR_BT_601_CGV
*
g
+
ITUR_BT_601_CBV
*
b
+
halfShift
+
shifted128
;
u
=
saturate_cast
<
uchar
>
(
uu
>>
ITUR_BT_601_SHIFT
);
v
=
saturate_cast
<
uchar
>
(
vv
>>
ITUR_BT_601_SHIFT
);
}
static
inline
void
rgbToUV42x
(
const
v_uint8
&
r0
,
const
v_uint8
&
r1
,
const
v_uint8
&
g0
,
const
v_uint8
&
g1
,
const
v_uint8
&
b0
,
const
v_uint8
&
b1
,
v_uint8
&
u
,
v_uint8
&
v
)
{
// [r0, r1, r2, r3,..] => [r0, 0, r2, 0,..]
v_int16
vlowByte
=
vx_setall_s16
(
0x00ff
);
v_int16
rd0
,
rd1
,
gd0
,
gd1
,
bd0
,
bd1
;
rd0
=
v_reinterpret_as_s16
(
r0
)
&
vlowByte
;
rd1
=
v_reinterpret_as_s16
(
r1
)
&
vlowByte
;
gd0
=
v_reinterpret_as_s16
(
g0
)
&
vlowByte
;
gd1
=
v_reinterpret_as_s16
(
g1
)
&
vlowByte
;
bd0
=
v_reinterpret_as_s16
(
b0
)
&
vlowByte
;
bd1
=
v_reinterpret_as_s16
(
b1
)
&
vlowByte
;
v_int32
rq
[
4
],
gq
[
4
],
bq
[
4
];
v_expand
(
rd0
,
rq
[
0
],
rq
[
1
]);
v_expand
(
rd1
,
rq
[
2
],
rq
[
3
]);
v_expand
(
gd0
,
gq
[
0
],
gq
[
1
]);
v_expand
(
gd1
,
gq
[
2
],
gq
[
3
]);
v_expand
(
bd0
,
bq
[
0
],
bq
[
1
]);
v_expand
(
bd1
,
bq
[
2
],
bq
[
3
]);
const
int
halfShift
=
(
1
<<
(
ITUR_BT_601_SHIFT
-
1
));
const
int
shifted128
=
(
128
<<
ITUR_BT_601_SHIFT
);
v_int32
shift
=
vx_setall_s32
(
halfShift
+
shifted128
);
v_int32
ru
,
gu
,
bu
,
gv
,
bv
;
ru
=
vx_setall_s32
(
ITUR_BT_601_CRU
);
gu
=
vx_setall_s32
(
ITUR_BT_601_CGU
);
gv
=
vx_setall_s32
(
ITUR_BT_601_CGV
);
bu
=
vx_setall_s32
(
ITUR_BT_601_CBU
);
bv
=
vx_setall_s32
(
ITUR_BT_601_CBV
);
v_int32
uq
[
4
],
vq
[
4
];
for
(
int
k
=
0
;
k
<
4
;
k
++
)
{
uq
[
k
]
=
(
ru
*
rq
[
k
]
+
gu
*
gq
[
k
]
+
bu
*
bq
[
k
]
+
shift
)
>>
ITUR_BT_601_SHIFT
;
vq
[
k
]
=
(
bu
*
rq
[
k
]
+
gv
*
gq
[
k
]
+
bv
*
bq
[
k
]
+
shift
)
>>
ITUR_BT_601_SHIFT
;
}
v_int16
u0
,
u1
,
v0
,
v1
;
u0
=
v_pack
(
uq
[
0
],
uq
[
1
]);
u1
=
v_pack
(
uq
[
2
],
uq
[
3
]);
v0
=
v_pack
(
vq
[
0
],
vq
[
1
]);
v1
=
v_pack
(
vq
[
2
],
vq
[
3
]);
u
=
v_pack_u
(
u0
,
u1
);
v
=
v_pack_u
(
v0
,
v1
);
}
struct
RGB8toYUV420pInvoker
:
public
ParallelLoopBody
{
RGB8toYUV420pInvoker
(
const
uchar
*
_src
_data
,
size_t
_src_s
tep
,
uchar
*
_y
_data
,
uchar
*
_uv_data
,
size_t
_dst_s
tep
,
int
_src
_width
,
int
_src_height
,
int
_scn
,
bool
swapBlue_
,
bool
swapUV_
,
bool
interleaved_
)
:
src
_data
(
_src_data
),
src_step
(
_src_s
tep
),
y
_data
(
_y_data
),
uv_data
(
_uv_data
),
dst_step
(
_dst_s
tep
),
src
_width
(
_src_width
),
src_height
(
_src_h
eight
),
s
cn
(
_scn
),
swapBlue
(
swapBlue_
),
swapUV
(
swapUV_
),
interleaved
(
interleaved_
)
{
}
RGB8toYUV420pInvoker
(
const
uchar
*
_src
Data
,
size_t
_srcS
tep
,
uchar
*
_y
Data
,
uchar
*
_uvData
,
size_t
_dstS
tep
,
int
_src
Width
,
int
_srcHeight
,
int
_scn
,
bool
_swapBlue
,
bool
_swapUV
,
bool
_interleave
)
:
src
Data
(
_srcData
),
srcStep
(
_srcS
tep
),
y
Data
(
_yData
),
uvData
(
_uvData
),
dstStep
(
_dstS
tep
),
src
Width
(
_srcWidth
),
srcHeight
(
_srcH
eight
),
s
rcCn
(
_scn
),
swapBlue
(
_swapBlue
),
swapUV
(
_swapUV
),
interleave
(
_interleave
)
{
}
void
operator
()(
const
Range
&
rowRange
)
const
CV_OVERRIDE
{
const
int
w
=
src_width
;
const
int
h
=
src_height
;
const
int
cn
=
scn
;
for
(
int
i
=
rowRange
.
start
;
i
<
rowRange
.
end
;
i
++
)
const
int
w
=
srcWidth
;
const
int
h
=
srcHeight
;
const
int
scn
=
srcCn
;
const
uchar
*
srcRow
=
(
uchar
*
)
0
;
uchar
*
yRow
=
(
uchar
*
)
0
,
*
uRow
=
(
uchar
*
)
0
,
*
vRow
=
(
uchar
*
)
0
,
*
uvRow
=
(
uchar
*
)
0
;
for
(
int
sRow
=
rowRange
.
start
*
2
;
sRow
<
rowRange
.
end
*
2
;
sRow
++
)
{
const
uchar
*
brow0
=
src_data
+
src_step
*
(
2
*
i
);
const
uchar
*
grow0
=
brow0
+
1
;
const
uchar
*
rrow0
=
brow0
+
2
;
const
uchar
*
brow1
=
src_data
+
src_step
*
(
2
*
i
+
1
);
const
uchar
*
grow1
=
brow1
+
1
;
const
uchar
*
rrow1
=
brow1
+
2
;
if
(
swapBlue
)
srcRow
=
srcData
+
srcStep
*
sRow
;
yRow
=
yData
+
dstStep
*
sRow
;
bool
evenRow
=
(
sRow
%
2
)
==
0
;
if
(
evenRow
)
{
std
::
swap
(
brow0
,
rrow0
);
std
::
swap
(
brow1
,
rrow1
);
if
(
interleave
)
{
uvRow
=
uvData
+
dstStep
*
(
sRow
/
2
);
}
else
{
uRow
=
uvData
+
dstStep
*
(
sRow
/
4
)
+
((
sRow
/
2
)
%
2
)
*
(
w
/
2
);
vRow
=
uvData
+
dstStep
*
((
sRow
+
h
)
/
4
)
+
(((
sRow
+
h
)
/
2
)
%
2
)
*
(
w
/
2
);
}
}
int
i
=
0
;
#if CV_SIMD
const
int
vsize
=
v_uint8
::
nlanes
;
uchar
*
y
=
y_data
+
dst_step
*
(
2
*
i
);
uchar
*
u
;
uchar
*
v
;
if
(
interleaved
)
{
u
=
uv_data
+
dst_step
*
i
;
v
=
uv_data
+
dst_step
*
i
+
1
;
}
else
for
(
;
i
<=
w
/
2
-
vsize
;
i
+=
vsize
)
{
u
=
uv_data
+
dst_step
*
(
i
/
2
)
+
(
i
%
2
)
*
(
w
/
2
);
v
=
uv_data
+
dst_step
*
((
i
+
h
/
2
)
/
2
)
+
((
i
+
h
/
2
)
%
2
)
*
(
w
/
2
);
}
// processing (2*vsize) pixels at once
v_uint8
b0
,
b1
,
g0
,
g1
,
r0
,
r1
,
a0
,
a1
;
if
(
scn
==
4
)
{
v_load_deinterleave
(
srcRow
+
2
*
4
*
i
+
0
*
vsize
,
b0
,
g0
,
r0
,
a0
);
v_load_deinterleave
(
srcRow
+
2
*
4
*
i
+
4
*
vsize
,
b1
,
g1
,
r1
,
a1
);
}
else
// scn == 3
{
v_load_deinterleave
(
srcRow
+
2
*
3
*
i
+
0
*
vsize
,
b0
,
g0
,
r0
);
v_load_deinterleave
(
srcRow
+
2
*
3
*
i
+
3
*
vsize
,
b1
,
g1
,
r1
);
}
if
(
swapUV
)
{
std
::
swap
(
u
,
v
);
}
if
(
swapBlue
)
{
swap
(
b0
,
r0
);
swap
(
b1
,
r1
);
}
v_uint8
y0
,
y1
;
y0
=
rgbToY42x
(
r0
,
g0
,
b0
);
y1
=
rgbToY42x
(
r1
,
g1
,
b1
);
for
(
int
j
=
0
,
k
=
0
;
j
<
w
*
cn
;
j
+=
2
*
cn
,
k
++
)
v_store
(
yRow
+
2
*
i
+
0
*
vsize
,
y0
);
v_store
(
yRow
+
2
*
i
+
1
*
vsize
,
y1
);
if
(
evenRow
)
{
v_uint8
u
,
v
;
rgbToUV42x
(
r0
,
r1
,
g0
,
g1
,
b0
,
b1
,
u
,
v
);
if
(
swapUV
)
{
swap
(
u
,
v
);
}
if
(
interleave
)
{
v_store_interleave
(
uvRow
+
2
*
i
,
u
,
v
);
}
else
{
v_store
(
uRow
+
i
,
u
);
v_store
(
vRow
+
i
,
v
);
}
}
}
vx_cleanup
();
#endif
// processing two pixels at once
for
(
;
i
<
w
/
2
;
i
++
)
{
int
r00
=
rrow0
[
j
];
int
g00
=
grow0
[
j
];
int
b00
=
brow0
[
j
];
int
r01
=
rrow0
[
cn
+
j
];
int
g01
=
grow0
[
cn
+
j
];
int
b01
=
brow0
[
cn
+
j
];
int
r10
=
rrow1
[
j
];
int
g10
=
grow1
[
j
];
int
b10
=
brow1
[
j
];
int
r11
=
rrow1
[
cn
+
j
];
int
g11
=
grow1
[
cn
+
j
];
int
b11
=
brow1
[
cn
+
j
];
const
int
shifted16
=
(
16
<<
ITUR_BT_601_SHIFT
);
const
int
halfShift
=
(
1
<<
(
ITUR_BT_601_SHIFT
-
1
));
int
y00
=
ITUR_BT_601_CRY
*
r00
+
ITUR_BT_601_CGY
*
g00
+
ITUR_BT_601_CBY
*
b00
+
halfShift
+
shifted16
;
int
y01
=
ITUR_BT_601_CRY
*
r01
+
ITUR_BT_601_CGY
*
g01
+
ITUR_BT_601_CBY
*
b01
+
halfShift
+
shifted16
;
int
y10
=
ITUR_BT_601_CRY
*
r10
+
ITUR_BT_601_CGY
*
g10
+
ITUR_BT_601_CBY
*
b10
+
halfShift
+
shifted16
;
int
y11
=
ITUR_BT_601_CRY
*
r11
+
ITUR_BT_601_CGY
*
g11
+
ITUR_BT_601_CBY
*
b11
+
halfShift
+
shifted16
;
y
[
2
*
k
+
0
]
=
saturate_cast
<
uchar
>
(
y00
>>
ITUR_BT_601_SHIFT
);
y
[
2
*
k
+
1
]
=
saturate_cast
<
uchar
>
(
y01
>>
ITUR_BT_601_SHIFT
);
y
[
2
*
k
+
dst_step
+
0
]
=
saturate_cast
<
uchar
>
(
y10
>>
ITUR_BT_601_SHIFT
);
y
[
2
*
k
+
dst_step
+
1
]
=
saturate_cast
<
uchar
>
(
y11
>>
ITUR_BT_601_SHIFT
);
const
int
shifted128
=
(
128
<<
ITUR_BT_601_SHIFT
);
int
u00
=
ITUR_BT_601_CRU
*
r00
+
ITUR_BT_601_CGU
*
g00
+
ITUR_BT_601_CBU
*
b00
+
halfShift
+
shifted128
;
int
v00
=
ITUR_BT_601_CBU
*
r00
+
ITUR_BT_601_CGV
*
g00
+
ITUR_BT_601_CBV
*
b00
+
halfShift
+
shifted128
;
if
(
interleaved
)
uchar
b0
,
g0
,
r0
;
uchar
b1
,
g1
,
r1
;
b0
=
srcRow
[(
2
*
i
+
0
)
*
scn
+
0
];
g0
=
srcRow
[(
2
*
i
+
0
)
*
scn
+
1
];
r0
=
srcRow
[(
2
*
i
+
0
)
*
scn
+
2
];
b1
=
srcRow
[(
2
*
i
+
1
)
*
scn
+
0
];
g1
=
srcRow
[(
2
*
i
+
1
)
*
scn
+
1
];
r1
=
srcRow
[(
2
*
i
+
1
)
*
scn
+
2
];
if
(
swapBlue
)
{
u
[
k
*
2
]
=
saturate_cast
<
uchar
>
(
u00
>>
ITUR_BT_601_SHIFT
);
v
[
k
*
2
]
=
saturate_cast
<
uchar
>
(
v00
>>
ITUR_BT_601_SHIFT
);
swap
(
b0
,
r0
);
swap
(
b1
,
r1
);
}
else
uchar
y0
=
rgbToY42x
(
r0
,
g0
,
b0
);
uchar
y1
=
rgbToY42x
(
r1
,
g1
,
b1
);
yRow
[
2
*
i
+
0
]
=
y0
;
yRow
[
2
*
i
+
1
]
=
y1
;
if
(
evenRow
)
{
u
[
k
]
=
saturate_cast
<
uchar
>
(
u00
>>
ITUR_BT_601_SHIFT
);
v
[
k
]
=
saturate_cast
<
uchar
>
(
v00
>>
ITUR_BT_601_SHIFT
);
uchar
uu
,
vv
;
rgbToUV42x
(
r0
,
g0
,
b0
,
uu
,
vv
);
if
(
swapUV
)
{
swap
(
uu
,
vv
);
}
if
(
interleave
)
{
uvRow
[
2
*
i
+
0
]
=
uu
;
uvRow
[
2
*
i
+
1
]
=
vv
;
}
else
{
uRow
[
i
]
=
uu
;
vRow
[
i
]
=
vv
;
}
}
}
}
}
const
uchar
*
src
_d
ata
;
size_t
src
_s
tep
;
uchar
*
y
_data
,
*
uv_d
ata
;
size_t
dst
_s
tep
;
int
src
_w
idth
;
int
src
_h
eight
;
const
int
s
c
n
;
const
uchar
*
src
D
ata
;
size_t
src
S
tep
;
uchar
*
y
Data
,
*
uvD
ata
;
size_t
dst
S
tep
;
int
src
W
idth
;
int
src
H
eight
;
const
int
s
rcC
n
;
bool
swapBlue
;
bool
swapUV
;
bool
interleave
d
;
bool
interleave
;
};
///////////////////////////////////// YUV422 -> RGB /////////////////////////////////////
// bIdx is 0 or 2; [uIdx, yIdx] is [0, 0], [0, 1], [1, 0]; dcn is 3 or 4
template
<
int
bIdx
,
int
uIdx
,
int
yIdx
,
int
dcn
>
struct
YUV422toRGB8Invoker
:
ParallelLoopBody
{
...
...
@@ -1269,6 +1644,10 @@ struct YUV422toRGB8Invoker : ParallelLoopBody
int
rangeBegin
=
range
.
start
;
int
rangeEnd
=
range
.
end
;
// [yIdx, uIdx] | [uidx, vidx]:
// 0, 0 | 1, 3
// 0, 1 | 3, 1
// 1, 0 | 0, 2
const
int
uidx
=
1
-
yIdx
+
uIdx
*
2
;
const
int
vidx
=
(
2
+
uidx
)
%
4
;
const
uchar
*
yuv_src
=
src_data
+
rangeBegin
*
src_step
;
...
...
@@ -1276,14 +1655,69 @@ struct YUV422toRGB8Invoker : ParallelLoopBody
for
(
int
j
=
rangeBegin
;
j
<
rangeEnd
;
j
++
,
yuv_src
+=
src_step
)
{
uchar
*
row
=
dst_data
+
dst_step
*
j
;
int
i
=
0
;
#if CV_SIMD
const
int
vsize
=
v_uint8
::
nlanes
;
v_uint8
a
=
vx_setall_u8
(
uchar
(
0xff
));
for
(;
i
<=
2
*
width
-
4
*
vsize
;
i
+=
4
*
vsize
,
row
+=
vsize
*
dcn
*
2
)
{
v_uint8
u
,
v
,
vy
[
2
];
if
(
yIdx
==
1
)
// UYVY
{
v_load_deinterleave
(
yuv_src
+
i
,
u
,
vy
[
0
],
v
,
vy
[
1
]);
}
else
// YUYV or YVYU
{
v_load_deinterleave
(
yuv_src
+
i
,
vy
[
0
],
u
,
vy
[
1
],
v
);
if
(
uIdx
==
1
)
// YVYU
{
swap
(
u
,
v
);
}
}
v_int32
ruv
[
4
],
guv
[
4
],
buv
[
4
];
uvToRGBuv
(
u
,
v
,
ruv
,
guv
,
buv
);
for
(
int
i
=
0
;
i
<
2
*
width
;
i
+=
4
,
row
+=
dcn
*
2
)
v_uint8
r
[
2
],
g
[
2
],
b
[
2
];
yRGBuvToRGBA
(
vy
[
0
],
ruv
,
guv
,
buv
,
r
[
0
],
g
[
0
],
b
[
0
]);
yRGBuvToRGBA
(
vy
[
1
],
ruv
,
guv
,
buv
,
r
[
1
],
g
[
1
],
b
[
1
]);
if
(
bIdx
)
{
swap
(
r
[
0
],
b
[
0
]);
swap
(
r
[
1
],
b
[
1
]);
}
// [r0...], [r1...] => [r0, r1, r0, r1...], [r0, r1, r0, r1...]
v_uint8
r0_0
,
r0_1
;
v_zip
(
r
[
0
],
r
[
1
],
r0_0
,
r0_1
);
v_uint8
g0_0
,
g0_1
;
v_zip
(
g
[
0
],
g
[
1
],
g0_0
,
g0_1
);
v_uint8
b0_0
,
b0_1
;
v_zip
(
b
[
0
],
b
[
1
],
b0_0
,
b0_1
);
if
(
dcn
==
4
)
{
v_store_interleave
(
row
+
0
*
vsize
,
b0_0
,
g0_0
,
r0_0
,
a
);
v_store_interleave
(
row
+
4
*
vsize
,
b0_1
,
g0_1
,
r0_1
,
a
);
}
else
//dcn == 3
{
v_store_interleave
(
row
+
0
*
vsize
,
b0_0
,
g0_0
,
r0_0
);
v_store_interleave
(
row
+
3
*
vsize
,
b0_1
,
g0_1
,
r0_1
);
}
}
vx_cleanup
();
#endif
for
(;
i
<
2
*
width
;
i
+=
4
,
row
+=
dcn
*
2
)
{
int
u
=
int
(
yuv_src
[
i
+
uidx
])
;
int
v
=
int
(
yuv_src
[
i
+
vidx
])
;
uchar
u
=
yuv_src
[
i
+
uidx
]
;
uchar
v
=
yuv_src
[
i
+
vidx
]
;
int
vy0
=
int
(
yuv_src
[
i
+
yIdx
])
;
int
vy1
=
int
(
yuv_src
[
i
+
yIdx
+
2
])
;
uchar
vy0
=
yuv_src
[
i
+
yIdx
]
;
uchar
vy1
=
yuv_src
[
i
+
yIdx
+
2
]
;
cvtYuv42xxp2RGB8
<
bIdx
,
dcn
,
false
>
(
u
,
v
,
vy0
,
vy1
,
0
,
0
,
row
,
(
uchar
*
)(
0
));
}
...
...
modules/js/src/core_bindings.cpp
View file @
fcec053d
...
...
@@ -289,13 +289,16 @@ namespace binding_utils
float
radius
;
};
#ifdef HAVE_OPENCV_IMGPROC
// Binding helper: calls cv::minEnclosingCircle on `points`, passing the
// center and radius fields of a local Circle as its remaining arguments,
// and returns that Circle by value.
Circle
minEnclosingCircle
(
const
cv
::
Mat
&
points
)
{
Circle
circle
;
// Pass the Circle's own fields so the result can be returned as one value.
cv
::
minEnclosingCircle
(
points
,
circle
.
center
,
circle
.
radius
);
return
circle
;
}
#endif
#ifdef HAVE_OPENCV_VIDEO
emscripten
::
val
CamShiftWrapper
(
const
cv
::
Mat
&
arg1
,
Rect
&
arg2
,
TermCriteria
arg3
)
{
RotatedRect
rotatedRect
=
cv
::
CamShift
(
arg1
,
arg2
,
arg3
);
...
...
@@ -313,6 +316,7 @@ namespace binding_utils
result
.
call
<
void
>
(
"push"
,
arg2
);
return
result
;
}
#endif // HAVE_OPENCV_VIDEO
std
::
string
getExceptionMsg
(
const
cv
::
Exception
&
e
)
{
return
e
.
msg
;
...
...
@@ -551,19 +555,25 @@ EMSCRIPTEN_BINDINGS(binding_utils)
function
(
"exceptionFromPtr"
,
&
binding_utils
::
exceptionFromPtr
,
allow_raw_pointers
());
#ifdef HAVE_OPENCV_IMGPROC
function
(
"minEnclosingCircle"
,
select_overload
<
binding_utils
::
Circle
(
const
cv
::
Mat
&
)
>
(
&
binding_utils
::
minEnclosingCircle
));
#endif
function
(
"minMaxLoc"
,
select_overload
<
binding_utils
::
MinMaxLoc
(
const
cv
::
Mat
&
,
const
cv
::
Mat
&
)
>
(
&
binding_utils
::
minMaxLoc
));
function
(
"minMaxLoc"
,
select_overload
<
binding_utils
::
MinMaxLoc
(
const
cv
::
Mat
&
)
>
(
&
binding_utils
::
minMaxLoc_1
));
#ifdef HAVE_OPENCV_IMGPROC
function
(
"morphologyDefaultBorderValue"
,
&
cv
::
morphologyDefaultBorderValue
);
#endif
function
(
"CV_MAT_DEPTH"
,
&
binding_utils
::
cvMatDepth
);
#ifdef HAVE_OPENCV_VIDEO
function
(
"CamShift"
,
select_overload
<
emscripten
::
val
(
const
cv
::
Mat
&
,
Rect
&
,
TermCriteria
)
>
(
&
binding_utils
::
CamShiftWrapper
));
function
(
"meanShift"
,
select_overload
<
emscripten
::
val
(
const
cv
::
Mat
&
,
Rect
&
,
TermCriteria
)
>
(
&
binding_utils
::
meanShiftWrapper
));
#endif
function
(
"getBuildInformation"
,
&
binding_utils
::
getBuildInformation
);
...
...
modules/js/src/embindgen.py
View file @
fcec053d
...
...
@@ -140,7 +140,7 @@ features2d = {'Feature2D': ['detect', 'compute', 'detectAndCompute', 'descriptor
'AKAZE'
:
[
'create'
,
'setDescriptorType'
,
'getDescriptorType'
,
'setDescriptorSize'
,
'getDescriptorSize'
,
'setDescriptorChannels'
,
'getDescriptorChannels'
,
'setThreshold'
,
'getThreshold'
,
'setNOctaves'
,
'getNOctaves'
,
'setNOctaveLayers'
,
'getNOctaveLayers'
,
'setDiffusivity'
,
'getDiffusivity'
,
'getDefaultName'
],
'DescriptorMatcher'
:
[
'add'
,
'clear'
,
'empty'
,
'isMaskSupported'
,
'train'
,
'match'
,
'knnMatch'
,
'radiusMatch'
,
'clone'
,
'create'
],
'BFMatcher'
:
[
'isMaskSupported'
,
'create'
],
''
:
[
'
FAST'
,
'AGAST'
,
'
drawKeypoints'
,
'drawMatches'
]}
''
:
[
'drawKeypoints'
,
'drawMatches'
]}
photo
=
{
''
:
[
'createAlignMTB'
,
'createCalibrateDebevec'
,
'createCalibrateRobertson'
,
\
'createMergeDebevec'
,
'createMergeMertens'
,
'createMergeRobertson'
,
\
...
...
modules/js/test/test_features2d.js
0 → 100644
View file @
fcec053d
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// When a CommonJS `module.exports` is present, load opencv.js explicitly.
// Otherwise `cv` is expected to be defined already (not shown in this file).
if
(
typeof
module
!==
'undefined'
&&
module
.
exports
)
{
// The environment is Node.js
var
cv
=
require
(
'./opencv.js'
);
// eslint-disable-line no-var
}
/**
 * Builds a synthetic single-channel (CV_8UC1) test image made of nested
 * filled squares, rectangle outlines and two diagonal lines.
 *
 * @param {number} [width=200] Image width in pixels (falsy values use 200).
 * @param {number} [height=200] Image height in pixels (falsy values use 200).
 * @return {cv.Mat} A newly allocated `height` x `width` Mat. The caller owns
 *     it and should call `delete()` on it when done.
 */
function generateTestFrame(width, height) {
    let w = width || 200;
    let h = height || 200;

    // Start from an all-black frame.
    let img = new cv.Mat(h, w, cv.CV_8UC1, new cv.Scalar(0, 0, 0, 0));
    let s = new cv.Scalar(255, 255, 255, 255);
    let s128 = new cv.Scalar(128, 128, 128, 128);

    // White square covering the central half of the frame. `roi()` returns a
    // separate Mat handle, so it has to be released explicitly after use.
    let outerRoi = img.roi(new cv.Rect(w / 4, h / 4, w / 2, h / 2));
    outerRoi.setTo(s);
    outerRoi.delete();

    // Smaller grey square centred inside the white one.
    let innerRoi = img.roi(new cv.Rect(w / 2 - w / 8, h / 2 - h / 8, w / 4, h / 4));
    innerRoi.setTo(s128);
    innerRoi.delete();

    // Two rectangle outlines with different thickness and intensity.
    cv.rectangle(img, new cv.Point(w / 8, h / 8), new cv.Point(w - w / 8, h - h / 8), s, 5);
    cv.rectangle(img, new cv.Point(w / 5, h / 5), new cv.Point(w - w / 5, h - h / 5), s128, 3);

    // Two lines that start outside the frame and meet at the centre.
    cv.line(img, new cv.Point(-w, 0), new cv.Point(w / 2, h / 2), s128, 5);
    cv.line(img, new cv.Point(2*w, 0), new cv.Point(w / 2, h / 2), s, 5);

    return img;
}
// Register the QUnit module named 'Features2D' with an empty options object.
QUnit
.
module
(
'Features2D'
,
{});
// Runs every exposed feature detector on the synthetic test frame and checks
// the number of key points each one reports. All OpenCV objects created here
// are released explicitly once the checks are done.
QUnit.test('Detectors', function(assert) {
    let image = generateTestFrame();
    let kp = new cv.KeyPointVector();

    // [detector class name on `cv` (also used as the assertion message),
    //  expected number of key points on the generated frame]
    let expectations = [
        ['ORB', 67],
        ['MSER', 7],
        ['BRISK', 191],
        ['FastFeatureDetector', 12],
        ['AgastFeatureDetector', 67],
        ['GFTTDetector', 168],
        ['KAZE', 159],
        ['AKAZE', 52],
    ];

    expectations.forEach(function(entry) {
        let name = entry[0];
        let expectedCount = entry[1];

        // The same key point vector is reused for every detector, as before.
        let detector = new cv[name]();
        detector.detect(image, kp);
        assert.equal(kp.size(), expectedCount, name);
        detector.delete();
    });

    kp.delete();
    image.delete();
});
// Computes ORB descriptors on the synthetic test frame, matches them against
// themselves with a brute-force matcher and checks the number of key points
// and matches. All OpenCV objects created here are released at the end.
QUnit.test('BFMatcher', function(assert) {
    // Generate key points.
    let image = generateTestFrame();
    let kp = new cv.KeyPointVector();
    let descriptors = new cv.Mat();
    let orb = new cv.ORB();
    // A default-constructed Mat is passed as the mask argument; keep a handle
    // to it so it can be deleted afterwards.
    let mask = new cv.Mat();
    orb.detectAndCompute(image, mask, kp, descriptors);

    assert.equal(kp.size(), 67);

    // Run a matcher.
    let dm = new cv.DMatchVector();
    let matcher = new cv.BFMatcher();
    matcher.match(descriptors, descriptors, dm);

    assert.equal(dm.size(), 67);

    // Release everything allocated by this test.
    matcher.delete();
    dm.delete();
    mask.delete();
    orb.delete();
    descriptors.delete();
    kp.delete();
    image.delete();
});
modules/js/test/tests.html
View file @
fcec053d
...
...
@@ -29,6 +29,7 @@
<script
type=
"application/javascript"
src=
"test_objdetect.js"
></script>
<script
type=
"application/javascript"
src=
"test_video.js"
></script>
<script
type=
"application/javascript"
src=
"test_photo.js"
></script>
<script
type=
"application/javascript"
src=
"test_features2d.js"
></script>
<script
type=
'text/javascript'
>
QUnit
.
config
.
autostart
=
false
;
...
...
@@ -69,11 +70,5 @@
};
};
</script>
<!--
TODO
<script type="application/javascript" src="test_features2d.js"></script>
-->
</body>
</html>
modules/js/test/tests.js
View file @
fcec053d
...
...
@@ -45,7 +45,8 @@ testrunner.run(
{
code
:
'opencv.js'
,
tests
:
[
'test_mat.js'
,
'test_utils.js'
,
'test_imgproc.js'
,
'test_objdetect.js'
,
'test_video.js'
,
'test_photo.js'
],
'test_objdetect.js'
,
'test_video.js'
,
'test_features2d.js'
,
'test_photo.js'
],
},
function
(
err
,
report
)
{
console
.
log
(
report
.
failed
+
' failed, '
+
report
.
passed
+
' passed'
);
...
...
modules/video/src/optflowgf.cpp
View file @
fcec053d
...
...
@@ -42,6 +42,7 @@
#include "precomp.hpp"
#include "opencl_kernels_video.hpp"
#include "opencv2/core/hal/intrin.hpp"
#if defined __APPLE__ || defined __ANDROID__
#define SMALL_LOCALSIZE
...
...
@@ -433,13 +434,11 @@ FarnebackUpdateFlow_GaussianBlur( const Mat& _R0, const Mat& _R1,
for
(
i
=
0
;
i
<=
m
;
i
++
)
kernel
[
i
]
=
(
float
)(
kernel
[
i
]
*
s
);
#if CV_S
SE2
#if CV_S
IMD128
float
*
simd_kernel
=
alignPtr
(
kernel
+
m
+
1
,
16
);
volatile
bool
useSIMD
=
checkHardwareSupport
(
CV_CPU_SSE
);
if
(
useSIMD
)
{
for
(
i
=
0
;
i
<=
m
;
i
++
)
_mm_store_ps
(
simd_kernel
+
i
*
4
,
_mm_set1_ps
(
kernel
[
i
]));
v_store
(
simd_kernel
+
i
*
4
,
v_setall_f32
(
kernel
[
i
]));
}
#endif
...
...
@@ -457,54 +456,53 @@ FarnebackUpdateFlow_GaussianBlur( const Mat& _R0, const Mat& _R1,
}
x
=
0
;
#if CV_SSE2
if
(
useSIMD
)
#if CV_SIMD128
{
for
(
;
x
<=
width
*
5
-
16
;
x
+=
16
)
{
const
float
*
sptr0
=
srow
[
m
],
*
sptr1
;
__m128
g4
=
_mm_load_ps
(
simd_kernel
);
__m128
s0
,
s1
,
s2
,
s3
;
s0
=
_mm_mul_ps
(
_mm_loadu_ps
(
sptr0
+
x
),
g4
)
;
s1
=
_mm_mul_ps
(
_mm_loadu_ps
(
sptr0
+
x
+
4
),
g4
)
;
s2
=
_mm_mul_ps
(
_mm_loadu_ps
(
sptr0
+
x
+
8
),
g4
)
;
s3
=
_mm_mul_ps
(
_mm_loadu_ps
(
sptr0
+
x
+
12
),
g4
)
;
v_float32x4
g4
=
v_load
(
simd_kernel
);
v_float32x4
s0
,
s1
,
s2
,
s3
;
s0
=
v_load
(
sptr0
+
x
)
*
g4
;
s1
=
v_load
(
sptr0
+
x
+
4
)
*
g4
;
s2
=
v_load
(
sptr0
+
x
+
8
)
*
g4
;
s3
=
v_load
(
sptr0
+
x
+
12
)
*
g4
;
for
(
i
=
1
;
i
<=
m
;
i
++
)
{
__m128
x0
,
x1
;
v_float32x4
x0
,
x1
;
sptr0
=
srow
[
m
+
i
],
sptr1
=
srow
[
m
-
i
];
g4
=
_mm_load_ps
(
simd_kernel
+
i
*
4
);
x0
=
_mm_add_ps
(
_mm_loadu_ps
(
sptr0
+
x
),
_mm_loadu_ps
(
sptr1
+
x
)
);
x1
=
_mm_add_ps
(
_mm_loadu_ps
(
sptr0
+
x
+
4
),
_mm_loadu_ps
(
sptr1
+
x
+
4
)
);
s0
=
_mm_add_ps
(
s0
,
_mm_mul_ps
(
x0
,
g4
)
);
s1
=
_mm_add_ps
(
s1
,
_mm_mul_ps
(
x1
,
g4
)
);
x0
=
_mm_add_ps
(
_mm_loadu_ps
(
sptr0
+
x
+
8
),
_mm_loadu_ps
(
sptr1
+
x
+
8
)
);
x1
=
_mm_add_ps
(
_mm_loadu_ps
(
sptr0
+
x
+
12
),
_mm_loadu_ps
(
sptr1
+
x
+
12
)
);
s2
=
_mm_add_ps
(
s2
,
_mm_mul_ps
(
x0
,
g4
)
);
s3
=
_mm_add_ps
(
s3
,
_mm_mul_ps
(
x1
,
g4
)
);
g4
=
v_load
(
simd_kernel
+
i
*
4
);
x0
=
v_load
(
sptr0
+
x
)
+
v_load
(
sptr1
+
x
);
x1
=
v_load
(
sptr0
+
x
+
4
)
+
v_load
(
sptr1
+
x
+
4
);
s0
=
v_muladd
(
x0
,
g4
,
s0
);
s1
=
v_muladd
(
x1
,
g4
,
s1
);
x0
=
v_load
(
sptr0
+
x
+
8
)
+
v_load
(
sptr1
+
x
+
8
);
x1
=
v_load
(
sptr0
+
x
+
12
)
+
v_load
(
sptr1
+
x
+
12
);
s2
=
v_muladd
(
x0
,
g4
,
s2
);
s3
=
v_muladd
(
x1
,
g4
,
s3
);
}
_mm_store_ps
(
vsum
+
x
,
s0
);
_mm_store_ps
(
vsum
+
x
+
4
,
s1
);
_mm_store_ps
(
vsum
+
x
+
8
,
s2
);
_mm_store_ps
(
vsum
+
x
+
12
,
s3
);
v_store
(
vsum
+
x
,
s0
);
v_store
(
vsum
+
x
+
4
,
s1
);
v_store
(
vsum
+
x
+
8
,
s2
);
v_store
(
vsum
+
x
+
12
,
s3
);
}
for
(
;
x
<=
width
*
5
-
4
;
x
+=
4
)
{
const
float
*
sptr0
=
srow
[
m
],
*
sptr1
;
__m128
g4
=
_mm_load_ps
(
simd_kernel
);
__m128
s0
=
_mm_mul_ps
(
_mm_loadu_ps
(
sptr0
+
x
),
g4
)
;
v_float32x4
g4
=
v_load
(
simd_kernel
);
v_float32x4
s0
=
v_load
(
sptr0
+
x
)
*
g4
;
for
(
i
=
1
;
i
<=
m
;
i
++
)
{
sptr0
=
srow
[
m
+
i
],
sptr1
=
srow
[
m
-
i
];
g4
=
_mm_load_ps
(
simd_kernel
+
i
*
4
);
__m128
x0
=
_mm_add_ps
(
_mm_loadu_ps
(
sptr0
+
x
),
_mm_loadu_ps
(
sptr1
+
x
)
);
s0
=
_mm_add_ps
(
s0
,
_mm_mul_ps
(
x0
,
g4
)
);
g4
=
v_load
(
simd_kernel
+
i
*
4
);
v_float32x4
x0
=
v_load
(
sptr0
+
x
)
+
v_load
(
sptr1
+
x
);
s0
=
v_muladd
(
x0
,
g4
,
s0
);
}
_mm_store_ps
(
vsum
+
x
,
s0
);
v_store
(
vsum
+
x
,
s0
);
}
}
#endif
...
...
@@ -525,28 +523,25 @@ FarnebackUpdateFlow_GaussianBlur( const Mat& _R0, const Mat& _R1,
// horizontal blur
x
=
0
;
#if CV_SSE2
if
(
useSIMD
)
#if CV_SIMD128
{
for
(
;
x
<=
width
*
5
-
8
;
x
+=
8
)
{
__m128
g4
=
_mm_load_ps
(
simd_kernel
);
__m128
s0
=
_mm_mul_ps
(
_mm_loadu_ps
(
vsum
+
x
),
g4
)
;
__m128
s1
=
_mm_mul_ps
(
_mm_loadu_ps
(
vsum
+
x
+
4
),
g4
)
;
v_float32x4
g4
=
v_load
(
simd_kernel
);
v_float32x4
s0
=
v_load
(
vsum
+
x
)
*
g4
;
v_float32x4
s1
=
v_load
(
vsum
+
x
+
4
)
*
g4
;
for
(
i
=
1
;
i
<=
m
;
i
++
)
{
g4
=
_mm_load_ps
(
simd_kernel
+
i
*
4
);
__m128
x0
=
_mm_add_ps
(
_mm_loadu_ps
(
vsum
+
x
-
i
*
5
),
_mm_loadu_ps
(
vsum
+
x
+
i
*
5
));
__m128
x1
=
_mm_add_ps
(
_mm_loadu_ps
(
vsum
+
x
-
i
*
5
+
4
),
_mm_loadu_ps
(
vsum
+
x
+
i
*
5
+
4
));
s0
=
_mm_add_ps
(
s0
,
_mm_mul_ps
(
x0
,
g4
));
s1
=
_mm_add_ps
(
s1
,
_mm_mul_ps
(
x1
,
g4
));
g4
=
v_load
(
simd_kernel
+
i
*
4
);
v_float32x4
x0
=
v_load
(
vsum
+
x
-
i
*
5
)
+
v_load
(
vsum
+
x
+
i
*
5
);
v_float32x4
x1
=
v_load
(
vsum
+
x
-
i
*
5
+
4
)
+
v_load
(
vsum
+
x
+
i
*
5
+
4
);
s0
=
v_muladd
(
x0
,
g4
,
s0
);
s1
=
v_muladd
(
x1
,
g4
,
s1
);
}
_mm_store_ps
(
hsum
+
x
,
s0
);
_mm_store_ps
(
hsum
+
x
+
4
,
s1
);
v_store
(
hsum
+
x
,
s0
);
v_store
(
hsum
+
x
+
4
,
s1
);
}
}
#endif
...
...
platforms/js/build_js.py
View file @
fcec053d
...
...
@@ -113,6 +113,7 @@ class Builder:
"-DWITH_GPHOTO2=OFF"
,
"-DWITH_LAPACK=OFF"
,
"-DWITH_ITT=OFF"
,
"-DWITH_QUIRC=OFF"
,
"-DBUILD_ZLIB=ON"
,
"-DBUILD_opencv_apps=OFF"
,
"-DBUILD_opencv_calib3d=ON"
,
# No bindings provided. This module is used as a dependency for other modules.
...
...
@@ -130,9 +131,11 @@ class Builder:
"-DBUILD_opencv_superres=OFF"
,
"-DBUILD_opencv_stitching=OFF"
,
"-DBUILD_opencv_java=OFF"
,
"-DBUILD_opencv_java_bindings_generator=OFF"
,
"-DBUILD_opencv_js=ON"
,
"-DBUILD_opencv_python2=OFF"
,
"-DBUILD_opencv_python3=OFF"
,
"-DBUILD_opencv_python_bindings_generator=OFF"
,
"-DBUILD_EXAMPLES=OFF"
,
"-DBUILD_PACKAGE=OFF"
,
"-DBUILD_TESTS=OFF"
,
...
...
samples/dnn/object_detection.cpp
View file @
fcec053d
...
...
@@ -153,51 +153,39 @@ void postprocess(Mat& frame, const std::vector<Mat>& outs, Net& net)
std
::
vector
<
int
>
classIds
;
std
::
vector
<
float
>
confidences
;
std
::
vector
<
Rect
>
boxes
;
if
(
net
.
getLayer
(
0
)
->
outputNameToIndex
(
"im_info"
)
!=
-
1
)
// Faster-RCNN or R-FCN
if
(
outLayerType
==
"DetectionOutput"
)
{
// Network produces output blob with a shape 1x1xNx7 where N is the number of
// detections and each detection is a vector of values
// [batchId, classId, confidence, left, top, right, bottom]
CV_Assert
(
outs
.
size
()
==
1
);
float
*
data
=
(
float
*
)
outs
[
0
].
data
;
for
(
size_t
i
=
0
;
i
<
outs
[
0
].
total
();
i
+=
7
)
CV_Assert
(
outs
.
size
()
>
0
);
for
(
size_t
k
=
0
;
k
<
outs
.
size
();
k
++
)
{
float
confidence
=
data
[
i
+
2
]
;
if
(
confidence
>
confThreshold
)
float
*
data
=
(
float
*
)
outs
[
k
].
data
;
for
(
size_t
i
=
0
;
i
<
outs
[
k
].
total
();
i
+=
7
)
{
int
left
=
(
int
)
data
[
i
+
3
];
int
top
=
(
int
)
data
[
i
+
4
];
int
right
=
(
int
)
data
[
i
+
5
];
int
bottom
=
(
int
)
data
[
i
+
6
];
int
width
=
right
-
left
+
1
;
int
height
=
bottom
-
top
+
1
;
classIds
.
push_back
((
int
)(
data
[
i
+
1
])
-
1
);
// Skip 0th background class id.
boxes
.
push_back
(
Rect
(
left
,
top
,
width
,
height
));
confidences
.
push_back
(
confidence
);
}
}
}
else
if
(
outLayerType
==
"DetectionOutput"
)
{
// Network produces output blob with a shape 1x1xNx7 where N is the number of
// detections and each detection is a vector of values
// [batchId, classId, confidence, left, top, right, bottom]
CV_Assert
(
outs
.
size
()
==
1
);
float
*
data
=
(
float
*
)
outs
[
0
].
data
;
for
(
size_t
i
=
0
;
i
<
outs
[
0
].
total
();
i
+=
7
)
{
float
confidence
=
data
[
i
+
2
];
if
(
confidence
>
confThreshold
)
{
int
left
=
(
int
)(
data
[
i
+
3
]
*
frame
.
cols
);
int
top
=
(
int
)(
data
[
i
+
4
]
*
frame
.
rows
);
int
right
=
(
int
)(
data
[
i
+
5
]
*
frame
.
cols
);
int
bottom
=
(
int
)(
data
[
i
+
6
]
*
frame
.
rows
);
int
width
=
right
-
left
+
1
;
int
height
=
bottom
-
top
+
1
;
classIds
.
push_back
((
int
)(
data
[
i
+
1
])
-
1
);
// Skip 0th background class id.
boxes
.
push_back
(
Rect
(
left
,
top
,
width
,
height
));
confidences
.
push_back
(
confidence
);
float
confidence
=
data
[
i
+
2
];
if
(
confidence
>
confThreshold
)
{
int
left
=
(
int
)
data
[
i
+
3
];
int
top
=
(
int
)
data
[
i
+
4
];
int
right
=
(
int
)
data
[
i
+
5
];
int
bottom
=
(
int
)
data
[
i
+
6
];
int
width
=
right
-
left
+
1
;
int
height
=
bottom
-
top
+
1
;
if
(
width
*
height
<=
1
)
{
left
=
(
int
)(
data
[
i
+
3
]
*
frame
.
cols
);
top
=
(
int
)(
data
[
i
+
4
]
*
frame
.
rows
);
right
=
(
int
)(
data
[
i
+
5
]
*
frame
.
cols
);
bottom
=
(
int
)(
data
[
i
+
6
]
*
frame
.
rows
);
width
=
right
-
left
+
1
;
height
=
bottom
-
top
+
1
;
}
classIds
.
push_back
((
int
)(
data
[
i
+
1
])
-
1
);
// Skip 0th background class id.
boxes
.
push_back
(
Rect
(
left
,
top
,
width
,
height
));
confidences
.
push_back
(
confidence
);
}
}
}
}
...
...
samples/dnn/object_detection.py
View file @
fcec053d
...
...
@@ -102,7 +102,7 @@ def postprocess(frame, outs):
classIds
=
[]
confidences
=
[]
boxes
=
[]
if
net
.
getLayer
(
0
)
.
outputNameToIndex
(
'im_info'
)
!=
-
1
:
# Faster-RCNN or R-FCN
if
lastLayer
.
type
==
'DetectionOutput'
:
# Network produces output blob with a shape 1x1xNx7 where N is the number of
# detections and each detection is a vector of values
# [batchId, classId, confidence, left, top, right, bottom]
...
...
@@ -116,23 +116,13 @@ def postprocess(frame, outs):
bottom
=
int
(
detection
[
6
])
width
=
right
-
left
+
1
height
=
bottom
-
top
+
1
classIds
.
append
(
int
(
detection
[
1
])
-
1
)
# Skip background label
confidences
.
append
(
float
(
confidence
))
boxes
.
append
([
left
,
top
,
width
,
height
])
elif
lastLayer
.
type
==
'DetectionOutput'
:
# Network produces output blob with a shape 1x1xNx7 where N is the number of
# detections and each detection is a vector of values
# [batchId, classId, confidence, left, top, right, bottom]
for
out
in
outs
:
for
detection
in
out
[
0
,
0
]:
confidence
=
detection
[
2
]
if
confidence
>
confThreshold
:
left
=
int
(
detection
[
3
]
*
frameWidth
)
top
=
int
(
detection
[
4
]
*
frameHeight
)
right
=
int
(
detection
[
5
]
*
frameWidth
)
bottom
=
int
(
detection
[
6
]
*
frameHeight
)
width
=
right
-
left
+
1
height
=
bottom
-
top
+
1
if
width
*
height
<=
1
:
left
=
int
(
detection
[
3
]
*
frameWidth
)
top
=
int
(
detection
[
4
]
*
frameHeight
)
right
=
int
(
detection
[
5
]
*
frameWidth
)
bottom
=
int
(
detection
[
6
]
*
frameHeight
)
width
=
right
-
left
+
1
height
=
bottom
-
top
+
1
classIds
.
append
(
int
(
detection
[
1
])
-
1
)
# Skip background label
confidences
.
append
(
float
(
confidence
))
boxes
.
append
([
left
,
top
,
width
,
height
])
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment