Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
1661e839
Commit
1661e839
authored
Aug 30, 2019
by
Alexander Alekhin
Browse files
Options
Browse Files
Download
Plain Diff
Merge remote-tracking branch 'upstream/3.4' into merge-3.4
parents
834c9925
f224d740
Hide whitespace changes
Inline
Side-by-side
Showing
15 changed files
with
180 additions
and
119 deletions
+180
-119
fast_math.hpp
modules/core/include/opencv2/core/fast_math.hpp
+1
-1
intrin_avx.hpp
modules/core/include/opencv2/core/hal/intrin_avx.hpp
+9
-12
intrin_vsx.hpp
modules/core/include/opencv2/core/hal/intrin_vsx.hpp
+4
-4
vsx_utils.hpp
modules/core/include/opencv2/core/vsx_utils.hpp
+2
-0
test_intrin_utils.hpp
modules/core/test/test_intrin_utils.hpp
+6
-2
fully_connected_layer.cpp
modules/dnn/src/layers/fully_connected_layer.cpp
+1
-1
tf_graph_simplifier.cpp
modules/dnn/src/tensorflow/tf_graph_simplifier.cpp
+2
-2
test_halide_layers.cpp
modules/dnn/test/test_halide_layers.cpp
+5
-2
test_tf_importer.cpp
modules/dnn/test/test_tf_importer.cpp
+1
-0
fast.cpp
modules/features2d/src/fast.cpp
+3
-6
colorscale_turbo.jpg
modules/imgproc/doc/pics/colormaps/colorscale_turbo.jpg
+0
-0
imgproc.hpp
modules/imgproc/include/opencv2/imgproc.hpp
+2
-1
colormap.cpp
modules/imgproc/src/colormap.cpp
+31
-0
ts.cpp
modules/ts/src/ts.cpp
+2
-1
lkpyramid.cpp
modules/video/src/lkpyramid.cpp
+111
-87
No files found.
modules/core/include/opencv2/core/fast_math.hpp
View file @
1661e839
...
...
@@ -102,7 +102,7 @@
#define CV_INLINE_ROUND_DBL(value) \
int out; \
double temp; \
__asm__( "fctiw %[temp],%[in]\n\tmf
fp
rwz %[out],%[temp]\n\t" : [out] "=r" (out), [temp] "=d" (temp) : [in] "d" ((double)(value)) : ); \
__asm__( "fctiw %[temp],%[in]\n\tmf
vs
rwz %[out],%[temp]\n\t" : [out] "=r" (out), [temp] "=d" (temp) : [in] "d" ((double)(value)) : ); \
return out;
// FP32 also works with FP64 routine above
...
...
modules/core/include/opencv2/core/hal/intrin_avx.hpp
View file @
1661e839
...
...
@@ -1231,19 +1231,16 @@ inline int v_signmask(const v_int16x16& a)
inline
int
v_signmask
(
const
v_uint16x16
&
a
)
{
return
v_signmask
(
v_reinterpret_as_s16
(
a
));
}
inline
int
v_signmask
(
const
v_int32x8
&
a
)
{
v_int16x16
a16
=
v_pack
(
a
,
a
);
return
v_signmask
(
v_pack
(
a16
,
a16
))
&
0xFF
;
}
inline
int
v_signmask
(
const
v_uint32x8
&
a
)
{
return
v_signmask
(
v_reinterpret_as_s32
(
a
));
}
inline
int
v_signmask
(
const
v_float32x8
&
a
)
{
return
_mm256_movemask_ps
(
a
.
val
);
}
inline
int
v_signmask
(
const
v_float64x4
&
a
)
{
return
_mm256_movemask_pd
(
a
.
val
);
}
inline
int
v_signmask
(
const
v_int32x8
&
a
)
{
return
v_signmask
(
v_reinterpret_as_f32
(
a
));
}
inline
int
v_signmask
(
const
v_uint32x8
&
a
)
{
return
v_signmask
(
v_reinterpret_as_f32
(
a
));
}
inline
int
v_scan_forward
(
const
v_int8x32
&
a
)
{
return
trailingZeros32
(
v_signmask
(
v_reinterpret_as_s8
(
a
)));
}
inline
int
v_scan_forward
(
const
v_uint8x32
&
a
)
{
return
trailingZeros32
(
v_signmask
(
v_reinterpret_as_s8
(
a
)));
}
inline
int
v_scan_forward
(
const
v_int16x16
&
a
)
{
return
trailingZeros32
(
v_signmask
(
v_reinterpret_as_s8
(
a
)))
/
2
;
}
...
...
@@ -1270,10 +1267,10 @@ inline int v_scan_forward(const v_float64x4& a) { return trailingZeros32(v_signm
OPENCV_HAL_IMPL_AVX_CHECK
(
v_uint8x32
,
OPENCV_HAL_1ST
,
-
1
)
OPENCV_HAL_IMPL_AVX_CHECK
(
v_int8x32
,
OPENCV_HAL_1ST
,
-
1
)
OPENCV_HAL_IMPL_AVX_CHECK
(
v_uint16x16
,
OPENCV_HAL_AND
,
(
int
)
0xaaaa
)
OPENCV_HAL_IMPL_AVX_CHECK
(
v_int16x16
,
OPENCV_HAL_AND
,
(
int
)
0xaaaa
)
OPENCV_HAL_IMPL_AVX_CHECK
(
v_uint32x8
,
OPENCV_HAL_AND
,
(
int
)
0x8888
)
OPENCV_HAL_IMPL_AVX_CHECK
(
v_int32x8
,
OPENCV_HAL_AND
,
(
int
)
0x8888
)
OPENCV_HAL_IMPL_AVX_CHECK
(
v_uint16x16
,
OPENCV_HAL_AND
,
(
int
)
0xaaaa
aaaa
)
OPENCV_HAL_IMPL_AVX_CHECK
(
v_int16x16
,
OPENCV_HAL_AND
,
(
int
)
0xaaaa
aaaa
)
OPENCV_HAL_IMPL_AVX_CHECK
(
v_uint32x8
,
OPENCV_HAL_AND
,
(
int
)
0x8888
8888
)
OPENCV_HAL_IMPL_AVX_CHECK
(
v_int32x8
,
OPENCV_HAL_AND
,
(
int
)
0x8888
8888
)
#define OPENCV_HAL_IMPL_AVX_CHECK_FLT(_Tpvec, allmask) \
inline bool v_check_all(const _Tpvec& a) \
...
...
modules/core/include/opencv2/core/hal/intrin_vsx.hpp
View file @
1661e839
...
...
@@ -764,10 +764,10 @@ inline scalartype v_reduce_##suffix(const _Tpvec& a)
rs = func(rs, vec_sld(rs, rs, 2)); \
return vec_extract(func(rs, vec_sld(rs, rs, 1)), 0); \
}
OPENCV_HAL_IMPL_VSX_REDUCE_OP_
8
(
v_uint8x16
,
vec_uchar16
,
uchar
,
max
,
vec_max
)
OPENCV_HAL_IMPL_VSX_REDUCE_OP_
8
(
v_uint8x16
,
vec_uchar16
,
uchar
,
min
,
vec_min
)
OPENCV_HAL_IMPL_VSX_REDUCE_OP_
8
(
v_int8x16
,
vec_char16
,
schar
,
max
,
vec_max
)
OPENCV_HAL_IMPL_VSX_REDUCE_OP_
8
(
v_int8x16
,
vec_char16
,
schar
,
min
,
vec_min
)
OPENCV_HAL_IMPL_VSX_REDUCE_OP_
16
(
v_uint8x16
,
vec_uchar16
,
uchar
,
max
,
vec_max
)
OPENCV_HAL_IMPL_VSX_REDUCE_OP_
16
(
v_uint8x16
,
vec_uchar16
,
uchar
,
min
,
vec_min
)
OPENCV_HAL_IMPL_VSX_REDUCE_OP_
16
(
v_int8x16
,
vec_char16
,
schar
,
max
,
vec_max
)
OPENCV_HAL_IMPL_VSX_REDUCE_OP_
16
(
v_int8x16
,
vec_char16
,
schar
,
min
,
vec_min
)
inline
v_float32x4
v_reduce_sum4
(
const
v_float32x4
&
a
,
const
v_float32x4
&
b
,
const
v_float32x4
&
c
,
const
v_float32x4
&
d
)
...
...
modules/core/include/opencv2/core/vsx_utils.hpp
View file @
1661e839
...
...
@@ -363,10 +363,12 @@ VSX_FINLINE(Tvec) vec_popcntu(const Tvec2& a) \
VSX_IMPL_POPCNTU
(
vec_uchar16
,
vec_char16
,
vec_uchar16_c
);
VSX_IMPL_POPCNTU
(
vec_ushort8
,
vec_short8
,
vec_ushort8_c
);
VSX_IMPL_POPCNTU
(
vec_uint4
,
vec_int4
,
vec_uint4_c
);
VSX_IMPL_POPCNTU
(
vec_udword2
,
vec_dword2
,
vec_udword2_c
);
// redirect unsigned types
VSX_REDIRECT_1RG
(
vec_uchar16
,
vec_uchar16
,
vec_popcntu
,
vec_popcnt
)
VSX_REDIRECT_1RG
(
vec_ushort8
,
vec_ushort8
,
vec_popcntu
,
vec_popcnt
)
VSX_REDIRECT_1RG
(
vec_uint4
,
vec_uint4
,
vec_popcntu
,
vec_popcnt
)
VSX_REDIRECT_1RG
(
vec_udword2
,
vec_udword2
,
vec_popcntu
,
vec_popcnt
)
// converts between single and double precision
VSX_REDIRECT_1RG
(
vec_float4
,
vec_double2
,
vec_cvfo
,
__builtin_vsx_xvcvdpsp
)
...
...
modules/core/test/test_intrin_utils.hpp
View file @
1661e839
...
...
@@ -804,11 +804,14 @@ template<typename R> struct TheTest
all1s
;
all1s
.
ui
=
(
uint_type
)
-
1
;
LaneType
mask_one
=
all1s
.
l
;
dataB
[
R
::
nlanes
-
1
]
=
mask_one
;
R
l
=
dataB
;
dataB
[
1
]
=
mask_one
;
dataB
[
R
::
nlanes
/
2
]
=
mask_one
;
dataB
[
R
::
nlanes
-
1
]
=
mask_one
;
dataC
*=
(
LaneType
)
-
1
;
R
a
=
dataA
,
b
=
dataB
,
c
=
dataC
,
d
=
dataD
,
e
=
dataE
;
dataC
[
R
::
nlanes
-
1
]
=
0
;
R
nl
=
dataC
;
EXPECT_EQ
(
2
,
v_signmask
(
a
));
#if CV_SIMD_WIDTH <= 32
...
...
@@ -818,11 +821,12 @@ template<typename R> struct TheTest
EXPECT_EQ
(
false
,
v_check_all
(
a
));
EXPECT_EQ
(
false
,
v_check_all
(
b
));
EXPECT_EQ
(
true
,
v_check_all
(
c
));
EXPECT_EQ
(
false
,
v_check_all
(
nl
));
EXPECT_EQ
(
true
,
v_check_any
(
a
));
EXPECT_EQ
(
true
,
v_check_any
(
b
));
EXPECT_EQ
(
true
,
v_check_any
(
c
));
EXPECT_EQ
(
true
,
v_check_any
(
l
));
R
f
=
v_select
(
b
,
d
,
e
);
Data
<
R
>
resF
=
f
;
for
(
int
i
=
0
;
i
<
R
::
nlanes
;
++
i
)
...
...
modules/dnn/src/layers/fully_connected_layer.cpp
View file @
1661e839
...
...
@@ -449,7 +449,7 @@ public:
InferenceEngine
::
Builder
::
Layer
l
=
ieLayer
;
addConstantData
(
"weights"
,
wrapToInfEngineBlob
(
blobs
[
0
],
{(
size_t
)
blobs
[
0
].
size
[
0
],
(
size_t
)
blobs
[
0
].
size
[
1
],
1
,
1
},
InferenceEngine
::
Layout
::
OIHW
),
l
);
if
(
b
lobs
.
size
()
>
1
)
if
(
b
ias
)
addConstantData
(
"biases"
,
wrapToInfEngineBlob
(
blobs
[
1
],
{(
size_t
)
outNum
},
InferenceEngine
::
Layout
::
C
),
l
);
return
Ptr
<
BackendNode
>
(
new
InfEngineBackendNode
(
l
));
...
...
modules/dnn/src/tensorflow/tf_graph_simplifier.cpp
View file @
1661e839
...
...
@@ -787,7 +787,7 @@ void RemoveIdentityOps(tensorflow::GraphDef& net)
const
tensorflow
::
NodeDef
&
layer
=
net
.
node
(
li
);
String
type
=
layer
.
op
();
if
(
type
==
"Identity"
||
type
==
"Dropout"
)
{
if
(
type
==
"Identity"
||
type
==
"Dropout"
||
type
==
"PlaceholderWithDefault"
)
{
identity_ops_idx
.
push_back
(
li
);
identity_ops
[
layer
.
name
()]
=
layer
.
input
(
0
);
}
...
...
@@ -1031,7 +1031,7 @@ void removePhaseSwitches(tensorflow::GraphDef& net)
}
}
nodesToRemove
.
push_back
(
i
);
if
(
node
.
op
()
==
"Merge"
)
if
(
node
.
op
()
==
"Merge"
||
node
.
op
()
==
"Switch"
)
mergeOpSubgraphNodes
.
push
(
i
);
}
}
...
...
modules/dnn/test/test_halide_layers.cpp
View file @
1661e839
...
...
@@ -386,8 +386,11 @@ TEST_P(FullyConnected, Accuracy)
bool
hasBias
=
get
<
3
>
(
GetParam
());
Backend
backendId
=
get
<
0
>
(
get
<
4
>
(
GetParam
()));
Target
targetId
=
get
<
1
>
(
get
<
4
>
(
GetParam
()));
if
(
backendId
==
DNN_BACKEND_INFERENCE_ENGINE
)
applyTestTag
(
CV_TEST_TAG_DNN_SKIP_IE
);
if
(
backendId
==
DNN_BACKEND_INFERENCE_ENGINE
&&
(
targetId
==
DNN_TARGET_OPENCL_FP16
||
(
targetId
==
DNN_TARGET_MYRIAD
&&
getInferenceEngineVPUType
()
==
CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X
)))
{
applyTestTag
(
CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16
);
applyTestTag
(
CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X
);
}
Mat
weights
(
outChannels
,
inChannels
*
inSize
.
height
*
inSize
.
width
,
CV_32F
);
randu
(
weights
,
-
1.0
f
,
1.0
f
);
...
...
modules/dnn/test/test_tf_importer.cpp
View file @
1661e839
...
...
@@ -729,6 +729,7 @@ TEST_P(Test_TensorFlow_layers, subpixel)
TEST_P
(
Test_TensorFlow_layers
,
keras_mobilenet_head
)
{
runTensorFlowNet
(
"keras_mobilenet_head"
);
runTensorFlowNet
(
"keras_learning_phase"
);
}
TEST_P
(
Test_TensorFlow_layers
,
resize_bilinear
)
...
...
modules/features2d/src/fast.cpp
View file @
1661e839
...
...
@@ -159,15 +159,12 @@ void FAST_t(InputArray _img, std::vector<KeyPoint>& keypoints, int threshold, bo
}
max0
=
K16
<
v_max
(
max0
,
max1
);
int
m
=
-
v_reduce_sum
(
v_reinterpret_as_s8
(
max0
));
uchar
mflag
[
16
];
v_store
(
mflag
,
max0
);
unsigned
int
m
=
v_signmask
(
v_reinterpret_as_s8
(
max0
));
for
(
k
=
0
;
m
>
0
&&
k
<
16
;
k
++
)
for
(
k
=
0
;
m
>
0
&&
k
<
16
;
k
++
,
m
>>=
1
)
{
if
(
mflag
[
k
]
)
if
(
m
&
1
)
{
--
m
;
cornerpos
[
ncorners
++
]
=
j
+
k
;
if
(
nonmax_suppression
)
{
...
...
modules/imgproc/doc/pics/colormaps/colorscale_turbo.jpg
0 → 100644
View file @
1661e839
1.53 KB
modules/imgproc/include/opencv2/imgproc.hpp
View file @
1661e839
...
...
@@ -4208,7 +4208,8 @@ enum ColormapTypes
COLORMAP_VIRIDIS
=
16
,
//!< 
COLORMAP_CIVIDIS
=
17
,
//!< 
COLORMAP_TWILIGHT
=
18
,
//!< 
COLORMAP_TWILIGHT_SHIFTED
=
19
//!< 
COLORMAP_TWILIGHT_SHIFTED
=
19
,
//!< 
COLORMAP_TURBO
=
20
//!< 
};
/** @example samples/cpp/falsecolor.cpp
...
...
modules/imgproc/src/colormap.cpp
View file @
1661e839
...
...
@@ -657,6 +657,36 @@ namespace colormap
}
};
// Equals the colormap "Turbo" proposed by Google.
// https://ai.googleblog.com/2019/08/turbo-improved-rainbow-colormap-for.html
// https://gist.github.com/mikhailov-work/6a308c20e494d9e0ccc29036b28faa7a
class
Turbo
:
public
ColorMap
{
public
:
Turbo
()
:
ColorMap
()
{
init
(
256
);
}
Turbo
(
int
n
)
:
ColorMap
()
{
init
(
n
);
}
void
init
(
int
n
)
{
// define the basemap
static
const
float
r
[]
=
{
0.18995
f
,
0.19483
f
,
0.19956
f
,
0.20415
f
,
0.20860
f
,
0.21291
f
,
0.21708
f
,
0.22111
f
,
0.22500
f
,
0.22875
f
,
0.23236
f
,
0.23582
f
,
0.23915
f
,
0.24234
f
,
0.24539
f
,
0.24830
f
,
0.25107
f
,
0.25369
f
,
0.25618
f
,
0.25853
f
,
0.26074
f
,
0.26280
f
,
0.26473
f
,
0.26652
f
,
0.26816
f
,
0.26967
f
,
0.27103
f
,
0.27226
f
,
0.27334
f
,
0.27429
f
,
0.27509
f
,
0.27576
f
,
0.27628
f
,
0.27667
f
,
0.27691
f
,
0.27701
f
,
0.27698
f
,
0.27680
f
,
0.27648
f
,
0.27603
f
,
0.27543
f
,
0.27469
f
,
0.27381
f
,
0.27273
f
,
0.27106
f
,
0.26878
f
,
0.26592
f
,
0.26252
f
,
0.25862
f
,
0.25425
f
,
0.24946
f
,
0.24427
f
,
0.23874
f
,
0.23288
f
,
0.22676
f
,
0.22039
f
,
0.21382
f
,
0.20708
f
,
0.20021
f
,
0.19326
f
,
0.18625
f
,
0.17923
f
,
0.17223
f
,
0.16529
f
,
0.15844
f
,
0.15173
f
,
0.14519
f
,
0.13886
f
,
0.13278
f
,
0.12698
f
,
0.12151
f
,
0.11639
f
,
0.11167
f
,
0.10738
f
,
0.10357
f
,
0.10026
f
,
0.09750
f
,
0.09532
f
,
0.09377
f
,
0.09287
f
,
0.09267
f
,
0.09320
f
,
0.09451
f
,
0.09662
f
,
0.09958
f
,
0.10342
f
,
0.10815
f
,
0.11374
f
,
0.12014
f
,
0.12733
f
,
0.13526
f
,
0.14391
f
,
0.15323
f
,
0.16319
f
,
0.17377
f
,
0.18491
f
,
0.19659
f
,
0.20877
f
,
0.22142
f
,
0.23449
f
,
0.24797
f
,
0.26180
f
,
0.27597
f
,
0.29042
f
,
0.30513
f
,
0.32006
f
,
0.33517
f
,
0.35043
f
,
0.36581
f
,
0.38127
f
,
0.39678
f
,
0.41229
f
,
0.42778
f
,
0.44321
f
,
0.45854
f
,
0.47375
f
,
0.48879
f
,
0.50362
f
,
0.51822
f
,
0.53255
f
,
0.54658
f
,
0.56026
f
,
0.57357
f
,
0.58646
f
,
0.59891
f
,
0.61088
f
,
0.62233
f
,
0.63323
f
,
0.64362
f
,
0.65394
f
,
0.66428
f
,
0.67462
f
,
0.68494
f
,
0.69525
f
,
0.70553
f
,
0.71577
f
,
0.72596
f
,
0.73610
f
,
0.74617
f
,
0.75617
f
,
0.76608
f
,
0.77591
f
,
0.78563
f
,
0.79524
f
,
0.80473
f
,
0.81410
f
,
0.82333
f
,
0.83241
f
,
0.84133
f
,
0.85010
f
,
0.85868
f
,
0.86709
f
,
0.87530
f
,
0.88331
f
,
0.89112
f
,
0.89870
f
,
0.90605
f
,
0.91317
f
,
0.92004
f
,
0.92666
f
,
0.93301
f
,
0.93909
f
,
0.94489
f
,
0.95039
f
,
0.95560
f
,
0.96049
f
,
0.96507
f
,
0.96931
f
,
0.97323
f
,
0.97679
f
,
0.98000
f
,
0.98289
f
,
0.98549
f
,
0.98781
f
,
0.98986
f
,
0.99163
f
,
0.99314
f
,
0.99438
f
,
0.99535
f
,
0.99607
f
,
0.99654
f
,
0.99675
f
,
0.99672
f
,
0.99644
f
,
0.99593
f
,
0.99517
f
,
0.99419
f
,
0.99297
f
,
0.99153
f
,
0.98987
f
,
0.98799
f
,
0.98590
f
,
0.98360
f
,
0.98108
f
,
0.97837
f
,
0.97545
f
,
0.97234
f
,
0.96904
f
,
0.96555
f
,
0.96187
f
,
0.95801
f
,
0.95398
f
,
0.94977
f
,
0.94538
f
,
0.94084
f
,
0.93612
f
,
0.93125
f
,
0.92623
f
,
0.92105
f
,
0.91572
f
,
0.91024
f
,
0.90463
f
,
0.89888
f
,
0.89298
f
,
0.88691
f
,
0.88066
f
,
0.87422
f
,
0.86760
f
,
0.86079
f
,
0.85380
f
,
0.84662
f
,
0.83926
f
,
0.83172
f
,
0.82399
f
,
0.81608
f
,
0.80799
f
,
0.79971
f
,
0.79125
f
,
0.78260
f
,
0.77377
f
,
0.76476
f
,
0.75556
f
,
0.74617
f
,
0.73661
f
,
0.72686
f
,
0.71692
f
,
0.70680
f
,
0.69650
f
,
0.68602
f
,
0.67535
f
,
0.66449
f
,
0.65345
f
,
0.64223
f
,
0.63082
f
,
0.61923
f
,
0.60746
f
,
0.59550
f
,
0.58336
f
,
0.57103
f
,
0.55852
f
,
0.54583
f
,
0.53295
f
,
0.51989
f
,
0.50664
f
,
0.49321
f
,
0.47960
f
};
static
const
float
g
[]
=
{
0.07176
f
,
0.08339
f
,
0.09498
f
,
0.10652
f
,
0.11802
f
,
0.12947
f
,
0.14087
f
,
0.15223
f
,
0.16354
f
,
0.17481
f
,
0.18603
f
,
0.19720
f
,
0.20833
f
,
0.21941
f
,
0.23044
f
,
0.24143
f
,
0.25237
f
,
0.26327
f
,
0.27412
f
,
0.28492
f
,
0.29568
f
,
0.30639
f
,
0.31706
f
,
0.32768
f
,
0.33825
f
,
0.34878
f
,
0.35926
f
,
0.36970
f
,
0.38008
f
,
0.39043
f
,
0.40072
f
,
0.41097
f
,
0.42118
f
,
0.43134
f
,
0.44145
f
,
0.45152
f
,
0.46153
f
,
0.47151
f
,
0.48144
f
,
0.49132
f
,
0.50115
f
,
0.51094
f
,
0.52069
f
,
0.53040
f
,
0.54015
f
,
0.54995
f
,
0.55979
f
,
0.56967
f
,
0.57958
f
,
0.58950
f
,
0.59943
f
,
0.60937
f
,
0.61931
f
,
0.62923
f
,
0.63913
f
,
0.64901
f
,
0.65886
f
,
0.66866
f
,
0.67842
f
,
0.68812
f
,
0.69775
f
,
0.70732
f
,
0.71680
f
,
0.72620
f
,
0.73551
f
,
0.74472
f
,
0.75381
f
,
0.76279
f
,
0.77165
f
,
0.78037
f
,
0.78896
f
,
0.79740
f
,
0.80569
f
,
0.81381
f
,
0.82177
f
,
0.82955
f
,
0.83714
f
,
0.84455
f
,
0.85175
f
,
0.85875
f
,
0.86554
f
,
0.87211
f
,
0.87844
f
,
0.88454
f
,
0.89040
f
,
0.89600
f
,
0.90142
f
,
0.90673
f
,
0.91193
f
,
0.91701
f
,
0.92197
f
,
0.92680
f
,
0.93151
f
,
0.93609
f
,
0.94053
f
,
0.94484
f
,
0.94901
f
,
0.95304
f
,
0.95692
f
,
0.96065
f
,
0.96423
f
,
0.96765
f
,
0.97092
f
,
0.97403
f
,
0.97697
f
,
0.97974
f
,
0.98234
f
,
0.98477
f
,
0.98702
f
,
0.98909
f
,
0.99098
f
,
0.99268
f
,
0.99419
f
,
0.99551
f
,
0.99663
f
,
0.99755
f
,
0.99828
f
,
0.99879
f
,
0.99910
f
,
0.99919
f
,
0.99907
f
,
0.99873
f
,
0.99817
f
,
0.99739
f
,
0.99638
f
,
0.99514
f
,
0.99366
f
,
0.99195
f
,
0.98999
f
,
0.98775
f
,
0.98524
f
,
0.98246
f
,
0.97941
f
,
0.97610
f
,
0.97255
f
,
0.96875
f
,
0.96470
f
,
0.96043
f
,
0.95593
f
,
0.95121
f
,
0.94627
f
,
0.94113
f
,
0.93579
f
,
0.93025
f
,
0.92452
f
,
0.91861
f
,
0.91253
f
,
0.90627
f
,
0.89986
f
,
0.89328
f
,
0.88655
f
,
0.87968
f
,
0.87267
f
,
0.86553
f
,
0.85826
f
,
0.85087
f
,
0.84337
f
,
0.83576
f
,
0.82806
f
,
0.82025
f
,
0.81236
f
,
0.80439
f
,
0.79634
f
,
0.78823
f
,
0.78005
f
,
0.77181
f
,
0.76352
f
,
0.75519
f
,
0.74682
f
,
0.73842
f
,
0.73000
f
,
0.72140
f
,
0.71250
f
,
0.70330
f
,
0.69382
f
,
0.68408
f
,
0.67408
f
,
0.66386
f
,
0.65341
f
,
0.64277
f
,
0.63193
f
,
0.62093
f
,
0.60977
f
,
0.59846
f
,
0.58703
f
,
0.57549
f
,
0.56386
f
,
0.55214
f
,
0.54036
f
,
0.52854
f
,
0.51667
f
,
0.50479
f
,
0.49291
f
,
0.48104
f
,
0.46920
f
,
0.45740
f
,
0.44565
f
,
0.43399
f
,
0.42241
f
,
0.41093
f
,
0.39958
f
,
0.38836
f
,
0.37729
f
,
0.36638
f
,
0.35566
f
,
0.34513
f
,
0.33482
f
,
0.32473
f
,
0.31489
f
,
0.30530
f
,
0.29599
f
,
0.28696
f
,
0.27824
f
,
0.26981
f
,
0.26152
f
,
0.25334
f
,
0.24526
f
,
0.23730
f
,
0.22945
f
,
0.22170
f
,
0.21407
f
,
0.20654
f
,
0.19912
f
,
0.19182
f
,
0.18462
f
,
0.17753
f
,
0.17055
f
,
0.16368
f
,
0.15693
f
,
0.15028
f
,
0.14374
f
,
0.13731
f
,
0.13098
f
,
0.12477
f
,
0.11867
f
,
0.11268
f
,
0.10680
f
,
0.10102
f
,
0.09536
f
,
0.08980
f
,
0.08436
f
,
0.07902
f
,
0.07380
f
,
0.06868
f
,
0.06367
f
,
0.05878
f
,
0.05399
f
,
0.04931
f
,
0.04474
f
,
0.04028
f
,
0.03593
f
,
0.03169
f
,
0.02756
f
,
0.02354
f
,
0.01963
f
,
0.01583
f
};
static
const
float
b
[]
=
{
0.23217
f
,
0.26149
f
,
0.29024
f
,
0.31844
f
,
0.34607
f
,
0.37314
f
,
0.39964
f
,
0.42558
f
,
0.45096
f
,
0.47578
f
,
0.50004
f
,
0.52373
f
,
0.54686
f
,
0.56942
f
,
0.59142
f
,
0.61286
f
,
0.63374
f
,
0.65406
f
,
0.67381
f
,
0.69300
f
,
0.71162
f
,
0.72968
f
,
0.74718
f
,
0.76412
f
,
0.78050
f
,
0.79631
f
,
0.81156
f
,
0.82624
f
,
0.84037
f
,
0.85393
f
,
0.86692
f
,
0.87936
f
,
0.89123
f
,
0.90254
f
,
0.91328
f
,
0.92347
f
,
0.93309
f
,
0.94214
f
,
0.95064
f
,
0.95857
f
,
0.96594
f
,
0.97275
f
,
0.97899
f
,
0.98461
f
,
0.98930
f
,
0.99303
f
,
0.99583
f
,
0.99773
f
,
0.99876
f
,
0.99896
f
,
0.99835
f
,
0.99697
f
,
0.99485
f
,
0.99202
f
,
0.98851
f
,
0.98436
f
,
0.97959
f
,
0.97423
f
,
0.96833
f
,
0.96190
f
,
0.95498
f
,
0.94761
f
,
0.93981
f
,
0.93161
f
,
0.92305
f
,
0.91416
f
,
0.90496
f
,
0.89550
f
,
0.88580
f
,
0.87590
f
,
0.86581
f
,
0.85559
f
,
0.84525
f
,
0.83484
f
,
0.82437
f
,
0.81389
f
,
0.80342
f
,
0.79299
f
,
0.78264
f
,
0.77240
f
,
0.76230
f
,
0.75237
f
,
0.74265
f
,
0.73316
f
,
0.72393
f
,
0.71500
f
,
0.70599
f
,
0.69651
f
,
0.68660
f
,
0.67627
f
,
0.66556
f
,
0.65448
f
,
0.64308
f
,
0.63137
f
,
0.61938
f
,
0.60713
f
,
0.59466
f
,
0.58199
f
,
0.56914
f
,
0.55614
f
,
0.54303
f
,
0.52981
f
,
0.51653
f
,
0.50321
f
,
0.48987
f
,
0.47654
f
,
0.46325
f
,
0.45002
f
,
0.43688
f
,
0.42386
f
,
0.41098
f
,
0.39826
f
,
0.38575
f
,
0.37345
f
,
0.36140
f
,
0.34963
f
,
0.33816
f
,
0.32701
f
,
0.31622
f
,
0.30581
f
,
0.29581
f
,
0.28623
f
,
0.27712
f
,
0.26849
f
,
0.26038
f
,
0.25280
f
,
0.24579
f
,
0.23937
f
,
0.23356
f
,
0.22835
f
,
0.22370
f
,
0.21960
f
,
0.21602
f
,
0.21294
f
,
0.21032
f
,
0.20815
f
,
0.20640
f
,
0.20504
f
,
0.20406
f
,
0.20343
f
,
0.20311
f
,
0.20310
f
,
0.20336
f
,
0.20386
f
,
0.20459
f
,
0.20552
f
,
0.20663
f
,
0.20788
f
,
0.20926
f
,
0.21074
f
,
0.21230
f
,
0.21391
f
,
0.21555
f
,
0.21719
f
,
0.21880
f
,
0.22038
f
,
0.22188
f
,
0.22328
f
,
0.22456
f
,
0.22570
f
,
0.22667
f
,
0.22744
f
,
0.22800
f
,
0.22831
f
,
0.22836
f
,
0.22811
f
,
0.22754
f
,
0.22663
f
,
0.22536
f
,
0.22369
f
,
0.22161
f
,
0.21918
f
,
0.21650
f
,
0.21358
f
,
0.21043
f
,
0.20706
f
,
0.20348
f
,
0.19971
f
,
0.19577
f
,
0.19165
f
,
0.18738
f
,
0.18297
f
,
0.17842
f
,
0.17376
f
,
0.16899
f
,
0.16412
f
,
0.15918
f
,
0.15417
f
,
0.14910
f
,
0.14398
f
,
0.13883
f
,
0.13367
f
,
0.12849
f
,
0.12332
f
,
0.11817
f
,
0.11305
f
,
0.10797
f
,
0.10294
f
,
0.09798
f
,
0.09310
f
,
0.08831
f
,
0.08362
f
,
0.07905
f
,
0.07461
f
,
0.07031
f
,
0.06616
f
,
0.06218
f
,
0.05837
f
,
0.05475
f
,
0.05134
f
,
0.04814
f
,
0.04516
f
,
0.04243
f
,
0.03993
f
,
0.03753
f
,
0.03521
f
,
0.03297
f
,
0.03082
f
,
0.02875
f
,
0.02677
f
,
0.02487
f
,
0.02305
f
,
0.02131
f
,
0.01966
f
,
0.01809
f
,
0.01660
f
,
0.01520
f
,
0.01387
f
,
0.01264
f
,
0.01148
f
,
0.01041
f
,
0.00942
f
,
0.00851
f
,
0.00769
f
,
0.00695
f
,
0.00629
f
,
0.00571
f
,
0.00522
f
,
0.00481
f
,
0.00449
f
,
0.00424
f
,
0.00408
f
,
0.00401
f
,
0.00401
f
,
0.00410
f
,
0.00427
f
,
0.00453
f
,
0.00486
f
,
0.00529
f
,
0.00579
f
,
0.00638
f
,
0.00705
f
,
0.00780
f
,
0.00863
f
,
0.00955
f
,
0.01055
f
};
// breakpoints
Mat
X
=
linspace
(
0
,
1
,
256
);
// now build lookup table
this
->
_lut
=
ColorMap
::
linear_colormap
(
X
,
Mat
(
256
,
1
,
CV_32FC1
,
(
void
*
)
r
).
clone
(),
// red
Mat
(
256
,
1
,
CV_32FC1
,
(
void
*
)
g
).
clone
(),
// green
Mat
(
256
,
1
,
CV_32FC1
,
(
void
*
)
b
).
clone
(),
// blue
n
);
}
};
// UserColormap .
class
UserColorMap
:
public
ColorMap
{
public
:
...
...
@@ -724,6 +754,7 @@ namespace colormap
colormap
==
COLORMAP_RAINBOW
?
(
colormap
::
ColorMap
*
)(
new
colormap
::
Rainbow
)
:
colormap
==
COLORMAP_SPRING
?
(
colormap
::
ColorMap
*
)(
new
colormap
::
Spring
)
:
colormap
==
COLORMAP_SUMMER
?
(
colormap
::
ColorMap
*
)(
new
colormap
::
Summer
)
:
colormap
==
COLORMAP_TURBO
?
(
colormap
::
ColorMap
*
)(
new
colormap
::
Turbo
)
:
colormap
==
COLORMAP_TWILIGHT
?
(
colormap
::
ColorMap
*
)(
new
colormap
::
Twilight
)
:
colormap
==
COLORMAP_TWILIGHT_SHIFTED
?
(
colormap
::
ColorMap
*
)(
new
colormap
::
TwilightShifted
)
:
colormap
==
COLORMAP_VIRIDIS
?
(
colormap
::
ColorMap
*
)(
new
colormap
::
Viridis
)
:
...
...
modules/ts/src/ts.cpp
View file @
1661e839
...
...
@@ -512,8 +512,9 @@ string TS::str_from_code( const TS::FailureCode code )
return
"Generic/Unknown"
;
}
static
int
tsErrorCallback
(
int
status
,
const
char
*
func_name
,
const
char
*
err_msg
,
const
char
*
file_name
,
int
line
,
TS
*
ts
)
static
int
tsErrorCallback
(
int
status
,
const
char
*
func_name
,
const
char
*
err_msg
,
const
char
*
file_name
,
int
line
,
void
*
data
)
{
TS
*
ts
=
(
TS
*
)
data
;
const
char
*
delim
=
std
::
string
(
err_msg
).
find
(
'\n'
)
==
std
::
string
::
npos
?
""
:
"
\n
"
;
ts
->
printf
(
TS
::
LOG
,
"OpenCV Error:
\n\t
%s (%s%s) in %s, file %s, line %d
\n
"
,
cvErrorStr
(
status
),
delim
,
err_msg
,
func_name
[
0
]
!=
0
?
func_name
:
"unknown function"
,
file_name
,
line
);
return
0
;
...
...
modules/video/src/lkpyramid.cpp
View file @
1661e839
...
...
@@ -237,13 +237,12 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const
acctype
iA11
=
0
,
iA12
=
0
,
iA22
=
0
;
float
A11
,
A12
,
A22
;
#if CV_SSE2
__m128i
qw0
=
_mm_set1_epi32
(
iw00
+
(
iw01
<<
16
));
__m128i
qw1
=
_mm_set1_epi32
(
iw10
+
(
iw11
<<
16
));
__m128i
z
=
_mm_setzero_si128
();
__m128i
qdelta_d
=
_mm_set1_epi32
(
1
<<
(
W_BITS1
-
1
));
__m128i
qdelta
=
_mm_set1_epi32
(
1
<<
(
W_BITS1
-
5
-
1
));
__m128
qA11
=
_mm_setzero_ps
(),
qA12
=
_mm_setzero_ps
(),
qA22
=
_mm_setzero_ps
();
#if CV_SIMD128 && !CV_NEON
v_int16x8
qw0
((
short
)(
iw00
),
(
short
)(
iw01
),
(
short
)(
iw00
),
(
short
)(
iw01
),
(
short
)(
iw00
),
(
short
)(
iw01
),
(
short
)(
iw00
),
(
short
)(
iw01
));
v_int16x8
qw1
((
short
)(
iw10
),
(
short
)(
iw11
),
(
short
)(
iw10
),
(
short
)(
iw11
),
(
short
)(
iw10
),
(
short
)(
iw11
),
(
short
)(
iw10
),
(
short
)(
iw11
));
v_int32x4
qdelta_d
=
v_setall_s32
(
1
<<
(
W_BITS1
-
1
));
v_int32x4
qdelta
=
v_setall_s32
(
1
<<
(
W_BITS1
-
5
-
1
));
v_float32x4
qA11
=
v_setzero_f32
(),
qA12
=
v_setzero_f32
(),
qA22
=
v_setzero_f32
();
#endif
#if CV_NEON
...
...
@@ -273,44 +272,75 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const
x
=
0
;
#if CV_S
SE2
for
(
;
x
<=
winSize
.
width
*
cn
-
4
;
x
+=
4
,
dsrc
+=
4
*
2
,
dIptr
+=
4
*
2
)
#if CV_S
IMD128 && !CV_NEON
for
(
;
x
<=
winSize
.
width
*
cn
-
8
;
x
+=
8
,
dsrc
+=
8
*
2
,
dIptr
+=
8
*
2
)
{
__m128i
v00
,
v01
,
v10
,
v11
,
t0
,
t1
;
v00
=
_mm_unpacklo_epi8
(
_mm_cvtsi32_si128
(
*
(
const
int
*
)(
src
+
x
)),
z
);
v01
=
_mm_unpacklo_epi8
(
_mm_cvtsi32_si128
(
*
(
const
int
*
)(
src
+
x
+
cn
)),
z
);
v10
=
_mm_unpacklo_epi8
(
_mm_cvtsi32_si128
(
*
(
const
int
*
)(
src
+
x
+
stepI
)),
z
);
v11
=
_mm_unpacklo_epi8
(
_mm_cvtsi32_si128
(
*
(
const
int
*
)(
src
+
x
+
stepI
+
cn
)),
z
);
t0
=
_mm_add_epi32
(
_mm_madd_epi16
(
_mm_unpacklo_epi16
(
v00
,
v01
),
qw0
),
_mm_madd_epi16
(
_mm_unpacklo_epi16
(
v10
,
v11
),
qw1
));
t0
=
_mm_srai_epi32
(
_mm_add_epi32
(
t0
,
qdelta
),
W_BITS1
-
5
);
_mm_storel_epi64
((
__m128i
*
)(
Iptr
+
x
),
_mm_packs_epi32
(
t0
,
t0
));
v00
=
_mm_loadu_si128
((
const
__m128i
*
)(
dsrc
));
v01
=
_mm_loadu_si128
((
const
__m128i
*
)(
dsrc
+
cn2
));
v10
=
_mm_loadu_si128
((
const
__m128i
*
)(
dsrc
+
dstep
));
v11
=
_mm_loadu_si128
((
const
__m128i
*
)(
dsrc
+
dstep
+
cn2
));
t0
=
_mm_add_epi32
(
_mm_madd_epi16
(
_mm_unpacklo_epi16
(
v00
,
v01
),
qw0
),
_mm_madd_epi16
(
_mm_unpacklo_epi16
(
v10
,
v11
),
qw1
));
t1
=
_mm_add_epi32
(
_mm_madd_epi16
(
_mm_unpackhi_epi16
(
v00
,
v01
),
qw0
),
_mm_madd_epi16
(
_mm_unpackhi_epi16
(
v10
,
v11
),
qw1
));
t0
=
_mm_srai_epi32
(
_mm_add_epi32
(
t0
,
qdelta_d
),
W_BITS1
);
t1
=
_mm_srai_epi32
(
_mm_add_epi32
(
t1
,
qdelta_d
),
W_BITS1
);
v00
=
_mm_packs_epi32
(
t0
,
t1
);
// Ix0 Iy0 Ix1 Iy1 ...
_mm_storeu_si128
((
__m128i
*
)
dIptr
,
v00
);
t0
=
_mm_srai_epi32
(
v00
,
16
);
// Iy0 Iy1 Iy2 Iy3
t1
=
_mm_srai_epi32
(
_mm_slli_epi32
(
v00
,
16
),
16
);
// Ix0 Ix1 Ix2 Ix3
__m128
fy
=
_mm_cvtepi32_ps
(
t0
);
__m128
fx
=
_mm_cvtepi32_ps
(
t1
);
qA22
=
_mm_add_ps
(
qA22
,
_mm_mul_ps
(
fy
,
fy
));
qA12
=
_mm_add_ps
(
qA12
,
_mm_mul_ps
(
fx
,
fy
));
qA11
=
_mm_add_ps
(
qA11
,
_mm_mul_ps
(
fx
,
fx
));
v_int32x4
t0
,
t1
;
v_int16x8
v00
,
v01
,
v10
,
v11
,
t00
,
t01
,
t10
,
t11
;
v00
=
v_reinterpret_as_s16
(
v_load_expand
(
src
+
x
));
v01
=
v_reinterpret_as_s16
(
v_load_expand
(
src
+
x
+
cn
));
v10
=
v_reinterpret_as_s16
(
v_load_expand
(
src
+
x
+
stepI
));
v11
=
v_reinterpret_as_s16
(
v_load_expand
(
src
+
x
+
stepI
+
cn
));
v_zip
(
v00
,
v01
,
t00
,
t01
);
v_zip
(
v10
,
v11
,
t10
,
t11
);
t0
=
v_dotprod
(
t00
,
qw0
,
qdelta
)
+
v_dotprod
(
t10
,
qw1
);
t1
=
v_dotprod
(
t01
,
qw0
,
qdelta
)
+
v_dotprod
(
t11
,
qw1
);
t0
=
t0
>>
(
W_BITS1
-
5
);
t1
=
t1
>>
(
W_BITS1
-
5
);
v_store
(
Iptr
+
x
,
v_pack
(
t0
,
t1
));
v00
=
v_reinterpret_as_s16
(
v_load
(
dsrc
));
v01
=
v_reinterpret_as_s16
(
v_load
(
dsrc
+
cn2
));
v10
=
v_reinterpret_as_s16
(
v_load
(
dsrc
+
dstep
));
v11
=
v_reinterpret_as_s16
(
v_load
(
dsrc
+
dstep
+
cn2
));
v_zip
(
v00
,
v01
,
t00
,
t01
);
v_zip
(
v10
,
v11
,
t10
,
t11
);
t0
=
v_dotprod
(
t00
,
qw0
,
qdelta_d
)
+
v_dotprod
(
t10
,
qw1
);
t1
=
v_dotprod
(
t01
,
qw0
,
qdelta_d
)
+
v_dotprod
(
t11
,
qw1
);
t0
=
t0
>>
W_BITS1
;
t1
=
t1
>>
W_BITS1
;
v00
=
v_pack
(
t0
,
t1
);
// Ix0 Iy0 Ix1 Iy1 ...
v_store
(
dIptr
,
v00
);
v00
=
v_reinterpret_as_s16
(
v_interleave_pairs
(
v_reinterpret_as_s32
(
v_interleave_pairs
(
v00
))));
v_expand
(
v00
,
t1
,
t0
);
v_float32x4
fy
=
v_cvt_f32
(
t0
);
v_float32x4
fx
=
v_cvt_f32
(
t1
);
qA22
=
v_muladd
(
fy
,
fy
,
qA22
);
qA12
=
v_muladd
(
fx
,
fy
,
qA12
);
qA11
=
v_muladd
(
fx
,
fx
,
qA11
);
v00
=
v_reinterpret_as_s16
(
v_load
(
dsrc
+
4
*
2
));
v01
=
v_reinterpret_as_s16
(
v_load
(
dsrc
+
4
*
2
+
cn2
));
v10
=
v_reinterpret_as_s16
(
v_load
(
dsrc
+
4
*
2
+
dstep
));
v11
=
v_reinterpret_as_s16
(
v_load
(
dsrc
+
4
*
2
+
dstep
+
cn2
));
v_zip
(
v00
,
v01
,
t00
,
t01
);
v_zip
(
v10
,
v11
,
t10
,
t11
);
t0
=
v_dotprod
(
t00
,
qw0
,
qdelta_d
)
+
v_dotprod
(
t10
,
qw1
);
t1
=
v_dotprod
(
t01
,
qw0
,
qdelta_d
)
+
v_dotprod
(
t11
,
qw1
);
t0
=
t0
>>
W_BITS1
;
t1
=
t1
>>
W_BITS1
;
v00
=
v_pack
(
t0
,
t1
);
// Ix0 Iy0 Ix1 Iy1 ...
v_store
(
dIptr
+
4
*
2
,
v00
);
v00
=
v_reinterpret_as_s16
(
v_interleave_pairs
(
v_reinterpret_as_s32
(
v_interleave_pairs
(
v00
))));
v_expand
(
v00
,
t1
,
t0
);
fy
=
v_cvt_f32
(
t0
);
fx
=
v_cvt_f32
(
t1
);
qA22
=
v_muladd
(
fy
,
fy
,
qA22
);
qA12
=
v_muladd
(
fx
,
fy
,
qA12
);
qA11
=
v_muladd
(
fx
,
fx
,
qA11
);
}
#endif
...
...
@@ -417,14 +447,10 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const
}
}
#if CV_SSE2
float
CV_DECL_ALIGNED
(
16
)
A11buf
[
4
],
A12buf
[
4
],
A22buf
[
4
];
_mm_store_ps
(
A11buf
,
qA11
);
_mm_store_ps
(
A12buf
,
qA12
);
_mm_store_ps
(
A22buf
,
qA22
);
iA11
+=
A11buf
[
0
]
+
A11buf
[
1
]
+
A11buf
[
2
]
+
A11buf
[
3
];
iA12
+=
A12buf
[
0
]
+
A12buf
[
1
]
+
A12buf
[
2
]
+
A12buf
[
3
];
iA22
+=
A22buf
[
0
]
+
A22buf
[
1
]
+
A22buf
[
2
]
+
A22buf
[
3
];
#if CV_SIMD128 && !CV_NEON
iA11
+=
v_reduce_sum
(
qA11
);
iA12
+=
v_reduce_sum
(
qA12
);
iA22
+=
v_reduce_sum
(
qA22
);
#endif
#if CV_NEON
...
...
@@ -477,10 +503,10 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const
iw11
=
(
1
<<
W_BITS
)
-
iw00
-
iw01
-
iw10
;
acctype
ib1
=
0
,
ib2
=
0
;
float
b1
,
b2
;
#if CV_S
SE2
qw0
=
_mm_set1_epi32
(
iw00
+
(
iw01
<<
16
));
qw1
=
_mm_set1_epi32
(
iw10
+
(
iw11
<<
16
));
__m128
qb0
=
_mm_setzero_ps
(),
qb1
=
_mm_setzero_ps
();
#if CV_S
IMD128 && !CV_NEON
qw0
=
v_int16x8
((
short
)(
iw00
),
(
short
)(
iw01
),
(
short
)(
iw00
),
(
short
)(
iw01
),
(
short
)(
iw00
),
(
short
)(
iw01
),
(
short
)(
iw00
),
(
short
)(
iw01
));
qw1
=
v_int16x8
((
short
)(
iw10
),
(
short
)(
iw11
),
(
short
)(
iw10
),
(
short
)(
iw11
),
(
short
)(
iw10
),
(
short
)(
iw11
),
(
short
)(
iw10
),
(
short
)(
iw11
));
v_float32x4
qb0
=
v_setzero_f32
(),
qb1
=
v_setzero_f32
();
#endif
#if CV_NEON
...
...
@@ -501,34 +527,32 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const
x
=
0
;
#if CV_S
SE2
#if CV_S
IMD128 && !CV_NEON
for
(
;
x
<=
winSize
.
width
*
cn
-
8
;
x
+=
8
,
dIptr
+=
8
*
2
)
{
__m128i
diff0
=
_mm_loadu_si128
((
const
__m128i
*
)(
Iptr
+
x
)),
diff1
;
__m128i
v00
=
_mm_unpacklo_epi8
(
_mm_loadl_epi64
((
const
__m128i
*
)(
Jptr
+
x
)),
z
);
__m128i
v01
=
_mm_unpacklo_epi8
(
_mm_loadl_epi64
((
const
__m128i
*
)(
Jptr
+
x
+
cn
)),
z
);
__m128i
v10
=
_mm_unpacklo_epi8
(
_mm_loadl_epi64
((
const
__m128i
*
)(
Jptr
+
x
+
stepJ
)),
z
);
__m128i
v11
=
_mm_unpacklo_epi8
(
_mm_loadl_epi64
((
const
__m128i
*
)(
Jptr
+
x
+
stepJ
+
cn
)),
z
);
__m128i
t0
=
_mm_add_epi32
(
_mm_madd_epi16
(
_mm_unpacklo_epi16
(
v00
,
v01
),
qw0
),
_mm_madd_epi16
(
_mm_unpacklo_epi16
(
v10
,
v11
),
qw1
));
__m128i
t1
=
_mm_add_epi32
(
_mm_madd_epi16
(
_mm_unpackhi_epi16
(
v00
,
v01
),
qw0
),
_mm_madd_epi16
(
_mm_unpackhi_epi16
(
v10
,
v11
),
qw1
));
t0
=
_mm_srai_epi32
(
_mm_add_epi32
(
t0
,
qdelta
),
W_BITS1
-
5
);
t1
=
_mm_srai_epi32
(
_mm_add_epi32
(
t1
,
qdelta
),
W_BITS1
-
5
);
diff0
=
_mm_subs_epi16
(
_mm_packs_epi32
(
t0
,
t1
),
diff0
);
diff1
=
_mm_unpackhi_epi16
(
diff0
,
diff0
);
diff0
=
_mm_unpacklo_epi16
(
diff0
,
diff0
);
// It0 It0 It1 It1 ...
v00
=
_mm_loadu_si128
((
const
__m128i
*
)(
dIptr
));
// Ix0 Iy0 Ix1 Iy1 ...
v01
=
_mm_loadu_si128
((
const
__m128i
*
)(
dIptr
+
8
));
v10
=
_mm_unpacklo_epi16
(
v00
,
v01
);
v11
=
_mm_unpackhi_epi16
(
v00
,
v01
);
v00
=
_mm_unpacklo_epi16
(
diff0
,
diff1
);
v01
=
_mm_unpackhi_epi16
(
diff0
,
diff1
);
v00
=
_mm_madd_epi16
(
v00
,
v10
);
v11
=
_mm_madd_epi16
(
v01
,
v11
);
qb0
=
_mm_add_ps
(
qb0
,
_mm_cvtepi32_ps
(
v00
));
qb1
=
_mm_add_ps
(
qb1
,
_mm_cvtepi32_ps
(
v11
));
v_int16x8
diff0
=
v_reinterpret_as_s16
(
v_load
(
Iptr
+
x
)),
diff1
,
diff2
;
v_int16x8
v00
=
v_reinterpret_as_s16
(
v_load_expand
(
Jptr
+
x
));
v_int16x8
v01
=
v_reinterpret_as_s16
(
v_load_expand
(
Jptr
+
x
+
cn
));
v_int16x8
v10
=
v_reinterpret_as_s16
(
v_load_expand
(
Jptr
+
x
+
stepJ
));
v_int16x8
v11
=
v_reinterpret_as_s16
(
v_load_expand
(
Jptr
+
x
+
stepJ
+
cn
));
v_int32x4
t0
,
t1
;
v_int16x8
t00
,
t01
,
t10
,
t11
;
v_zip
(
v00
,
v01
,
t00
,
t01
);
v_zip
(
v10
,
v11
,
t10
,
t11
);
t0
=
v_dotprod
(
t00
,
qw0
,
qdelta
)
+
v_dotprod
(
t10
,
qw1
);
t1
=
v_dotprod
(
t01
,
qw0
,
qdelta
)
+
v_dotprod
(
t11
,
qw1
);
t0
=
t0
>>
(
W_BITS1
-
5
);
t1
=
t1
>>
(
W_BITS1
-
5
);
diff0
=
v_pack
(
t0
,
t1
)
-
diff0
;
v_zip
(
diff0
,
diff0
,
diff2
,
diff1
);
// It0 It0 It1 It1 ...
v00
=
v_reinterpret_as_s16
(
v_load
(
dIptr
));
// Ix0 Iy0 Ix1 Iy1 ...
v01
=
v_reinterpret_as_s16
(
v_load
(
dIptr
+
8
));
v_zip
(
v00
,
v01
,
v10
,
v11
);
v_zip
(
diff2
,
diff1
,
v00
,
v01
);
qb0
+=
v_cvt_f32
(
v_dotprod
(
v00
,
v10
));
qb1
+=
v_cvt_f32
(
v_dotprod
(
v01
,
v11
));
}
#endif
...
...
@@ -614,11 +638,11 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const
}
}
#if CV_S
SE2
float
CV_DECL_ALIGNED
(
16
)
bbuf
[
4
]
;
_mm_store_ps
(
bbuf
,
_mm_add_ps
(
qb0
,
qb1
)
);
ib1
+=
bbuf
[
0
]
+
bbuf
[
2
]
;
ib2
+=
bbuf
[
1
]
+
bbuf
[
3
]
;
#if CV_S
IMD128 && !CV_NEON
v_float32x4
qf0
,
qf1
;
v_recombine
(
v_interleave_pairs
(
qb0
+
qb1
),
v_setzero_f32
(),
qf0
,
qf1
);
ib1
+=
v_reduce_sum
(
qf0
)
;
ib2
+=
v_reduce_sum
(
qf1
)
;
#endif
#if CV_NEON
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment