Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
31c79668
Commit
31c79668
authored
Jun 07, 2017
by
Vadim Pisarevsky
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #8803 from 4ekmah:sgbm_modehh4_SIMD
parents
7b8d1073
a113e8f0
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
171 additions
and
9 deletions
+171
-9
perf_stereosgbm.cpp
modules/calib3d/perf/perf_stereosgbm.cpp
+1
-1
stereosgbm.cpp
modules/calib3d/src/stereosgbm.cpp
+170
-8
No files found.
modules/calib3d/perf/perf_stereosgbm.cpp
View file @
31c79668
...
...
@@ -47,7 +47,7 @@ using namespace cv;
void
MakeArtificialExample
(
RNG
rng
,
Mat
&
dst_left_view
,
Mat
&
dst_view
);
CV_ENUM
(
SGBMModes
,
StereoSGBM
::
MODE_SGBM
,
StereoSGBM
::
MODE_SGBM_3WAY
);
CV_ENUM
(
SGBMModes
,
StereoSGBM
::
MODE_SGBM
,
StereoSGBM
::
MODE_SGBM_3WAY
,
StereoSGBM
::
MODE_HH4
);
typedef
tuple
<
Size
,
int
,
SGBMModes
>
SGBMParams
;
typedef
TestBaseWithParam
<
SGBMParams
>
TestStereoCorresp
;
...
...
modules/calib3d/src/stereosgbm.cpp
View file @
31c79668
...
...
@@ -155,9 +155,13 @@ static void calcPixelCostBT( const Mat& img1, const Mat& img2, int y,
int
n1
=
y
>
0
?
-
(
int
)
img1
.
step
:
0
,
s1
=
y
<
img1
.
rows
-
1
?
(
int
)
img1
.
step
:
0
;
int
n2
=
y
>
0
?
-
(
int
)
img2
.
step
:
0
,
s2
=
y
<
img2
.
rows
-
1
?
(
int
)
img2
.
step
:
0
;
int
minX_cmn
=
std
::
min
(
minX1
,
minX2
)
-
1
;
int
maxX_cmn
=
std
::
max
(
maxX1
,
maxX2
)
+
1
;
minX_cmn
=
std
::
max
(
minX_cmn
,
1
);
maxX_cmn
=
std
::
min
(
maxX_cmn
,
width
-
1
);
if
(
cn
==
1
)
{
for
(
x
=
1
;
x
<
width
-
1
;
x
++
)
for
(
x
=
minX_cmn
;
x
<
maxX_cmn
;
x
++
)
{
prow1
[
x
]
=
tab
[(
row1
[
x
+
1
]
-
row1
[
x
-
1
])
*
2
+
row1
[
x
+
n1
+
1
]
-
row1
[
x
+
n1
-
1
]
+
row1
[
x
+
s1
+
1
]
-
row1
[
x
+
s1
-
1
]];
prow2
[
width
-
1
-
x
]
=
tab
[(
row2
[
x
+
1
]
-
row2
[
x
-
1
])
*
2
+
row2
[
x
+
n2
+
1
]
-
row2
[
x
+
n2
-
1
]
+
row2
[
x
+
s2
+
1
]
-
row2
[
x
+
s2
-
1
]];
...
...
@@ -168,7 +172,7 @@ static void calcPixelCostBT( const Mat& img1, const Mat& img2, int y,
}
else
{
for
(
x
=
1
;
x
<
width
-
1
;
x
++
)
for
(
x
=
minX_cmn
;
x
<
maxX_cmn
;
x
++
)
{
prow1
[
x
]
=
tab
[(
row1
[
x
*
3
+
3
]
-
row1
[
x
*
3
-
3
])
*
2
+
row1
[
x
*
3
+
n1
+
3
]
-
row1
[
x
*
3
+
n1
-
3
]
+
row1
[
x
*
3
+
s1
+
3
]
-
row1
[
x
*
3
+
s1
-
3
]];
prow1
[
x
+
width
]
=
tab
[(
row1
[
x
*
3
+
4
]
-
row1
[
x
*
3
-
2
])
*
2
+
row1
[
x
*
3
+
n1
+
4
]
-
row1
[
x
*
3
+
n1
-
2
]
+
row1
[
x
*
3
+
s1
+
4
]
-
row1
[
x
*
3
+
s1
-
2
]];
...
...
@@ -864,6 +868,7 @@ struct CalcVerticalSums: public ParallelLoopBody
Cbuf
=
alignedBuf
;
Sbuf
=
Cbuf
+
CSBufSize
;
hsumBuf
=
Sbuf
+
CSBufSize
;
useSIMD
=
hasSIMD128
();
}
void
operator
()(
const
Range
&
range
)
const
...
...
@@ -951,6 +956,24 @@ struct CalcVerticalSums: public ParallelLoopBody
const
CostType
*
pixAdd
=
pixDiff
+
std
::
min
(
x
+
SW2
*
D
,
(
width1
-
1
)
*
D
);
const
CostType
*
pixSub
=
pixDiff
+
std
::
max
(
x
-
(
SW2
+
1
)
*
D
,
0
);
#if CV_SIMD128
if
(
useSIMD
)
{
for
(
d
=
0
;
d
<
D
;
d
+=
8
)
{
v_int16x8
hv
=
v_load
(
hsumAdd
+
x
-
D
+
d
);
v_int16x8
Cx
=
v_load
(
Cprev
+
x
+
d
);
v_int16x8
psub
=
v_load
(
pixSub
+
d
);
v_int16x8
padd
=
v_load
(
pixAdd
+
d
);
hv
=
(
hv
-
psub
+
padd
);
psub
=
v_load
(
hsumSub
+
x
+
d
);
Cx
=
Cx
-
psub
+
hv
;
v_store
(
hsumAdd
+
x
+
d
,
hv
);
v_store
(
C
+
x
+
d
,
Cx
);
}
}
else
#endif
{
for
(
d
=
0
;
d
<
D
;
d
++
)
{
...
...
@@ -1010,6 +1033,46 @@ struct CalcVerticalSums: public ParallelLoopBody
const
CostType
*
Cp
=
C
+
x
*
D
;
CostType
*
Sp
=
S
+
x
*
D
;
#if CV_SIMD128
if
(
useSIMD
)
{
v_int16x8
_P1
=
v_setall_s16
((
short
)
P1
);
v_int16x8
_delta
=
v_setall_s16
((
short
)
delta
);
v_int16x8
_minL
=
v_setall_s16
((
short
)
MAX_COST
);
for
(
d
=
0
;
d
<
D
;
d
+=
8
)
{
v_int16x8
Cpd
=
v_load
(
Cp
+
d
);
v_int16x8
L
;
L
=
v_load
(
Lr_ppr
+
d
);
L
=
v_min
(
L
,
(
v_load
(
Lr_ppr
+
d
-
1
)
+
_P1
));
L
=
v_min
(
L
,
(
v_load
(
Lr_ppr
+
d
+
1
)
+
_P1
));
L
=
v_min
(
L
,
_delta
);
L
=
((
L
-
_delta
)
+
Cpd
);
v_store
(
Lr_p
+
d
,
L
);
// Get minimum from in L-L3
_minL
=
v_min
(
_minL
,
L
);
v_int16x8
Sval
=
v_load
(
Sp
+
d
);
Sval
=
Sval
+
L
;
v_store
(
Sp
+
d
,
Sval
);
}
v_int32x4
min1
,
min2
,
min12
;
v_expand
(
_minL
,
min1
,
min2
);
min12
=
v_min
(
min1
,
min2
);
minLr
[
0
][
x
]
=
(
CostType
)
v_reduce_min
(
min12
);
}
else
#endif
{
int
minL
=
MAX_COST
;
...
...
@@ -1058,6 +1121,7 @@ struct CalcVerticalSums: public ParallelLoopBody
size_t
LrSize
;
size_t
hsumBufNRows
;
int
ftzero
;
bool
useSIMD
;
};
struct
CalcHorizontalSums
:
public
ParallelLoopBody
...
...
@@ -1085,6 +1149,7 @@ struct CalcHorizontalSums: public ParallelLoopBody
LrSize
=
2
*
D2
;
Cbuf
=
alignedBuf
;
Sbuf
=
Cbuf
+
CSBufSize
;
useSIMD
=
hasSIMD128
();
}
void
operator
()(
const
Range
&
range
)
const
...
...
@@ -1138,8 +1203,48 @@ struct CalcHorizontalSums: public ParallelLoopBody
const
CostType
*
Cp
=
C
+
x
*
D
;
CostType
*
Sp
=
S
+
x
*
D
;
int
minL
=
MAX_COST
;
#if CV_SIMD128
if
(
useSIMD
)
{
v_int16x8
_P1
=
v_setall_s16
((
short
)
P1
);
v_int16x8
_delta
=
v_setall_s16
((
short
)
delta
);
v_int16x8
_minL
=
v_setall_s16
((
short
)
MAX_COST
);
for
(
d
=
0
;
d
<
D
;
d
+=
8
)
{
v_int16x8
Cpd
=
v_load
(
Cp
+
d
);
v_int16x8
L
;
L
=
v_load
(
Lr_ppr
+
d
);
L
=
v_min
(
L
,
(
v_load
(
Lr_ppr
+
d
-
1
)
+
_P1
));
L
=
v_min
(
L
,
(
v_load
(
Lr_ppr
+
d
+
1
)
+
_P1
));
L
=
v_min
(
L
,
_delta
);
L
=
((
L
-
_delta
)
+
Cpd
);
v_store
(
Lr_p
+
d
,
L
);
// Get minimum from in L-L3
_minL
=
v_min
(
_minL
,
L
);
v_int16x8
Sval
=
v_load
(
Sp
+
d
);
Sval
=
Sval
+
L
;
v_store
(
Sp
+
d
,
Sval
);
}
v_int32x4
min1
,
min2
,
min12
;
v_expand
(
_minL
,
min1
,
min2
);
min12
=
v_min
(
min1
,
min2
);
minLr
=
(
CostType
)
v_reduce_min
(
min12
);
}
else
#endif
{
minLr
=
MAX_COST
;
for
(
d
=
0
;
d
<
D
;
d
++
)
{
int
Cpd
=
Cp
[
d
],
L
;
...
...
@@ -1147,11 +1252,11 @@ struct CalcHorizontalSums: public ParallelLoopBody
L
=
Cpd
+
std
::
min
((
int
)
Lr_ppr
[
d
],
std
::
min
(
Lr_ppr
[
d
-
1
]
+
P1
,
std
::
min
(
Lr_ppr
[
d
+
1
]
+
P1
,
delta
)))
-
delta
;
Lr_p
[
d
]
=
(
CostType
)
L
;
minL
=
std
::
min
(
minL
,
L
);
minLr
=
(
CostType
)
std
::
min
((
int
)
minLr
,
L
);
Sp
[
d
]
=
saturate_cast
<
CostType
>
(
Sp
[
d
]
+
L
);
}
minLr
=
(
CostType
)
minL
;
}
}
memset
(
Lr
-
8
,
0
,
LrSize
*
sizeof
(
CostType
)
);
...
...
@@ -1169,9 +1274,65 @@ struct CalcHorizontalSums: public ParallelLoopBody
const
CostType
*
Cp
=
C
+
x
*
D
;
CostType
*
Sp
=
S
+
x
*
D
;
int
minS
=
MAX_COST
,
bestDisp
=
-
1
;
minLr
=
MAX_COST
;
int
minL
=
MAX_COST
;
#if CV_SIMD128
if
(
useSIMD
)
{
v_int16x8
_P1
=
v_setall_s16
((
short
)
P1
);
v_int16x8
_delta
=
v_setall_s16
((
short
)
delta
);
v_int16x8
_minL
=
v_setall_s16
((
short
)
MAX_COST
);
v_int16x8
_minS
=
v_setall_s16
(
MAX_COST
),
_bestDisp
=
v_setall_s16
(
-
1
);
v_int16x8
_d8
=
v_int16x8
(
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
),
_8
=
v_setall_s16
(
8
);
for
(
d
=
0
;
d
<
D
;
d
+=
8
)
{
v_int16x8
Cpd
=
v_load
(
Cp
+
d
);
v_int16x8
L
;
L
=
v_load
(
Lr_ppr
+
d
);
L
=
v_min
(
L
,
(
v_load
(
Lr_ppr
+
d
-
1
)
+
_P1
));
L
=
v_min
(
L
,
(
v_load
(
Lr_ppr
+
d
+
1
)
+
_P1
));
L
=
v_min
(
L
,
_delta
);
L
=
((
L
-
_delta
)
+
Cpd
);
v_store
(
Lr_p
+
d
,
L
);
// Get minimum from in L-L3
_minL
=
v_min
(
_minL
,
L
);
v_int16x8
Sval
=
v_load
(
Sp
+
d
);
Sval
=
Sval
+
L
;
v_int16x8
mask
=
Sval
<
_minS
;
_minS
=
v_min
(
Sval
,
_minS
);
_bestDisp
=
_bestDisp
^
((
_bestDisp
^
_d8
)
&
mask
);
_d8
=
_d8
+
_8
;
v_store
(
Sp
+
d
,
Sval
);
}
v_int32x4
min1
,
min2
,
min12
;
v_expand
(
_minL
,
min1
,
min2
);
min12
=
v_min
(
min1
,
min2
);
minLr
=
(
CostType
)
v_reduce_min
(
min12
);
v_int32x4
_d0
,
_d1
;
v_expand
(
_minS
,
_d0
,
_d1
);
minS
=
(
int
)
std
::
min
(
v_reduce_min
(
_d0
),
v_reduce_min
(
_d1
));
v_int16x8
v_mask
=
v_setall_s16
((
short
)
minS
)
==
_minS
;
_bestDisp
=
(
_bestDisp
&
v_mask
)
|
(
v_setall_s16
(
SHRT_MAX
)
&
~
v_mask
);
v_expand
(
_bestDisp
,
_d0
,
_d1
);
bestDisp
=
(
int
)
std
::
min
(
v_reduce_min
(
_d0
),
v_reduce_min
(
_d1
));
}
else
#endif
{
for
(
d
=
0
;
d
<
D
;
d
++
)
{
int
Cpd
=
Cp
[
d
],
L
;
...
...
@@ -1179,7 +1340,7 @@ struct CalcHorizontalSums: public ParallelLoopBody
L
=
Cpd
+
std
::
min
((
int
)
Lr_ppr
[
d
],
std
::
min
(
Lr_ppr
[
d
-
1
]
+
P1
,
std
::
min
(
Lr_ppr
[
d
+
1
]
+
P1
,
delta
)))
-
delta
;
Lr_p
[
d
]
=
(
CostType
)
L
;
minL
=
std
::
min
(
minL
,
L
);
minLr
=
(
CostType
)
std
::
min
((
int
)
minLr
,
L
);
Sp
[
d
]
=
saturate_cast
<
CostType
>
(
Sp
[
d
]
+
L
);
if
(
Sp
[
d
]
<
minS
)
...
...
@@ -1188,7 +1349,7 @@ struct CalcHorizontalSums: public ParallelLoopBody
bestDisp
=
d
;
}
}
minLr
=
(
CostType
)
minL
;
}
//Some postprocessing procedures and saving
for
(
d
=
0
;
d
<
D
;
d
++
)
{
...
...
@@ -1263,6 +1424,7 @@ struct CalcHorizontalSums: public ParallelLoopBody
int
INVALID_DISP_SCALED
;
int
uniquenessRatio
;
int
disp12MaxDiff
;
bool
useSIMD
;
};
/*
computes disparity for "roi" in img1 w.r.t. img2 and write it to disp1buf.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment