Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
ecd36611
Commit
ecd36611
authored
Jul 24, 2015
by
Maksim Shabunin
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #4179 from sbokov:improvingStereoSGBM
parents
be70df11
f40b580b
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
845 additions
and
38 deletions
+845
-38
calib3d.hpp
modules/calib3d/include/opencv2/calib3d.hpp
+2
-1
perf_stereosgbm.cpp
modules/calib3d/perf/perf_stereosgbm.cpp
+159
-0
stereosgbm.cpp
modules/calib3d/src/stereosgbm.cpp
+670
-28
test_stereomatching.cpp
modules/calib3d/test/test_stereomatching.cpp
+3
-4
stereo_match.cpp
samples/cpp/stereo_match.cpp
+11
-5
No files found.
modules/calib3d/include/opencv2/calib3d.hpp
View file @
ecd36611
...
@@ -1555,7 +1555,8 @@ public:
...
@@ -1555,7 +1555,8 @@ public:
enum
enum
{
{
MODE_SGBM
=
0
,
MODE_SGBM
=
0
,
MODE_HH
=
1
MODE_HH
=
1
,
MODE_SGBM_3WAY
=
2
};
};
CV_WRAP
virtual
int
getPreFilterCap
()
const
=
0
;
CV_WRAP
virtual
int
getPreFilterCap
()
const
=
0
;
...
...
modules/calib3d/perf/perf_stereosgbm.cpp
0 → 100644
View file @
ecd36611
/*
* By downloading, copying, installing or using the software you agree to this license.
* If you do not agree to this license, do not download, install,
* copy or use the software.
*
*
* License Agreement
* For Open Source Computer Vision Library
* (3 - clause BSD License)
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met :
*
* *Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and / or other materials provided with the distribution.
*
* * Neither the names of the copyright holders nor the names of the contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided by the copyright holders and contributors "as is" and
* any express or implied warranties, including, but not limited to, the implied
* warranties of merchantability and fitness for a particular purpose are disclaimed.
* In no event shall copyright holders or contributors be liable for any direct,
* indirect, incidental, special, exemplary, or consequential damages
* (including, but not limited to, procurement of substitute goods or services;
* loss of use, data, or profits; or business interruption) however caused
* and on any theory of liability, whether in contract, strict liability,
* or tort(including negligence or otherwise) arising in any way out of
* the use of this software, even if advised of the possibility of such damage.
*/
#include "perf_precomp.hpp"
namespace
cvtest
{
using
std
::
tr1
::
tuple
;
using
std
::
tr1
::
get
;
using
namespace
perf
;
using
namespace
testing
;
using
namespace
cv
;
void
MakeArtificialExample
(
RNG
rng
,
Mat
&
dst_left_view
,
Mat
&
dst_view
);
CV_ENUM
(
SGBMModes
,
StereoSGBM
::
MODE_SGBM
,
StereoSGBM
::
MODE_SGBM_3WAY
);
typedef
tuple
<
Size
,
int
,
SGBMModes
>
SGBMParams
;
typedef
TestBaseWithParam
<
SGBMParams
>
TestStereoCorresp
;
PERF_TEST_P
(
TestStereoCorresp
,
SGBM
,
Combine
(
Values
(
Size
(
1280
,
720
),
Size
(
640
,
480
)),
Values
(
256
,
128
),
SGBMModes
::
all
())
)
{
RNG
rng
(
0
);
SGBMParams
params
=
GetParam
();
Size
sz
=
get
<
0
>
(
params
);
int
num_disparities
=
get
<
1
>
(
params
);
int
mode
=
get
<
2
>
(
params
);
Mat
src_left
(
sz
,
CV_8UC3
);
Mat
src_right
(
sz
,
CV_8UC3
);
Mat
dst
(
sz
,
CV_16S
);
MakeArtificialExample
(
rng
,
src_left
,
src_right
);
cv
::
setNumThreads
(
cv
::
getNumberOfCPUs
());
int
wsize
=
3
;
int
P1
=
8
*
src_left
.
channels
()
*
wsize
*
wsize
;
TEST_CYCLE
()
{
Ptr
<
StereoSGBM
>
sgbm
=
StereoSGBM
::
create
(
0
,
num_disparities
,
wsize
,
P1
,
4
*
P1
,
1
,
63
,
25
,
0
,
0
,
mode
);
sgbm
->
compute
(
src_left
,
src_right
,
dst
);
}
SANITY_CHECK
(
dst
,
.01
,
ERROR_RELATIVE
);
}
void
MakeArtificialExample
(
RNG
rng
,
Mat
&
dst_left_view
,
Mat
&
dst_right_view
)
{
int
w
=
dst_left_view
.
cols
;
int
h
=
dst_left_view
.
rows
;
//params:
unsigned
char
bg_level
=
(
unsigned
char
)
rng
.
uniform
(
0.0
,
255.0
);
unsigned
char
fg_level
=
(
unsigned
char
)
rng
.
uniform
(
0.0
,
255.0
);
int
rect_width
=
(
int
)
rng
.
uniform
(
w
/
16
,
w
/
2
);
int
rect_height
=
(
int
)
rng
.
uniform
(
h
/
16
,
h
/
2
);
int
rect_disparity
=
(
int
)(
0.15
*
w
);
double
sigma
=
3.0
;
int
rect_x_offset
=
(
w
-
rect_width
)
/
2
;
int
rect_y_offset
=
(
h
-
rect_height
)
/
2
;
if
(
dst_left_view
.
channels
()
==
3
)
{
dst_left_view
=
Scalar
(
Vec3b
(
bg_level
,
bg_level
,
bg_level
));
dst_right_view
=
Scalar
(
Vec3b
(
bg_level
,
bg_level
,
bg_level
));
}
else
{
dst_left_view
=
Scalar
(
bg_level
);
dst_right_view
=
Scalar
(
bg_level
);
}
Mat
dst_left_view_rect
=
Mat
(
dst_left_view
,
Rect
(
rect_x_offset
,
rect_y_offset
,
rect_width
,
rect_height
));
if
(
dst_left_view
.
channels
()
==
3
)
dst_left_view_rect
=
Scalar
(
Vec3b
(
fg_level
,
fg_level
,
fg_level
));
else
dst_left_view_rect
=
Scalar
(
fg_level
);
rect_x_offset
-=
rect_disparity
;
Mat
dst_right_view_rect
=
Mat
(
dst_right_view
,
Rect
(
rect_x_offset
,
rect_y_offset
,
rect_width
,
rect_height
));
if
(
dst_right_view
.
channels
()
==
3
)
dst_right_view_rect
=
Scalar
(
Vec3b
(
fg_level
,
fg_level
,
fg_level
));
else
dst_right_view_rect
=
Scalar
(
fg_level
);
//add some gaussian noise:
unsigned
char
*
l
,
*
r
;
for
(
int
i
=
0
;
i
<
h
;
i
++
)
{
l
=
dst_left_view
.
ptr
(
i
);
r
=
dst_right_view
.
ptr
(
i
);
if
(
dst_left_view
.
channels
()
==
3
)
{
for
(
int
j
=
0
;
j
<
w
;
j
++
)
{
l
[
0
]
=
saturate_cast
<
unsigned
char
>
(
l
[
0
]
+
rng
.
gaussian
(
sigma
));
l
[
1
]
=
saturate_cast
<
unsigned
char
>
(
l
[
1
]
+
rng
.
gaussian
(
sigma
));
l
[
2
]
=
saturate_cast
<
unsigned
char
>
(
l
[
2
]
+
rng
.
gaussian
(
sigma
));
l
+=
3
;
r
[
0
]
=
saturate_cast
<
unsigned
char
>
(
r
[
0
]
+
rng
.
gaussian
(
sigma
));
r
[
1
]
=
saturate_cast
<
unsigned
char
>
(
r
[
1
]
+
rng
.
gaussian
(
sigma
));
r
[
2
]
=
saturate_cast
<
unsigned
char
>
(
r
[
2
]
+
rng
.
gaussian
(
sigma
));
r
+=
3
;
}
}
else
{
for
(
int
j
=
0
;
j
<
w
;
j
++
)
{
l
[
0
]
=
saturate_cast
<
unsigned
char
>
(
l
[
0
]
+
rng
.
gaussian
(
sigma
));
l
++
;
r
[
0
]
=
saturate_cast
<
unsigned
char
>
(
r
[
0
]
+
rng
.
gaussian
(
sigma
));
r
++
;
}
}
}
}
}
modules/calib3d/src/stereosgbm.cpp
View file @
ecd36611
...
@@ -52,6 +52,7 @@
...
@@ -52,6 +52,7 @@
#include "precomp.hpp"
#include "precomp.hpp"
#include <limits.h>
#include <limits.h>
#include "opencv2/hal/intrin.hpp"
namespace
cv
namespace
cv
{
{
...
@@ -110,7 +111,7 @@ struct StereoSGBMParams
...
@@ -110,7 +111,7 @@ struct StereoSGBMParams
};
};
/*
/*
For each pixel row1[x], max(
-
maxD, 0) <= minX <= x < maxX <= width - max(0, -minD),
For each pixel row1[x], max(maxD, 0) <= minX <= x < maxX <= width - max(0, -minD),
and for each disparity minD<=d<maxD the function
and for each disparity minD<=d<maxD the function
computes the cost (cost[(x-minX)*(maxD - minD) + (d - minD)]), depending on the difference between
computes the cost (cost[(x-minX)*(maxD - minD) + (d - minD)]), depending on the difference between
row1[x] and row2[x-d]. The subpixel algorithm from
row1[x] and row2[x-d]. The subpixel algorithm from
...
@@ -125,7 +126,7 @@ static void calcPixelCostBT( const Mat& img1, const Mat& img2, int y,
...
@@ -125,7 +126,7 @@ static void calcPixelCostBT( const Mat& img1, const Mat& img2, int y,
int
tabOfs
,
int
)
int
tabOfs
,
int
)
{
{
int
x
,
c
,
width
=
img1
.
cols
,
cn
=
img1
.
channels
();
int
x
,
c
,
width
=
img1
.
cols
,
cn
=
img1
.
channels
();
int
minX1
=
std
::
max
(
-
maxD
,
0
),
maxX1
=
width
+
std
::
min
(
minD
,
0
);
int
minX1
=
std
::
max
(
maxD
,
0
),
maxX1
=
width
+
std
::
min
(
minD
,
0
);
int
minX2
=
std
::
max
(
minX1
-
maxD
,
0
),
maxX2
=
std
::
min
(
maxX1
-
minD
,
width
);
int
minX2
=
std
::
max
(
minX1
-
maxD
,
0
),
maxX2
=
std
::
min
(
maxX1
-
minD
,
width
);
int
D
=
maxD
-
minD
,
width1
=
maxX1
-
minX1
,
width2
=
maxX2
-
minX2
;
int
D
=
maxD
-
minD
,
width1
=
maxX1
-
minX1
,
width2
=
maxX2
-
minX2
;
const
PixType
*
row1
=
img1
.
ptr
<
PixType
>
(
y
),
*
row2
=
img2
.
ptr
<
PixType
>
(
y
);
const
PixType
*
row1
=
img1
.
ptr
<
PixType
>
(
y
),
*
row2
=
img2
.
ptr
<
PixType
>
(
y
);
...
@@ -180,10 +181,6 @@ static void calcPixelCostBT( const Mat& img1, const Mat& img2, int y,
...
@@ -180,10 +181,6 @@ static void calcPixelCostBT( const Mat& img1, const Mat& img2, int y,
buffer
-=
minX2
;
buffer
-=
minX2
;
cost
-=
minX1
*
D
+
minD
;
// simplify the cost indices inside the loop
cost
-=
minX1
*
D
+
minD
;
// simplify the cost indices inside the loop
#if CV_SSE2
volatile
bool
useSIMD
=
checkHardwareSupport
(
CV_CPU_SSE2
);
#endif
#if 1
#if 1
for
(
c
=
0
;
c
<
cn
*
2
;
c
++
,
prow1
+=
width
,
prow2
+=
width
)
for
(
c
=
0
;
c
<
cn
*
2
;
c
++
,
prow1
+=
width
,
prow2
+=
width
)
{
{
...
@@ -211,32 +208,28 @@ static void calcPixelCostBT( const Mat& img1, const Mat& img2, int y,
...
@@ -211,32 +208,28 @@ static void calcPixelCostBT( const Mat& img1, const Mat& img2, int y,
int
u0
=
std
::
min
(
ul
,
ur
);
u0
=
std
::
min
(
u0
,
u
);
int
u0
=
std
::
min
(
ul
,
ur
);
u0
=
std
::
min
(
u0
,
u
);
int
u1
=
std
::
max
(
ul
,
ur
);
u1
=
std
::
max
(
u1
,
u
);
int
u1
=
std
::
max
(
ul
,
ur
);
u1
=
std
::
max
(
u1
,
u
);
#if CV_SSE2
#if CV_SIMD128
if
(
useSIMD
)
v_uint8x16
_u
=
v_setall_u8
((
uchar
)
u
),
_u0
=
v_setall_u8
((
uchar
)
u0
);
{
v_uint8x16
_u1
=
v_setall_u8
((
uchar
)
u1
);
__m128i
_u
=
_mm_set1_epi8
((
char
)
u
),
_u0
=
_mm_set1_epi8
((
char
)
u0
);
__m128i
_u1
=
_mm_set1_epi8
((
char
)
u1
),
z
=
_mm_setzero_si128
();
__m128i
ds
=
_mm_cvtsi32_si128
(
diff_scale
);
for
(
int
d
=
minD
;
d
<
maxD
;
d
+=
16
)
for
(
int
d
=
minD
;
d
<
maxD
;
d
+=
16
)
{
{
__m128i
_v
=
_mm_loadu_si128
((
const
__m128i
*
)(
prow2
+
width
-
x
-
1
+
d
)
);
v_uint8x16
_v
=
v_load
(
prow2
+
width
-
x
-
1
+
d
);
__m128i
_v0
=
_mm_loadu_si128
((
const
__m128i
*
)(
buffer
+
width
-
x
-
1
+
d
)
);
v_uint8x16
_v0
=
v_load
(
buffer
+
width
-
x
-
1
+
d
);
__m128i
_v1
=
_mm_loadu_si128
((
const
__m128i
*
)(
buffer
+
width
-
x
-
1
+
d
+
width2
)
);
v_uint8x16
_v1
=
v_load
(
buffer
+
width
-
x
-
1
+
d
+
width2
);
__m128i
c0
=
_mm_max_epu8
(
_mm_subs_epu8
(
_u
,
_v1
),
_mm_subs_epu8
(
_v0
,
_u
)
);
v_uint8x16
c0
=
v_max
(
_u
-
_v1
,
_v0
-
_u
);
__m128i
c1
=
_mm_max_epu8
(
_mm_subs_epu8
(
_v
,
_u1
),
_mm_subs_epu8
(
_u0
,
_v
)
);
v_uint8x16
c1
=
v_max
(
_v
-
_u1
,
_u0
-
_v
);
__m128i
diff
=
_mm_min_epu8
(
c0
,
c1
);
v_uint8x16
diff
=
v_min
(
c0
,
c1
);
c0
=
_mm_load_si128
((
__m128i
*
)(
cost
+
x
*
D
+
d
)
);
v_int16x8
_c0
=
v_load_aligned
(
cost
+
x
*
D
+
d
);
c1
=
_mm_load_si128
((
__m128i
*
)(
cost
+
x
*
D
+
d
+
8
)
);
v_int16x8
_c1
=
v_load_aligned
(
cost
+
x
*
D
+
d
+
8
);
_mm_store_si128
((
__m128i
*
)(
cost
+
x
*
D
+
d
),
_mm_adds_epi16
(
c0
,
_mm_srl_epi16
(
_mm_unpacklo_epi8
(
diff
,
z
),
ds
)));
v_uint16x8
diff1
,
diff2
;
_mm_store_si128
((
__m128i
*
)(
cost
+
x
*
D
+
d
+
8
),
_mm_adds_epi16
(
c1
,
_mm_srl_epi16
(
_mm_unpackhi_epi8
(
diff
,
z
),
ds
)));
v_expand
(
diff
,
diff1
,
diff2
);
}
v_store_aligned
(
cost
+
x
*
D
+
d
,
_c0
+
v_reinterpret_as_s16
(
diff1
>>
diff_scale
));
v_store_aligned
(
cost
+
x
*
D
+
d
+
8
,
_c1
+
v_reinterpret_as_s16
(
diff2
>>
diff_scale
));
}
}
else
#else
#endif
{
for
(
int
d
=
minD
;
d
<
maxD
;
d
++
)
for
(
int
d
=
minD
;
d
<
maxD
;
d
++
)
{
{
int
v
=
prow2
[
width
-
x
-
1
+
d
];
int
v
=
prow2
[
width
-
x
-
1
+
d
];
...
@@ -247,7 +240,7 @@ static void calcPixelCostBT( const Mat& img1, const Mat& img2, int y,
...
@@ -247,7 +240,7 @@ static void calcPixelCostBT( const Mat& img1, const Mat& img2, int y,
cost
[
x
*
D
+
d
]
=
(
CostType
)(
cost
[
x
*
D
+
d
]
+
(
std
::
min
(
c0
,
c1
)
>>
diff_scale
));
cost
[
x
*
D
+
d
]
=
(
CostType
)(
cost
[
x
*
D
+
d
]
+
(
std
::
min
(
c0
,
c1
)
>>
diff_scale
));
}
}
}
#endif
}
}
}
}
#else
#else
...
@@ -340,7 +333,7 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2,
...
@@ -340,7 +333,7 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2,
int
disp12MaxDiff
=
params
.
disp12MaxDiff
>
0
?
params
.
disp12MaxDiff
:
1
;
int
disp12MaxDiff
=
params
.
disp12MaxDiff
>
0
?
params
.
disp12MaxDiff
:
1
;
int
P1
=
params
.
P1
>
0
?
params
.
P1
:
2
,
P2
=
std
::
max
(
params
.
P2
>
0
?
params
.
P2
:
5
,
P1
+
1
);
int
P1
=
params
.
P1
>
0
?
params
.
P1
:
2
,
P2
=
std
::
max
(
params
.
P2
>
0
?
params
.
P2
:
5
,
P1
+
1
);
int
k
,
width
=
disp1
.
cols
,
height
=
disp1
.
rows
;
int
k
,
width
=
disp1
.
cols
,
height
=
disp1
.
rows
;
int
minX1
=
std
::
max
(
-
maxD
,
0
),
maxX1
=
width
+
std
::
min
(
minD
,
0
);
int
minX1
=
std
::
max
(
maxD
,
0
),
maxX1
=
width
+
std
::
min
(
minD
,
0
);
int
D
=
maxD
-
minD
,
width1
=
maxX1
-
minX1
;
int
D
=
maxD
-
minD
,
width1
=
maxX1
-
minX1
;
int
INVALID_DISP
=
minD
-
1
,
INVALID_DISP_SCALED
=
INVALID_DISP
*
DISP_SCALE
;
int
INVALID_DISP
=
minD
-
1
,
INVALID_DISP_SCALED
=
INVALID_DISP
*
DISP_SCALE
;
int
SW2
=
SADWindowSize
.
width
/
2
,
SH2
=
SADWindowSize
.
height
/
2
;
int
SW2
=
SADWindowSize
.
width
/
2
,
SH2
=
SADWindowSize
.
height
/
2
;
...
@@ -829,6 +822,645 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2,
...
@@ -829,6 +822,645 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2,
}
}
}
}
//////////////////////////////////////////////////////////////////////////////////////////////////////
void
getBufferPointers
(
Mat
&
buffer
,
int
width
,
int
width1
,
int
D
,
int
num_ch
,
int
SH2
,
int
P2
,
CostType
*&
curCostVolumeLine
,
CostType
*&
hsumBuf
,
CostType
*&
pixDiff
,
PixType
*&
tmpBuf
,
CostType
*&
horPassCostVolume
,
CostType
*&
vertPassCostVolume
,
CostType
*&
vertPassMin
,
CostType
*&
rightPassBuf
,
CostType
*&
disp2CostBuf
,
short
*&
disp2Buf
);
struct
SGBM3WayMainLoop
:
public
ParallelLoopBody
{
Mat
*
buffers
;
const
Mat
*
img1
,
*
img2
;
Mat
*
dst_disp
;
int
nstripes
,
stripe_sz
;
int
stripe_overlap
;
int
width
,
height
;
int
minD
,
maxD
,
D
;
int
minX1
,
maxX1
,
width1
;
int
SW2
,
SH2
;
int
P1
,
P2
;
int
uniquenessRatio
,
disp12MaxDiff
;
int
costBufSize
,
hsumBufNRows
;
int
TAB_OFS
,
ftzero
;
PixType
*
clipTab
;
SGBM3WayMainLoop
(
Mat
*
_buffers
,
const
Mat
&
_img1
,
const
Mat
&
_img2
,
Mat
*
_dst_disp
,
const
StereoSGBMParams
&
params
,
PixType
*
_clipTab
,
int
_nstripes
,
int
_stripe_overlap
);
void
getRawMatchingCost
(
CostType
*
C
,
CostType
*
hsumBuf
,
CostType
*
pixDiff
,
PixType
*
tmpBuf
,
int
y
,
int
src_start_idx
)
const
;
void
operator
()
(
const
Range
&
range
)
const
;
};
SGBM3WayMainLoop
::
SGBM3WayMainLoop
(
Mat
*
_buffers
,
const
Mat
&
_img1
,
const
Mat
&
_img2
,
Mat
*
_dst_disp
,
const
StereoSGBMParams
&
params
,
PixType
*
_clipTab
,
int
_nstripes
,
int
_stripe_overlap
)
:
buffers
(
_buffers
),
img1
(
&
_img1
),
img2
(
&
_img2
),
dst_disp
(
_dst_disp
),
clipTab
(
_clipTab
)
{
nstripes
=
_nstripes
;
stripe_overlap
=
_stripe_overlap
;
stripe_sz
=
(
int
)
ceil
(
img1
->
rows
/
(
double
)
nstripes
);
width
=
img1
->
cols
;
height
=
img1
->
rows
;
minD
=
params
.
minDisparity
;
maxD
=
minD
+
params
.
numDisparities
;
D
=
maxD
-
minD
;
minX1
=
std
::
max
(
maxD
,
0
);
maxX1
=
width
+
std
::
min
(
minD
,
0
);
width1
=
maxX1
-
minX1
;
CV_Assert
(
D
%
16
==
0
);
SW2
=
SH2
=
params
.
SADWindowSize
>
0
?
params
.
SADWindowSize
/
2
:
1
;
P1
=
params
.
P1
>
0
?
params
.
P1
:
2
;
P2
=
std
::
max
(
params
.
P2
>
0
?
params
.
P2
:
5
,
P1
+
1
);
uniquenessRatio
=
params
.
uniquenessRatio
>=
0
?
params
.
uniquenessRatio
:
10
;
disp12MaxDiff
=
params
.
disp12MaxDiff
>
0
?
params
.
disp12MaxDiff
:
1
;
costBufSize
=
width1
*
D
;
hsumBufNRows
=
SH2
*
2
+
2
;
TAB_OFS
=
256
*
4
;
ftzero
=
std
::
max
(
params
.
preFilterCap
,
15
)
|
1
;
}
void
getBufferPointers
(
Mat
&
buffer
,
int
width
,
int
width1
,
int
D
,
int
num_ch
,
int
SH2
,
int
P2
,
CostType
*&
curCostVolumeLine
,
CostType
*&
hsumBuf
,
CostType
*&
pixDiff
,
PixType
*&
tmpBuf
,
CostType
*&
horPassCostVolume
,
CostType
*&
vertPassCostVolume
,
CostType
*&
vertPassMin
,
CostType
*&
rightPassBuf
,
CostType
*&
disp2CostBuf
,
short
*&
disp2Buf
)
{
// allocating all the required memory:
int
costVolumeLineSize
=
width1
*
D
;
int
width1_ext
=
width1
+
2
;
int
costVolumeLineSize_ext
=
width1_ext
*
D
;
int
hsumBufNRows
=
SH2
*
2
+
2
;
// main buffer to store matching costs for the current line:
int
curCostVolumeLineSize
=
costVolumeLineSize
*
sizeof
(
CostType
);
// auxiliary buffers for the raw matching cost computation:
int
hsumBufSize
=
costVolumeLineSize
*
hsumBufNRows
*
sizeof
(
CostType
);
int
pixDiffSize
=
costVolumeLineSize
*
sizeof
(
CostType
);
int
tmpBufSize
=
width
*
16
*
num_ch
*
sizeof
(
PixType
);
// auxiliary buffers for the matching cost aggregation:
int
horPassCostVolumeSize
=
costVolumeLineSize_ext
*
sizeof
(
CostType
);
// buffer for the 2-pass horizontal cost aggregation
int
vertPassCostVolumeSize
=
costVolumeLineSize_ext
*
sizeof
(
CostType
);
// buffer for the vertical cost aggregation
int
vertPassMinSize
=
width1_ext
*
sizeof
(
CostType
);
// buffer for storing minimum costs from the previous line
int
rightPassBufSize
=
D
*
sizeof
(
CostType
);
// additional small buffer for the right-to-left pass
// buffers for the pseudo-LRC check:
int
disp2CostBufSize
=
width
*
sizeof
(
CostType
);
int
disp2BufSize
=
width
*
sizeof
(
short
);
// sum up the sizes of all the buffers:
size_t
totalBufSize
=
curCostVolumeLineSize
+
hsumBufSize
+
pixDiffSize
+
tmpBufSize
+
horPassCostVolumeSize
+
vertPassCostVolumeSize
+
vertPassMinSize
+
rightPassBufSize
+
disp2CostBufSize
+
disp2BufSize
+
16
;
//to compensate for the alignPtr shifts
if
(
buffer
.
empty
()
||
!
buffer
.
isContinuous
()
||
buffer
.
cols
*
buffer
.
rows
*
buffer
.
elemSize
()
<
totalBufSize
)
buffer
.
create
(
1
,
(
int
)
totalBufSize
,
CV_8U
);
// set up all the pointers:
curCostVolumeLine
=
(
CostType
*
)
alignPtr
(
buffer
.
ptr
(),
16
);
hsumBuf
=
curCostVolumeLine
+
costVolumeLineSize
;
pixDiff
=
hsumBuf
+
costVolumeLineSize
*
hsumBufNRows
;
tmpBuf
=
(
PixType
*
)(
pixDiff
+
costVolumeLineSize
);
horPassCostVolume
=
(
CostType
*
)(
tmpBuf
+
width
*
16
*
num_ch
);
vertPassCostVolume
=
horPassCostVolume
+
costVolumeLineSize_ext
;
rightPassBuf
=
vertPassCostVolume
+
costVolumeLineSize_ext
;
vertPassMin
=
rightPassBuf
+
D
;
disp2CostBuf
=
vertPassMin
+
width1_ext
;
disp2Buf
=
disp2CostBuf
+
width
;
// initialize memory:
memset
(
buffer
.
ptr
(),
0
,
totalBufSize
);
for
(
int
i
=
0
;
i
<
costVolumeLineSize
;
i
++
)
curCostVolumeLine
[
i
]
=
(
CostType
)
P2
;
//such initialization simplifies the cost aggregation loops a bit
}
// performing block matching and building raw cost-volume for the current row
void
SGBM3WayMainLoop
::
getRawMatchingCost
(
CostType
*
C
,
// target cost-volume row
CostType
*
hsumBuf
,
CostType
*
pixDiff
,
PixType
*
tmpBuf
,
//buffers
int
y
,
int
src_start_idx
)
const
{
int
x
,
d
;
int
dy1
=
(
y
==
src_start_idx
)
?
src_start_idx
:
y
+
SH2
,
dy2
=
(
y
==
src_start_idx
)
?
src_start_idx
+
SH2
:
dy1
;
for
(
int
k
=
dy1
;
k
<=
dy2
;
k
++
)
{
CostType
*
hsumAdd
=
hsumBuf
+
(
std
::
min
(
k
,
height
-
1
)
%
hsumBufNRows
)
*
costBufSize
;
if
(
k
<
height
)
{
calcPixelCostBT
(
*
img1
,
*
img2
,
k
,
minD
,
maxD
,
pixDiff
,
tmpBuf
,
clipTab
,
TAB_OFS
,
ftzero
);
memset
(
hsumAdd
,
0
,
D
*
sizeof
(
CostType
));
for
(
x
=
0
;
x
<=
SW2
*
D
;
x
+=
D
)
{
int
scale
=
x
==
0
?
SW2
+
1
:
1
;
for
(
d
=
0
;
d
<
D
;
d
++
)
hsumAdd
[
d
]
=
(
CostType
)(
hsumAdd
[
d
]
+
pixDiff
[
x
+
d
]
*
scale
);
}
if
(
y
>
src_start_idx
)
{
const
CostType
*
hsumSub
=
hsumBuf
+
(
std
::
max
(
y
-
SH2
-
1
,
src_start_idx
)
%
hsumBufNRows
)
*
costBufSize
;
for
(
x
=
D
;
x
<
width1
*
D
;
x
+=
D
)
{
const
CostType
*
pixAdd
=
pixDiff
+
std
::
min
(
x
+
SW2
*
D
,
(
width1
-
1
)
*
D
);
const
CostType
*
pixSub
=
pixDiff
+
std
::
max
(
x
-
(
SW2
+
1
)
*
D
,
0
);
#if CV_SIMD128
v_int16x8
hv_reg
;
for
(
d
=
0
;
d
<
D
;
d
+=
8
)
{
hv_reg
=
v_load_aligned
(
hsumAdd
+
x
-
D
+
d
)
+
(
v_load_aligned
(
pixAdd
+
d
)
-
v_load_aligned
(
pixSub
+
d
));
v_store_aligned
(
hsumAdd
+
x
+
d
,
hv_reg
);
v_store_aligned
(
C
+
x
+
d
,
v_load_aligned
(
C
+
x
+
d
)
+
(
hv_reg
-
v_load_aligned
(
hsumSub
+
x
+
d
)));
}
#else
for
(
d
=
0
;
d
<
D
;
d
++
)
{
int
hv
=
hsumAdd
[
x
+
d
]
=
(
CostType
)(
hsumAdd
[
x
-
D
+
d
]
+
pixAdd
[
d
]
-
pixSub
[
d
]);
C
[
x
+
d
]
=
(
CostType
)(
C
[
x
+
d
]
+
hv
-
hsumSub
[
x
+
d
]);
}
#endif
}
}
else
{
for
(
x
=
D
;
x
<
width1
*
D
;
x
+=
D
)
{
const
CostType
*
pixAdd
=
pixDiff
+
std
::
min
(
x
+
SW2
*
D
,
(
width1
-
1
)
*
D
);
const
CostType
*
pixSub
=
pixDiff
+
std
::
max
(
x
-
(
SW2
+
1
)
*
D
,
0
);
for
(
d
=
0
;
d
<
D
;
d
++
)
hsumAdd
[
x
+
d
]
=
(
CostType
)(
hsumAdd
[
x
-
D
+
d
]
+
pixAdd
[
d
]
-
pixSub
[
d
]);
}
}
}
if
(
y
==
src_start_idx
)
{
int
scale
=
k
==
src_start_idx
?
SH2
+
1
:
1
;
for
(
x
=
0
;
x
<
width1
*
D
;
x
++
)
C
[
x
]
=
(
CostType
)(
C
[
x
]
+
hsumAdd
[
x
]
*
scale
);
}
}
}
#if CV_SIMD128 && CV_SSE2
// define some additional reduce operations:
inline
short
min
(
const
v_int16x8
&
a
)
{
short
CV_DECL_ALIGNED
(
16
)
buf
[
8
];
v_store_aligned
(
buf
,
a
);
short
s0
=
std
::
min
(
buf
[
0
],
buf
[
1
]);
short
s1
=
std
::
min
(
buf
[
2
],
buf
[
3
]);
short
s2
=
std
::
min
(
buf
[
4
],
buf
[
5
]);
short
s3
=
std
::
min
(
buf
[
6
],
buf
[
7
]);
return
std
::
min
(
std
::
min
(
s0
,
s1
),
std
::
min
(
s2
,
s3
));
}
inline
short
min_pos
(
const
v_int16x8
&
val
,
const
v_int16x8
&
pos
)
{
short
CV_DECL_ALIGNED
(
16
)
val_buf
[
8
];
v_store_aligned
(
val_buf
,
val
);
short
CV_DECL_ALIGNED
(
16
)
pos_buf
[
8
];
v_store_aligned
(
pos_buf
,
pos
);
short
res_pos
=
0
;
short
min_val
=
SHRT_MAX
;
if
(
val_buf
[
0
]
<
min_val
)
{
min_val
=
val_buf
[
0
];
res_pos
=
pos_buf
[
0
];}
if
(
val_buf
[
1
]
<
min_val
)
{
min_val
=
val_buf
[
1
];
res_pos
=
pos_buf
[
1
];}
if
(
val_buf
[
2
]
<
min_val
)
{
min_val
=
val_buf
[
2
];
res_pos
=
pos_buf
[
2
];}
if
(
val_buf
[
3
]
<
min_val
)
{
min_val
=
val_buf
[
3
];
res_pos
=
pos_buf
[
3
];}
if
(
val_buf
[
4
]
<
min_val
)
{
min_val
=
val_buf
[
4
];
res_pos
=
pos_buf
[
4
];}
if
(
val_buf
[
5
]
<
min_val
)
{
min_val
=
val_buf
[
5
];
res_pos
=
pos_buf
[
5
];}
if
(
val_buf
[
6
]
<
min_val
)
{
min_val
=
val_buf
[
6
];
res_pos
=
pos_buf
[
6
];}
if
(
val_buf
[
7
]
<
min_val
)
{
min_val
=
val_buf
[
7
];
res_pos
=
pos_buf
[
7
];}
return
res_pos
;
}
#endif
// performing SGM cost accumulation from left to right (result is stored in leftBuf) and
// in-place cost accumulation from top to bottom (result is stored in topBuf)
inline
void
accumulateCostsLeftTop
(
CostType
*
leftBuf
,
CostType
*
leftBuf_prev
,
CostType
*
topBuf
,
CostType
*
costs
,
CostType
&
leftMinCost
,
CostType
&
topMinCost
,
int
D
,
int
P1
,
int
P2
)
{
#if CV_SIMD128 && CV_SSE2
v_int16x8
P1_reg
=
v_setall_s16
(
cv
::
saturate_cast
<
CostType
>
(
P1
));
v_int16x8
leftMinCostP2_reg
=
v_setall_s16
(
cv
::
saturate_cast
<
CostType
>
(
leftMinCost
+
P2
));
v_int16x8
leftMinCost_new_reg
=
v_setall_s16
(
SHRT_MAX
);
v_int16x8
src0_leftBuf
=
v_setall_s16
(
SHRT_MAX
);
v_int16x8
src1_leftBuf
=
v_load_aligned
(
leftBuf_prev
);
v_int16x8
topMinCostP2_reg
=
v_setall_s16
(
cv
::
saturate_cast
<
CostType
>
(
topMinCost
+
P2
));
v_int16x8
topMinCost_new_reg
=
v_setall_s16
(
SHRT_MAX
);
v_int16x8
src0_topBuf
=
v_setall_s16
(
SHRT_MAX
);
v_int16x8
src1_topBuf
=
v_load_aligned
(
topBuf
);
v_int16x8
src2
;
v_int16x8
src_shifted_left
,
src_shifted_right
;
v_int16x8
res
;
for
(
int
i
=
0
;
i
<
D
-
8
;
i
+=
8
)
{
//process leftBuf:
//lookahead load:
src2
=
v_load_aligned
(
leftBuf_prev
+
i
+
8
);
//get shifted versions of the current block:
src_shifted_left
=
v_int16x8
(
_mm_slli_si128
(
src1_leftBuf
.
val
,
2
));
src_shifted_right
=
v_int16x8
(
_mm_srli_si128
(
src1_leftBuf
.
val
,
2
));
// replace shifted-in zeros with proper values and add P1:
src_shifted_left
=
(
src_shifted_left
|
v_int16x8
(
_mm_srli_si128
(
src0_leftBuf
.
val
,
14
)))
+
P1_reg
;
src_shifted_right
=
(
src_shifted_right
|
v_int16x8
(
_mm_slli_si128
(
src2
.
val
,
14
)))
+
P1_reg
;
// process and save current block:
res
=
v_load_aligned
(
costs
+
i
)
+
(
v_min
(
v_min
(
src_shifted_left
,
src_shifted_right
),
v_min
(
src1_leftBuf
,
leftMinCostP2_reg
))
-
leftMinCostP2_reg
);
leftMinCost_new_reg
=
v_min
(
leftMinCost_new_reg
,
res
);
v_store_aligned
(
leftBuf
+
i
,
res
);
//update src buffers:
src0_leftBuf
=
src1_leftBuf
;
src1_leftBuf
=
src2
;
//process topBuf:
//lookahead load:
src2
=
v_load_aligned
(
topBuf
+
i
+
8
);
//get shifted versions of the current block:
src_shifted_left
=
v_int16x8
(
_mm_slli_si128
(
src1_topBuf
.
val
,
2
));
src_shifted_right
=
v_int16x8
(
_mm_srli_si128
(
src1_topBuf
.
val
,
2
));
// replace shifted-in zeros with proper values and add P1:
src_shifted_left
=
(
src_shifted_left
|
v_int16x8
(
_mm_srli_si128
(
src0_topBuf
.
val
,
14
)))
+
P1_reg
;
src_shifted_right
=
(
src_shifted_right
|
v_int16x8
(
_mm_slli_si128
(
src2
.
val
,
14
)))
+
P1_reg
;
// process and save current block:
res
=
v_load_aligned
(
costs
+
i
)
+
(
v_min
(
v_min
(
src_shifted_left
,
src_shifted_right
),
v_min
(
src1_topBuf
,
topMinCostP2_reg
))
-
topMinCostP2_reg
);
topMinCost_new_reg
=
v_min
(
topMinCost_new_reg
,
res
);
v_store_aligned
(
topBuf
+
i
,
res
);
//update src buffers:
src0_topBuf
=
src1_topBuf
;
src1_topBuf
=
src2
;
}
// a bit different processing for the last cycle of the loop:
//process leftBuf:
src_shifted_left
=
v_int16x8
(
_mm_slli_si128
(
src1_leftBuf
.
val
,
2
));
src_shifted_right
=
v_int16x8
(
_mm_srli_si128
(
src1_leftBuf
.
val
,
2
));
src2
=
v_setall_s16
(
SHRT_MAX
);
src_shifted_left
=
(
src_shifted_left
|
v_int16x8
(
_mm_srli_si128
(
src0_leftBuf
.
val
,
14
)))
+
P1_reg
;
src_shifted_right
=
(
src_shifted_right
|
v_int16x8
(
_mm_slli_si128
(
src2
.
val
,
14
)))
+
P1_reg
;
res
=
v_load_aligned
(
costs
+
D
-
8
)
+
(
v_min
(
v_min
(
src_shifted_left
,
src_shifted_right
),
v_min
(
src1_leftBuf
,
leftMinCostP2_reg
))
-
leftMinCostP2_reg
);
leftMinCost
=
min
(
v_min
(
leftMinCost_new_reg
,
res
));
v_store_aligned
(
leftBuf
+
D
-
8
,
res
);
//process topBuf:
src_shifted_left
=
v_int16x8
(
_mm_slli_si128
(
src1_topBuf
.
val
,
2
));
src_shifted_right
=
v_int16x8
(
_mm_srli_si128
(
src1_topBuf
.
val
,
2
));
src2
=
v_setall_s16
(
SHRT_MAX
);
src_shifted_left
=
(
src_shifted_left
|
v_int16x8
(
_mm_srli_si128
(
src0_topBuf
.
val
,
14
)))
+
P1_reg
;
src_shifted_right
=
(
src_shifted_right
|
v_int16x8
(
_mm_slli_si128
(
src2
.
val
,
14
)))
+
P1_reg
;
res
=
v_load_aligned
(
costs
+
D
-
8
)
+
(
v_min
(
v_min
(
src_shifted_left
,
src_shifted_right
),
v_min
(
src1_topBuf
,
topMinCostP2_reg
))
-
topMinCostP2_reg
);
topMinCost
=
min
(
v_min
(
topMinCost_new_reg
,
res
));
v_store_aligned
(
topBuf
+
D
-
8
,
res
);
#else
CostType
leftMinCost_new
=
SHRT_MAX
;
CostType
topMinCost_new
=
SHRT_MAX
;
int
leftMinCost_P2
=
leftMinCost
+
P2
;
int
topMinCost_P2
=
topMinCost
+
P2
;
CostType
leftBuf_prev_i_minus_1
=
SHRT_MAX
;
CostType
topBuf_i_minus_1
=
SHRT_MAX
;
CostType
tmp
;
for
(
int
i
=
0
;
i
<
D
-
1
;
i
++
)
{
leftBuf
[
i
]
=
cv
::
saturate_cast
<
CostType
>
(
costs
[
i
]
+
std
::
min
(
std
::
min
(
leftBuf_prev_i_minus_1
+
P1
,
leftBuf_prev
[
i
+
1
]
+
P1
),
std
::
min
((
int
)
leftBuf_prev
[
i
],
leftMinCost_P2
))
-
leftMinCost_P2
);
leftBuf_prev_i_minus_1
=
leftBuf_prev
[
i
];
leftMinCost_new
=
std
::
min
(
leftMinCost_new
,
leftBuf
[
i
]);
tmp
=
topBuf
[
i
];
topBuf
[
i
]
=
cv
::
saturate_cast
<
CostType
>
(
costs
[
i
]
+
std
::
min
(
std
::
min
(
topBuf_i_minus_1
+
P1
,
topBuf
[
i
+
1
]
+
P1
),
std
::
min
((
int
)
topBuf
[
i
],
topMinCost_P2
))
-
topMinCost_P2
);
topBuf_i_minus_1
=
tmp
;
topMinCost_new
=
std
::
min
(
topMinCost_new
,
topBuf
[
i
]);
}
leftBuf
[
D
-
1
]
=
cv
::
saturate_cast
<
CostType
>
(
costs
[
D
-
1
]
+
std
::
min
(
leftBuf_prev_i_minus_1
+
P1
,
std
::
min
((
int
)
leftBuf_prev
[
D
-
1
],
leftMinCost_P2
))
-
leftMinCost_P2
);
leftMinCost
=
std
::
min
(
leftMinCost_new
,
leftBuf
[
D
-
1
]);
topBuf
[
D
-
1
]
=
cv
::
saturate_cast
<
CostType
>
(
costs
[
D
-
1
]
+
std
::
min
(
topBuf_i_minus_1
+
P1
,
std
::
min
((
int
)
topBuf
[
D
-
1
],
topMinCost_P2
))
-
topMinCost_P2
);
topMinCost
=
std
::
min
(
topMinCost_new
,
topBuf
[
D
-
1
]);
#endif
}
// performing in-place SGM cost accumulation from right to left (the result is stored in rightBuf) and
// summing rightBuf, topBuf, leftBuf together (the result is stored in leftBuf), as well as finding the
// optimal disparity value with minimum accumulated cost
inline
void
accumulateCostsRight
(
CostType
*
rightBuf
,
CostType
*
topBuf
,
CostType
*
leftBuf
,
CostType
*
costs
,
CostType
&
rightMinCost
,
int
D
,
int
P1
,
int
P2
,
int
&
optimal_disp
,
CostType
&
min_cost
)
{
#if CV_SIMD128 && CV_SSE2
v_int16x8
P1_reg
=
v_setall_s16
(
cv
::
saturate_cast
<
CostType
>
(
P1
));
v_int16x8
rightMinCostP2_reg
=
v_setall_s16
(
cv
::
saturate_cast
<
CostType
>
(
rightMinCost
+
P2
));
v_int16x8
rightMinCost_new_reg
=
v_setall_s16
(
SHRT_MAX
);
v_int16x8
src0_rightBuf
=
v_setall_s16
(
SHRT_MAX
);
v_int16x8
src1_rightBuf
=
v_load
(
rightBuf
);
v_int16x8
src2
;
v_int16x8
src_shifted_left
,
src_shifted_right
;
v_int16x8
res
;
v_int16x8
min_sum_cost_reg
=
v_setall_s16
(
SHRT_MAX
);
v_int16x8
min_sum_pos_reg
=
v_setall_s16
(
0
);
v_int16x8
loop_idx
(
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
);
v_int16x8
eight_reg
=
v_setall_s16
(
8
);
for
(
int
i
=
0
;
i
<
D
-
8
;
i
+=
8
)
{
//lookahead load:
src2
=
v_load_aligned
(
rightBuf
+
i
+
8
);
//get shifted versions of the current block:
src_shifted_left
=
v_int16x8
(
_mm_slli_si128
(
src1_rightBuf
.
val
,
2
));
src_shifted_right
=
v_int16x8
(
_mm_srli_si128
(
src1_rightBuf
.
val
,
2
));
// replace shifted-in zeros with proper values and add P1:
src_shifted_left
=
(
src_shifted_left
|
v_int16x8
(
_mm_srli_si128
(
src0_rightBuf
.
val
,
14
)))
+
P1_reg
;
src_shifted_right
=
(
src_shifted_right
|
v_int16x8
(
_mm_slli_si128
(
src2
.
val
,
14
)))
+
P1_reg
;
// process and save current block:
res
=
v_load_aligned
(
costs
+
i
)
+
(
v_min
(
v_min
(
src_shifted_left
,
src_shifted_right
),
v_min
(
src1_rightBuf
,
rightMinCostP2_reg
))
-
rightMinCostP2_reg
);
rightMinCost_new_reg
=
v_min
(
rightMinCost_new_reg
,
res
);
v_store_aligned
(
rightBuf
+
i
,
res
);
// compute and save total cost:
res
=
res
+
v_load_aligned
(
leftBuf
+
i
)
+
v_load_aligned
(
topBuf
+
i
);
v_store_aligned
(
leftBuf
+
i
,
res
);
// track disparity value with the minimum cost:
min_sum_cost_reg
=
v_min
(
min_sum_cost_reg
,
res
);
min_sum_pos_reg
=
min_sum_pos_reg
+
((
min_sum_cost_reg
==
res
)
&
(
loop_idx
-
min_sum_pos_reg
));
loop_idx
=
loop_idx
+
eight_reg
;
//update src:
src0_rightBuf
=
src1_rightBuf
;
src1_rightBuf
=
src2
;
}
// a bit different processing for the last cycle of the loop:
src_shifted_left
=
v_int16x8
(
_mm_slli_si128
(
src1_rightBuf
.
val
,
2
));
src_shifted_right
=
v_int16x8
(
_mm_srli_si128
(
src1_rightBuf
.
val
,
2
));
src2
=
v_setall_s16
(
SHRT_MAX
);
src_shifted_left
=
(
src_shifted_left
|
v_int16x8
(
_mm_srli_si128
(
src0_rightBuf
.
val
,
14
)))
+
P1_reg
;
src_shifted_right
=
(
src_shifted_right
|
v_int16x8
(
_mm_slli_si128
(
src2
.
val
,
14
)))
+
P1_reg
;
res
=
v_load_aligned
(
costs
+
D
-
8
)
+
(
v_min
(
v_min
(
src_shifted_left
,
src_shifted_right
),
v_min
(
src1_rightBuf
,
rightMinCostP2_reg
))
-
rightMinCostP2_reg
);
rightMinCost
=
min
(
v_min
(
rightMinCost_new_reg
,
res
));
v_store_aligned
(
rightBuf
+
D
-
8
,
res
);
res
=
res
+
v_load_aligned
(
leftBuf
+
D
-
8
)
+
v_load_aligned
(
topBuf
+
D
-
8
);
v_store_aligned
(
leftBuf
+
D
-
8
,
res
);
min_sum_cost_reg
=
v_min
(
min_sum_cost_reg
,
res
);
min_cost
=
min
(
min_sum_cost_reg
);
min_sum_pos_reg
=
min_sum_pos_reg
+
((
min_sum_cost_reg
==
res
)
&
(
loop_idx
-
min_sum_pos_reg
));
optimal_disp
=
min_pos
(
min_sum_cost_reg
,
min_sum_pos_reg
);
#else
CostType
rightMinCost_new
=
SHRT_MAX
;
int
rightMinCost_P2
=
rightMinCost
+
P2
;
CostType
rightBuf_i_minus_1
=
SHRT_MAX
;
CostType
tmp
;
min_cost
=
SHRT_MAX
;
for
(
int
i
=
0
;
i
<
D
-
1
;
i
++
)
{
tmp
=
rightBuf
[
i
];
rightBuf
[
i
]
=
cv
::
saturate_cast
<
CostType
>
(
costs
[
i
]
+
std
::
min
(
std
::
min
(
rightBuf_i_minus_1
+
P1
,
rightBuf
[
i
+
1
]
+
P1
),
std
::
min
((
int
)
rightBuf
[
i
],
rightMinCost_P2
))
-
rightMinCost_P2
);
rightBuf_i_minus_1
=
tmp
;
rightMinCost_new
=
std
::
min
(
rightMinCost_new
,
rightBuf
[
i
]);
leftBuf
[
i
]
=
cv
::
saturate_cast
<
CostType
>
((
int
)
leftBuf
[
i
]
+
rightBuf
[
i
]
+
topBuf
[
i
]);
if
(
leftBuf
[
i
]
<
min_cost
)
{
optimal_disp
=
i
;
min_cost
=
leftBuf
[
i
];
}
}
rightBuf
[
D
-
1
]
=
cv
::
saturate_cast
<
CostType
>
(
costs
[
D
-
1
]
+
std
::
min
(
rightBuf_i_minus_1
+
P1
,
std
::
min
((
int
)
rightBuf
[
D
-
1
],
rightMinCost_P2
))
-
rightMinCost_P2
);
rightMinCost
=
std
::
min
(
rightMinCost_new
,
rightBuf
[
D
-
1
]);
leftBuf
[
D
-
1
]
=
cv
::
saturate_cast
<
CostType
>
((
int
)
leftBuf
[
D
-
1
]
+
rightBuf
[
D
-
1
]
+
topBuf
[
D
-
1
]);
if
(
leftBuf
[
D
-
1
]
<
min_cost
)
{
optimal_disp
=
D
-
1
;
min_cost
=
leftBuf
[
D
-
1
];
}
#endif
}
void
SGBM3WayMainLoop
::
operator
()
(
const
Range
&
range
)
const
{
// force separate processing of stripes:
if
(
range
.
end
>
range
.
start
+
1
)
{
for
(
int
n
=
range
.
start
;
n
<
range
.
end
;
n
++
)
(
*
this
)(
Range
(
n
,
n
+
1
));
return
;
}
const
int
DISP_SCALE
=
(
1
<<
StereoMatcher
::
DISP_SHIFT
);
int
INVALID_DISP
=
minD
-
1
,
INVALID_DISP_SCALED
=
INVALID_DISP
*
DISP_SCALE
;
// setting up the ranges:
int
src_start_idx
=
std
::
max
(
std
::
min
(
range
.
start
*
stripe_sz
-
stripe_overlap
,
height
),
0
);
int
src_end_idx
=
std
::
min
(
range
.
end
*
stripe_sz
,
height
);
int
dst_offset
;
if
(
range
.
start
==
0
)
dst_offset
=
stripe_overlap
;
else
dst_offset
=
0
;
Mat
cur_buffer
=
buffers
[
range
.
start
];
Mat
cur_disp
=
dst_disp
[
range
.
start
];
cur_disp
=
Scalar
(
INVALID_DISP_SCALED
);
// prepare buffers:
CostType
*
curCostVolumeLine
,
*
hsumBuf
,
*
pixDiff
;
PixType
*
tmpBuf
;
CostType
*
horPassCostVolume
,
*
vertPassCostVolume
,
*
vertPassMin
,
*
rightPassBuf
,
*
disp2CostBuf
;
short
*
disp2Buf
;
getBufferPointers
(
cur_buffer
,
width
,
width1
,
D
,
img1
->
channels
(),
SH2
,
P2
,
curCostVolumeLine
,
hsumBuf
,
pixDiff
,
tmpBuf
,
horPassCostVolume
,
vertPassCostVolume
,
vertPassMin
,
rightPassBuf
,
disp2CostBuf
,
disp2Buf
);
// start real processing:
for
(
int
y
=
src_start_idx
;
y
<
src_end_idx
;
y
++
)
{
getRawMatchingCost
(
curCostVolumeLine
,
hsumBuf
,
pixDiff
,
tmpBuf
,
y
,
src_start_idx
);
short
*
disp_row
=
(
short
*
)
cur_disp
.
ptr
(
dst_offset
+
(
y
-
src_start_idx
));
// initialize the auxiliary buffers for the pseudo left-right consistency check:
for
(
int
x
=
0
;
x
<
width
;
x
++
)
{
disp2Buf
[
x
]
=
(
short
)
INVALID_DISP_SCALED
;
disp2CostBuf
[
x
]
=
SHRT_MAX
;
}
CostType
*
C
=
curCostVolumeLine
-
D
;
CostType
prev_min
,
min_cost
;
int
d
,
best_d
;
d
=
best_d
=
0
;
// forward pass
prev_min
=
0
;
for
(
int
x
=
D
;
x
<
(
1
+
width1
)
*
D
;
x
+=
D
)
accumulateCostsLeftTop
(
horPassCostVolume
+
x
,
horPassCostVolume
+
x
-
D
,
vertPassCostVolume
+
x
,
C
+
x
,
prev_min
,
vertPassMin
[
x
/
D
],
D
,
P1
,
P2
);
//backward pass
memset
(
rightPassBuf
,
0
,
D
*
sizeof
(
CostType
));
prev_min
=
0
;
for
(
int
x
=
width1
*
D
;
x
>=
D
;
x
-=
D
)
{
accumulateCostsRight
(
rightPassBuf
,
vertPassCostVolume
+
x
,
horPassCostVolume
+
x
,
C
+
x
,
prev_min
,
D
,
P1
,
P2
,
best_d
,
min_cost
);
if
(
uniquenessRatio
>
0
)
{
#if CV_SIMD128
horPassCostVolume
+=
x
;
int
thresh
=
(
100
*
min_cost
)
/
(
100
-
uniquenessRatio
);
v_int16x8
thresh_reg
=
v_setall_s16
((
short
)(
thresh
+
1
));
v_int16x8
d1
=
v_setall_s16
((
short
)(
best_d
-
1
));
v_int16x8
d2
=
v_setall_s16
((
short
)(
best_d
+
1
));
v_int16x8
eight_reg
=
v_setall_s16
(
8
);
v_int16x8
cur_d
(
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
);
v_int16x8
mask
,
cost1
,
cost2
;
for
(
d
=
0
;
d
<
D
;
d
+=
16
)
{
cost1
=
v_load_aligned
(
horPassCostVolume
+
d
);
cost2
=
v_load_aligned
(
horPassCostVolume
+
d
+
8
);
mask
=
cost1
<
thresh_reg
;
mask
=
mask
&
(
(
cur_d
<
d1
)
|
(
cur_d
>
d2
)
);
if
(
v_signmask
(
mask
)
)
break
;
cur_d
=
cur_d
+
eight_reg
;
mask
=
cost2
<
thresh_reg
;
mask
=
mask
&
(
(
cur_d
<
d1
)
|
(
cur_d
>
d2
)
);
if
(
v_signmask
(
mask
)
)
break
;
cur_d
=
cur_d
+
eight_reg
;
}
horPassCostVolume
-=
x
;
#else
for
(
d
=
0
;
d
<
D
;
d
++
)
{
if
(
horPassCostVolume
[
x
+
d
]
*
(
100
-
uniquenessRatio
)
<
min_cost
*
100
&&
std
::
abs
(
d
-
best_d
)
>
1
)
break
;
}
#endif
if
(
d
<
D
)
continue
;
}
d
=
best_d
;
int
_x2
=
x
/
D
-
1
+
minX1
-
d
-
minD
;
if
(
_x2
>=
0
&&
_x2
<
width
&&
disp2CostBuf
[
_x2
]
>
min_cost
)
{
disp2CostBuf
[
_x2
]
=
min_cost
;
disp2Buf
[
_x2
]
=
(
short
)(
d
+
minD
);
}
if
(
0
<
d
&&
d
<
D
-
1
)
{
// do subpixel quadratic interpolation:
// fit parabola into (x1=d-1, y1=Sp[d-1]), (x2=d, y2=Sp[d]), (x3=d+1, y3=Sp[d+1])
// then find minimum of the parabola.
int
denom2
=
std
::
max
(
horPassCostVolume
[
x
+
d
-
1
]
+
horPassCostVolume
[
x
+
d
+
1
]
-
2
*
horPassCostVolume
[
x
+
d
],
1
);
d
=
d
*
DISP_SCALE
+
((
horPassCostVolume
[
x
+
d
-
1
]
-
horPassCostVolume
[
x
+
d
+
1
])
*
DISP_SCALE
+
denom2
)
/
(
denom2
*
2
);
}
else
d
*=
DISP_SCALE
;
disp_row
[(
x
/
D
)
-
1
+
minX1
]
=
(
DispType
)(
d
+
minD
*
DISP_SCALE
);
}
for
(
int
x
=
minX1
;
x
<
maxX1
;
x
++
)
{
// pseudo LRC consistency check using only one disparity map;
// pixels with difference more than disp12MaxDiff are invalidated
int
d1
=
disp_row
[
x
];
if
(
d1
==
INVALID_DISP_SCALED
)
continue
;
int
_d
=
d1
>>
StereoMatcher
::
DISP_SHIFT
;
int
d_
=
(
d1
+
DISP_SCALE
-
1
)
>>
StereoMatcher
::
DISP_SHIFT
;
int
_x
=
x
-
_d
,
x_
=
x
-
d_
;
if
(
0
<=
_x
&&
_x
<
width
&&
disp2Buf
[
_x
]
>=
minD
&&
std
::
abs
(
disp2Buf
[
_x
]
-
_d
)
>
disp12MaxDiff
&&
0
<=
x_
&&
x_
<
width
&&
disp2Buf
[
x_
]
>=
minD
&&
std
::
abs
(
disp2Buf
[
x_
]
-
d_
)
>
disp12MaxDiff
)
disp_row
[
x
]
=
(
short
)
INVALID_DISP_SCALED
;
}
}
}
static
void
computeDisparity3WaySGBM
(
const
Mat
&
img1
,
const
Mat
&
img2
,
Mat
&
disp1
,
const
StereoSGBMParams
&
params
,
Mat
*
buffers
,
int
nstripes
)
{
// precompute a lookup table for the raw matching cost computation:
const
int
TAB_OFS
=
256
*
4
,
TAB_SIZE
=
256
+
TAB_OFS
*
2
;
PixType
*
clipTab
=
new
PixType
[
TAB_SIZE
];
int
ftzero
=
std
::
max
(
params
.
preFilterCap
,
15
)
|
1
;
for
(
int
k
=
0
;
k
<
TAB_SIZE
;
k
++
)
clipTab
[
k
]
=
(
PixType
)(
std
::
min
(
std
::
max
(
k
-
TAB_OFS
,
-
ftzero
),
ftzero
)
+
ftzero
);
// allocate separate dst_disp arrays to avoid conflicts due to stripe overlap:
int
stripe_sz
=
(
int
)
ceil
(
img1
.
rows
/
(
double
)
nstripes
);
int
stripe_overlap
=
(
params
.
SADWindowSize
/
2
+
1
)
+
(
int
)
ceil
(
0.1
*
stripe_sz
);
Mat
*
dst_disp
=
new
Mat
[
nstripes
];
for
(
int
i
=
0
;
i
<
nstripes
;
i
++
)
dst_disp
[
i
].
create
(
stripe_sz
+
stripe_overlap
,
img1
.
cols
,
CV_16S
);
parallel_for_
(
Range
(
0
,
nstripes
),
SGBM3WayMainLoop
(
buffers
,
img1
,
img2
,
dst_disp
,
params
,
clipTab
,
nstripes
,
stripe_overlap
));
//assemble disp1 from dst_disp:
short
*
dst_row
;
short
*
src_row
;
for
(
int
i
=
0
;
i
<
disp1
.
rows
;
i
++
)
{
dst_row
=
(
short
*
)
disp1
.
ptr
(
i
);
src_row
=
(
short
*
)
dst_disp
[
i
/
stripe_sz
].
ptr
(
stripe_overlap
+
i
%
stripe_sz
);
memcpy
(
dst_row
,
src_row
,
disp1
.
cols
*
sizeof
(
short
));
}
delete
[]
clipTab
;
delete
[]
dst_disp
;
}
class
StereoSGBMImpl
:
public
StereoSGBM
class
StereoSGBMImpl
:
public
StereoSGBM
{
{
public
:
public
:
...
@@ -857,7 +1489,11 @@ public:
...
@@ -857,7 +1489,11 @@ public:
disparr
.
create
(
left
.
size
(),
CV_16S
);
disparr
.
create
(
left
.
size
(),
CV_16S
);
Mat
disp
=
disparr
.
getMat
();
Mat
disp
=
disparr
.
getMat
();
if
(
params
.
mode
==
MODE_SGBM_3WAY
)
computeDisparity3WaySGBM
(
left
,
right
,
disp
,
params
,
buffers
,
num_stripes
);
else
computeDisparitySGBM
(
left
,
right
,
disp
,
params
,
buffer
);
computeDisparitySGBM
(
left
,
right
,
disp
,
params
,
buffer
);
medianBlur
(
disp
,
disp
,
3
);
medianBlur
(
disp
,
disp
,
3
);
if
(
params
.
speckleWindowSize
>
0
)
if
(
params
.
speckleWindowSize
>
0
)
...
@@ -933,6 +1569,12 @@ public:
...
@@ -933,6 +1569,12 @@ public:
StereoSGBMParams
params
;
StereoSGBMParams
params
;
Mat
buffer
;
Mat
buffer
;
// the number of stripes is fixed, disregarding the number of threads/processors
// to make the results fully reproducible:
static
const
int
num_stripes
=
4
;
Mat
buffers
[
num_stripes
];
static
const
char
*
name_
;
static
const
char
*
name_
;
};
};
...
...
modules/calib3d/test/test_stereomatching.cpp
View file @
ecd36611
...
@@ -742,7 +742,7 @@ protected:
...
@@ -742,7 +742,7 @@ protected:
{
{
int
ndisp
;
int
ndisp
;
int
winSize
;
int
winSize
;
bool
fullDP
;
int
mode
;
};
};
vector
<
RunParams
>
caseRunParams
;
vector
<
RunParams
>
caseRunParams
;
...
@@ -757,7 +757,7 @@ protected:
...
@@ -757,7 +757,7 @@ protected:
RunParams
params
;
RunParams
params
;
String
ndisp
=
fn
[
i
+
2
];
params
.
ndisp
=
atoi
(
ndisp
.
c_str
());
String
ndisp
=
fn
[
i
+
2
];
params
.
ndisp
=
atoi
(
ndisp
.
c_str
());
String
winSize
=
fn
[
i
+
3
];
params
.
winSize
=
atoi
(
winSize
.
c_str
());
String
winSize
=
fn
[
i
+
3
];
params
.
winSize
=
atoi
(
winSize
.
c_str
());
String
fullDP
=
fn
[
i
+
4
];
params
.
fullDP
=
atoi
(
fullDP
.
c_str
())
==
0
?
false
:
true
;
String
mode
=
fn
[
i
+
4
];
params
.
mode
=
atoi
(
mode
.
c_str
())
;
caseNames
.
push_back
(
caseName
);
caseNames
.
push_back
(
caseName
);
caseDatasets
.
push_back
(
datasetName
);
caseDatasets
.
push_back
(
datasetName
);
caseRunParams
.
push_back
(
params
);
caseRunParams
.
push_back
(
params
);
...
@@ -773,8 +773,7 @@ protected:
...
@@ -773,8 +773,7 @@ protected:
Ptr
<
StereoSGBM
>
sgbm
=
StereoSGBM
::
create
(
0
,
params
.
ndisp
,
params
.
winSize
,
Ptr
<
StereoSGBM
>
sgbm
=
StereoSGBM
::
create
(
0
,
params
.
ndisp
,
params
.
winSize
,
10
*
params
.
winSize
*
params
.
winSize
,
10
*
params
.
winSize
*
params
.
winSize
,
40
*
params
.
winSize
*
params
.
winSize
,
40
*
params
.
winSize
*
params
.
winSize
,
1
,
63
,
10
,
100
,
32
,
params
.
fullDP
?
1
,
63
,
10
,
100
,
32
,
params
.
mode
);
StereoSGBM
::
MODE_HH
:
StereoSGBM
::
MODE_SGBM
);
sgbm
->
compute
(
leftImg
,
rightImg
,
leftDisp
);
sgbm
->
compute
(
leftImg
,
rightImg
,
leftDisp
);
CV_Assert
(
leftDisp
.
type
()
==
CV_16SC1
);
CV_Assert
(
leftDisp
.
type
()
==
CV_16SC1
);
leftDisp
/=
16
;
leftDisp
/=
16
;
...
...
samples/cpp/stereo_match.cpp
View file @
ecd36611
...
@@ -20,7 +20,7 @@ using namespace cv;
...
@@ -20,7 +20,7 @@ using namespace cv;
static
void
print_help
()
static
void
print_help
()
{
{
printf
(
"
\n
Demo stereo matching converting L and R images into disparity and point clouds
\n
"
);
printf
(
"
\n
Demo stereo matching converting L and R images into disparity and point clouds
\n
"
);
printf
(
"
\n
Usage: stereo_match <left_image> <right_image> [--algorithm=bm|sgbm|hh] [--blocksize=<block_size>]
\n
"
printf
(
"
\n
Usage: stereo_match <left_image> <right_image> [--algorithm=bm|sgbm|hh
|sgbm3way
] [--blocksize=<block_size>]
\n
"
"[--max-disparity=<max_disparity>] [--scale=scale_factor>] [-i <intrinsic_filename>] [-e <extrinsic_filename>]
\n
"
"[--max-disparity=<max_disparity>] [--scale=scale_factor>] [-i <intrinsic_filename>] [-e <extrinsic_filename>]
\n
"
"[--no-display] [-o <disparity_image>] [-p <point_cloud_file>]
\n
"
);
"[--no-display] [-o <disparity_image>] [-p <point_cloud_file>]
\n
"
);
}
}
...
@@ -61,7 +61,7 @@ int main(int argc, char** argv)
...
@@ -61,7 +61,7 @@ int main(int argc, char** argv)
const
char
*
disparity_filename
=
0
;
const
char
*
disparity_filename
=
0
;
const
char
*
point_cloud_filename
=
0
;
const
char
*
point_cloud_filename
=
0
;
enum
{
STEREO_BM
=
0
,
STEREO_SGBM
=
1
,
STEREO_HH
=
2
,
STEREO_VAR
=
3
};
enum
{
STEREO_BM
=
0
,
STEREO_SGBM
=
1
,
STEREO_HH
=
2
,
STEREO_VAR
=
3
,
STEREO_3WAY
=
4
};
int
alg
=
STEREO_SGBM
;
int
alg
=
STEREO_SGBM
;
int
SADWindowSize
=
0
,
numberOfDisparities
=
0
;
int
SADWindowSize
=
0
,
numberOfDisparities
=
0
;
bool
no_display
=
false
;
bool
no_display
=
false
;
...
@@ -85,7 +85,8 @@ int main(int argc, char** argv)
...
@@ -85,7 +85,8 @@ int main(int argc, char** argv)
alg
=
strcmp
(
_alg
,
"bm"
)
==
0
?
STEREO_BM
:
alg
=
strcmp
(
_alg
,
"bm"
)
==
0
?
STEREO_BM
:
strcmp
(
_alg
,
"sgbm"
)
==
0
?
STEREO_SGBM
:
strcmp
(
_alg
,
"sgbm"
)
==
0
?
STEREO_SGBM
:
strcmp
(
_alg
,
"hh"
)
==
0
?
STEREO_HH
:
strcmp
(
_alg
,
"hh"
)
==
0
?
STEREO_HH
:
strcmp
(
_alg
,
"var"
)
==
0
?
STEREO_VAR
:
-
1
;
strcmp
(
_alg
,
"var"
)
==
0
?
STEREO_VAR
:
strcmp
(
_alg
,
"sgbm3way"
)
==
0
?
STEREO_3WAY
:
-
1
;
if
(
alg
<
0
)
if
(
alg
<
0
)
{
{
printf
(
"Command-line parameter error: Unknown stereo algorithm
\n\n
"
);
printf
(
"Command-line parameter error: Unknown stereo algorithm
\n\n
"
);
...
@@ -257,7 +258,12 @@ int main(int argc, char** argv)
...
@@ -257,7 +258,12 @@ int main(int argc, char** argv)
sgbm
->
setSpeckleWindowSize
(
100
);
sgbm
->
setSpeckleWindowSize
(
100
);
sgbm
->
setSpeckleRange
(
32
);
sgbm
->
setSpeckleRange
(
32
);
sgbm
->
setDisp12MaxDiff
(
1
);
sgbm
->
setDisp12MaxDiff
(
1
);
sgbm
->
setMode
(
alg
==
STEREO_HH
?
StereoSGBM
::
MODE_HH
:
StereoSGBM
::
MODE_SGBM
);
if
(
alg
==
STEREO_HH
)
sgbm
->
setMode
(
StereoSGBM
::
MODE_HH
);
else
if
(
alg
==
STEREO_SGBM
)
sgbm
->
setMode
(
StereoSGBM
::
MODE_SGBM
);
else
if
(
alg
==
STEREO_3WAY
)
sgbm
->
setMode
(
StereoSGBM
::
MODE_SGBM_3WAY
);
Mat
disp
,
disp8
;
Mat
disp
,
disp8
;
//Mat img1p, img2p, dispp;
//Mat img1p, img2p, dispp;
...
@@ -267,7 +273,7 @@ int main(int argc, char** argv)
...
@@ -267,7 +273,7 @@ int main(int argc, char** argv)
int64
t
=
getTickCount
();
int64
t
=
getTickCount
();
if
(
alg
==
STEREO_BM
)
if
(
alg
==
STEREO_BM
)
bm
->
compute
(
img1
,
img2
,
disp
);
bm
->
compute
(
img1
,
img2
,
disp
);
else
if
(
alg
==
STEREO_SGBM
||
alg
==
STEREO_HH
)
else
if
(
alg
==
STEREO_SGBM
||
alg
==
STEREO_HH
||
alg
==
STEREO_3WAY
)
sgbm
->
compute
(
img1
,
img2
,
disp
);
sgbm
->
compute
(
img1
,
img2
,
disp
);
t
=
getTickCount
()
-
t
;
t
=
getTickCount
()
-
t
;
printf
(
"Time elapsed: %fms
\n
"
,
t
*
1000
/
getTickFrequency
());
printf
(
"Time elapsed: %fms
\n
"
,
t
*
1000
/
getTickFrequency
());
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment