Commit 21be2aa6 authored by Pyotr Chekmaryov's avatar Pyotr Chekmaryov

Memory repaired + Cleanup.

parent 7ffa49e0
...@@ -125,7 +125,6 @@ static void calcPixelCostBT( const Mat& img1, const Mat& img2, int y, ...@@ -125,7 +125,6 @@ static void calcPixelCostBT( const Mat& img1, const Mat& img2, int y,
int minD, int maxD, CostType* cost, int minD, int maxD, CostType* cost,
PixType* buffer, const PixType* tab, PixType* buffer, const PixType* tab,
int tabOfs, int , int xrange_min = 0, int xrange_max = DEFAULT_RIGHT_BORDER ) int tabOfs, int , int xrange_min = 0, int xrange_max = DEFAULT_RIGHT_BORDER )
//TODO: This function was changed and modified old function's behabior. Check they in tests
{ {
int x, c, width = img1.cols, cn = img1.channels(); int x, c, width = img1.cols, cn = img1.channels();
int minX1 = std::max(maxD, 0), maxX1 = width + std::min(minD, 0); int minX1 = std::max(maxD, 0), maxX1 = width + std::min(minD, 0);
...@@ -311,9 +310,9 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2, ...@@ -311,9 +310,9 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2,
const CostType MAX_COST = SHRT_MAX; const CostType MAX_COST = SHRT_MAX;
int minD = params.minDisparity, maxD = minD + params.numDisparities; int minD = params.minDisparity, maxD = minD + params.numDisparities;
Size SADWindowSize; //4e: SAD means Sum of Absolute Differences Size SADWindowSize;
SADWindowSize.width = SADWindowSize.height = params.SADWindowSize > 0 ? params.SADWindowSize : 5; //4e: and this is always square SADWindowSize.width = SADWindowSize.height = params.SADWindowSize > 0 ? params.SADWindowSize : 5;
int ftzero = std::max(params.preFilterCap, 15) | 1; //4e:ftzero clips x-derivatives. I think, this story with arrays is about non-realized SIMD method int ftzero = std::max(params.preFilterCap, 15) | 1;
int uniquenessRatio = params.uniquenessRatio >= 0 ? params.uniquenessRatio : 10; int uniquenessRatio = params.uniquenessRatio >= 0 ? params.uniquenessRatio : 10;
int disp12MaxDiff = params.disp12MaxDiff > 0 ? params.disp12MaxDiff : 1; int disp12MaxDiff = params.disp12MaxDiff > 0 ? params.disp12MaxDiff : 1;
int P1 = params.P1 > 0 ? params.P1 : 2, P2 = std::max(params.P2 > 0 ? params.P2 : 5, P1+1); int P1 = params.P1 > 0 ? params.P1 : 2, P2 = std::max(params.P2 > 0 ? params.P2 : 5, P1+1);
...@@ -323,11 +322,11 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2, ...@@ -323,11 +322,11 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2,
int INVALID_DISP = minD - 1, INVALID_DISP_SCALED = INVALID_DISP*DISP_SCALE; int INVALID_DISP = minD - 1, INVALID_DISP_SCALED = INVALID_DISP*DISP_SCALE;
int SW2 = SADWindowSize.width/2, SH2 = SADWindowSize.height/2; int SW2 = SADWindowSize.width/2, SH2 = SADWindowSize.height/2;
bool fullDP = params.mode == StereoSGBM::MODE_HH; bool fullDP = params.mode == StereoSGBM::MODE_HH;
int npasses = fullDP ? 2 : 1; //4e: 2 passes with different behaviour for Hirshmueller method. int npasses = fullDP ? 2 : 1;
const int TAB_OFS = 256*4, TAB_SIZE = 256 + TAB_OFS*2; //4e: array is such big due to derivative could be +-8*256 in worst cases const int TAB_OFS = 256*4, TAB_SIZE = 256 + TAB_OFS*2;
PixType clipTab[TAB_SIZE]; PixType clipTab[TAB_SIZE];
for( k = 0; k < TAB_SIZE; k++ ) //4e: If ftzero would = 4, array containment will be = -4 -4 -4 ... -4 -3 -2 -1 0 1 2 3 4 ... 4 4 4 for( k = 0; k < TAB_SIZE; k++ )
clipTab[k] = (PixType)(std::min(std::max(k - TAB_OFS, -ftzero), ftzero) + ftzero); clipTab[k] = (PixType)(std::min(std::max(k - TAB_OFS, -ftzero), ftzero) + ftzero);
if( minX1 >= maxX1 ) if( minX1 >= maxX1 )
...@@ -340,25 +339,25 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2, ...@@ -340,25 +339,25 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2,
// NR - the number of directions. the loop on x below that computes Lr assumes that NR == 8. // NR - the number of directions. the loop on x below that computes Lr assumes that NR == 8.
// if you change NR, please, modify the loop as well. // if you change NR, please, modify the loop as well.
int D2 = D+16, NRD2 = NR2*D2; //4e: Somewhere in code we need d+1, so D+1. One of simplest solutuons is increasing D-dimension on 1. But 1 is 16, when storage should be aligned. int D2 = D+16, NRD2 = NR2*D2;
// the number of L_r(.,.) and min_k L_r(.,.) lines in the buffer: // the number of L_r(.,.) and min_k L_r(.,.) lines in the buffer:
// for 8-way dynamic programming we need the current row and // for 8-way dynamic programming we need the current row and
// the previous row, i.e. 2 rows in total // the previous row, i.e. 2 rows in total
const int NLR = 2; //4e: We assume, that we need one or more previous steps in our linear dynamic(one right here). const int NLR = 2;
const int LrBorder = NLR - 1; //4e: for simplification of calculations we need border for taking previous dynamic solutions. const int LrBorder = NLR - 1;
// for each possible stereo match (img1(x,y) <=> img2(x-d,y)) // for each possible stereo match (img1(x,y) <=> img2(x-d,y))
// we keep pixel difference cost (C) and the summary cost over NR directions (S). // we keep pixel difference cost (C) and the summary cost over NR directions (S).
// we also keep all the partial costs for the previous line L_r(x,d) and also min_k L_r(x, k) // we also keep all the partial costs for the previous line L_r(x,d) and also min_k L_r(x, k)
size_t costBufSize = width1*D; size_t costBufSize = width1*D;
size_t CSBufSize = costBufSize*(fullDP ? height : 1); //4e: For HH mode it's better to keep whole array of costs. size_t CSBufSize = costBufSize*(fullDP ? height : 1);
size_t minLrSize = (width1 + LrBorder*2)*NR2, LrSize = minLrSize*D2; //4e: TODO: Understand why NR2 per pass instead od NR2/2 (Probably, without any reason. That doesn't make code wrong) size_t minLrSize = (width1 + LrBorder*2)*NR2, LrSize = minLrSize*D2;
int hsumBufNRows = SH2*2 + 2; int hsumBufNRows = SH2*2 + 2;
size_t totalBufSize = (LrSize + minLrSize)*NLR*sizeof(CostType) + // minLr[] and Lr[] size_t totalBufSize = (LrSize + minLrSize)*NLR*sizeof(CostType) + // minLr[] and Lr[]
costBufSize*(hsumBufNRows + 1)*sizeof(CostType) + // hsumBuf, pixdiff //4e: TODO: Why we should increase sum window height one more time? costBufSize*(hsumBufNRows + 1)*sizeof(CostType) + // hsumBuf, pixdiff
CSBufSize*2*sizeof(CostType) + // C, S //4e: C is Block sum of costs, S is multidirectional dynamic sum with same size CSBufSize*2*sizeof(CostType) + // C, S
width*16*img1.channels()*sizeof(PixType) + // temp buffer for computing per-pixel cost //4e: It is needed for calcPixelCostBT function, as "buffer" value width*16*img1.channels()*sizeof(PixType) + // temp buffer for computing per-pixel cost
width*(sizeof(CostType) + sizeof(DispType)) + 1024; // disp2cost + disp2 width*(sizeof(CostType) + sizeof(DispType)) + 1024; // disp2cost + disp2
if( buffer.empty() || !buffer.isContinuous() || if( buffer.empty() || !buffer.isContinuous() ||
...@@ -371,7 +370,7 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2, ...@@ -371,7 +370,7 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2,
CostType* hsumBuf = Sbuf + CSBufSize; CostType* hsumBuf = Sbuf + CSBufSize;
CostType* pixDiff = hsumBuf + costBufSize*hsumBufNRows; CostType* pixDiff = hsumBuf + costBufSize*hsumBufNRows;
CostType* disp2cost = pixDiff + costBufSize + (LrSize + minLrSize)*NLR; //4e: It is containers for backwards disparity, made by S[d] too, but with other method CostType* disp2cost = pixDiff + costBufSize + (LrSize + minLrSize)*NLR;
DispType* disp2ptr = (DispType*)(disp2cost + width); DispType* disp2ptr = (DispType*)(disp2cost + width);
PixType* tempBuf = (PixType*)(disp2ptr + width); PixType* tempBuf = (PixType*)(disp2ptr + width);
...@@ -383,21 +382,21 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2, ...@@ -383,21 +382,21 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2,
{ {
int x1, y1, x2, y2, dx, dy; int x1, y1, x2, y2, dx, dy;
if( pass == 1 ) //4e: on the first pass, we work down, so calculate directions down, diagdown and right if( pass == 1 )
{ {
y1 = 0; y2 = height; dy = 1; y1 = 0; y2 = height; dy = 1;
x1 = 0; x2 = width1; dx = 1; x1 = 0; x2 = width1; dx = 1;
} }
else //4e: on the second pass, we work up, so calculate directions up, diagup and up else
{ {
y1 = height-1; y2 = -1; dy = -1; y1 = height-1; y2 = -1; dy = -1;
x1 = width1-1; x2 = -1; dx = -1; x1 = width1-1; x2 = -1; dx = -1;
} }
CostType *Lr[NLR]={0}, *minLr[NLR]={0}; //4e: arrays for L(x,y,r,d) of previous and current rows and minimums of them CostType *Lr[NLR]={0}, *minLr[NLR]={0};
for( k = 0; k < NLR; k++ ) //4e: One of them is needed, and one of them is stored. So, we need to swap pointer for( k = 0; k < NLR; k++ )
{ //4e: Yes, and this is done at the end of next cycle, not here. {
// shift Lr[k] and minLr[k] pointers, because we allocated them with the borders, // shift Lr[k] and minLr[k] pointers, because we allocated them with the borders,
// and will occasionally use negative indices with the arrays // and will occasionally use negative indices with the arrays
// we need to shift Lr[k] pointers by 1, to give the space for d=-1. // we need to shift Lr[k] pointers by 1, to give the space for d=-1.
...@@ -418,28 +417,28 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2, ...@@ -418,28 +417,28 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2,
if( pass == 1 ) // compute C on the first pass, and reuse it on the second pass, if any. if( pass == 1 ) // compute C on the first pass, and reuse it on the second pass, if any.
{ {
int dy1 = y == 0 ? 0 : y + SH2, dy2 = y == 0 ? SH2 : dy1; //4e: for first line's block sum we need calculate half-window of costs and only one for other int dy1 = y == 0 ? 0 : y + SH2, dy2 = y == 0 ? SH2 : dy1;
for( k = dy1; k <= dy2; k++ ) for( k = dy1; k <= dy2; k++ )
{ {
CostType* hsumAdd = hsumBuf + (std::min(k, height-1) % hsumBufNRows)*costBufSize; //4e: Ring buffer for horizontally summed lines CostType* hsumAdd = hsumBuf + (std::min(k, height-1) % hsumBufNRows)*costBufSize;
if( k < height ) if( k < height )
{ {
calcPixelCostBT( img1, img2, k, minD, maxD, pixDiff, tempBuf, clipTab, TAB_OFS, ftzero ); calcPixelCostBT( img1, img2, k, minD, maxD, pixDiff, tempBuf, clipTab, TAB_OFS, ftzero );
memset(hsumAdd, 0, D*sizeof(CostType)); memset(hsumAdd, 0, D*sizeof(CostType));
for( x = 0; x <= SW2*D; x += D ) //4e: Calculation summed costs for all disparities in first pixel of line for( x = 0; x <= SW2*D; x += D )
{ {
int scale = x == 0 ? SW2 + 1 : 1; int scale = x == 0 ? SW2 + 1 : 1;
for( d = 0; d < D; d++ ) for( d = 0; d < D; d++ )
hsumAdd[d] = (CostType)(hsumAdd[d] + pixDiff[x + d]*scale); hsumAdd[d] = (CostType)(hsumAdd[d] + pixDiff[x + d]*scale);
} }
if( y > 0 ) //4e: We calculate horizontal sums and forming full block sums for y coord by adding this horsums to previous line's sums and subtracting stored lowest if( y > 0 )
{ //4e: horsum in hsumBuf. Exception is case y=0, where we need many iterations per lines to create full blocking sum. {
const CostType* hsumSub = hsumBuf + (std::max(y - SH2 - 1, 0) % hsumBufNRows)*costBufSize; const CostType* hsumSub = hsumBuf + (std::max(y - SH2 - 1, 0) % hsumBufNRows)*costBufSize;
const CostType* Cprev = !fullDP || y == 0 ? C : C - costBufSize; //4e: Well, actually y>0, so we don't need this check: y==0 const CostType* Cprev = !fullDP || y == 0 ? C : C - costBufSize;
for( x = D; x < width1*D; x += D ) for( x = D; x < width1*D; x += D )
{ {
...@@ -475,8 +474,8 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2, ...@@ -475,8 +474,8 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2,
} }
else else
{ {
for( x = D; x < width1*D; x += D ) //4e: Calcluates horizontal sums if (y==0). This piece of code is calling SH2+1 times and then result is used in different way for( x = D; x < width1*D; x += D )
{ //4e: to create full blocks sum. That's why this code is isolated from upper case. {
const CostType* pixAdd = pixDiff + std::min(x + SW2*D, (width1-1)*D); const CostType* pixAdd = pixDiff + std::min(x + SW2*D, (width1-1)*D);
const CostType* pixSub = pixDiff + std::max(x - (SW2+1)*D, 0); const CostType* pixSub = pixDiff + std::max(x - (SW2+1)*D, 0);
...@@ -486,7 +485,7 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2, ...@@ -486,7 +485,7 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2,
} }
} }
if( y == 0 ) //4e: Calculating first full block sum. if( y == 0 )
{ {
int scale = k == 0 ? SH2 + 1 : 1; int scale = k == 0 ? SH2 + 1 : 1;
for( x = 0; x < width1*D; x++ ) for( x = 0; x < width1*D; x++ )
...@@ -495,13 +494,13 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2, ...@@ -495,13 +494,13 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2,
} }
// also, clear the S buffer // also, clear the S buffer
for( k = 0; k < width1*D; k++ ) //4e: only on first pass, so it keep old information, don't be confused for( k = 0; k < width1*D; k++ )
S[k] = 0; S[k] = 0;
} }
// clear the left and the right borders // clear the left and the right borders
memset( Lr[0] - NRD2*LrBorder - 8, 0, NRD2*LrBorder*sizeof(CostType) ); //4e: To understand this "8" shifts and how they could work it's simpler to imagine pixel dislocation in memory memset( Lr[0] - NRD2*LrBorder - 8, 0, NRD2*LrBorder*sizeof(CostType) );
memset( Lr[0] + width1*NRD2 - 8, 0, NRD2*LrBorder*sizeof(CostType) ); //4e: ...00000000|NRD2-16 of real costs value(and some of them are zeroes too)|00000000... memset( Lr[0] + width1*NRD2 - 8, 0, NRD2*LrBorder*sizeof(CostType) );
memset( minLr[0] - NR2*LrBorder, 0, NR2*LrBorder*sizeof(CostType) ); memset( minLr[0] - NR2*LrBorder, 0, NR2*LrBorder*sizeof(CostType) );
memset( minLr[0] + width1*NR2, 0, NR2*LrBorder*sizeof(CostType) ); memset( minLr[0] + width1*NR2, 0, NR2*LrBorder*sizeof(CostType) );
...@@ -624,7 +623,7 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2, ...@@ -624,7 +623,7 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2,
for( d = 0; d < D; d++ ) for( d = 0; d < D; d++ )
{ {
int Cpd = Cp[d], L0, L1, L2, L3; //4e: Remember, that every Cp is increased on P2 in line number 369. That's why next 4 lines are paper-like actually int Cpd = Cp[d], L0, L1, L2, L3;
L0 = Cpd + std::min((int)Lr_p0[d], std::min(Lr_p0[d-1] + P1, std::min(Lr_p0[d+1] + P1, delta0))) - delta0; L0 = Cpd + std::min((int)Lr_p0[d], std::min(Lr_p0[d-1] + P1, std::min(Lr_p0[d+1] + P1, delta0))) - delta0;
L1 = Cpd + std::min((int)Lr_p1[d], std::min(Lr_p1[d-1] + P1, std::min(Lr_p1[d+1] + P1, delta1))) - delta1; L1 = Cpd + std::min((int)Lr_p1[d], std::min(Lr_p1[d-1] + P1, std::min(Lr_p1[d+1] + P1, delta1))) - delta1;
...@@ -665,7 +664,7 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2, ...@@ -665,7 +664,7 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2,
CostType* Sp = S + x*D; CostType* Sp = S + x*D;
int minS = MAX_COST, bestDisp = -1; int minS = MAX_COST, bestDisp = -1;
if( npasses == 1 ) //4e: in this case we could take fifth direction almost for free(direction "left") if( npasses == 1 )
{ {
int xm = x*NR2, xd = xm*D2; int xm = x*NR2, xd = xm*D2;
...@@ -815,7 +814,7 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2, ...@@ -815,7 +814,7 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2,
disp1ptr[x + minX1] = (DispType)(d + minD*DISP_SCALE); disp1ptr[x + minX1] = (DispType)(d + minD*DISP_SCALE);
} }
for( x = minX1; x < maxX1; x++ ) //4e: Left-right check itself for( x = minX1; x < maxX1; x++ )
{ {
// we round the computed disparity both towards -inf and +inf and check // we round the computed disparity both towards -inf and +inf and check
// if either of the corresponding disparities in disp2 is consistent. // if either of the corresponding disparities in disp2 is consistent.
...@@ -826,8 +825,8 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2, ...@@ -826,8 +825,8 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2,
int _d = d1 >> DISP_SHIFT; int _d = d1 >> DISP_SHIFT;
int d_ = (d1 + DISP_SCALE-1) >> DISP_SHIFT; int d_ = (d1 + DISP_SCALE-1) >> DISP_SHIFT;
int _x = x - _d, x_ = x - d_; int _x = x - _d, x_ = x - d_;
if( 0 <= _x && _x < width && disp2ptr[_x] >= minD && std::abs(disp2ptr[_x] - _d) > disp12MaxDiff && //4e: To dismiss disparity, we should assure, that there is no any if( 0 <= _x && _x < width && disp2ptr[_x] >= minD && std::abs(disp2ptr[_x] - _d) > disp12MaxDiff &&
0 <= x_ && x_ < width && disp2ptr[x_] >= minD && std::abs(disp2ptr[x_] - d_) > disp12MaxDiff ) //4e: chance to understand this as correct. 0 <= x_ && x_ < width && disp2ptr[x_] >= minD && std::abs(disp2ptr[x_] - d_) > disp12MaxDiff )
disp1ptr[x] = (DispType)INVALID_DISP_SCALED; disp1ptr[x] = (DispType)INVALID_DISP_SCALED;
} }
} }
...@@ -840,8 +839,6 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2, ...@@ -840,8 +839,6 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2,
} }
//////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////
//TODO: Assumation: Let's pretend, that we allocate memory for pixDiff and tempBuf independently in each thread, with full size, needed for original calcBT
//TODO: Redo size of this arrays even if situation with independent allocation will still.
struct CalcVerticalSums: public ParallelLoopBody struct CalcVerticalSums: public ParallelLoopBody
{ {
CalcVerticalSums(const Mat& _img1, const Mat& _img2, const StereoSGBMParams& params, CalcVerticalSums(const Mat& _img1, const Mat& _img2, const StereoSGBMParams& params,
...@@ -852,7 +849,7 @@ struct CalcVerticalSums: public ParallelLoopBody ...@@ -852,7 +849,7 @@ struct CalcVerticalSums: public ParallelLoopBody
SW2 = SH2 = (params.SADWindowSize > 0 ? params.SADWindowSize : 5)/2; SW2 = SH2 = (params.SADWindowSize > 0 ? params.SADWindowSize : 5)/2;
ftzero = std::max(params.preFilterCap, 15) | 1; ftzero = std::max(params.preFilterCap, 15) | 1;
P1 = params.P1 > 0 ? params.P1 : 2; P1 = params.P1 > 0 ? params.P1 : 2;
P2 = std::max(params.P2 > 0 ? params.P2 : 5, P1+1); //TODO: think about P1/S(x,y) Proportion P2 = std::max(params.P2 > 0 ? params.P2 : 5, P1+1);
height = img1.rows; height = img1.rows;
width = img1.cols; width = img1.cols;
int minX1 = std::max(maxD, 0), maxX1 = width + std::min(minD, 0); int minX1 = std::max(maxD, 0), maxX1 = width + std::min(minD, 0);
...@@ -861,7 +858,7 @@ struct CalcVerticalSums: public ParallelLoopBody ...@@ -861,7 +858,7 @@ struct CalcVerticalSums: public ParallelLoopBody
D2 = D + 16; D2 = D + 16;
costBufSize = width1*D; costBufSize = width1*D;
CSBufSize = costBufSize*height; CSBufSize = costBufSize*height;
minLrSize = (width1 + LrBorder*2); minLrSize = width1;
LrSize = minLrSize*D2; LrSize = minLrSize*D2;
hsumBufNRows = SH2*2 + 2; hsumBufNRows = SH2*2 + 2;
Cbuf = alignedBuf; Cbuf = alignedBuf;
...@@ -874,10 +871,11 @@ struct CalcVerticalSums: public ParallelLoopBody ...@@ -874,10 +871,11 @@ struct CalcVerticalSums: public ParallelLoopBody
static const CostType MAX_COST = SHRT_MAX; static const CostType MAX_COST = SHRT_MAX;
static const int ALIGN = 16; static const int ALIGN = 16;
static const int TAB_OFS = 256*4; static const int TAB_OFS = 256*4;
static const int npasses = 2;
int x1 = range.start, x2 = range.end, k; int x1 = range.start, x2 = range.end, k;
size_t pixDiffSize = ((x2 - x1) + 2*SW2)*D; size_t pixDiffSize = ((x2 - x1) + 2*SW2)*D;
size_t auxBufsSize = pixDiffSize*sizeof(CostType) + //pixdiff size size_t auxBufsSize = pixDiffSize*sizeof(CostType) + //pixdiff size
width*16*img1.channels()*sizeof(PixType) + 32; //tempBuf //TODO: Probably it's better 6 instead of 16(alignment?) width*16*img1.channels()*sizeof(PixType) + 32; //tempBuf
Mat auxBuff; Mat auxBuff;
auxBuff.create(1, (int)auxBufsSize, CV_8U); auxBuff.create(1, (int)auxBufsSize, CV_8U);
CostType* pixDiff = (CostType*)alignPtr(auxBuff.ptr(), ALIGN); CostType* pixDiff = (CostType*)alignPtr(auxBuff.ptr(), ALIGN);
...@@ -886,7 +884,7 @@ struct CalcVerticalSums: public ParallelLoopBody ...@@ -886,7 +884,7 @@ struct CalcVerticalSums: public ParallelLoopBody
// Simplification of index calculation // Simplification of index calculation
pixDiff -= (x1>SW2 ? (x1 - SW2): 0)*D; pixDiff -= (x1>SW2 ? (x1 - SW2): 0)*D;
for( int pass = 1; pass <= 2; pass++ ) //TODO: rename this magic 2. for( int pass = 1; pass <= npasses; pass++ )
{ {
int y1, y2, dy; int y1, y2, dy;
...@@ -899,18 +897,18 @@ struct CalcVerticalSums: public ParallelLoopBody ...@@ -899,18 +897,18 @@ struct CalcVerticalSums: public ParallelLoopBody
y1 = height-1; y2 = -1; dy = -1; y1 = height-1; y2 = -1; dy = -1;
} }
CostType *Lr[NLR]={0}, *minLr[NLR]={0}; //4e: arrays for L(x,y,r,d) of previous and current rows and minimums of them CostType *Lr[NLR]={0}, *minLr[NLR]={0};
for( k = 0; k < NLR; k++ ) //4e: One of them is needed, and one of them is stored. So, we need to swap pointer for( k = 0; k < NLR; k++ )
{ //4e: Yes, and this is done at the end of next cycle, not here. {
// shift Lr[k] and minLr[k] pointers, because we allocated them with the borders, // shift Lr[k] and minLr[k] pointers, because we allocated them with the borders,
// and will occasionally use negative indices with the arrays // and will occasionally use negative indices with the arrays
// we need to shift Lr[k] pointers by 1, to give the space for d=-1. // we need to shift Lr[k] pointers by 1, to give the space for d=-1.
// however, then the alignment will be imperfect, i.e. bad for SSE, // however, then the alignment will be imperfect, i.e. bad for SSE,
// thus we shift the pointers by 8 (8*sizeof(short) == 16 - ideal alignment) // thus we shift the pointers by 8 (8*sizeof(short) == 16 - ideal alignment)
Lr[k] = hsumBuf + costBufSize*hsumBufNRows + LrSize*k + D2*LrBorder + 8; Lr[k] = hsumBuf + costBufSize*hsumBufNRows + LrSize*k + 8;
memset( Lr[k] + x1*D2 - 8, 0, (x2-x1)*D2*sizeof(CostType) ); memset( Lr[k] + x1*D2 - 8, 0, (x2-x1)*D2*sizeof(CostType) );
minLr[k] = hsumBuf + costBufSize*hsumBufNRows + LrSize*NLR + minLrSize*k + LrBorder; minLr[k] = hsumBuf + costBufSize*hsumBufNRows + LrSize*NLR + minLrSize*k;
memset( minLr[k] + x1, 0, (x2-x1)*sizeof(CostType) ); memset( minLr[k] + x1, 0, (x2-x1)*sizeof(CostType) );
} }
...@@ -922,60 +920,37 @@ struct CalcVerticalSums: public ParallelLoopBody ...@@ -922,60 +920,37 @@ struct CalcVerticalSums: public ParallelLoopBody
if( pass == 1 ) // compute C on the first pass, and reuse it on the second pass, if any. if( pass == 1 ) // compute C on the first pass, and reuse it on the second pass, if any.
{ {
int dy1 = y == 0 ? 0 : y + SH2, dy2 = y == 0 ? SH2 : dy1; //4e: for first line's block sum we need calculate half-window of costs and only one for other int dy1 = y == 0 ? 0 : y + SH2, dy2 = y == 0 ? SH2 : dy1;
for( k = dy1; k <= dy2; k++ ) for( k = dy1; k <= dy2; k++ )
{ {
CostType* hsumAdd = hsumBuf + (std::min(k, height-1) % hsumBufNRows)*costBufSize; //4e: Ring buffer for horizontally summed lines CostType* hsumAdd = hsumBuf + (std::min(k, height-1) % hsumBufNRows)*costBufSize;
if( k < height ) if( k < height )
{ {
calcPixelCostBT( img1, img2, k, minD, maxD, pixDiff, tempBuf, clipTab, TAB_OFS, ftzero, x1 - SW2, x2 + SW2); calcPixelCostBT( img1, img2, k, minD, maxD, pixDiff, tempBuf, clipTab, TAB_OFS, ftzero, x1 - SW2, x2 + SW2);
memset(hsumAdd + x1*D, 0, D*sizeof(CostType)); memset(hsumAdd + x1*D, 0, D*sizeof(CostType));
for( x = (x1 - SW2)*D; x <= (x1 + SW2)*D; x += D ) //4e: Calculation summed costs for all disparities in first pixel of line for( x = (x1 - SW2)*D; x <= (x1 + SW2)*D; x += D )
{ {
int xbord = x <= 0 ? 0 : (x > (width1 - 1)*D? (width1 - 1)*D : x); int xbord = x <= 0 ? 0 : (x > (width1 - 1)*D? (width1 - 1)*D : x);
for( d = 0; d < D; d++ ) for( d = 0; d < D; d++ )
hsumAdd[x1*D + d] = (CostType)(hsumAdd[x1*D + d] + pixDiff[xbord + d]); hsumAdd[x1*D + d] = (CostType)(hsumAdd[x1*D + d] + pixDiff[xbord + d]);
} }
if( y > 0 ) //4e: We calculate horizontal sums and forming full block sums for y coord by adding this horsums to previous line's sums and subtracting stored lowest if( y > 0 )
{ //4e: horsum in hsumBuf. Exception is case y=0, where we need many iterations per lines to create full blocking sum. {
const CostType* hsumSub = hsumBuf + (std::max(y - SH2 - 1, 0) % hsumBufNRows)*costBufSize; const CostType* hsumSub = hsumBuf + (std::max(y - SH2 - 1, 0) % hsumBufNRows)*costBufSize;
const CostType* Cprev = C - costBufSize; const CostType* Cprev = C - costBufSize;
// We need to calculate C[x1] in different way, because hsumadd is already calculated for( d = 0; d < D; d++ )
// We don't doing then for x==0, because original function has forgotten to do this //TODO: Check: does this original still exist? C[x1*D + d] = (CostType)(Cprev[x1*D + d] + hsumAdd[x1*D + d] - hsumSub[x1*D + d]);
if(x1!=0)
{
for( d = 0; d < D; d++ )
C[x1*D + d] = (CostType)(Cprev[x1*D + d] + hsumAdd[x1*D + d] - hsumSub[x1*D + d]);
}
for( x = (x1+1)*D; x < x2*D; x += D ) for( x = (x1+1)*D; x < x2*D; x += D )
{ {
const CostType* pixAdd = pixDiff + std::min(x + SW2*D, (width1-1)*D); const CostType* pixAdd = pixDiff + std::min(x + SW2*D, (width1-1)*D);
const CostType* pixSub = pixDiff + std::max(x - (SW2+1)*D, 0); const CostType* pixSub = pixDiff + std::max(x - (SW2+1)*D, 0);
// #if CV_SIMD128
// if( useSIMD )
// {
// for( d = 0; d < D; d += 8 )
// {
// v_int16x8 hv = v_load(hsumAdd + x - D + d);
// v_int16x8 Cx = v_load(Cprev + x + d);
// v_int16x8 psub = v_load(pixSub + d);
// v_int16x8 padd = v_load(pixAdd + d);
// hv = (hv - psub + padd);
// psub = v_load(hsumSub + x + d);
// Cx = Cx - psub + hv;
// v_store(hsumAdd + x + d, hv);
// v_store(C + x + d, Cx);
// }
// }
// else
// #endif
{ {
for( d = 0; d < D; d++ ) for( d = 0; d < D; d++ )
{ {
...@@ -987,8 +962,8 @@ struct CalcVerticalSums: public ParallelLoopBody ...@@ -987,8 +962,8 @@ struct CalcVerticalSums: public ParallelLoopBody
} }
else else
{ {
for( x = (x1+1)*D; x < x2*D; x += D ) //4e: Calcluates horizontal sums if (y==0). This piece of code is calling SH2+1 times and then result is used in different way for( x = (x1+1)*D; x < x2*D; x += D )
{ //4e: to create full blocks sum. That's why this code is isolated from upper case. {
const CostType* pixAdd = pixDiff + std::min(x + SW2*D, (width1-1)*D); const CostType* pixAdd = pixDiff + std::min(x + SW2*D, (width1-1)*D);
const CostType* pixSub = pixDiff + std::max(x - (SW2+1)*D, 0); const CostType* pixSub = pixDiff + std::max(x - (SW2+1)*D, 0);
...@@ -996,10 +971,9 @@ struct CalcVerticalSums: public ParallelLoopBody ...@@ -996,10 +971,9 @@ struct CalcVerticalSums: public ParallelLoopBody
hsumAdd[x + d] = (CostType)(hsumAdd[x - D + d] + pixAdd[d] - pixSub[d]); hsumAdd[x + d] = (CostType)(hsumAdd[x - D + d] + pixAdd[d] - pixSub[d]);
} }
} }
// Return to coordinates, which is needed by CalcCostBT
} }
if( y == 0 ) //4e: Calculating first full block sum. if( y == 0 )
{ {
int scale = k == 0 ? SH2 + 1 : 1; int scale = k == 0 ? SH2 + 1 : 1;
for( x = x1*D; x < x2*D; x++ ) for( x = x1*D; x < x2*D; x++ )
...@@ -1008,26 +982,19 @@ struct CalcVerticalSums: public ParallelLoopBody ...@@ -1008,26 +982,19 @@ struct CalcVerticalSums: public ParallelLoopBody
} }
// also, clear the S buffer // also, clear the S buffer
for( k = x1*D; k < x2*D; k++ ) //4e: only on first pass, so it keep old information, don't be confused for( k = x1*D; k < x2*D; k++ )
S[k] = 0; S[k] = 0;
} }
// [formula 13 in the paper] // [formula 13 in the paper]
// compute L_r(p, d) = C(p, d) + // compute L_r(p, d) = C(p, d) +
// min(L_r(p-r, d), // min(L_r(p-r, d),
// L_r(p-r, d-1) + P1, // L_r(p-r, d-1) + P1,
// L_r(p-r, d+1) + P1, // L_r(p-r, d+1) + P1,
// min_k L_r(p-r, k) + P2) - min_k L_r(p-r, k) // min_k L_r(p-r, k) + P2) - min_k L_r(p-r, k)
// where p = (x,y), r is one of the directions. // where p = (x,y), r is one of the directions.
// we process all the directions at once: // we process one directions on first pass and other on second:
// 0: r=(-dx, 0) // r=(0, dy), where dy=1 on first pass and dy=-1 on second
// 1: r=(-1, -dy)
// 2: r=(0, -dy)
// 3: r=(1, -dy)
// 4: r=(-2, -dy)
// 5: r=(-1, -dy*2)
// 6: r=(1, -dy*2)
// 7: r=(2, -dy)
for( x = x1; x != x2; x++ ) for( x = x1; x != x2; x++ )
{ {
...@@ -1043,88 +1010,12 @@ struct CalcVerticalSums: public ParallelLoopBody ...@@ -1043,88 +1010,12 @@ struct CalcVerticalSums: public ParallelLoopBody
const CostType* Cp = C + x*D; const CostType* Cp = C + x*D;
CostType* Sp = S + x*D; CostType* Sp = S + x*D;
// #if CV_SIMD128
// if( useSIMD )
// {
// v_int16x8 _P1 = v_setall_s16((short)P1);
//
// v_int16x8 _delta0 = v_setall_s16((short)delta0);
// v_int16x8 _delta1 = v_setall_s16((short)delta1);
// v_int16x8 _delta2 = v_setall_s16((short)delta2);
// v_int16x8 _delta3 = v_setall_s16((short)delta3);
// v_int16x8 _minL0 = v_setall_s16((short)MAX_COST);
//
// for( d = 0; d < D; d += 8 )
// {
// v_int16x8 Cpd = v_load(Cp + d);
// v_int16x8 L0, L1, L2, L3;
//
// L0 = v_load(Lr_p0 + d);
// L1 = v_load(Lr_p1 + d);
// L2 = v_load(Lr_ppr + d);
// L3 = v_load(Lr_p3 + d);
//
// L0 = v_min(L0, (v_load(Lr_p0 + d - 1) + _P1));
// L0 = v_min(L0, (v_load(Lr_p0 + d + 1) + _P1));
//
// L1 = v_min(L1, (v_load(Lr_p1 + d - 1) + _P1));
// L1 = v_min(L1, (v_load(Lr_p1 + d + 1) + _P1));
//
// L2 = v_min(L2, (v_load(Lr_ppr + d - 1) + _P1));
// L2 = v_min(L2, (v_load(Lr_ppr + d + 1) + _P1));
//
// L3 = v_min(L3, (v_load(Lr_p3 + d - 1) + _P1));
// L3 = v_min(L3, (v_load(Lr_p3 + d + 1) + _P1));
//
// L0 = v_min(L0, _delta0);
// L0 = ((L0 - _delta0) + Cpd);
//
// L1 = v_min(L1, _delta1);
// L1 = ((L1 - _delta1) + Cpd);
//
// L2 = v_min(L2, _delta2);
// L2 = ((L2 - _delta2) + Cpd);
//
// L3 = v_min(L3, _delta3);
// L3 = ((L3 - _delta3) + Cpd);
//
// v_store(Lr_p + d, L0);
// v_store(Lr_p + d + D2, L1);
// v_store(Lr_p + d + D2*2, L2);
// v_store(Lr_p + d + D2*3, L3);
//
// // Get minimum from in L0-L3
// v_int16x8 t02L, t02H, t13L, t13H, t0123L, t0123H;
// v_zip(L0, L2, t02L, t02H); // L0[0] L2[0] L0[1] L2[1]...
// v_zip(L1, L3, t13L, t13H); // L1[0] L3[0] L1[1] L3[1]...
// v_int16x8 t02 = v_min(t02L, t02H); // L0[i] L2[i] L0[i] L2[i]...
// v_int16x8 t13 = v_min(t13L, t13H); // L1[i] L3[i] L1[i] L3[i]...
// v_zip(t02, t13, t0123L, t0123H); // L0[i] L1[i] L2[i] L3[i]...
// v_int16x8 t0 = v_min(t0123L, t0123H);
// _minL0 = v_min(_minL0, t0);
//
// v_int16x8 Sval = v_load(Sp + d);
//
// L0 = L0 + L1;
// L2 = L2 + L3;
// Sval = Sval + L0;
// Sval = Sval + L2;
//
// v_store(Sp + d, Sval);
// }
//
// v_int32x4 minL, minH;
// v_expand(_minL0, minL, minH);
// v_pack_store(&minLr[0][x], v_min(minL, minH));
// }
// else
// #endif
{ {
int minL = MAX_COST; int minL = MAX_COST;
for( d = 0; d < D; d++ ) for( d = 0; d < D; d++ )
{ {
int Cpd = Cp[d], L; //4e: Remember, that every Cp is increased on P2 in line number 369. That's why next 4 lines are paper-like actually int Cpd = Cp[d], L;
L = Cpd + std::min((int)Lr_ppr[d], std::min(Lr_ppr[d-1] + P1, std::min(Lr_ppr[d+1] + P1, delta))) - delta; L = Cpd + std::min((int)Lr_ppr[d], std::min(Lr_ppr[d-1] + P1, std::min(Lr_ppr[d+1] + P1, delta))) - delta;
...@@ -1144,7 +1035,6 @@ struct CalcVerticalSums: public ParallelLoopBody ...@@ -1144,7 +1035,6 @@ struct CalcVerticalSums: public ParallelLoopBody
} }
} }
static const int NLR = 2; static const int NLR = 2;
static const int LrBorder = NLR - 1;
const Mat& img1; const Mat& img1;
const Mat& img2; const Mat& img2;
CostType* Cbuf; CostType* Cbuf;
...@@ -1178,7 +1068,7 @@ struct CalcHorizontalSums: public ParallelLoopBody ...@@ -1178,7 +1068,7 @@ struct CalcHorizontalSums: public ParallelLoopBody
minD = params.minDisparity; minD = params.minDisparity;
maxD = minD + params.numDisparities; maxD = minD + params.numDisparities;
P1 = params.P1 > 0 ? params.P1 : 2; P1 = params.P1 > 0 ? params.P1 : 2;
P2 = std::max(params.P2 > 0 ? params.P2 : 5, P1+1); //TODO: think about P1/S(x,y) Proportion P2 = std::max(params.P2 > 0 ? params.P2 : 5, P1+1);
uniquenessRatio = params.uniquenessRatio >= 0 ? params.uniquenessRatio : 10; uniquenessRatio = params.uniquenessRatio >= 0 ? params.uniquenessRatio : 10;
disp12MaxDiff = params.disp12MaxDiff > 0 ? params.disp12MaxDiff : 1; disp12MaxDiff = params.disp12MaxDiff > 0 ? params.disp12MaxDiff : 1;
height = img1.rows; height = img1.rows;
...@@ -1192,7 +1082,7 @@ struct CalcHorizontalSums: public ParallelLoopBody ...@@ -1192,7 +1082,7 @@ struct CalcHorizontalSums: public ParallelLoopBody
costBufSize = width1*D; costBufSize = width1*D;
CSBufSize = costBufSize*height; CSBufSize = costBufSize*height;
D2 = D + 16; D2 = D + 16;
LrSize = 2 * D2; //TODO: Check: do we need this value or not? LrSize = 2 * D2;
Cbuf = alignedBuf; Cbuf = alignedBuf;
Sbuf = Cbuf + CSBufSize; Sbuf = Cbuf + CSBufSize;
} }
...@@ -1200,13 +1090,11 @@ struct CalcHorizontalSums: public ParallelLoopBody ...@@ -1200,13 +1090,11 @@ struct CalcHorizontalSums: public ParallelLoopBody
void operator()( const Range& range ) const void operator()( const Range& range ) const
{ {
int y1 = range.start, y2 = range.end; int y1 = range.start, y2 = range.end;
static const CostType MAX_COST = SHRT_MAX; size_t auxBufsSize = LrSize * sizeof(CostType) + width*(sizeof(CostType) + sizeof(DispType)) + 32;
static const int ALIGN = 16;
size_t auxBufsSize = LrSize + width*(sizeof(CostType) + sizeof(DispType)) + 32;
Mat auxBuff; Mat auxBuff;
auxBuff.create(1, (int)auxBufsSize, CV_8U); auxBuff.create(1, (int)auxBufsSize, CV_8U);
CostType *Lr = (CostType*)alignPtr(auxBuff.ptr(), ALIGN) + 8; CostType *Lr = ((CostType*)alignPtr(auxBuff.ptr(), ALIGN)) + 8;
CostType* disp2cost = Lr + LrSize; CostType* disp2cost = Lr + LrSize;
DispType* disp2ptr = (DispType*)(disp2cost + width); DispType* disp2ptr = (DispType*)(disp2cost + width);
...@@ -1227,132 +1115,48 @@ struct CalcHorizontalSums: public ParallelLoopBody ...@@ -1227,132 +1115,48 @@ struct CalcHorizontalSums: public ParallelLoopBody
// clear buffers // clear buffers
memset( Lr - 8, 0, LrSize*sizeof(CostType) ); memset( Lr - 8, 0, LrSize*sizeof(CostType) );
Lr[-1] = Lr[D] = Lr[D2 - 1] = Lr[D2 + D] = MAX_COST;
minLr = 0; minLr = 0;
/* // [formula 13 in the paper]
[formula 13 in the paper] // compute L_r(p, d) = C(p, d) +
compute L_r(p, d) = C(p, d) + // min(L_r(p-r, d),
min(L_r(p-r, d), // L_r(p-r, d-1) + P1,
L_r(p-r, d-1) + P1, // L_r(p-r, d+1) + P1,
L_r(p-r, d+1) + P1, // min_k L_r(p-r, k) + P2) - min_k L_r(p-r, k)
min_k L_r(p-r, k) + P2) - min_k L_r(p-r, k) // where p = (x,y), r is one of the directions.
where p = (x,y), r is one of the directions. // we process all the directions at once:
we process all the directions at once: // we process one directions on first pass and other on second:
0: r=(-dx, 0) // r=(dx, 0), where dx=1 on first pass and dx=-1 on second
1: r=(-1, -dy)
2: r=(0, -dy)
3: r=(1, -dy)
4: r=(-2, -dy)
5: r=(-1, -dy*2)
6: r=(1, -dy*2)
7: r=(2, -dy)
*/
for( x = 0; x != width1; x++) for( x = 0; x != width1; x++)
{ {
int delta = minLr + P2; int delta = minLr + P2;
CostType* Lr_ppr = Lr + ((x&1)? 0 : D2); CostType* Lr_ppr = Lr + ((x&1)? 0 : D2);
Lr_ppr[-1] = Lr_ppr[D] = MAX_COST; //TODO: Well, probably, it's better do this once before.
CostType* Lr_p = Lr + ((x&1)? D2 :0); CostType* Lr_p = Lr + ((x&1)? D2 :0);
const CostType* Cp = C + x*D; const CostType* Cp = C + x*D;
CostType* Sp = S + x*D; CostType* Sp = S + x*D;
// #if CV_SIMD128 int minL = MAX_COST;
// if( useSIMD )
// {
// v_int16x8 _P1 = v_setall_s16((short)P1);
//
// v_int16x8 _delta0 = v_setall_s16((short)delta0);
// v_int16x8 _delta1 = v_setall_s16((short)delta1);
// v_int16x8 _delta2 = v_setall_s16((short)delta2);
// v_int16x8 _delta3 = v_setall_s16((short)delta3);
// v_int16x8 _minL0 = v_setall_s16((short)MAX_COST);
//
// for( d = 0; d < D; d += 8 )
// {
// v_int16x8 Cpd = v_load(Cp + d);
// v_int16x8 L0, L1, L2, L3;
//
// L0 = v_load(Lr_ppr + d);
// L1 = v_load(Lr_p1 + d);
// L2 = v_load(Lr_p2 + d);
// L3 = v_load(Lr_p3 + d);
//
// L0 = v_min(L0, (v_load(Lr_ppr + d - 1) + _P1));
// L0 = v_min(L0, (v_load(Lr_ppr + d + 1) + _P1));
//
// L1 = v_min(L1, (v_load(Lr_p1 + d - 1) + _P1));
// L1 = v_min(L1, (v_load(Lr_p1 + d + 1) + _P1));
//
// L2 = v_min(L2, (v_load(Lr_p2 + d - 1) + _P1));
// L2 = v_min(L2, (v_load(Lr_p2 + d + 1) + _P1));
//
// L3 = v_min(L3, (v_load(Lr_p3 + d - 1) + _P1));
// L3 = v_min(L3, (v_load(Lr_p3 + d + 1) + _P1));
//
// L0 = v_min(L0, _delta0);
// L0 = ((L0 - _delta0) + Cpd);
//
// L1 = v_min(L1, _delta1);
// L1 = ((L1 - _delta1) + Cpd);
//
// L2 = v_min(L2, _delta2);
// L2 = ((L2 - _delta2) + Cpd);
//
// L3 = v_min(L3, _delta3);
// L3 = ((L3 - _delta3) + Cpd);
//
// v_store(Lr_p + d, L0);
// v_store(Lr_p + d + D2, L1);
// v_store(Lr_p + d + D2*2, L2);
// v_store(Lr_p + d + D2*3, L3);
//
// // Get minimum from in L0-L3
// v_int16x8 t02L, t02H, t13L, t13H, t0123L, t0123H;
// v_zip(L0, L2, t02L, t02H); // L0[0] L2[0] L0[1] L2[1]...
// v_zip(L1, L3, t13L, t13H); // L1[0] L3[0] L1[1] L3[1]...
// v_int16x8 t02 = v_min(t02L, t02H); // L0[i] L2[i] L0[i] L2[i]...
// v_int16x8 t13 = v_min(t13L, t13H); // L1[i] L3[i] L1[i] L3[i]...
// v_zip(t02, t13, t0123L, t0123H); // L0[i] L1[i] L2[i] L3[i]...
// v_int16x8 t0 = v_min(t0123L, t0123H);
// _minL0 = v_min(_minL0, t0);
//
// v_int16x8 Sval = v_load(Sp + d);
//
// L0 = L0 + L1;
// L2 = L2 + L3;
// Sval = Sval + L0;
// Sval = Sval + L2;
//
// v_store(Sp + d, Sval);
// }
//
// v_int32x4 minL, minH;
// v_expand(_minL0, minL, minH);
// v_pack_store(&minLr[x], v_min(minL, minH));
// }
// else
// #endif
{
int minL = MAX_COST;
for( d = 0; d < D; d++ ) for( d = 0; d < D; d++ )
{ {
int Cpd = Cp[d], L; //4e: Remember, that every Cp is increased on P2 in line number 369. That's why next 4 lines are paper-like actually int Cpd = Cp[d], L;
L = Cpd + std::min((int)Lr_ppr[d], std::min(Lr_ppr[d-1] + P1, std::min(Lr_ppr[d+1] + P1, delta))) - delta; L = Cpd + std::min((int)Lr_ppr[d], std::min(Lr_ppr[d-1] + P1, std::min(Lr_ppr[d+1] + P1, delta))) - delta;
Lr_p[d] = (CostType)L; Lr_p[d] = (CostType)L;
minL = std::min(minL, L); minL = std::min(minL, L);
Sp[d] = saturate_cast<CostType>(Sp[d] + L); Sp[d] = saturate_cast<CostType>(Sp[d] + L);
}
minLr = (CostType)minL;
} }
minLr = (CostType)minL;
} }
memset( Lr - 8, 0, LrSize*sizeof(CostType) ); memset( Lr - 8, 0, LrSize*sizeof(CostType) );
Lr[-1] = Lr[D] = Lr[D2 - 1] = Lr[D2 + D] = MAX_COST;
minLr = 0; minLr = 0;
for( x = width1-1; x != -1; x--) for( x = width1-1; x != -1; x--)
...@@ -1361,136 +1165,30 @@ struct CalcHorizontalSums: public ParallelLoopBody ...@@ -1361,136 +1165,30 @@ struct CalcHorizontalSums: public ParallelLoopBody
CostType* Lr_ppr = Lr + ((x&1)? 0 :D2); CostType* Lr_ppr = Lr + ((x&1)? 0 :D2);
Lr_ppr[-1] = Lr_ppr[D] = MAX_COST;
CostType* Lr_p = Lr + ((x&1)? D2 :0); CostType* Lr_p = Lr + ((x&1)? D2 :0);
const CostType* Cp = C + x*D; const CostType* Cp = C + x*D;
CostType* Sp = S + x*D; CostType* Sp = S + x*D;
int minS = MAX_COST, bestDisp = -1; int minS = MAX_COST, bestDisp = -1;
// #if CV_SIMD128 int minL = MAX_COST;
// if( useSIMD )
// {
// v_int16x8 _P1 = v_setall_s16((short)P1);
//
// v_int16x8 _delta0 = v_setall_s16((short)delta0);
// v_int16x8 _delta1 = v_setall_s16((short)delta1);
// v_int16x8 _delta2 = v_setall_s16((short)delta2);
// v_int16x8 _delta3 = v_setall_s16((short)delta3);
// v_int16x8 _minL0 = v_setall_s16((short)MAX_COST);
//
// for( d = 0; d < D; d += 8 )
// {
// v_int16x8 Cpd = v_load(Cp + d);
// v_int16x8 L0, L1, L2, L3;
//
// L0 = v_load(Lr_ppr + d);
// L1 = v_load(Lr_p1 + d);
// L2 = v_load(Lr_p2 + d);
// L3 = v_load(Lr_p3 + d);
//
// L0 = v_min(L0, (v_load(Lr_ppr + d - 1) + _P1));
// L0 = v_min(L0, (v_load(Lr_ppr + d + 1) + _P1));
//
// L1 = v_min(L1, (v_load(Lr_p1 + d - 1) + _P1));
// L1 = v_min(L1, (v_load(Lr_p1 + d + 1) + _P1));
//
// L2 = v_min(L2, (v_load(Lr_p2 + d - 1) + _P1));
// L2 = v_min(L2, (v_load(Lr_p2 + d + 1) + _P1));
//
// L3 = v_min(L3, (v_load(Lr_p3 + d - 1) + _P1));
// L3 = v_min(L3, (v_load(Lr_p3 + d + 1) + _P1));
//
// L0 = v_min(L0, _delta0);
// L0 = ((L0 - _delta0) + Cpd);
//
// L1 = v_min(L1, _delta1);
// L1 = ((L1 - _delta1) + Cpd);
//
// L2 = v_min(L2, _delta2);
// L2 = ((L2 - _delta2) + Cpd);
//
// L3 = v_min(L3, _delta3);
// L3 = ((L3 - _delta3) + Cpd);
//
// v_store(Lr_p + d, L0);
// v_store(Lr_p + d + D2, L1);
// v_store(Lr_p + d + D2*2, L2);
// v_store(Lr_p + d + D2*3, L3);
//
// // Get minimum from in L0-L3
// v_int16x8 t02L, t02H, t13L, t13H, t0123L, t0123H;
// v_zip(L0, L2, t02L, t02H); // L0[0] L2[0] L0[1] L2[1]...
// v_zip(L1, L3, t13L, t13H); // L1[0] L3[0] L1[1] L3[1]...
// v_int16x8 t02 = v_min(t02L, t02H); // L0[i] L2[i] L0[i] L2[i]...
// v_int16x8 t13 = v_min(t13L, t13H); // L1[i] L3[i] L1[i] L3[i]...
// v_zip(t02, t13, t0123L, t0123H); // L0[i] L1[i] L2[i] L3[i]...
// v_int16x8 t0 = v_min(t0123L, t0123H);
// _minL0 = v_min(_minL0, t0);
//
// v_int16x8 Sval = v_load(Sp + d);
//
// L0 = L0 + L1;
// L2 = L2 + L3;
// Sval = Sval + L0;
// Sval = Sval + L2;
//
// v_store(Sp + d, Sval);
// }
//
// v_int32x4 minL, minH;
// v_expand(_minL0, minL, minH);
// v_pack_store(&minLr[x], v_min(minL, minH));
// }
// else
// #endif
//TODO:Next piece of code is came from postprocessing. Be very careful with joining them!!!
// #if CV_SIMD128
// if( useSIMD )
// {
// v_int16x8 _minS = v_setall_s16(MAX_COST), _bestDisp = v_setall_s16(-1);
// v_int16x8 _d8 = v_int16x8(0, 1, 2, 3, 4, 5, 6, 7), _8 = v_setall_s16(8);
//
// for( d = 0; d < D; d+= 8 )
// {
// v_int16x8 L0 = v_load(Sp + d);
// v_int16x8 mask = L0 < _minS;
// _minS = v_min( L0, _minS );
// _bestDisp = _bestDisp ^ ((_bestDisp ^ _d8) & mask);
// _d8 = _d8 + _8;
// }
// v_int32x4 _d0, _d1;
// v_expand(_minS, _d0, _d1);
// minS = (int)std::min(v_reduce_min(_d0), v_reduce_min(_d1));
// v_int16x8 v_mask = v_setall_s16((short)minS) == _minS;
//
// _bestDisp = (_bestDisp & v_mask) | (v_setall_s16(SHRT_MAX) & ~v_mask);
// v_expand(_bestDisp, _d0, _d1);
// bestDisp = (int)std::min(v_reduce_min(_d0), v_reduce_min(_d1));
// }
// else
// #endif
{
int minL = MAX_COST;
for( d = 0; d < D; d++ ) for( d = 0; d < D; d++ )
{ {
int Cpd = Cp[d], L; //4e: Remember, that every Cp is increased on P2 in line number 369. That's why next 4 lines are paper-like actually int Cpd = Cp[d], L;
L = Cpd + std::min((int)Lr_ppr[d], std::min(Lr_ppr[d-1] + P1, std::min(Lr_ppr[d+1] + P1, delta))) - delta; L = Cpd + std::min((int)Lr_ppr[d], std::min(Lr_ppr[d-1] + P1, std::min(Lr_ppr[d+1] + P1, delta))) - delta;
Lr_p[d] = (CostType)L; Lr_p[d] = (CostType)L;
minL = std::min(minL, L); minL = std::min(minL, L);
Sp[d] = saturate_cast<CostType>(Sp[d] + L); Sp[d] = saturate_cast<CostType>(Sp[d] + L);
if( Sp[d] < minS ) if( Sp[d] < minS )
{ {
minS = Sp[d]; minS = Sp[d];
bestDisp = d; bestDisp = d;
}
} }
minLr = (CostType)minL;
} }
minLr = (CostType)minL;
//Some postprocessing procedures and saving //Some postprocessing procedures and saving
for( d = 0; d < D; d++ ) for( d = 0; d < D; d++ )
{ {
...@@ -1531,17 +1229,17 @@ struct CalcHorizontalSums: public ParallelLoopBody ...@@ -1531,17 +1229,17 @@ struct CalcHorizontalSums: public ParallelLoopBody
int _d = d1 >> DISP_SHIFT; int _d = d1 >> DISP_SHIFT;
int d_ = (d1 + DISP_SCALE-1) >> DISP_SHIFT; int d_ = (d1 + DISP_SCALE-1) >> DISP_SHIFT;
int _x = x - _d, x_ = x - d_; int _x = x - _d, x_ = x - d_;
if( 0 <= _x && _x < width && disp2ptr[_x] >= minD && std::abs(disp2ptr[_x] - _d) > disp12MaxDiff && //4e: To dismiss disparity, we should assure, that there is no any if( 0 <= _x && _x < width && disp2ptr[_x] >= minD && std::abs(disp2ptr[_x] - _d) > disp12MaxDiff &&
0 <= x_ && x_ < width && disp2ptr[x_] >= minD && std::abs(disp2ptr[x_] - d_) > disp12MaxDiff ) //4e: chance to understand this as correct. 0 <= x_ && x_ < width && disp2ptr[x_] >= minD && std::abs(disp2ptr[x_] - d_) > disp12MaxDiff )
disp1ptr[x] = (DispType)INVALID_DISP_SCALED; disp1ptr[x] = (DispType)INVALID_DISP_SCALED;
} }
} }
} }
static const int NLR = 2;
static const int LrBorder = NLR - 1;
static const int DISP_SHIFT = StereoMatcher::DISP_SHIFT; static const int DISP_SHIFT = StereoMatcher::DISP_SHIFT;
static const int DISP_SCALE = (1 << DISP_SHIFT); static const int DISP_SCALE = (1 << DISP_SHIFT);
static const CostType MAX_COST = SHRT_MAX;
static const int ALIGN = 16;
const Mat& img1; const Mat& img1;
const Mat& img2; const Mat& img2;
Mat& disp1; Mat& disp1;
...@@ -1567,68 +1265,41 @@ struct CalcHorizontalSums: public ParallelLoopBody ...@@ -1567,68 +1265,41 @@ struct CalcHorizontalSums: public ParallelLoopBody
int disp12MaxDiff; int disp12MaxDiff;
}; };
/* /*
This is new experimential version of disparity calculation, which should be parralled after
TODO: Don't forget to rewrire this commentaries after
computes disparity for "roi" in img1 w.r.t. img2 and write it to disp1buf. computes disparity for "roi" in img1 w.r.t. img2 and write it to disp1buf.
that is, disp1buf(x, y)=d means that img1(x+roi.x, y+roi.y) ~ img2(x+roi.x-d, y+roi.y). that is, disp1buf(x, y)=d means that img1(x+roi.x, y+roi.y) ~ img2(x+roi.x-d, y+roi.y).
minD <= d < maxD. minD <= d < maxD.
disp2full is the reverse disparity map, that is:
disp2full(x+roi.x,y+roi.y)=d means that img2(x+roi.x, y+roi.y) ~ img1(x+roi.x+d, y+roi.y)
note that disp1buf will have the same size as the roi and note that disp1buf will have the same size as the roi and
disp2full will have the same size as img1 (or img2). On exit disp1buf is not the final disparity, it is an intermediate result that becomes
On exit disp2buf is not the final disparity, it is an intermediate result that becomes
final after all the tiles are processed. final after all the tiles are processed.
the disparity in disp1buf is written with sub-pixel accuracy the disparity in disp1buf is written with sub-pixel accuracy
(4 fractional bits, see StereoSGBM::DISP_SCALE), (4 fractional bits, see StereoSGBM::DISP_SCALE),
using quadratic interpolation, while the disparity in disp2buf using quadratic interpolation, while the disparity in disp2buf
is written as is, without interpolation. is written as is, without interpolation.
disp2cost also has the same size as img1 (or img2).
It contains the minimum current cost, used to find the best disparity, corresponding to the minimal cost.
*/ */
static void computeDisparitySGBMParallel( const Mat& img1, const Mat& img2, static void computeDisparitySGBM_HH4( const Mat& img1, const Mat& img2,
Mat& disp1, const StereoSGBMParams& params, Mat& disp1, const StereoSGBMParams& params,
Mat& buffer ) Mat& buffer )
{ {
//#if CV_SIMD128
// // maxDisparity is supposed to multiple of 16, so we can forget doing else
// static const uchar LSBTab[] =
// {
// 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
// 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
// 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
// 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
// 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
// 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
// 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
// 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
// };
// static const v_uint16x8 v_LSB = v_uint16x8(0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80);
//
// bool useSIMD = hasSIMD128();
//#endif
const int ALIGN = 16; const int ALIGN = 16;
const int DISP_SHIFT = StereoMatcher::DISP_SHIFT; const int DISP_SHIFT = StereoMatcher::DISP_SHIFT;
const int DISP_SCALE = (1 << DISP_SHIFT); const int DISP_SCALE = (1 << DISP_SHIFT);
int minD = params.minDisparity, maxD = minD + params.numDisparities; int minD = params.minDisparity, maxD = minD + params.numDisparities;
Size SADWindowSize; //4e: SAD means Sum of Absolute Differences Size SADWindowSize;
SADWindowSize.width = SADWindowSize.height = params.SADWindowSize > 0 ? params.SADWindowSize : 5; //4e: and this is always square SADWindowSize.width = SADWindowSize.height = params.SADWindowSize > 0 ? params.SADWindowSize : 5;
int ftzero = std::max(params.preFilterCap, 15) | 1; //4e:ftzero clips x-derivatives. I think, this story with arrays is about non-realized SIMD method int ftzero = std::max(params.preFilterCap, 15) | 1;
int P1 = params.P1 > 0 ? params.P1 : 2, P2 = std::max(params.P2 > 0 ? params.P2 : 5, P1+1); //TODO: think about P1/S(x,y) Proportion int P1 = params.P1 > 0 ? params.P1 : 2, P2 = std::max(params.P2 > 0 ? params.P2 : 5, P1+1);
int k, width = disp1.cols, height = disp1.rows; int k, width = disp1.cols, height = disp1.rows;
int minX1 = std::max(maxD, 0), maxX1 = width + std::min(minD, 0); int minX1 = std::max(maxD, 0), maxX1 = width + std::min(minD, 0);
int D = maxD - minD, width1 = maxX1 - minX1; int D = maxD - minD, width1 = maxX1 - minX1;
int SH2 = SADWindowSize.height/2; int SH2 = SADWindowSize.height/2;
int INVALID_DISP = minD - 1; int INVALID_DISP = minD - 1;
int INVALID_DISP_SCALED = INVALID_DISP*DISP_SCALE; int INVALID_DISP_SCALED = INVALID_DISP*DISP_SCALE;
const int TAB_OFS = 256*4, TAB_SIZE = 256 + TAB_OFS*2; //4e: array is such big due to derivative could be +-8*256 in worst cases const int TAB_OFS = 256*4, TAB_SIZE = 256 + TAB_OFS*2;
PixType clipTab[TAB_SIZE]; PixType clipTab[TAB_SIZE];
for( k = 0; k < TAB_SIZE; k++ ) //4e: If ftzero would = 4, array containment will be = -4 -4 -4 ... -4 -3 -2 -1 0 1 2 3 4 ... 4 4 4 for( k = 0; k < TAB_SIZE; k++ )
clipTab[k] = (PixType)(std::min(std::max(k - TAB_OFS, -ftzero), ftzero) + ftzero); clipTab[k] = (PixType)(std::min(std::max(k - TAB_OFS, -ftzero), ftzero) + ftzero);
if( minX1 >= maxX1 ) if( minX1 >= maxX1 )
...@@ -1637,28 +1308,25 @@ static void computeDisparitySGBMParallel( const Mat& img1, const Mat& img2, ...@@ -1637,28 +1308,25 @@ static void computeDisparitySGBMParallel( const Mat& img1, const Mat& img2,
return; return;
} }
CV_Assert( D % 16 == 0 ); //TODO: Are you sure? By the way, why not 8? CV_Assert( D % 16 == 0 );
// NR - the number of directions. the loop on x below that computes Lr assumes that NR == 8. int D2 = D+16;
// if you change NR, please, modify the loop as well.
int D2 = D+16; //4e: Somewhere in code we need d+1, so D+1. One of simplest solutuons is increasing D-dimension on 1. But 1 is 16, when storage should be aligned.
// the number of L_r(.,.) and min_k L_r(.,.) lines in the buffer: // the number of L_r(.,.) and min_k L_r(.,.) lines in the buffer:
// for 8-way dynamic programming we need the current row and // for dynamic programming we need the current row and
// the previous row, i.e. 2 rows in total // the previous row, i.e. 2 rows in total
const int NLR = 2; //4e: We assume, that we need one or more previous steps in our linear dynamic(one right here). const int NLR = 2;
const int LrBorder = NLR - 1; //4e: for simplification of calculations we need border for taking previous dynamic solutions.
// for each possible stereo match (img1(x,y) <=> img2(x-d,y)) // for each possible stereo match (img1(x,y) <=> img2(x-d,y))
// we keep pixel difference cost (C) and the summary cost over NR directions (S). // we keep pixel difference cost (C) and the summary cost over 4 directions (S).
// we also keep all the partial costs for the previous line L_r(x,d) and also min_k L_r(x, k) // we also keep all the partial costs for the previous line L_r(x,d) and also min_k L_r(x, k)
size_t costBufSize = width1*D; size_t costBufSize = width1*D;
size_t CSBufSize = costBufSize*height; size_t CSBufSize = costBufSize*height;
size_t minLrSize = (width1 + LrBorder*2), LrSize = minLrSize*D2; //TODO: We don't need LrBorder for vertical passes and we don't need Lr buffer for horizontal passes. size_t minLrSize = width1 , LrSize = minLrSize*D2;
int hsumBufNRows = SH2*2 + 2; int hsumBufNRows = SH2*2 + 2;
size_t totalBufSize = (LrSize + minLrSize)*NLR*sizeof(CostType) + // minLr[] and Lr[] size_t totalBufSize = (LrSize + minLrSize)*NLR*sizeof(CostType) + // minLr[] and Lr[]
costBufSize*hsumBufNRows*sizeof(CostType) + // hsumBuf //4e: TODO: Why we should increase sum window height one more time? costBufSize*hsumBufNRows*sizeof(CostType) + // hsumBuf
CSBufSize*2*sizeof(CostType) + 1024; // C, S //4e: C is Block sum of costs, S is multidirectional dynamic sum with same size CSBufSize*2*sizeof(CostType) + 1024; // C, S
if( buffer.empty() || !buffer.isContinuous() || if( buffer.empty() || !buffer.isContinuous() ||
buffer.cols*buffer.rows*buffer.elemSize() < totalBufSize ) buffer.cols*buffer.rows*buffer.elemSize() < totalBufSize )
...@@ -1671,8 +1339,8 @@ static void computeDisparitySGBMParallel( const Mat& img1, const Mat& img2, ...@@ -1671,8 +1339,8 @@ static void computeDisparitySGBMParallel( const Mat& img1, const Mat& img2,
for(k = 0; k < (int)CSBufSize; k++ ) for(k = 0; k < (int)CSBufSize; k++ )
Cbuf[k] = (CostType)P2; Cbuf[k] = (CostType)P2;
parallel_for_(Range(0,width1),CalcVerticalSums(img1, img2, params, Cbuf, clipTab)); parallel_for_(Range(0,width1),CalcVerticalSums(img1, img2, params, Cbuf, clipTab),8);
parallel_for_(Range(0,height),CalcHorizontalSums(img1, img2, disp1, params, Cbuf)); parallel_for_(Range(0,height),CalcHorizontalSums(img1, img2, disp1, params, Cbuf),8);
} }
...@@ -2331,7 +1999,7 @@ public: ...@@ -2331,7 +1999,7 @@ public:
if(params.mode==MODE_SGBM_3WAY) if(params.mode==MODE_SGBM_3WAY)
computeDisparity3WaySGBM( left, right, disp, params, buffers, num_stripes ); computeDisparity3WaySGBM( left, right, disp, params, buffers, num_stripes );
else if(params.mode==MODE_HH4) else if(params.mode==MODE_HH4)
computeDisparitySGBMParallel( left, right, disp, params, buffer ); computeDisparitySGBM_HH4( left, right, disp, params, buffer );
else else
computeDisparitySGBM( left, right, disp, params, buffer ); computeDisparitySGBM( left, right, disp, params, buffer );
......
...@@ -785,37 +785,22 @@ protected: ...@@ -785,37 +785,22 @@ protected:
TEST(Calib3d_StereoBM, regression) { CV_StereoBMTest test; test.safe_run(); } TEST(Calib3d_StereoBM, regression) { CV_StereoBMTest test; test.safe_run(); }
TEST(Calib3d_StereoSGBM, regression) { CV_StereoSGBMTest test; test.safe_run(); } TEST(Calib3d_StereoSGBM, regression) { CV_StereoSGBMTest test; test.safe_run(); }
TEST(Calib3d_StereoSGBMPar, idontknowhowtotesthere) TEST(Calib3d_StereoSGBM_HH4, regression)
{ {
// <!-- caseName, datasetName, numDisp, winSize, mode --> String path = cvtest::TS::ptr()->get_data_path() + "cv/stereomatching/datasets/teddy/";
// case_teddy_2 teddy "48" "3" "MODE_HH" Mat leftImg = imread(path + "im2.png", 0);
Mat rightImg = imread(path + "im6.png", 0);
//Ptr<StereoSGBM> StereoSGBM::create(int minDisparity, int numDisparities, int SADWindowSize, Mat testData = imread(path + "disp2_hh4.png",-1);
// int P1, int P2, int disp12MaxDiff, Mat leftDisp;
// int preFilterCap, int uniquenessRatio, Mat toCheck;
// int speckleWindowSize, int speckleRange,
// int mode)
Mat leftImg = imread("/home/q/Work/GitVault/opencv_extra/testdata/cv/stereomatching/datasets/teddy/im2.png");
Mat rightImg = imread("/home/q/Work/GitVault/opencv_extra/testdata/cv/stereomatching/datasets/teddy/im6.png");
// Mat leftDisp_old, leftDisp_new;
{
Mat leftDisp;
Ptr<StereoSGBM> sgbm = StereoSGBM::create( 0, 48, 3, 90, 360, 1, 63, 10, 100, 32, StereoSGBM::MODE_HH);
sgbm->compute( leftImg, rightImg, leftDisp);
CV_Assert( leftDisp.type() == CV_16SC1 );
leftDisp/=8;
imwrite( "/home/q/Work/GitVault/modehh4_new.jpg", leftDisp);
}
{ {
Mat leftDisp;
Ptr<StereoSGBM> sgbm = StereoSGBM::create( 0, 48, 3, 90, 360, 1, 63, 10, 100, 32, StereoSGBM::MODE_HH4); Ptr<StereoSGBM> sgbm = StereoSGBM::create( 0, 48, 3, 90, 360, 1, 63, 10, 100, 32, StereoSGBM::MODE_HH4);
sgbm->compute( leftImg, rightImg, leftDisp); sgbm->compute( leftImg, rightImg, leftDisp);
CV_Assert( leftDisp.type() == CV_16SC1 ); CV_Assert( leftDisp.type() == CV_16SC1 );
leftDisp/=8; leftDisp.convertTo(toCheck, CV_16UC1);
imwrite( "/home/q/Work/GitVault/modehh4_old.jpg", leftDisp); // imwrite("/home/q/Work/GitVault/disp2_hh4.png",toCheck);
} }
// Mat diff; Mat diff;
// absdiff(leftDisp_old,leftDisp_new,diff); absdiff(toCheck, testData,diff);
// CV_Assert( countNonZero(diff)==0); CV_Assert( countNonZero(diff)==0);
//
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment