//TODO: Assumption: let's pretend that we allocate memory for pixDiff and tempBuf independently in each thread, at the full size needed by the original calcBT
/*
//TODO: Rework the size of these arrays even if the independent per-thread allocation stays.
This is a new experimental version of the disparity calculation, which should be parallelized later
structCalcVerticalSums:publicParallelLoopBody
TODO: Don't forget to rewrite these comments afterwards
Computes the disparity for "roi" in img1 w.r.t. img2 and writes it to disp1buf.
that is, disp1buf(x, y)=d means that img1(x+roi.x, y+roi.y) ~ img2(x+roi.x-d, y+roi.y).
minD <= d < maxD.
disp2full is the reverse disparity map, that is:
disp2full(x+roi.x,y+roi.y)=d means that img2(x+roi.x, y+roi.y) ~ img1(x+roi.x+d, y+roi.y)
note that disp1buf will have the same size as the roi and
disp2full will have the same size as img1 (or img2).
On exit disp2buf is not the final disparity, it is an intermediate result that becomes
final after all the tiles are processed.
the disparity in disp1buf is written with sub-pixel accuracy
(4 fractional bits, see StereoSGBM::DISP_SCALE),
using quadratic interpolation, while the disparity in disp2buf
is written as is, without interpolation.
disp2cost also has the same size as img1 (or img2).
It contains the minimum current cost, used to find the best disparity, corresponding to the minimal cost.
CV_Assert(D%16==0);//TODO: Are you sure? By the way, why not 8?
voidoperator()(constRange&range)const
{
// NR - the number of directions. the loop on x below that computes Lr assumes that NR == 8.
staticconstCostTypeMAX_COST=SHRT_MAX;
// if you change NR, please, modify the loop as well.
staticconstintALIGN=16;
intD2=D+16;//4e: Somewhere in code we need d+1, so D+1. One of simplest solutuons is increasing D-dimension on 1. But 1 is 16, when storage should be aligned.
staticconstintTAB_OFS=256*4;
intx1=range.start,x2=range.end,k;
// the number of L_r(.,.) and min_k L_r(.,.) lines in the buffer:
size_tpixDiffSize=((x2-x1)+2*SW2)*D;
// for 8-way dynamic programming we need the current row and
// for each possible stereo match (img1(x,y) <=> img2(x-d,y))
PixType*tempBuf=(PixType*)(pixDiff+pixDiffSize);
// we keep pixel difference cost (C) and the summary cost over NR directions (S).
// we also keep all the partial costs for the previous line L_r(x,d) and also min_k L_r(x, k)
size_tcostBufSize=width1*D;
size_tCSBufSize=costBufSize*height;//4e: For HH mode it's better to keep whole array of costs.
size_tminLrSize=(width1+LrBorder*2),LrSize=minLrSize*D2;//4e: TODO: Understand why NR2 per pass instead od NR2/2 (Probably, without any reason. That doesn't make code wrong)
inthsumBufNRows=SH2*2+2;
size_ttotalBufSize=(LrSize+minLrSize)*NLR*sizeof(CostType)+// minLr[] and Lr[]
costBufSize*(hsumBufNRows+1)*sizeof(CostType)+// hsumBuf, pixdiff //4e: TODO: Why we should increase sum window height one more time?
CSBufSize*2*sizeof(CostType)+// C, S //4e: C is Block sum of costs, S is multidirectional dynamic sum with same size
width*16*img1.channels()*sizeof(PixType)+// temp buffer for computing per-pixel cost //4e: It is needed for calcPixelCostBT function, as "buffer" value
if(y>0)//4e: We calculate horizontal sums and forming full block sums for y coord by adding this horsums to previous line's sums and subtracting stored lowest
if(y>0)//4e: We calculate horizontal sums and forming full block sums for y coord by adding this horsums to previous line's sums and subtracting stored lowest
{//4e: horsum in hsumBuf. Exception is case y=0, where we need many iterations per lines to create full blocking sum.
{//4e: horsum in hsumBuf. Exception is case y=0, where we need many iterations per lines to create full blocking sum.
for(x=D;x<width1*D;x+=D)//4e: Calcluates horizontal sums if (y==0). This piece of code is calling SH2+1 times and then result is used in different way
for(x=(x1+1)*D;x<x2*D;x+=D)//4e: Calcluates horizontal sums if (y==0). This piece of code is calling SH2+1 times and then result is used in different way
{//4e: to create full blocks sum. That's why this code is isolated from upper case.
{//4e: to create full blocks sum. That's why this code is isolated from upper case.
// Return to coordinates, which is needed by CalcCostBT
}
}
if(y==0)//4e: Calculating first full block sum.
if(y==0)//4e: Calculating first full block sum.
{
{
intscale=k==0?SH2+1:1;
intscale=k==0?SH2+1:1;
for(x=0;x<width1*D;x++)
for(x=x1*D;x<x2*D;x++)
C[x]=(CostType)(C[x]+hsumAdd[x]*scale);
C[x]=(CostType)(C[x]+hsumAdd[x]*scale);
}
}
}
}
// also, clear the S buffer
// also, clear the S buffer
for(k=0;k<width1*D;k++)//4e: only on first pass, so it keep old information, don't be confused
for(k=x1*D;k<x2*D;k++)//4e: only on first pass, so it keep old information, don't be confused
S[k]=0;
S[k]=0;
}
}
// clear the left and the right borders
// [formula 13 in the paper]
memset(Lr[0]-D2*LrBorder-8,0,D2*LrBorder*sizeof(CostType));//4e: To understand this "8" shifts and how they could work it's simpler to imagine pixel dislocation in memory
// compute L_r(p, d) = C(p, d) +
memset(Lr[0]+width1*D2-8,0,D2*LrBorder*sizeof(CostType));//4e: ...00000000|D2-16 of real costs value(and some of them are zeroes too)|00000000...
CV_Assert(D%16==0);//TODO: Are you sure? By the way, why not 8?
// NR - the number of directions. the loop on x below that computes Lr assumes that NR == 8.
// if you change NR, please, modify the loop as well.
intD2=D+16;//4e: Somewhere in code we need d+1, so D+1. One of simplest solutuons is increasing D-dimension on 1. But 1 is 16, when storage should be aligned.
// the number of L_r(.,.) and min_k L_r(.,.) lines in the buffer:
// for 8-way dynamic programming we need the current row and
// the previous row, i.e. 2 rows in total
constintNLR=2;//4e: We assume, that we need one or more previous steps in our linear dynamic(one right here).
constintLrBorder=NLR-1;//4e: for simplification of calculations we need border for taking previous dynamic solutions.
// for each possible stereo match (img1(x,y) <=> img2(x-d,y))
// we keep pixel difference cost (C) and the summary cost over NR directions (S).
// we also keep all the partial costs for the previous line L_r(x,d) and also min_k L_r(x, k)
size_tcostBufSize=width1*D;
size_tCSBufSize=costBufSize*height;
size_tminLrSize=(width1+LrBorder*2),LrSize=minLrSize*D2;//TODO: We don't need LrBorder for vertical passes and we don't need Lr buffer for horizontal passes.
inthsumBufNRows=SH2*2+2;
size_ttotalBufSize=(LrSize+minLrSize)*NLR*sizeof(CostType)+// minLr[] and Lr[]
costBufSize*hsumBufNRows*sizeof(CostType)+// hsumBuf //4e: TODO: Why we should increase sum window height one more time?
CSBufSize*2*sizeof(CostType)+// C, S //4e: C is Block sum of costs, S is multidirectional dynamic sum with same size
CostType*disp2cost=hsumBuf+costBufSize*hsumBufNRows+(LrSize+minLrSize)*NLR;//4e: It is containers for backwards disparity, made by S[d] too, but with other method
DispType*disp2ptr=(DispType*)(disp2cost+width);
// add P2 to every C(x,y). it saves a few operations in the inner loops
CV_Assert(D%16==0);//TODO: Are you sure? By the way, why not 8?
// NR - the number of directions. the loop on x below that computes Lr assumes that NR == 8.
// if you change NR, please, modify the loop as well.
intD2=D+16;//4e: Somewhere in code we need d+1, so D+1. One of simplest solutuons is increasing D-dimension on 1. But 1 is 16, when storage should be aligned.
// the number of L_r(.,.) and min_k L_r(.,.) lines in the buffer:
// for 8-way dynamic programming we need the current row and
// the previous row, i.e. 2 rows in total
constintNLR=2;//4e: We assume, that we need one or more previous steps in our linear dynamic(one right here).
constintLrBorder=NLR-1;//4e: for simplification of calculations we need border for taking previous dynamic solutions.
// for each possible stereo match (img1(x,y) <=> img2(x-d,y))
// we keep pixel difference cost (C) and the summary cost over NR directions (S).
// we also keep all the partial costs for the previous line L_r(x,d) and also min_k L_r(x, k)
size_tcostBufSize=width1*D;
size_tCSBufSize=costBufSize*height;//4e: For HH mode it's better to keep whole array of costs.
size_tminLrSize=(width1+LrBorder*2),LrSize=minLrSize*D2;//4e: TODO: Understand why NR2 per pass instead od NR2/2 (Probably, without any reason. That doesn't make code wrong)
inthsumBufNRows=SH2*2+2;
size_ttotalBufSize=(LrSize+minLrSize)*NLR*sizeof(CostType)+// minLr[] and Lr[]
costBufSize*(hsumBufNRows+1)*sizeof(CostType)+// hsumBuf, pixdiff //4e: TODO: Why we should increase sum window height one more time?
CSBufSize*2*sizeof(CostType)+// C, S //4e: C is Block sum of costs, S is multidirectional dynamic sum with same size
width*16*img1.channels()*sizeof(PixType)+// temp buffer for computing per-pixel cost //4e: It is needed for calcPixelCostBT function, as "buffer" value
if(y>0)//4e: We calculate horizontal sums and forming full block sums for y coord by adding this horsums to previous line's sums and subtracting stored lowest
{//4e: horsum in hsumBuf. Exception is case y=0, where we need many iterations per lines to create full blocking sum.
for(k=0;k<width1*D;k++)//4e: only on first pass, so it keep old information, don't be confused
S[k]=0;
}
// clear the left and the right borders
memset(Lr[0]-D2*LrBorder-8,0,D2*LrBorder*sizeof(CostType));//4e: To understand this "8" shifts and how they could work it's simpler to imagine pixel dislocation in memory
memset(Lr[0]+width1*D2-8,0,D2*LrBorder*sizeof(CostType));//4e: ...00000000|D2-16 of real costs value(and some of them are zeroes too)|00000000...
memset(Lr-D2*LrBorder-8,0,D2*LrBorder*sizeof(CostType));//4e: To understand this "8" shifts and how they could work it's simpler to imagine pixel dislocation in memory
memset(Lr+width1*D2-8,0,D2*LrBorder*sizeof(CostType));//4e: ...00000000|D2-16 of real costs value(and some of them are zeroes too)|00000000...