Commit 37478fc5 authored by Muresan Mircea Paul's avatar Muresan Mircea Paul

Paralellized methods using open cv parrallel for

reordered initiallizations

removed unused variable

removed tabs
put const Mat:: & where needed
replaced Macros with enums
i did the step thing
removed the white spaces from the stereo binary sgbm
parent c639de6a
......@@ -42,10 +42,7 @@
/*****************************************************************************************************************\
* The interface contains the main descriptors that will be implemented in the descriptor class *
* *
\******************************************************************************************************************/
#include "descriptor.hpp"
using namespace cv;
......@@ -53,364 +50,60 @@ using namespace stereo;
Descriptor::Descriptor()
{
}
//!Implementation for computing the Census transform on the given image
void Descriptor::applyCensusOnImage(const cv::Mat img, int kernelSize, cv::Mat &dist, const int type)
void Descriptor::applyCensusOnImage(const cv::Mat &img, int kernelSize, cv::Mat &dist, const int type)
{
int n2 = (kernelSize - 1) / 2;
int height = img.rows;
int width = img.cols;
uint8_t * image = img.data;
int *dst = (int *)dist.data;
//#pragma omp parallel for
for (int i = n2; i <= height - n2; i++)
{
int rWidth = i * width;
for (int j = n2; j <= width - n2; j++)
{
if(type == CV_SSE_CENSUS)
{
//to do
}
else
{
int c = 0;
for (int ii = i - n2; ii <= i + n2; ii++)
{
int rrWidth = ii * width;
for (int jj = j - n2; jj <= j + n2; jj++)
{
if (ii != i || jj != j)
{
if (image[(rrWidth + jj)] > image[(rWidth + j)])
{
c = c + 1;
}
c = c * 2;
}
}
}
dst[(rWidth + j)] = c;
}
}
}
parallel_for_(cv::Range(n2, img.rows - n2), singleImageCensus(img.data, img.cols, img.rows, n2, (int *)dist.data, type));
}
/**
Two variations of census applied on input images
Implementation of a census transform which is taking into account just the some pixels from the census kernel thus allowing for larger block sizes
**/
void Descriptor::applyCensusOnImages(const cv::Mat im1, cv::Mat im2, int kernelSize,cv::Mat &dist, cv::Mat &dist2, const int type)
void Descriptor::applyCensusOnImages(const cv::Mat &im1, cv::Mat &im2, int kernelSize, cv::Mat &dist, cv::Mat &dist2, const int type)
{
int n2 = (kernelSize - 1) / 2;
int width = im1.cols;
int height = im2.rows;
uint8_t * image1 = im1.data;
uint8_t * image2 = im2.data;
int *dst1 = (int *)dist.data;
int *dst2 = (int *)dist2.data;
//#pragma omp parallel for
for (int i = n2; i <= height - n2; i++)
{
int rWidth = i * width;
int distV = (i) * width;
for (int j = n2; j <= width - n2; j++)
{
int c = 0;
int c2 = 0;
if(type == CV_DENSE_CENSUS)
{
for (int ii = i - n2; ii <= i + n2; ii++)
{
int rrWidth = ii * width;
for (int jj = j - n2; jj <= j + n2; jj++)
{
if (ii != i || jj != j)
{
if (image1[rrWidth + jj] > image1[rWidth + j])
{
c = c + 1;
}
c = c * 2;
}
if (ii != i || jj != j)
{
if (image2[rrWidth + jj] > image2[rWidth + j])
{
c2 = c2 + 1;
}
c2 = c2 * 2;
}
}
}
}
else if(type == CV_SPARSE_CENSUS)
{
for (int ii = i - n2; ii <= i + n2; ii += 2)
{
int rrWidth = ii * width;
for (int jj = j - n2; jj <= j + n2; jj += 2)
{
if (ii != i || jj != j)
{
if (image1[(rrWidth + jj)] > image1[(rWidth + j)])
{
c = c + 1;
}
c = c * 2;
}
if (ii != i || jj != j)
{
if (image2[(rrWidth + jj)] > image2[(rWidth + j)])
{
c2 = c2 + 1;
}
c2 = c2 * 2;
}
}
}
}
dst1[(distV + j)] = c;
dst2[(distV + j)] = c2;
}
}
parallel_for_(cv::Range(n2, im1.rows - n2), parallelCensus(im1.data, im2.data, im1.cols, im2.rows, n2, (int *)dist.data, (int *)dist2.data, type));
}
/**
STANDARD_MCT - Modified census which is memorizing for each pixel 2 bits and includes a tolerance to the pixel comparison
MCT_MEAN_VARIATION - Implementation of a modified census transform which is also taking into account the variation to the mean of the window not just the center pixel
**/
void Descriptor::applyMCTOnImages(const cv::Mat img1, const cv::Mat img2, int kernelSize, int t, cv::Mat &dist, cv::Mat &dist2, const int type)
void Descriptor::applyMCTOnImages(const cv::Mat &img1, const cv::Mat &img2, int kernelSize, int t, cv::Mat &dist, cv::Mat &dist2, const int type)
{
int n2 = (kernelSize - 1) >> 1;
int width = img1.cols;
int height = img1.rows;
uint8_t *image1 = img1.data;
uint8_t *image2 = img2.data;
int *dst1 = (int *)dist.data;
int *dst2 = (int *)dist2.data;
// #pragma omp parallel for
for (int i = n2 + 2; i <= height - n2 - 2; i++)
{
int rWidth = i * width;
int distV = (i) * width;
for (int j = n2 + 2; j <= width - n2 - 2; j++)
{
int c = 0;
int c2 = 0;
if(type == CV_STANDARD_MCT)
{
for (int ii = i - n2; ii <= i + n2; ii += 2)
{
int rrWidth = ii * width;
for (int jj = j - n2; jj <= j + n2; jj += 2)
{
if (ii != i || jj != j)
{
if (image1[rrWidth + jj] > image1[rWidth + j] - t)
{
c <<= 2;
c |= 0x3;
}
else if (image1[rWidth + j] - t < image1[rrWidth + jj] && image1[rWidth + j] + t >= image1[rrWidth + jj])
{
c <<= 2;
c = c + 1;
}
else
{
c <<= 2;
}
}
if (ii != i || jj != j)
{
if (image2[rrWidth + jj] > image2[rWidth + j] - t)
{
c2 <<= 2;
c2 |= 0x3;
}
else if (image2[rWidth + j] - t < image2[rrWidth + jj] && image2[rWidth + j] + t >= image2[rrWidth + jj])
{
c2 <<= 2;
c2 = c2 + 1;
}
else
{
c2 <<= 2;
}
}
}
}
for (int ii = i - n2; ii <= i + n2; ii += 4)
{
int rrWidth = ii * width;
for (int jj = j - n2; jj <= j + n2; jj += 4)
{
if (ii != i || jj != j)
{
if (image1[rrWidth + jj] > image1[rWidth + j] - t)
{
c <<= 2;
c |= 0x3;
}
else if (image1[rWidth + j] - t < image1[rrWidth + jj] && image1[rWidth + j] + t >= image1[rrWidth + jj])
{
c <<= 2;
c += 1;
}
else
{
c <<= 2;
}
}
if (ii != i || jj != j)
{
if (image2[rrWidth + jj] > image2[rWidth + j] - t)
{
c2 <<= 2;
c2 |= 0x3;
}
else if (image2[rWidth + j] - t < image2[rrWidth + jj] && image2[rWidth + j] + t >= image2[rrWidth + jj])
{
c2 <<= 2;
c2 = c2 + 1;
}
else
{
c2 <<= 2;
}
}
}
}
}
else if(type == CV_MCT_MEAN_VARIATION)
{
//to do mean variation
}
dst1[distV + j] = c;
dst2[distV + j] = c2;
}
}
parallel_for_(cv::Range(n2, img1.rows - n2), parallelMctDescriptor(img1.data, img2.data, img1.cols, img2.rows, n2,t, (int *)dist.data, (int *)dist2.data, type));
}
/**The classical center symetric census
A modified version of cs census which is comparing a pixel with its correspondent after the center
**/
void Descriptor::applySimetricCensus(const cv::Mat img1, const cv::Mat img2,int kernelSize, cv::Mat &dist, cv::Mat &dist2, const int type)
void Descriptor::applySimetricCensus(const cv::Mat &img1, const cv::Mat &img2, int kernelSize, cv::Mat &dist, cv::Mat &dist2, const int type)
{
int n2 = (kernelSize - 1) / 2;
int height = img1.rows;
int width = img1.cols;
uint8_t *image1 = img1.data;
uint8_t *image2 = img2.data;
int *dst1 = (int *)dist.data;
int *dst2 = (int *)dist2.data;
//#pragma omp parallel for
for (int i = + n2; i <= height - n2; i++)
{
int distV = (i) * width;
for (int j = n2; j <= width - n2; j++)
{
int c = 0;
int c2 = 0;
if(type == CV_CLASSIC_CENTER_SYMETRIC_CENSUS)
{
for (int ii = -n2; ii < 0; ii++)
{
int rrWidth = (ii + i) * width;
for (int jj = -n2; jj <= +n2; jj++)
{
if (ii != i || jj != j)
{
if (image1[(rrWidth + (jj + j))] > image1[((ii * (-1) + i) * width + (-1 * jj) + j)])
{
c = c + 1;
}
c = c * 2;
}
if (ii != i || jj != j)
{
if (image2[(rrWidth + (jj + j))] > image2[((ii * (-1) + i) * width + (-1 * jj) + j)])
{
c2 = c2 + 1;
}
c2 = c2 * 2;
}
}
}
for (int jj = -n2; jj < 0; jj++)
{
if (image1[(i * width + (jj + j))] > image1[(i * width + (-1 * jj) + j)])
{
c = c + 1;
}
c = c * 2;
if (image2[(i * width + (jj + j))] > image2[(i * width + (-1 * jj) + j)])
{
c2 = c2 + 1;
}
c2 = c2 * 2;
}
}
else if(type == CV_MODIFIED_CENTER_SIMETRIC_CENSUS)
{
for (int ii = i - n2; ii <= i + 1; ii++)
{
int rrWidth = ii * width;
int rrWidthC = (ii + n2) * width;
for (int jj = j - n2; jj <= j + n2; jj += 2)
{
if (ii != i || jj != j)
{
if (image1[(rrWidth + jj)] > image1[(rrWidthC + (jj + n2))])
{
c = c + 1;
}
c = c * 2;
}
if (ii != i || jj != j)
{
if (image2[(rrWidth + jj)] > image2[(rrWidthC + (jj + n2))])
{
c2 = c2 + 1;
}
c2 = c2 * 2;
}
}
}
}
dst1[(distV + j)] = c;
dst2[(distV + j)] = c2;
}
}
int n2 = (kernelSize - 1) >> 1;
parallel_for_(cv::Range(n2, img1.rows - n2), parallelSymetricCensus(img1.data, img2.data, img1.cols, img2.rows, n2, (int *)dist.data, (int *)dist2.data, type));
}
//!brief binary descriptor used in stereo correspondence
void Descriptor::applyBrifeDescriptor(const cv::Mat image1, const cv::Mat image2,int kernelSize, cv::Mat &dist, cv::Mat &dist2)
void Descriptor::applyBrifeDescriptor(const cv::Mat &image1, const cv::Mat &image2, int kernelSize, cv::Mat &dist, cv::Mat &dist2)
{
//TO DO
//marked the variables in order to avoid warnings
(void) image1;
(void) image2;
(void) dist;
(void) dist2;
(void) kernelSize;
(void)image1;
(void)image2;
(void)dist;
(void)dist2;
(void)kernelSize;
}
//The classical Rank Transform
void Descriptor::applyRTDescriptor(const cv::Mat image1, const cv::Mat image2, int kernelSize, cv::Mat &dist, cv::Mat &dist2)
void Descriptor::applyRTDescriptor(const cv::Mat &image1, const cv::Mat &image2, int kernelSize, cv::Mat &dist, cv::Mat &dist2)
{
//TO DO
//marked the variables in order to avoid warnings
(void) image1;
(void) image2;
(void) dist;
(void) dist2;
(void) kernelSize;
(void)image1;
(void)image2;
(void)dist;
(void)dist2;
(void)kernelSize;
}
Descriptor::~Descriptor(void)
......
......@@ -42,7 +42,6 @@
/*****************************************************************************************************************\
* The interface contains the main descriptors that will be implemented in the descriptor class *
* *
\******************************************************************************************************************/
#include "precomp.hpp"
......@@ -50,38 +49,337 @@
#ifndef _OPENCV_DESCRIPTOR_HPP_
#define _OPENCV_DESCRIPTOR_HPP_
#ifdef __cplusplus
#define CV_DENSE_CENSUS 0
#define CV_SPARSE_CENSUS 1
#define CV_MODIFIED_CENTER_SIMETRIC_CENSUS 0
#define CV_CLASSIC_CENTER_SYMETRIC_CENSUS 1
#define CV_STANDARD_MCT 0
#define CV_MCT_MEAN_VARIATION 1
#define CV_SSE_CENSUS 1
namespace cv
{
namespace stereo
{
class Descriptor
enum ClassicCensus { Dense_Census, Sparse_Census};
enum SymetricCensus {ClassicCenterSymetricCensus, ModifiedCenterSymetricCensus};
enum MCT {StandardMct,MeanVariation};
enum CensusImage {SSE, NonSSE};
//!class implemented to run the census descriptor in parralel
class parallelCensus :public ParallelLoopBody
{
private:
uint8_t *image1, *image2;
int *dst1, *dst2;
int n2, width, height, type;
public:
parallelCensus(uint8_t * img1, uint8_t * img2, int w, int h, int k2, int * distance1, int * distance2,const int t) :
image1(img1), image2(img2), dst1(distance1), dst2(distance2), n2(k2), width(w), height(h), type(t){}
virtual void operator()(const cv::Range &r) const {
int step = (type == ClassicCensus::Sparse_Census)? 2:1;
for (int i = r.start; i <= r.end ; i++)
{
int rWidth = i * width;
for (int j = n2; j <= width - n2; j++)
{
//imitialize the costs for the 2 census descriptors
int c = 0;
int c2 = 0;
for (int ii = i - n2; ii <= i + n2; ii+=step)
{
int rrWidth = ii * width;
for (int jj = j - n2; jj <= j + n2; jj+=step)
{
if (ii != i || jj != j)
{
//compare a pixel with the center from the kernel
if (image1[rrWidth + jj] > image1[rWidth + j])
{
c = c + 1;
}
c = c * 2;
}
if (ii != i || jj != j)
{
//compare pixel with center for image 2
if (image2[rrWidth + jj] > image2[rWidth + j])
{
c2 = c2 + 1;
}
c2 = c2 * 2;
}
}
}
dst1[(rWidth + j)] = c;
dst2[(rWidth + j)] = c2;
}
}
}
};
//!class that implemented the census descriptor on single images
class singleImageCensus : public ParallelLoopBody
{
private:
uint8_t *image;
int *dst;
int n2, width, height, type;
public:
singleImageCensus(uint8_t * img1, int w, int h, int k2, int * distance1,const int t) :
image(img1), dst(distance1), n2(k2), width(w), height(h), type(t){}
virtual void operator()(const cv::Range &r) const {
for (int i = r.start; i <= r.end ; i++)
{
int rWidth = i * width;
for (int j = n2; j <= width - n2; j++)
{
if (type == CensusImage::SSE)
{
//to do
}
else
{
int c = 0;
for (int ii = i - n2; ii <= i + n2; ii++)
{
int rrWidth = ii * width;
for (int jj = j - n2; jj <= j + n2; jj++)
{
if (ii != i || jj != j)
{
if (image[(rrWidth + jj)] > image[(rWidth + j)])
{
c = c + 1;
}
c = c * 2;
}
}
}
dst[(rWidth + j)] = c;
}
}
}
}
};
//! parallel implementation of MCT type of descriptors
class parallelMctDescriptor:public ParallelLoopBody
{
private:
uint8_t *image1, *image2;
int *dst1, *dst2;
int n2,t , width, height, type;
public:
parallelMctDescriptor(uint8_t * img1, uint8_t * img2, int w, int h, int k2,int threshold, int * distance1, int * distance2,const int tip) :
image1(img1), image2(img2), dst1(distance1), dst2(distance2), n2(k2), t(threshold), width(w), height(h), type(tip){}
virtual void operator()(const cv::Range &r) const {
for (int i = r.start; i <= r.end ; i++)
{
int rWidth = i * width;
int distV = (i)* width;
for (int j = n2 + 2; j <= width - n2 - 2; j++)
{
int c = 0;
int c2 = 0;
if (type == MCT::StandardMct)
{
for (int ii = i - n2; ii <= i + n2; ii += 2)
{
int rrWidth = ii * width;
for (int jj = j - n2; jj <= j + n2; jj += 2)
{
if (ii != i || jj != j)
{
if (image1[rrWidth + jj] > image1[rWidth + j] - t)
{
c <<= 2;
c |= 0x3;
}
else if (image1[rWidth + j] - t < image1[rrWidth + jj] && image1[rWidth + j] + t >= image1[rrWidth + jj])
{
c <<= 2;
c = c + 1;
}
else
{
c <<= 2;
}
}
if (ii != i || jj != j)
{
if (image2[rrWidth + jj] > image2[rWidth + j] - t)
{
c2 <<= 2;
c2 |= 0x3;
}
else if (image2[rWidth + j] - t < image2[rrWidth + jj] && image2[rWidth + j] + t >= image2[rrWidth + jj])
{
c2 <<= 2;
c2 = c2 + 1;
}
else
{
c2 <<= 2;
}
}
}
}
for (int ii = i - n2; ii <= i + n2; ii += 4)
{
int rrWidth = ii * width;
for (int jj = j - n2; jj <= j + n2; jj += 4)
{
if (ii != i || jj != j)
{
if (image1[rrWidth + jj] > image1[rWidth + j] - t)
{
c <<= 2;
c |= 0x3;
}
else if (image1[rWidth + j] - t < image1[rrWidth + jj] && image1[rWidth + j] + t >= image1[rrWidth + jj])
{
c <<= 2;
c += 1;
}
else
{
c <<= 2;
}
}
if (ii != i || jj != j)
{
if (image2[rrWidth + jj] > image2[rWidth + j] - t)
{
c2 <<= 2;
c2 |= 0x3;
}
else if (image2[rWidth + j] - t < image2[rrWidth + jj] && image2[rWidth + j] + t >= image2[rrWidth + jj])
{
c2 <<= 2;
c2 = c2 + 1;
}
else
{
c2 <<= 2;
}
}
}
}
}
else if (type == MCT::MeanVariation)
{
//to do mean variation
}
dst1[distV + j] = c;
dst2[distV + j] = c2;
}
}
}
};
//!paralel implementation of the center symetric census
class parallelSymetricCensus:public ParallelLoopBody
{
private:
uint8_t *image1, *image2;
int *dst1, *dst2;
int n2, width, height, type;
public:
parallelSymetricCensus(uint8_t * img1, uint8_t * img2, int w, int h, int k2, int * distance1, int * distance2,const int t) :
image1(img1), image2(img2), dst1(distance1), dst2(distance2), n2(k2), width(w), height(h), type(t){}
virtual void operator()(const cv::Range &r) const {
for (int i = r.start; i <= r.end ; i++)
{
int distV = (i)* width;
for (int j = n2; j <= width - n2; j++)
{
int c = 0;
int c2 = 0;
//the classic center symetric census which compares the curent pixel with its symetric not its center.
if (type == SymetricCensus::ClassicCenterSymetricCensus)
{
for (int ii = -n2; ii < 0; ii++)
{
int rrWidth = (ii + i) * width;
for (int jj = -n2; jj <= +n2; jj++)
{
if (ii != i || jj != j)
{
if (image1[(rrWidth + (jj + j))] > image1[((ii * (-1) + i) * width + (-1 * jj) + j)])
{
c = c + 1;
}
c = c * 2;
}
if (ii != i || jj != j)
{
if (image2[(rrWidth + (jj + j))] > image2[((ii * (-1) + i) * width + (-1 * jj) + j)])
{
c2 = c2 + 1;
}
c2 = c2 * 2;
}
}
}
for (int jj = -n2; jj < 0; jj++)
{
if (image1[(i * width + (jj + j))] > image1[(i * width + (-1 * jj) + j)])
{
c = c + 1;
}
c = c * 2;
if (image2[(i * width + (jj + j))] > image2[(i * width + (-1 * jj) + j)])
{
c2 = c2 + 1;
}
c2 = c2 * 2;
}
}//a modified version of cs census which compares each pixel with its correspondent from
//the same distance from the center
else if (type == SymetricCensus::ModifiedCenterSymetricCensus)
{
for (int ii = i - n2; ii <= i + 1; ii++)
{
int rrWidth = ii * width;
int rrWidthC = (ii + n2) * width;
for (int jj = j - n2; jj <= j + n2; jj += 2)
{
if (ii != i || jj != j)
{
if (image1[(rrWidth + jj)] > image1[(rrWidthC + (jj + n2))])
{
c = c + 1;
}
c = c * 2;
}
if (ii != i || jj != j)
{
if (image2[(rrWidth + jj)] > image2[(rrWidthC + (jj + n2))])
{
c2 = c2 + 1;
}
c2 = c2 * 2;
}
}
}
}
dst1[(distV + j)] = c;
dst2[(distV + j)] = c2;
}
}
}
};
class Descriptor
{
public:
//Implementation for computing the Census transform on the given image
void applyCensusOnImage(const cv::Mat image, int kernelSize, cv::Mat &dist, const int type = 0);
void applyCensusOnImage(const cv::Mat &image, int kernelSize, cv::Mat &dist, const int type = 0);
//two variations of census applied on input images
//Implementation of a census transform which is taking into account just the some pixels from the census kernel thus allowing for larger block sizes
void applyCensusOnImages(const cv::Mat image1, cv::Mat image2, int kernelSize,cv::Mat &dist, cv::Mat &dist2, const int type = CV_SPARSE_CENSUS);
void applyCensusOnImages(const cv::Mat &image1, cv::Mat &image2, int kernelSize, cv::Mat &dist, cv::Mat &dist2, const int type = ClassicCensus::Sparse_Census);
// STANDARD_MCT - Modified census which is memorizing for each pixel 2 bits and includes a tolerance to the pixel comparison
//MCT_MEAN_VARIATION - Implementation of a modified census transform which is also taking into account the variation to the mean of the window not just the center pixel
void applyMCTOnImages(const cv::Mat image1, const cv::Mat image2, int kernelSize, int t, cv::Mat &dist, cv::Mat &dist2, const int type = CV_STANDARD_MCT);
void applyMCTOnImages(const cv::Mat &image1, const cv::Mat &image2, int kernelSize, int t, cv::Mat &dist, cv::Mat &dist2, const int type = MCT::StandardMct);
//The classical center symetric census
//A modified version of cs census which is comparing the a pixel with its correspondent from the after the center
void applySimetricCensus(const cv::Mat image1, const cv::Mat image2,int kernelSize, cv::Mat &dist, cv::Mat &dist2, const int type = CV_CLASSIC_CENTER_SYMETRIC_CENSUS);
void applySimetricCensus(const cv::Mat &image1, const cv::Mat &image2, int kernelSize, cv::Mat &dist, cv::Mat &dist2, const int type = SymetricCensus::ClassicCenterSymetricCensus);
//The brief binary descriptor
void applyBrifeDescriptor(const cv::Mat image1, const cv::Mat image2,int kernelSize, cv::Mat &dist, cv::Mat &dist2);
void applyBrifeDescriptor(const cv::Mat &image1, const cv::Mat &image2, int kernelSize, cv::Mat &dist, cv::Mat &dist2);
//The classical Rank Transform
void applyRTDescriptor(const cv::Mat image1, const cv::Mat image2, int kernelSize, cv::Mat &dist, cv::Mat &dist2);
void applyRTDescriptor(const cv::Mat &image1, const cv::Mat &image2, int kernelSize, cv::Mat &dist, cv::Mat &dist2);
Descriptor();
~Descriptor(void);
};
......
......@@ -40,12 +40,10 @@
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
/*
This is a variation of
"Stereo Processing by Semiglobal Matching and Mutual Information"
by Heiko Hirschmuller.
We match blocks rather than individual pixels, thus the algorithm is called
SGBM (Semi-global block matching)
*/
......@@ -62,8 +60,6 @@ namespace cv
typedef short DispType;
enum { NR = 16, NR2 = NR/2 };
struct StereoBinarySGBMParams
{
StereoBinarySGBMParams()
......@@ -96,7 +92,6 @@ namespace cv
speckleRange = _speckleRange;
mode = _mode;
}
int minDisparity;
int numDisparities;
int SADWindowSize;
......@@ -109,7 +104,6 @@ namespace cv
int disp12MaxDiff;
int mode;
};
/*
For each pixel row1[x], max(-maxD, 0) <= minX <= x < maxX <= width - max(0, -minD),
and for each disparity minD<=d<maxD the function
......@@ -117,7 +111,6 @@ namespace cv
row1[x] and row2[x-d]. The subpixel algorithm from
"Depth Discontinuities by Pixel-to-Pixel Stereo" by Stan Birchfield and C. Tomasi
is used, hence the suffix BT.
the temporary buffer should contain width2*2 elements
*/
static void calcPixelCostBT( const Mat& img1, const Mat& img2, int y,
......@@ -131,25 +124,20 @@ namespace cv
int D = maxD - minD, width1 = maxX1 - minX1, width2 = maxX2 - minX2;
const PixType *row1 = img1.ptr<PixType>(y), *row2 = img2.ptr<PixType>(y);
PixType *prow1 = buffer + width2*2, *prow2 = prow1 + width*cn*2;
tab += tabOfs;
for( c = 0; c < cn*2; c++ )
{
prow1[width*c] = prow1[width*c + width-1] =
prow2[width*c] = prow2[width*c + width-1] = tab[0];
}
int n1 = y > 0 ? -(int)img1.step : 0, s1 = y < img1.rows-1 ? (int)img1.step : 0;
int n2 = y > 0 ? -(int)img2.step : 0, s2 = y < img2.rows-1 ? (int)img2.step : 0;
if( cn == 1 )
{
for( x = 1; x < width-1; x++ )
{
prow1[x] = tab[(row1[x+1] - row1[x-1])*2 + row1[x+n1+1] - row1[x+n1-1] + row1[x+s1+1] - row1[x+s1-1]];
prow2[width-1-x] = tab[(row2[x+1] - row2[x-1])*2 + row2[x+n2+1] - row2[x+n2-1] + row2[x+s2+1] - row2[x+s2-1]];
prow1[x+width] = row1[x];
prow2[width-1-x+width] = row2[x];
}
......@@ -161,21 +149,17 @@ namespace cv
prow1[x] = tab[(row1[x*3+3] - row1[x*3-3])*2 + row1[x*3+n1+3] - row1[x*3+n1-3] + row1[x*3+s1+3] - row1[x*3+s1-3]];
prow1[x+width] = tab[(row1[x*3+4] - row1[x*3-2])*2 + row1[x*3+n1+4] - row1[x*3+n1-2] + row1[x*3+s1+4] - row1[x*3+s1-2]];
prow1[x+width*2] = tab[(row1[x*3+5] - row1[x*3-1])*2 + row1[x*3+n1+5] - row1[x*3+n1-1] + row1[x*3+s1+5] - row1[x*3+s1-1]];
prow2[width-1-x] = tab[(row2[x*3+3] - row2[x*3-3])*2 + row2[x*3+n2+3] - row2[x*3+n2-3] + row2[x*3+s2+3] - row2[x*3+s2-3]];
prow2[width-1-x+width] = tab[(row2[x*3+4] - row2[x*3-2])*2 + row2[x*3+n2+4] - row2[x*3+n2-2] + row2[x*3+s2+4] - row2[x*3+s2-2]];
prow2[width-1-x+width*2] = tab[(row2[x*3+5] - row2[x*3-1])*2 + row2[x*3+n2+5] - row2[x*3+n2-1] + row2[x*3+s2+5] - row2[x*3+s2-1]];
prow1[x+width*3] = row1[x*3];
prow1[x+width*4] = row1[x*3+1];
prow1[x+width*5] = row1[x*3+2];
prow2[width-1-x+width*3] = row2[x*3];
prow2[width-1-x+width*4] = row2[x*3+1];
prow2[width-1-x+width*5] = row2[x*3+2];
}
}
memset( cost, 0, width1*D*sizeof(cost[0]) );
buffer -= minX2;
......@@ -184,12 +168,10 @@ namespace cv
#if CV_SSE2
volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE2);
#endif
#if 1
for( c = 0; c < cn*2; c++, prow1 += width, prow2 += width )
{
int diff_scale = c < cn ? 0 : 2;
// precompute
// v0 = min(row2[x-1/2], row2[x], row2[x+1/2]) and
// v1 = max(row2[x-1/2], row2[x], row2[x+1/2]) and
......@@ -203,7 +185,6 @@ namespace cv
buffer[x] = (PixType)v0;
buffer[x + width2] = (PixType)v1;
}
for( x = minX1; x < maxX1; x++ )
{
int u = prow1[x];
......@@ -211,14 +192,12 @@ namespace cv
int ur = x < width-1 ? (u + prow1[x+1])/2 : u;
int u0 = std::min(ul, ur); u0 = std::min(u0, u);
int u1 = std::max(ul, ur); u1 = std::max(u1, u);
#if CV_SSE2
if( useSIMD )
{
__m128i _u = _mm_set1_epi8((char)u), _u0 = _mm_set1_epi8((char)u0);
__m128i _u1 = _mm_set1_epi8((char)u1), z = _mm_setzero_si128();
__m128i ds = _mm_cvtsi32_si128(diff_scale);
for( int d = minD; d < maxD; d += 16 )
{
__m128i _v = _mm_loadu_si128((const __m128i*)(prow2 + width-x-1 + d));
......@@ -227,10 +206,8 @@ namespace cv
__m128i c0 = _mm_max_epu8(_mm_subs_epu8(_u, _v1), _mm_subs_epu8(_v0, _u));
__m128i c1 = _mm_max_epu8(_mm_subs_epu8(_v, _u1), _mm_subs_epu8(_u0, _v));
__m128i diff = _mm_min_epu8(c0, c1);
c0 = _mm_load_si128((__m128i*)(cost + x*D + d));
c1 = _mm_load_si128((__m128i*)(cost + x*D + d + 8));
_mm_store_si128((__m128i*)(cost + x*D + d), _mm_adds_epi16(c0, _mm_srl_epi16(_mm_unpacklo_epi8(diff,z), ds)));
_mm_store_si128((__m128i*)(cost + x*D + d + 8), _mm_adds_epi16(c1, _mm_srl_epi16(_mm_unpackhi_epi8(diff,z), ds)));
}
......@@ -245,7 +222,6 @@ namespace cv
int v1 = buffer[width-x-1 + d + width2];
int c0 = std::max(0, u - v1); c0 = std::max(c0, v0 - u);
int c1 = std::max(0, v - u1); c1 = std::max(c1, u0 - v);
cost[x*D + d] = (CostType)(cost[x*D+d] + (std::min(c0, c1) >> diff_scale));
}
}
......@@ -286,25 +262,20 @@ namespace cv
}
#endif
}
/*
computes disparity for "roi" in img1 w.r.t. img2 and write it to disp1buf.
that is, disp1buf(x, y)=d means that img1(x+roi.x, y+roi.y) ~ img2(x+roi.x-d, y+roi.y).
minD <= d < maxD.
disp2full is the reverse disparity map, that is:
disp2full(x+roi.x,y+roi.y)=d means that img2(x+roi.x, y+roi.y) ~ img1(x+roi.x+d, y+roi.y)
note that disp1buf will have the same size as the roi and
disp2full will have the same size as img1 (or img2).
On exit disp2buf is not the final disparity, it is an intermediate result that becomes
final after all the tiles are processed.
the disparity in disp1buf is written with sub-pixel accuracy
(4 fractional bits, see StereoSGBM::DISP_SCALE),
using quadratic interpolation, while the disparity in disp2buf
is written as is, without interpolation.
disp2cost also has the same size as img1 (or img2).
It contains the minimum current cost, used to find the best disparity, corresponding to the minimal cost.
*/
......@@ -332,7 +303,6 @@ namespace cv
const int DISP_SHIFT = StereoMatcher::DISP_SHIFT;
const int DISP_SCALE = (1 << DISP_SHIFT);
const CostType MAX_COST = SHRT_MAX;
int minD = params.minDisparity, maxD = minD + params.numDisparities;
Size SADWindowSize;
SADWindowSize.width = SADWindowSize.height = params.SADWindowSize > 0 ? params.SADWindowSize : 5;
......@@ -349,28 +319,22 @@ namespace cv
int npasses = fullDP ? 2 : 1;
const int TAB_OFS = 256*4, TAB_SIZE = 256 + TAB_OFS*2;
PixType clipTab[TAB_SIZE];
for( k = 0; k < TAB_SIZE; k++ )
clipTab[k] = (PixType)(std::min(std::max(k - TAB_OFS, -ftzero), ftzero) + ftzero);
if( minX1 >= maxX1 )
{
disp1 = Scalar::all(INVALID_DISP_SCALED);
return;
}
CV_Assert( D % 16 == 0 );
// NR - the number of directions. the loop on x below that computes Lr assumes that NR == 8.
// if you change NR, please, modify the loop as well.
int D2 = D+16, NRD2 = NR2*D2;
// the number of L_r(.,.) and min_k L_r(.,.) lines in the buffer:
// for 8-way dynamic programming we need the current row and
// the previous row, i.e. 2 rows in total
const int NLR = 2;
const int LrBorder = NLR - 1;
// for each possible stereo match (img1(x,y) <=> img2(x-d,y))
// we keep pixel difference cost (C) and the summary cost over NR directions (S).
// we also keep all the partial costs for the previous line L_r(x,d) and also min_k L_r(x, k)
......@@ -383,29 +347,23 @@ namespace cv
CSBufSize*2*sizeof(CostType) + // C, S
width*16*img1.channels()*sizeof(PixType) + // temp buffer for computing per-pixel cost
width*(sizeof(CostType) + sizeof(DispType)) + 1024; // disp2cost + disp2
if( buffer.empty() || !buffer.isContinuous() ||
buffer.cols*buffer.rows*buffer.elemSize() < totalBufSize )
buffer.create(1, (int)totalBufSize, CV_8U);
// summary cost over different (nDirs) directions
CostType* Cbuf = (CostType*)alignPtr(buffer.ptr(), ALIGN);
CostType* Sbuf = Cbuf + CSBufSize;
CostType* hsumBuf = Sbuf + CSBufSize;
CostType* pixDiff = hsumBuf + costBufSize*hsumBufNRows;
CostType* disp2cost = pixDiff + costBufSize + (LrSize + minLrSize)*NLR;
DispType* disp2ptr = (DispType*)(disp2cost + width);
PixType* tempBuf = (PixType*)(disp2ptr + width);
// add P2 to every C(x,y). it saves a few operations in the inner loops
for( k = 0; k < width1*D; k++ )
Cbuf[k] = (CostType)P2;
for( int pass = 1; pass <= npasses; pass++ )
{
int x1, y1, x2, y2, dx, dy;
if( pass == 1 )
{
y1 = 0; y2 = height; dy = 1;
......@@ -416,9 +374,7 @@ namespace cv
y1 = height-1; y2 = -1; dy = -1;
x1 = width1-1; x2 = -1; dx = -1;
}
CostType *Lr[NLR]={0}, *minLr[NLR]={0};
for( k = 0; k < NLR; k++ )
{
// shift Lr[k] and minLr[k] pointers, because we allocated them with the borders,
......@@ -431,18 +387,15 @@ namespace cv
minLr[k] = pixDiff + costBufSize + LrSize*NLR + minLrSize*k + NR2*LrBorder;
memset( minLr[k] - LrBorder*NR2, 0, minLrSize*sizeof(CostType) );
}
for( int y = y1; y != y2; y += dy )
{
int x, d;
DispType* disp1ptr = disp1.ptr<DispType>(y);
CostType* C = Cbuf + (!fullDP ? 0 : y*costBufSize);
CostType* S = Sbuf + (!fullDP ? 0 : y*costBufSize);
if( pass == 1 ) // compute C on the first pass, and reuse it on the second pass, if any.
{
int dy1 = y == 0 ? 0 : y + SH2, dy2 = y == 0 ? SH2 : dy1;
for( k = dy1; k <= dy2; k++ )
{
CostType* hsumAdd = hsumBuf + (std::min(k, height-1) % hsumBufNRows)*costBufSize;
......@@ -450,7 +403,6 @@ namespace cv
if( k < height )
{
calcPixelCostBT( img1, img2, k, minD, maxD, pixDiff, tempBuf, clipTab, TAB_OFS, ftzero );
memset(hsumAdd, 0, D*sizeof(CostType));
for( x = 0; x <= SW2*D; x += D )
{
......@@ -503,13 +455,11 @@ namespace cv
{
const CostType* pixAdd = pixDiff + std::min(x + SW2*D, (width1-1)*D);
const CostType* pixSub = pixDiff + std::max(x - (SW2+1)*D, 0);
for( d = 0; d < D; d++ )
hsumAdd[x + d] = (CostType)(hsumAdd[x - D + d] + pixAdd[d] - pixSub[d]);
}
}
}
if( y == 0 )
{
int scale = k == 0 ? SH2 + 1 : 1;
......@@ -517,18 +467,15 @@ namespace cv
C[x] = (CostType)(C[x] + hsumAdd[x]*scale);
}
}
// also, clear the S buffer
for( k = 0; k < width1*D; k++ )
S[k] = 0;
}
// clear the left and the right borders
memset( Lr[0] - NRD2*LrBorder - 8, 0, NRD2*LrBorder*sizeof(CostType) );
memset( Lr[0] + width1*NRD2 - 8, 0, NRD2*LrBorder*sizeof(CostType) );
memset( minLr[0] - NR2*LrBorder, 0, NR2*LrBorder*sizeof(CostType) );
memset( minLr[0] + width1*NR2, 0, NR2*LrBorder*sizeof(CostType) );
/*
[formula 13 in the paper]
compute L_r(p, d) = C(p, d) +
......@@ -550,87 +497,65 @@ namespace cv
for( x = x1; x != x2; x += dx )
{
int xm = x*NR2, xd = xm*D2;
int delta0 = minLr[0][xm - dx*NR2] + P2, delta1 = minLr[1][xm - NR2 + 1] + P2;
int delta2 = minLr[1][xm + 2] + P2, delta3 = minLr[1][xm + NR2 + 3] + P2;
CostType* Lr_p0 = Lr[0] + xd - dx*NRD2;
CostType* Lr_p1 = Lr[1] + xd - NRD2 + D2;
CostType* Lr_p2 = Lr[1] + xd + D2*2;
CostType* Lr_p3 = Lr[1] + xd + NRD2 + D2*3;
Lr_p0[-1] = Lr_p0[D] = Lr_p1[-1] = Lr_p1[D] =
Lr_p2[-1] = Lr_p2[D] = Lr_p3[-1] = Lr_p3[D] = MAX_COST;
CostType* Lr_p = Lr[0] + xd;
const CostType* Cp = C + x*D;
CostType* Sp = S + x*D;
#if CV_SSE2
if( useSIMD )
{
__m128i _P1 = _mm_set1_epi16((short)P1);
__m128i _delta0 = _mm_set1_epi16((short)delta0);
__m128i _delta1 = _mm_set1_epi16((short)delta1);
__m128i _delta2 = _mm_set1_epi16((short)delta2);
__m128i _delta3 = _mm_set1_epi16((short)delta3);
__m128i _minL0 = _mm_set1_epi16((short)MAX_COST);
for( d = 0; d < D; d += 8 )
{
__m128i Cpd = _mm_load_si128((const __m128i*)(Cp + d));
__m128i L0, L1, L2, L3;
L0 = _mm_load_si128((const __m128i*)(Lr_p0 + d));
L1 = _mm_load_si128((const __m128i*)(Lr_p1 + d));
L2 = _mm_load_si128((const __m128i*)(Lr_p2 + d));
L3 = _mm_load_si128((const __m128i*)(Lr_p3 + d));
L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d - 1)), _P1));
L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d + 1)), _P1));
L1 = _mm_min_epi16(L1, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p1 + d - 1)), _P1));
L1 = _mm_min_epi16(L1, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p1 + d + 1)), _P1));
L2 = _mm_min_epi16(L2, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p2 + d - 1)), _P1));
L2 = _mm_min_epi16(L2, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p2 + d + 1)), _P1));
L3 = _mm_min_epi16(L3, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p3 + d - 1)), _P1));
L3 = _mm_min_epi16(L3, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p3 + d + 1)), _P1));
L0 = _mm_min_epi16(L0, _delta0);
L0 = _mm_adds_epi16(_mm_subs_epi16(L0, _delta0), Cpd);
L1 = _mm_min_epi16(L1, _delta1);
L1 = _mm_adds_epi16(_mm_subs_epi16(L1, _delta1), Cpd);
L2 = _mm_min_epi16(L2, _delta2);
L2 = _mm_adds_epi16(_mm_subs_epi16(L2, _delta2), Cpd);
L3 = _mm_min_epi16(L3, _delta3);
L3 = _mm_adds_epi16(_mm_subs_epi16(L3, _delta3), Cpd);
_mm_store_si128( (__m128i*)(Lr_p + d), L0);
_mm_store_si128( (__m128i*)(Lr_p + d + D2), L1);
_mm_store_si128( (__m128i*)(Lr_p + d + D2*2), L2);
_mm_store_si128( (__m128i*)(Lr_p + d + D2*3), L3);
__m128i t0 = _mm_min_epi16(_mm_unpacklo_epi16(L0, L2), _mm_unpackhi_epi16(L0, L2));
__m128i t1 = _mm_min_epi16(_mm_unpacklo_epi16(L1, L3), _mm_unpackhi_epi16(L1, L3));
t0 = _mm_min_epi16(_mm_unpacklo_epi16(t0, t1), _mm_unpackhi_epi16(t0, t1));
_minL0 = _mm_min_epi16(_minL0, t0);
__m128i Sval = _mm_load_si128((const __m128i*)(Sp + d));
L0 = _mm_adds_epi16(L0, L1);
L2 = _mm_adds_epi16(L2, L3);
Sval = _mm_adds_epi16(Sval, L0);
Sval = _mm_adds_epi16(Sval, L2);
_mm_store_si128((__m128i*)(Sp + d), Sval);
}
_minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 8));
_mm_storel_epi64((__m128i*)&minLr[0][xm], _minL0);
}
......@@ -703,22 +628,18 @@ namespace cv
__m128i _minL0 = _mm_set1_epi16((short)minL0);
__m128i _minS = _mm_set1_epi16(MAX_COST), _bestDisp = _mm_set1_epi16(-1);
__m128i _d8 = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7), _8 = _mm_set1_epi16(8);
for( d = 0; d < D; d += 8 )
{
__m128i Cpd = _mm_load_si128((const __m128i*)(Cp + d)), L0;
L0 = _mm_load_si128((const __m128i*)(Lr_p0 + d));
L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d - 1)), _P1));
L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d + 1)), _P1));
L0 = _mm_min_epi16(L0, _delta0);
L0 = _mm_adds_epi16(_mm_subs_epi16(L0, _delta0), Cpd);
_mm_store_si128((__m128i*)(Lr_p + d), L0);
_minL0 = _mm_min_epi16(_minL0, L0);
L0 = _mm_adds_epi16(L0, *(__m128i*)(Sp + d));
_mm_store_si128((__m128i*)(Sp + d), L0);
__m128i mask = _mm_cmpgt_epi16(_minS, L0);
_minS = _mm_min_epi16(_minS, L0);
_bestDisp = _mm_xor_si128(_bestDisp, _mm_and_si128(_mm_xor_si128(_bestDisp,_d8), mask));
......@@ -727,22 +648,17 @@ namespace cv
short CV_DECL_ALIGNED(16) bestDispBuf[8];
_mm_store_si128((__m128i*)bestDispBuf, _bestDisp);
_minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 8));
_minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 4));
_minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 2));
__m128i qS = _mm_min_epi16(_minS, _mm_srli_si128(_minS, 8));
qS = _mm_min_epi16(qS, _mm_srli_si128(qS, 4));
qS = _mm_min_epi16(qS, _mm_srli_si128(qS, 2));
minLr[0][xm] = (CostType)_mm_cvtsi128_si32(_minL0);
minS = (CostType)_mm_cvtsi128_si32(qS);
qS = _mm_shuffle_epi32(_mm_unpacklo_epi16(qS, qS), 0);
qS = _mm_cmpeq_epi16(_minS, qS);
int idx = _mm_movemask_epi8(_mm_packs_epi16(qS, qS)) & 255;
bestDisp = bestDispBuf[LSBTab[idx]];
}
else
......@@ -751,10 +667,8 @@ namespace cv
for( d = 0; d < D; d++ )
{
int L0 = Cp[d] + std::min((int)Lr_p0[d], std::min(Lr_p0[d-1] + P1, std::min(Lr_p0[d+1] + P1, delta0))) - delta0;
Lr_p[d] = (CostType)L0;
minL0 = std::min(minL0, L0);
int Sval = Sp[d] = saturate_cast<CostType>(Sp[d] + L0);
if( Sval < minS )
{
......@@ -777,7 +691,6 @@ namespace cv
}
}
}
for( d = 0; d < D; d++ )
{
if( Sp[d]*(100 - uniquenessRatio) < minS*100 && std::abs(bestDisp - d) > 1 )
......@@ -792,7 +705,6 @@ namespace cv
disp2cost[_x2] = (CostType)minS;
disp2ptr[_x2] = (DispType)(d + minD);
}
if( 0 < d && d < D-1 )
{
// do subpixel quadratic interpolation:
......@@ -805,7 +717,6 @@ namespace cv
d *= DISP_SCALE;
disp1ptr[x + minX1] = (DispType)(d + minD*DISP_SCALE);
}
for( x = minX1; x < maxX1; x++ )
{
// we round the computed disparity both towards -inf and +inf and check
......@@ -822,14 +733,12 @@ namespace cv
disp1ptr[x] = (DispType)INVALID_DISP_SCALED;
}
}
// now shift the cyclic buffers
std::swap( Lr[0], Lr[1] );
std::swap( minLr[0], minLr[1] );
}
}
}
class StereoBinarySGBMImpl : public StereoBinarySGBM
{
public:
......@@ -837,7 +746,6 @@ namespace cv
{
params = StereoBinarySGBMParams();
}
StereoBinarySGBMImpl( int _minDisparity, int _numDisparities, int _SADWindowSize,
int _P1, int _P2, int _disp12MaxDiff, int _preFilterCap,
int _uniquenessRatio, int _speckleWindowSize, int _speckleRange,
......@@ -848,16 +756,13 @@ namespace cv
_uniquenessRatio, _speckleWindowSize, _speckleRange,
_mode );
}
void compute( InputArray leftarr, InputArray rightarr, OutputArray disparr )
{
Mat left = leftarr.getMat(), right = rightarr.getMat();
CV_Assert( left.size() == right.size() && left.type() == right.type() &&
left.depth() == CV_8U );
disparr.create( left.size(), CV_16S );
Mat disp = disparr.getMat();
computeDisparityBinarySGBM( left, right, disp, params, buffer );
medianBlur(disp, disp, 3);
......@@ -865,7 +770,6 @@ namespace cv
filterSpeckles(disp, (params.minDisparity - 1)*StereoMatcher::DISP_SCALE, params.speckleWindowSize,
StereoMatcher::DISP_SCALE*params.speckleRange, buffer);
}
int getMinDisparity() const { return params.minDisparity; }
void setMinDisparity(int minDisparity) { params.minDisparity = minDisparity; }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment