Commit 2b64abdb authored by Vadim Pisarevsky

Merge pull request #326 from MMp131316:MatchingOperations

parents bf0c8712 12b530c6
@@ -52,4 +52,4 @@ $ cmake -D OPENCV_EXTRA_MODULES_PATH=<opencv_contrib>/modules -D BUILD_opencv_re
 22. **opencv_xphoto**: Additional photo processing algorithms: Color balance / Denoising / Inpainting.
-23. **opencv_stereo**: Stereo Correspondence done with different descriptors: Census / CS-Census / MCT / BRIEF / MV / RT.
+23. **opencv_stereo**: Stereo Correspondence done with different descriptors: Census / CS-Census / MCT / BRIEF / MV.
@@ -47,211 +47,234 @@
 #include "opencv2/core.hpp"
 #include "opencv2/features2d.hpp"
 #include "opencv2/core/affine.hpp"
+#include "../../stereo/src/descriptor.hpp"
+#include "../../stereo/src/matching.hpp"
 /**
 @defgroup stereo Stereo Correspondance Algorithms
 */
 namespace cv
 {
 namespace stereo
 {
 //! @addtogroup stereo
 //! @{
 //    void correctMatches( InputArray F, InputArray points1, InputArray points2,
 //                         OutputArray newPoints1, OutputArray newPoints2 );
 /** @brief Filters off small noise blobs (speckles) in the disparity map
 @param img The input 16-bit signed disparity image
 @param newVal The disparity value used to paint-off the speckles
 @param maxSpeckleSize The maximum speckle size to consider it a speckle. Larger blobs are not
 affected by the algorithm
 @param maxDiff Maximum difference between neighbor disparity pixels to put them into the same
 blob. Note that since StereoBM, StereoSGBM and may be other algorithms return a fixed-point
 disparity map, where disparity values are multiplied by 16, this scale factor should be taken into
 account when specifying this parameter value.
 @param buf The optional temporary buffer to avoid memory allocation within the function.
 */
 /** @brief The base class for stereo correspondence algorithms.
 */
 class StereoMatcher : public Algorithm
 {
 public:
     enum { DISP_SHIFT = 4,
            DISP_SCALE = (1 << DISP_SHIFT)
          };

     /** @brief Computes disparity map for the specified stereo pair

     @param left Left 8-bit single-channel image.
     @param right Right image of the same size and the same type as the left one.
     @param disparity Output disparity map. It has the same size as the input images. Some algorithms,
     like StereoBM or StereoSGBM compute 16-bit fixed-point disparity map (where each disparity value
     has 4 fractional bits), whereas other algorithms output 32-bit floating-point disparity map.
     */
     virtual void compute( InputArray left, InputArray right,
                           OutputArray disparity ) = 0;

     virtual int getMinDisparity() const = 0;
     virtual void setMinDisparity(int minDisparity) = 0;
     virtual int getNumDisparities() const = 0;
     virtual void setNumDisparities(int numDisparities) = 0;
     virtual int getBlockSize() const = 0;
     virtual void setBlockSize(int blockSize) = 0;
     virtual int getSpeckleWindowSize() const = 0;
     virtual void setSpeckleWindowSize(int speckleWindowSize) = 0;
     virtual int getSpeckleRange() const = 0;
     virtual void setSpeckleRange(int speckleRange) = 0;
     virtual int getDisp12MaxDiff() const = 0;
     virtual void setDisp12MaxDiff(int disp12MaxDiff) = 0;
 };
+//! speckle removal algorithms. These algorithms have the purpose of removing small regions
+enum {
+    CV_SPECKLE_REMOVAL_ALGORITHM, CV_SPECKLE_REMOVAL_AVG_ALGORITHM
+};
+//! subpixel interpolation methods for disparities.
+enum {
+    CV_QUADRATIC_INTERPOLATION, CV_SIMETRICV_INTERPOLATION
+};
 /** @brief Class for computing stereo correspondence using the block matching algorithm, introduced and
 contributed to OpenCV by K. Konolige.
 */
 class StereoBinaryBM : public StereoMatcher
 {
 public:
     enum { PREFILTER_NORMALIZED_RESPONSE = 0,
            PREFILTER_XSOBEL = 1
          };

     virtual int getPreFilterType() const = 0;
     virtual void setPreFilterType(int preFilterType) = 0;
     virtual int getPreFilterSize() const = 0;
     virtual void setPreFilterSize(int preFilterSize) = 0;
     virtual int getPreFilterCap() const = 0;
     virtual void setPreFilterCap(int preFilterCap) = 0;
     virtual int getTextureThreshold() const = 0;
     virtual void setTextureThreshold(int textureThreshold) = 0;
     virtual int getUniquenessRatio() const = 0;
     virtual void setUniquenessRatio(int uniquenessRatio) = 0;
     virtual int getSmallerBlockSize() const = 0;
     virtual void setSmallerBlockSize(int blockSize) = 0;
-    virtual Rect getROI1() const = 0;
-    virtual void setROI1(Rect roi1) = 0;
-    virtual Rect getROI2() const = 0;
-    virtual void setROI2(Rect roi2) = 0;
+    virtual int getScalleFactor() const = 0;
+    virtual void setScalleFactor(int factor) = 0;
+    virtual int getSpekleRemovalTechnique() const = 0;
+    virtual void setSpekleRemovalTechnique(int factor) = 0;
+    virtual bool getUsePrefilter() const = 0;
+    virtual void setUsePrefilter(bool factor) = 0;
+    virtual int getBinaryKernelType() const = 0;
+    virtual void setBinaryKernelType(int value) = 0;
+    virtual int getAgregationWindowSize() const = 0;
+    virtual void setAgregationWindowSize(int value) = 0;

     /** @brief Creates StereoBM object

     @param numDisparities the disparity search range. For each pixel algorithm will find the best
     disparity from 0 (default minimum disparity) to numDisparities. The search range can then be
     shifted by changing the minimum disparity.
     @param blockSize the linear size of the blocks compared by the algorithm. The size should be odd
     (as the block is centered at the current pixel). Larger block size implies smoother, though less
     accurate disparity map. Smaller block size gives more detailed disparity map, but there is higher
     chance for algorithm to find a wrong correspondence.

     The function create StereoBM object. You can then call StereoBM::compute() to compute disparity for
     a specific stereo pair.
     */
-    CV_EXPORTS static Ptr< cv::stereo::StereoBinaryBM > create(int numDisparities = 0, int blockSize = 21);
+    CV_EXPORTS static Ptr< cv::stereo::StereoBinaryBM > create(int numDisparities = 0, int blockSize = 9);
 };

 /** @brief The class implements the modified H. Hirschmuller algorithm @cite HH08 that differs from the original
 one as follows:

  - By default, the algorithm is single-pass, which means that you consider only 5 directions
 instead of 8. Set mode=StereoSGBM::MODE_HH in createStereoSGBM to run the full variant of the
 algorithm but beware that it may consume a lot of memory.
  - The algorithm matches blocks, not individual pixels. Though, setting blockSize=1 reduces the
 blocks to single pixels.
  - Mutual information cost function is not implemented. Instead, a simpler Birchfield-Tomasi
 sub-pixel metric from @cite BT98 is used. Though, the color images are supported as well.
  - Some pre- and post- processing steps from K. Konolige algorithm StereoBM are included, for
 example: pre-filtering (StereoBM::PREFILTER_XSOBEL type) and post-filtering (uniqueness
 check, quadratic interpolation and speckle filtering).

 @note
    - (Python) An example illustrating the use of the StereoSGBM matching algorithm can be found
     at opencv_source_code/samples/python2/stereo_match.py
 */
 class StereoBinarySGBM : public StereoMatcher
 {
 public:
     enum
     {
         MODE_SGBM = 0,
         MODE_HH   = 1
     };

     virtual int getPreFilterCap() const = 0;
     virtual void setPreFilterCap(int preFilterCap) = 0;
     virtual int getUniquenessRatio() const = 0;
     virtual void setUniquenessRatio(int uniquenessRatio) = 0;
     virtual int getP1() const = 0;
     virtual void setP1(int P1) = 0;
     virtual int getP2() const = 0;
     virtual void setP2(int P2) = 0;
     virtual int getMode() const = 0;
     virtual void setMode(int mode) = 0;
+    virtual int getSpekleRemovalTechnique() const = 0;
+    virtual void setSpekleRemovalTechnique(int factor) = 0;
+    virtual int getBinaryKernelType() const = 0;
+    virtual void setBinaryKernelType(int value) = 0;
+    virtual int getSubPixelInterpolationMethod() const = 0;
+    virtual void setSubPixelInterpolationMethod(int value) = 0;

     /** @brief Creates StereoSGBM object

     @param minDisparity Minimum possible disparity value. Normally, it is zero but sometimes
     rectification algorithms can shift images, so this parameter needs to be adjusted accordingly.
     @param numDisparities Maximum disparity minus minimum disparity. The value is always greater than
     zero. In the current implementation, this parameter must be divisible by 16.
     @param blockSize Matched block size. It must be an odd number \>=1 . Normally, it should be
     somewhere in the 3..11 range.
-    @param P1 The first parameter controlling the disparity smoothness. See below.
-    @param P2 The second parameter controlling the disparity smoothness. The larger the values are,
-    the smoother the disparity is. P1 is the penalty on the disparity change by plus or minus 1
+    @param P1 The first parameter controlling the disparity smoothness. This parameter is used for the case of slanted surfaces (not fronto parallel).
+    @param P2 The second parameter controlling the disparity smoothness. This parameter is used for "solving" the depth discontinuities problem.
+    The larger the values are, the smoother the disparity is. P1 is the penalty on the disparity change by plus or minus 1
     between neighbor pixels. P2 is the penalty on the disparity change by more than 1 between neighbor
     pixels. The algorithm requires P2 \> P1 . See stereo_match.cpp sample where some reasonably good
     P1 and P2 values are shown (like 8\*number_of_image_channels\*SADWindowSize\*SADWindowSize and
     32\*number_of_image_channels\*SADWindowSize\*SADWindowSize , respectively).
     @param disp12MaxDiff Maximum allowed difference (in integer pixel units) in the left-right
     disparity check. Set it to a non-positive value to disable the check.
     @param preFilterCap Truncation value for the prefiltered image pixels. The algorithm first
     computes x-derivative at each pixel and clips its value by [-preFilterCap, preFilterCap] interval.
     The result values are passed to the Birchfield-Tomasi pixel cost function.
     @param uniquenessRatio Margin in percentage by which the best (minimum) computed cost function
     value should "win" the second best value to consider the found match correct. Normally, a value
     within the 5-15 range is good enough.
     @param speckleWindowSize Maximum size of smooth disparity regions to consider their noise speckles
     and invalidate. Set it to 0 to disable speckle filtering. Otherwise, set it somewhere in the
     50-200 range.
     @param speckleRange Maximum disparity variation within each connected component. If you do speckle
     filtering, set the parameter to a positive value, it will be implicitly multiplied by 16.
     Normally, 1 or 2 is good enough.
     @param mode Set it to StereoSGBM::MODE_HH to run the full-scale two-pass dynamic programming
     algorithm. It will consume O(W\*H\*numDisparities) bytes, which is large for 640x480 stereo and
     huge for HD-size pictures. By default, it is set to false .

     The first constructor initializes StereoSGBM with all the default parameters. So, you only have to
     set StereoSGBM::numDisparities at minimum. The second constructor enables you to set each parameter
     to a custom value.
     */
-    CV_EXPORTS static Ptr<cv::stereo::StereoBinarySGBM> create(int minDisparity, int numDisparities, int blockSize,
-                                                               int P1 = 0, int P2 = 0, int disp12MaxDiff = 0,
-                                                               int preFilterCap = 0, int uniquenessRatio = 0,
-                                                               int speckleWindowSize = 0, int speckleRange = 0,
-                                                               int mode = StereoBinarySGBM::MODE_SGBM);
+    CV_EXPORTS static Ptr<cv::stereo::StereoBinarySGBM> create(int minDisparity, int numDisparities, int blockSize,
+                                                               int P1 = 100, int P2 = 1000, int disp12MaxDiff = 1,
+                                                               int preFilterCap = 0, int uniquenessRatio = 5,
+                                                               int speckleWindowSize = 400, int speckleRange = 200,
+                                                               int mode = StereoBinarySGBM::MODE_SGBM);
 };
 //! @}
 }//stereo
 } // cv
 #ifndef DISABLE_OPENCV_24_COMPATIBILITY
 ...
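A minimal usage sketch of the interface above (illustrative only, not part of the commit; it mirrors the sample program further down this page and assumes left and right are rectified 8-bit single-channel images of equal size):

#include "opencv2/core.hpp"
#include "opencv2/stereo.hpp"

void computeDisparities(const cv::Mat &left, const cv::Mat &right)
{
    using namespace cv::stereo;
    // Block matching on binary (census-based) descriptors, 8-bit output as in the sample.
    cv::Mat dispBM(left.rows, left.cols, CV_8UC1);
    cv::Ptr<StereoBinaryBM> bm = StereoBinaryBM::create(128, 9);
    bm->setBinaryKernelType(CV_MODIFIED_CENSUS_TRANSFORM);
    bm->setSpekleRemovalTechnique(CV_SPECKLE_REMOVAL_AVG_ALGORITHM);
    bm->setAgregationWindowSize(11);
    bm->compute(left, right, dispBM);

    // Semi-global matching on the same binary descriptors, 16-bit fixed-point output.
    cv::Mat dispSGBM(left.rows, left.cols, CV_16S);
    cv::Ptr<StereoBinarySGBM> sgbm = StereoBinarySGBM::create(0, 128, 9);
    sgbm->setP1(100);
    sgbm->setP2(1000);
    sgbm->setBinaryKernelType(CV_MODIFIED_CENSUS_TRANSFORM);
    sgbm->setSubPixelInterpolationMethod(CV_SIMETRICV_INTERPOLATION);
    sgbm->compute(left, right, dispSGBM);
}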
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "perf_precomp.hpp"
using namespace std;
using namespace cv;
using namespace cv::stereo;
using namespace perf;
typedef std::tr1::tuple<Size, MatType, MatDepth> s_bm_test_t;
typedef perf::TestBaseWithParam<s_bm_test_t> s_bm;
PERF_TEST_P( s_bm, sgm_perf,
testing::Combine(
testing::Values( cv::Size(512, 283), cv::Size(320, 240)),
testing::Values( CV_8UC1,CV_8U ),
testing::Values( CV_8UC1,CV_8U,CV_16S )
)
)
{
Size sz = std::tr1::get<0>(GetParam());
int matType = std::tr1::get<1>(GetParam());
int sdepth = std::tr1::get<2>(GetParam());
Mat left(sz, matType);
Mat right(sz, matType);
Mat out1(sz, sdepth);
Ptr<StereoBinarySGBM> sgbm = StereoBinarySGBM::create(0, 16, 5);
sgbm->setBinaryKernelType(CV_DENSE_CENSUS);
declare.in(left, WARMUP_RNG)
.out(out1)
.time(0.1)
.iterations(20);
TEST_CYCLE()
{
sgbm->compute(left, right, out1);
}
SANITY_CHECK(out1);
}
PERF_TEST_P( s_bm, bm_perf,
testing::Combine(
testing::Values( cv::Size(512, 383), cv::Size(320, 240) ),
testing::Values( CV_8UC1,CV_8U ),
testing::Values( CV_8UC1,CV_8U )
)
)
{
Size sz = std::tr1::get<0>(GetParam());
int matType = std::tr1::get<1>(GetParam());
int sdepth = std::tr1::get<2>(GetParam());
Mat left(sz, matType);
Mat right(sz, matType);
Mat out1(sz, sdepth);
Ptr<StereoBinaryBM> sbm = StereoBinaryBM::create(16, 9);
// we set the corresponding parameters
sbm->setPreFilterCap(31);
sbm->setMinDisparity(0);
sbm->setTextureThreshold(10);
sbm->setUniquenessRatio(0);
sbm->setSpeckleWindowSize(400);
sbm->setDisp12MaxDiff(0);
sbm->setAgregationWindowSize(11);
// the user can choose between the average speckle removal algorithm or
// the classical version that was implemented in OpenCV
sbm->setSpekleRemovalTechnique(CV_SPECKLE_REMOVAL_AVG_ALGORITHM);
sbm->setUsePrefilter(false);
declare.in(left, WARMUP_RNG)
.out(out1)
.time(0.1)
.iterations(20);
TEST_CYCLE()
{
sbm->compute(left, right, out1);
}
SANITY_CHECK(out1);
}
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "perf_precomp.hpp"
using namespace std;
using namespace cv;
using namespace cv::stereo;
using namespace perf;
typedef std::tr1::tuple<Size, MatType, MatDepth> descript_params_t;
typedef perf::TestBaseWithParam<descript_params_t> descript_params;
PERF_TEST_P( descript_params, census_sparse_descriptor,
testing::Combine(
testing::Values( TYPICAL_MAT_SIZES ),
testing::Values( CV_8UC1,CV_8U ),
testing::Values( CV_32SC4,CV_32S )
)
)
{
Size sz = std::tr1::get<0>(GetParam());
int matType = std::tr1::get<1>(GetParam());
int sdepth = std::tr1::get<2>(GetParam());
Mat left(sz, matType);
Mat out1(sz, sdepth);
declare.in(left, WARMUP_RNG)
.out(out1)
.time(0.01);
TEST_CYCLE()
{
censusTransform(left,9,out1,CV_SPARSE_CENSUS);
}
SANITY_CHECK(out1);
}
PERF_TEST_P( descript_params, star_census_transform,
testing::Combine(
testing::Values( TYPICAL_MAT_SIZES ),
testing::Values( CV_8UC1,CV_8U ),
testing::Values( CV_32SC4,CV_32S )
)
)
{
Size sz = std::tr1::get<0>(GetParam());
int matType = std::tr1::get<1>(GetParam());
int sdepth = std::tr1::get<2>(GetParam());
Mat left(sz, matType);
Mat out1(sz, sdepth);
declare.in(left, WARMUP_RNG)
.out(out1)
.time(0.01);
TEST_CYCLE()
{
starCensusTransform(left,9,out1);
}
SANITY_CHECK(out1);
}
PERF_TEST_P( descript_params, modified_census_transform,
testing::Combine(
testing::Values( TYPICAL_MAT_SIZES ),
testing::Values( CV_8UC1,CV_8U ),
testing::Values( CV_32SC4,CV_32S )
)
)
{
Size sz = std::tr1::get<0>(GetParam());
int matType = std::tr1::get<1>(GetParam());
int sdepth = std::tr1::get<2>(GetParam());
Mat left(sz, matType);
Mat out1(sz, sdepth);
declare.in(left, WARMUP_RNG)
.out(out1)
.time(0.01);
TEST_CYCLE()
{
modifiedCensusTransform(left,9,out1,CV_MODIFIED_CENSUS_TRANSFORM);
}
SANITY_CHECK(out1);
}
PERF_TEST_P( descript_params, center_symetric_census,
testing::Combine(
testing::Values( TYPICAL_MAT_SIZES ),
testing::Values( CV_8UC1,CV_8U ),
testing::Values( CV_32SC4,CV_32S )
)
)
{
Size sz = std::tr1::get<0>(GetParam());
int matType = std::tr1::get<1>(GetParam());
int sdepth = std::tr1::get<2>(GetParam());
Mat left(sz, matType);
Mat out1(sz, sdepth);
declare.in(left, WARMUP_RNG)
.out(out1)
.time(0.01);
TEST_CYCLE()
{
symetricCensusTransform(left,7,out1,CV_CS_CENSUS);
}
SANITY_CHECK(out1);
}
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "perf_precomp.hpp"
CV_PERF_TEST_MAIN(stereo)
#ifdef __GNUC__
# pragma GCC diagnostic ignored "-Wmissing-declarations"
# if defined __clang__ || defined __APPLE__
# pragma GCC diagnostic ignored "-Wmissing-prototypes"
# pragma GCC diagnostic ignored "-Wextra"
# endif
#endif
#ifndef __OPENCV_PERF_PRECOMP_HPP__
#define __OPENCV_PERF_PRECOMP_HPP__
#include <iostream>
#include "opencv2/ts.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/stereo.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/features2d.hpp"
#include "opencv2/core/utility.hpp"
#include "opencv2/core/private.hpp"
#include "opencv2/core/cvdef.h"
#include "opencv2/core.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/calib3d.hpp"
#include <algorithm>
#include <cmath>
#ifdef GTEST_CREATE_SHARED_LIBRARY
#error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined
#endif
#endif
#include "opencv2/stereo.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/highgui.hpp"
#include <stdio.h>
#include <string.h>
#include <iostream>
using namespace std;
using namespace cv;
using namespace cv::stereo;
enum { STEREO_BINARY_BM, STEREO_BINARY_SGM };
static cv::CommandLineParser parse_argument_values(int argc, char **argv, string &left, string &right, int &kernel_size, int &number_of_disparities,
int &aggregation_window, int &P1, int &P2, float &scale, int &algo, int &binary_descriptor_type, int &success);
int main(int argc, char** argv)
{
string left, right;
int kernel_size = 0, number_of_disparities = 0, aggregation_window = 0, P1 = 0, P2 = 0;
float scale = 4;
int algo = STEREO_BINARY_BM;
int binary_descriptor_type = 0;
int success;
// here we extract the values that were passed as arguments
// and check whether they were provided correctly
cv::CommandLineParser parser =
parse_argument_values(argc, argv, left, right,
kernel_size,
number_of_disparities,
aggregation_window,
P1, P2,
scale,
algo, binary_descriptor_type,success);
if (!parser.check() || !success)
{
parser.printMessage();
return 1;
}
// verify if the user inputs the correct number of parameters
Mat image1, image2;
// we read a pair of images from the disk
image1 = imread(left, CV_8UC1);
image2 = imread(right, CV_8UC1);
// verify if they are loaded correctly
if (image1.empty() || image2.empty())
{
cout << " --(!) Error reading images \n";
parser.printMessage();
return 1;
}
// we display the parsed parameters
const char *b[7] = { "CV_DENSE_CENSUS", "CV_SPARSE_CENSUS", "CV_CS_CENSUS", "CV_MODIFIED_CS_CENSUS",
"CV_MODIFIED_CENSUS_TRANSFORM", "CV_MEAN_VARIATION", "CV_STAR_KERNEL" };
cout << "Program Name: " << argv[0];
cout << "\nPath to left image " << left << " \n" << "Path to right image " << right << "\n";
cout << "\nkernel size " << kernel_size << "\n"
<< "numberOfDisparities " << number_of_disparities << "\n"
<< "aggregationWindow " << aggregation_window << "\n"
<< "scallingFactor " << scale << "\n" << "Descriptor name : " << b[binary_descriptor_type] << "\n";
Mat imgDisparity16S2 = Mat(image1.rows, image1.cols, CV_16S);
Mat imgDisparity8U2 = Mat(image1.rows, image1.cols, CV_8UC1);
imshow("Original Left image", image1);
if (algo == STEREO_BINARY_BM)
{
Ptr<StereoBinaryBM> sbm = StereoBinaryBM::create(number_of_disparities, kernel_size);
// we set the corresponding parameters
sbm->setPreFilterCap(31);
sbm->setMinDisparity(0);
sbm->setTextureThreshold(10);
sbm->setUniquenessRatio(0);
sbm->setSpeckleWindowSize(400); // speckle size
sbm->setSpeckleRange(200);
sbm->setDisp12MaxDiff(0);
sbm->setScalleFactor((int)scale); // the scaling factor
sbm->setBinaryKernelType(binary_descriptor_type); // binary descriptor kernel
sbm->setAgregationWindowSize(aggregation_window);
// the user can choose between the average speckle removal algorithm or
// the classical version that was implemented in OpenCV
sbm->setSpekleRemovalTechnique(CV_SPECKLE_REMOVAL_AVG_ALGORITHM);
sbm->setUsePrefilter(false);
//-- calculate the disparity image
sbm->compute(image1, image2, imgDisparity8U2);
imshow("Disparity", imgDisparity8U2);
}
else if (algo == STEREO_BINARY_SGM)
{
// we set the corresponding parameters
Ptr<StereoBinarySGBM> sgbm = StereoBinarySGBM::create(0, number_of_disparities, kernel_size);
// setting the penalties for sgbm
sgbm->setP1(P1);
sgbm->setP2(P2);
sgbm->setMinDisparity(0);
sgbm->setUniquenessRatio(5);
sgbm->setSpeckleWindowSize(400);
sgbm->setSpeckleRange(0);
sgbm->setDisp12MaxDiff(1);
sgbm->setBinaryKernelType(binary_descriptor_type);
sgbm->setSpekleRemovalTechnique(CV_SPECKLE_REMOVAL_AVG_ALGORITHM);
sgbm->setSubPixelInterpolationMethod(CV_SIMETRICV_INTERPOLATION);
sgbm->compute(image1, image2, imgDisparity16S2);
/* Alternative for scaling:
imgDisparity16S2.convertTo(imgDisparity8U2, CV_8UC1, scale);
*/
double minVal; double maxVal;
minMaxLoc(imgDisparity16S2, &minVal, &maxVal);
imgDisparity16S2.convertTo(imgDisparity8U2, CV_8UC1, 255 / (maxVal - minVal));
//show the disparity image
imshow("Windowsgm", imgDisparity8U2);
}
waitKey(0);
return 0;
}
static cv::CommandLineParser parse_argument_values(int argc, char **argv, string &left, string &right, int &kernel_size, int &number_of_disparities,
int &aggregation_window, int &P1, int &P2, float &scale, int &algo, int &binary_descriptor_type, int &success)
{
static const char* keys =
"{ @left | | }"
"{ @right | | }"
"{ k kernel_size | 9 | }"
"{ d disparity | 128 | }"
"{ w aggregation_window | 9 | }"
"{ P1 | 100 | }"
"{ P2 | 1000 | }"
"{ b binary_descriptor | 4 | Index of the descriptor type:\n 0 - CV_DENSE_CENSUS,\n 1 - CV_SPARSE_CENSUS,\n 2 - CV_CS_CENSUS,\n 3 - CV_MODIFIED_CS_CENSUS,\n 4 - CV_MODIFIED_CENSUS_TRANSFORM,\n 5 - CV_MEAN_VARIATION,\n 6 - CV_STAR_KERNEL}"
"{ s scale | 1.01593 | }"
"{ a algorithm | sgm | }"
;
cv::CommandLineParser parser( argc, argv, keys );
left = parser.get<string>(0);
right = parser.get<string>(1);
kernel_size = parser.get<int>("kernel_size");
number_of_disparities = parser.get<int>("disparity");
aggregation_window = parser.get<int>("aggregation_window");
P1 = parser.get<int>("P1");
P2 = parser.get<int>("P2");
binary_descriptor_type = parser.get<int>("binary_descriptor");
scale = parser.get<float>("scale");
algo = parser.get<string>("algorithm") == "sgm" ? STEREO_BINARY_SGM : STEREO_BINARY_BM;
parser.about("\nDemo stereo matching converting L and R images into disparity images using BM and SGBM\n");
success = 1;
//TEST if the provided parameters are correct
if(binary_descriptor_type == CV_DENSE_CENSUS && kernel_size > 5)
{
cout << "For the dense census transform the maximum kernel size should be 5\n";
success = 0;
}
if((binary_descriptor_type == CV_MEAN_VARIATION || binary_descriptor_type == CV_MODIFIED_CENSUS_TRANSFORM || binary_descriptor_type == CV_STAR_KERNEL) && kernel_size != 9)
{
cout <<" For Mean variation and the modified census transform the kernel size should be equal to 9\n";
success = 0;
}
if((binary_descriptor_type == CV_CS_CENSUS || binary_descriptor_type == CV_MODIFIED_CS_CENSUS) && kernel_size > 7)
{
cout << " The kernel size should be smaller or equal to 7 for the CS census and modified center symetric census\n";
success = 0;
}
if(binary_descriptor_type == CV_SPARSE_CENSUS && kernel_size > 11)
{
cout << "The kernel size for the sparse census must be smaller or equal to 11\n";
success = 0;
}
if(number_of_disparities < 10)
{
cout << "Number of disparities should be greater than 10\n";
success = 0;
}
if(aggregation_window < 3)
{
cout << "Aggregation window should be > 3";
success = 0;
}
if(scale < 1)
{
cout << "The scale should be a positive number \n";
success = 0;
}
if(P1 != 0)
{
if(P2 / P1 < 2)
{
cout << "You should probably choose a greater P2 penalty\n";
success = 0;
}
}
else
{
cout << " Penalties should be greater than 0\n";
success = 0;
}
return parser;
}
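An illustrative invocation of the sample above, using values that pass the checks in parse_argument_values() (the binary name is assumed here; it depends on how the sample is built):

    ./stereo_sample left_image.png right_image.png -a=sgm -k=9 -d=128 -w=9 -P1=100 -P2=1000 -b=4 -s=1.01593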
//By downloading, copying, installing or using the software you agree to this license.
//If you do not agree to this license, do not download, install,
//copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
// (3-clause BSD License)
//
//Copyright (C) 2000-2015, Intel Corporation, all rights reserved.
//Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
//Copyright (C) 2009-2015, NVIDIA Corporation, all rights reserved.
//Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
//Copyright (C) 2015, OpenCV Foundation, all rights reserved.
//Copyright (C) 2015, Itseez Inc., all rights reserved.
//Third party copyrights are property of their respective owners.
//
//Redistribution and use in source and binary forms, with or without modification,
//are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * Neither the names of the copyright holders nor the names of the contributors
// may be used to endorse or promote products derived from this software
// without specific prior written permission.
//
//This software is provided by the copyright holders and contributors "as is" and
//any express or implied warranties, including, but not limited to, the implied
//warranties of merchantability and fitness for a particular purpose are disclaimed.
//In no event shall copyright holders or contributors be liable for any direct,
//indirect, incidental, special, exemplary, or consequential damages
//(including, but not limited to, procurement of substitute goods or services;
//loss of use, data, or profits; or business interruption) however caused
//and on any theory of liability, whether in contract, strict liability,
//or tort (including negligence or otherwise) arising in any way out of
//the use of this software, even if advised of the possibility of such damage.
/*****************************************************************************************************************\
* The file contains the implemented descriptors *
\******************************************************************************************************************/
#include "precomp.hpp"
#include "descriptor.hpp"
namespace cv
{
namespace stereo
{
// Function that performs the census transform on two images.
// Two variants of census are offered: a sparse version, which takes every second pixel, and a dense version.
CV_EXPORTS void censusTransform(const Mat &image1, const Mat &image2, int kernelSize, Mat &dist1, Mat &dist2, const int type)
{
CV_Assert(image1.size() == image2.size());
CV_Assert(kernelSize % 2 != 0);
CV_Assert(image1.type() == CV_8UC1 && image2.type() == CV_8UC1);
CV_Assert(type != CV_DENSE_CENSUS || type != CV_SPARSE_CENSUS);
CV_Assert(kernelSize <= ((type == 0) ? 5 : 11));
int n2 = (kernelSize) / 2;
uint8_t *images[] = {image1.data, image2.data};
int *costs[] = {(int *)dist1.data,(int *)dist2.data};
int stride = (int)image1.step;
if(type == CV_DENSE_CENSUS)
{
parallel_for_( Range(n2, image1.rows - n2),
CombinedDescriptor<1,1,1,2,CensusKernel<2> >(image1.cols, image1.rows,stride,n2,costs,CensusKernel<2>(images),n2));
}
else if(type == CV_SPARSE_CENSUS)
{
parallel_for_( Range(n2, image1.rows - n2),
CombinedDescriptor<2,2,1,2,CensusKernel<2> >(image1.cols, image1.rows, stride,n2,costs,CensusKernel<2>(images),n2));
}
}
//function that performs census on one image
CV_EXPORTS void censusTransform(const Mat &image1, int kernelSize, Mat &dist1, const int type)
{
CV_Assert(image1.size() == dist1.size());
CV_Assert(kernelSize % 2 != 0);
CV_Assert(image1.type() == CV_8UC1);
CV_Assert(type != CV_DENSE_CENSUS || type != CV_SPARSE_CENSUS);
CV_Assert(kernelSize <= ((type == 0) ? 5 : 11));
int n2 = (kernelSize) / 2;
uint8_t *images[] = {image1.data};
int *costs[] = {(int *)dist1.data};
int stride = (int)image1.step;
if(type == CV_DENSE_CENSUS)
{
parallel_for_( Range(n2, image1.rows - n2),
CombinedDescriptor<1,1,1,1,CensusKernel<1> >(image1.cols, image1.rows,stride,n2,costs,CensusKernel<1>(images),n2));
}
else if(type == CV_SPARSE_CENSUS)
{
parallel_for_( Range(n2, image1.rows - n2),
CombinedDescriptor<2,2,1,1,CensusKernel<1> >(image1.cols, image1.rows,stride,n2,costs,CensusKernel<1>(images),n2));
}
}
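// Illustrative usage (not part of this file): computing a sparse census descriptor for a
// single 8-bit image, mirroring the perf test earlier on this page. The destination matrix
// is pre-allocated by the caller with an integer type and the same size as the input:
//     cv::Mat img = cv::imread("left.png", cv::IMREAD_GRAYSCALE);
//     cv::Mat desc(img.size(), CV_32S);
//     cv::stereo::censusTransform(img, 9, desc, cv::stereo::CV_SPARSE_CENSUS);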
// In a 9x9 kernel only certain positions are chosen for comparison
CV_EXPORTS void starCensusTransform(const Mat &img1, const Mat &img2, int kernelSize, Mat &dist1, Mat &dist2)
{
CV_Assert(img1.size() == img2.size());
CV_Assert(kernelSize % 2 != 0);
CV_Assert(img1.type() == CV_8UC1 && img2.type() == CV_8UC1);
CV_Assert(kernelSize >= 7);
int n2 = (kernelSize) >> 1;
Mat images[] = {img1, img2};
int *date[] = { (int *)dist1.data, (int *)dist2.data};
parallel_for_(Range(n2, img1.rows - n2), StarKernelCensus<2>(images, n2,date));
}
//single version of star census
CV_EXPORTS void starCensusTransform(const Mat &img1, int kernelSize, Mat &dist)
{
CV_Assert(img1.size() == dist.size());
CV_Assert(kernelSize % 2 != 0);
CV_Assert(img1.type() == CV_8UC1);
CV_Assert(kernelSize >= 7);
int n2 = (kernelSize) >> 1;
Mat images[] = {img1};
int *date[] = { (int *)dist.data};
parallel_for_(Range(n2, img1.rows - n2), StarKernelCensus<1>(images, n2,date));
}
// Modified census transforms:
// the first one deals with small illumination changes;
// the second modified census transform is invariant to noise, i.e.
// if the current pixel we are comparing against is noisy, this descriptor provides a better result by comparing with the mean of the window;
// otherwise, if the pixel is not noisy, the information is strengthened.
CV_EXPORTS void modifiedCensusTransform(const Mat &img1, const Mat &img2, int kernelSize, Mat &dist1,Mat &dist2, const int type, int t, const Mat &IntegralImage1, const Mat &IntegralImage2 )
{
CV_Assert(img1.size() == img2.size());
CV_Assert(kernelSize % 2 != 0);
CV_Assert(img1.type() == CV_8UC1 && img2.type() == CV_8UC1);
CV_Assert(type != CV_MODIFIED_CENSUS_TRANSFORM || type != CV_MEAN_VARIATION);
CV_Assert(kernelSize <= 9);
int n2 = (kernelSize - 1) >> 1;
uint8_t *images[] = {img1.data, img2.data};
int *date[] = { (int *)dist1.data, (int *)dist2.data};
int stride = (int)img1.cols;
if(type == CV_MODIFIED_CENSUS_TRANSFORM)
{
//MCT
parallel_for_( Range(n2, img1.rows - n2),
CombinedDescriptor<2,4,2, 2,MCTKernel<2> >(img1.cols, img1.rows,stride,n2,date,MCTKernel<2>(images,t),n2));
}
else if(type == CV_MEAN_VARIATION)
{
//MV
int *integral[2];
integral[0] = (int *)IntegralImage1.data;
integral[1] = (int *)IntegralImage2.data;
parallel_for_( Range(n2, img1.rows - n2),
CombinedDescriptor<2,3,2,2, MVKernel<2> >(img1.cols, img1.rows,stride,n2,date,MVKernel<2>(images,integral),n2));
}
}
CV_EXPORTS void modifiedCensusTransform(const Mat &img1, int kernelSize, Mat &dist, const int type, int t , Mat const &IntegralImage)
{
CV_Assert(img1.size() == dist.size());
CV_Assert(kernelSize % 2 != 0);
CV_Assert(img1.type() == CV_8UC1);
CV_Assert(type != CV_MODIFIED_CENSUS_TRANSFORM || type != CV_MEAN_VARIATION);
CV_Assert(kernelSize <= 9);
int n2 = (kernelSize - 1) >> 1;
uint8_t *images[] = {img1.data};
int *date[] = { (int *)dist.data};
int stride = (int)img1.step;
if(type == CV_MODIFIED_CENSUS_TRANSFORM)
{
//MCT
parallel_for_(Range(n2, img1.rows - n2),
CombinedDescriptor<2,4,2, 1,MCTKernel<1> >(img1.cols, img1.rows,stride,n2,date,MCTKernel<1>(images,t),n2));
}
else if(type == CV_MEAN_VARIATION)
{
//MV
int *integral[] = { (int *)IntegralImage.data};
parallel_for_(Range(n2, img1.rows - n2),
CombinedDescriptor<2,3,2,1, MVKernel<1> >(img1.cols, img1.rows,stride,n2,date,MVKernel<1>(images,integral),n2));
}
}
// Different versions of the symmetric census.
// Because these variants do not compare against the center pixel, they are invariant to noise.
CV_EXPORTS void symetricCensusTransform(const Mat &img1, const Mat &img2, int kernelSize, Mat &dist1, Mat &dist2, const int type)
{
CV_Assert(img1.size() == img2.size());
CV_Assert(kernelSize % 2 != 0);
CV_Assert(img1.type() == CV_8UC1 && img2.type() == CV_8UC1);
CV_Assert(type != CV_CS_CENSUS || type != CV_MODIFIED_CS_CENSUS);
CV_Assert(kernelSize <= 7);
int n2 = kernelSize >> 1;
uint8_t *images[] = {img1.data, img2.data};
Mat imag[] = {img1, img2};
int *date[] = { (int *)dist1.data, (int *)dist2.data};
int stride = (int)img1.step;
if(type == CV_CS_CENSUS)
{
parallel_for_(Range(n2, img1.rows - n2), SymetricCensus<2>(imag, n2,date));
}
else if(type == CV_MODIFIED_CS_CENSUS)
{
parallel_for_(Range(n2, img1.rows - n2),
CombinedDescriptor<1,1,1,2,ModifiedCsCensus<2> >(img1.cols, img1.rows,stride,n2,date,ModifiedCsCensus<2>(images,n2),1));
}
}
CV_EXPORTS void symetricCensusTransform(const Mat &img1, int kernelSize, Mat &dist1, const int type)
{
CV_Assert(img1.size() == dist1.size());
CV_Assert(kernelSize % 2 != 0);
CV_Assert(img1.type() == CV_8UC1);
CV_Assert(type != CV_MODIFIED_CS_CENSUS || type != CV_CS_CENSUS);
CV_Assert(kernelSize <= 7);
int n2 = kernelSize >> 1;
uint8_t *images[] = {img1.data};
Mat imag[] = {img1};
int *date[] = { (int *)dist1.data};
int stride = (int)img1.step;
if(type == CV_CS_CENSUS)
{
parallel_for_( Range(n2, img1.rows - n2), SymetricCensus<1>(imag, n2,date));
}
else if(type == CV_MODIFIED_CS_CENSUS)
{
parallel_for_( Range(n2, img1.rows - n2),
CombinedDescriptor<1,1,1,1,ModifiedCsCensus<1> >(img1.cols, img1.rows,stride,n2,date,ModifiedCsCensus<1>(images,n2),1));
}
}
//integral image computation used in the Mean Variation Census Transform
void imageMeanKernelSize(const Mat &image, int windowSize, Mat &cost)
{
CV_Assert(image.size > 0);
CV_Assert(cost.size > 0);
CV_Assert(windowSize % 2 != 0);
int win = windowSize / 2;
float scalling = ((float) 1) / (windowSize * windowSize);
int height = image.rows;
cost.setTo(0);
int *c = (int *)cost.data;
parallel_for_(Range(win + 1, height - win - 1),MeanKernelIntegralImage(image,win,scalling,c));
}
}
}
//By downloading, copying, installing or using the software you agree to this license.
//If you do not agree to this license, do not download, install,
//copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
// (3-clause BSD License)
//
//Copyright (C) 2000-2015, Intel Corporation, all rights reserved.
//Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
//Copyright (C) 2009-2015, NVIDIA Corporation, all rights reserved.
//Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
//Copyright (C) 2015, OpenCV Foundation, all rights reserved.
//Copyright (C) 2015, Itseez Inc., all rights reserved.
//Third party copyrights are property of their respective owners.
//
//Redistribution and use in source and binary forms, with or without modification,
//are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * Neither the names of the copyright holders nor the names of the contributors
// may be used to endorse or promote products derived from this software
// without specific prior written permission.
//
//This software is provided by the copyright holders and contributors "as is" and
//any express or implied warranties, including, but not limited to, the implied
//warranties of merchantability and fitness for a particular purpose are disclaimed.
//In no event shall copyright holders or contributors be liable for any direct,
//indirect, incidental, special, exemplary, or consequential damages
//(including, but not limited to, procurement of substitute goods or services;
//loss of use, data, or profits; or business interruption) however caused
//and on any theory of liability, whether in contract, strict liability,
//or tort (including negligence or otherwise) arising in any way out of
//the use of this software, even if advised of the possibility of such damage.
/*****************************************************************************************************************\
* The interface contains the main descriptors that will be implemented in the descriptor class *
\*****************************************************************************************************************/
#include "precomp.hpp"
#include <stdint.h>
#ifndef _OPENCV_DESCRIPTOR_HPP_
#define _OPENCV_DESCRIPTOR_HPP_
#ifdef __cplusplus
namespace cv
{
namespace stereo
{
//types of supported kernels
enum {
CV_DENSE_CENSUS, CV_SPARSE_CENSUS,
CV_CS_CENSUS, CV_MODIFIED_CS_CENSUS, CV_MODIFIED_CENSUS_TRANSFORM,
CV_MEAN_VARIATION, CV_STAR_KERNEL
};
//!Mean Variation is a robust kernel that compares a pixel
//!not just with the center but also with the mean of the window
template<int num_images>
struct MVKernel
{
uint8_t *image[num_images];
int *integralImage[num_images];
int stop;
MVKernel(){}
MVKernel(uint8_t **images, int **integral)
{
for(int i = 0; i < num_images; i++)
{
image[i] = images[i];
integralImage[i] = integral[i];
}
stop = num_images;
}
void operator()(int rrWidth,int w2, int rWidth, int jj, int j, int c[num_images]) const
{
(void)w2;
for (int i = 0; i < stop; i++)
{
if (image[i][rrWidth + jj] > image[i][rWidth + j])
{
c[i] = c[i] + 1;
}
c[i] = c[i] << 1;
if (integralImage[i][rrWidth + jj] > image[i][rWidth + j])
{
c[i] = c[i] + 1;
}
c[i] = c[i] << 1;
}
}
};
//!Compares pixels from a patch giving high weights to pixels in which
//!the intensity is higher. The other pixels receive a lower weight
template <int num_images>
struct MCTKernel
{
uint8_t *image[num_images];
int t,imageStop;
MCTKernel(){}
MCTKernel(uint8_t ** images, int threshold)
{
for(int i = 0; i < num_images; i++)
{
image[i] = images[i];
}
imageStop = num_images;
t = threshold;
}
void operator()(int rrWidth,int w2, int rWidth, int jj, int j, int c[num_images]) const
{
(void)w2;
for(int i = 0; i < imageStop; i++)
{
if (image[i][rrWidth + jj] > image[i][rWidth + j] - t)
{
c[i] = c[i] << 1;
c[i] = c[i] + 1;
c[i] = c[i] << 1;
c[i] = c[i] + 1;
}
else if (image[i][rWidth + j] - t < image[i][rrWidth + jj] && image[i][rWidth + j] + t >= image[i][rrWidth + jj])
{
c[i] = c[i] << 2;
c[i] = c[i] + 1;
}
else
{
c[i] <<= 2;
}
}
}
};
//!A modified CS census that compares a pixel with its immediate neighbour, starting
//!from the center
template<int num_images>
struct ModifiedCsCensus
{
uint8_t *image[num_images];
int n2;
int imageStop;
ModifiedCsCensus(){}
ModifiedCsCensus(uint8_t **images, int ker)
{
for(int i = 0; i < num_images; i++)
image[i] = images[i];
imageStop = num_images;
n2 = ker;
}
void operator()(int rrWidth,int w2, int rWidth, int jj, int j, int c[num_images]) const
{
(void)j;
(void)rWidth;
for(int i = 0; i < imageStop; i++)
{
if (image[i][(rrWidth + jj)] > image[i][(w2 + (jj + n2))])
{
c[i] = c[i] + 1;
}
c[i] = c[i] * 2;
}
}
};
//!A kernel in which a pixel is compared with the center of the window
template<int num_images>
struct CensusKernel
{
uint8_t *image[num_images];
int imageStop;
CensusKernel(){}
CensusKernel(uint8_t **images)
{
for(int i = 0; i < num_images; i++)
image[i] = images[i];
imageStop = num_images;
}
void operator()(int rrWidth,int w2, int rWidth, int jj, int j, int c[num_images]) const
{
(void)w2;
for(int i = 0; i < imageStop; i++)
{
//compare a pixel with the center of the kernel
if (image[i][rrWidth + jj] > image[i][rWidth + j])
{
c[i] += 1;
}
c[i] <<= 1;
}
}
};
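// Worked example (not part of this header): for the 3x3 patch
//     10 20 30
//     40 50 60
//     70 80 90
// the center is 50, and the eight "neighbour > center" comparisons taken in raster order
// give the bits 0 0 0 0 1 1 1 1, i.e. the census code 0b00001111 = 15. CensusKernel builds
// the same kind of bit string incrementally, adding a bit and shifting after every comparison.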
//template class which efficiently combines the descriptors
template <int step_start, int step_end, int step_inc,int nr_img, typename Kernel>
class CombinedDescriptor:public ParallelLoopBody
{
private:
int width, height,n2;
int stride_;
int *dst[nr_img];
Kernel kernel_;
int n2_stop;
public:
CombinedDescriptor(int w, int h,int stride, int k2, int **distance, Kernel kernel,int k2Stop)
{
width = w;
height = h;
n2 = k2;
stride_ = stride;
for(int i = 0; i < nr_img; i++)
dst[i] = distance[i];
kernel_ = kernel;
n2_stop = k2Stop;
}
void operator()(const cv::Range &r) const {
for (int i = r.start; i <= r.end ; i++)
{
int rWidth = i * stride_;
for (int j = n2 + 2; j <= width - n2 - 2; j++)
{
int c[nr_img];
memset(c, 0, nr_img * sizeof(int)); // zero the whole int array, not just nr_img bytes
for(int step = step_start; step <= step_end; step += step_inc)
{
for (int ii = - n2; ii <= + n2_stop; ii += step)
{
int rrWidth = (ii + i) * stride_;
int rrWidthC = (ii + i + n2) * stride_;
for (int jj = j - n2; jj <= j + n2; jj += step)
{
if (ii != i || jj != j)
{
kernel_(rrWidth,rrWidthC, rWidth, jj, j,c);
}
}
}
}
for(int l = 0; l < nr_img; l++)
dst[l][rWidth + j] = c[l];
}
}
}
};
//!calculate the mean of every windowSize x windowSize block from the integral image;
//!this is a preprocessing step for the MV kernel
class MeanKernelIntegralImage : public ParallelLoopBody
{
private:
int *img;
int windowSize,width;
float scalling;
int *c;
public:
MeanKernelIntegralImage(const cv::Mat &image, int window,float scale, int *cost):
img((int *)image.data),windowSize(window) ,width(image.cols) ,scalling(scale) , c(cost){};
void operator()(const cv::Range &r) const{
for (int i = r.start; i <= r.end; i++)
{
int iw = i * width;
for (int j = windowSize + 1; j <= width - windowSize - 1; j++)
{
c[iw + j] = (int)((img[(i + windowSize - 1) * width + j + windowSize - 1] + img[(i - windowSize - 1) * width + j - windowSize - 1]
- img[(i + windowSize) * width + j - windowSize] - img[(i - windowSize) * width + j + windowSize]) * scalling);
}
}
}
};
//!implementation for the star kernel descriptor
template<int num_images>
class StarKernelCensus:public ParallelLoopBody
{
private:
uint8_t *image[num_images];
int *dst[num_images];
int n2, width, height, im_num,stride_;
public:
StarKernelCensus(const cv::Mat *img, int k2, int **distance)
{
for(int i = 0; i < num_images; i++)
{
image[i] = img[i].data;
dst[i] = distance[i];
}
n2 = k2;
width = img[0].cols;
height = img[0].rows;
im_num = num_images;
stride_ = (int)img[0].step;
}
void operator()(const cv::Range &r) const {
for (int i = r.start; i <= r.end ; i++)
{
int rWidth = i * stride_;
for (int j = n2; j <= width - n2; j++)
{
for(int d = 0 ; d < im_num; d++)
{
int c = 0;
for(int step = 4; step > 0; step--)
{
for (int ii = i - step; ii <= i + step; ii += step)
{
int rrWidth = ii * stride_;
for (int jj = j - step; jj <= j + step; jj += step)
{
if (image[d][rrWidth + jj] > image[d][rWidth + j])
{
c = c + 1;
}
c = c * 2;
}
}
}
for (int ii = -1; ii <= +1; ii++)
{
int rrWidth = (ii + i) * stride_;
if (i == -1)
{
if (ii + i != i)
{
if (image[d][rrWidth + j] > image[d][rWidth + j])
{
c = c + 1;
}
c = c * 2;
}
}
else if (i == 0)
{
for (int j2 = -1; j2 <= 1; j2 += 2)
{
if (ii + i != i)
{
if (image[d][rrWidth + j + j2] > image[d][rWidth + j])
{
c = c + 1;
}
c = c * 2;
}
}
}
else
{
if (ii + i != i)
{
if (image[d][rrWidth + j] > image[d][rWidth + j])
{
c = c + 1;
}
c = c * 2;
}
}
}
dst[d][rWidth + j] = c;
}
}
}
}
};
//!parallel implementation of the center-symmetric census
template <int num_images>
class SymetricCensus:public ParallelLoopBody
{
private:
uint8_t *image[num_images];
int *dst[num_images];
int n2, width, height, im_num,stride_;
public:
SymetricCensus(const cv::Mat *img, int k2, int **distance)
{
for(int i = 0; i < num_images; i++)
{
image[i] = img[i].data;
dst[i] = distance[i];
}
n2 = k2;
width = img[0].cols;
height = img[0].rows;
im_num = num_images;
stride_ = (int)img[0].step;
}
void operator()(const cv::Range &r) const {
for (int i = r.start; i <= r.end ; i++)
{
int distV = i*stride_;
for (int j = n2; j <= width - n2; j++)
{
for(int d = 0; d < im_num; d++)
{
int c = 0;
//the classic center-symmetric census, which compares the current pixel with its symmetric counterpart rather than with the window center
for (int ii = -n2; ii <= 0; ii++)
{
int rrWidth = (ii + i) * stride_;
for (int jj = -n2; jj <= +n2; jj++)
{
if (image[d][(rrWidth + (jj + j))] > image[d][((ii * (-1) + i) * stride_ + (-1 * jj) + j)])
{
c = c + 1;
}
c = c * 2;
if(ii == 0 && jj < 0)
{
if (image[d][(i * stride_ + (jj + j))] > image[d][(i * stride_ + (-1 * jj) + j)])
{
c = c + 1;
}
c = c * 2;
}
}
}
dst[d][(distV + j)] = c;
}
}
}
}
};
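/*
Clarifying note (added): in the center-symmetric census each bit encodes the comparison
of a pixel with its mirror across the window center, i.e. bit = [I(y + dy, x + dx) >
I(y - dy, x - dx)], so only half of the window offsets are visited and the resulting
descriptor is roughly half as long as the one produced by the full census.
*/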
/**
Two variations of the census transform applied on the input images
Implementation of a census transform which takes into account just some of the pixels from the census kernel, thus allowing for larger block sizes
**/
//void applyCensusOnImages(const cv::Mat &im1,const cv::Mat &im2, int kernelSize, cv::Mat &dist, cv::Mat &dist2, const int type);
CV_EXPORTS void censusTransform(const cv::Mat &image1, const cv::Mat &image2, int kernelSize, cv::Mat &dist1, cv::Mat &dist2, const int type);
//single image census transform
CV_EXPORTS void censusTransform(const cv::Mat &image1, int kernelSize, cv::Mat &dist1, const int type);
/**
STANDARD_MCT - modified census transform which stores 2 bits per pixel and includes a tolerance in the pixel comparison
MCT_MEAN_VARIATION - modified census transform which also takes into account the variation from the mean of the window, not just the center pixel
**/
CV_EXPORTS void modifiedCensusTransform(const cv::Mat &img1, const cv::Mat &img2, int kernelSize, cv::Mat &dist1,cv::Mat &dist2, const int type, int t = 0 , const cv::Mat &IntegralImage1 = cv::Mat::zeros(100,100,CV_8UC1), const cv::Mat &IntegralImage2 = cv::Mat::zeros(100,100,CV_8UC1));
//single version of modified census transform descriptor
CV_EXPORTS void modifiedCensusTransform(const cv::Mat &img1, int kernelSize, cv::Mat &dist, const int type, int t = 0 ,const cv::Mat &IntegralImage = cv::Mat::zeros(100,100,CV_8UC1));
/**The classical center-symmetric census
A modified version of the CS census which compares a pixel with its correspondent mirrored across the center
**/
CV_EXPORTS void symetricCensusTransform(const cv::Mat &img1, const cv::Mat &img2, int kernelSize, cv::Mat &dist1, cv::Mat &dist2, const int type);
//single image version of the center-symmetric census transform
CV_EXPORTS void symetricCensusTransform(const cv::Mat &img1, int kernelSize, cv::Mat &dist1, const int type);
//in a 9x9 kernel only certain positions are chosen
CV_EXPORTS void starCensusTransform(const cv::Mat &img1, const cv::Mat &img2, int kernelSize, cv::Mat &dist1,cv::Mat &dist2);
//single image version of star kernel
CV_EXPORTS void starCensusTransform(const cv::Mat &img1, int kernelSize, cv::Mat &dist);
//integral image computation used in the Mean Variation Census Transform
void imageMeanKernelSize(const cv::Mat &img, int windowSize, cv::Mat &c);
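/*
Usage sketch (added; illustrative only - variable names are hypothetical and the buffer
types mirror the allocations made by the binary block-matching implementation below):

    cv::Mat left  = ...;                          // rectified CV_8UC1 left view
    cv::Mat right = ...;                          // rectified CV_8UC1 right view
    cv::Mat censusLeft(left.size(), CV_32SC4);    // descriptor buffers
    cv::Mat censusRight(left.size(), CV_32SC4);
    // dense census with a 9x9 kernel on both views at once
    censusTransform(left, right, 9, censusLeft, censusRight, CV_DENSE_CENSUS);
    // or, for example, the center-symmetric variant with a 7x7 kernel
    symetricCensusTransform(left, right, 7, censusLeft, censusRight, CV_CS_CENSUS);
*/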
}
}
#endif
#endif
/*End of file*/
//By downloading, copying, installing or using the software you agree to this license.
//If you do not agree to this license, do not download, install,
//copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
// (3-clause BSD License)
//
//Copyright (C) 2000-2015, Intel Corporation, all rights reserved.
//Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
//Copyright (C) 2009-2015, NVIDIA Corporation, all rights reserved.
//Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
//Copyright (C) 2015, OpenCV Foundation, all rights reserved.
//Copyright (C) 2015, Itseez Inc., all rights reserved.
//Third party copyrights are property of their respective owners.
//
//Redistribution and use in source and binary forms, with or without modification,
//are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * Neither the names of the copyright holders nor the names of the contributors
// may be used to endorse or promote products derived from this software
// without specific prior written permission.
//
//This software is provided by the copyright holders and contributors "as is" and
//any express or implied warranties, including, but not limited to, the implied
//warranties of merchantability and fitness for a particular purpose are disclaimed.
//In no event shall copyright holders or contributors be liable for any direct,
//indirect, incidental, special, exemplary, or consequential damages
//(including, but not limited to, procurement of substitute goods or services;
//loss of use, data, or profits; or business interruption) however caused
//and on any theory of liability, whether in contract, strict liability,
//or tort (including negligence or otherwise) arising in any way out of
//the use of this software, even if advised of the possibility of such damage.
/*****************************************************************************************************************\
* The interface contains the main methods for computing the matching between the left and right images *
* *
\******************************************************************************************************************/
#include "precomp.hpp"
#include <stdint.h>
#ifndef _OPENCV_MATCHING_HPP_
#define _OPENCV_MATCHING_HPP_
#ifdef __cplusplus
namespace cv
{
namespace stereo
{
class Matching
{
private:
//!The maximum disparity
int maxDisparity;
//!the factor by which we are multiplying the disparity
int scallingFactor;
//!the confidence threshold used to decide whether a minimum found in the cost volume is reliable
double confidenceCheck;
//!the LUT used in case SSE is not available
int hamLut[65537];
//!function used for getting the minimum disparity from the cost volume
static int minim(short *c, int iwpj, int widthDisp,const double confidence, const int search_region)
{
double mini, mini2, mini3;
mini = mini2 = mini3 = DBL_MAX;
int index = 0;
int iw = iwpj;
int widthDisp2;
widthDisp2 = widthDisp;
widthDisp -= 1;
for (int i = 0; i <= widthDisp; i++)
{
if (c[(iw + i * search_region) * widthDisp2 + i] < mini)
{
mini3 = mini2;
mini2 = mini;
mini = c[(iw + i * search_region) * widthDisp2 + i];
index = i;
}
else if (c[(iw + i * search_region) * widthDisp2 + i] < mini2)
{
mini3 = mini2;
mini2 = c[(iw + i * search_region) * widthDisp2 + i];
}
else if (c[(iw + i * search_region) * widthDisp2 + i] < mini3)
{
mini3 = c[(iw + i * search_region) * widthDisp2 + i];
}
}
if(mini != 0)
{
if (mini3 / mini <= confidence)
return index;
}
return -1;
}
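/*
Worked example (added for clarity): with the default confidence of 6 and a best cost
mini = 10, the winning index is returned only when the third-smallest cost satisfies
mini3 / mini <= 6, i.e. mini3 <= 60; otherwise (or when mini == 0) -1 is returned and
the pixel is later treated as unmatched.
*/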
//!Interpolate in order to obtain better results
//!function for refining the disparity at sub-pixel level using symmetric V interpolation
static double symetricVInterpolation(short *c, int iwjp, int widthDisp, int winDisp,const int search_region)
{
if (winDisp == 0 || winDisp == widthDisp - 1)
return winDisp;
double m2m1, m3m1, m3, m2, m1;
m2 = c[(iwjp + (winDisp - 1) * search_region) * widthDisp + winDisp - 1];
m3 = c[(iwjp + (winDisp + 1) * search_region)* widthDisp + winDisp + 1];
m1 = c[(iwjp + winDisp * search_region) * widthDisp + winDisp];
m2m1 = m2 - m1;
m3m1 = m3 - m1;
if (m2m1 == 0 || m3m1 == 0) return winDisp;
double p;
p = 0;
if (m2 > m3)
{
p = (0.5 - 0.25 * ((m3m1 * m3m1) / (m2m1 * m2m1) + (m3m1 / m2m1)));
}
else
{
p = -1 * (0.5 - 0.25 * ((m2m1 * m2m1) / (m3m1 * m3m1) + (m2m1 / m3m1)));
}
if (p >= -0.5 && p <= 0.5)
p = winDisp + p;
return p;
}
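/*
Math note (added): with m1 the cost at the winning disparity d and m2, m3 the costs at
d - 1 and d + 1, the correction computed above is
    p = +(0.5 - 0.25 * ((m3 - m1)^2 / (m2 - m1)^2 + (m3 - m1) / (m2 - m1)))   if m2 > m3
    p = -(0.5 - 0.25 * ((m2 - m1)^2 / (m3 - m1)^2 + (m2 - m1) / (m3 - m1)))   otherwise
and the refined disparity is d + p whenever the correction p lies in [-0.5, 0.5];
degenerate cases (border disparities or a flat cost curve) return the integer disparity d.
*/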
//!a preprocessing function that generates the Hamming LUT, used on platforms where the SSE popcount instruction is not available
void hammingLut()
{
for (int i = 0; i <= 65536; i++)
{
int dist = 0;
int j = i;
//count the set bits by repeatedly clearing the lowest one
while (j)
{
dist = dist + 1;
j = j & (j - 1);
}
hamLut[i] = dist;
}
}
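/*
Worked example (added): the loop above clears the lowest set bit on every iteration
(j & (j - 1)), so for i = 0b1011 it runs three times, 1011 -> 1010 -> 1000 -> 0000,
and stores hamLut[11] = 3.
*/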
//!the class used in computing the hamming distance
class hammingDistance : public ParallelLoopBody
{
private:
int *left, *right;
short *c;
int v,kernelSize, width, height;
int MASK;
int *hammLut;
public :
hammingDistance(const Mat &leftImage, const Mat &rightImage, short *cost, int maxDisp, int kerSize, int *hammingLUT):
left((int *)leftImage.data), right((int *)rightImage.data), c(cost), v(maxDisp),kernelSize(kerSize),width(leftImage.cols), height(leftImage.rows), MASK(65535), hammLut(hammingLUT){}
void operator()(const cv::Range &r) const {
for (int i = r.start; i <= r.end ; i++)
{
int iw = i * width;
for (int j = kernelSize; j < width - kernelSize; j++)
{
int j2;
int xorul;
int iwj;
iwj = iw + j;
for (int d = 0; d <= v; d++)
{
j2 = (0 > j - d) ? (0) : (j - d);
xorul = left[(iwj)] ^ right[(iw + j2)];
#if CV_SSE4_1
c[(iwj)* (v + 1) + d] = (short)_mm_popcnt_u32(xorul);
#else
c[(iwj)* (v + 1) + d] = (short)(hammLut[xorul & MASK] + hammLut[(xorul >> 16) & MASK]);
#endif
}
}
}
}
};
//!cost aggregation
class agregateCost:public ParallelLoopBody
{
private:
int win;
short *c, *parSum;
int maxDisp,width, height;
public:
agregateCost(const Mat &partialSums, int windowSize, int maxDispa, Mat &cost)
{
win = windowSize / 2;
c = (short *)cost.data;
maxDisp = maxDispa;
width = cost.cols / ( maxDisp + 1) - 1;
height = cost.rows - 1;
parSum = (short *)partialSums.data;
}
void operator()(const cv::Range &r) const {
for (int i = r.start; i <= r.end; i++)
{
int iwi = (i - 1) * width;
for (int j = win + 1; j <= width - win - 1; j++)
{
int w1 = ((i + win + 1) * width + j + win) * (maxDisp + 1);
int w2 = ((i - win) * width + j - win - 1) * (maxDisp + 1);
int w3 = ((i + win + 1) * width + j - win - 1) * (maxDisp + 1);
int w4 = ((i - win) * width + j + win) * (maxDisp + 1);
int w = (iwi + j - 1) * (maxDisp + 1);
for (int d = 0; d <= maxDisp; d++)
{
c[w + d] = parSum[w1 + d] + parSum[w2 + d]
- parSum[w3 + d] - parSum[w4 + d];
}
}
}
}
};
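/*
Background note (added): parSum is expected to hold, for every disparity d, the 2D
prefix sums of the per-pixel Hamming costs (built by costGathering below), so the
four-corner combination parSum[w1] + parSum[w2] - parSum[w3] - parSum[w4] yields the
cost summed over the (2 * win + 1) x (2 * win + 1) aggregation window around (i, j)
in O(1) per disparity.
*/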
//!class that is responsible for generating the disparity map
class makeMap:public ParallelLoopBody
{
private:
//enum used to indicate whether we are searching on the vertical (left-right check) or on the diagonal (right-left check)
enum {CV_VERTICAL_SEARCH, CV_DIAGONAL_SEARCH};
int width,disparity,scallingFact,th;
double confCheck;
uint8_t *map;
short *c;
public:
makeMap(const Mat &costVolume, int threshold, int maxDisp, double confidence,int scale, Mat &mapFinal)
{
c = (short *)costVolume.data;
map = mapFinal.data;
disparity = maxDisp;
width = costVolume.cols / ( disparity + 1) - 1;
th = threshold;
scallingFact = scale;
confCheck = confidence;
}
void operator()(const cv::Range &r) const {
for (int i = r.start; i <= r.end ; i++)
{
int lr;
int v = -1;
double p1, p2;
int iw = i * width;
for (int j = 0; j < width; j++)
{
lr = Matching:: minim(c, iw + j, disparity + 1, confCheck,CV_VERTICAL_SEARCH);
if (lr != -1)
{
v = Matching::minim(c, iw + j - lr, disparity + 1, confCheck,CV_DIAGONAL_SEARCH);
if (v != -1)
{
p1 = Matching::symetricVInterpolation(c, iw + j - lr, disparity + 1, v,CV_DIAGONAL_SEARCH);
p2 = Matching::symetricVInterpolation(c, iw + j, disparity + 1, lr,CV_VERTICAL_SEARCH);
if (abs(p1 - p2) <= th)
map[iw + j] = (uint8_t)((p2)* scallingFact);
else
{
map[iw + j] = 0;
}
}
else
{
if (width - j <= disparity)
{
p2 = Matching::symetricVInterpolation(c, iw + j, disparity + 1, lr,CV_VERTICAL_SEARCH);
map[iw + j] = (uint8_t)(p2* scallingFact);
}
}
}
else
{
map[iw + j] = 0;
}
}
}
}
};
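/*
Clarifying note (added): for every pixel the functor above first finds the best
disparity along the vertical direction of the cost volume (the left-to-right match),
then repeats the minimum search along the diagonal starting at the matched column
(the right-to-left check); the two sub-pixel estimates must agree within the threshold
th, otherwise the pixel is invalidated (set to 0).
*/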
//!median 1x9 parallelized filter
template <typename T>
class Median1x9:public ParallelLoopBody
{
private:
T *original;
T *filtered;
int height, width;
public:
Median1x9(const Mat &originalImage, Mat &filteredImage)
{
original = (T *)originalImage.data;
filtered = (T *)filteredImage.data;
height = originalImage.rows;
width = originalImage.cols;
}
void operator()(const cv::Range &r) const{
for (int m = r.start; m <= r.end; m++)
{
for (int n = 4; n < width - 4; ++n)
{
int k = 0;
T window[9];
for (int i = n - 4; i <= n + 4; ++i)
window[k++] = original[m * width + i];
for (int j = 0; j < 5; ++j)
{
int min = j;
for (int l = j + 1; l < 9; ++l)
if (window[l] < window[min])
min = l;
const T temp = window[j];
window[j] = window[min];
window[min] = temp;
}
filtered[m * width + n] = window[4];
}
}
}
};
//!median 9x1 parallelized filter
template <typename T>
class Median9x1:public ParallelLoopBody
{
private:
T *original;
T *filtered;
int height, width;
public:
Median9x1(const Mat &originalImage, Mat &filteredImage)
{
original = (T *)originalImage.data;
filtered = (T *)filteredImage.data;
height = originalImage.rows;
width = originalImage.cols;
}
void operator()(const Range &r) const{
for (int n = r.start; n <= r.end; ++n)
{
for (int m = 4; m < height - 4; ++m)
{
int k = 0;
T window[9];
for (int i = m - 4; i <= m + 4; ++i)
window[k++] = original[i * width + n];
for (int j = 0; j < 5; j++)
{
int min = j;
for (int l = j + 1; l < 9; ++l)
if (window[l] < window[min])
min = l;
const T temp = window[j];
window[j] = window[min];
window[min] = temp;
}
filtered[m * width + n] = window[4];
}
}
}
};
protected:
//arrays used in the region removal
Mat speckleY;
Mat speckleX;
Mat puss;
//int *specklePointX;
//int *specklePointY;
//long long *pus;
int previous_size;
//!method for setting the maximum disparity
void setMaxDisparity(int val)
{
CV_Assert(val > 10);
this->maxDisparity = val;
}
//!method for getting the maximum disparity
int getMaxDisparity()
{
return this->maxDisparity;
}
//! a number by which the disparity will be multiplied for better display
void setScallingFactor(int val)
{
CV_Assert(val > 0);
this->scallingFactor = val;
}
//!method for getting the scaling factor
int getScallingFactor()
{
return scallingFactor;
}
//!setter for the confidence check
void setConfidence(double val)
{
CV_Assert(val >= 1);
this->confidenceCheck = val;
}
//getter for confidence check
double getConfidence()
{
return confidenceCheck;
}
//! Hamming distance computation method
//! leftImage and rightImage are the two transformed images
//! the cost is the resulted cost volume and kernel Size is the size of the matching window
void hammingDistanceBlockMatching(const Mat &leftImage, const Mat &rightImage, Mat &cost, const int kernelSize= 9)
{
CV_Assert(leftImage.cols == rightImage.cols);
CV_Assert(leftImage.rows == rightImage.rows);
CV_Assert(kernelSize % 2 != 0);
CV_Assert(cost.rows == leftImage.rows);
CV_Assert(cost.cols / (maxDisparity + 1) == leftImage.cols);
short *c = (short *)cost.data;
memset(c, 0, sizeof(c[0]) * leftImage.cols * leftImage.rows * (maxDisparity + 1));
parallel_for_(cv::Range(kernelSize / 2,leftImage.rows - kernelSize / 2), hammingDistance(leftImage,rightImage,(short *)cost.data,maxDisparity,kernelSize / 2,hamLut));
}
//preprocessing the cost volume in order to get it ready for aggregation
void costGathering(const Mat &hammingDistanceCost, Mat &cost)
{
CV_Assert(hammingDistanceCost.rows == cost.rows - 1); //the gathered cost volume keeps one extra row of padding
CV_Assert(hammingDistanceCost.type() == CV_16S);
CV_Assert(cost.type() == CV_16S);
int maxDisp = maxDisparity;
int width = cost.cols / ( maxDisp + 1) - 1;
int height = cost.rows - 1;
short *c = (short *)cost.data;
short *ham = (short *)hammingDistanceCost.data;
memset(c, 0, sizeof(c[0]) * (width + 1) * (height + 1) * (maxDisp + 1));
for (int i = 1; i <= height; i++)
{
int iw = i * width;
int iwi = (i - 1) * width;
for (int j = 1; j <= width; j++)
{
int iwj = (iw + j) * (maxDisp + 1);
int iwjmu = (iw + j - 1) * (maxDisp + 1);
int iwijmu = (iwi + j - 1) * (maxDisp + 1);
for (int d = 0; d <= maxDisp; d++)
{
c[iwj + d] = ham[iwijmu + d] + c[iwjmu + d];
}
}
}
for (int i = 1; i <= height; i++)
{
for (int j = 1; j <= width; j++)
{
int iwj = (i * width + j) * (maxDisp + 1);
int iwjmu = ((i - 1) * width + j) * (maxDisp + 1);
for (int d = 0; d <= maxDisp; d++)
{
c[iwj + d] += c[iwjmu + d];
}
}
}
}
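/*
Background note (added): the two passes above turn the raw Hamming cost volume into a
per-disparity summed-area table - the first pass accumulates row-wise prefix sums of
ham(., ., d) and the second pass accumulates them column-wise, so that c(i, j, d) holds
the sum of the costs above and to the left of (i, j) (the one-row/one-column shift
provides the padding border consumed by blockAgregation).
*/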
//!The aggregation on the cost volume
void blockAgregation(const Mat &partialSums, int windowSize, Mat &cost)
{
CV_Assert(windowSize % 2 != 0);
CV_Assert(partialSums.rows == cost.rows);
CV_Assert(partialSums.cols == cost.cols);
int win = windowSize / 2;
short *c = (short *)cost.data;
int maxDisp = maxDisparity;
int width = cost.cols / ( maxDisp + 1) - 1;
int height = cost.rows - 1;
memset(c, 0, sizeof(c[0]) * width * height * (maxDisp + 1));
parallel_for_(cv::Range(win + 1,height - win - 1), agregateCost(partialSums,windowSize,maxDisp,cost));
}
//!remove the small regions (speckles) that have an area smaller than t; each removed region is filled with the average of the valid pixels around it
template <typename T>
void smallRegionRemoval(const Mat &currentMap, int t, Mat &out)
{
CV_Assert(currentMap.cols == out.cols);
CV_Assert(currentMap.rows == out.rows);
CV_Assert(t >= 0);
int *pus = (int *)puss.data;
int *specklePointX = (int *)speckleX.data;
int *specklePointY = (int *)speckleY.data;
memset(pus, 0, previous_size * sizeof(pus[0]));
T *map = (T *)currentMap.data;
T *outputMap = (T *)out.data;
int height = currentMap.rows;
int width = currentMap.cols;
T k = 1;
int st, dr;
int di[] = { -1, -1, -1, 0, 1, 1, 1, 0 },
dj[] = { -1, 0, 1, 1, 1, 0, -1, -1 };
int speckle_size = 0;
st = 0;
dr = 0;
for (int i = 1; i < height - 1; i++)
{
int iw = i * width;
for (int j = 1; j < width - 1; j++)
{
if (map[iw + j] != 0)
{
outputMap[iw + j] = map[iw + j];
}
else if (map[iw + j] == 0)
{
T nr = 1;
T avg = 0;
speckle_size = dr;
specklePointX[dr] = i;
specklePointY[dr] = j;
pus[i * width + j] = 1;
dr++;
map[iw + j] = k;
while (st < dr)
{
int ii = specklePointX[st];
int jj = specklePointY[st];
//going on 8 directions
for (int d = 0; d < 8; d++)
{//if inside the image bounds
if (ii + di[d] >= 0 && ii + di[d] < height && jj + dj[d] >= 0 && jj + dj[d] < width &&
pus[(ii + di[d]) * width + jj + dj[d]] == 0)
{
T val = map[(ii + di[d]) * width + jj + dj[d]];
if (val == 0)
{
map[(ii + di[d]) * width + jj + dj[d]] = k;
specklePointX[dr] = (ii + di[d]);
specklePointY[dr] = (jj + dj[d]);
dr++;
pus[(ii + di[d]) * width + jj + dj[d]] = 1;
}//this means that my point is a good point to be used in computing the final filling value
else if (val >= 1 && val < 250)
{
avg += val;
nr++;
}
}
}
st++;
}//if hole size is smaller than a specified threshold we fill the respective hole with the average of the good neighbours
if (st - speckle_size <= t)
{
T fillValue = (T)(avg / nr);
while (speckle_size < st)
{
int ii = specklePointX[speckle_size];
int jj = specklePointY[speckle_size];
outputMap[ii * width + jj] = fillValue;
speckle_size++;
}
}
}
}
}
}
//!Method responsible for generating the disparity map
//!function for generating disparity maps at sub-pixel level
/* costVolume - represents the cost volume
* width, height - represent the width and height of the image
* disparity - represents the maximum disparity
* map - is the disparity map that will result
* th - is the LR threshold
*/
void dispartyMapFormation(const Mat &costVolume, Mat &mapFinal, int th)
{
uint8_t *map = mapFinal.data;
int disparity = maxDisparity;
int width = costVolume.cols / ( disparity + 1) - 1;
int height = costVolume.rows - 1;
memset(map, 0, sizeof(map[0]) * width * height);
parallel_for_(Range(0,height - 1), makeMap(costVolume,th,disparity,confidenceCheck,scallingFactor,mapFinal));
}
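/*
Pipeline sketch (added for clarity; the call order mirrors the binary block-matching
implementation further below, and all variable names are illustrative):

    // census descriptors of the two rectified views (CV_32SC4 buffers)
    censusTransform(left, right, kernelSize, censusL, censusR, CV_DENSE_CENSUS);
    // per-pixel, per-disparity Hamming costs: rows x (cols * (maxDisparity + 1)), CV_16S
    hammingDistanceBlockMatching(censusL, censusR, hammingCost);
    // per-disparity summed-area tables with one extra row/column of padding
    costGathering(hammingCost, partialSums);
    // windowed aggregation of the gathered costs
    blockAgregation(partialSums, aggregationWindowSize, aggregatedCost);
    // winner-take-all + left-right check + sub-pixel refinement into an 8-bit map
    dispartyMapFormation(aggregatedCost, disparityMap, lrThreshold);
    // a 1x9 and a 9x1 median filter and optional speckle removal are applied afterwards
*/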
public:
//!a median filter of 1x9 and 9x1
//!1x9 median filter
template<typename T>
void Median1x9Filter(const Mat &originalImage, Mat &filteredImage)
{
CV_Assert(originalImage.rows == filteredImage.rows);
CV_Assert(originalImage.cols == filteredImage.cols);
parallel_for_(Range(1,originalImage.rows - 2), Median1x9<T>(originalImage,filteredImage));
}
//!9x1 median filter
template<typename T>
void Median9x1Filter(const Mat &originalImage, Mat &filteredImage)
{
CV_Assert(originalImage.rows == filteredImage.rows);
CV_Assert(originalImage.cols == filteredImage.cols);
parallel_for_(Range(1,originalImage.cols - 2), Median9x1<T>(originalImage,filteredImage));
}
//!default constructor for the matching class; it only precomputes the Hamming LUT
Matching(void)
{
hammingLut();
}
~Matching(void)
{
}
//constructor for the matching class
//maxDisp - represents the maximum disparity
//confidence - represents the confidence check
Matching(int maxDisp, int scalling = 4, int confidence = 6)
{
//set the maximum disparity
setMaxDisparity(maxDisp);
//set scalling factor
setScallingFactor(scalling);
//set the value for the confidence
setConfidence(confidence);
//generate the hamming lut in case SSE is not available
hammingLut();
}
};
}
}
#endif
#endif
/*End of file*/
\ No newline at end of file
...@@ -56,4 +56,3 @@ ...@@ -56,4 +56,3 @@
#include <cmath> #include <cmath>
#endif #endif
...@@ -46,693 +46,482 @@ ...@@ -46,693 +46,482 @@
\****************************************************************************************/ \****************************************************************************************/
#include "precomp.hpp" #include "precomp.hpp"
#include "descriptor.hpp"
#include "matching.hpp"
#include <stdio.h> #include <stdio.h>
#include <limits> #include <limits>
namespace cv namespace cv
{ {
namespace stereo namespace stereo
{ {
struct StereoBinaryBMParams
{ struct StereoBinaryBMParams
StereoBinaryBMParams(int _numDisparities = 64, int _SADWindowSize = 9) {
{ StereoBinaryBMParams(int _numDisparities = 64, int _kernelSize = 9)
preFilterType = StereoBinaryBM::PREFILTER_XSOBEL; {
preFilterSize = 9; preFilterType = StereoBinaryBM::PREFILTER_XSOBEL;
preFilterCap = 31; preFilterSize = 9;
SADWindowSize = _SADWindowSize; preFilterCap = 31;
minDisparity = 0; kernelSize = _kernelSize;
numDisparities = _numDisparities > 0 ? _numDisparities : 64; minDisparity = 0;
textureThreshold = 10; numDisparities = _numDisparities > 0 ? _numDisparities : 64;
uniquenessRatio = 15; textureThreshold = 10;
speckleRange = speckleWindowSize = 0; uniquenessRatio = 15;
roi1 = roi2 = Rect(0, 0, 0, 0); speckleRange = speckleWindowSize = 0;
disp12MaxDiff = -1; disp12MaxDiff = -1;
dispType = CV_16S; dispType = CV_16S;
} usePrefilter = false;
regionRemoval = 1;
int preFilterType; scalling = 4;
int preFilterSize; kernelType = CV_MODIFIED_CENSUS_TRANSFORM;
int preFilterCap; agregationWindowSize = 9;
int SADWindowSize; }
int minDisparity;
int numDisparities; int preFilterType;
int textureThreshold; int preFilterSize;
int uniquenessRatio; int preFilterCap;
int speckleRange; int kernelSize;
int speckleWindowSize; int minDisparity;
Rect roi1, roi2; int numDisparities;
int disp12MaxDiff; int textureThreshold;
int dispType; int uniquenessRatio;
}; int speckleRange;
int speckleWindowSize;
static void prefilterNorm(const Mat& src, Mat& dst, int winsize, int ftzero, uchar* buf) int disp12MaxDiff;
{ int dispType;
int x, y, wsz2 = winsize / 2; int scalling;
int* vsum = (int*)alignPtr(buf + (wsz2 + 1)*sizeof(vsum[0]), 32); bool usePrefilter;
int scale_g = winsize*winsize / 8, scale_s = (1024 + scale_g) / (scale_g * 2); int regionRemoval;
const int OFS = 256 * 5, TABSZ = OFS * 2 + 256; int kernelType;
uchar tab[TABSZ]; int agregationWindowSize;
const uchar* sptr = src.ptr(); };
int srcstep = (int)src.step;
Size size = src.size(); static void prefilterNorm(const Mat& src, Mat& dst, int winsize, int ftzero, uchar* buf)
{
scale_g *= scale_s; int x, y, wsz2 = winsize / 2;
int* vsum = (int*)alignPtr(buf + (wsz2 + 1)*sizeof(vsum[0]), 32);
for (x = 0; x < TABSZ; x++) int scale_g = winsize*winsize / 8, scale_s = (1024 + scale_g) / (scale_g * 2);
tab[x] = (uchar)(x - OFS < -ftzero ? 0 : x - OFS > ftzero ? ftzero * 2 : x - OFS + ftzero); const int OFS = 256 * 5, TABSZ = OFS * 2 + 256;
uchar tab[TABSZ];
for (x = 0; x < size.width; x++) const uchar* sptr = src.ptr();
vsum[x] = (ushort)(sptr[x] * (wsz2 + 2)); int srcstep = (int)src.step;
Size size = src.size();
for (y = 1; y < wsz2; y++)
{ scale_g *= scale_s;
for (x = 0; x < size.width; x++)
vsum[x] = (ushort)(vsum[x] + sptr[srcstep*y + x]); for (x = 0; x < TABSZ; x++)
} tab[x] = (uchar)(x - OFS < -ftzero ? 0 : x - OFS > ftzero ? ftzero * 2 : x - OFS + ftzero);
for (y = 0; y < size.height; y++) for (x = 0; x < size.width; x++)
{ vsum[x] = (ushort)(sptr[x] * (wsz2 + 2));
const uchar* top = sptr + srcstep*MAX(y - wsz2 - 1, 0); for (y = 1; y < wsz2; y++)
const uchar* bottom = sptr + srcstep*MIN(y + wsz2, size.height - 1); {
const uchar* prev = sptr + srcstep*MAX(y - 1, 0); for (x = 0; x < size.width; x++)
const uchar* curr = sptr + srcstep*y; vsum[x] = (ushort)(vsum[x] + sptr[srcstep*y + x]);
const uchar* next = sptr + srcstep*MIN(y + 1, size.height - 1); }
uchar* dptr = dst.ptr<uchar>(y); for (y = 0; y < size.height; y++)
{
for (x = 0; x < size.width; x++) const uchar* top = sptr + srcstep*MAX(y - wsz2 - 1, 0);
vsum[x] = (ushort)(vsum[x] + bottom[x] - top[x]); const uchar* bottom = sptr + srcstep*MIN(y + wsz2, size.height - 1);
const uchar* prev = sptr + srcstep*MAX(y - 1, 0);
for (x = 0; x <= wsz2; x++) const uchar* curr = sptr + srcstep*y;
{ const uchar* next = sptr + srcstep*MIN(y + 1, size.height - 1);
vsum[-x - 1] = vsum[0]; uchar* dptr = dst.ptr<uchar>(y);
vsum[size.width + x] = vsum[size.width - 1]; for (x = 0; x < size.width; x++)
} vsum[x] = (ushort)(vsum[x] + bottom[x] - top[x]);
int sum = vsum[0] * (wsz2 + 1); for (x = 0; x <= wsz2; x++)
for (x = 1; x <= wsz2; x++) {
sum += vsum[x]; vsum[-x - 1] = vsum[0];
vsum[size.width + x] = vsum[size.width - 1];
int val = ((curr[0] * 5 + curr[1] + prev[0] + next[0])*scale_g - sum*scale_s) >> 10; }
dptr[0] = tab[val + OFS];
int sum = vsum[0] * (wsz2 + 1);
for (x = 1; x < size.width - 1; x++) for (x = 1; x <= wsz2; x++)
{ sum += vsum[x];
sum += vsum[x + wsz2] - vsum[x - wsz2 - 1]; int val = ((curr[0] * 5 + curr[1] + prev[0] + next[0])*scale_g - sum*scale_s) >> 10;
val = ((curr[x] * 4 + curr[x - 1] + curr[x + 1] + prev[x] + next[x])*scale_g - sum*scale_s) >> 10; dptr[0] = tab[val + OFS];
dptr[x] = tab[val + OFS]; for (x = 1; x < size.width - 1; x++)
} {
sum += vsum[x + wsz2] - vsum[x - wsz2 - 1];
sum += vsum[x + wsz2] - vsum[x - wsz2 - 1]; val = ((curr[x] * 4 + curr[x - 1] + curr[x + 1] + prev[x] + next[x])*scale_g - sum*scale_s) >> 10;
val = ((curr[x] * 5 + curr[x - 1] + prev[x] + next[x])*scale_g - sum*scale_s) >> 10; dptr[x] = tab[val + OFS];
dptr[x] = tab[val + OFS]; }
}
} sum += vsum[x + wsz2] - vsum[x - wsz2 - 1];
val = ((curr[x] * 5 + curr[x - 1] + prev[x] + next[x])*scale_g - sum*scale_s) >> 10;
static void dptr[x] = tab[val + OFS];
prefilterXSobel(const Mat& src, Mat& dst, int ftzero) }
{ }
int x, y;
const int OFS = 256 * 4, TABSZ = OFS * 2 + 256; static void
uchar tab[TABSZ]; prefilterXSobel(const Mat& src, Mat& dst, int ftzero)
Size size = src.size(); {
int x, y;
for (x = 0; x < TABSZ; x++) const int OFS = 256 * 4, TABSZ = OFS * 2 + 256;
tab[x] = (uchar)(x - OFS < -ftzero ? 0 : x - OFS > ftzero ? ftzero * 2 : x - OFS + ftzero); uchar tab[TABSZ];
uchar val0 = tab[0 + OFS]; Size size = src.size();
for (x = 0; x < TABSZ; x++)
tab[x] = (uchar)(x - OFS < -ftzero ? 0 : x - OFS > ftzero ? ftzero * 2 : x - OFS + ftzero);
uchar val0 = tab[0 + OFS];
#if CV_SSE2 #if CV_SSE2
volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE2); volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE2);
#endif #endif
for (y = 0; y < size.height - 1; y += 2) for (y = 0; y < size.height - 1; y += 2)
{ {
const uchar* srow1 = src.ptr<uchar>(y); const uchar* srow1 = src.ptr<uchar>(y);
const uchar* srow0 = y > 0 ? srow1 - src.step : size.height > 1 ? srow1 + src.step : srow1; const uchar* srow0 = y > 0 ? srow1 - src.step : size.height > 1 ? srow1 + src.step : srow1;
const uchar* srow2 = y < size.height - 1 ? srow1 + src.step : size.height > 1 ? srow1 - src.step : srow1; const uchar* srow2 = y < size.height - 1 ? srow1 + src.step : size.height > 1 ? srow1 - src.step : srow1;
const uchar* srow3 = y < size.height - 2 ? srow1 + src.step * 2 : srow1; const uchar* srow3 = y < size.height - 2 ? srow1 + src.step * 2 : srow1;
uchar* dptr0 = dst.ptr<uchar>(y); uchar* dptr0 = dst.ptr<uchar>(y);
uchar* dptr1 = dptr0 + dst.step; uchar* dptr1 = dptr0 + dst.step;
dptr0[0] = dptr0[size.width - 1] = dptr1[0] = dptr1[size.width - 1] = val0; dptr0[0] = dptr0[size.width - 1] = dptr1[0] = dptr1[size.width - 1] = val0;
x = 1; x = 1;
#if CV_SSE2 #if CV_SSE2
if (useSIMD) if (useSIMD)
{ {
__m128i z = _mm_setzero_si128(), ftz = _mm_set1_epi16((short)ftzero), __m128i z = _mm_setzero_si128(), ftz = _mm_set1_epi16((short)ftzero),
ftz2 = _mm_set1_epi8(cv::saturate_cast<uchar>(ftzero * 2)); ftz2 = _mm_set1_epi8(cv::saturate_cast<uchar>(ftzero * 2));
for (; x <= size.width - 9; x += 8) for (; x <= size.width - 9; x += 8)
{ {
__m128i c0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow0 + x - 1)), z); __m128i c0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow0 + x - 1)), z);
__m128i c1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow1 + x - 1)), z); __m128i c1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow1 + x - 1)), z);
__m128i d0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow0 + x + 1)), z); __m128i d0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow0 + x + 1)), z);
__m128i d1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow1 + x + 1)), z); __m128i d1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow1 + x + 1)), z);
d0 = _mm_sub_epi16(d0, c0); d0 = _mm_sub_epi16(d0, c0);
d1 = _mm_sub_epi16(d1, c1); d1 = _mm_sub_epi16(d1, c1);
__m128i c2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow2 + x - 1)), z); __m128i c2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow2 + x - 1)), z);
__m128i c3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow3 + x - 1)), z); __m128i c3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow3 + x - 1)), z);
__m128i d2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow2 + x + 1)), z); __m128i d2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow2 + x + 1)), z);
__m128i d3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow3 + x + 1)), z); __m128i d3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow3 + x + 1)), z);
d2 = _mm_sub_epi16(d2, c2); d2 = _mm_sub_epi16(d2, c2);
d3 = _mm_sub_epi16(d3, c3); d3 = _mm_sub_epi16(d3, c3);
__m128i v0 = _mm_add_epi16(d0, _mm_add_epi16(d2, _mm_add_epi16(d1, d1))); __m128i v0 = _mm_add_epi16(d0, _mm_add_epi16(d2, _mm_add_epi16(d1, d1)));
__m128i v1 = _mm_add_epi16(d1, _mm_add_epi16(d3, _mm_add_epi16(d2, d2))); __m128i v1 = _mm_add_epi16(d1, _mm_add_epi16(d3, _mm_add_epi16(d2, d2)));
v0 = _mm_packus_epi16(_mm_add_epi16(v0, ftz), _mm_add_epi16(v1, ftz)); v0 = _mm_packus_epi16(_mm_add_epi16(v0, ftz), _mm_add_epi16(v1, ftz));
v0 = _mm_min_epu8(v0, ftz2); v0 = _mm_min_epu8(v0, ftz2);
_mm_storel_epi64((__m128i*)(dptr0 + x), v0); _mm_storel_epi64((__m128i*)(dptr0 + x), v0);
_mm_storel_epi64((__m128i*)(dptr1 + x), _mm_unpackhi_epi64(v0, v0)); _mm_storel_epi64((__m128i*)(dptr1 + x), _mm_unpackhi_epi64(v0, v0));
} }
} }
#endif #endif
for (; x < size.width - 1; x++) for (; x < size.width - 1; x++)
{ {
int d0 = srow0[x + 1] - srow0[x - 1], d1 = srow1[x + 1] - srow1[x - 1], int d0 = srow0[x + 1] - srow0[x - 1], d1 = srow1[x + 1] - srow1[x - 1],
d2 = srow2[x + 1] - srow2[x - 1], d3 = srow3[x + 1] - srow3[x - 1]; d2 = srow2[x + 1] - srow2[x - 1], d3 = srow3[x + 1] - srow3[x - 1];
int v0 = tab[d0 + d1 * 2 + d2 + OFS]; int v0 = tab[d0 + d1 * 2 + d2 + OFS];
int v1 = tab[d1 + d2 * 2 + d3 + OFS]; int v1 = tab[d1 + d2 * 2 + d3 + OFS];
dptr0[x] = (uchar)v0; dptr0[x] = (uchar)v0;
dptr1[x] = (uchar)v1; dptr1[x] = (uchar)v1;
} }
} }
for (; y < size.height; y++) for (; y < size.height; y++)
{ {
uchar* dptr = dst.ptr<uchar>(y); uchar* dptr = dst.ptr<uchar>(y);
for (x = 0; x < size.width; x++) for (x = 0; x < size.width; x++)
dptr[x] = val0; dptr[x] = val0;
} }
} }
static const int DISPARITY_SHIFT = 4; static const int DISPARITY_SHIFT = 4;
static void struct PrefilterInvoker : public ParallelLoopBody
findStereoCorrespondenceBM(const Mat& left, const Mat& right, {
Mat& disp, Mat& cost, const StereoBinaryBMParams& state, PrefilterInvoker(const Mat& left0, const Mat& right0, Mat& left, Mat& right,
uchar* buf, int _dy0, int _dy1) uchar* buf0, uchar* buf1, StereoBinaryBMParams* _state)
{ {
const int ALIGN = 16; imgs0[0] = &left0; imgs0[1] = &right0;
int x, y, d; imgs[0] = &left; imgs[1] = &right;
int wsz = state.SADWindowSize, wsz2 = wsz / 2; buf[0] = buf0; buf[1] = buf1;
int dy0 = MIN(_dy0, wsz2 + 1), dy1 = MIN(_dy1, wsz2 + 1); state = _state;
int ndisp = state.numDisparities; }
int mindisp = state.minDisparity;
int lofs = MAX(ndisp - 1 + mindisp, 0); void operator()(const Range& range) const
int rofs = -MIN(ndisp - 1 + mindisp, 0); {
int width = left.cols, height = left.rows; for (int i = range.start; i < range.end; i++)
int width1 = width - rofs - ndisp + 1; {
int ftzero = state.preFilterCap; if (state->preFilterType == StereoBinaryBM::PREFILTER_NORMALIZED_RESPONSE)
int textureThreshold = state.textureThreshold; prefilterNorm(*imgs0[i], *imgs[i], state->preFilterSize, state->preFilterCap, buf[i]);
int uniquenessRatio = state.uniquenessRatio; else
short FILTERED = (short)((mindisp - 1) << DISPARITY_SHIFT); prefilterXSobel(*imgs0[i], *imgs[i], state->preFilterCap);
}
int *sad, *hsad0, *hsad, *hsad_sub, *htext; }
uchar *cbuf0, *cbuf;
const uchar* lptr0 = left.ptr() + lofs; const Mat* imgs0[2];
const uchar* rptr0 = right.ptr() + rofs; Mat* imgs[2];
const uchar *lptr, *lptr_sub, *rptr; uchar* buf[2];
short* dptr = disp.ptr<short>(); StereoBinaryBMParams* state;
int sstep = (int)left.step; };
int dstep = (int)(disp.step / sizeof(dptr[0]));
int cstep = (height + dy0 + dy1)*ndisp; class StereoBinaryBMImpl : public StereoBinaryBM, public Matching
int costbuf = 0; {
int coststep = cost.data ? (int)(cost.step / sizeof(costbuf)) : 0; public:
const int TABSZ = 256; StereoBinaryBMImpl(): Matching(64)
uchar tab[TABSZ]; {
params = StereoBinaryBMParams();
sad = (int*)alignPtr(buf + sizeof(sad[0]), ALIGN); }
hsad0 = (int*)alignPtr(sad + ndisp + 1 + dy0*ndisp, ALIGN);
htext = (int*)alignPtr((int*)(hsad0 + (height + dy1)*ndisp) + wsz2 + 2, ALIGN); StereoBinaryBMImpl(int _numDisparities, int _kernelSize) : Matching(_numDisparities)
cbuf0 = (uchar*)alignPtr((uchar*)(htext + height + wsz2 + 2) + dy0*ndisp, ALIGN); {
params = StereoBinaryBMParams(_numDisparities, _kernelSize);
for (x = 0; x < TABSZ; x++) previous_size = 0;
tab[x] = (uchar)std::abs(x - ftzero); }
// initialize buffers void compute(InputArray leftarr, InputArray rightarr, OutputArray disparr)
memset(hsad0 - dy0*ndisp, 0, (height + dy0 + dy1)*ndisp*sizeof(hsad0[0])); {
memset(htext - wsz2 - 1, 0, (height + wsz + 1)*sizeof(htext[0])); int dtype = disparr.fixedType() ? disparr.type() : params.dispType;
Size leftsize = leftarr.size();
for (x = -wsz2 - 1; x < wsz2; x++)
{ if (leftarr.size() != rightarr.size())
hsad = hsad0 - dy0*ndisp; cbuf = cbuf0 + (x + wsz2 + 1)*cstep - dy0*ndisp; CV_Error(Error::StsUnmatchedSizes, "All the images must have the same size");
lptr = lptr0 + std::min(std::max(x, -lofs), width - lofs - 1) - dy0*sstep;
rptr = rptr0 + std::min(std::max(x, -rofs), width - rofs - 1) - dy0*sstep; if (leftarr.type() != CV_8UC1 || rightarr.type() != CV_8UC1)
for (y = -dy0; y < height + dy1; y++, hsad += ndisp, cbuf += ndisp, lptr += sstep, rptr += sstep) CV_Error(Error::StsUnsupportedFormat, "Both input images must have CV_8UC1");
{
int lval = lptr[0]; if (dtype != CV_16SC1 && dtype != CV_32FC1)
for (d = 0; d < ndisp; d++) CV_Error(Error::StsUnsupportedFormat, "Disparity image must have CV_16SC1 or CV_32FC1 format");
{
int diff = std::abs(lval - rptr[d]); if (params.preFilterType != PREFILTER_NORMALIZED_RESPONSE &&
cbuf[d] = (uchar)diff; params.preFilterType != PREFILTER_XSOBEL)
hsad[d] = (int)(hsad[d] + diff); CV_Error(Error::StsOutOfRange, "preFilterType must be = CV_STEREO_BM_NORMALIZED_RESPONSE");
}
htext[y] += tab[lval]; if (params.preFilterSize < 5 || params.preFilterSize > 255 || params.preFilterSize % 2 == 0)
} CV_Error(Error::StsOutOfRange, "preFilterSize must be odd and be within 5..255");
}
if (params.preFilterCap < 1 || params.preFilterCap > 63)
// initialize the left and right borders of the disparity map CV_Error(Error::StsOutOfRange, "preFilterCap must be within 1..63");
for (y = 0; y < height; y++)
{ if (params.kernelSize < 5 || params.kernelSize > 255 || params.kernelSize % 2 == 0 ||
for (x = 0; x < lofs; x++) params.kernelSize >= std::min(leftsize.width, leftsize.height))
dptr[y*dstep + x] = FILTERED; CV_Error(Error::StsOutOfRange, "kernelSize must be odd, be within 5..255 and be not larger than image width or height");
for (x = lofs + width1; x < width; x++)
dptr[y*dstep + x] = FILTERED; if (params.numDisparities <= 0 || params.numDisparities % 16 != 0)
} CV_Error(Error::StsOutOfRange, "numDisparities must be positive and divisble by 16");
dptr += lofs;
if (params.textureThreshold < 0)
for (x = 0; x < width1; x++, dptr++) CV_Error(Error::StsOutOfRange, "texture threshold must be non-negative");
{
int* costptr = cost.data ? cost.ptr<int>() + lofs + x : &costbuf; if (params.uniquenessRatio < 0)
int x0 = x - wsz2 - 1, x1 = x + wsz2; CV_Error(Error::StsOutOfRange, "uniqueness ratio must be non-negative");
const uchar* cbuf_sub = cbuf0 + ((x0 + wsz2 + 1) % (wsz + 1))*cstep - dy0*ndisp;
cbuf = cbuf0 + ((x1 + wsz2 + 1) % (wsz + 1))*cstep - dy0*ndisp; int FILTERED = (params.minDisparity - 1) << DISPARITY_SHIFT;
hsad = hsad0 - dy0*ndisp;
lptr_sub = lptr0 + MIN(MAX(x0, -lofs), width - 1 - lofs) - dy0*sstep; Mat left0 = leftarr.getMat(), right0 = rightarr.getMat();
lptr = lptr0 + MIN(MAX(x1, -lofs), width - 1 - lofs) - dy0*sstep; Mat disp0 = disparr.getMat();
rptr = rptr0 + MIN(MAX(x1, -rofs), width - 1 - rofs) - dy0*sstep;
int width = left0.cols;
for (y = -dy0; y < height + dy1; y++, cbuf += ndisp, cbuf_sub += ndisp, int height = left0.rows;
hsad += ndisp, lptr += sstep, lptr_sub += sstep, rptr += sstep) if(previous_size != width * height)
{ {
int lval = lptr[0]; previous_size = width * height;
for (d = 0; d < ndisp; d++) speckleX.create(height,width,CV_32SC4);
{ speckleY.create(height,width,CV_32SC4);
int diff = std::abs(lval - rptr[d]); puss.create(height,width,CV_32SC4);
cbuf[d] = (uchar)diff;
hsad[d] = hsad[d] + diff - cbuf_sub[d]; censusImage[0].create(left0.rows,left0.cols,CV_32SC4);
} censusImage[1].create(left0.rows,left0.cols,CV_32SC4);
htext[y] += tab[lval] - tab[lptr_sub[0]];
} partialSumsLR.create(left0.rows + 1,(left0.cols + 1) * (params.numDisparities + 1),CV_16S);
agregatedHammingLRCost.create(left0.rows + 1,(left0.cols + 1) * (params.numDisparities + 1),CV_16S);
// fill borders hammingDistance.create(left0.rows, left0.cols * (params.numDisparities + 1),CV_16S);
for (y = dy1; y <= wsz2; y++)
htext[height + y] = htext[height + dy1 - 1]; preFilteredImg0.create(left0.size(), CV_8U);
for (y = -wsz2 - 1; y < -dy0; y++) preFilteredImg1.create(left0.size(), CV_8U);
htext[y] = htext[-dy0];
aux.create(height,width,CV_8UC1);
// initialize sums }
int tsum = 0;
{ Mat left = preFilteredImg0, right = preFilteredImg1;
for (d = 0; d < ndisp; d++)
sad[d] = (int)(hsad0[d - ndisp*dy0] * (wsz2 + 2 - dy0)); int ndisp = params.numDisparities;
hsad = hsad0 + (1 - dy0)*ndisp; int wsz = params.kernelSize;
for (y = 1 - dy0; y < wsz2; y++, hsad += ndisp) int bufSize0 = (int)((ndisp + 2)*sizeof(int));
for (d = 0; d < ndisp; d++) bufSize0 += (int)((height + wsz + 2)*ndisp*sizeof(int));
sad[d] = (int)(sad[d] + hsad[d]); bufSize0 += (int)((height + wsz + 2)*sizeof(int));
bufSize0 += (int)((height + wsz + 2)*ndisp*(wsz + 2)*sizeof(uchar) + 256);
for (y = -wsz2 - 1; y < wsz2; y++)
tsum += htext[y]; int bufSize1 = (int)((width + params.preFilterSize + 2) * sizeof(int) + 256);
} if(params.usePrefilter == true)
// finally, start the real processing {
{ uchar *_buf = slidingSumBuf.ptr();
for (y = 0; y < height; y++)
{ parallel_for_(Range(0, 2), PrefilterInvoker(left0, right0, left, right, _buf, _buf + bufSize1, &params), 1);
int minsad = INT_MAX, mind = -1; }
hsad = hsad0 + MIN(y + wsz2, height + dy1 - 1)*ndisp; else if(params.usePrefilter == false)
hsad_sub = hsad0 + MAX(y - wsz2 - 1, -dy0)*ndisp; {
left = left0;
for (d = 0; d < ndisp; d++) right = right0;
{ }
int currsad = sad[d] + hsad[d] - hsad_sub[d]; if(params.kernelType == CV_SPARSE_CENSUS)
sad[d] = currsad; {
if (currsad < minsad) censusTransform(left,right,params.kernelSize,censusImage[0],censusImage[1],CV_SPARSE_CENSUS);
{ }
minsad = currsad; else if(params.kernelType == CV_DENSE_CENSUS)
mind = d; {
} censusTransform(left,right,params.kernelSize,censusImage[0],censusImage[1],CV_SPARSE_CENSUS);
} }
else if(params.kernelType == CV_CS_CENSUS)
tsum += htext[y + wsz2] - htext[y - wsz2 - 1]; {
if (tsum < textureThreshold) symetricCensusTransform(left,right,params.kernelSize,censusImage[0],censusImage[1],CV_CS_CENSUS);
{ }
dptr[y*dstep] = FILTERED; else if(params.kernelType == CV_MODIFIED_CS_CENSUS)
continue; {
} symetricCensusTransform(left,right,params.kernelSize,censusImage[0],censusImage[1],CV_MODIFIED_CS_CENSUS);
}
if (uniquenessRatio > 0) else if(params.kernelType == CV_MODIFIED_CENSUS_TRANSFORM)
{ {
int thresh = minsad + (minsad * uniquenessRatio / 100); modifiedCensusTransform(left,right,params.kernelSize,censusImage[0],censusImage[1],CV_MODIFIED_CENSUS_TRANSFORM,0);
for (d = 0; d < ndisp; d++) }
{ else if(params.kernelType == CV_MEAN_VARIATION)
if ((d < mind - 1 || d > mind + 1) && sad[d] <= thresh) {
break; parSumsIntensityImage[0].create(left0.rows, left0.cols,CV_32SC4);
} parSumsIntensityImage[1].create(left0.rows, left0.cols,CV_32SC4);
if (d < ndisp) Integral[0].create(left0.rows,left0.cols,CV_32SC4);
{ Integral[1].create(left0.rows,left0.cols,CV_32SC4);
dptr[y*dstep] = FILTERED; integral(left, parSumsIntensityImage[0],CV_32S);
continue; integral(right, parSumsIntensityImage[1],CV_32S);
} imageMeanKernelSize(parSumsIntensityImage[0], params.kernelSize,Integral[0]);
} imageMeanKernelSize(parSumsIntensityImage[1], params.kernelSize, Integral[1]);
modifiedCensusTransform(left,right,params.kernelSize,censusImage[0],censusImage[1],CV_MEAN_VARIATION,0,Integral[0], Integral[1]);
{ }
sad[-1] = sad[1]; else if(params.kernelType == CV_STAR_KERNEL)
sad[ndisp] = sad[ndisp - 2]; {
int p = sad[mind + 1], n = sad[mind - 1]; starCensusTransform(left,right,params.kernelSize,censusImage[0],censusImage[1]);
d = p + n - 2 * sad[mind] + std::abs(p - n); }
dptr[y*dstep] = (short)(((ndisp - mind - 1 + mindisp) * 256 + (d != 0 ? (p - n) * 256 / d : 0) + 15) >> 4); hammingDistanceBlockMatching(censusImage[0], censusImage[1], hammingDistance);
costptr[y*coststep] = sad[mind]; costGathering(hammingDistance, partialSumsLR);
} blockAgregation(partialSumsLR, params.agregationWindowSize, agregatedHammingLRCost);
} dispartyMapFormation(agregatedHammingLRCost, disp0, 3);
} Median1x9Filter<uint8_t>(disp0, aux);
} Median9x1Filter<uint8_t>(aux,disp0);
}
if(params.regionRemoval == CV_SPECKLE_REMOVAL_AVG_ALGORITHM)
{
struct PrefilterInvoker : public ParallelLoopBody smallRegionRemoval<uint8_t>(disp0,params.speckleWindowSize,disp0);
{ }
PrefilterInvoker(const Mat& left0, const Mat& right0, Mat& left, Mat& right, else if(params.regionRemoval == CV_SPECKLE_REMOVAL_ALGORITHM)
uchar* buf0, uchar* buf1, StereoBinaryBMParams* _state) {
{ if (params.speckleRange >= 0 && params.speckleWindowSize > 0)
imgs0[0] = &left0; imgs0[1] = &right0; filterSpeckles(disp0, FILTERED, params.speckleWindowSize, params.speckleRange, slidingSumBuf);
imgs[0] = &left; imgs[1] = &right; }
buf[0] = buf0; buf[1] = buf1; }
state = _state; int getAgregationWindowSize() const { return params.agregationWindowSize;}
} void setAgregationWindowSize(int value = 9) { CV_Assert(value % 2 != 0); params.agregationWindowSize = value;}
void operator()(const Range& range) const int getBinaryKernelType() const { return params.kernelType;}
{ void setBinaryKernelType(int value = CV_MODIFIED_CENSUS_TRANSFORM) { CV_Assert(value < 7); params.kernelType = value; }
for (int i = range.start; i < range.end; i++)
{ int getSpekleRemovalTechnique() const { return params.regionRemoval;}
if (state->preFilterType == StereoBinaryBM::PREFILTER_NORMALIZED_RESPONSE) void setSpekleRemovalTechnique(int factor = CV_SPECKLE_REMOVAL_AVG_ALGORITHM) {CV_Assert(factor < 2); params.regionRemoval = factor; }
prefilterNorm(*imgs0[i], *imgs[i], state->preFilterSize, state->preFilterCap, buf[i]);
else bool getUsePrefilter() const { return params.usePrefilter;}
prefilterXSobel(*imgs0[i], *imgs[i], state->preFilterCap); void setUsePrefilter(bool value = false) { params.usePrefilter = value;}
}
} int getScalleFactor() const { return params.scalling;}
void setScalleFactor(int factor = 4) {CV_Assert(factor > 0); params.scalling = factor; setScallingFactor(factor);}
const Mat* imgs0[2];
Mat* imgs[2]; int getMinDisparity() const { return params.minDisparity; }
uchar* buf[2]; void setMinDisparity(int minDisparity) {CV_Assert(minDisparity >= 0); params.minDisparity = minDisparity; }
StereoBinaryBMParams* state;
}; int getNumDisparities() const { return params.numDisparities; }
void setNumDisparities(int numDisparities) {CV_Assert(numDisparities > 0); params.numDisparities = numDisparities; }
struct FindStereoCorrespInvoker : public ParallelLoopBody
{ int getBlockSize() const { return params.kernelSize; }
FindStereoCorrespInvoker(const Mat& _left, const Mat& _right, void setBlockSize(int blockSize) {CV_Assert(blockSize % 2 != 0); params.kernelSize = blockSize; }
Mat& _disp, StereoBinaryBMParams* _state,
int _nstripes, size_t _stripeBufSize, int getSpeckleWindowSize() const { return params.speckleWindowSize; }
bool _useShorts, Rect _validDisparityRect, void setSpeckleWindowSize(int speckleWindowSize) {CV_Assert(speckleWindowSize >= 0); params.speckleWindowSize = speckleWindowSize; }
Mat& _slidingSumBuf, Mat& _cost)
{ int getSpeckleRange() const { return params.speckleRange; }
left = &_left; right = &_right; void setSpeckleRange(int speckleRange) {CV_Assert(speckleRange >= 0); params.speckleRange = speckleRange; }
disp = &_disp; state = _state;
nstripes = _nstripes; stripeBufSize = _stripeBufSize; int getDisp12MaxDiff() const { return params.disp12MaxDiff; }
useShorts = _useShorts; void setDisp12MaxDiff(int disp12MaxDiff) {CV_Assert(disp12MaxDiff >= 0); params.disp12MaxDiff = disp12MaxDiff; }
validDisparityRect = _validDisparityRect;
slidingSumBuf = &_slidingSumBuf; int getPreFilterType() const { return params.preFilterType; }
cost = &_cost; void setPreFilterType(int preFilterType) { CV_Assert(preFilterType >= 0); params.preFilterType = preFilterType; }
}
int getPreFilterSize() const { return params.preFilterSize; }
void operator()(const Range& range) const void setPreFilterSize(int preFilterSize) { CV_Assert(preFilterSize >= 0); params.preFilterSize = preFilterSize; }
{
int cols = left->cols, rows = left->rows; int getPreFilterCap() const { return params.preFilterCap; }
int _row0 = std::min(cvRound(range.start * rows / nstripes), rows); void setPreFilterCap(int preFilterCap) {CV_Assert(preFilterCap >= 0); params.preFilterCap = preFilterCap; }
int _row1 = std::min(cvRound(range.end * rows / nstripes), rows);
uchar *ptr = slidingSumBuf->ptr() + range.start * stripeBufSize; int getTextureThreshold() const { return params.textureThreshold; }
int FILTERED = (state->minDisparity - 1) * 16; void setTextureThreshold(int textureThreshold) {CV_Assert(textureThreshold >= 0); params.textureThreshold = textureThreshold; }
Rect roi = validDisparityRect & Rect(0, _row0, cols, _row1 - _row0); int getUniquenessRatio() const { return params.uniquenessRatio; }
if (roi.height == 0) void setUniquenessRatio(int uniquenessRatio) {CV_Assert(uniquenessRatio >= 0); params.uniquenessRatio = uniquenessRatio; }
return;
int row0 = roi.y; int getSmallerBlockSize() const { return 0; }
int row1 = roi.y + roi.height; void setSmallerBlockSize(int) {}
Mat part; void write(FileStorage& fs) const
if (row0 > _row0) {
{ fs << "name" << name_
part = disp->rowRange(_row0, row0); << "minDisparity" << params.minDisparity
part = Scalar::all(FILTERED); << "numDisparities" << params.numDisparities
} << "blockSize" << params.kernelSize
if (_row1 > row1) << "speckleWindowSize" << params.speckleWindowSize
{ << "speckleRange" << params.speckleRange
part = disp->rowRange(row1, _row1); << "disp12MaxDiff" << params.disp12MaxDiff
part = Scalar::all(FILTERED); << "preFilterType" << params.preFilterType
} << "preFilterSize" << params.preFilterSize
<< "preFilterCap" << params.preFilterCap
Mat left_i = left->rowRange(row0, row1); << "textureThreshold" << params.textureThreshold
Mat right_i = right->rowRange(row0, row1); << "uniquenessRatio" << params.uniquenessRatio;
Mat disp_i = disp->rowRange(row0, row1); }
Mat cost_i = state->disp12MaxDiff >= 0 ? cost->rowRange(row0, row1) : Mat();
void read(const FileNode& fn)
findStereoCorrespondenceBM(left_i, right_i, disp_i, cost_i, *state, ptr, row0, rows - row1); {
FileNode n = fn["name"];
if (state->disp12MaxDiff >= 0) CV_Assert(n.isString() && String(n) == name_);
validateDisparity(disp_i, cost_i, state->minDisparity, state->numDisparities, state->disp12MaxDiff); params.minDisparity = (int)fn["minDisparity"];
params.numDisparities = (int)fn["numDisparities"];
if (roi.x > 0) params.kernelSize = (int)fn["blockSize"];
{ params.speckleWindowSize = (int)fn["speckleWindowSize"];
part = disp_i.colRange(0, roi.x); params.speckleRange = (int)fn["speckleRange"];
part = Scalar::all(FILTERED); params.disp12MaxDiff = (int)fn["disp12MaxDiff"];
} params.preFilterType = (int)fn["preFilterType"];
if (roi.x + roi.width < cols) params.preFilterSize = (int)fn["preFilterSize"];
{ params.preFilterCap = (int)fn["preFilterCap"];
part = disp_i.colRange(roi.x + roi.width, cols); params.textureThreshold = (int)fn["textureThreshold"];
part = Scalar::all(FILTERED); params.uniquenessRatio = (int)fn["uniquenessRatio"];
} }
}
StereoBinaryBMParams params;
protected: Mat preFilteredImg0, preFilteredImg1, cost, dispbuf;
const Mat *left, *right; Mat slidingSumBuf;
Mat* disp, *slidingSumBuf, *cost; Mat parSumsIntensityImage[2];
StereoBinaryBMParams *state; Mat Integral[2];
Mat censusImage[2];
int nstripes; Mat hammingDistance;
size_t stripeBufSize; Mat partialSumsLR;
bool useShorts; Mat agregatedHammingLRCost;
Rect validDisparityRect; Mat aux;
}; static const char* name_;
};
class StereoBinaryBMImpl : public StereoBinaryBM
{ const char* StereoBinaryBMImpl::name_ = "StereoBinaryMatcher.BM";
public:
StereoBinaryBMImpl() Ptr<StereoBinaryBM> StereoBinaryBM::create(int _numDisparities, int _kernelSize)
{ {
params = StereoBinaryBMParams(); return makePtr<StereoBinaryBMImpl>(_numDisparities, _kernelSize);
} }
}
StereoBinaryBMImpl(int _numDisparities, int _SADWindowSize)
{
params = StereoBinaryBMParams(_numDisparities, _SADWindowSize);
}
void compute(InputArray leftarr, InputArray rightarr, OutputArray disparr)
{
int dtype = disparr.fixedType() ? disparr.type() : params.dispType;
Size leftsize = leftarr.size();
if (leftarr.size() != rightarr.size())
CV_Error(Error::StsUnmatchedSizes, "All the images must have the same size");
if (leftarr.type() != CV_8UC1 || rightarr.type() != CV_8UC1)
CV_Error(Error::StsUnsupportedFormat, "Both input images must have CV_8UC1");
if (dtype != CV_16SC1 && dtype != CV_32FC1)
CV_Error(Error::StsUnsupportedFormat, "Disparity image must have CV_16SC1 or CV_32FC1 format");
if (params.preFilterType != PREFILTER_NORMALIZED_RESPONSE &&
params.preFilterType != PREFILTER_XSOBEL)
CV_Error(Error::StsOutOfRange, "preFilterType must be = CV_STEREO_BM_NORMALIZED_RESPONSE");
if (params.preFilterSize < 5 || params.preFilterSize > 255 || params.preFilterSize % 2 == 0)
CV_Error(Error::StsOutOfRange, "preFilterSize must be odd and be within 5..255");
if (params.preFilterCap < 1 || params.preFilterCap > 63)
CV_Error(Error::StsOutOfRange, "preFilterCap must be within 1..63");
if (params.SADWindowSize < 5 || params.SADWindowSize > 255 || params.SADWindowSize % 2 == 0 ||
params.SADWindowSize >= std::min(leftsize.width, leftsize.height))
CV_Error(Error::StsOutOfRange, "SADWindowSize must be odd, be within 5..255 and be not larger than image width or height");
if (params.numDisparities <= 0 || params.numDisparities % 16 != 0)
CV_Error(Error::StsOutOfRange, "numDisparities must be positive and divisble by 16");
if (params.textureThreshold < 0)
CV_Error(Error::StsOutOfRange, "texture threshold must be non-negative");
if (params.uniquenessRatio < 0)
CV_Error(Error::StsOutOfRange, "uniqueness ratio must be non-negative");
int FILTERED = (params.minDisparity - 1) << DISPARITY_SHIFT;
Mat left0 = leftarr.getMat(), right0 = rightarr.getMat();
disparr.create(left0.size(), dtype);
Mat disp0 = disparr.getMat();
preFilteredImg0.create(left0.size(), CV_8U);
preFilteredImg1.create(left0.size(), CV_8U);
cost.create(left0.size(), CV_16S);
Mat left = preFilteredImg0, right = preFilteredImg1;
int mindisp = params.minDisparity;
int ndisp = params.numDisparities;
int width = left0.cols;
int height = left0.rows;
int lofs = std::max(ndisp - 1 + mindisp, 0);
int rofs = -std::min(ndisp - 1 + mindisp, 0);
int width1 = width - rofs - ndisp + 1;
if (lofs >= width || rofs >= width || width1 < 1)
{
disp0 = Scalar::all(FILTERED * (disp0.type() < CV_32F ? 1 : 1. / (1 << DISPARITY_SHIFT)));
return;
}
Mat disp = disp0;
if (dtype == CV_32F)
{
dispbuf.create(disp0.size(), CV_16S);
disp = dispbuf;
}
int wsz = params.SADWindowSize;
int bufSize0 = (int)((ndisp + 2)*sizeof(int));
bufSize0 += (int)((height + wsz + 2)*ndisp*sizeof(int));
bufSize0 += (int)((height + wsz + 2)*sizeof(int));
bufSize0 += (int)((height + wsz + 2)*ndisp*(wsz + 2)*sizeof(uchar) + 256);
int bufSize1 = (int)((width + params.preFilterSize + 2) * sizeof(int) + 256);
int bufSize2 = 0;
if (params.speckleRange >= 0 && params.speckleWindowSize > 0)
bufSize2 = width*height*(sizeof(Point_<short>) + sizeof(int) + sizeof(uchar));
#if CV_SSE2
bool useShorts = params.preFilterCap <= 31 && params.SADWindowSize <= 21 && checkHardwareSupport(CV_CPU_SSE2);
#else
const bool useShorts = false;
#endif
const double SAD_overhead_coeff = 10.0;
double N0 = 8000000 / (useShorts ? 1 : 4); // approx tbb's min number instructions reasonable for one thread
double maxStripeSize = std::min(std::max(N0 / (width * ndisp), (wsz - 1) * SAD_overhead_coeff), (double)height);
int nstripes = cvCeil(height / maxStripeSize);
int bufSize = std::max(bufSize0 * nstripes, std::max(bufSize1 * 2, bufSize2));
if (slidingSumBuf.cols < bufSize)
slidingSumBuf.create(1, bufSize, CV_8U);
uchar *_buf = slidingSumBuf.ptr();
parallel_for_(Range(0, 2), PrefilterInvoker(left0, right0, left, right, _buf, _buf + bufSize1, &params), 1);
Rect validDisparityRect(0, 0, width, height), R1 = params.roi1, R2 = params.roi2;
validDisparityRect = getValidDisparityROI(R1.area() > 0 ? Rect(0, 0, width, height) : validDisparityRect,
R2.area() > 0 ? Rect(0, 0, width, height) : validDisparityRect,
params.minDisparity, params.numDisparities,
params.SADWindowSize);
parallel_for_(Range(0, nstripes),
FindStereoCorrespInvoker(left, right, disp, &params, nstripes,
bufSize0, useShorts, validDisparityRect,
slidingSumBuf, cost));
if (params.speckleRange >= 0 && params.speckleWindowSize > 0)
filterSpeckles(disp, FILTERED, params.speckleWindowSize, params.speckleRange, slidingSumBuf);
if (disp0.data != disp.data)
disp.convertTo(disp0, disp0.type(), 1. / (1 << DISPARITY_SHIFT), 0);
}
int getMinDisparity() const { return params.minDisparity; }
void setMinDisparity(int minDisparity) { params.minDisparity = minDisparity; }
int getNumDisparities() const { return params.numDisparities; }
void setNumDisparities(int numDisparities) { params.numDisparities = numDisparities; }
int getBlockSize() const { return params.SADWindowSize; }
void setBlockSize(int blockSize) { params.SADWindowSize = blockSize; }
int getSpeckleWindowSize() const { return params.speckleWindowSize; }
void setSpeckleWindowSize(int speckleWindowSize) { params.speckleWindowSize = speckleWindowSize; }
int getSpeckleRange() const { return params.speckleRange; }
void setSpeckleRange(int speckleRange) { params.speckleRange = speckleRange; }
int getDisp12MaxDiff() const { return params.disp12MaxDiff; }
void setDisp12MaxDiff(int disp12MaxDiff) { params.disp12MaxDiff = disp12MaxDiff; }
int getPreFilterType() const { return params.preFilterType; }
void setPreFilterType(int preFilterType) { params.preFilterType = preFilterType; }
int getPreFilterSize() const { return params.preFilterSize; }
void setPreFilterSize(int preFilterSize) { params.preFilterSize = preFilterSize; }
int getPreFilterCap() const { return params.preFilterCap; }
void setPreFilterCap(int preFilterCap) { params.preFilterCap = preFilterCap; }
int getTextureThreshold() const { return params.textureThreshold; }
void setTextureThreshold(int textureThreshold) { params.textureThreshold = textureThreshold; }
int getUniquenessRatio() const { return params.uniquenessRatio; }
void setUniquenessRatio(int uniquenessRatio) { params.uniquenessRatio = uniquenessRatio; }
int getSmallerBlockSize() const { return 0; }
void setSmallerBlockSize(int) {}
Rect getROI1() const { return params.roi1; }
void setROI1(Rect roi1) { params.roi1 = roi1; }
Rect getROI2() const { return params.roi2; }
void setROI2(Rect roi2) { params.roi2 = roi2; }
void write(FileStorage& fs) const
{
fs << "name" << name_
<< "minDisparity" << params.minDisparity
<< "numDisparities" << params.numDisparities
<< "blockSize" << params.SADWindowSize
<< "speckleWindowSize" << params.speckleWindowSize
<< "speckleRange" << params.speckleRange
<< "disp12MaxDiff" << params.disp12MaxDiff
<< "preFilterType" << params.preFilterType
<< "preFilterSize" << params.preFilterSize
<< "preFilterCap" << params.preFilterCap
<< "textureThreshold" << params.textureThreshold
<< "uniquenessRatio" << params.uniquenessRatio;
}
void read(const FileNode& fn)
{
FileNode n = fn["name"];
CV_Assert(n.isString() && String(n) == name_);
params.minDisparity = (int)fn["minDisparity"];
params.numDisparities = (int)fn["numDisparities"];
params.SADWindowSize = (int)fn["blockSize"];
params.speckleWindowSize = (int)fn["speckleWindowSize"];
params.speckleRange = (int)fn["speckleRange"];
params.disp12MaxDiff = (int)fn["disp12MaxDiff"];
params.preFilterType = (int)fn["preFilterType"];
params.preFilterSize = (int)fn["preFilterSize"];
params.preFilterCap = (int)fn["preFilterCap"];
params.textureThreshold = (int)fn["textureThreshold"];
params.uniquenessRatio = (int)fn["uniquenessRatio"];
params.roi1 = params.roi2 = Rect();
}
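    /*
    write()/read() above (de)serialize the tuning parameters through cv::FileStorage.
    A minimal round-trip sketch, assuming a matcher instance named "bm" and a
    hypothetical file name params.yml:
    */
#if 0 // illustrative sketch, not compiled
    {
        cv::FileStorage fs("params.yml", cv::FileStorage::WRITE);
        fs << "bm_params" << "{";      // open a nested map node
        bm->write(fs);                 // dumps "name", "minDisparity", ... as written above
        fs << "}";
        fs.release();

        cv::FileStorage fs2("params.yml", cv::FileStorage::READ);
        bm->read(fs2["bm_params"]);    // read() checks the "name" field and restores the fields
    }
#endif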
StereoBinaryBMParams params;
Mat preFilteredImg0, preFilteredImg1, cost, dispbuf;
Mat slidingSumBuf;
static const char* name_;
};
const char* StereoBinaryBMImpl::name_ = "StereoMatcher.BM";
Ptr<StereoBinaryBM> StereoBinaryBM::create(int _numDisparities, int _SADWindowSize)
{
return makePtr<StereoBinaryBMImpl>(_numDisparities, _SADWindowSize);
}
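/*
A minimal usage sketch for the block-matching path above. File names are hypothetical
and error handling is omitted; the setter names are the ones implemented by
StereoBinaryBMImpl, and the input images are assumed to be 8-bit and rectified.
*/
#if 0 // illustrative sketch, not compiled
#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include "opencv2/stereo.hpp"

int main()
{
    cv::Mat left  = cv::imread("left.png",  cv::IMREAD_GRAYSCALE);
    cv::Mat right = cv::imread("right.png", cv::IMREAD_GRAYSCALE);

    // 64 disparity levels, 9x9 matching window
    cv::Ptr<cv::stereo::StereoBinaryBM> bm = cv::stereo::StereoBinaryBM::create(64, 9);
    bm->setPreFilterCap(31);
    bm->setSpeckleWindowSize(100);
    bm->setSpeckleRange(32);

    cv::Mat disp;
    bm->compute(left, right, disp);   // fixed-point CV_16S disparity map
    return 0;
}
#endif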
}
}
/* End of file. */
@@ -40,919 +40,789 @@
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
/*
This is a variation of
"Stereo Processing by Semiglobal Matching and Mutual Information"
by Heiko Hirschmuller.
We match blocks rather than individual pixels, thus the algorithm is called
SGBM (Semi-global block matching).
*/
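/*
The heart of the matcher below is the per-direction path-cost recurrence
("formula 13" in Hirschmuller's paper, quoted again in a comment inside
computeDisparityBinarySGBM):

    L_r(p,d) = C(p,d) + min( L_r(p-r, d),
                             L_r(p-r, d-1) + P1,
                             L_r(p-r, d+1) + P1,
                             min_k L_r(p-r, k) + P2 ) - min_k L_r(p-r, k)

A minimal scalar sketch of one left-to-right pass over a single scanline follows.
The names are illustrative only; the real code below interleaves 8 directions,
SSE vectorization and border handling.
*/
#if 0 // illustrative sketch, not compiled
#include <vector>
#include <algorithm>

// costs: width x D matching costs C(x,d) for one scanline
// L:     output path costs for the horizontal direction r = (-1, 0)
static void aggregateOneDirection(const std::vector<std::vector<short> >& costs,
                                  std::vector<std::vector<int> >& L,
                                  int P1, int P2)
{
    const int width = (int)costs.size();
    const int D = (int)costs[0].size();
    L.assign(width, std::vector<int>(D, 0));
    for (int d = 0; d < D; d++)
        L[0][d] = costs[0][d];                                        // no predecessor at x == 0
    for (int x = 1; x < width; x++)
    {
        int minPrev = *std::min_element(L[x-1].begin(), L[x-1].end());
        for (int d = 0; d < D; d++)
        {
            int best = L[x-1][d];                                     // same disparity
            if (d > 0)     best = std::min(best, L[x-1][d-1] + P1);   // small disparity jump
            if (d < D - 1) best = std::min(best, L[x-1][d+1] + P1);
            best = std::min(best, minPrev + P2);                      // large disparity jump
            L[x][d] = costs[x][d] + best - minPrev;                   // subtract minPrev to keep values bounded
        }
    }
}
#endif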
#include "precomp.hpp" #include "precomp.hpp"
#include <limits.h> #include <limits.h>
#include <descriptor.hpp>
#include <matching.hpp>
namespace cv
{
namespace stereo
{
typedef uchar PixType;
typedef short CostType;
typedef short DispType;
enum { NR = 16, NR2 = NR/2 };
struct StereoBinarySGBMParams
{
struct StereoBinarySGBMParams StereoBinarySGBMParams()
{ {
StereoBinarySGBMParams() minDisparity = numDisparities = 0;
{ kernelSize = 0;
minDisparity = numDisparities = 0; P1 = P2 = 0;
SADWindowSize = 0; disp12MaxDiff = 0;
P1 = P2 = 0; preFilterCap = 0;
disp12MaxDiff = 0; uniquenessRatio = 0;
preFilterCap = 0; speckleWindowSize = 0;
uniquenessRatio = 0; speckleRange = 0;
speckleWindowSize = 0; mode = StereoBinarySGBM::MODE_SGBM;
speckleRange = 0; }
mode = StereoBinarySGBM::MODE_SGBM; StereoBinarySGBMParams( int _minDisparity, int _numDisparities, int _SADWindowSize,
} int _P1, int _P2, int _disp12MaxDiff, int _preFilterCap,
int _uniquenessRatio, int _speckleWindowSize, int _speckleRange,
StereoBinarySGBMParams( int _minDisparity, int _numDisparities, int _SADWindowSize, int _mode )
int _P1, int _P2, int _disp12MaxDiff, int _preFilterCap, {
int _uniquenessRatio, int _speckleWindowSize, int _speckleRange, minDisparity = _minDisparity;
int _mode ) numDisparities = _numDisparities;
{ kernelSize = _SADWindowSize;
minDisparity = _minDisparity; P1 = _P1;
numDisparities = _numDisparities; P2 = _P2;
SADWindowSize = _SADWindowSize; disp12MaxDiff = _disp12MaxDiff;
P1 = _P1; preFilterCap = _preFilterCap;
P2 = _P2; uniquenessRatio = _uniquenessRatio;
disp12MaxDiff = _disp12MaxDiff; speckleWindowSize = _speckleWindowSize;
preFilterCap = _preFilterCap; speckleRange = _speckleRange;
uniquenessRatio = _uniquenessRatio; mode = _mode;
speckleWindowSize = _speckleWindowSize; regionRemoval = 1;
speckleRange = _speckleRange; kernelType = CV_MODIFIED_CENSUS_TRANSFORM;
mode = _mode; subpixelInterpolationMethod = CV_QUADRATIC_INTERPOLATION;
} }
int minDisparity;
int minDisparity; int numDisparities;
int numDisparities; int kernelSize;
int SADWindowSize; int preFilterCap;
int preFilterCap; int uniquenessRatio;
int uniquenessRatio; int P1;
int P1; int P2;
int P2; int speckleWindowSize;
int speckleWindowSize; int speckleRange;
int speckleRange; int disp12MaxDiff;
int disp12MaxDiff; int mode;
int mode; int regionRemoval;
}; int kernelType;
int subpixelInterpolationMethod;
/* };
For each pixel row1[x], max(-maxD, 0) <= minX <= x < maxX <= width - max(0, -minD),
and for each disparity minD<=d<maxD the function /*
computes the cost (cost[(x-minX)*(maxD - minD) + (d - minD)]), depending on the difference between computes disparity for "roi" in img1 w.r.t. img2 and write it to disp1buf.
row1[x] and row2[x-d]. The subpixel algorithm from that is, disp1buf(x, y)=d means that img1(x+roi.x, y+roi.y) ~ img2(x+roi.x-d, y+roi.y).
"Depth Discontinuities by Pixel-to-Pixel Stereo" by Stan Birchfield and C. Tomasi minD <= d < maxD.
is used, hence the suffix BT. disp2full is the reverse disparity map, that is:
disp2full(x+roi.x,y+roi.y)=d means that img2(x+roi.x, y+roi.y) ~ img1(x+roi.x+d, y+roi.y)
the temporary buffer should contain width2*2 elements note that disp1buf will have the same size as the roi and
*/ disp2full will have the same size as img1 (or img2).
static void calcPixelCostBT( const Mat& img1, const Mat& img2, int y, On exit disp2buf is not the final disparity, it is an intermediate result that becomes
int minD, int maxD, CostType* cost, final after all the tiles are processed.
PixType* buffer, const PixType* tab, the disparity in disp1buf is written with sub-pixel accuracy
int tabOfs, int ) (4 fractional bits, see StereoSGBM::DISP_SCALE),
{ using quadratic interpolation, while the disparity in disp2buf
int x, c, width = img1.cols, cn = img1.channels(); is written as is, without interpolation.
int minX1 = std::max(-maxD, 0), maxX1 = width + std::min(minD, 0); disp2cost also has the same size as img1 (or img2).
int minX2 = std::max(minX1 - maxD, 0), maxX2 = std::min(maxX1 - minD, width); It contains the minimum current cost, used to find the best disparity, corresponding to the minimal cost.
int D = maxD - minD, width1 = maxX1 - minX1, width2 = maxX2 - minX2; */
const PixType *row1 = img1.ptr<PixType>(y), *row2 = img2.ptr<PixType>(y); static void computeDisparityBinarySGBM( const Mat& img1, const Mat& img2,
PixType *prow1 = buffer + width2*2, *prow2 = prow1 + width*cn*2; Mat& disp1, const StereoBinarySGBMParams& params,
Mat& buffer,const Mat& hamDist)
tab += tabOfs; {
for( c = 0; c < cn*2; c++ )
{
prow1[width*c] = prow1[width*c + width-1] =
prow2[width*c] = prow2[width*c + width-1] = tab[0];
}
int n1 = y > 0 ? -(int)img1.step : 0, s1 = y < img1.rows-1 ? (int)img1.step : 0;
int n2 = y > 0 ? -(int)img2.step : 0, s2 = y < img2.rows-1 ? (int)img2.step : 0;
if( cn == 1 )
{
for( x = 1; x < width-1; x++ )
{
prow1[x] = tab[(row1[x+1] - row1[x-1])*2 + row1[x+n1+1] - row1[x+n1-1] + row1[x+s1+1] - row1[x+s1-1]];
prow2[width-1-x] = tab[(row2[x+1] - row2[x-1])*2 + row2[x+n2+1] - row2[x+n2-1] + row2[x+s2+1] - row2[x+s2-1]];
prow1[x+width] = row1[x];
prow2[width-1-x+width] = row2[x];
}
}
else
{
for( x = 1; x < width-1; x++ )
{
prow1[x] = tab[(row1[x*3+3] - row1[x*3-3])*2 + row1[x*3+n1+3] - row1[x*3+n1-3] + row1[x*3+s1+3] - row1[x*3+s1-3]];
prow1[x+width] = tab[(row1[x*3+4] - row1[x*3-2])*2 + row1[x*3+n1+4] - row1[x*3+n1-2] + row1[x*3+s1+4] - row1[x*3+s1-2]];
prow1[x+width*2] = tab[(row1[x*3+5] - row1[x*3-1])*2 + row1[x*3+n1+5] - row1[x*3+n1-1] + row1[x*3+s1+5] - row1[x*3+s1-1]];
prow2[width-1-x] = tab[(row2[x*3+3] - row2[x*3-3])*2 + row2[x*3+n2+3] - row2[x*3+n2-3] + row2[x*3+s2+3] - row2[x*3+s2-3]];
prow2[width-1-x+width] = tab[(row2[x*3+4] - row2[x*3-2])*2 + row2[x*3+n2+4] - row2[x*3+n2-2] + row2[x*3+s2+4] - row2[x*3+s2-2]];
prow2[width-1-x+width*2] = tab[(row2[x*3+5] - row2[x*3-1])*2 + row2[x*3+n2+5] - row2[x*3+n2-1] + row2[x*3+s2+5] - row2[x*3+s2-1]];
prow1[x+width*3] = row1[x*3];
prow1[x+width*4] = row1[x*3+1];
prow1[x+width*5] = row1[x*3+2];
prow2[width-1-x+width*3] = row2[x*3];
prow2[width-1-x+width*4] = row2[x*3+1];
prow2[width-1-x+width*5] = row2[x*3+2];
}
}
memset( cost, 0, width1*D*sizeof(cost[0]) );
buffer -= minX2;
cost -= minX1*D + minD; // simplify the cost indices inside the loop
#if CV_SSE2 #if CV_SSE2
volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE2); static const uchar LSBTab[] =
{
0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
};
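        /*
        LSBTab[i] is the index of the least-significant set bit of the byte i
        (LSBTab[0] is defined as 0). The SSE winner-selection code further down packs
        the "S == minS" comparison into an 8-bit mask with _mm_movemask_epi8 and uses
        LSBTab to turn that mask into the index of the first matching lane, e.g.
        LSBTab[0x28] == 3 because bit 3 is the lowest bit set in 0x28.
        */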
volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE2);
#endif #endif
#if 1 const int ALIGN = 16;
for( c = 0; c < cn*2; c++, prow1 += width, prow2 += width ) const int DISP_SHIFT = StereoMatcher::DISP_SHIFT;
{ const int DISP_SCALE = (1 << DISP_SHIFT);
int diff_scale = c < cn ? 0 : 2; const CostType MAX_COST = SHRT_MAX;
int minD = params.minDisparity, maxD = minD + params.numDisparities;
// precompute Size kernelSize;
// v0 = min(row2[x-1/2], row2[x], row2[x+1/2]) and kernelSize.width = kernelSize.height = params.kernelSize > 0 ? params.kernelSize : 5;
// v1 = max(row2[x-1/2], row2[x], row2[x+1/2]) and int uniquenessRatio = params.uniquenessRatio >= 0 ? params.uniquenessRatio : 10;
for( x = minX2; x < maxX2; x++ ) int disp12MaxDiff = params.disp12MaxDiff > 0 ? params.disp12MaxDiff : 1;
{ int P1 = params.P1 > 0 ? params.P1 : 2, P2 = std::max(params.P2 > 0 ? params.P2 : 5, P1+1);
int v = prow2[x]; int k, width = disp1.cols, height = disp1.rows;
int vl = x > 0 ? (v + prow2[x-1])/2 : v; int minX1 = std::max(-maxD, 0), maxX1 = width + std::min(minD, 0);
int vr = x < width-1 ? (v + prow2[x+1])/2 : v; int D = maxD - minD, width1 = maxX1 - minX1;
int v0 = std::min(vl, vr); v0 = std::min(v0, v); int INVALID_DISP = minD - 1, INVALID_DISP_SCALED = INVALID_DISP*DISP_SCALE;
int v1 = std::max(vl, vr); v1 = std::max(v1, v); int SW2 = kernelSize.width/2, SH2 = kernelSize.height/2;
buffer[x] = (PixType)v0; bool fullDP = params.mode == StereoBinarySGBM::MODE_HH;
buffer[x + width2] = (PixType)v1; int npasses = fullDP ? 2 : 1;
}
if( minX1 >= maxX1 )
for( x = minX1; x < maxX1; x++ ) {
{ disp1 = Scalar::all(INVALID_DISP_SCALED);
int u = prow1[x]; return;
int ul = x > 0 ? (u + prow1[x-1])/2 : u; }
int ur = x < width-1 ? (u + prow1[x+1])/2 : u; CV_Assert( D % 16 == 0 );
int u0 = std::min(ul, ur); u0 = std::min(u0, u); // NR - the number of directions. the loop on x below that computes Lr assumes that NR == 8.
int u1 = std::max(ul, ur); u1 = std::max(u1, u); // if you change NR, please, modify the loop as well.
int D2 = D+16, NRD2 = NR2*D2;
// the number of L_r(.,.) and min_k L_r(.,.) lines in the buffer:
// for 8-way dynamic programming we need the current row and
// the previous row, i.e. 2 rows in total
const int NLR = 2;
const int LrBorder = NLR - 1;
int ww = img2.cols;
short *ham;
ham = (short *)hamDist.data;
// for each possible stereo match (img1(x,y) <=> img2(x-d,y))
// we keep pixel difference cost (C) and the summary cost over NR directions (S).
// we also keep all the partial costs for the previous line L_r(x,d) and also min_k L_r(x, k)
size_t costBufSize = width1*D;
size_t CSBufSize = costBufSize*(fullDP ? height : 1);
size_t minLrSize = (width1 + LrBorder*2)*NR2, LrSize = minLrSize*D2;
int hsumBufNRows = SH2*2 + 2;
size_t totalBufSize = (LrSize + minLrSize)*NLR*sizeof(CostType) + // minLr[] and Lr[]
costBufSize*(hsumBufNRows + 1)*sizeof(CostType) + // hsumBuf, pixdiff
CSBufSize*2*sizeof(CostType) + // C, S
width*16*img1.channels()*sizeof(PixType) + // temp buffer for computing per-pixel cost
width*(sizeof(CostType) + sizeof(DispType)) + 1024; // disp2cost + disp2
if( buffer.empty() || !buffer.isContinuous() ||
buffer.cols*buffer.rows*buffer.elemSize() < totalBufSize )
buffer.create(1, (int)totalBufSize, CV_8U);
// summary cost over different (nDirs) directions
CostType* Cbuf = (CostType*)alignPtr(buffer.ptr(), ALIGN);
CostType* Sbuf = Cbuf + CSBufSize;
CostType* hsumBuf = Sbuf + CSBufSize;
CostType* pixDiff = hsumBuf + costBufSize*hsumBufNRows;
CostType* disp2cost = pixDiff + costBufSize + (LrSize + minLrSize)*NLR;
DispType* disp2ptr = (DispType*)(disp2cost + width);
// PixType* tempBuf = (PixType*)(disp2ptr + width);
// add P2 to every C(x,y). it saves a few operations in the inner loops
for( k = 0; k < width1*D; k++ )
Cbuf[k] = (CostType)P2;
for( int pass = 1; pass <= npasses; pass++ )
{
int x1, y1, x2, y2, dx, dy;
if( pass == 1 )
{
y1 = 0; y2 = height; dy = 1;
x1 = 0; x2 = width1; dx = 1;
}
else
{
y1 = height-1; y2 = -1; dy = -1;
x1 = width1-1; x2 = -1; dx = -1;
}
CostType *Lr[NLR]={0}, *minLr[NLR]={0};
for( k = 0; k < NLR; k++ )
{
// shift Lr[k] and minLr[k] pointers, because we allocated them with the borders,
// and will occasionally use negative indices with the arrays
// we need to shift Lr[k] pointers by 1, to give the space for d=-1.
// however, then the alignment will be imperfect, i.e. bad for SSE,
// thus we shift the pointers by 8 (8*sizeof(short) == 16 - ideal alignment)
Lr[k] = pixDiff + costBufSize + LrSize*k + NRD2*LrBorder + 8;
memset( Lr[k] - LrBorder*NRD2 - 8, 0, LrSize*sizeof(CostType) );
minLr[k] = pixDiff + costBufSize + LrSize*NLR + minLrSize*k + NR2*LrBorder;
memset( minLr[k] - LrBorder*NR2, 0, minLrSize*sizeof(CostType) );
}
for( int y = y1; y != y2; y += dy )
{
int x, d;
DispType* disp1ptr = disp1.ptr<DispType>(y);
CostType* C = Cbuf + (!fullDP ? 0 : y*costBufSize);
CostType* S = Sbuf + (!fullDP ? 0 : y*costBufSize);
if( pass == 1 ) // compute C on the first pass, and reuse it on the second pass, if any.
{
int dy1 = y == 0 ? 0 : y + SH2, dy2 = y == 0 ? SH2 : dy1;
for( k = dy1; k <= dy2; k++ )
{
CostType* hsumAdd = hsumBuf + (std::min(k, height-1) % hsumBufNRows)*costBufSize;
if( k < height )
{
for(int ii = 0; ii <= ww; ii++)
{
for(int dd = 0; dd <= params.numDisparities; dd++)
{
pixDiff[ii * (params.numDisparities)+ dd] = (CostType)(ham[(k * (ww) + ii) * (params.numDisparities +1) + dd]);
}
}
memset(hsumAdd, 0, D*sizeof(CostType));
for( x = 0; x <= SW2*D; x += D )
{
int scale = x == 0 ? SW2 + 1 : 1;
for( d = 0; d < D; d++ )
hsumAdd[d] = (CostType)(hsumAdd[d] + pixDiff[x + d]*scale);
}
if( y > 0 )
{
const CostType* hsumSub = hsumBuf + (std::max(y - SH2 - 1, 0) % hsumBufNRows)*costBufSize;
const CostType* Cprev = !fullDP || y == 0 ? C : C - costBufSize;
for( x = D; x < width1*D; x += D )
{
const CostType* pixAdd = pixDiff + std::min(x + SW2*D, (width1-1)*D);
const CostType* pixSub = pixDiff + std::max(x - (SW2+1)*D, 0);
#if CV_SSE2 #if CV_SSE2
if( useSIMD ) if( useSIMD )
{ {
__m128i _u = _mm_set1_epi8((char)u), _u0 = _mm_set1_epi8((char)u0); for( d = 0; d < D; d += 8 )
__m128i _u1 = _mm_set1_epi8((char)u1), z = _mm_setzero_si128(); {
__m128i ds = _mm_cvtsi32_si128(diff_scale); __m128i hv = _mm_load_si128((const __m128i*)(hsumAdd + x - D + d));
__m128i Cx = _mm_load_si128((__m128i*)(Cprev + x + d));
for( int d = minD; d < maxD; d += 16 ) hv = _mm_adds_epi16(_mm_subs_epi16(hv,
{ _mm_load_si128((const __m128i*)(pixSub + d))),
__m128i _v = _mm_loadu_si128((const __m128i*)(prow2 + width-x-1 + d)); _mm_load_si128((const __m128i*)(pixAdd + d)));
__m128i _v0 = _mm_loadu_si128((const __m128i*)(buffer + width-x-1 + d)); Cx = _mm_adds_epi16(_mm_subs_epi16(Cx,
__m128i _v1 = _mm_loadu_si128((const __m128i*)(buffer + width-x-1 + d + width2)); _mm_load_si128((const __m128i*)(hsumSub + x + d))),
__m128i c0 = _mm_max_epu8(_mm_subs_epu8(_u, _v1), _mm_subs_epu8(_v0, _u)); hv);
__m128i c1 = _mm_max_epu8(_mm_subs_epu8(_v, _u1), _mm_subs_epu8(_u0, _v)); _mm_store_si128((__m128i*)(hsumAdd + x + d), hv);
__m128i diff = _mm_min_epu8(c0, c1); _mm_store_si128((__m128i*)(C + x + d), Cx);
}
c0 = _mm_load_si128((__m128i*)(cost + x*D + d)); }
c1 = _mm_load_si128((__m128i*)(cost + x*D + d + 8)); else
_mm_store_si128((__m128i*)(cost + x*D + d), _mm_adds_epi16(c0, _mm_srl_epi16(_mm_unpacklo_epi8(diff,z), ds)));
_mm_store_si128((__m128i*)(cost + x*D + d + 8), _mm_adds_epi16(c1, _mm_srl_epi16(_mm_unpackhi_epi8(diff,z), ds)));
}
}
else
#endif
{
for( int d = minD; d < maxD; d++ )
{
int v = prow2[width-x-1 + d];
int v0 = buffer[width-x-1 + d];
int v1 = buffer[width-x-1 + d + width2];
int c0 = std::max(0, u - v1); c0 = std::max(c0, v0 - u);
int c1 = std::max(0, v - u1); c1 = std::max(c1, u0 - v);
cost[x*D + d] = (CostType)(cost[x*D+d] + (std::min(c0, c1) >> diff_scale));
}
}
}
}
#else
for( c = 0; c < cn*2; c++, prow1 += width, prow2 += width )
{
for( x = minX1; x < maxX1; x++ )
{
int u = prow1[x];
#if CV_SSE2
if( useSIMD )
{
__m128i _u = _mm_set1_epi8(u), z = _mm_setzero_si128();
for( int d = minD; d < maxD; d += 16 )
{
__m128i _v = _mm_loadu_si128((const __m128i*)(prow2 + width-1-x + d));
__m128i diff = _mm_adds_epu8(_mm_subs_epu8(_u,_v), _mm_subs_epu8(_v,_u));
__m128i c0 = _mm_load_si128((__m128i*)(cost + x*D + d));
__m128i c1 = _mm_load_si128((__m128i*)(cost + x*D + d + 8));
_mm_store_si128((__m128i*)(cost + x*D + d), _mm_adds_epi16(c0, _mm_unpacklo_epi8(diff,z)));
_mm_store_si128((__m128i*)(cost + x*D + d + 8), _mm_adds_epi16(c1, _mm_unpackhi_epi8(diff,z)));
}
}
else
#endif
{
for( int d = minD; d < maxD; d++ )
{
int v = prow2[width-1-x + d];
cost[x*D + d] = (CostType)(cost[x*D + d] + (CostType)std::abs(u - v));
}
}
}
}
#endif
}
/*
computes disparity for "roi" in img1 w.r.t. img2 and write it to disp1buf.
that is, disp1buf(x, y)=d means that img1(x+roi.x, y+roi.y) ~ img2(x+roi.x-d, y+roi.y).
minD <= d < maxD.
disp2full is the reverse disparity map, that is:
disp2full(x+roi.x,y+roi.y)=d means that img2(x+roi.x, y+roi.y) ~ img1(x+roi.x+d, y+roi.y)
note that disp1buf will have the same size as the roi and
disp2full will have the same size as img1 (or img2).
On exit disp2buf is not the final disparity, it is an intermediate result that becomes
final after all the tiles are processed.
the disparity in disp1buf is written with sub-pixel accuracy
(4 fractional bits, see StereoSGBM::DISP_SCALE),
using quadratic interpolation, while the disparity in disp2buf
is written as is, without interpolation.
disp2cost also has the same size as img1 (or img2).
It contains the minimum current cost, used to find the best disparity, corresponding to the minimal cost.
*/
static void computeDisparityBinarySGBM( const Mat& img1, const Mat& img2,
Mat& disp1, const StereoBinarySGBMParams& params,
Mat& buffer )
{
#if CV_SSE2
static const uchar LSBTab[] =
{
0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
};
volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE2);
#endif #endif
{
const int ALIGN = 16; for( d = 0; d < D; d++ )
const int DISP_SHIFT = StereoMatcher::DISP_SHIFT; {
const int DISP_SCALE = (1 << DISP_SHIFT); int hv = hsumAdd[x + d] = (CostType)(hsumAdd[x - D + d] + pixAdd[d] - pixSub[d]);
const CostType MAX_COST = SHRT_MAX; C[x + d] = (CostType)(Cprev[x + d] + hv - hsumSub[x + d]);
}
int minD = params.minDisparity, maxD = minD + params.numDisparities; }
Size SADWindowSize; }
SADWindowSize.width = SADWindowSize.height = params.SADWindowSize > 0 ? params.SADWindowSize : 5; }
int ftzero = std::max(params.preFilterCap, 15) | 1; else
int uniquenessRatio = params.uniquenessRatio >= 0 ? params.uniquenessRatio : 10; {
int disp12MaxDiff = params.disp12MaxDiff > 0 ? params.disp12MaxDiff : 1; for( x = D; x < width1*D; x += D )
int P1 = params.P1 > 0 ? params.P1 : 2, P2 = std::max(params.P2 > 0 ? params.P2 : 5, P1+1); {
int k, width = disp1.cols, height = disp1.rows; const CostType* pixAdd = pixDiff + std::min(x + SW2*D, (width1-1)*D);
int minX1 = std::max(-maxD, 0), maxX1 = width + std::min(minD, 0); const CostType* pixSub = pixDiff + std::max(x - (SW2+1)*D, 0);
int D = maxD - minD, width1 = maxX1 - minX1; for( d = 0; d < D; d++ )
int INVALID_DISP = minD - 1, INVALID_DISP_SCALED = INVALID_DISP*DISP_SCALE; hsumAdd[x + d] = (CostType)(hsumAdd[x - D + d] + pixAdd[d] - pixSub[d]);
int SW2 = SADWindowSize.width/2, SH2 = SADWindowSize.height/2; }
bool fullDP = params.mode == StereoBinarySGBM::MODE_HH; }
int npasses = fullDP ? 2 : 1; }
const int TAB_OFS = 256*4, TAB_SIZE = 256 + TAB_OFS*2; if( y == 0 )
PixType clipTab[TAB_SIZE]; {
int scale = k == 0 ? SH2 + 1 : 1;
for( k = 0; k < TAB_SIZE; k++ ) for( x = 0; x < width1*D; x++ )
clipTab[k] = (PixType)(std::min(std::max(k - TAB_OFS, -ftzero), ftzero) + ftzero); C[x] = (CostType)(C[x] + hsumAdd[x]*scale);
}
if( minX1 >= maxX1 ) }
{ // also, clear the S buffer
disp1 = Scalar::all(INVALID_DISP_SCALED); for( k = 0; k < width1*D; k++ )
return; S[k] = 0;
} }
// clear the left and the right borders
CV_Assert( D % 16 == 0 ); memset( Lr[0] - NRD2*LrBorder - 8, 0, NRD2*LrBorder*sizeof(CostType) );
memset( Lr[0] + width1*NRD2 - 8, 0, NRD2*LrBorder*sizeof(CostType) );
// NR - the number of directions. the loop on x below that computes Lr assumes that NR == 8. memset( minLr[0] - NR2*LrBorder, 0, NR2*LrBorder*sizeof(CostType) );
// if you change NR, please, modify the loop as well. memset( minLr[0] + width1*NR2, 0, NR2*LrBorder*sizeof(CostType) );
int D2 = D+16, NRD2 = NR2*D2; /*
[formula 13 in the paper]
// the number of L_r(.,.) and min_k L_r(.,.) lines in the buffer: compute L_r(p, d) = C(p, d) +
// for 8-way dynamic programming we need the current row and min(L_r(p-r, d),
// the previous row, i.e. 2 rows in total L_r(p-r, d-1) + P1,
const int NLR = 2; L_r(p-r, d+1) + P1,
const int LrBorder = NLR - 1; min_k L_r(p-r, k) + P2) - min_k L_r(p-r, k)
where p = (x,y), r is one of the directions.
// for each possible stereo match (img1(x,y) <=> img2(x-d,y)) we process all the directions at once:
// we keep pixel difference cost (C) and the summary cost over NR directions (S). 0: r=(-dx, 0)
// we also keep all the partial costs for the previous line L_r(x,d) and also min_k L_r(x, k) 1: r=(-1, -dy)
size_t costBufSize = width1*D; 2: r=(0, -dy)
size_t CSBufSize = costBufSize*(fullDP ? height : 1); 3: r=(1, -dy)
size_t minLrSize = (width1 + LrBorder*2)*NR2, LrSize = minLrSize*D2; 4: r=(-2, -dy)
int hsumBufNRows = SH2*2 + 2; 5: r=(-1, -dy*2)
size_t totalBufSize = (LrSize + minLrSize)*NLR*sizeof(CostType) + // minLr[] and Lr[] 6: r=(1, -dy*2)
costBufSize*(hsumBufNRows + 1)*sizeof(CostType) + // hsumBuf, pixdiff 7: r=(2, -dy)
CSBufSize*2*sizeof(CostType) + // C, S */
width*16*img1.channels()*sizeof(PixType) + // temp buffer for computing per-pixel cost for( x = x1; x != x2; x += dx )
width*(sizeof(CostType) + sizeof(DispType)) + 1024; // disp2cost + disp2 {
int xm = x*NR2, xd = xm*D2;
if( buffer.empty() || !buffer.isContinuous() || int delta0 = minLr[0][xm - dx*NR2] + P2, delta1 = minLr[1][xm - NR2 + 1] + P2;
buffer.cols*buffer.rows*buffer.elemSize() < totalBufSize ) int delta2 = minLr[1][xm + 2] + P2, delta3 = minLr[1][xm + NR2 + 3] + P2;
buffer.create(1, (int)totalBufSize, CV_8U); CostType* Lr_p0 = Lr[0] + xd - dx*NRD2;
CostType* Lr_p1 = Lr[1] + xd - NRD2 + D2;
// summary cost over different (nDirs) directions CostType* Lr_p2 = Lr[1] + xd + D2*2;
CostType* Cbuf = (CostType*)alignPtr(buffer.ptr(), ALIGN); CostType* Lr_p3 = Lr[1] + xd + NRD2 + D2*3;
CostType* Sbuf = Cbuf + CSBufSize; Lr_p0[-1] = Lr_p0[D] = Lr_p1[-1] = Lr_p1[D] =
CostType* hsumBuf = Sbuf + CSBufSize; Lr_p2[-1] = Lr_p2[D] = Lr_p3[-1] = Lr_p3[D] = MAX_COST;
CostType* pixDiff = hsumBuf + costBufSize*hsumBufNRows; CostType* Lr_p = Lr[0] + xd;
const CostType* Cp = C + x*D;
CostType* disp2cost = pixDiff + costBufSize + (LrSize + minLrSize)*NLR; CostType* Sp = S + x*D;
DispType* disp2ptr = (DispType*)(disp2cost + width);
PixType* tempBuf = (PixType*)(disp2ptr + width);
// add P2 to every C(x,y). it saves a few operations in the inner loops
for( k = 0; k < width1*D; k++ )
Cbuf[k] = (CostType)P2;
for( int pass = 1; pass <= npasses; pass++ )
{
int x1, y1, x2, y2, dx, dy;
if( pass == 1 )
{
y1 = 0; y2 = height; dy = 1;
x1 = 0; x2 = width1; dx = 1;
}
else
{
y1 = height-1; y2 = -1; dy = -1;
x1 = width1-1; x2 = -1; dx = -1;
}
CostType *Lr[NLR]={0}, *minLr[NLR]={0};
for( k = 0; k < NLR; k++ )
{
// shift Lr[k] and minLr[k] pointers, because we allocated them with the borders,
// and will occasionally use negative indices with the arrays
// we need to shift Lr[k] pointers by 1, to give the space for d=-1.
// however, then the alignment will be imperfect, i.e. bad for SSE,
// thus we shift the pointers by 8 (8*sizeof(short) == 16 - ideal alignment)
Lr[k] = pixDiff + costBufSize + LrSize*k + NRD2*LrBorder + 8;
memset( Lr[k] - LrBorder*NRD2 - 8, 0, LrSize*sizeof(CostType) );
minLr[k] = pixDiff + costBufSize + LrSize*NLR + minLrSize*k + NR2*LrBorder;
memset( minLr[k] - LrBorder*NR2, 0, minLrSize*sizeof(CostType) );
}
for( int y = y1; y != y2; y += dy )
{
int x, d;
DispType* disp1ptr = disp1.ptr<DispType>(y);
CostType* C = Cbuf + (!fullDP ? 0 : y*costBufSize);
CostType* S = Sbuf + (!fullDP ? 0 : y*costBufSize);
if( pass == 1 ) // compute C on the first pass, and reuse it on the second pass, if any.
{
int dy1 = y == 0 ? 0 : y + SH2, dy2 = y == 0 ? SH2 : dy1;
for( k = dy1; k <= dy2; k++ )
{
CostType* hsumAdd = hsumBuf + (std::min(k, height-1) % hsumBufNRows)*costBufSize;
if( k < height )
{
calcPixelCostBT( img1, img2, k, minD, maxD, pixDiff, tempBuf, clipTab, TAB_OFS, ftzero );
memset(hsumAdd, 0, D*sizeof(CostType));
for( x = 0; x <= SW2*D; x += D )
{
int scale = x == 0 ? SW2 + 1 : 1;
for( d = 0; d < D; d++ )
hsumAdd[d] = (CostType)(hsumAdd[d] + pixDiff[x + d]*scale);
}
if( y > 0 )
{
const CostType* hsumSub = hsumBuf + (std::max(y - SH2 - 1, 0) % hsumBufNRows)*costBufSize;
const CostType* Cprev = !fullDP || y == 0 ? C : C - costBufSize;
for( x = D; x < width1*D; x += D )
{
const CostType* pixAdd = pixDiff + std::min(x + SW2*D, (width1-1)*D);
const CostType* pixSub = pixDiff + std::max(x - (SW2+1)*D, 0);
#if CV_SSE2 #if CV_SSE2
if( useSIMD ) if( useSIMD )
{ {
for( d = 0; d < D; d += 8 ) __m128i _P1 = _mm_set1_epi16((short)P1);
{ __m128i _delta0 = _mm_set1_epi16((short)delta0);
__m128i hv = _mm_load_si128((const __m128i*)(hsumAdd + x - D + d)); __m128i _delta1 = _mm_set1_epi16((short)delta1);
__m128i Cx = _mm_load_si128((__m128i*)(Cprev + x + d)); __m128i _delta2 = _mm_set1_epi16((short)delta2);
hv = _mm_adds_epi16(_mm_subs_epi16(hv, __m128i _delta3 = _mm_set1_epi16((short)delta3);
_mm_load_si128((const __m128i*)(pixSub + d))), __m128i _minL0 = _mm_set1_epi16((short)MAX_COST);
_mm_load_si128((const __m128i*)(pixAdd + d))); for( d = 0; d < D; d += 8 )
Cx = _mm_adds_epi16(_mm_subs_epi16(Cx, {
_mm_load_si128((const __m128i*)(hsumSub + x + d))), __m128i Cpd = _mm_load_si128((const __m128i*)(Cp + d));
hv); __m128i L0, L1, L2, L3;
_mm_store_si128((__m128i*)(hsumAdd + x + d), hv); L0 = _mm_load_si128((const __m128i*)(Lr_p0 + d));
_mm_store_si128((__m128i*)(C + x + d), Cx); L1 = _mm_load_si128((const __m128i*)(Lr_p1 + d));
} L2 = _mm_load_si128((const __m128i*)(Lr_p2 + d));
} L3 = _mm_load_si128((const __m128i*)(Lr_p3 + d));
else L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d - 1)), _P1));
L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d + 1)), _P1));
L1 = _mm_min_epi16(L1, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p1 + d - 1)), _P1));
L1 = _mm_min_epi16(L1, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p1 + d + 1)), _P1));
L2 = _mm_min_epi16(L2, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p2 + d - 1)), _P1));
L2 = _mm_min_epi16(L2, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p2 + d + 1)), _P1));
L3 = _mm_min_epi16(L3, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p3 + d - 1)), _P1));
L3 = _mm_min_epi16(L3, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p3 + d + 1)), _P1));
L0 = _mm_min_epi16(L0, _delta0);
L0 = _mm_adds_epi16(_mm_subs_epi16(L0, _delta0), Cpd);
L1 = _mm_min_epi16(L1, _delta1);
L1 = _mm_adds_epi16(_mm_subs_epi16(L1, _delta1), Cpd);
L2 = _mm_min_epi16(L2, _delta2);
L2 = _mm_adds_epi16(_mm_subs_epi16(L2, _delta2), Cpd);
L3 = _mm_min_epi16(L3, _delta3);
L3 = _mm_adds_epi16(_mm_subs_epi16(L3, _delta3), Cpd);
_mm_store_si128( (__m128i*)(Lr_p + d), L0);
_mm_store_si128( (__m128i*)(Lr_p + d + D2), L1);
_mm_store_si128( (__m128i*)(Lr_p + d + D2*2), L2);
_mm_store_si128( (__m128i*)(Lr_p + d + D2*3), L3);
__m128i t0 = _mm_min_epi16(_mm_unpacklo_epi16(L0, L2), _mm_unpackhi_epi16(L0, L2));
__m128i t1 = _mm_min_epi16(_mm_unpacklo_epi16(L1, L3), _mm_unpackhi_epi16(L1, L3));
t0 = _mm_min_epi16(_mm_unpacklo_epi16(t0, t1), _mm_unpackhi_epi16(t0, t1));
_minL0 = _mm_min_epi16(_minL0, t0);
__m128i Sval = _mm_load_si128((const __m128i*)(Sp + d));
L0 = _mm_adds_epi16(L0, L1);
L2 = _mm_adds_epi16(L2, L3);
Sval = _mm_adds_epi16(Sval, L0);
Sval = _mm_adds_epi16(Sval, L2);
_mm_store_si128((__m128i*)(Sp + d), Sval);
}
_minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 8));
_mm_storel_epi64((__m128i*)&minLr[0][xm], _minL0);
}
else
#endif #endif
{ {
for( d = 0; d < D; d++ ) int minL0 = MAX_COST, minL1 = MAX_COST, minL2 = MAX_COST, minL3 = MAX_COST;
{
int hv = hsumAdd[x + d] = (CostType)(hsumAdd[x - D + d] + pixAdd[d] - pixSub[d]); for( d = 0; d < D; d++ )
C[x + d] = (CostType)(Cprev[x + d] + hv - hsumSub[x + d]); {
} int Cpd = Cp[d], L0, L1, L2, L3;
}
} L0 = Cpd + std::min((int)Lr_p0[d], std::min(Lr_p0[d-1] + P1, std::min(Lr_p0[d+1] + P1, delta0))) - delta0;
} L1 = Cpd + std::min((int)Lr_p1[d], std::min(Lr_p1[d-1] + P1, std::min(Lr_p1[d+1] + P1, delta1))) - delta1;
else L2 = Cpd + std::min((int)Lr_p2[d], std::min(Lr_p2[d-1] + P1, std::min(Lr_p2[d+1] + P1, delta2))) - delta2;
{ L3 = Cpd + std::min((int)Lr_p3[d], std::min(Lr_p3[d-1] + P1, std::min(Lr_p3[d+1] + P1, delta3))) - delta3;
for( x = D; x < width1*D; x += D )
{ Lr_p[d] = (CostType)L0;
const CostType* pixAdd = pixDiff + std::min(x + SW2*D, (width1-1)*D); minL0 = std::min(minL0, L0);
const CostType* pixSub = pixDiff + std::max(x - (SW2+1)*D, 0);
Lr_p[d + D2] = (CostType)L1;
for( d = 0; d < D; d++ ) minL1 = std::min(minL1, L1);
hsumAdd[x + d] = (CostType)(hsumAdd[x - D + d] + pixAdd[d] - pixSub[d]);
} Lr_p[d + D2*2] = (CostType)L2;
} minL2 = std::min(minL2, L2);
}
Lr_p[d + D2*3] = (CostType)L3;
if( y == 0 ) minL3 = std::min(minL3, L3);
{
int scale = k == 0 ? SH2 + 1 : 1; Sp[d] = saturate_cast<CostType>(Sp[d] + L0 + L1 + L2 + L3);
for( x = 0; x < width1*D; x++ ) }
C[x] = (CostType)(C[x] + hsumAdd[x]*scale); minLr[0][xm] = (CostType)minL0;
} minLr[0][xm+1] = (CostType)minL1;
} minLr[0][xm+2] = (CostType)minL2;
minLr[0][xm+3] = (CostType)minL3;
// also, clear the S buffer }
for( k = 0; k < width1*D; k++ ) }
S[k] = 0;
} if( pass == npasses )
{
// clear the left and the right borders for( x = 0; x < width; x++ )
memset( Lr[0] - NRD2*LrBorder - 8, 0, NRD2*LrBorder*sizeof(CostType) ); {
memset( Lr[0] + width1*NRD2 - 8, 0, NRD2*LrBorder*sizeof(CostType) ); disp1ptr[x] = disp2ptr[x] = (DispType)INVALID_DISP_SCALED;
memset( minLr[0] - NR2*LrBorder, 0, NR2*LrBorder*sizeof(CostType) ); disp2cost[x] = MAX_COST;
memset( minLr[0] + width1*NR2, 0, NR2*LrBorder*sizeof(CostType) ); }
/* for( x = width1 - 1; x >= 0; x-- )
[formula 13 in the paper] {
compute L_r(p, d) = C(p, d) + CostType* Sp = S + x*D;
min(L_r(p-r, d), int minS = MAX_COST, bestDisp = -1;
L_r(p-r, d-1) + P1,
L_r(p-r, d+1) + P1, if( npasses == 1 )
min_k L_r(p-r, k) + P2) - min_k L_r(p-r, k) {
where p = (x,y), r is one of the directions. int xm = x*NR2, xd = xm*D2;
we process all the directions at once:
0: r=(-dx, 0) int minL0 = MAX_COST;
1: r=(-1, -dy) int delta0 = minLr[0][xm + NR2] + P2;
2: r=(0, -dy) CostType* Lr_p0 = Lr[0] + xd + NRD2;
3: r=(1, -dy) Lr_p0[-1] = Lr_p0[D] = MAX_COST;
4: r=(-2, -dy) CostType* Lr_p = Lr[0] + xd;
5: r=(-1, -dy*2) const CostType* Cp = C + x*D;
6: r=(1, -dy*2)
7: r=(2, -dy)
*/
for( x = x1; x != x2; x += dx )
{
int xm = x*NR2, xd = xm*D2;
int delta0 = minLr[0][xm - dx*NR2] + P2, delta1 = minLr[1][xm - NR2 + 1] + P2;
int delta2 = minLr[1][xm + 2] + P2, delta3 = minLr[1][xm + NR2 + 3] + P2;
CostType* Lr_p0 = Lr[0] + xd - dx*NRD2;
CostType* Lr_p1 = Lr[1] + xd - NRD2 + D2;
CostType* Lr_p2 = Lr[1] + xd + D2*2;
CostType* Lr_p3 = Lr[1] + xd + NRD2 + D2*3;
Lr_p0[-1] = Lr_p0[D] = Lr_p1[-1] = Lr_p1[D] =
Lr_p2[-1] = Lr_p2[D] = Lr_p3[-1] = Lr_p3[D] = MAX_COST;
CostType* Lr_p = Lr[0] + xd;
const CostType* Cp = C + x*D;
CostType* Sp = S + x*D;
#if CV_SSE2
if( useSIMD )
{
__m128i _P1 = _mm_set1_epi16((short)P1);
__m128i _delta0 = _mm_set1_epi16((short)delta0);
__m128i _delta1 = _mm_set1_epi16((short)delta1);
__m128i _delta2 = _mm_set1_epi16((short)delta2);
__m128i _delta3 = _mm_set1_epi16((short)delta3);
__m128i _minL0 = _mm_set1_epi16((short)MAX_COST);
for( d = 0; d < D; d += 8 )
{
__m128i Cpd = _mm_load_si128((const __m128i*)(Cp + d));
__m128i L0, L1, L2, L3;
L0 = _mm_load_si128((const __m128i*)(Lr_p0 + d));
L1 = _mm_load_si128((const __m128i*)(Lr_p1 + d));
L2 = _mm_load_si128((const __m128i*)(Lr_p2 + d));
L3 = _mm_load_si128((const __m128i*)(Lr_p3 + d));
L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d - 1)), _P1));
L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d + 1)), _P1));
L1 = _mm_min_epi16(L1, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p1 + d - 1)), _P1));
L1 = _mm_min_epi16(L1, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p1 + d + 1)), _P1));
L2 = _mm_min_epi16(L2, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p2 + d - 1)), _P1));
L2 = _mm_min_epi16(L2, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p2 + d + 1)), _P1));
L3 = _mm_min_epi16(L3, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p3 + d - 1)), _P1));
L3 = _mm_min_epi16(L3, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p3 + d + 1)), _P1));
L0 = _mm_min_epi16(L0, _delta0);
L0 = _mm_adds_epi16(_mm_subs_epi16(L0, _delta0), Cpd);
L1 = _mm_min_epi16(L1, _delta1);
L1 = _mm_adds_epi16(_mm_subs_epi16(L1, _delta1), Cpd);
L2 = _mm_min_epi16(L2, _delta2);
L2 = _mm_adds_epi16(_mm_subs_epi16(L2, _delta2), Cpd);
L3 = _mm_min_epi16(L3, _delta3);
L3 = _mm_adds_epi16(_mm_subs_epi16(L3, _delta3), Cpd);
_mm_store_si128( (__m128i*)(Lr_p + d), L0);
_mm_store_si128( (__m128i*)(Lr_p + d + D2), L1);
_mm_store_si128( (__m128i*)(Lr_p + d + D2*2), L2);
_mm_store_si128( (__m128i*)(Lr_p + d + D2*3), L3);
__m128i t0 = _mm_min_epi16(_mm_unpacklo_epi16(L0, L2), _mm_unpackhi_epi16(L0, L2));
__m128i t1 = _mm_min_epi16(_mm_unpacklo_epi16(L1, L3), _mm_unpackhi_epi16(L1, L3));
t0 = _mm_min_epi16(_mm_unpacklo_epi16(t0, t1), _mm_unpackhi_epi16(t0, t1));
_minL0 = _mm_min_epi16(_minL0, t0);
__m128i Sval = _mm_load_si128((const __m128i*)(Sp + d));
L0 = _mm_adds_epi16(L0, L1);
L2 = _mm_adds_epi16(L2, L3);
Sval = _mm_adds_epi16(Sval, L0);
Sval = _mm_adds_epi16(Sval, L2);
_mm_store_si128((__m128i*)(Sp + d), Sval);
}
_minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 8));
_mm_storel_epi64((__m128i*)&minLr[0][xm], _minL0);
}
else
#endif
{
int minL0 = MAX_COST, minL1 = MAX_COST, minL2 = MAX_COST, minL3 = MAX_COST;
for( d = 0; d < D; d++ )
{
int Cpd = Cp[d], L0, L1, L2, L3;
L0 = Cpd + std::min((int)Lr_p0[d], std::min(Lr_p0[d-1] + P1, std::min(Lr_p0[d+1] + P1, delta0))) - delta0;
L1 = Cpd + std::min((int)Lr_p1[d], std::min(Lr_p1[d-1] + P1, std::min(Lr_p1[d+1] + P1, delta1))) - delta1;
L2 = Cpd + std::min((int)Lr_p2[d], std::min(Lr_p2[d-1] + P1, std::min(Lr_p2[d+1] + P1, delta2))) - delta2;
L3 = Cpd + std::min((int)Lr_p3[d], std::min(Lr_p3[d-1] + P1, std::min(Lr_p3[d+1] + P1, delta3))) - delta3;
Lr_p[d] = (CostType)L0;
minL0 = std::min(minL0, L0);
Lr_p[d + D2] = (CostType)L1;
minL1 = std::min(minL1, L1);
Lr_p[d + D2*2] = (CostType)L2;
minL2 = std::min(minL2, L2);
Lr_p[d + D2*3] = (CostType)L3;
minL3 = std::min(minL3, L3);
Sp[d] = saturate_cast<CostType>(Sp[d] + L0 + L1 + L2 + L3);
}
minLr[0][xm] = (CostType)minL0;
minLr[0][xm+1] = (CostType)minL1;
minLr[0][xm+2] = (CostType)minL2;
minLr[0][xm+3] = (CostType)minL3;
}
}
if( pass == npasses )
{
for( x = 0; x < width; x++ )
{
disp1ptr[x] = disp2ptr[x] = (DispType)INVALID_DISP_SCALED;
disp2cost[x] = MAX_COST;
}
for( x = width1 - 1; x >= 0; x-- )
{
CostType* Sp = S + x*D;
int minS = MAX_COST, bestDisp = -1;
if( npasses == 1 )
{
int xm = x*NR2, xd = xm*D2;
int minL0 = MAX_COST;
int delta0 = minLr[0][xm + NR2] + P2;
CostType* Lr_p0 = Lr[0] + xd + NRD2;
Lr_p0[-1] = Lr_p0[D] = MAX_COST;
CostType* Lr_p = Lr[0] + xd;
const CostType* Cp = C + x*D;
#if CV_SSE2 #if CV_SSE2
if( useSIMD ) if( useSIMD )
{ {
__m128i _P1 = _mm_set1_epi16((short)P1); __m128i _P1 = _mm_set1_epi16((short)P1);
__m128i _delta0 = _mm_set1_epi16((short)delta0); __m128i _delta0 = _mm_set1_epi16((short)delta0);
__m128i _minL0 = _mm_set1_epi16((short)minL0);
__m128i _minL0 = _mm_set1_epi16((short)minL0); __m128i _minS = _mm_set1_epi16(MAX_COST), _bestDisp = _mm_set1_epi16(-1);
__m128i _minS = _mm_set1_epi16(MAX_COST), _bestDisp = _mm_set1_epi16(-1); __m128i _d8 = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7), _8 = _mm_set1_epi16(8);
__m128i _d8 = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7), _8 = _mm_set1_epi16(8); for( d = 0; d < D; d += 8 )
{
for( d = 0; d < D; d += 8 ) __m128i Cpd = _mm_load_si128((const __m128i*)(Cp + d)), L0;
{ L0 = _mm_load_si128((const __m128i*)(Lr_p0 + d));
__m128i Cpd = _mm_load_si128((const __m128i*)(Cp + d)), L0; L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d - 1)), _P1));
L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d + 1)), _P1));
L0 = _mm_load_si128((const __m128i*)(Lr_p0 + d)); L0 = _mm_min_epi16(L0, _delta0);
L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d - 1)), _P1)); L0 = _mm_adds_epi16(_mm_subs_epi16(L0, _delta0), Cpd);
L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d + 1)), _P1)); _mm_store_si128((__m128i*)(Lr_p + d), L0);
L0 = _mm_min_epi16(L0, _delta0); _minL0 = _mm_min_epi16(_minL0, L0);
L0 = _mm_adds_epi16(_mm_subs_epi16(L0, _delta0), Cpd); L0 = _mm_adds_epi16(L0, *(__m128i*)(Sp + d));
_mm_store_si128((__m128i*)(Sp + d), L0);
_mm_store_si128((__m128i*)(Lr_p + d), L0); __m128i mask = _mm_cmpgt_epi16(_minS, L0);
_minL0 = _mm_min_epi16(_minL0, L0); _minS = _mm_min_epi16(_minS, L0);
L0 = _mm_adds_epi16(L0, *(__m128i*)(Sp + d)); _bestDisp = _mm_xor_si128(_bestDisp, _mm_and_si128(_mm_xor_si128(_bestDisp,_d8), mask));
_mm_store_si128((__m128i*)(Sp + d), L0); _d8 = _mm_adds_epi16(_d8, _8);
}
__m128i mask = _mm_cmpgt_epi16(_minS, L0); short CV_DECL_ALIGNED(16) bestDispBuf[8];
_minS = _mm_min_epi16(_minS, L0); _mm_store_si128((__m128i*)bestDispBuf, _bestDisp);
_bestDisp = _mm_xor_si128(_bestDisp, _mm_and_si128(_mm_xor_si128(_bestDisp,_d8), mask)); _minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 8));
_d8 = _mm_adds_epi16(_d8, _8); _minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 4));
} _minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 2));
__m128i qS = _mm_min_epi16(_minS, _mm_srli_si128(_minS, 8));
short CV_DECL_ALIGNED(16) bestDispBuf[8]; qS = _mm_min_epi16(qS, _mm_srli_si128(qS, 4));
_mm_store_si128((__m128i*)bestDispBuf, _bestDisp); qS = _mm_min_epi16(qS, _mm_srli_si128(qS, 2));
minLr[0][xm] = (CostType)_mm_cvtsi128_si32(_minL0);
_minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 8)); minS = (CostType)_mm_cvtsi128_si32(qS);
_minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 4)); qS = _mm_shuffle_epi32(_mm_unpacklo_epi16(qS, qS), 0);
_minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 2)); qS = _mm_cmpeq_epi16(_minS, qS);
int idx = _mm_movemask_epi8(_mm_packs_epi16(qS, qS)) & 255;
__m128i qS = _mm_min_epi16(_minS, _mm_srli_si128(_minS, 8)); bestDisp = bestDispBuf[LSBTab[idx]];
qS = _mm_min_epi16(qS, _mm_srli_si128(qS, 4)); }
qS = _mm_min_epi16(qS, _mm_srli_si128(qS, 2)); else
minLr[0][xm] = (CostType)_mm_cvtsi128_si32(_minL0);
minS = (CostType)_mm_cvtsi128_si32(qS);
qS = _mm_shuffle_epi32(_mm_unpacklo_epi16(qS, qS), 0);
qS = _mm_cmpeq_epi16(_minS, qS);
int idx = _mm_movemask_epi8(_mm_packs_epi16(qS, qS)) & 255;
bestDisp = bestDispBuf[LSBTab[idx]];
}
else
#endif #endif
{ {
for( d = 0; d < D; d++ ) for( d = 0; d < D; d++ )
{ {
int L0 = Cp[d] + std::min((int)Lr_p0[d], std::min(Lr_p0[d-1] + P1, std::min(Lr_p0[d+1] + P1, delta0))) - delta0; int L0 = Cp[d] + std::min((int)Lr_p0[d], std::min(Lr_p0[d-1] + P1, std::min(Lr_p0[d+1] + P1, delta0))) - delta0;
Lr_p[d] = (CostType)L0;
Lr_p[d] = (CostType)L0; minL0 = std::min(minL0, L0);
minL0 = std::min(minL0, L0); int Sval = Sp[d] = saturate_cast<CostType>(Sp[d] + L0);
if( Sval < minS )
int Sval = Sp[d] = saturate_cast<CostType>(Sp[d] + L0); {
if( Sval < minS ) minS = Sval;
{ bestDisp = d;
minS = Sval; }
bestDisp = d; }
} minLr[0][xm] = (CostType)minL0;
} }
minLr[0][xm] = (CostType)minL0; }
} else
} {
else for( d = 0; d < D; d++ )
{ {
for( d = 0; d < D; d++ ) int Sval = Sp[d];
{ if( Sval < minS )
int Sval = Sp[d]; {
if( Sval < minS ) minS = Sval;
{ bestDisp = d;
minS = Sval; }
bestDisp = d; }
} }
} for( d = 0; d < D; d++ )
} {
if( Sp[d]*(100 - uniquenessRatio) < minS*100 && std::abs(bestDisp - d) > 1 )
for( d = 0; d < D; d++ ) break;
{ }
if( Sp[d]*(100 - uniquenessRatio) < minS*100 && std::abs(bestDisp - d) > 1 ) if( d < D )
break; continue;
} d = bestDisp;
if( d < D ) int _x2 = x + minX1 - d - minD;
continue; if( disp2cost[_x2] > minS )
d = bestDisp; {
int _x2 = x + minX1 - d - minD; disp2cost[_x2] = (CostType)minS;
if( disp2cost[_x2] > minS ) disp2ptr[_x2] = (DispType)(d + minD);
{ }
disp2cost[_x2] = (CostType)minS; if( 0 < d && d < D-1 )
disp2ptr[_x2] = (DispType)(d + minD); {
} if(params.subpixelInterpolationMethod == CV_SIMETRICV_INTERPOLATION)
{
if( 0 < d && d < D-1 ) double m2m1, m3m1, m3, m2, m1;
{ m2 = Sp[d - 1];
// do subpixel quadratic interpolation: m3 = Sp[d + 1];
// fit parabola into (x1=d-1, y1=Sp[d-1]), (x2=d, y2=Sp[d]), (x3=d+1, y3=Sp[d+1]) m1 = Sp[d];
// then find minimum of the parabola. m2m1 = m2 - m1;
int denom2 = std::max(Sp[d-1] + Sp[d+1] - 2*Sp[d], 1); m3m1 = m3 - m1;
d = d*DISP_SCALE + ((Sp[d-1] - Sp[d+1])*DISP_SCALE + denom2)/(denom2*2); if (!(m2m1 == 0 || m3m1 == 0))
} {
else double p;
d *= DISP_SCALE; p = 0;
disp1ptr[x + minX1] = (DispType)(d + minD*DISP_SCALE); if (m2 > m3)
} {
p = (0.5 - 0.25 * ((m3m1 * m3m1) / (m2m1 * m2m1) + (m3m1 / m2m1)));
for( x = minX1; x < maxX1; x++ ) }
{ else
// we round the computed disparity both towards -inf and +inf and check {
// if either of the corresponding disparities in disp2 is consistent. p = -1 * (0.5 - 0.25 * ((m2m1 * m2m1) / (m3m1 * m3m1) + (m2m1 / m3m1)));
// This is to give the computed disparity a chance to look valid if it is. }
int d1 = disp1ptr[x]; if (p >= -0.5 && p <= 0.5)
if( d1 == INVALID_DISP_SCALED ) d = (int)(d * DISP_SCALE + p * DISP_SCALE );
continue; }
int _d = d1 >> DISP_SHIFT; else
int d_ = (d1 + DISP_SCALE-1) >> DISP_SHIFT; {
int _x = x - _d, x_ = x - d_; d *= DISP_SCALE;
if( 0 <= _x && _x < width && disp2ptr[_x] >= minD && std::abs(disp2ptr[_x] - _d) > disp12MaxDiff && }
0 <= x_ && x_ < width && disp2ptr[x_] >= minD && std::abs(disp2ptr[x_] - d_) > disp12MaxDiff ) }
disp1ptr[x] = (DispType)INVALID_DISP_SCALED; else if(params.subpixelInterpolationMethod == CV_QUADRATIC_INTERPOLATION)
} {
} // do subpixel quadratic interpolation:
// fit parabola into (x1=d-1, y1=Sp[d-1]), (x2=d, y2=Sp[d]), (x3=d+1, y3=Sp[d+1])
// now shift the cyclic buffers // then find minimum of the parabola.
std::swap( Lr[0], Lr[1] ); int denom2 = std::max(Sp[d-1] + Sp[d+1] - 2*Sp[d], 1);
std::swap( minLr[0], minLr[1] ); d = d*DISP_SCALE + ((Sp[d-1] - Sp[d+1])*DISP_SCALE + denom2)/(denom2*2);
} }
} }
} else
d *= DISP_SCALE;
class StereoBinarySGBMImpl : public StereoBinarySGBM disp1ptr[x + minX1] = (DispType)(d + minD*DISP_SCALE);
{ }
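                            /*
                            The quadratic branch above refines d by fitting a parabola through the three
                            aggregated costs (d-1, Sp[d-1]), (d, Sp[d]), (d+1, Sp[d+1]) and moving d to the
                            vertex: for samples at offsets -1, 0, +1 the vertex offset is
                            (Sp[d-1] - Sp[d+1]) / (2 * (Sp[d-1] + Sp[d+1] - 2*Sp[d])). The code expresses
                            this in DISP_SCALE fixed point, clamps the denominator to at least 1 to avoid a
                            division by zero on flat cost curves, and the extra "+ denom2" in the numerator
                            effectively rounds the fixed-point result instead of truncating it.
                            */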
public: for( x = minX1; x < maxX1; x++ )
StereoBinarySGBMImpl() {
{ // we round the computed disparity both towards -inf and +inf and check
params = StereoBinarySGBMParams(); // if either of the corresponding disparities in disp2 is consistent.
} // This is to give the computed disparity a chance to look valid if it is.
int d1 = disp1ptr[x];
StereoBinarySGBMImpl( int _minDisparity, int _numDisparities, int _SADWindowSize, if( d1 == INVALID_DISP_SCALED )
int _P1, int _P2, int _disp12MaxDiff, int _preFilterCap, continue;
int _uniquenessRatio, int _speckleWindowSize, int _speckleRange, int _d = d1 >> DISP_SHIFT;
int _mode ) int d_ = (d1 + DISP_SCALE-1) >> DISP_SHIFT;
{ int _x = x - _d, x_ = x - d_;
params = StereoBinarySGBMParams( _minDisparity, _numDisparities, _SADWindowSize, if( 0 <= _x && _x < width && disp2ptr[_x] >= minD && std::abs(disp2ptr[_x] - _d) > disp12MaxDiff &&
_P1, _P2, _disp12MaxDiff, _preFilterCap, 0 <= x_ && x_ < width && disp2ptr[x_] >= minD && std::abs(disp2ptr[x_] - d_) > disp12MaxDiff )
_uniquenessRatio, _speckleWindowSize, _speckleRange, disp1ptr[x] = (DispType)INVALID_DISP_SCALED;
_mode ); }
} }
// now shift the cyclic buffers
void compute( InputArray leftarr, InputArray rightarr, OutputArray disparr ) std::swap( Lr[0], Lr[1] );
{ std::swap( minLr[0], minLr[1] );
Mat left = leftarr.getMat(), right = rightarr.getMat(); }
CV_Assert( left.size() == right.size() && left.type() == right.type() && }
left.depth() == CV_8U ); }
class StereoBinarySGBMImpl : public StereoBinarySGBM, public Matching
disparr.create( left.size(), CV_16S ); {
Mat disp = disparr.getMat(); public:
StereoBinarySGBMImpl():Matching()
computeDisparityBinarySGBM( left, right, disp, params, buffer ); {
medianBlur(disp, disp, 3); params = StereoBinarySGBMParams();
}
if( params.speckleWindowSize > 0 ) StereoBinarySGBMImpl( int _minDisparity, int _numDisparities, int _SADWindowSize,
filterSpeckles(disp, (params.minDisparity - 1)*StereoMatcher::DISP_SCALE, params.speckleWindowSize, int _P1, int _P2, int _disp12MaxDiff, int _preFilterCap,
StereoMatcher::DISP_SCALE*params.speckleRange, buffer); int _uniquenessRatio, int _speckleWindowSize, int _speckleRange,
} int _mode ):Matching(_numDisparities)
{
int getMinDisparity() const { return params.minDisparity; } params = StereoBinarySGBMParams( _minDisparity, _numDisparities, _SADWindowSize,
void setMinDisparity(int minDisparity) { params.minDisparity = minDisparity; } _P1, _P2, _disp12MaxDiff, _preFilterCap,
_uniquenessRatio, _speckleWindowSize, _speckleRange,
int getNumDisparities() const { return params.numDisparities; } _mode );
void setNumDisparities(int numDisparities) { params.numDisparities = numDisparities; } }
void compute( InputArray leftarr, InputArray rightarr, OutputArray disparr )
int getBlockSize() const { return params.SADWindowSize; } {
void setBlockSize(int blockSize) { params.SADWindowSize = blockSize; } Mat left = leftarr.getMat(), right = rightarr.getMat();
CV_Assert( left.size() == right.size() && left.type() == right.type() &&
int getSpeckleWindowSize() const { return params.speckleWindowSize; } left.depth() == CV_8U );
void setSpeckleWindowSize(int speckleWindowSize) { params.speckleWindowSize = speckleWindowSize; } disparr.create( left.size(), CV_16S );
Mat disp = disparr.getMat();
int getSpeckleRange() const { return params.speckleRange; } censusImageLeft.create(left.rows,left.cols,CV_32SC4);
void setSpeckleRange(int speckleRange) { params.speckleRange = speckleRange; } censusImageRight.create(left.rows,left.cols,CV_32SC4);
int getDisp12MaxDiff() const { return params.disp12MaxDiff; } hamDist.create(left.rows, left.cols * (params.numDisparities + 1),CV_16S);
void setDisp12MaxDiff(int disp12MaxDiff) { params.disp12MaxDiff = disp12MaxDiff; }
if(params.kernelType == CV_SPARSE_CENSUS)
int getPreFilterCap() const { return params.preFilterCap; } {
void setPreFilterCap(int preFilterCap) { params.preFilterCap = preFilterCap; } censusTransform(left,right,params.kernelSize,censusImageLeft,censusImageRight,CV_SPARSE_CENSUS);
}
int getUniquenessRatio() const { return params.uniquenessRatio; } else if(params.kernelType == CV_DENSE_CENSUS)
void setUniquenessRatio(int uniquenessRatio) { params.uniquenessRatio = uniquenessRatio; } {
censusTransform(left,right,params.kernelSize,censusImageLeft,censusImageRight,CV_SPARSE_CENSUS);
int getP1() const { return params.P1; } }
void setP1(int P1) { params.P1 = P1; } else if(params.kernelType == CV_CS_CENSUS)
{
int getP2() const { return params.P2; } symetricCensusTransform(left,right,params.kernelSize,censusImageLeft,censusImageRight,CV_CS_CENSUS);
void setP2(int P2) { params.P2 = P2; } }
else if(params.kernelType == CV_MODIFIED_CS_CENSUS)
int getMode() const { return params.mode; } {
void setMode(int mode) { params.mode = mode; } symetricCensusTransform(left,right,params.kernelSize,censusImageLeft,censusImageRight,CV_MODIFIED_CS_CENSUS);
}
void write(FileStorage& fs) const else if(params.kernelType == CV_MODIFIED_CENSUS_TRANSFORM)
{ {
fs << "name" << name_ modifiedCensusTransform(left,right,params.kernelSize,censusImageLeft,censusImageRight,CV_MODIFIED_CENSUS_TRANSFORM,0);
<< "minDisparity" << params.minDisparity }
<< "numDisparities" << params.numDisparities else if(params.kernelType == CV_MEAN_VARIATION)
<< "blockSize" << params.SADWindowSize {
<< "speckleWindowSize" << params.speckleWindowSize parSumsIntensityImage[0].create(left.rows, left.cols,CV_32SC4);
<< "speckleRange" << params.speckleRange parSumsIntensityImage[1].create(left.rows, left.cols,CV_32SC4);
<< "disp12MaxDiff" << params.disp12MaxDiff Integral[0].create(left.rows,left.cols,CV_32SC4);
<< "preFilterCap" << params.preFilterCap Integral[1].create(left.rows,left.cols,CV_32SC4);
<< "uniquenessRatio" << params.uniquenessRatio integral(left, parSumsIntensityImage[0],CV_32S);
<< "P1" << params.P1 integral(right, parSumsIntensityImage[1],CV_32S);
<< "P2" << params.P2 imageMeanKernelSize(parSumsIntensityImage[0], params.kernelSize,Integral[0]);
<< "mode" << params.mode; imageMeanKernelSize(parSumsIntensityImage[1], params.kernelSize, Integral[1]);
} modifiedCensusTransform(left,right,params.kernelSize,censusImageLeft,censusImageRight,CV_MEAN_VARIATION,0,Integral[0], Integral[1]);
}
void read(const FileNode& fn) else if(params.kernelType == CV_STAR_KERNEL)
{ {
FileNode n = fn["name"]; starCensusTransform(left,right,params.kernelSize,censusImageLeft,censusImageRight);
CV_Assert( n.isString() && String(n) == name_ ); }
params.minDisparity = (int)fn["minDisparity"];
params.numDisparities = (int)fn["numDisparities"]; hammingDistanceBlockMatching(censusImageLeft, censusImageRight, hamDist);
params.SADWindowSize = (int)fn["blockSize"];
params.speckleWindowSize = (int)fn["speckleWindowSize"]; computeDisparityBinarySGBM( left, right, disp, params, buffer,hamDist);
params.speckleRange = (int)fn["speckleRange"];
params.disp12MaxDiff = (int)fn["disp12MaxDiff"]; if(params.regionRemoval == CV_SPECKLE_REMOVAL_AVG_ALGORITHM)
params.preFilterCap = (int)fn["preFilterCap"]; {
params.uniquenessRatio = (int)fn["uniquenessRatio"]; int width = left.cols;
params.P1 = (int)fn["P1"]; int height = left.rows;
params.P2 = (int)fn["P2"]; if(previous_size != width * height)
params.mode = (int)fn["mode"]; {
} previous_size = width * height;
speckleX.create(height,width,CV_32SC4);
StereoBinarySGBMParams params; speckleY.create(height,width,CV_32SC4);
Mat buffer; puss.create(height,width,CV_32SC4);
static const char* name_; }
}; Mat aux;
aux.create(height,width,CV_16S);
const char* StereoBinarySGBMImpl::name_ = "StereoMatcher.SGBM"; Median1x9Filter<short>(disp, aux);
Median9x1Filter<short>(aux,disp);
Ptr<StereoBinarySGBM> StereoBinarySGBM::create(int minDisparity, int numDisparities, int SADWindowSize, smallRegionRemoval<short>(disp, params.speckleWindowSize, disp);
int P1, int P2, int disp12MaxDiff, }
int preFilterCap, int uniquenessRatio, else if(params.regionRemoval == CV_SPECKLE_REMOVAL_ALGORITHM)
int speckleWindowSize, int speckleRange, {
int mode) int width = left.cols;
{ int height = left.rows;
return Ptr<StereoBinarySGBM>( Mat aux;
new StereoBinarySGBMImpl(minDisparity, numDisparities, SADWindowSize, aux.create(height,width,CV_16S);
P1, P2, disp12MaxDiff, Median1x9Filter<short>(disp, aux);
preFilterCap, uniquenessRatio, Median9x1Filter<short>(aux,disp);
speckleWindowSize, speckleRange, if( params.speckleWindowSize > 0 )
mode)); filterSpeckles(disp, (params.minDisparity - 1) * StereoMatcher::DISP_SCALE, params.speckleWindowSize,
} StereoMatcher::DISP_SCALE * params.speckleRange, buffer);
}
typedef cv::Point_<short> Point2s; }
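    /*
    compute() above first converts both rectified images into census bit-strings
    (the kernel type selects which pixel pairs are compared) and then scores candidate
    disparities by the Hamming distance between those bit-strings. Below is a minimal
    self-contained sketch of the classic dense 3x3 census transform and its Hamming
    matching cost; the kernels used above (sparse, modified, centre-symmetric,
    mean-variation, star) are variations on the same idea.
    */
#if 0 // illustrative sketch, not compiled
#include <opencv2/core.hpp>

    // Classic dense census on a 3x3 neighbourhood: each neighbour brighter than the
    // centre pixel contributes one bit to the descriptor.
    static void census3x3(const cv::Mat& img, cv::Mat& censusOut)
    {
        CV_Assert(img.type() == CV_8UC1);
        censusOut = cv::Mat::zeros(img.size(), CV_32SC1);
        for (int y = 1; y < img.rows - 1; y++)
            for (int x = 1; x < img.cols - 1; x++)
            {
                int bits = 0;
                uchar c = img.at<uchar>(y, x);
                for (int dy = -1; dy <= 1; dy++)
                    for (int dx = -1; dx <= 1; dx++)
                    {
                        if (dx == 0 && dy == 0) continue;
                        bits = (bits << 1) | (img.at<uchar>(y + dy, x + dx) > c ? 1 : 0);
                    }
                censusOut.at<int>(y, x) = bits;
            }
    }

    // Matching cost between two census codes: number of differing bits.
    static int hammingCost(int a, int b)
    {
        int v = a ^ b, count = 0;
        while (v) { v &= v - 1; count++; }   // Kernighan bit count
        return count;
    }
#endif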
} int getSubPixelInterpolationMethod() const { return params.subpixelInterpolationMethod;}
void setSubPixelInterpolationMethod(int value = CV_QUADRATIC_INTERPOLATION) { CV_Assert(value < 2); params.subpixelInterpolationMethod = value;}
int getBinaryKernelType() const { return params.kernelType;}
void setBinaryKernelType(int value = CV_MODIFIED_CENSUS_TRANSFORM) { CV_Assert(value < 7); params.kernelType = value; }
int getSpekleRemovalTechnique() const { return params.regionRemoval;}
void setSpekleRemovalTechnique(int factor = CV_SPECKLE_REMOVAL_AVG_ALGORITHM) { CV_Assert(factor < 2); params.regionRemoval = factor; }
int getMinDisparity() const { return params.minDisparity; }
void setMinDisparity(int minDisparity) {CV_Assert(minDisparity >= 0); params.minDisparity = minDisparity; }
int getNumDisparities() const { return params.numDisparities; }
void setNumDisparities(int numDisparities) { CV_Assert(numDisparities > 0); params.numDisparities = numDisparities; }
int getBlockSize() const { return params.kernelSize; }
void setBlockSize(int blockSize) {CV_Assert(blockSize % 2 != 0); params.kernelSize = blockSize; }
int getSpeckleWindowSize() const { return params.speckleWindowSize; }
void setSpeckleWindowSize(int speckleWindowSize) {CV_Assert(speckleWindowSize >= 0); params.speckleWindowSize = speckleWindowSize; }
int getSpeckleRange() const { return params.speckleRange; }
void setSpeckleRange(int speckleRange) { CV_Assert(speckleRange >= 0); params.speckleRange = speckleRange; }
int getDisp12MaxDiff() const { return params.disp12MaxDiff; }
void setDisp12MaxDiff(int disp12MaxDiff) {CV_Assert(disp12MaxDiff > 0); params.disp12MaxDiff = disp12MaxDiff; }
int getPreFilterCap() const { return params.preFilterCap; }
void setPreFilterCap(int preFilterCap) { CV_Assert(preFilterCap > 0); params.preFilterCap = preFilterCap; }
int getUniquenessRatio() const { return params.uniquenessRatio; }
void setUniquenessRatio(int uniquenessRatio) { CV_Assert(uniquenessRatio >= 0); params.uniquenessRatio = uniquenessRatio; }
int getP1() const { return params.P1; }
void setP1(int P1) { CV_Assert(P1 > 0); params.P1 = P1; }
int getP2() const { return params.P2; }
void setP2(int P2) {CV_Assert(P2 > 0); CV_Assert(P2 >= 2 * params.P1); params.P2 = P2; }
int getMode() const { return params.mode; }
void setMode(int mode) { params.mode = mode; }
void write(FileStorage& fs) const
{
fs << "name" << name_
<< "minDisparity" << params.minDisparity
<< "numDisparities" << params.numDisparities
<< "blockSize" << params.kernelSize
<< "speckleWindowSize" << params.speckleWindowSize
<< "speckleRange" << params.speckleRange
<< "disp12MaxDiff" << params.disp12MaxDiff
<< "preFilterCap" << params.preFilterCap
<< "uniquenessRatio" << params.uniquenessRatio
<< "P1" << params.P1
<< "P2" << params.P2
<< "mode" << params.mode;
}
void read(const FileNode& fn)
{
FileNode n = fn["name"];
CV_Assert( n.isString() && String(n) == name_ );
params.minDisparity = (int)fn["minDisparity"];
params.numDisparities = (int)fn["numDisparities"];
params.kernelSize = (int)fn["blockSize"];
params.speckleWindowSize = (int)fn["speckleWindowSize"];
params.speckleRange = (int)fn["speckleRange"];
params.disp12MaxDiff = (int)fn["disp12MaxDiff"];
params.preFilterCap = (int)fn["preFilterCap"];
params.uniquenessRatio = (int)fn["uniquenessRatio"];
params.P1 = (int)fn["P1"];
params.P2 = (int)fn["P2"];
params.mode = (int)fn["mode"];
}
StereoBinarySGBMParams params;
Mat buffer;
static const char* name_;
Mat censusImageLeft;
Mat censusImageRight;
Mat partialSumsLR;
Mat agregatedHammingLRCost;
Mat hamDist;
Mat parSumsIntensityImage[2];
Mat Integral[2];
};
const char* StereoBinarySGBMImpl::name_ = "StereoBinaryMatcher.SGBM";
Ptr<StereoBinarySGBM> StereoBinarySGBM::create(int minDisparity, int numDisparities, int kernelSize,
int P1, int P2, int disp12MaxDiff,
int preFilterCap, int uniquenessRatio,
int speckleWindowSize, int speckleRange,
int mode)
{
return Ptr<StereoBinarySGBM>(
new StereoBinarySGBMImpl(minDisparity, numDisparities, kernelSize,
P1, P2, disp12MaxDiff,
preFilterCap, uniquenessRatio,
speckleWindowSize, speckleRange,
mode));
}
typedef cv::Point_<short> Point2s;
}
}
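The getters, setters and the create() factory above form the public interface of the binary SGBM matcher. The snippet below is a minimal usage sketch, not part of this commit: it assumes the module header is available as opencv2/stereo.hpp and exposes the same constants used by the tests further down, and the image file names are placeholders.
// --- usage sketch (illustrative only, not part of the commit) ---
#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/stereo.hpp>
int main()
{
    using namespace cv;
    using namespace cv::stereo;
    Mat left = imread("left.png", IMREAD_GRAYSCALE);   // placeholder path
    Mat right = imread("right.png", IMREAD_GRAYSCALE); // placeholder path
    if (left.empty() || right.empty())
        return -1;
    Mat disp(left.rows, left.cols, CV_16S);
    Ptr<StereoBinarySGBM> sgbm = StereoBinarySGBM::create(0, 64, 9);
    sgbm->setP1(10);   // penalty for +/-1 disparity changes between neighbors
    sgbm->setP2(100);  // penalty for larger disparity jumps (must be >= 2 * P1)
    sgbm->setBinaryKernelType(CV_MODIFIED_CENSUS_TRANSFORM);           // binary descriptor used for matching
    sgbm->setSpekleRemovalTechnique(CV_SPECKLE_REMOVAL_AVG_ALGORITHM); // average-based speckle removal
    sgbm->setSubPixelInterpolationMethod(CV_SIMETRICV_INTERPOLATION);
    sgbm->compute(left, right, disp); // 16-bit fixed-point disparity map
    return 0;
}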
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// Intel License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "test_precomp.hpp"
#include <limits.h>
using namespace cv;
using namespace cv::stereo;
using namespace std;
class CV_BlockMatchingTest : public cvtest::BaseTest
{
public:
CV_BlockMatchingTest();
~CV_BlockMatchingTest();
protected:
void run(int /* idx */);
};
CV_BlockMatchingTest::CV_BlockMatchingTest(){}
CV_BlockMatchingTest::~CV_BlockMatchingTest(){}
static double errorLevel(const Mat &ideal, Mat &actual)
{
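//returns the percentage of image pixels for which the ground truth ("date") is non-zero
//and the computed disparity ("harta") differs from it by more than 2 disparity levels
//in the x16 fixed-point representation (i.e. more than 2 * 16 gray levels)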
uint8_t *date, *harta;
harta = actual.data;
date = ideal.data;
int stride, h;
stride = (int)ideal.step;
h = ideal.rows;
int error = 0;
for (int i = 0; i < ideal.rows; i++)
{
for (int j = 0; j < ideal.cols; j++)
{
if (date[i * stride + j] != 0)
if (abs(date[i * stride + j] - harta[i * stride + j]) > 2 * 16)
{
error += 1;
}
}
}
return (error * 100.0) / (stride * h);
}
void CV_BlockMatchingTest::run(int )
{
Mat image1, image2, gt;
//some test images can be found in the test data folder
//in order for the tests to run successfully, replace
//ts->get_data_path() + "testdata/imL2l.bmp" with the path to the test data on your disk
//for example, if your images are in D:\\ , use D:\\testdata\\imL2l.bmp
image1 = imread(ts->get_data_path() + "testdata/imL2l.bmp", CV_8UC1);
image2 = imread(ts->get_data_path() + "testdata/imL2.bmp", CV_8UC1);
gt = imread(ts->get_data_path() + "testdata/groundtruth.bmp", CV_8UC1);
if(image1.empty() || image2.empty() || gt.empty())
{
ts->printf(cvtest::TS::LOG, "Wrong input data \n");
ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_TEST_DATA);
return;
}
if(image1.rows != image2.rows || image1.cols != image2.cols || gt.cols != image1.cols || gt.rows != image1.rows)
{
ts->printf(cvtest::TS::LOG, "Wrong input / output dimension \n");
ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_TEST_DATA);
return;
}
RNG range;
//set the parameters
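//pick a random binary descriptor type and choose a kernel size (and aggregation window) suited to it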
int binary_descriptor_type = range.uniform(0,7);
int kernel_size, aggregation_window;
if(binary_descriptor_type == 0)
kernel_size = 5;
else if(binary_descriptor_type == 2 || binary_descriptor_type == 3)
kernel_size = 7;
else if(binary_descriptor_type == 1)
kernel_size = 11;
else
kernel_size = 9;
if(binary_descriptor_type == 3)
aggregation_window = 13;
else
aggregation_window = 11;
Mat test = Mat(image1.rows, image1.cols, CV_8UC1);
Ptr<StereoBinaryBM> sbm = StereoBinaryBM::create(16, kernel_size);
//we set the corresponding parameters
sbm->setPreFilterCap(31);
sbm->setMinDisparity(0);
sbm->setTextureThreshold(10);
sbm->setUniquenessRatio(0);
sbm->setSpeckleWindowSize(400);//speckle size
sbm->setSpeckleRange(200);
sbm->setDisp12MaxDiff(0);
sbm->setScalleFactor(16);//the scaling factor
sbm->setBinaryKernelType(binary_descriptor_type);//binary descriptor kernel
sbm->setAgregationWindowSize(aggregation_window);
//speckle removal technique: the user can choose between the average-based speckle removal algorithm
//or the classical algorithm already implemented in OpenCV
sbm->setSpekleRemovalTechnique(CV_SPECKLE_REMOVAL_AVG_ALGORITHM);
sbm->setUsePrefilter(false);//choose whether to pre-filter the images before computing the descriptors
//-- calculate the disparity image
sbm->compute(image1, image2, test);
if(test.empty())
{
ts->printf(cvtest::TS::LOG, "Empty output disparity map \n");
ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
return;
}
if(errorLevel(gt,test) > 20)
{
ts->printf( cvtest::TS::LOG,
"Too big error\n");
ts->set_failed_test_info(cvtest::TS::FAIL_BAD_ACCURACY);
return;
}
}
class CV_SGBlockMatchingTest : public cvtest::BaseTest
{
public:
CV_SGBlockMatchingTest();
~CV_SGBlockMatchingTest();
protected:
void run(int /* idx */);
};
CV_SGBlockMatchingTest::CV_SGBlockMatchingTest(){}
CV_SGBlockMatchingTest::~CV_SGBlockMatchingTest(){}
void CV_SGBlockMatchingTest::run(int )
{
Mat image1, image2, gt;
//some test images can be found in the test data folder
image1 = imread(ts->get_data_path() + "testdata/imL2l.bmp", CV_8UC1);
image2 = imread(ts->get_data_path() + "testdata/imL2.bmp", CV_8UC1);
gt = imread(ts->get_data_path() + "testdata/groundtruth.bmp", CV_8UC1);
if(image1.empty() || image2.empty() || gt.empty())
{
ts->printf(cvtest::TS::LOG, "Wrong input data \n");
ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_TEST_DATA);
return;
}
if(image1.rows != image2.rows || image1.cols != image2.cols || gt.cols != image1.cols || gt.rows != image1.rows)
{
ts->printf(cvtest::TS::LOG, "Wrong input / output dimension \n");
ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_TEST_DATA);
return;
}
RNG range;
//set the parameters
int binary_descriptor_type = range.uniform(0,7);
int kernel_size;
if(binary_descriptor_type == 0)
kernel_size = 5;
else if(binary_descriptor_type == 2 || binary_descriptor_type == 3)
kernel_size = 7;
else if(binary_descriptor_type == 1)
kernel_size = 11;
else
kernel_size = 9;
Mat test = Mat(image1.rows, image1.cols, CV_8UC1);
Mat imgDisparity16S2 = Mat(image1.rows, image1.cols, CV_16S);
Ptr<StereoBinarySGBM> sgbm = StereoBinarySGBM::create(0, 16, kernel_size);
//setting the penalties for sgbm
sgbm->setP1(10);
sgbm->setP2(100);
sgbm->setMinDisparity(0);
sgbm->setNumDisparities(16);//set disparity number
sgbm->setUniquenessRatio(1);
sgbm->setSpeckleWindowSize(400);
sgbm->setSpeckleRange(200);
sgbm->setDisp12MaxDiff(1);
sgbm->setBinaryKernelType(binary_descriptor_type);//set the binary descriptor
sgbm->setSpekleRemovalTechnique(CV_SPECKLE_REMOVAL_AVG_ALGORITHM); //the avg speckle removal algorithm
sgbm->setSubPixelInterpolationMethod(CV_SIMETRICV_INTERPOLATION);// the SIMETRIC V interpolation method
sgbm->compute(image1, image2, imgDisparity16S2);
double minVal; double maxVal;
minMaxLoc(imgDisparity16S2, &minVal, &maxVal);
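//scale the 16-bit fixed-point disparity map to an 8-bit image so it can be compared against the 8-bit ground truth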
imgDisparity16S2.convertTo(test, CV_8UC1, 255 / (maxVal - minVal));
if(test.empty())
{
ts->printf(cvtest::TS::LOG, "Empty output disparity map \n");
ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
return;
}
double error = errorLevel(gt,test);
if(error > 10)
{
ts->printf( cvtest::TS::LOG,
"Too big error\n");
ts->set_failed_test_info(cvtest::TS::FAIL_BAD_ACCURACY);
return;
}
}
TEST(block_matching_simple_test, accuracy) { CV_BlockMatchingTest test; test.safe_run(); }
TEST(SG_block_matching_simple_test, accuracy) { CV_SGBlockMatchingTest test; test.safe_run(); }
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// Intel License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "test_precomp.hpp"
#include <limits.h>
using namespace cv;
using namespace cv::stereo;
using namespace std;
class CV_DescriptorBaseTest : public cvtest::BaseTest
{
public:
CV_DescriptorBaseTest();
~CV_DescriptorBaseTest();
protected:
virtual void imageTransformation(const Mat &img1, const Mat &img2, Mat &out1, Mat &out2) = 0;
virtual void imageTransformation(const Mat &img1, Mat &out1) = 0;
void testROI(const Mat &img);
void testMonotonicity(const Mat &img, Mat &out);
void run(int );
Mat censusImage[2];
Mat censusImageSingle[2];
Mat left;
Mat right;
int kernel_size, descriptor_type;
};
//check that the descriptor computed on a ROI of the image
//produces the same values as the descriptor computed on the full image,
//verified on the rows near the ROI boundaries
void CV_DescriptorBaseTest::testROI(const Mat &img)
{
int pt, pb,w,h;
//initialize random values for the roi top and bottom
pt = rand() % 100;
pb = rand() % 100;
//calculate the new width and height
w = img.cols;
h = img.rows - pt - pb;
int start = pt + kernel_size / 2 + 1;
int stop = h - kernel_size/2 - 1;
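//compare only rows at least kernel_size / 2 + 1 pixels away from the ROI borders,
//so that the descriptor window is fully contained in both the ROI and the full image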
//set the region of interest according to above values
Rect region_of_interest = Rect(0, pt, w, h);
Mat image_roi1 = img(region_of_interest);
Mat p1,p2;
//create 2 images where to put our output
p1.create(image_roi1.rows, image_roi1.cols, CV_32SC4);
p2.create(img.rows, img.cols, CV_32SC4);
imageTransformation(image_roi1,p1);
imageTransformation(img,p2);
int *roi_data = (int *)p1.data;
int *img_data = (int *)p2.data;
//verify result
for(int i = start; i < stop; i++)
{
for(int j = 0; j < w ; j++)
{
if(roi_data[(i - pt) * w + j] != img_data[(i) * w + j])
{
ts->printf(cvtest::TS::LOG, "Something wrong with ROI \n");
ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
return;
}
}
}
}
CV_DescriptorBaseTest::~CV_DescriptorBaseTest()
{
left.release();
right.release();
censusImage[0].release();
censusImage[1].release();
censusImageSingle[0].release();
censusImageSingle[1].release();
}
CV_DescriptorBaseTest::CV_DescriptorBaseTest()
{
//read 2 images from file
//some test images can be found in the test data folder
//in order for the tests to run successfully, replace
//ts->get_data_path() + "testdata/imL2l.bmp" with the path to the test data on your disk
//for example, if your images are in D:\\ , use D:\\testdata\\imL2l.bmp
left = imread(ts->get_data_path() + "testdata/imL2l.bmp", CV_8UC1);
right = imread(ts->get_data_path() + "testdata/imL2.bmp", CV_8UC1);
if(left.empty() || right.empty())
{
ts->printf(cvtest::TS::LOG, "Wrong input data \n");
ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_TEST_DATA);
return;
}
ts->printf(cvtest::TS::LOG, "Data loaded \n");
}
//verify that the descriptor output is not constant (except when all input pixels are equal)
void CV_DescriptorBaseTest::testMonotonicity(const Mat &img, Mat &out)
{
//verify if input data is correct
if(img.rows != out.rows || img.cols != out.cols || img.empty() || out.empty())
{
ts->printf(cvtest::TS::LOG, "Wrong input / output dimension \n");
ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_TEST_DATA);
return;
}
//verify that for an input image with varying pixel values
//the output pixels are not all the same
int same = 0;
uint8_t *data = img.data;
uint8_t val = data[1];
int stride = (int)img.step;
for(int i = 0 ; i < img.rows && !same; i++)
{
for(int j = 0; j < img.cols; j++)
{
if(val != data[i * stride + j])
{
same = 1;
break;
}
}
}
int value_descript = out.data[1];
int accept = 0;
uint8_t *outData = out.data;
for(int i = 0 ; i < img.rows && !accept; i++)
{
for(int j = 0; j < img.cols; j++)
{
//we verify that the output pixels are not all the same
//when the input image has varying pixel values
if(value_descript != outData[i * stride + j] && same)
{
//if we found a value that is different we accept
accept = 1;
break;
}
}
}
if(accept == 1 && same == 0)
{
ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
ts->printf(cvtest::TS::LOG, "The image has all values the same \n");
return;
}
if(accept == 0 && same == 1)
{
ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
ts->printf(cvtest::TS::LOG, "For correct image we get all descriptor values the same \n");
return;
}
ts->set_failed_test_info(cvtest::TS::OK);
}
///////////////////////////////////
//census transform
class CV_CensusTransformTest: public CV_DescriptorBaseTest
{
public:
CV_CensusTransformTest();
protected:
void imageTransformation(const Mat &img1, const Mat &img2, Mat &out1, Mat &out2);
void imageTransformation(const Mat &img1, Mat &out1);
};
CV_CensusTransformTest::CV_CensusTransformTest()
{
kernel_size = 11;
descriptor_type = CV_SPARSE_CENSUS;
}
void CV_CensusTransformTest::imageTransformation(const Mat &img1, const Mat &img2, Mat &out1, Mat &out2)
{
//verify if input data is correct
if(img1.rows != out1.rows || img1.cols != out1.cols || img1.empty() || out1.empty()
|| img2.rows != out2.rows || img2.cols != out2.cols || img2.empty() || out2.empty())
{
ts->printf(cvtest::TS::LOG, "Wrong input / output data \n");
ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_TEST_DATA);
return;
}
if(kernel_size % 2 == 0)
{
ts->printf(cvtest::TS::LOG, "Wrong kernel size;Kernel should be odd \n");
ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_TEST_DATA);
return;
}
censusTransform(img1,img2,kernel_size,out1,out2,descriptor_type);
}
void CV_CensusTransformTest::imageTransformation(const Mat &img1, Mat &out1)
{
//verify if input data is correct
if(img1.rows != out1.rows || img1.cols != out1.cols || img1.empty() || out1.empty())
{
ts->printf(cvtest::TS::LOG, "Wrong input / output data \n");
ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_TEST_DATA);
return;
}
if(kernel_size % 2 == 0)
{
ts->printf(cvtest::TS::LOG, "Wrong kernel size;Kernel should be odd \n");
ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_TEST_DATA);
return;
}
censusTransform(img1,kernel_size,out1,descriptor_type);
}
//////////////////////////////////
//symetric census
class CV_SymetricCensusTest: public CV_DescriptorBaseTest
{
public:
CV_SymetricCensusTest();
protected:
void imageTransformation(const Mat &img1, const Mat &img2, Mat &out1, Mat &out2);
void imageTransformation(const Mat &img1, Mat &out1);
};
CV_SymetricCensusTest::CV_SymetricCensusTest()
{
kernel_size = 7;
descriptor_type = CV_CS_CENSUS;
}
void CV_SymetricCensusTest::imageTransformation(const Mat &img1, const Mat &img2, Mat &out1, Mat &out2)
{
//verify if input data is correct
if(img1.rows != out1.rows || img1.cols != out1.cols || img1.empty() || out1.empty()
|| img2.rows != out2.rows || img2.cols != out2.cols || img2.empty() || out2.empty())
{
ts->printf(cvtest::TS::LOG, "Wrong input / output data \n");
ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_TEST_DATA);
return;
}
if(kernel_size % 2 == 0)
{
ts->printf(cvtest::TS::LOG, "Wrong kernel size;Kernel should be odd \n");
ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_TEST_DATA);
return;
}
symetricCensusTransform(img1,img2,kernel_size,out1,out2,descriptor_type);
}
void CV_SymetricCensusTest::imageTransformation(const Mat &img1, Mat &out1)
{
//verify if input data is correct
if(img1.rows != out1.rows || img1.cols != out1.cols || img1.empty() || out1.empty())
{
ts->printf(cvtest::TS::LOG, "Wrong input / output data \n");
ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_TEST_DATA);
return;
}
if(kernel_size % 2 == 0)
{
ts->printf(cvtest::TS::LOG, "Wrong kernel size;Kernel should be odd \n");
ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_TEST_DATA);
return;
}
symetricCensusTransform(img1,kernel_size,out1,descriptor_type);
}
//////////////////////////////////
//modified census transform
class CV_ModifiedCensusTransformTest: public CV_DescriptorBaseTest
{
public:
CV_ModifiedCensusTransformTest();
protected:
void imageTransformation(const Mat &img1, const Mat &img2, Mat &out1, Mat &out2);
void imageTransformation(const Mat &img1, Mat &out1);
};
CV_ModifiedCensusTransformTest::CV_ModifiedCensusTransformTest()
{
kernel_size = 9;
descriptor_type = CV_MODIFIED_CENSUS_TRANSFORM;
}
void CV_ModifiedCensusTransformTest::imageTransformation(const Mat &img1, const Mat &img2, Mat &out1, Mat &out2)
{
//verify if input data is correct
if(img1.rows != out1.rows || img1.cols != out1.cols || img1.empty() || out1.empty()
|| img2.rows != out2.rows || img2.cols != out2.cols || img2.empty() || out2.empty())
{
ts->printf(cvtest::TS::LOG, "Wrong input / output data \n");
ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_TEST_DATA);
return;
}
if(kernel_size % 2 == 0)
{
ts->printf(cvtest::TS::LOG, "Wrong kernel size;Kernel should be odd \n");
ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_TEST_DATA);
return;
}
modifiedCensusTransform(img1,img2,kernel_size,out1,out2,descriptor_type);
}
void CV_ModifiedCensusTransformTest::imageTransformation(const Mat &img1, Mat &out1)
{
if(img1.rows != out1.rows || img1.cols != out1.cols || img1.empty() || out1.empty())
{
ts->printf(cvtest::TS::LOG, "Wrong input / output data \n");
ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_TEST_DATA);
return;
}
if(kernel_size % 2 == 0)
{
ts->printf(cvtest::TS::LOG, "Wrong kernel size;Kernel should be odd \n");
ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_TEST_DATA);
return;
}
modifiedCensusTransform(img1,kernel_size,out1,descriptor_type);
}
//////////////////////////////////
//star kernel census
class CV_StarKernelCensusTest: public CV_DescriptorBaseTest
{
public:
CV_StarKernelCensusTest();
protected:
void imageTransformation(const Mat &img1, const Mat &img2, Mat &out1, Mat &out2);
void imageTransformation(const Mat &img1, Mat &out1);
};
CV_StarKernelCensusTest :: CV_StarKernelCensusTest()
{
kernel_size = 9;
descriptor_type = CV_STAR_KERNEL;
}
void CV_StarKernelCensusTest :: imageTransformation(const Mat &img1, const Mat &img2, Mat &out1, Mat &out2)
{
//verify if input data is correct
if(img1.rows != out1.rows || img1.cols != out1.cols || img1.empty() || out1.empty()
|| img2.rows != out2.rows || img2.cols != out2.cols || img2.empty() || out2.empty())
{
ts->printf(cvtest::TS::LOG, "Wrong input / output data \n");
ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_TEST_DATA);
return;
}
if(kernel_size % 2 == 0)
{
ts->printf(cvtest::TS::LOG, "Wrong kernel size;Kernel should be odd \n");
ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_TEST_DATA);
return;
}
starCensusTransform(img1,img2,kernel_size,out1,out2);
}
void CV_StarKernelCensusTest::imageTransformation(const Mat &img1, Mat &out1)
{
if(img1.rows != out1.rows || img1.cols != out1.cols || img1.empty() || out1.empty())
{
ts->printf(cvtest::TS::LOG, "Wrong input / output data \n");
ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_TEST_DATA);
return;
}
if(kernel_size % 2 == 0)
{
ts->printf(cvtest::TS::LOG, "Wrong kernel size;Kernel should be odd \n");
ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_TEST_DATA);
return;
}
starCensusTransform(img1,kernel_size,out1);
}
void CV_DescriptorBaseTest::run(int )
{
if (left.empty() || right.empty())
{
ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_TEST_DATA);
ts->printf(cvtest::TS::LOG, "No input images detected\n");
return;
}
testROI(left);
censusImage[0].create(left.rows, left.cols, CV_32SC4);
censusImage[1].create(left.rows, left.cols, CV_32SC4);
censusImageSingle[0].create(left.rows, left.cols, CV_32SC4);
censusImageSingle[1].create(left.rows, left.cols, CV_32SC4);
censusImage[0].setTo(0);
censusImage[1].setTo(0);
censusImageSingle[0].setTo(0);
censusImageSingle[1].setTo(0);
imageTransformation(left, right, censusImage[0], censusImage[1]);
imageTransformation(left, censusImageSingle[0]);
imageTransformation(right, censusImageSingle[1]);
testMonotonicity(left,censusImage[0]);
testMonotonicity(right,censusImage[1]);
testMonotonicity(left,censusImageSingle[0]);
testMonotonicity(right,censusImageSingle[1]);
if (censusImage[0].empty() || censusImage[1].empty() || censusImageSingle[0].empty() || censusImageSingle[1].empty())
{
ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
ts->printf(cvtest::TS::LOG, "The descriptor images are empty \n");
return;
}
int *datl1 = (int *)censusImage[0].data;
int *datr1 = (int *)censusImage[1].data;
int *datl2 = (int *)censusImageSingle[0].data;
int *datr2 = (int *)censusImageSingle[1].data;
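//the two-image and the single-image overloads of each descriptor must produce identical values for the same input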
for(int i = 0; i < censusImage[0].rows - kernel_size/ 2; i++)
{
for(int j = 0; j < censusImage[0].cols; j++)
{
if(datl1[i * censusImage[0].cols + j] != datl2[i * censusImage[0].cols + j])
{
ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
ts->printf(cvtest::TS::LOG, "Mismatch for left images %d \n",descriptor_type);
return;
}
if(datr1[i * censusImage[0].cols + j] != datr2[i * censusImage[0].cols + j])
{
ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
ts->printf(cvtest::TS::LOG, "Mismatch for right images %d \n",descriptor_type);
return;
}
}
}
int min = numeric_limits<int>::min();
int max = numeric_limits<int>::max();
//check if all values are between int min and int max and not NAN
if (0 != cvtest::check(censusImage[0], min, max, 0))
{
ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_TEST_DATA);
return;
}
//check if all values are between int min and int max and not NAN
if (0 != cvtest::check(censusImage[1], min, max, 0))
{
ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_TEST_DATA);
return ;
}
}
TEST(census_transform_testing, accuracy) { CV_CensusTransformTest test; test.safe_run(); }
TEST(symetric_census_testing, accuracy) { CV_SymetricCensusTest test; test.safe_run(); }
TEST(modified_census_testing, accuracy) { CV_ModifiedCensusTransformTest test; test.safe_run(); }
TEST(star_kernel_testing, accuracy) { CV_StarKernelCensusTest test; test.safe_run(); }
@@ -12,8 +12,6 @@
#include <iostream>
#include "opencv2/ts.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/stereo.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/features2d.hpp"
@@ -22,10 +20,9 @@
#include "opencv2/core/cvdef.h"
#include "opencv2/core.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/calib3d.hpp"
#include <algorithm>
#include <cmath>
#endif