minor formatting changes

e69c6fde · Marina Kolpakova · 7c160cdc · e69c6fde · e69c6fde · e69c6fde
Commit e69c6fde authored Jun 18, 2012 by Marina Kolpakova
25 changed files
--- a/modules/gpu/src/cuda/canny.cu
+++ b/modules/gpu/src/cuda/canny.cu
@@ -44,9 +44,9 @@
 #include <algorithm>
 #include "internal_shared.hpp"

-namespace cv { namespace gpu { namespace device 
+namespace cv { namespace gpu { namespace device
 {
-    namespace canny 
+    namespace canny
    {
        __global__ void calcSobelRowPass(const PtrStepb src, PtrStepi dx_buf, PtrStepi dy_buf, int rows, int cols)
        {
@@ -99,7 +99,7 @@ namespace cv { namespace gpu { namespace device
            }
        };

-        template <typename Norm> __global__ void calcMagnitude(const PtrStepi dx_buf, const PtrStepi dy_buf, 
+        template <typename Norm> __global__ void calcMagnitude(const PtrStepi dx_buf, const PtrStepi dy_buf,
            PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols)
        {
            __shared__ int sdx[18][16];
@@ -175,7 +175,7 @@ namespace cv { namespace gpu { namespace device
        }

        //////////////////////////////////////////////////////////////////////////////////////////
-            
+
        #define CANNY_SHIFT 15
        #define TG22        (int)(0.4142135623730950488016887242097*(1<<CANNY_SHIFT) + 0.5)

@@ -236,7 +236,7 @@ namespace cv { namespace gpu { namespace device
                            edge_type = 1 + (int)(m > high_thresh);
                    }
                }
-                
+
                map.ptr(i + 1)[j + 1] = edge_type;
            }
        }
@@ -270,7 +270,7 @@ namespace cv { namespace gpu { namespace device

            const int tid = threadIdx.y * 16 + threadIdx.x;
            const int lx = tid % 18;
-            const int ly = tid / 18; 
+            const int ly = tid / 18;

            if (ly < 14)
                smem[ly][lx] = map.ptr(blockIdx.y * 16 + ly)[blockIdx.x * 16 + lx];
@@ -294,10 +294,10 @@ namespace cv { namespace gpu { namespace device
                        n += smem[threadIdx.y    ][threadIdx.x    ] == 2;
                        n += smem[threadIdx.y    ][threadIdx.x + 1] == 2;
                        n += smem[threadIdx.y    ][threadIdx.x + 2] == 2;
-                        
+
                        n += smem[threadIdx.y + 1][threadIdx.x    ] == 2;
                        n += smem[threadIdx.y + 1][threadIdx.x + 2] == 2;
-                        
+
                        n += smem[threadIdx.y + 2][threadIdx.x    ] == 2;
                        n += smem[threadIdx.y + 2][threadIdx.x + 1] == 2;
                        n += smem[threadIdx.y + 2][threadIdx.x + 2] == 2;
@@ -318,10 +318,10 @@ namespace cv { namespace gpu { namespace device
                    n += smem[threadIdx.y    ][threadIdx.x    ] == 1;
                    n += smem[threadIdx.y    ][threadIdx.x + 1] == 1;
                    n += smem[threadIdx.y    ][threadIdx.x + 2] == 1;
-                    
+
                    n += smem[threadIdx.y + 1][threadIdx.x    ] == 1;
                    n += smem[threadIdx.y + 1][threadIdx.x + 2] == 1;
-                    
+
                    n += smem[threadIdx.y + 2][threadIdx.x    ] == 1;
                    n += smem[threadIdx.y + 2][threadIdx.x + 1] == 1;
                    n += smem[threadIdx.y + 2][threadIdx.x + 2] == 1;
@@ -361,7 +361,7 @@ namespace cv { namespace gpu { namespace device
            #if __CUDA_ARCH__ >= 120

            const int stack_size = 512;
-            
+
            __shared__ unsigned int s_counter;
            __shared__ unsigned int s_ind;
            __shared__ ushort2 s_st[stack_size];
@@ -404,11 +404,11 @@ namespace cv { namespace gpu { namespace device
                        if (subTaskIdx < portion)
                            pos = s_st[s_counter - 1 - subTaskIdx];
                        __syncthreads();
-                            
+
                        if (threadIdx.x == 0)
                            s_counter -= portion;
                        __syncthreads();
-                         
+
                        if (pos.x > 0 && pos.x <= cols && pos.y > 0 && pos.y <= rows)
                        {
                            pos.x += c_dx[threadIdx.x & 7];
@@ -452,7 +452,7 @@ namespace cv { namespace gpu { namespace device
        {
            void* counter_ptr;
            cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, counter) );
-            
+
            unsigned int count;
            cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost) );


--- a/modules/gpu/src/cuda/color.cu
+++ b/modules/gpu/src/cuda/color.cu
@@ -45,7 +45,7 @@
 #include <opencv2/gpu/device/color.hpp>
 #include <cvt_colot_internal.h>

-namespace cv { namespace gpu { namespace device 
+namespace cv { namespace gpu { namespace device
 {
    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_rgba_traits<uchar>::functor_type)
    {
@@ -153,7 +153,7 @@ namespace cv { namespace gpu { namespace device
    {
        enum { smart_block_dim_y = 8 };
        enum { smart_shift = 4 };
-    };    
+    };

    OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_xyz4_traits<uchar>::functor_type)
    {

--- a/modules/gpu/src/cuda/column_filter.cu
+++ b/modules/gpu/src/cuda/column_filter.cu
@@ -48,9 +48,9 @@
 #include "opencv2/gpu/device/border_interpolate.hpp"
 #include "opencv2/gpu/device/static_check.hpp"

-namespace cv { namespace gpu { namespace device 
+namespace cv { namespace gpu { namespace device
 {
-    namespace column_filter 
+    namespace column_filter
    {
        #define MAX_KERNEL_SIZE 32

@@ -146,7 +146,7 @@ namespace cv { namespace gpu { namespace device

            const dim3 block(BLOCK_DIM_X, BLOCK_DIM_Y);
            const dim3 grid(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y * PATCH_PER_BLOCK));
-            
+
            B<T> brd(src.rows);

            linearColumnFilter<KSIZE, T, D><<<grid, block, 0, stream>>>(src, dst, anchor, brd);
@@ -162,7 +162,7 @@ namespace cv { namespace gpu { namespace device
        {
            typedef void (*caller_t)(DevMem2D_<T> src, DevMem2D_<D> dst, int anchor, int cc, cudaStream_t stream);

-            static const caller_t callers[5][33] = 
+            static const caller_t callers[5][33] =
            {
                {
                    0,
@@ -338,9 +338,9 @@ namespace cv { namespace gpu { namespace device
                    linearColumnFilter_caller<30, T, D, BrdColWrap>,
                    linearColumnFilter_caller<31, T, D, BrdColWrap>,
                    linearColumnFilter_caller<32, T, D, BrdColWrap>
-                }               
+                }
            };
-            
+
            loadKernel(kernel, ksize);

            callers[brd_type][ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor, cc, stream);

--- a/modules/gpu/src/cuda/copy_make_border.cu
+++ b/modules/gpu/src/cuda/copy_make_border.cu
@@ -43,9 +43,9 @@
 #include "internal_shared.hpp"
 #include "opencv2/gpu/device/border_interpolate.hpp"

-namespace cv { namespace gpu { namespace device 
+namespace cv { namespace gpu { namespace device
 {
-    namespace imgproc 
+    namespace imgproc
    {
        template <typename Ptr2D, typename T> __global__ void copyMakeBorder(const Ptr2D src, DevMem2D_<T> dst, int top, int left)
        {
@@ -58,9 +58,9 @@ namespace cv { namespace gpu { namespace device

        template <template <typename> class B, typename T> struct CopyMakeBorderDispatcher
        {
-            static void call(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, int top, int left, 
+            static void call(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, int top, int left,
                const typename VecTraits<T>::elem_type* borderValue, cudaStream_t stream)
-            {        
+            {
                dim3 block(32, 8);
                dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

@@ -75,20 +75,20 @@ namespace cv { namespace gpu { namespace device
            }
        };

-        template <typename T, int cn> void copyMakeBorder_gpu(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, 
+        template <typename T, int cn> void copyMakeBorder_gpu(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode,
            const T* borderValue, cudaStream_t stream)
        {
            typedef typename TypeVec<T, cn>::vec_type vec_type;

            typedef void (*caller_t)(const DevMem2D_<vec_type>& src, const DevMem2D_<vec_type>& dst, int top, int left, const T* borderValue, cudaStream_t stream);

-            static const caller_t callers[5] = 
+            static const caller_t callers[5] =
            {
-                CopyMakeBorderDispatcher<BrdReflect101, vec_type>::call, 
-                CopyMakeBorderDispatcher<BrdReplicate, vec_type>::call, 
-                CopyMakeBorderDispatcher<BrdConstant, vec_type>::call, 
-                CopyMakeBorderDispatcher<BrdReflect, vec_type>::call, 
-                CopyMakeBorderDispatcher<BrdWrap, vec_type>::call 
+                CopyMakeBorderDispatcher<BrdReflect101, vec_type>::call,
+                CopyMakeBorderDispatcher<BrdReplicate, vec_type>::call,
+                CopyMakeBorderDispatcher<BrdConstant, vec_type>::call,
+                CopyMakeBorderDispatcher<BrdReflect, vec_type>::call,
+                CopyMakeBorderDispatcher<BrdWrap, vec_type>::call
            };

            callers[borderMode](DevMem2D_<vec_type>(src), DevMem2D_<vec_type>(dst), top, left, borderValue, stream);

--- a/modules/gpu/src/cuda/fast.cu
+++ b/modules/gpu/src/cuda/fast.cu
@@ -40,7 +40,7 @@
 //
 // Copyright (c) 2010, Paul Furgale, Chi Hay Tong
 //
-// The original code was written by Paul Furgale and Chi Hay Tong 
+// The original code was written by Paul Furgale and Chi Hay Tong
 // and later optimized and prepared for integration into OpenCV by Itseez.
 //
 //M*/
@@ -48,9 +48,9 @@
 #include "opencv2/gpu/device/common.hpp"
 #include "opencv2/gpu/device/utility.hpp"

-namespace cv { namespace gpu { namespace device 
+namespace cv { namespace gpu { namespace device
 {
-    namespace fast 
+    namespace fast
    {
        __device__ unsigned int g_counter = 0;

@@ -78,14 +78,14 @@ namespace cv { namespace gpu { namespace device



-            d1 = diffType(v, C[0] & 0xff, th);            
+            d1 = diffType(v, C[0] & 0xff, th);
            d2 = diffType(v, C[2] & 0xff, th);

            if ((d1 | d2) == 0)
                return;

            mask1 |= (d1 & 1) << 0;
-            mask2 |= ((d1 & 2) >> 1) << 0;            
+            mask2 |= ((d1 & 2) >> 1) << 0;

            mask1 |= (d2 & 1) << 8;
            mask2 |= ((d2 & 2) >> 1) << 8;
@@ -141,7 +141,7 @@ namespace cv { namespace gpu { namespace device
                return;*/

            mask1 |= (d1 & 1) << 1;
-            mask2 |= ((d1 & 2) >> 1) << 1; 
+            mask2 |= ((d1 & 2) >> 1) << 1;

            mask1 |= (d2 & 1) << 9;
            mask2 |= ((d2 & 2) >> 1) << 9;
@@ -169,7 +169,7 @@ namespace cv { namespace gpu { namespace device
                return;*/

            mask1 |= (d1 & 1) << 5;
-            mask2 |= ((d1 & 2) >> 1) << 5; 
+            mask2 |= ((d1 & 2) >> 1) << 5;

            mask1 |= (d2 & 1) << 13;
            mask2 |= ((d2 & 2) >> 1) << 13;
@@ -191,7 +191,7 @@ namespace cv { namespace gpu { namespace device
        // 0 -> not a keypoint
        __device__ __forceinline__ bool isKeyPoint(int mask1, int mask2)
        {
-            return (__popc(mask1) > 8 && (c_table[(mask1 >> 3) - 63] & (1 << (mask1 & 7)))) || 
+            return (__popc(mask1) > 8 && (c_table[(mask1 >> 3) - 63] & (1 << (mask1 & 7)))) ||
                   (__popc(mask2) > 8 && (c_table[(mask2 >> 3) - 63] & (1 << (mask2 & 7))));
        }

@@ -212,14 +212,14 @@ namespace cv { namespace gpu { namespace device
                calcMask(C, v, mid, mask1, mask2);

                int isKp = static_cast<int>(isKeyPoint(mask1, mask2));
-                
+
                min = isKp * (mid + 1) + (isKp ^ 1) * min;
                max = (isKp ^ 1) * (mid - 1) + isKp * max;
            }

            return min - 1;
        }
-        
+
        template <bool calcScore, class Mask>
        __global__ void calcKeypoints(const DevMem2Db img, const Mask mask, short2* kpLoc, const unsigned int maxKeypoints, PtrStepi score, const int threshold)
        {
@@ -243,7 +243,7 @@ namespace cv { namespace gpu { namespace device
                C[2] |= static_cast<uint>(img(i - 1, j - 3)) << (3 * 8);
                C[1] |= static_cast<uint>(img(i - 1, j + 3)) << 8;

-                C[3] |= static_cast<uint>(img(i, j - 3));                
+                C[3] |= static_cast<uint>(img(i, j - 3));
                v     = static_cast<int>(img(i, j));
                C[1] |= static_cast<uint>(img(i, j + 3));

@@ -313,7 +313,7 @@ namespace cv { namespace gpu { namespace device
            cudaSafeCall( cudaGetLastError() );

            cudaSafeCall( cudaDeviceSynchronize() );
-            
+
            unsigned int count;
            cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost) );

@@ -335,14 +335,14 @@ namespace cv { namespace gpu { namespace device

                int score = scoreMat(loc.y, loc.x);

-                bool ismax = 
+                bool ismax =
                    score > scoreMat(loc.y - 1, loc.x - 1) &&
                    score > scoreMat(loc.y - 1, loc.x    ) &&
                    score > scoreMat(loc.y - 1, loc.x + 1) &&

                    score > scoreMat(loc.y    , loc.x - 1) &&
                    score > scoreMat(loc.y    , loc.x + 1) &&
-                
+
                    score > scoreMat(loc.y + 1, loc.x - 1) &&
                    score > scoreMat(loc.y + 1, loc.x    ) &&
                    score > scoreMat(loc.y + 1, loc.x + 1);
@@ -375,7 +375,7 @@ namespace cv { namespace gpu { namespace device
            cudaSafeCall( cudaGetLastError() );

            cudaSafeCall( cudaDeviceSynchronize() );
-            
+
            unsigned int new_count;
            cudaSafeCall( cudaMemcpy(&new_count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost) );


--- a/modules/gpu/src/cuda/gftt.cu
+++ b/modules/gpu/src/cuda/gftt.cu
@@ -40,7 +40,7 @@
 //
 // Copyright (c) 2010, Paul Furgale, Chi Hay Tong
 //
-// The original code was written by Paul Furgale and Chi Hay Tong 
+// The original code was written by Paul Furgale and Chi Hay Tong
 // and later optimized and prepared for integration into OpenCV by Itseez.
 //
 //M*/
@@ -50,9 +50,9 @@
 #include "opencv2/gpu/device/common.hpp"
 #include "opencv2/gpu/device/utility.hpp"

-namespace cv { namespace gpu { namespace device 
+namespace cv { namespace gpu { namespace device
 {
-    namespace gfft 
+    namespace gfft
    {
        texture<float, cudaTextureType2D, cudaReadModeElementType> eigTex(0, cudaFilterModePoint, cudaAddressModeClamp);

@@ -117,7 +117,7 @@ namespace cv { namespace gpu { namespace device
            cudaSafeCall( cudaGetLastError() );

            cudaSafeCall( cudaDeviceSynchronize() );
-            
+
            uint count;
            cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(uint), cudaMemcpyDeviceToHost) );

@@ -126,9 +126,9 @@ namespace cv { namespace gpu { namespace device

        class EigGreater
        {
-        public:            
-            __device__ __forceinline__ bool operator()(float2 a, float2 b) const 
-            { 
+        public:
+            __device__ __forceinline__ bool operator()(float2 a, float2 b) const
+            {
                return tex2D(eigTex, a.x, a.y) > tex2D(eigTex, b.x, b.y);
            }
        };

--- a/modules/gpu/src/cuda/hist.cu
+++ b/modules/gpu/src/cuda/hist.cu
@@ -45,7 +45,7 @@
 #include "opencv2/gpu/device/utility.hpp"
 #include "opencv2/gpu/device/saturate_cast.hpp"

-namespace cv { namespace gpu { namespace device 
+namespace cv { namespace gpu { namespace device
 {
    #define UINT_BITS 32U

@@ -65,7 +65,7 @@ namespace cv { namespace gpu { namespace device

    #define USE_SMEM_ATOMICS (__CUDA_ARCH__ >= 120)

-    namespace hist 
+    namespace hist
    {
        #if (!USE_SMEM_ATOMICS)

@@ -173,7 +173,7 @@ namespace cv { namespace gpu { namespace device
        {
            histogram256<<<PARTIAL_HISTOGRAM256_COUNT, HISTOGRAM256_THREADBLOCK_SIZE, 0, stream>>>(
                DevMem2D_<uint>(src),
-                buf, 
+                buf,
                static_cast<uint>(src.rows * src.step / sizeof(uint)),
                src.cols);


--- a/modules/gpu/src/cuda/hog.cu
+++ b/modules/gpu/src/cuda/hog.cu
--- a/modules/gpu/src/cuda/imgproc.cu
+++ b/modules/gpu/src/cuda/imgproc.cu
@@ -970,12 +970,12 @@ namespace cv { namespace gpu { namespace device
        #undef IMPLEMENT_FILTER2D_TEX_READER

        template <typename T, typename D>
-        void filter2D_gpu(DevMem2Db srcWhole, int ofsX, int ofsY, DevMem2Db dst, 
-                          int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, 
+        void filter2D_gpu(DevMem2Db srcWhole, int ofsX, int ofsY, DevMem2Db dst,
+                          int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel,
                          int borderMode, const float* borderValue, cudaStream_t stream)
        {
            typedef void (*func_t)(const DevMem2D_<T> srcWhole, int xoff, int yoff, DevMem2D_<D> dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream);
-            static const func_t funcs[] = 
+            static const func_t funcs[] =
            {
                Filter2DCaller<T, D, BrdReflect101>::call,
                Filter2DCaller<T, D, BrdReplicate>::call,

--- a/modules/gpu/src/cuda/internal_shared.hpp
+++ b/modules/gpu/src/cuda/internal_shared.hpp
@@ -50,9 +50,9 @@
 #include "safe_call.hpp"
 #include "opencv2/gpu/device/common.hpp"

-namespace cv { namespace gpu 
+namespace cv { namespace gpu
 {
-    enum 
+    enum
    {
        BORDER_REFLECT101_GPU = 0,
        BORDER_REPLICATE_GPU,
@@ -60,7 +60,7 @@ namespace cv { namespace gpu
        BORDER_REFLECT_GPU,
        BORDER_WRAP_GPU
    };
-            
+
    // Converts CPU border extrapolation mode into GPU internal analogue.
    // Returns true if the GPU analogue exists, false otherwise.
    bool tryConvertToGpuBorderType(int cpuBorderType, int& gpuBorderType);

--- a/modules/gpu/src/cuda/match_template.cu
+++ b/modules/gpu/src/cuda/match_template.cu
--- a/modules/gpu/src/cuda/mathfunc.cu
+++ b/modules/gpu/src/cuda/mathfunc.cu
@@ -42,9 +42,9 @@

 #include "internal_shared.hpp"

-namespace cv { namespace gpu { namespace device 
+namespace cv { namespace gpu { namespace device
 {
-    namespace mathfunc 
+    namespace mathfunc
    {
        //////////////////////////////////////////////////////////////////////////////////////
        // Cart <-> Polar
@@ -79,7 +79,7 @@ namespace cv { namespace gpu { namespace device
            }
        };
        template <typename Mag, typename Angle>
-        __global__ void cartToPolar(const float* xptr, size_t x_step, const float* yptr, size_t y_step, 
+        __global__ void cartToPolar(const float* xptr, size_t x_step, const float* yptr, size_t y_step,
                                    float* mag, size_t mag_step, float* angle, size_t angle_step, float scale, int width, int height)
        {
 	        const int x = blockDim.x * blockIdx.x + threadIdx.x;
@@ -137,11 +137,11 @@ namespace cv { namespace gpu { namespace device

            grid.x = divUp(x.cols, threads.x);
            grid.y = divUp(x.rows, threads.y);
-            
+
            const float scale = angleInDegrees ? (float)(180.0f / CV_PI) : 1.f;

            cartToPolar<Mag, Angle><<<grid, threads, 0, stream>>>(
-                x.data, x.step/x.elemSize(), y.data, y.step/y.elemSize(), 
+                x.data, x.step/x.elemSize(), y.data, y.step/y.elemSize(),
                mag.data, mag.step/mag.elemSize(), angle.data, angle.step/angle.elemSize(), scale, x.cols, x.rows);
            cudaSafeCall( cudaGetLastError() );

@@ -152,7 +152,7 @@ namespace cv { namespace gpu { namespace device
        void cartToPolar_gpu(DevMem2Df x, DevMem2Df y, DevMem2Df mag, bool magSqr, DevMem2Df angle, bool angleInDegrees, cudaStream_t stream)
        {
            typedef void (*caller_t)(DevMem2Df x, DevMem2Df y, DevMem2Df mag, DevMem2Df angle, bool angleInDegrees, cudaStream_t stream);
-            static const caller_t callers[2][2][2] = 
+            static const caller_t callers[2][2][2] =
            {
                {
                    {
@@ -187,10 +187,10 @@ namespace cv { namespace gpu { namespace device

            grid.x = divUp(mag.cols, threads.x);
            grid.y = divUp(mag.rows, threads.y);
-            
+
            const float scale = angleInDegrees ? (float)(CV_PI / 180.0f) : 1.0f;

-            polarToCart<Mag><<<grid, threads, 0, stream>>>(mag.data, mag.step/mag.elemSize(), 
+            polarToCart<Mag><<<grid, threads, 0, stream>>>(mag.data, mag.step/mag.elemSize(),
                angle.data, angle.step/angle.elemSize(), scale, x.data, x.step/x.elemSize(), y.data, y.step/y.elemSize(), mag.cols, mag.rows);
            cudaSafeCall( cudaGetLastError() );

@@ -201,7 +201,7 @@ namespace cv { namespace gpu { namespace device
        void polarToCart_gpu(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y, bool angleInDegrees, cudaStream_t stream)
        {
            typedef void (*caller_t)(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y, bool angleInDegrees, cudaStream_t stream);
-            static const caller_t callers[2] = 
+            static const caller_t callers[2] =
            {
                polarToCart_caller<NonEmptyMag>,
                polarToCart_caller<EmptyMag>

--- a/modules/gpu/src/cuda/matrix_reductions.cu
+++ b/modules/gpu/src/cuda/matrix_reductions.cu
--- a/modules/gpu/src/cuda/optical_flow.cu
+++ b/modules/gpu/src/cuda/optical_flow.cu
@@ -42,7 +42,7 @@

 #include "opencv2/gpu/device/common.hpp"

-namespace cv { namespace gpu { namespace device 
+namespace cv { namespace gpu { namespace device
 {
    namespace optical_flow
    {
@@ -50,7 +50,7 @@ namespace cv { namespace gpu { namespace device
        #define NUM_VERTS_PER_ARROW 6

        __global__ void NeedleMapAverageKernel(const DevMem2Df u, const PtrStepf v, PtrStepf u_avg, PtrStepf v_avg)
-        {   
+        {
            __shared__ float smem[2 * NEEDLE_MAP_SCALE];

            volatile float* u_col_sum = smem;
@@ -70,7 +70,7 @@ namespace cv { namespace gpu { namespace device
            }

            if (threadIdx.x < 8)
-            {        	
+            {
                // now add the column sums
                const uint X = threadIdx.x;

@@ -80,8 +80,8 @@ namespace cv { namespace gpu { namespace device
                    v_col_sum[threadIdx.x] += v_col_sum[threadIdx.x + 1];
                }

-                if (X | 0xfe == 0xfc) // bits 0 & 1 == 0 
-                { 
+                if (X | 0xfe == 0xfc) // bits 0 & 1 == 0
+                {
                    u_col_sum[threadIdx.x] += u_col_sum[threadIdx.x + 2];
                    v_col_sum[threadIdx.x] += v_col_sum[threadIdx.x + 2];
                }
@@ -110,7 +110,7 @@ namespace cv { namespace gpu { namespace device
                v_avg(blockIdx.y, blockIdx.x) = v_col_sum[0];
            }
        }
-        
+
        void NeedleMapAverage_gpu(DevMem2Df u, DevMem2Df v, DevMem2Df u_avg, DevMem2Df v_avg)
        {
            const dim3 block(NEEDLE_MAP_SCALE);

--- a/modules/gpu/src/cuda/orb.cu
+++ b/modules/gpu/src/cuda/orb.cu
@@ -40,7 +40,7 @@
 //
 // Copyright (c) 2010, Paul Furgale, Chi Hay Tong
 //
-// The original code was written by Paul Furgale and Chi Hay Tong 
+// The original code was written by Paul Furgale and Chi Hay Tong
 // and later optimized and prepared for integration into OpenCV by Itseez.
 //
 //M*/
@@ -51,7 +51,7 @@
 #include "opencv2/gpu/device/utility.hpp"
 #include "opencv2/gpu/device/functional.hpp"

-namespace cv { namespace gpu { namespace device 
+namespace cv { namespace gpu { namespace device
 {
    namespace orb
    {
@@ -59,7 +59,7 @@ namespace cv { namespace gpu { namespace device
        // cull

        int cull_gpu(int* loc, float* response, int size, int n_points)
-        {            
+        {
            thrust::device_ptr<int> loc_ptr(loc);
            thrust::device_ptr<float> response_ptr(response);

@@ -83,10 +83,10 @@ namespace cv { namespace gpu { namespace device
            {
                const short2 loc = loc_[ptidx];

-                const int r = blockSize / 2;                
+                const int r = blockSize / 2;
                const int x0 = loc.x - r;
                const int y0 = loc.y - r;
-                
+
                int a = 0, b = 0, c = 0;

                for (int ind = threadIdx.x; ind < blockSize * blockSize; ind += blockDim.x)
@@ -94,12 +94,12 @@ namespace cv { namespace gpu { namespace device
                    const int i = ind / blockSize;
                    const int j = ind % blockSize;

-                    int Ix = (img(y0 + i, x0 + j + 1) - img(y0 + i, x0 + j - 1)) * 2 + 
-                        (img(y0 + i - 1, x0 + j + 1) - img(y0 + i - 1, x0 + j - 1)) + 
+                    int Ix = (img(y0 + i, x0 + j + 1) - img(y0 + i, x0 + j - 1)) * 2 +
+                        (img(y0 + i - 1, x0 + j + 1) - img(y0 + i - 1, x0 + j - 1)) +
                        (img(y0 + i + 1, x0 + j + 1) - img(y0 + i + 1, x0 + j - 1));

-                    int Iy = (img(y0 + i + 1, x0 + j) - img(y0 + i - 1, x0 + j)) * 2 + 
-                        (img(y0 + i + 1, x0 + j - 1) - img(y0 + i - 1, x0 + j - 1)) + 
+                    int Iy = (img(y0 + i + 1, x0 + j) - img(y0 + i - 1, x0 + j)) * 2 +
+                        (img(y0 + i + 1, x0 + j - 1) - img(y0 + i - 1, x0 + j - 1)) +
                        (img(y0 + i + 1, x0 + j + 1) - img(y0 + i - 1, x0 + j + 1));

                    a += Ix * Ix;
@@ -160,7 +160,7 @@ namespace cv { namespace gpu { namespace device
                int m_01 = 0, m_10 = 0;

                const short2 loc = loc_[ptidx];
-                        
+
                // Treat the center line differently, v=0
                for (int u = threadIdx.x - half_k; u <= half_k; u += blockDim.x)
                    m_10 += u * image(loc.y, loc.x + u);
@@ -173,7 +173,7 @@ namespace cv { namespace gpu { namespace device
                    int v_sum = 0;
                    int m_sum = 0;
                    const int d = c_u_max[v];
-                    
+
                    for (int u = threadIdx.x - d; u <= d; u += blockDim.x)
                    {
                        int val_plus = image(loc.y + v, loc.x + u);
@@ -229,7 +229,7 @@ namespace cv { namespace gpu { namespace device
        {
            __device__ static int calc(const PtrStepb& img, short2 loc, const int* pattern_x, const int* pattern_y, float sina, float cosa, int i)
            {
-                pattern_x += 16 * i; 
+                pattern_x += 16 * i;
                pattern_y += 16 * i;

                int t0, t1, val;
@@ -257,7 +257,7 @@ namespace cv { namespace gpu { namespace device

                t0 = GET_VALUE(14); t1 = GET_VALUE(15);
                val |= (t0 < t1) << 7;
-                
+
                return val;
            }
        };
@@ -266,23 +266,23 @@ namespace cv { namespace gpu { namespace device
        {
            __device__ static int calc(const PtrStepb& img, short2 loc, const int* pattern_x, const int* pattern_y, float sina, float cosa, int i)
            {
-                pattern_x += 12 * i; 
+                pattern_x += 12 * i;
                pattern_y += 12 * i;
-             
+
                int t0, t1, t2, val;

                t0 = GET_VALUE(0); t1 = GET_VALUE(1); t2 = GET_VALUE(2);
                val = t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0);
-                
+
                t0 = GET_VALUE(3); t1 = GET_VALUE(4); t2 = GET_VALUE(5);
                val |= (t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0)) << 2;
-                
+
                t0 = GET_VALUE(6); t1 = GET_VALUE(7); t2 = GET_VALUE(8);
                val |= (t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0)) << 4;
-                
+
                t0 = GET_VALUE(9); t1 = GET_VALUE(10); t2 = GET_VALUE(11);
                val |= (t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0)) << 6;
-                
+
                return val;
            }
        };
@@ -291,9 +291,9 @@ namespace cv { namespace gpu { namespace device
        {
            __device__ static int calc(const PtrStepb& img, short2 loc, const int* pattern_x, const int* pattern_y, float sina, float cosa, int i)
            {
-                pattern_x += 16 * i; 
+                pattern_x += 16 * i;
                pattern_y += 16 * i;
-             
+
                int t0, t1, t2, t3, k, val;
                int a, b;

@@ -304,7 +304,7 @@ namespace cv { namespace gpu { namespace device
                if( t3 > t2 ) t2 = t3, b = 3;
                k = t0 > t2 ? a : b;
                val = k;
-                
+
                t0 = GET_VALUE(4); t1 = GET_VALUE(5);
                t2 = GET_VALUE(6); t3 = GET_VALUE(7);
                a = 0, b = 2;
@@ -312,7 +312,7 @@ namespace cv { namespace gpu { namespace device
                if( t3 > t2 ) t2 = t3, b = 3;
                k = t0 > t2 ? a : b;
                val |= k << 2;
-                
+
                t0 = GET_VALUE(8); t1 = GET_VALUE(9);
                t2 = GET_VALUE(10); t3 = GET_VALUE(11);
                a = 0, b = 2;
@@ -320,7 +320,7 @@ namespace cv { namespace gpu { namespace device
                if( t3 > t2 ) t2 = t3, b = 3;
                k = t0 > t2 ? a : b;
                val |= k << 4;
-                
+
                t0 = GET_VALUE(12); t1 = GET_VALUE(13);
                t2 = GET_VALUE(14); t3 = GET_VALUE(15);
                a = 0, b = 2;
@@ -328,7 +328,7 @@ namespace cv { namespace gpu { namespace device
                if( t3 > t2 ) t2 = t3, b = 3;
                k = t0 > t2 ? a : b;
                val |= k << 6;
-                
+
                return val;
            }
        };
@@ -399,7 +399,7 @@ namespace cv { namespace gpu { namespace device
                y[ptidx] = loc.y * scale;
            }
        }
-        
+
        void mergeLocation_gpu(const short2* loc, float* x, float* y, int npoints, float scale, cudaStream_t stream)
        {
            dim3 block(256);

--- a/modules/gpu/src/cuda/remap.cu
+++ b/modules/gpu/src/cuda/remap.cu
@@ -69,7 +69,7 @@ namespace cv { namespace gpu { namespace device
        {
            static void call(DevMem2D_<T> src, DevMem2Df mapx, DevMem2Df mapy, DevMem2D_<T> dst, const float* borderValue, cudaStream_t stream, int)
            {
-                typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type; 
+                typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;

                dim3 block(32, 8);
                dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
@@ -159,7 +159,7 @@ namespace cv { namespace gpu { namespace device
                    cudaSafeCall( cudaDeviceSynchronize() ); \
                } \
            };
-            
+
        OPENCV_GPU_IMPLEMENT_REMAP_TEX(uchar)
        //OPENCV_GPU_IMPLEMENT_REMAP_TEX(uchar2)
        OPENCV_GPU_IMPLEMENT_REMAP_TEX(uchar4)
@@ -188,7 +188,7 @@ namespace cv { namespace gpu { namespace device

        template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcher
        {
-            static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, DevMem2Df mapx, DevMem2Df mapy, 
+            static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, DevMem2Df mapx, DevMem2Df mapy,
                DevMem2D_<T> dst, const float* borderValue, cudaStream_t stream, int cc)
            {
                if (stream == 0)
@@ -198,13 +198,13 @@ namespace cv { namespace gpu { namespace device
            }
        };

-        template <typename T> void remap_gpu(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, 
+        template <typename T> void remap_gpu(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap,
            DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc)
        {
-            typedef void (*caller_t)(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, 
+            typedef void (*caller_t)(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap,
                DevMem2D_<T> dst, const float* borderValue, cudaStream_t stream, int cc);

-            static const caller_t callers[3][5] = 
+            static const caller_t callers[3][5] =
            {
                {
                    RemapDispatcher<PointFilter, BrdReflect101, T>::call,
@@ -229,7 +229,7 @@ namespace cv { namespace gpu { namespace device
                }
            };

-            callers[interpolation][borderMode](static_cast< DevMem2D_<T> >(src), static_cast< DevMem2D_<T> >(srcWhole), xoff, yoff, xmap, ymap, 
+            callers[interpolation][borderMode](static_cast< DevMem2D_<T> >(src), static_cast< DevMem2D_<T> >(srcWhole), xoff, yoff, xmap, ymap,
                static_cast< DevMem2D_<T> >(dst), borderValue, stream, cc);
        }


--- a/modules/gpu/src/cuda/resize.cu
+++ b/modules/gpu/src/cuda/resize.cu
@@ -228,7 +228,7 @@ namespace cv { namespace gpu { namespace device
            }
        };

-        template <typename T> void resize_gpu(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, 
+        template <typename T> void resize_gpu(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy,
            DevMem2Db dst, int interpolation, cudaStream_t stream)
        {
            typedef void (*caller_t)(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream);
@@ -244,7 +244,7 @@ namespace cv { namespace gpu { namespace device
            if (interpolation == 3 && (fx <= 1.f || fy <= 1.f))
                interpolation = 1;

-            callers[interpolation](static_cast< DevMem2D_<T> >(src), static_cast< DevMem2D_<T> >(srcWhole), xoff, yoff, fx, fy, 
+            callers[interpolation](static_cast< DevMem2D_<T> >(src), static_cast< DevMem2D_<T> >(srcWhole), xoff, yoff, fx, fy,
                static_cast< DevMem2D_<T> >(dst), stream);
        }


--- a/modules/gpu/src/cuda/rgb_to_yv12.cu
+++ b/modules/gpu/src/cuda/rgb_to_yv12.cu
@@ -43,7 +43,7 @@
 #include "opencv2/gpu/device/common.hpp"
 #include "opencv2/gpu/device/vec_traits.hpp"

-namespace cv { namespace gpu { namespace device 
+namespace cv { namespace gpu { namespace device
 {
    namespace video_encoding
    {
@@ -159,12 +159,12 @@ namespace cv { namespace gpu { namespace device
        void YV12_gpu(const DevMem2Db src, int cn, DevMem2Db dst)
        {
            typedef void (*func_t)(const DevMem2Db src, PtrStepb dst);
-            
-            static const func_t funcs[] = 
+
+            static const func_t funcs[] =
            {
                0, Gray_to_YV12_caller, 0, BGR_to_YV12_caller<3>, BGR_to_YV12_caller<4>
            };
-            
+
            funcs[cn](src, dst);
        }
    }

--- a/modules/gpu/src/cuda/row_filter.cu
+++ b/modules/gpu/src/cuda/row_filter.cu
@@ -48,9 +48,9 @@
 #include "opencv2/gpu/device/border_interpolate.hpp"
 #include "opencv2/gpu/device/static_check.hpp"

-namespace cv { namespace gpu { namespace device 
+namespace cv { namespace gpu { namespace device
 {
-    namespace row_filter 
+    namespace row_filter
    {
        #define MAX_KERNEL_SIZE 32

@@ -79,7 +79,7 @@ namespace cv { namespace gpu { namespace device
            typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type sum_t;

            __shared__ sum_t smem[BLOCK_DIM_Y][(PATCH_PER_BLOCK + 2 * HALO_SIZE) * BLOCK_DIM_X];
-            
+
            const int y = blockIdx.y * BLOCK_DIM_Y + threadIdx.y;

            if (y >= src.rows)
@@ -161,7 +161,7 @@ namespace cv { namespace gpu { namespace device
        {
            typedef void (*caller_t)(DevMem2D_<T> src, DevMem2D_<D> dst, int anchor, int cc, cudaStream_t stream);

-            static const caller_t callers[5][33] = 
+            static const caller_t callers[5][33] =
            {
                {
                    0,
@@ -337,9 +337,9 @@ namespace cv { namespace gpu { namespace device
                    linearRowFilter_caller<30, T, D, BrdRowWrap>,
                    linearRowFilter_caller<31, T, D, BrdRowWrap>,
                    linearRowFilter_caller<32, T, D, BrdRowWrap>
-                }               
+                }
            };
-            
+
            loadKernel(kernel, ksize);

            callers[brd_type][ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor, cc, stream);

--- a/modules/gpu/src/cuda/safe_call.hpp
+++ b/modules/gpu/src/cuda/safe_call.hpp
@@ -60,7 +60,7 @@
    #define cublasSafeCall(expr)  ___cublasSafeCall(expr, __FILE__, __LINE__)
 #endif

-namespace cv { namespace gpu 
+namespace cv { namespace gpu
 {
    void nppError(int err, const char *file, const int line, const char *func = "");
    void ncvError(int err, const char *file, const int line, const char *func = "");

--- a/modules/gpu/src/cuda/split_merge.cu
+++ b/modules/gpu/src/cuda/split_merge.cu
@@ -42,12 +42,12 @@

 #include "internal_shared.hpp"

-namespace cv { namespace gpu { namespace device 
+namespace cv { namespace gpu { namespace device
 {
-    namespace split_merge 
+    namespace split_merge
    {
        template <typename T, size_t elem_size = sizeof(T)>
-        struct TypeTraits 
+        struct TypeTraits
        {
            typedef T type;
            typedef T type2;
@@ -74,7 +74,7 @@ namespace cv { namespace gpu { namespace device
        };

        template <typename T>
-        struct TypeTraits<T, 4> 
+        struct TypeTraits<T, 4>
        {
            typedef int type;
            typedef int2 type2;
@@ -83,7 +83,7 @@ namespace cv { namespace gpu { namespace device
        };

        template <typename T>
-        struct TypeTraits<T, 8> 
+        struct TypeTraits<T, 8>
        {
            typedef double type;
            typedef double2 type2;
@@ -95,11 +95,11 @@ namespace cv { namespace gpu { namespace device
        typedef void (*SplitFunction)(const DevMem2Db& src, DevMem2Db* dst, const cudaStream_t& stream);

        //------------------------------------------------------------
-        // Merge    
+        // Merge

        template <typename T>
-        __global__ void mergeC2_(const uchar* src0, size_t src0_step, 
-                                 const uchar* src1, size_t src1_step, 
+        __global__ void mergeC2_(const uchar* src0, size_t src0_step,
+                                 const uchar* src1, size_t src1_step,
                                 int rows, int cols, uchar* dst, size_t dst_step)
        {
            typedef typename TypeTraits<T>::type2 dst_type;
@@ -111,8 +111,8 @@ namespace cv { namespace gpu { namespace device
            const T* src1_y = (const T*)(src1 + y * src1_step);
            dst_type* dst_y = (dst_type*)(dst + y * dst_step);

-            if (x < cols && y < rows) 
-            {                        
+            if (x < cols && y < rows)
+            {
                dst_type dst_elem;
                dst_elem.x = src0_y[x];
                dst_elem.y = src1_y[x];
@@ -122,9 +122,9 @@ namespace cv { namespace gpu { namespace device


        template <typename T>
-        __global__ void mergeC3_(const uchar* src0, size_t src0_step, 
-                                 const uchar* src1, size_t src1_step, 
-                                 const uchar* src2, size_t src2_step, 
+        __global__ void mergeC3_(const uchar* src0, size_t src0_step,
+                                 const uchar* src1, size_t src1_step,
+                                 const uchar* src2, size_t src2_step,
                                 int rows, int cols, uchar* dst, size_t dst_step)
        {
            typedef typename TypeTraits<T>::type3 dst_type;
@@ -137,8 +137,8 @@ namespace cv { namespace gpu { namespace device
            const T* src2_y = (const T*)(src2 + y * src2_step);
            dst_type* dst_y = (dst_type*)(dst + y * dst_step);

-            if (x < cols && y < rows) 
-            {                        
+            if (x < cols && y < rows)
+            {
                dst_type dst_elem;
                dst_elem.x = src0_y[x];
                dst_elem.y = src1_y[x];
@@ -149,9 +149,9 @@ namespace cv { namespace gpu { namespace device


        template <>
-        __global__ void mergeC3_<double>(const uchar* src0, size_t src0_step, 
-                                 const uchar* src1, size_t src1_step, 
-                                 const uchar* src2, size_t src2_step, 
+        __global__ void mergeC3_<double>(const uchar* src0, size_t src0_step,
+                                 const uchar* src1, size_t src1_step,
+                                 const uchar* src2, size_t src2_step,
                                 int rows, int cols, uchar* dst, size_t dst_step)
        {
            const int x = blockIdx.x * blockDim.x + threadIdx.x;
@@ -162,8 +162,8 @@ namespace cv { namespace gpu { namespace device
            const double* src2_y = (const double*)(src2 + y * src2_step);
            double* dst_y = (double*)(dst + y * dst_step);

-            if (x < cols && y < rows) 
-            {                        
+            if (x < cols && y < rows)
+            {
                dst_y[3 * x] = src0_y[x];
                dst_y[3 * x + 1] = src1_y[x];
                dst_y[3 * x + 2] = src2_y[x];
@@ -172,10 +172,10 @@ namespace cv { namespace gpu { namespace device


        template <typename T>
-        __global__ void mergeC4_(const uchar* src0, size_t src0_step, 
-                                 const uchar* src1, size_t src1_step, 
-                                 const uchar* src2, size_t src2_step, 
-                                 const uchar* src3, size_t src3_step, 
+        __global__ void mergeC4_(const uchar* src0, size_t src0_step,
+                                 const uchar* src1, size_t src1_step,
+                                 const uchar* src2, size_t src2_step,
+                                 const uchar* src3, size_t src3_step,
                                 int rows, int cols, uchar* dst, size_t dst_step)
        {
            typedef typename TypeTraits<T>::type4 dst_type;
@@ -189,8 +189,8 @@ namespace cv { namespace gpu { namespace device
            const T* src3_y = (const T*)(src3 + y * src3_step);
            dst_type* dst_y = (dst_type*)(dst + y * dst_step);

-            if (x < cols && y < rows) 
-            {                        
+            if (x < cols && y < rows)
+            {
                dst_type dst_elem;
                dst_elem.x = src0_y[x];
                dst_elem.y = src1_y[x];
@@ -202,10 +202,10 @@ namespace cv { namespace gpu { namespace device


        template <>
-        __global__ void mergeC4_<double>(const uchar* src0, size_t src0_step, 
-                                 const uchar* src1, size_t src1_step, 
-                                 const uchar* src2, size_t src2_step, 
-                                 const uchar* src3, size_t src3_step, 
+        __global__ void mergeC4_<double>(const uchar* src0, size_t src0_step,
+                                 const uchar* src1, size_t src1_step,
+                                 const uchar* src2, size_t src2_step,
+                                 const uchar* src3, size_t src3_step,
                                 int rows, int cols, uchar* dst, size_t dst_step)
        {
            const int x = blockIdx.x * blockDim.x + threadIdx.x;
@@ -217,8 +217,8 @@ namespace cv { namespace gpu { namespace device
            const double* src3_y = (const double*)(src3 + y * src3_step);
            double2* dst_y = (double2*)(dst + y * dst_step);

-            if (x < cols && y < rows) 
-            {                        
+            if (x < cols && y < rows)
+            {
                dst_y[2 * x] = make_double2(src0_y[x], src1_y[x]);
                dst_y[2 * x + 1] = make_double2(src2_y[x], src3_y[x]);
            }
@@ -303,7 +303,7 @@ namespace cv { namespace gpu { namespace device


        template <typename T>
-        __global__ void splitC2_(const uchar* src, size_t src_step, 
+        __global__ void splitC2_(const uchar* src, size_t src_step,
                                int rows, int cols,
                                uchar* dst0, size_t dst0_step,
                                uchar* dst1, size_t dst1_step)
@@ -317,7 +317,7 @@ namespace cv { namespace gpu { namespace device
            T* dst0_y = (T*)(dst0 + y * dst0_step);
            T* dst1_y = (T*)(dst1 + y * dst1_step);

-            if (x < cols && y < rows) 
+            if (x < cols && y < rows)
            {
                src_type src_elem = src_y[x];
                dst0_y[x] = src_elem.x;
@@ -327,7 +327,7 @@ namespace cv { namespace gpu { namespace device


        template <typename T>
-        __global__ void splitC3_(const uchar* src, size_t src_step, 
+        __global__ void splitC3_(const uchar* src, size_t src_step,
                                int rows, int cols,
                                uchar* dst0, size_t dst0_step,
                                uchar* dst1, size_t dst1_step,
@@ -343,7 +343,7 @@ namespace cv { namespace gpu { namespace device
            T* dst1_y = (T*)(dst1 + y * dst1_step);
            T* dst2_y = (T*)(dst2 + y * dst2_step);

-            if (x < cols && y < rows) 
+            if (x < cols && y < rows)
            {
                src_type src_elem = src_y[x];
                dst0_y[x] = src_elem.x;
@@ -368,7 +368,7 @@ namespace cv { namespace gpu { namespace device
            double* dst1_y = (double*)(dst1 + y * dst1_step);
            double* dst2_y = (double*)(dst2 + y * dst2_step);

-            if (x < cols && y < rows) 
+            if (x < cols && y < rows)
            {
                dst0_y[x] = src_y[3 * x];
                dst1_y[x] = src_y[3 * x + 1];
@@ -395,7 +395,7 @@ namespace cv { namespace gpu { namespace device
            T* dst2_y = (T*)(dst2 + y * dst2_step);
            T* dst3_y = (T*)(dst3 + y * dst3_step);

-            if (x < cols && y < rows) 
+            if (x < cols && y < rows)
            {
                src_type src_elem = src_y[x];
                dst0_y[x] = src_elem.x;
@@ -423,7 +423,7 @@ namespace cv { namespace gpu { namespace device
            double* dst2_y = (double*)(dst2 + y * dst2_step);
            double* dst3_y = (double*)(dst3 + y * dst3_step);

-            if (x < cols && y < rows) 
+            if (x < cols && y < rows)
            {
                double2 src_elem1 = src_y[2 * x];
                double2 src_elem2 = src_y[2 * x + 1];

--- a/modules/gpu/src/cuda/stereobm.cu
+++ b/modules/gpu/src/cuda/stereobm.cu
@@ -42,9 +42,9 @@

 #include "internal_shared.hpp"

-namespace cv { namespace gpu { namespace device 
+namespace cv { namespace gpu { namespace device
 {
-    namespace stereobm 
+    namespace stereobm
    {
        //////////////////////////////////////////////////////////////////////////////////////////////////
        /////////////////////////////////////// Stereo BM ////////////////////////////////////////////////
@@ -70,7 +70,7 @@ namespace cv { namespace gpu { namespace device

        template<int RADIUS>
        __device__ unsigned int CalcSSD(volatile unsigned int *col_ssd_cache, volatile unsigned int *col_ssd)
-        {	
+        {
            unsigned int cache = 0;
            unsigned int cache2 = 0;

@@ -401,8 +401,8 @@ namespace cv { namespace gpu { namespace device
            prefilter_kernel<<<grid, threads, 0, stream>>>(output, prefilterCap);
            cudaSafeCall( cudaGetLastError() );

-            if (stream == 0)   
-                cudaSafeCall( cudaDeviceSynchronize() );    
+            if (stream == 0)
+                cudaSafeCall( cudaDeviceSynchronize() );

            cudaSafeCall( cudaUnbindTexture (texForSobel ) );
        }

--- a/modules/gpu/src/cuda/stereobp.cu
+++ b/modules/gpu/src/cuda/stereobp.cu
@@ -44,9 +44,9 @@
 #include "opencv2/gpu/device/saturate_cast.hpp"
 #include "opencv2/gpu/device/limits.hpp"

-namespace cv { namespace gpu { namespace device 
+namespace cv { namespace gpu { namespace device
 {
-    namespace stereobp 
+    namespace stereobp
    {
        ///////////////////////////////////////////////////////////////
        /////////////////////// load constants ////////////////////////

--- a/modules/gpu/src/cuda/stereocsbp.cu
+++ b/modules/gpu/src/cuda/stereocsbp.cu
@@ -44,9 +44,9 @@
 #include "opencv2/gpu/device/saturate_cast.hpp"
 #include "opencv2/gpu/device/limits.hpp"

-namespace cv { namespace gpu { namespace device 
+namespace cv { namespace gpu { namespace device
 {
-    namespace stereocsbp 
+    namespace stereocsbp
    {
        ///////////////////////////////////////////////////////////////
        /////////////////////// load constants ////////////////////////
@@ -62,7 +62,7 @@ namespace cv { namespace gpu { namespace device
        __constant__ int cth;

        __constant__ size_t cimg_step;
-        __constant__ size_t cmsg_step;        
+        __constant__ size_t cmsg_step;
        __constant__ size_t cdisp_step1;
        __constant__ size_t cdisp_step2;

@@ -392,7 +392,7 @@ namespace cv { namespace gpu { namespace device
                get_first_k_initial_local<<<grid, threads, 0, stream>>> (data_cost_selected, disp_selected_pyr, h, w, nr_plane);
            else
                get_first_k_initial_global<<<grid, threads, 0, stream>>>(data_cost_selected, disp_selected_pyr, h, w, nr_plane);
-            
+
            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
@@ -575,7 +575,7 @@ namespace cv { namespace gpu { namespace device
            cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step1, sizeof(size_t)) );
            cudaSafeCall( cudaMemcpyToSymbol(cdisp_step2, &disp_step2, sizeof(size_t)) );
            cudaSafeCall( cudaMemcpyToSymbol(cmsg_step,  &msg_step,  sizeof(size_t)) );
-            
+
            callers[level](disp_selected_pyr, data_cost, rows, cols, h, w, level, nr_plane, channels, stream);
            cudaSafeCall( cudaGetLastError() );

@@ -588,13 +588,13 @@ namespace cv { namespace gpu { namespace device

        template void compute_data_cost(const float* disp_selected_pyr, float* data_cost, size_t msg_step,
                               int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, cudaStream_t stream);
-             
+

        ///////////////////////////////////////////////////////////////
        //////////////////////// init message /////////////////////////
        ///////////////////////////////////////////////////////////////

-         
+
         template <typename T>
        __device__ void get_first_k_element_increase(T* u_new, T* d_new, T* l_new, T* r_new,
                                                     const T* u_cur, const T* d_cur, const T* l_cur, const T* r_cur,
@@ -691,7 +691,7 @@ namespace cv { namespace gpu { namespace device
            cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step1, sizeof(size_t)) );
            cudaSafeCall( cudaMemcpyToSymbol(cdisp_step2, &disp_step2, sizeof(size_t)) );
            cudaSafeCall( cudaMemcpyToSymbol(cmsg_step,   &msg_step, sizeof(size_t)) );
-            
+
            dim3 threads(32, 8, 1);
            dim3 grid(1, 1, 1);

@@ -720,7 +720,7 @@ namespace cv { namespace gpu { namespace device
                          const float* u_cur, const float* d_cur, const float* l_cur, const float* r_cur,
                          float* selected_disp_pyr_new, const float* selected_disp_pyr_cur,
                          float* data_cost_selected, const float* data_cost, size_t msg_step,
-                          int h, int w, int nr_plane, int h2, int w2, int nr_plane2, cudaStream_t stream);        
+                          int h, int w, int nr_plane, int h2, int w2, int nr_plane2, cudaStream_t stream);

        ///////////////////////////////////////////////////////////////
        ////////////////////  calc all iterations /////////////////////
@@ -805,7 +805,7 @@ namespace cv { namespace gpu { namespace device
            for(int t = 0; t < iters; ++t)
            {
                compute_message<<<grid, threads, 0, stream>>>(u, d, l, r, data_cost_selected, selected_disp_pyr_cur, h, w, nr_plane, t & 1);
-                cudaSafeCall( cudaGetLastError() );                
+                cudaSafeCall( cudaGetLastError() );
            }
 			if (stream == 0)
                    cudaSafeCall( cudaDeviceSynchronize() );
@@ -814,7 +814,7 @@ namespace cv { namespace gpu { namespace device
        template void calc_all_iterations(short* u, short* d, short* l, short* r, const short* data_cost_selected, const short* selected_disp_pyr_cur, size_t msg_step,
            int h, int w, int nr_plane, int iters, cudaStream_t stream);

-        template void calc_all_iterations(float* u, float* d, float* l, float* r, const float* data_cost_selected, const float* selected_disp_pyr_cur, size_t msg_step, 
+        template void calc_all_iterations(float* u, float* d, float* l, float* r, const float* data_cost_selected, const float* selected_disp_pyr_cur, size_t msg_step,
            int h, int w, int nr_plane, int iters, cudaStream_t stream);


@@ -879,7 +879,7 @@ namespace cv { namespace gpu { namespace device
                cudaSafeCall( cudaDeviceSynchronize() );
        }

-        template void compute_disp(const short* u, const short* d, const short* l, const short* r, const short* data_cost_selected, const short* disp_selected, size_t msg_step, 
+        template void compute_disp(const short* u, const short* d, const short* l, const short* r, const short* data_cost_selected, const short* disp_selected, size_t msg_step,
            const DevMem2D_<short>& disp, int nr_plane, cudaStream_t stream);

        template void compute_disp(const float* u, const float* d, const float* l, const float* r, const float* data_cost_selected, const float* disp_selected, size_t msg_step,

--- a/modules/gpu/src/cuda/warp.cu
+++ b/modules/gpu/src/cuda/warp.cu
@@ -98,7 +98,7 @@ namespace cv { namespace gpu { namespace device
        {
            dim3 block(32, 8);
            dim3 grid(divUp(xmap.cols, block.x), divUp(xmap.rows, block.y));
-            
+
            buildWarpMaps<Transform><<<grid, block, 0, stream>>>(xmap, ymap);
            cudaSafeCall( cudaGetLastError() );

@@ -158,7 +158,7 @@ namespace cv { namespace gpu { namespace device
        {
            static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, DevMem2D_<T> dst, const float* borderValue, int)
            {
-                typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type; 
+                typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;

                dim3 block(32, 8);
                dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
@@ -256,7 +256,7 @@ namespace cv { namespace gpu { namespace device
        #undef OPENCV_GPU_IMPLEMENT_WARP_TEX

        template <class Transform, template <typename> class Filter, template <typename> class B, typename T> struct WarpDispatcher
-        { 
+        {
            static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, DevMem2D_<T> dst, const float* borderValue, cudaStream_t stream, int cc)
            {
                if (stream == 0)
@@ -266,7 +266,7 @@ namespace cv { namespace gpu { namespace device
            }
        };

-        template <class Transform, typename T> 
+        template <class Transform, typename T>
        void warp_caller(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Db dst, int interpolation,
                         int borderMode, const float* borderValue, cudaStream_t stream, int cc)
        {