Merge pull request #974 from jet47:gpu-core-refactoring

81c6b46f · Roman Donchenko · OpenCV Buildbot · e16af9bd · 4f0d72bf · 81c6b46f
Commit 81c6b46f authored Jun 11, 2013 by Roman Donchenko Committed by OpenCV Buildbot Jun 11, 2013
93 changed files
--- a/doc/check_docs2.py
+++ b/doc/check_docs2.py
@@ -201,9 +201,9 @@ def process_module(module, path):
            hdrlist.append(os.path.join(root, filename))
    if module == "gpu":
-        hdrlist.append(os.path.join(path, "..", "core", "include", "opencv2", "core", "cuda_devptrs.hpp"))
+        hdrlist.append(os.path.join(path, "..", "core", "include", "opencv2", "core", "gpu_types.hpp"))
-        hdrlist.append(os.path.join(path, "..", "core", "include", "opencv2", "core", "gpumat.hpp"))
+        hdrlist.append(os.path.join(path, "..", "core", "include", "opencv2", "core", "gpu.hpp"))
-        hdrlist.append(os.path.join(path, "..", "core", "include", "opencv2", "core", "stream_accessor.hpp"))
+        hdrlist.append(os.path.join(path, "..", "core", "include", "opencv2", "core", "gpu_stream_accessor.hpp"))
    decls = []
    for hname in hdrlist:

--- a/modules/core/include/opencv2/core/base.hpp
+++ b/modules/core/include/opencv2/core/base.hpp
@@ -493,6 +493,9 @@ namespace ogl
 namespace gpu
 {
    class CV_EXPORTS GpuMat;
+    class CV_EXPORTS CudaMem;
+    class CV_EXPORTS Stream;
+    class CV_EXPORTS Event;
 }
 } // cv

--- a/modules/core/include/opencv2/core/cuda/common.hpp
+++ b/modules/core/include/opencv2/core/cuda/common.hpp
@@ -44,7 +44,7 @@
 #define __OPENCV_GPU_COMMON_HPP__
 #include <cuda_runtime.h>
-#include "opencv2/core/cuda_devptrs.hpp"
+#include "opencv2/core/gpu_types.hpp"
 #include "opencv2/core/cvdef.h"
 #include "opencv2/core/base.hpp"

--- a/modules/core/include/opencv2/core/gpumat.hpp
+++ b/modules/core/include/opencv2/core/gpumat.hpp
--- a/modules/core/include/opencv2/core/gpu.inl.hpp
+++ b/modules/core/include/opencv2/core/gpu.inl.hpp
--- a/modules/core/include/opencv2/core/stream_accessor.hpp
+++ b/modules/core/include/opencv2/core/stream_accessor.hpp
@@ -40,28 +40,38 @@
 //
 //M*/
-#ifndef __OPENCV_CUDA_STREAM_ACCESSOR_HPP__
+#ifndef __OPENCV_CORE_GPU_STREAM_ACCESSOR_HPP__
-#define __OPENCV_CUDA_STREAM_ACCESSOR_HPP__
+#define __OPENCV_CORE_GPU_STREAM_ACCESSOR_HPP__
-#include <cuda_runtime.h>
+#ifndef __cplusplus
-#include "opencv2/core/cvdef.h"
+#  error gpu_stream_accessor.hpp header must be compiled as C++
+#endif
 // This is the only header file that depends on CUDA. All other headers are independent.
 // So if you use OpenCV binaries you do not need to install the CUDA Toolkit.
 // But if you want to use the GPU yourself, you may get the CUDA stream instance using the class below.
 // In this case you have to install the CUDA Toolkit.
+#include <cuda_runtime.h>
+#include "opencv2/core/cvdef.h"
 namespace cv
 {
    namespace gpu
    {
        class Stream;
+        class Event;
        struct StreamAccessor
        {
            CV_EXPORTS static cudaStream_t getStream(const Stream& stream);
        };
+        struct EventAccessor
+        {
+            CV_EXPORTS static cudaEvent_t getEvent(const Event& event);
+        };
    }
 }
-#endif /* __OPENCV_CUDA_STREAM_ACCESSOR_HPP__ */
+#endif /* __OPENCV_CORE_GPU_STREAM_ACCESSOR_HPP__ */
--- a/modules/core/include/opencv2/core/cuda_devptrs.hpp
+++ b/modules/core/include/opencv2/core/cuda_devptrs.hpp
@@ -40,10 +40,12 @@
 //
 //M*/
-#ifndef __OPENCV_CORE_DEVPTRS_HPP__
+#ifndef __OPENCV_CORE_GPU_TYPES_HPP__
-#define __OPENCV_CORE_DEVPTRS_HPP__
+#define __OPENCV_CORE_GPU_TYPES_HPP__
-#ifdef __cplusplus
+#ifndef __cplusplus
+#  error gpu_types.hpp header must be compiled as C++
+#endif
 #ifdef __CUDACC__
    #define __CV_GPU_HOST_DEVICE__ __host__ __device__ __forceinline__
@@ -58,7 +60,7 @@ namespace cv
        // Simple lightweight structures that encapsulates information about an image on device.
        // It is intended to pass to nvcc-compiled code. GpuMat depends on headers that nvcc can't compile
-        template<typename T> struct DevPtr
+        template <typename T> struct DevPtr
        {
            typedef T elem_type;
            typedef int index_type;
@@ -75,7 +77,7 @@ namespace cv
            __CV_GPU_HOST_DEVICE__ operator const T*() const { return data; }
        };
-        template<typename T> struct PtrSz : public DevPtr<T>
+        template <typename T> struct PtrSz : public DevPtr<T>
        {
            __CV_GPU_HOST_DEVICE__ PtrSz() : size(0) {}
            __CV_GPU_HOST_DEVICE__ PtrSz(T* data_, size_t size_) : DevPtr<T>(data_), size(size_) {}
@@ -83,12 +85,12 @@ namespace cv
            size_t size;
        };
-        template<typename T> struct PtrStep : public DevPtr<T>
+        template <typename T> struct PtrStep : public DevPtr<T>
        {
            __CV_GPU_HOST_DEVICE__ PtrStep() : step(0) {}
            __CV_GPU_HOST_DEVICE__ PtrStep(T* data_, size_t step_) : DevPtr<T>(data_), step(step_) {}
-            /** \brief stride between two consecutive rows in bytes. Step is stored always and everywhere in bytes!!! */
+            //! stride between two consecutive rows in bytes. Step is stored always and everywhere in bytes!!!
            size_t step;
            __CV_GPU_HOST_DEVICE__       T* ptr(int y = 0)       { return (      T*)( (      char*)DevPtr<T>::data + y * step); }
@@ -118,36 +120,7 @@ namespace cv
        typedef PtrStep<unsigned char> PtrStepb;
        typedef PtrStep<float> PtrStepf;
        typedef PtrStep<int> PtrStepi;
-#if defined __GNUC__
-    #define __CV_GPU_DEPR_BEFORE__
-    #define __CV_GPU_DEPR_AFTER__ __attribute__ ((deprecated))
-#elif defined(__MSVC__) //|| defined(__CUDACC__)
-    #pragma deprecated(DevMem2D_)
-    #define __CV_GPU_DEPR_BEFORE__ __declspec(deprecated)
-    #define __CV_GPU_DEPR_AFTER__
-#else
-    #define __CV_GPU_DEPR_BEFORE__
-    #define __CV_GPU_DEPR_AFTER__
-#endif
-        template <typename T> struct __CV_GPU_DEPR_BEFORE__ DevMem2D_ : public PtrStepSz<T>
-        {
-            DevMem2D_() {}
-            DevMem2D_(int rows_, int cols_, T* data_, size_t step_) : PtrStepSz<T>(rows_, cols_, data_, step_) {}
-            template <typename U>
-            explicit __CV_GPU_DEPR_BEFORE__ DevMem2D_(const DevMem2D_<U>& d) : PtrStepSz<T>(d.rows, d.cols, (T*)d.data, d.step) {}
-        } __CV_GPU_DEPR_AFTER__ ;
-        typedef DevMem2D_<unsigned char> DevMem2Db;
-        typedef DevMem2Db DevMem2D;
-        typedef DevMem2D_<float> DevMem2Df;
-        typedef DevMem2D_<int> DevMem2Di;
    }
 }
-#endif // __cplusplus
+#endif /* __OPENCV_CORE_GPU_TYPES_HPP__ */
-#endif /* __OPENCV_CORE_DEVPTRS_HPP__ */
--- a/modules/core/include/opencv2/core/mat.hpp
+++ b/modules/core/include/opencv2/core/mat.hpp
@@ -77,7 +77,7 @@ public:
        STD_VECTOR_MAT    = 5 << KIND_SHIFT,
        EXPR              = 6 << KIND_SHIFT,
        OPENGL_BUFFER     = 7 << KIND_SHIFT,
-        OPENGL_TEXTURE    = 8 << KIND_SHIFT,
+        CUDA_MEM          = 8 << KIND_SHIFT,
        GPU_MAT           = 9 << KIND_SHIFT
    };
@@ -94,13 +94,12 @@ public:
    _InputArray(const double& val);
    _InputArray(const gpu::GpuMat& d_mat);
    _InputArray(const ogl::Buffer& buf);
-    _InputArray(const ogl::Texture2D& tex);
+    _InputArray(const gpu::CudaMem& cuda_mem);
    virtual Mat getMat(int i=-1) const;
    virtual void getMatVector(std::vector<Mat>& mv) const;
    virtual gpu::GpuMat getGpuMat() const;
    virtual ogl::Buffer getOGlBuffer() const;
-    virtual ogl::Texture2D getOGlTexture2D() const;
    virtual int kind() const;
    virtual Size size(int i=-1) const;
@@ -143,7 +142,7 @@ public:
    _OutputArray(std::vector<Mat>& vec);
    _OutputArray(gpu::GpuMat& d_mat);
    _OutputArray(ogl::Buffer& buf);
-    _OutputArray(ogl::Texture2D& tex);
+    _OutputArray(gpu::CudaMem& cuda_mem);
    template<typename _Tp> _OutputArray(std::vector<_Tp>& vec);
    template<typename _Tp> _OutputArray(std::vector<std::vector<_Tp> >& vec);
    template<typename _Tp> _OutputArray(std::vector<Mat_<_Tp> >& vec);
@@ -155,7 +154,7 @@ public:
    _OutputArray(const std::vector<Mat>& vec);
    _OutputArray(const gpu::GpuMat& d_mat);
    _OutputArray(const ogl::Buffer& buf);
-    _OutputArray(const ogl::Texture2D& tex);
+    _OutputArray(const gpu::CudaMem& cuda_mem);
    template<typename _Tp> _OutputArray(const std::vector<_Tp>& vec);
    template<typename _Tp> _OutputArray(const std::vector<std::vector<_Tp> >& vec);
    template<typename _Tp> _OutputArray(const std::vector<Mat_<_Tp> >& vec);
@@ -169,7 +168,7 @@ public:
    virtual Mat& getMatRef(int i=-1) const;
    virtual gpu::GpuMat& getGpuMatRef() const;
    virtual ogl::Buffer& getOGlBufferRef() const;
-    virtual ogl::Texture2D& getOGlTexture2DRef() const;
+    virtual gpu::CudaMem& getCudaMemRef() const;
    virtual void create(Size sz, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const;
    virtual void create(int rows, int cols, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const;
    virtual void create(int dims, const int* size, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const;

--- a/modules/core/include/opencv2/core/opengl.hpp
+++ b/modules/core/include/opencv2/core/opengl.hpp
@@ -40,8 +40,12 @@
 //
 //M*/
-#ifndef __OPENCV_OPENGL_INTEROP_HPP__
+#ifndef __OPENCV_CORE_OPENGL_HPP__
-#define __OPENCV_OPENGL_INTEROP_HPP__
+#define __OPENCV_CORE_OPENGL_HPP__
+#ifndef __cplusplus
+#  error opengl.hpp header must be compiled as C++
+#endif
 #include "opencv2/core.hpp"
@@ -84,7 +88,7 @@ public:
    //! create buffer
    void create(int arows, int acols, int atype, Target target = ARRAY_BUFFER, bool autoRelease = false);
-    void create(Size asize, int atype, Target target = ARRAY_BUFFER, bool autoRelease = false) { create(asize.height, asize.width, atype, target, autoRelease); }
+    void create(Size asize, int atype, Target target = ARRAY_BUFFER, bool autoRelease = false);
    //! release memory and delete buffer object
    void release();
@@ -92,11 +96,15 @@ public:
    //! set auto release mode (if true, release will be called in object's destructor)
    void setAutoRelease(bool flag);
-    //! copy from host/device memory
+    //! copy from host/device memory (blocking)
    void copyFrom(InputArray arr, Target target = ARRAY_BUFFER, bool autoRelease = false);
+    //! copy from device memory (non blocking)
+    void copyFrom(InputArray arr, gpu::Stream& stream, Target target = ARRAY_BUFFER, bool autoRelease = false);
-    //! copy to host/device memory
+    //! copy to host/device memory (blocking)
-    void copyTo(OutputArray arr, Target target = ARRAY_BUFFER, bool autoRelease = false) const;
+    void copyTo(OutputArray arr) const;
+    //! copy to device memory (non blocking)
+    void copyTo(OutputArray arr, gpu::Stream& stream) const;
    //! create copy of current buffer
    Buffer clone(Target target = ARRAY_BUFFER, bool autoRelease = false) const;
@@ -111,21 +119,26 @@ public:
    Mat mapHost(Access access);
    void unmapHost();
-    //! map to device memory
+    //! map to device memory (blocking)
    gpu::GpuMat mapDevice();
    void unmapDevice();
-    int rows() const { return rows_; }
+    //! map to device memory (non blocking)
-    int cols() const { return cols_; }
+    gpu::GpuMat mapDevice(gpu::Stream& stream);
-    Size size() const { return Size(cols_, rows_); }
+    void unmapDevice(gpu::Stream& stream);
-    bool empty() const { return rows_ == 0 || cols_ == 0; }
-    int type() const { return type_; }
+    int rows() const;
-    int depth() const { return CV_MAT_DEPTH(type_); }
+    int cols() const;
-    int channels() const { return CV_MAT_CN(type_); }
+    Size size() const;
-    int elemSize() const { return CV_ELEM_SIZE(type_); }
+    bool empty() const;
-    int elemSize1() const { return CV_ELEM_SIZE1(type_); }
+    int type() const;
+    int depth() const;
+    int channels() const;
+    int elemSize() const;
+    int elemSize1() const;
+    //! get OpenGL object id
    unsigned int bufId() const;
    class Impl;
@@ -165,7 +178,7 @@ public:
    //! create texture
    void create(int arows, int acols, Format aformat, bool autoRelease = false);
-    void create(Size asize, Format aformat, bool autoRelease = false) { create(asize.height, asize.width, aformat, autoRelease); }
+    void create(Size asize, Format aformat, bool autoRelease = false);
    //! release memory and delete texture object
    void release();
@@ -182,13 +195,14 @@ public:
    //! bind texture to current active texture unit for GL_TEXTURE_2D target
    void bind() const;
-    int rows() const { return rows_; }
+    int rows() const;
-    int cols() const { return cols_; }
+    int cols() const;
-    Size size() const { return Size(cols_, rows_); }
+    Size size() const;
-    bool empty() const { return rows_ == 0 || cols_ == 0; }
+    bool empty() const;
-    Format format() const { return format_; }
+    Format format() const;
+    //! get OpenGL object id
    unsigned int texId() const;
    class Impl;
@@ -224,8 +238,8 @@ public:
    void bind() const;
-    int size() const { return size_; }
+    int size() const;
-    bool empty() const { return size_ == 0; }
+    bool empty() const;
 private:
    int size_;
@@ -260,14 +274,14 @@ enum {
 CV_EXPORTS void render(const Arrays& arr, int mode = POINTS, Scalar color = Scalar::all(255));
 CV_EXPORTS void render(const Arrays& arr, InputArray indices, int mode = POINTS, Scalar color = Scalar::all(255));
-}} // namespace cv::gl
+}} // namespace cv::ogl
 namespace cv { namespace gpu {
 //! set a CUDA device to use OpenGL interoperability
 CV_EXPORTS void setGlDevice(int device = 0);
-}} // cv::gpu
+}}
 namespace cv {
@@ -276,4 +290,149 @@ template <> CV_EXPORTS void Ptr<cv::ogl::Texture2D::Impl>::delete_obj();
 }
-#endif // __OPENCV_OPENGL_INTEROP_HPP__
+////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////
+inline
+cv::ogl::Buffer::Buffer(int arows, int acols, int atype, Target target, bool autoRelease) : rows_(0), cols_(0), type_(0)
+{
+    create(arows, acols, atype, target, autoRelease);
+}
+inline
+cv::ogl::Buffer::Buffer(Size asize, int atype, Target target, bool autoRelease) : rows_(0), cols_(0), type_(0)
+{
+    create(asize, atype, target, autoRelease);
+}
+inline
+void cv::ogl::Buffer::create(Size asize, int atype, Target target, bool autoRelease)
+{
+    create(asize.height, asize.width, atype, target, autoRelease);
+}
+inline
+int cv::ogl::Buffer::rows() const
+{
+    return rows_;
+}
+inline
+int cv::ogl::Buffer::cols() const
+{
+    return cols_;
+}
+inline
+cv::Size cv::ogl::Buffer::size() const
+{
+    return Size(cols_, rows_);
+}
+inline
+bool cv::ogl::Buffer::empty() const
+{
+    return rows_ == 0 || cols_ == 0;
+}
+inline
+int cv::ogl::Buffer::type() const
+{
+    return type_;
+}
+inline
+int cv::ogl::Buffer::depth() const
+{
+    return CV_MAT_DEPTH(type_);
+}
+inline
+int cv::ogl::Buffer::channels() const
+{
+    return CV_MAT_CN(type_);
+}
+inline
+int cv::ogl::Buffer::elemSize() const
+{
+    return CV_ELEM_SIZE(type_);
+}
+inline
+int cv::ogl::Buffer::elemSize1() const
+{
+    return CV_ELEM_SIZE1(type_);
+}
+///////
+inline
+cv::ogl::Texture2D::Texture2D(int arows, int acols, Format aformat, bool autoRelease) : rows_(0), cols_(0), format_(NONE)
+{
+    create(arows, acols, aformat, autoRelease);
+}
+inline
+cv::ogl::Texture2D::Texture2D(Size asize, Format aformat, bool autoRelease) : rows_(0), cols_(0), format_(NONE)
+{
+    create(asize, aformat, autoRelease);
+}
+inline
+void cv::ogl::Texture2D::create(Size asize, Format aformat, bool autoRelease)
+{
+    create(asize.height, asize.width, aformat, autoRelease);
+}
+inline
+int cv::ogl::Texture2D::rows() const
+{
+    return rows_;
+}
+inline
+int cv::ogl::Texture2D::cols() const
+{
+    return cols_;
+}
+inline
+cv::Size cv::ogl::Texture2D::size() const
+{
+    return Size(cols_, rows_);
+}
+inline
+bool cv::ogl::Texture2D::empty() const
+{
+    return rows_ == 0 || cols_ == 0;
+}
+inline
+cv::ogl::Texture2D::Format cv::ogl::Texture2D::format() const
+{
+    return format_;
+}
+///////
+inline
+cv::ogl::Arrays::Arrays() : size_(0)
+{
+}
+inline
+int cv::ogl::Arrays::size() const
+{
+    return size_;
+}
+inline
+bool cv::ogl::Arrays::empty() const
+{
+    return size_ == 0;
+}
+#endif /* __OPENCV_CORE_OPENGL_HPP__ */
--- a/modules/core/include/opencv2/core/gpu_private.hpp
+++ b/modules/core/include/opencv2/core/gpu_private.hpp
@@ -41,8 +41,8 @@
 //
 //M*/
-#ifndef __OPENCV_CORE_GPU_PRIVATE_HPP__
+#ifndef __OPENCV_CORE_PRIVATE_GPU_HPP__
-#define __OPENCV_CORE_GPU_PRIVATE_HPP__
+#define __OPENCV_CORE_PRIVATE_GPU_HPP__
 #ifndef __OPENCV_BUILD
 #  error this is a private header which should not be used from outside of the OpenCV library
@@ -53,11 +53,13 @@
 #include "opencv2/core/cvdef.h"
 #include "opencv2/core/base.hpp"
+#include "opencv2/core/gpu.hpp"
 #ifdef HAVE_CUDA
 #  include <cuda.h>
 #  include <cuda_runtime.h>
 #  include <npp.h>
-#  include "opencv2/core/stream_accessor.hpp"
+#  include "opencv2/core/gpu_stream_accessor.hpp"
 #  include "opencv2/core/cuda/common.hpp"
 #  define NPP_VERSION (NPP_VERSION_MAJOR * 1000 + NPP_VERSION_MINOR * 100 + NPP_VERSION_BUILD)

--- a/modules/core/src/cuda/matrix_operations.cu
+++ b/modules/core/src/cuda/matrix_operations.cu
--- a/modules/core/src/cuda/matrix_operations.hpp
+++ b/modules/core/src/cuda/matrix_operations.hpp
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+#include "opencv2/core/cuda/common.hpp"
+namespace cv { namespace gpu { namespace cudev
+{
+    void copyWithMask(PtrStepSzb src, PtrStepSzb dst, size_t elemSize1, int cn, PtrStepSzb mask, bool multiChannelMask, cudaStream_t stream);
+    template <typename T>
+    void set(PtrStepSz<T> mat, const T* scalar, int channels, cudaStream_t stream);
+    template <typename T>
+    void set(PtrStepSz<T> mat, const T* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
+    void convert(PtrStepSzb src, int sdepth, PtrStepSzb dst, int ddepth, double alpha, double beta, cudaStream_t stream);
+}}}
--- a/modules/core/src/cudastream.cpp
+++ b/modules/core/src/cudastream.cpp
--- a/modules/core/src/matrix_operations.cpp
+++ b/modules/core/src/matrix_operations.cpp
--- a/modules/core/src/gpumat.cpp
+++ b/modules/core/src/gpumat.cpp
--- a/modules/core/src/gpu_mat.cpp
+++ b/modules/core/src/gpu_mat.cpp
--- a/modules/core/src/gpu_stream.cpp
+++ b/modules/core/src/gpu_stream.cpp
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+#include "precomp.hpp"
+using namespace cv;
+using namespace cv::gpu;
+////////////////////////////////////////////////////////////////
+// Stream
+#ifndef HAVE_CUDA
+// Stub used when the library is built without CUDA support:
+// every attempt to construct it ends in throw_no_cuda().
+class cv::gpu::Stream::Impl
+{
+public:
+    Impl(void* ptr = 0)
+    {
+        (void) ptr; // parameter exists only so the stub accepts the same call forms
+        throw_no_cuda();
+    }
+};
+#else
+// Holder of the cudaStream_t handle that backs a cv::gpu::Stream.
+class cv::gpu::Stream::Impl
+{
+public:
+    cudaStream_t stream;
+    Impl();
+    Impl(cudaStream_t stream);
+    ~Impl();
+};
+// Creates a fresh CUDA stream; the creation result is checked by cudaSafeCall.
+cv::gpu::Stream::Impl::Impl() : stream(0)
+{
+    cudaSafeCall( cudaStreamCreate(&stream) );
+}
+// Stores an already existing handle without creating anything.
+cv::gpu::Stream::Impl::Impl(cudaStream_t stream_) : stream(stream_)
+{
+}
+// Destroys the handle unless it is the zero handle.
+// The result of cudaStreamDestroy is intentionally not checked here.
+cv::gpu::Stream::Impl::~Impl()
+{
+    if (stream != 0)
+    {
+        cudaStreamDestroy(stream);
+    }
+}
+// Returns the raw cudaStream_t stored inside the given Stream.
+cudaStream_t cv::gpu::StreamAccessor::getStream(const Stream& stream)
+{
+    return stream.impl_->stream;
+}
+#endif
+// Default constructor: allocates the implementation object when CUDA is
+// available, otherwise calls throw_no_cuda().
+cv::gpu::Stream::Stream()
+{
+#ifdef HAVE_CUDA
+    impl_ = new Impl();
+#else
+    throw_no_cuda();
+#endif
+}
+// Non-blocking status query for the stream.
+// Returns true when cudaStreamQuery reports cudaSuccess, false when it reports
+// cudaErrorNotReady; any other status is passed to cudaSafeCall.
+bool cv::gpu::Stream::queryIfComplete() const
+{
+#ifdef HAVE_CUDA
+    const cudaError_t status = cudaStreamQuery(impl_->stream);
+    if (status == cudaSuccess)
+        return true;
+    if (status == cudaErrorNotReady)
+        return false;
+    // Anything else is a genuine error.
+    cudaSafeCall(status);
+    return false;
+#else
+    throw_no_cuda();
+    return false;
+#endif
+}
+// Synchronizes with the stream via cudaStreamSynchronize (result checked by cudaSafeCall).
+void cv::gpu::Stream::waitForCompletion()
+{
+#ifdef HAVE_CUDA
+    cudaSafeCall( cudaStreamSynchronize(impl_->stream) );
+#else
+    throw_no_cuda();
+#endif
+}
+// Makes this stream wait on the given event (cudaStreamWaitEvent with flags = 0).
+void cv::gpu::Stream::waitEvent(const Event& event)
+{
+#ifdef HAVE_CUDA
+    const cudaEvent_t rawEvent = EventAccessor::getEvent(event);
+    cudaSafeCall( cudaStreamWaitEvent(impl_->stream, rawEvent, 0) );
+#else
+    (void) event;
+    throw_no_cuda();
+#endif
+}
+#if defined(HAVE_CUDA) && (CUDART_VERSION >= 5000)
+namespace
+{
+    // Heap-allocated pair (user callback + user pointer) that is handed to the
+    // CUDA runtime as the opaque user-data argument of cudaStreamAddCallback.
+    struct CallbackData
+    {
+        Stream::StreamCallback callback;
+        void* userData;
+        CallbackData(Stream::StreamCallback callback_, void* userData_) : callback(callback_), userData(userData_) {}
+    };
+    // Trampoline with the signature CUDA expects. It unpacks the CallbackData,
+    // releases it, and forwards the status (as int) and the user pointer to the
+    // user callback.
+    void CUDART_CB cudaStreamCallback(cudaStream_t, cudaError_t status, void* userData)
+    {
+        CallbackData* data = static_cast<CallbackData*>(userData);
+        // Copy the payload out and free the holder first, so the allocation is
+        // released even if the user callback does not return normally.
+        const Stream::StreamCallback userCallback = data->callback;
+        void* const userPointer = data->userData;
+        delete data;
+        userCallback(static_cast<int>(status), userPointer);
+    }
+}
+#endif
+// Registers a host callback on this stream.
+//   callback - function to invoke; it receives the CUDA status as int and userData
+//   userData - opaque pointer forwarded unchanged to the callback
+// Without CUDA this calls throw_no_cuda(); with a CUDA runtime older than 5.0
+// it raises StsNotImplemented.
+void cv::gpu::Stream::enqueueHostCallback(StreamCallback callback, void* userData)
+{
+#ifndef HAVE_CUDA
+    (void) callback;
+    (void) userData;
+    throw_no_cuda();
+#else
+    #if CUDART_VERSION < 5000
+        (void) callback;
+        (void) userData;
+        CV_Error(cv::Error::StsNotImplemented, "This function requires CUDA 5.0");
+    #else
+        CallbackData* data = new CallbackData(callback, userData);
+        const cudaError_t err = cudaStreamAddCallback(impl_->stream, cudaStreamCallback, data, 0);
+        // If registration failed the trampoline will never run, so nobody else
+        // would free the holder: release it before reporting the error.
+        if (err != cudaSuccess)
+            delete data;
+        cudaSafeCall( err );
+    #endif
+#endif
+}
+// Returns a reference to a function-local static Stream built around an
+// Impl constructed with 0.
+Stream& cv::gpu::Stream::Null()
+{
+    static Stream nullStream(new Impl(0));
+    return nullStream;
+}
+// Safe-bool conversion: yields a non-null value only when the wrapped
+// handle is not the zero handle. Always null in builds without CUDA.
+cv::gpu::Stream::operator bool_type() const
+{
+#ifdef HAVE_CUDA
+    if (impl_->stream == 0)
+        return 0;
+    return &Stream::this_type_does_not_support_comparisons;
+#else
+    return 0;
+#endif
+}
+// Ptr deleter specialization for Stream::Impl.
+template <> void cv::Ptr<Stream::Impl>::delete_obj()
+{
+    // delete on a null pointer is a no-op, so no explicit check is needed.
+    delete obj;
+}
+////////////////////////////////////////////////////////////////
+// Event
+#ifndef HAVE_CUDA
+// Stub used when the library is built without CUDA support:
+// construction always ends in throw_no_cuda().
+class cv::gpu::Event::Impl
+{
+public:
+    Impl(unsigned int)
+    {
+        throw_no_cuda();
+    }
+};
+#else
+// Holder of the cudaEvent_t handle that backs a cv::gpu::Event.
+class cv::gpu::Event::Impl
+{
+public:
+    cudaEvent_t event;
+    Impl(unsigned int flags);
+    ~Impl();
+};
+// Creates the event with the given creation flags; the result is checked by cudaSafeCall.
+cv::gpu::Event::Impl::Impl(unsigned int flags) : event(0)
+{
+    cudaSafeCall( cudaEventCreateWithFlags(&event, flags) );
+}
+// Destroys the handle unless it is the zero handle.
+// The result of cudaEventDestroy is intentionally not checked here.
+cv::gpu::Event::Impl::~Impl()
+{
+    if (event != 0)
+    {
+        cudaEventDestroy(event);
+    }
+}
+// Returns the raw cudaEvent_t stored inside the given Event.
+cudaEvent_t cv::gpu::EventAccessor::getEvent(const Event& event)
+{
+    return event.impl_->event;
+}
+#endif
+// Constructs the event with the requested creation flags when CUDA is
+// available, otherwise calls throw_no_cuda().
+cv::gpu::Event::Event(CreateFlags flags)
+{
+#ifdef HAVE_CUDA
+    impl_ = new Impl(flags);
+#else
+    (void) flags;
+    throw_no_cuda();
+#endif
+}
+// Records this event on the given stream (cudaEventRecord, result checked by cudaSafeCall).
+void cv::gpu::Event::record(Stream& stream)
+{
+#ifdef HAVE_CUDA
+    const cudaStream_t rawStream = StreamAccessor::getStream(stream);
+    cudaSafeCall( cudaEventRecord(impl_->event, rawStream) );
+#else
+    (void) stream;
+    throw_no_cuda();
+#endif
+}
+// Non-blocking status query for the event.
+// Returns true when cudaEventQuery reports cudaSuccess, false when it reports
+// cudaErrorNotReady; any other status is passed to cudaSafeCall.
+bool cv::gpu::Event::queryIfComplete() const
+{
+#ifdef HAVE_CUDA
+    const cudaError_t status = cudaEventQuery(impl_->event);
+    if (status == cudaSuccess)
+        return true;
+    if (status == cudaErrorNotReady)
+        return false;
+    // Anything else is a genuine error.
+    cudaSafeCall(status);
+    return false;
+#else
+    throw_no_cuda();
+    return false;
+#endif
+}
+// Synchronizes with the event via cudaEventSynchronize (result checked by cudaSafeCall).
+void cv::gpu::Event::waitForCompletion()
+{
+#ifdef HAVE_CUDA
+    cudaSafeCall( cudaEventSynchronize(impl_->event) );
+#else
+    throw_no_cuda();
+#endif
+}
+// Returns the value produced by cudaEventElapsedTime for the two events
+// (the variable name in the original suggests milliseconds).
+float cv::gpu::Event::elapsedTime(const Event& start, const Event& end)
+{
+#ifdef HAVE_CUDA
+    float elapsed;
+    cudaSafeCall( cudaEventElapsedTime(&elapsed, start.impl_->event, end.impl_->event) );
+    return elapsed;
+#else
+    (void) start;
+    (void) end;
+    throw_no_cuda();
+    return 0.0f;
+#endif
+}
+// Ptr deleter specialization for Event::Impl.
+template <> void cv::Ptr<Event::Impl>::delete_obj()
+{
+    // delete on a null pointer is a no-op, so no explicit check is needed.
+    delete obj;
+}
--- a/modules/core/src/matrix.cpp
+++ b/modules/core/src/matrix.cpp
@@ -41,8 +41,6 @@
 //M*/
 #include "precomp.hpp"
-#include "opencv2/core/gpumat.hpp"
-#include "opencv2/core/opengl.hpp"
 /****************************************************************************************\
 *                           [scaled] Identity matrix initialization                      *
@@ -941,14 +939,15 @@ void scalarToRawData(const Scalar& s, void* _buf, int type, int unroll_to)
 \*************************************************************************************************/
 _InputArray::_InputArray() : flags(0), obj(0) {}
-_InputArray::~_InputArray() {}
 _InputArray::_InputArray(const Mat& m) : flags(MAT), obj((void*)&m) {}
 _InputArray::_InputArray(const std::vector<Mat>& vec) : flags(STD_VECTOR_MAT), obj((void*)&vec) {}
 _InputArray::_InputArray(const double& val) : flags(FIXED_TYPE + FIXED_SIZE + MATX + CV_64F), obj((void*)&val), sz(Size(1,1)) {}
 _InputArray::_InputArray(const MatExpr& expr) : flags(FIXED_TYPE + FIXED_SIZE + EXPR), obj((void*)&expr) {}
 _InputArray::_InputArray(const gpu::GpuMat& d_mat) : flags(GPU_MAT), obj((void*)&d_mat) {}
 _InputArray::_InputArray(const ogl::Buffer& buf) : flags(OPENGL_BUFFER), obj((void*)&buf) {}
-_InputArray::_InputArray(const ogl::Texture2D& tex) : flags(OPENGL_TEXTURE), obj((void*)&tex) {}
+_InputArray::_InputArray(const gpu::CudaMem& cuda_mem) : flags(CUDA_MEM), obj((void*)&cuda_mem) {}
+_InputArray::~_InputArray() {}
 Mat _InputArray::getMat(int i) const
 {
@@ -996,14 +995,37 @@ Mat _InputArray::getMat(int i) const
        return !v.empty() ? Mat(size(i), t, (void*)&v[0]) : Mat();
    }
-    CV_Assert( k == STD_VECTOR_MAT );
+    if( k == STD_VECTOR_MAT )
-    //if( k == STD_VECTOR_MAT )
    {
        const std::vector<Mat>& v = *(const std::vector<Mat>*)obj;
        CV_Assert( 0 <= i && i < (int)v.size() );
        return v[i];
    }
+    if( k == OPENGL_BUFFER )
+    {
+        CV_Assert( i < 0 );
+        CV_Error(cv::Error::StsNotImplemented, "You should explicitly call mapHost/unmapHost methods for ogl::Buffer object");
+        return Mat();
+    }
+    if( k == GPU_MAT )
+    {
+        CV_Assert( i < 0 );
+        CV_Error(cv::Error::StsNotImplemented, "You should explicitly call download method for gpu::GpuMat object");
+        return Mat();
+    }
+    CV_Assert( k == CUDA_MEM );
+    //if( k == CUDA_MEM )
+    {
+        CV_Assert( i < 0 );
+        const gpu::CudaMem* cuda_mem = (const gpu::CudaMem*)obj;
+        return cuda_mem->createMatHeader();
+    }
 }
@@ -1092,10 +1114,29 @@ gpu::GpuMat _InputArray::getGpuMat() const
 {
    int k = kind();
-    CV_Assert(k == GPU_MAT);
+    if (k == GPU_MAT)
+    {
+        const gpu::GpuMat* d_mat = (const gpu::GpuMat*)obj;
+        return *d_mat;
+    }
+    if (k == CUDA_MEM)
+    {
+        const gpu::CudaMem* cuda_mem = (const gpu::CudaMem*)obj;
+        return cuda_mem->createGpuMatHeader();
+    }
+    if (k == OPENGL_BUFFER)
+    {
+        CV_Error(cv::Error::StsNotImplemented, "You should explicitly call mapDevice/unmapDevice methods for ogl::Buffer object");
+        return gpu::GpuMat();
+    }
-    const gpu::GpuMat* d_mat = (const gpu::GpuMat*)obj;
+    if (k == NONE)
-    return *d_mat;
+        return gpu::GpuMat();
+    CV_Error(cv::Error::StsNotImplemented, "getGpuMat is available only for gpu::GpuMat and gpu::CudaMem");
+    return gpu::GpuMat();
 }
 ogl::Buffer _InputArray::getOGlBuffer() const
@@ -1108,16 +1149,6 @@ ogl::Buffer _InputArray::getOGlBuffer() const
    return *gl_buf;
 }
-ogl::Texture2D _InputArray::getOGlTexture2D() const
-{
-    int k = kind();
-    CV_Assert(k == OPENGL_TEXTURE);
-    const ogl::Texture2D* gl_tex = (const ogl::Texture2D*)obj;
-    return *gl_tex;
-}
 int _InputArray::kind() const
 {
    return flags & KIND_MASK;
@@ -1186,19 +1217,19 @@ Size _InputArray::size(int i) const
        return buf->size();
    }
-    if( k == OPENGL_TEXTURE )
+    if( k == GPU_MAT )
    {
        CV_Assert( i < 0 );
-        const ogl::Texture2D* tex = (const ogl::Texture2D*)obj;
+        const gpu::GpuMat* d_mat = (const gpu::GpuMat*)obj;
-        return tex->size();
+        return d_mat->size();
    }
-    CV_Assert( k == GPU_MAT );
+    CV_Assert( k == CUDA_MEM );
-    //if( k == GPU_MAT )
+    //if( k == CUDA_MEM )
    {
        CV_Assert( i < 0 );
-        const gpu::GpuMat* d_mat = (const gpu::GpuMat*)obj;
+        const gpu::CudaMem* cuda_mem = (const gpu::CudaMem*)obj;
-        return d_mat->size();
+        return cuda_mem->size();
    }
 }
@@ -1252,9 +1283,12 @@ int _InputArray::type(int i) const
    if( k == OPENGL_BUFFER )
        return ((const ogl::Buffer*)obj)->type();
-    CV_Assert( k == GPU_MAT );
+    if( k == GPU_MAT )
-    //if( k == GPU_MAT )
        return ((const gpu::GpuMat*)obj)->type();
+    CV_Assert( k == CUDA_MEM );
+    //if( k == CUDA_MEM )
+        return ((const gpu::CudaMem*)obj)->type();
 }
 int _InputArray::depth(int i) const
@@ -1304,29 +1338,29 @@ bool _InputArray::empty() const
    if( k == OPENGL_BUFFER )
        return ((const ogl::Buffer*)obj)->empty();
-    if( k == OPENGL_TEXTURE )
+    if( k == GPU_MAT )
-        return ((const ogl::Texture2D*)obj)->empty();
-    CV_Assert( k == GPU_MAT );
-    //if( k == GPU_MAT )
        return ((const gpu::GpuMat*)obj)->empty();
+    CV_Assert( k == CUDA_MEM );
+    //if( k == CUDA_MEM )
+        return ((const gpu::CudaMem*)obj)->empty();
 }
 _OutputArray::_OutputArray() {}
-_OutputArray::~_OutputArray() {}
 _OutputArray::_OutputArray(Mat& m) : _InputArray(m) {}
 _OutputArray::_OutputArray(std::vector<Mat>& vec) : _InputArray(vec) {}
 _OutputArray::_OutputArray(gpu::GpuMat& d_mat) : _InputArray(d_mat) {}
 _OutputArray::_OutputArray(ogl::Buffer& buf) : _InputArray(buf) {}
-_OutputArray::_OutputArray(ogl::Texture2D& tex) : _InputArray(tex) {}
+_OutputArray::_OutputArray(gpu::CudaMem& cuda_mem) : _InputArray(cuda_mem) {}
 _OutputArray::_OutputArray(const Mat& m) : _InputArray(m) {flags |= FIXED_SIZE|FIXED_TYPE;}
 _OutputArray::_OutputArray(const std::vector<Mat>& vec) : _InputArray(vec) {flags |= FIXED_SIZE;}
 _OutputArray::_OutputArray(const gpu::GpuMat& d_mat) : _InputArray(d_mat) {flags |= FIXED_SIZE|FIXED_TYPE;}
 _OutputArray::_OutputArray(const ogl::Buffer& buf) : _InputArray(buf) {flags |= FIXED_SIZE|FIXED_TYPE;}
-_OutputArray::_OutputArray(const ogl::Texture2D& tex) : _InputArray(tex) {flags |= FIXED_SIZE|FIXED_TYPE;}
+_OutputArray::_OutputArray(const gpu::CudaMem& cuda_mem) : _InputArray(cuda_mem) {flags |= FIXED_SIZE|FIXED_TYPE;}
+_OutputArray::~_OutputArray() {}
 bool _OutputArray::fixedSize() const
 {
@@ -1362,6 +1396,13 @@ void _OutputArray::create(Size _sz, int mtype, int i, bool allowTransposed, int
        ((ogl::Buffer*)obj)->create(_sz, mtype);
        return;
    }
+    if( k == CUDA_MEM && i < 0 && !allowTransposed && fixedDepthMask == 0 )
+    {
+        CV_Assert(!fixedSize() || ((gpu::CudaMem*)obj)->size() == _sz);
+        CV_Assert(!fixedType() || ((gpu::CudaMem*)obj)->type() == mtype);
+        ((gpu::CudaMem*)obj)->create(_sz, mtype);
+        return;
+    }
    int sizes[] = {_sz.height, _sz.width};
    create(2, sizes, mtype, i, allowTransposed, fixedDepthMask);
 }
@@ -1390,6 +1431,13 @@ void _OutputArray::create(int rows, int cols, int mtype, int i, bool allowTransp
        ((ogl::Buffer*)obj)->create(rows, cols, mtype);
        return;
    }
+    if( k == CUDA_MEM && i < 0 && !allowTransposed && fixedDepthMask == 0 )
+    {
+        CV_Assert(!fixedSize() || ((gpu::CudaMem*)obj)->size() == Size(cols, rows));
+        CV_Assert(!fixedType() || ((gpu::CudaMem*)obj)->type() == mtype);
+        ((gpu::CudaMem*)obj)->create(rows, cols, mtype);
+        return;
+    }
    int sizes[] = {rows, cols};
    create(2, sizes, mtype, i, allowTransposed, fixedDepthMask);
 }
@@ -1609,15 +1657,15 @@ void _OutputArray::release() const
        return;
    }
-    if( k == OPENGL_BUFFER )
+    if( k == CUDA_MEM )
    {
-        ((ogl::Buffer*)obj)->release();
+        ((gpu::CudaMem*)obj)->release();
        return;
    }
-    if( k == OPENGL_TEXTURE )
+    if( k == OPENGL_BUFFER )
    {
-        ((ogl::Texture2D*)obj)->release();
+        ((ogl::Buffer*)obj)->release();
        return;
    }
@@ -1693,11 +1741,11 @@ ogl::Buffer& _OutputArray::getOGlBufferRef() const
    return *(ogl::Buffer*)obj;
 }
-ogl::Texture2D& _OutputArray::getOGlTexture2DRef() const
+gpu::CudaMem& _OutputArray::getCudaMemRef() const
 {
    int k = kind();
-    CV_Assert( k == OPENGL_TEXTURE );
+    CV_Assert( k == CUDA_MEM );
-    return *(ogl::Texture2D*)obj;
+    return *(gpu::CudaMem*)obj;
 }
 static _OutputArray _none;

--- a/modules/core/src/opengl_interop.cpp
+++ b/modules/core/src/opengl_interop.cpp
--- a/modules/core/src/precomp.hpp
+++ b/modules/core/src/precomp.hpp
--- a/modules/gpu/doc/data_structures.rst
+++ b/modules/gpu/doc/data_structures.rst
--- a/modules/gpu/doc/initalization_and_information.rst
+++ b/modules/gpu/doc/initalization_and_information.rst
--- a/modules/gpu/include/opencv2/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu.hpp
@@ -47,7 +47,7 @@
 #  error gpu.hpp header must be compiled as C++
 #endif
-#include "opencv2/core/gpumat.hpp"
+#include "opencv2/core/gpu.hpp"
 #if !defined(__OPENCV_BUILD) && !defined(OPENCV_GPU_SKIP_INCLUDE)
    #include "opencv2/opencv_modules.hpp"

--- a/modules/gpu/src/precomp.hpp
+++ b/modules/gpu/src/precomp.hpp
--- a/modules/gpu/test/test_opengl.cpp
+++ b/modules/gpu/test/test_opengl.cpp
--- a/modules/gpuarithm/include/opencv2/gpuarithm.hpp
+++ b/modules/gpuarithm/include/opencv2/gpuarithm.hpp
--- a/modules/gpuarithm/src/arithm.cpp
+++ b/modules/gpuarithm/src/arithm.cpp
--- a/modules/gpuarithm/src/precomp.hpp
+++ b/modules/gpuarithm/src/precomp.hpp
--- a/modules/gpubgsegm/include/opencv2/gpubgsegm.hpp
+++ b/modules/gpubgsegm/include/opencv2/gpubgsegm.hpp
--- a/modules/gpubgsegm/src/cuda/fgd.hpp
+++ b/modules/gpubgsegm/src/cuda/fgd.hpp
--- a/modules/gpubgsegm/src/gmg.cpp
+++ b/modules/gpubgsegm/src/gmg.cpp
--- a/modules/gpubgsegm/src/precomp.hpp
+++ b/modules/gpubgsegm/src/precomp.hpp
--- a/modules/gpucodec/include/opencv2/gpucodec.hpp
+++ b/modules/gpucodec/include/opencv2/gpucodec.hpp
--- a/modules/gpucodec/src/cuvid_video_source.h
+++ b/modules/gpucodec/src/cuvid_video_source.h
--- a/modules/gpucodec/src/frame_queue.h
+++ b/modules/gpucodec/src/frame_queue.h
--- a/modules/gpucodec/src/precomp.hpp
+++ b/modules/gpucodec/src/precomp.hpp
--- a/modules/gpucodec/src/video_decoder.h
+++ b/modules/gpucodec/src/video_decoder.h
--- a/modules/gpucodec/src/video_parser.h
+++ b/modules/gpucodec/src/video_parser.h
--- a/modules/gpufeatures2d/include/opencv2/gpufeatures2d.hpp
+++ b/modules/gpufeatures2d/include/opencv2/gpufeatures2d.hpp
--- a/modules/gpufeatures2d/src/brute_force_matcher.cpp
+++ b/modules/gpufeatures2d/src/brute_force_matcher.cpp
--- a/modules/gpufeatures2d/src/precomp.hpp
+++ b/modules/gpufeatures2d/src/precomp.hpp
--- a/modules/gpufilters/include/opencv2/gpufilters.hpp
+++ b/modules/gpufilters/include/opencv2/gpufilters.hpp
--- a/modules/gpufilters/src/filtering.cpp
+++ b/modules/gpufilters/src/filtering.cpp
--- a/modules/gpufilters/src/precomp.hpp
+++ b/modules/gpufilters/src/precomp.hpp
--- a/modules/gpuimgproc/include/opencv2/gpuimgproc.hpp
+++ b/modules/gpuimgproc/include/opencv2/gpuimgproc.hpp
--- a/modules/gpuimgproc/src/match_template.cpp
+++ b/modules/gpuimgproc/src/match_template.cpp
--- a/modules/gpuimgproc/src/precomp.hpp
+++ b/modules/gpuimgproc/src/precomp.hpp
--- a/modules/gpulegacy/include/opencv2/gpulegacy/private.hpp
+++ b/modules/gpulegacy/include/opencv2/gpulegacy/private.hpp
--- a/modules/gpulegacy/src/precomp.hpp
+++ b/modules/gpulegacy/src/precomp.hpp
--- a/modules/gpulegacy/test/test_precomp.hpp
+++ b/modules/gpulegacy/test/test_precomp.hpp
--- a/modules/gpuoptflow/include/opencv2/gpuoptflow.hpp
+++ b/modules/gpuoptflow/include/opencv2/gpuoptflow.hpp
--- a/modules/gpuoptflow/src/farneback.cpp
+++ b/modules/gpuoptflow/src/farneback.cpp
--- a/modules/gpuoptflow/src/precomp.hpp
+++ b/modules/gpuoptflow/src/precomp.hpp
--- a/modules/gpuoptflow/test/test_optflow.cpp
+++ b/modules/gpuoptflow/test/test_optflow.cpp
--- a/modules/gpustereo/include/opencv2/gpustereo.hpp
+++ b/modules/gpustereo/include/opencv2/gpustereo.hpp
--- a/modules/gpustereo/src/disparity_bilateral_filter.cpp
+++ b/modules/gpustereo/src/disparity_bilateral_filter.cpp
--- a/modules/gpustereo/src/precomp.hpp
+++ b/modules/gpustereo/src/precomp.hpp
--- a/modules/gpustereo/src/stereobm.cpp
+++ b/modules/gpustereo/src/stereobm.cpp
--- a/modules/gpustereo/src/stereobp.cpp
+++ b/modules/gpustereo/src/stereobp.cpp
--- a/modules/gpustereo/src/stereocsbp.cpp
+++ b/modules/gpustereo/src/stereocsbp.cpp
--- a/modules/gpuwarping/include/opencv2/gpuwarping.hpp
+++ b/modules/gpuwarping/include/opencv2/gpuwarping.hpp
--- a/modules/gpuwarping/src/precomp.hpp
+++ b/modules/gpuwarping/src/precomp.hpp
--- a/modules/gpuwarping/src/pyramids.cpp
+++ b/modules/gpuwarping/src/pyramids.cpp
--- a/modules/gpuwarping/src/resize.cpp
+++ b/modules/gpuwarping/src/resize.cpp
--- a/modules/highgui/include/opencv2/highgui.hpp
+++ b/modules/highgui/include/opencv2/highgui.hpp
--- a/modules/highgui/src/window.cpp
+++ b/modules/highgui/src/window.cpp
--- a/modules/nonfree/include/opencv2/nonfree/gpu.hpp
+++ b/modules/nonfree/include/opencv2/nonfree/gpu.hpp
--- a/modules/nonfree/src/precomp.hpp
+++ b/modules/nonfree/src/precomp.hpp
--- a/modules/photo/include/opencv2/photo/gpu.hpp
+++ b/modules/photo/include/opencv2/photo/gpu.hpp
--- a/modules/photo/src/denoising_gpu.cpp
+++ b/modules/photo/src/denoising_gpu.cpp
--- a/modules/softcascade/include/opencv2/softcascade.hpp
+++ b/modules/softcascade/include/opencv2/softcascade.hpp
--- a/modules/softcascade/src/cuda/channels.cu
+++ b/modules/softcascade/src/cuda/channels.cu
--- a/modules/softcascade/src/cuda_invoker.hpp
+++ b/modules/softcascade/src/cuda_invoker.hpp
--- a/modules/softcascade/src/detector_cuda.cpp
+++ b/modules/softcascade/src/detector_cuda.cpp
--- a/modules/softcascade/src/precomp.hpp
+++ b/modules/softcascade/src/precomp.hpp
--- a/modules/softcascade/test/test_cuda_softcascade.cpp
+++ b/modules/softcascade/test/test_cuda_softcascade.cpp
--- a/modules/softcascade/test/utility.hpp
+++ b/modules/softcascade/test/utility.hpp
--- a/modules/stitching/include/opencv2/stitching/detail/warpers.hpp
+++ b/modules/stitching/include/opencv2/stitching/detail/warpers.hpp
--- a/modules/superres/perf/perf_precomp.hpp
+++ b/modules/superres/perf/perf_precomp.hpp
--- a/modules/superres/src/input_array_utility.cpp
+++ b/modules/superres/src/input_array_utility.cpp
--- a/modules/superres/src/input_array_utility.hpp
+++ b/modules/superres/src/input_array_utility.hpp
--- a/modules/superres/src/precomp.hpp
+++ b/modules/superres/src/precomp.hpp
--- a/modules/ts/include/opencv2/ts/gpu_test.hpp
+++ b/modules/ts/include/opencv2/ts/gpu_test.hpp
--- a/modules/ts/src/gpu_perf.cpp
+++ b/modules/ts/src/gpu_perf.cpp
--- a/modules/ts/src/ts_perf.cpp
+++ b/modules/ts/src/ts_perf.cpp
--- a/modules/videostab/include/opencv2/videostab/wobble_suppression.hpp
+++ b/modules/videostab/include/opencv2/videostab/wobble_suppression.hpp
--- a/samples/cpp/tutorial_code/gpu/gpu-basics-similarity/gpu-basics-similarity.cpp
+++ b/samples/cpp/tutorial_code/gpu/gpu-basics-similarity/gpu-basics-similarity.cpp
--- a/samples/gpu/driver_api_multi.cpp
+++ b/samples/gpu/driver_api_multi.cpp
--- a/samples/gpu/driver_api_stereo_multi.cpp
+++ b/samples/gpu/driver_api_stereo_multi.cpp
--- a/samples/gpu/multi.cpp
+++ b/samples/gpu/multi.cpp
--- a/samples/gpu/opengl.cpp
+++ b/samples/gpu/opengl.cpp
--- a/samples/gpu/performance/performance.cpp
+++ b/samples/gpu/performance/performance.cpp
--- a/samples/gpu/stereo_multi.cpp
+++ b/samples/gpu/stereo_multi.cpp