Merge pull request #3527 from jet47:cuda-deprivate-old-device-layer

cddee22c · Vadim Pisarevsky · 214f633d · 1d82aecf · cddee22c · cddee22c
Commit cddee22c authored Dec 24, 2014 by Vadim Pisarevsky
29 changed files
--- a/modules/core/include/opencv2/core/cuda.hpp
+++ b/modules/core/include/opencv2/core/cuda.hpp
@@ -51,9 +51,20 @@
 #include "opencv2/core.hpp"
 #include "opencv2/core/cuda_types.hpp"
+/**
+  @defgroup cuda CUDA-accelerated Computer Vision
+  @{
+    @defgroup cudacore Core part
+    @{
+      @defgroup cudacore_init Initialization and Information
+      @defgroup cudacore_struct Data Structures
+    @}
+  @}
+ */
 namespace cv { namespace cuda {
-//! @addtogroup cuda_struct
+//! @addtogroup cudacore_struct
 //! @{
 //////////////////////////////// GpuMat ///////////////////////////////
@@ -514,11 +525,11 @@ private:
    friend struct EventAccessor;
 };
-//! @} cuda_struct
+//! @} cudacore_struct
 //////////////////////////////// Initialization & Info ////////////////////////
-//! @addtogroup cuda_init
+//! @addtogroup cudacore_init
 //! @{
 /** @brief Returns the number of installed CUDA-enabled devices.
@@ -813,7 +824,7 @@ private:
 CV_EXPORTS void printCudaDeviceInfo(int device);
 CV_EXPORTS void printShortCudaDeviceInfo(int device);
-//! @} cuda_init
+//! @} cudacore_init
 }} // namespace cv { namespace cuda {

--- a/modules/core/include/opencv2/core/cuda/block.hpp
+++ b/modules/core/include/opencv2/core/cuda/block.hpp
@@ -43,11 +43,14 @@
 #ifndef __OPENCV_CUDA_DEVICE_BLOCK_HPP__
 #define __OPENCV_CUDA_DEVICE_BLOCK_HPP__
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+//! @cond IGNORED
 namespace cv { namespace cuda { namespace device
 {
-//! @addtogroup cuda
-//! @{
    struct Block
    {
        static __device__ __forceinline__ unsigned int id()
@@ -201,7 +204,8 @@ namespace cv { namespace cuda { namespace device
            }
        }
    };
-//!@}
 }}}
+//! @endcond
 #endif /* __OPENCV_CUDA_DEVICE_BLOCK_HPP__ */
--- a/modules/core/include/opencv2/core/cuda/border_interpolate.hpp
+++ b/modules/core/include/opencv2/core/cuda/border_interpolate.hpp
@@ -47,11 +47,14 @@
 #include "vec_traits.hpp"
 #include "vec_math.hpp"
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+//! @cond IGNORED
 namespace cv { namespace cuda { namespace device
 {
-//! @addtogroup cuda
-//! @{
    //////////////////////////////////////////////////////////////
    // BrdConstant
@@ -712,7 +715,8 @@ namespace cv { namespace cuda { namespace device
        int width;
        D val;
    };
-//! @}
 }}} // namespace cv { namespace cuda { namespace cudev
+//! @endcond
 #endif // __OPENCV_CUDA_BORDER_INTERPOLATE_HPP__
--- a/modules/core/include/opencv2/core/cuda/color.hpp
+++ b/modules/core/include/opencv2/core/cuda/color.hpp
@@ -45,10 +45,14 @@
 #include "detail/color_detail.hpp"
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+//! @cond IGNORED
 namespace cv { namespace cuda { namespace device
 {
-//! @addtogroup cuda
-//! @{
    // All OPENCV_CUDA_IMPLEMENT_*_TRAITS(ColorSpace1_to_ColorSpace2, ...) macros implements
    // template <typename T> class ColorSpace1_to_ColorSpace2_traits
    // {
@@ -298,7 +302,8 @@ namespace cv { namespace cuda { namespace device
    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lbgra, 4, 4, false, 0)
    #undef OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS
-//! @}
 }}} // namespace cv { namespace cuda { namespace cudev
+//! @endcond
 #endif // __OPENCV_CUDA_BORDER_INTERPOLATE_HPP__
--- a/modules/core/include/opencv2/core/cuda/common.hpp
+++ b/modules/core/include/opencv2/core/cuda/common.hpp
@@ -48,6 +48,11 @@
 #include "opencv2/core/cvdef.h"
 #include "opencv2/core/base.hpp"
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+//! @cond IGNORED
 #ifndef CV_PI_F
    #ifndef CV_PI
@@ -58,14 +63,11 @@
 #endif
 namespace cv { namespace cuda {
-//! @addtogroup cuda
-//! @{
    static inline void checkCudaError(cudaError_t err, const char* file, const int line, const char* func)
    {
        if (cudaSuccess != err)
            cv::error(cv::Error::GpuApiCallError, cudaGetErrorString(err), func, file, line);
    }
-//! @}
 }}
 #ifndef cudaSafeCall
@@ -74,8 +76,6 @@ namespace cv { namespace cuda {
 namespace cv { namespace cuda
 {
-//! @addtogroup cuda
-//! @{
    template <typename T> static inline bool isAligned(const T* ptr, size_t size)
    {
        return reinterpret_cast<size_t>(ptr) % size == 0;
@@ -85,15 +85,12 @@ namespace cv { namespace cuda
    {
        return step % size == 0;
    }
-//! @}
 }}
 namespace cv { namespace cuda
 {
    namespace device
    {
-//! @addtogroup cuda
-//! @{
        __host__ __device__ __forceinline__ int divUp(int total, int grain)
        {
            return (total + grain - 1) / grain;
@@ -104,8 +101,9 @@ namespace cv { namespace cuda
            cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
            cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
        }
-//! @}
    }
 }}
+//! @endcond
 #endif // __OPENCV_CUDA_COMMON_HPP__
--- a/modules/core/include/opencv2/core/cuda/datamov_utils.hpp
+++ b/modules/core/include/opencv2/core/cuda/datamov_utils.hpp
@@ -45,11 +45,14 @@
 #include "common.hpp"
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+//! @cond IGNORED
 namespace cv { namespace cuda { namespace device
 {
-//! @addtogroup cuda
-//! @{
    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 200
        // for Fermi memory space is detected automatically
@@ -103,7 +106,8 @@ namespace cv { namespace cuda { namespace device
        #undef OPENCV_CUDA_ASM_PTR
    #endif // __CUDA_ARCH__ >= 200
-//! @}
 }}} // namespace cv { namespace cuda { namespace cudev
+//! @endcond
 #endif // __OPENCV_CUDA_DATAMOV_UTILS_HPP__
--- a/modules/core/include/opencv2/core/cuda/dynamic_smem.hpp
+++ b/modules/core/include/opencv2/core/cuda/dynamic_smem.hpp
@@ -43,10 +43,14 @@
 #ifndef __OPENCV_CUDA_DYNAMIC_SMEM_HPP__
 #define __OPENCV_CUDA_DYNAMIC_SMEM_HPP__
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+//! @cond IGNORED
 namespace cv { namespace cuda { namespace device
 {
-//! @addtogroup cuda
-//! @{
    template<class T> struct DynamicSharedMem
    {
        __device__ __forceinline__ operator T*()
@@ -77,7 +81,8 @@ namespace cv { namespace cuda { namespace device
            return (double*)__smem_d;
        }
    };
-//! @}
 }}}
+//! @endcond
 #endif // __OPENCV_CUDA_DYNAMIC_SMEM_HPP__
--- a/modules/core/include/opencv2/core/cuda/emulation.hpp
+++ b/modules/core/include/opencv2/core/cuda/emulation.hpp
@@ -46,10 +46,14 @@
 #include "common.hpp"
 #include "warp_reduce.hpp"
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+//! @cond IGNORED
 namespace cv { namespace cuda { namespace device
 {
-//! @addtogroup cuda
-//! @{
    struct Emulation
    {
@@ -258,7 +262,8 @@ namespace cv { namespace cuda { namespace device
            }
        };
    }; //struct Emulation
-//!@}
 }}} // namespace cv { namespace cuda { namespace cudev
+//! @endcond
 #endif /* OPENCV_CUDA_EMULATION_HPP_ */
--- a/modules/core/include/opencv2/core/cuda/filters.hpp
+++ b/modules/core/include/opencv2/core/cuda/filters.hpp
@@ -48,10 +48,14 @@
 #include "vec_math.hpp"
 #include "type_traits.hpp"
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+//! @cond IGNORED
 namespace cv { namespace cuda { namespace device
 {
-//! @addtogroup cuda
-//! @{
    template <typename Ptr2D> struct PointFilter
    {
        typedef typename Ptr2D::elem_type elem_type;
@@ -275,7 +279,8 @@ namespace cv { namespace cuda { namespace device
        float scale_x, scale_y;
        int width, haight;
    };
-//! @}
 }}} // namespace cv { namespace cuda { namespace cudev
+//! @endcond
 #endif // __OPENCV_CUDA_FILTERS_HPP__
--- a/modules/core/include/opencv2/core/cuda/funcattrib.hpp
+++ b/modules/core/include/opencv2/core/cuda/funcattrib.hpp
@@ -45,10 +45,14 @@
 #include <cstdio>
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+//! @cond IGNORED
 namespace cv { namespace cuda { namespace device
 {
-//! @addtogroup cuda
-//! @{
    template<class Func>
    void printFuncAttrib(Func& func)
    {
@@ -68,7 +72,8 @@ namespace cv { namespace cuda { namespace device
        printf("\n");
        fflush(stdout);
    }
-//! @}
 }}} // namespace cv { namespace cuda { namespace cudev
+//! @endcond
 #endif  /* __OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP_ */
--- a/modules/core/include/opencv2/core/cuda/functional.hpp
+++ b/modules/core/include/opencv2/core/cuda/functional.hpp
@@ -49,10 +49,14 @@
 #include "type_traits.hpp"
 #include "device_functions.h"
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+//! @cond IGNORED
 namespace cv { namespace cuda { namespace device
 {
-//! @addtogroup cuda
-//! @{
    // Function Objects
    template<typename Argument, typename Result> struct unary_function : public std::unary_function<Argument, Result> {};
    template<typename Argument1, typename Argument2, typename Result> struct binary_function : public std::binary_function<Argument1, Argument2, Result> {};
@@ -786,7 +790,8 @@ namespace cv { namespace cuda { namespace device
 #define OPENCV_CUDA_TRANSFORM_FUNCTOR_TRAITS(type) \
    template <> struct TransformFunctorTraits< type > : DefaultTransformFunctorTraits< type >
-//! @}
 }}} // namespace cv { namespace cuda { namespace cudev
+//! @endcond
 #endif // __OPENCV_CUDA_FUNCTIONAL_HPP__
--- a/modules/core/include/opencv2/core/cuda/limits.hpp
+++ b/modules/core/include/opencv2/core/cuda/limits.hpp
@@ -47,10 +47,14 @@
 #include <float.h>
 #include "common.hpp"
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+//! @cond IGNORED
 namespace cv { namespace cuda { namespace device
 {
-//! @addtogroup cuda
-//! @{
 template <class T> struct numeric_limits;
 template <> struct numeric_limits<bool>
@@ -117,7 +121,8 @@ template <> struct numeric_limits<double>
    __device__ __forceinline__ static double epsilon() { return DBL_EPSILON; }
    static const bool is_signed = true;
 };
-//! @}
 }}} // namespace cv { namespace cuda { namespace cudev {
+//! @endcond
 #endif // __OPENCV_CUDA_LIMITS_HPP__
--- a/modules/core/include/opencv2/core/cuda/reduce.hpp
+++ b/modules/core/include/opencv2/core/cuda/reduce.hpp
@@ -47,10 +47,14 @@
 #include "detail/reduce.hpp"
 #include "detail/reduce_key_val.hpp"
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+//! @cond IGNORED
 namespace cv { namespace cuda { namespace device
 {
-//! @addtogroup cuda
-//! @{
    template <int N, typename T, class Op>
    __device__ __forceinline__ void reduce(volatile T* smem, T& val, unsigned int tid, const Op& op)
    {
@@ -194,7 +198,8 @@ namespace cv { namespace cuda { namespace device
    {
        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6, (volatile T7*) t7, (volatile T8*) t8, (volatile T9*) t9);
    }
-//! @}
 }}}
+//! @endcond
 #endif // __OPENCV_CUDA_UTILITY_HPP__
--- a/modules/core/include/opencv2/core/cuda/saturate_cast.hpp
+++ b/modules/core/include/opencv2/core/cuda/saturate_cast.hpp
@@ -45,10 +45,14 @@
 #include "common.hpp"
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+//! @cond IGNORED
 namespace cv { namespace cuda { namespace device
 {
-//! @addtogroup cuda
-//! @{
    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(uchar v) { return _Tp(v); }
    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(schar v) { return _Tp(v); }
    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(ushort v) { return _Tp(v); }
@@ -281,7 +285,8 @@ namespace cv { namespace cuda { namespace device
        return saturate_cast<uint>((float)v);
    #endif
    }
-//! @}
 }}}
+//! @endcond
 #endif /* __OPENCV_CUDA_SATURATE_CAST_HPP__ */
--- a/modules/core/include/opencv2/core/cuda/scan.hpp
+++ b/modules/core/include/opencv2/core/cuda/scan.hpp
@@ -48,10 +48,14 @@
 #include "opencv2/core/cuda/warp.hpp"
 #include "opencv2/core/cuda/warp_shuffle.hpp"
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+//! @cond IGNORED
 namespace cv { namespace cuda { namespace device
 {
-//! @addtogroup cuda
-//! @{
    enum ScanKind { EXCLUSIVE = 0,  INCLUSIVE = 1 };
    template <ScanKind Kind, typename T, typename F> struct WarpScan
@@ -247,7 +251,8 @@ namespace cv { namespace cuda { namespace device
            return warpScanInclusive(idata, s_Data, tid);
        }
    }
-//! @}
 }}}
+//! @endcond
 #endif // __OPENCV_CUDA_SCAN_HPP__
--- a/modules/core/include/opencv2/core/cuda/simd_functions.hpp
+++ b/modules/core/include/opencv2/core/cuda/simd_functions.hpp
@@ -76,57 +76,13 @@
 #include "common.hpp"
 /** @file
-  This header file contains inline functions that implement intra-word SIMD
+ * @deprecated Use @ref cudev instead.
-  operations, that are hardware accelerated on sm_3x (Kepler) GPUs. Efficient
+ */
-  emulation code paths are provided for earlier architectures (sm_1x, sm_2x)
-  to make the code portable across all GPUs supported by CUDA. The following
+//! @cond IGNORED
-  functions are currently implemented:
-  vadd2(a,b)      per-halfword unsigned addition, with wrap-around: a + b
-  vsub2(a,b)      per-halfword unsigned subtraction, with wrap-around: a - b
-  vabsdiff2(a,b)  per-halfword unsigned absolute difference: |a - b|
-  vavg2(a,b)      per-halfword unsigned average: (a + b) / 2
-  vavrg2(a,b)     per-halfword unsigned rounded average: (a + b + 1) / 2
-  vseteq2(a,b)    per-halfword unsigned comparison: a == b ? 1 : 0
-  vcmpeq2(a,b)    per-halfword unsigned comparison: a == b ? 0xffff : 0
-  vsetge2(a,b)    per-halfword unsigned comparison: a >= b ? 1 : 0
-  vcmpge2(a,b)    per-halfword unsigned comparison: a >= b ? 0xffff : 0
-  vsetgt2(a,b)    per-halfword unsigned comparison: a > b ? 1 : 0
-  vcmpgt2(a,b)    per-halfword unsigned comparison: a > b ? 0xffff : 0
-  vsetle2(a,b)    per-halfword unsigned comparison: a <= b ? 1 : 0
-  vcmple2(a,b)    per-halfword unsigned comparison: a <= b ? 0xffff : 0
-  vsetlt2(a,b)    per-halfword unsigned comparison: a < b ? 1 : 0
-  vcmplt2(a,b)    per-halfword unsigned comparison: a < b ? 0xffff : 0
-  vsetne2(a,b)    per-halfword unsigned comparison: a != b ? 1 : 0
-  vcmpne2(a,b)    per-halfword unsigned comparison: a != b ? 0xffff : 0
-  vmax2(a,b)      per-halfword unsigned maximum: max(a, b)
-  vmin2(a,b)      per-halfword unsigned minimum: min(a, b)
-  vadd4(a,b)      per-byte unsigned addition, with wrap-around: a + b
-  vsub4(a,b)      per-byte unsigned subtraction, with wrap-around: a - b
-  vabsdiff4(a,b)  per-byte unsigned absolute difference: |a - b|
-  vavg4(a,b)      per-byte unsigned average: (a + b) / 2
-  vavrg4(a,b)     per-byte unsigned rounded average: (a + b + 1) / 2
-  vseteq4(a,b)    per-byte unsigned comparison: a == b ? 1 : 0
-  vcmpeq4(a,b)    per-byte unsigned comparison: a == b ? 0xff : 0
-  vsetge4(a,b)    per-byte unsigned comparison: a >= b ? 1 : 0
-  vcmpge4(a,b)    per-byte unsigned comparison: a >= b ? 0xff : 0
-  vsetgt4(a,b)    per-byte unsigned comparison: a > b ? 1 : 0
-  vcmpgt4(a,b)    per-byte unsigned comparison: a > b ? 0xff : 0
-  vsetle4(a,b)    per-byte unsigned comparison: a <= b ? 1 : 0
-  vcmple4(a,b)    per-byte unsigned comparison: a <= b ? 0xff : 0
-  vsetlt4(a,b)    per-byte unsigned comparison: a < b ? 1 : 0
-  vcmplt4(a,b)    per-byte unsigned comparison: a < b ? 0xff : 0
-  vsetne4(a,b)    per-byte unsigned comparison: a != b ? 1: 0
-  vcmpne4(a,b)    per-byte unsigned comparison: a != b ? 0xff: 0
-  vmax4(a,b)      per-byte unsigned maximum: max(a, b)
-  vmin4(a,b)      per-byte unsigned minimum: min(a, b)
-*/
 namespace cv { namespace cuda { namespace device
 {
-//! @addtogroup cuda
-//! @{
    // 2
    static __device__ __forceinline__ unsigned int vadd2(unsigned int a, unsigned int b)
@@ -906,7 +862,8 @@ namespace cv { namespace cuda { namespace device
        return r;
    }
-//! @}
 }}}
+//! @endcond
 #endif // __OPENCV_CUDA_SIMD_FUNCTIONS_HPP__
--- a/modules/core/include/opencv2/core/cuda/transform.hpp
+++ b/modules/core/include/opencv2/core/cuda/transform.hpp
@@ -47,10 +47,14 @@
 #include "utility.hpp"
 #include "detail/transform_detail.hpp"
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+//! @cond IGNORED
 namespace cv { namespace cuda { namespace device
 {
-//! @addtogroup cuda
-//! @{
    template <typename T, typename D, typename UnOp, typename Mask>
    static inline void transform(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, const Mask& mask, cudaStream_t stream)
    {
@@ -64,7 +68,8 @@ namespace cv { namespace cuda { namespace device
        typedef TransformFunctorTraits<BinOp> ft;
        transform_detail::TransformDispatcher<VecTraits<T1>::cn == 1 && VecTraits<T2>::cn == 1 && VecTraits<D>::cn == 1 && ft::smart_shift != 1>::call(src1, src2, dst, op, mask, stream);
    }
-//! @}
 }}}
+//! @endcond
 #endif // __OPENCV_CUDA_TRANSFORM_HPP__
--- a/modules/core/include/opencv2/core/cuda/type_traits.hpp
+++ b/modules/core/include/opencv2/core/cuda/type_traits.hpp
@@ -45,10 +45,14 @@
 #include "detail/type_traits_detail.hpp"
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+//! @cond IGNORED
 namespace cv { namespace cuda { namespace device
 {
-//! @addtogroup cuda
-//! @{
    template <typename T> struct IsSimpleParameter
    {
        enum {value = type_traits_detail::IsIntegral<T>::value || type_traits_detail::IsFloat<T>::value ||
@@ -79,7 +83,8 @@ namespace cv { namespace cuda { namespace device
        typedef typename type_traits_detail::Select<IsSimpleParameter<UnqualifiedType>::value,
            T, typename type_traits_detail::AddParameterType<T>::type>::type ParameterType;
    };
-//! @}
 }}}
+//! @endcond
 #endif // __OPENCV_CUDA_TYPE_TRAITS_HPP__
--- a/modules/core/include/opencv2/core/cuda/utility.hpp
+++ b/modules/core/include/opencv2/core/cuda/utility.hpp
@@ -46,10 +46,14 @@
 #include "saturate_cast.hpp"
 #include "datamov_utils.hpp"
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+//! @cond IGNORED
 namespace cv { namespace cuda { namespace device
 {
-//! @addtogroup cuda
-//! @{
    #define OPENCV_CUDA_LOG_WARP_SIZE        (5)
    #define OPENCV_CUDA_WARP_SIZE            (1 << OPENCV_CUDA_LOG_WARP_SIZE)
    #define OPENCV_CUDA_LOG_MEM_BANKS        ((__CUDA_ARCH__ >= 200) ? 5 : 4) // 32 banks on fermi, 16 on tesla
@@ -210,7 +214,8 @@ namespace cv { namespace cuda { namespace device
        return false;
    }
-//! @}
 }}} // namespace cv { namespace cuda { namespace cudev
+//! @endcond
 #endif // __OPENCV_CUDA_UTILITY_HPP__
--- a/modules/core/include/opencv2/core/cuda/vec_distance.hpp
+++ b/modules/core/include/opencv2/core/cuda/vec_distance.hpp
@@ -47,10 +47,14 @@
 #include "functional.hpp"
 #include "detail/vec_distance_detail.hpp"
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+//! @cond IGNORED
 namespace cv { namespace cuda { namespace device
 {
-//! @addtogroup cuda
-//! @{
    template <typename T> struct L1Dist
    {
        typedef int value_type;
@@ -221,7 +225,8 @@ namespace cv { namespace cuda { namespace device
        U vec1Vals[MAX_LEN / THREAD_DIM];
    };
-//! @}
 }}} // namespace cv { namespace cuda { namespace cudev
+//! @endcond
 #endif // __OPENCV_CUDA_VEC_DISTANCE_HPP__
--- a/modules/core/include/opencv2/core/cuda/vec_math.hpp
+++ b/modules/core/include/opencv2/core/cuda/vec_math.hpp
@@ -46,12 +46,15 @@
 #include "vec_traits.hpp"
 #include "saturate_cast.hpp"
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+//! @cond IGNORED
 namespace cv { namespace cuda { namespace device
 {
-//! @addtogroup cuda
-//! @{
 // saturate_cast
 namespace vec_math_detail
@@ -920,8 +923,8 @@ CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, double, double, double)
 #undef CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC
-//! @}
 }}} // namespace cv { namespace cuda { namespace device
+//! @endcond
 #endif // __OPENCV_CUDA_VECMATH_HPP__
--- a/modules/core/include/opencv2/core/cuda/vec_traits.hpp
+++ b/modules/core/include/opencv2/core/cuda/vec_traits.hpp
@@ -45,10 +45,14 @@
 #include "common.hpp"
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+//! @cond IGNORED
 namespace cv { namespace cuda { namespace device
 {
-//! @addtogroup cuda
-//! @{
    template<typename T, int N> struct TypeVec;
    struct __align__(8) uchar8
@@ -277,7 +281,8 @@ namespace cv { namespace cuda { namespace device
        static __device__ __host__ __forceinline__ char8 make(schar a0, schar a1, schar a2, schar a3, schar a4, schar a5, schar a6, schar a7) {return make_char8(a0, a1, a2, a3, a4, a5, a6, a7);}
        static __device__ __host__ __forceinline__ char8 make(const schar* v) {return make_char8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);}
    };
-//! @}
 }}} // namespace cv { namespace cuda { namespace cudev
+//! @endcond
 #endif // __OPENCV_CUDA_VEC_TRAITS_HPP__
--- a/modules/core/include/opencv2/core/cuda/warp.hpp
+++ b/modules/core/include/opencv2/core/cuda/warp.hpp
@@ -43,10 +43,14 @@
 #ifndef __OPENCV_CUDA_DEVICE_WARP_HPP__
 #define __OPENCV_CUDA_DEVICE_WARP_HPP__
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+//! @cond IGNORED
 namespace cv { namespace cuda { namespace device
 {
-//! @addtogroup cuda
-//! @{
    struct Warp
    {
        enum
@@ -128,7 +132,8 @@ namespace cv { namespace cuda { namespace device
                *t = value;
        }
    };
-//! @}
 }}} // namespace cv { namespace cuda { namespace cudev
+//! @endcond
 #endif /* __OPENCV_CUDA_DEVICE_WARP_HPP__ */
--- a/modules/core/include/opencv2/core/cuda/warp_reduce.hpp
+++ b/modules/core/include/opencv2/core/cuda/warp_reduce.hpp
@@ -43,10 +43,14 @@
 #ifndef OPENCV_CUDA_WARP_REDUCE_HPP__
 #define OPENCV_CUDA_WARP_REDUCE_HPP__
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+//! @cond IGNORED
 namespace cv { namespace cuda { namespace device
 {
-//! @addtogroup cuda
-//! @{
    template <class T>
    __device__ __forceinline__ T warp_reduce(volatile T *ptr , const unsigned int tid = threadIdx.x)
    {
@@ -65,7 +69,8 @@ namespace cv { namespace cuda { namespace device
        return ptr[tid - lane];
    }
-//! @}
 }}} // namespace cv { namespace cuda { namespace cudev {
+//! @endcond
 #endif /* OPENCV_CUDA_WARP_REDUCE_HPP__ */
--- a/modules/core/include/opencv2/core/cuda/warp_shuffle.hpp
+++ b/modules/core/include/opencv2/core/cuda/warp_shuffle.hpp
@@ -43,10 +43,14 @@
 #ifndef __OPENCV_CUDA_WARP_SHUFFLE_HPP__
 #define __OPENCV_CUDA_WARP_SHUFFLE_HPP__
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+//! @cond IGNORED
 namespace cv { namespace cuda { namespace device
 {
-//! @addtogroup cuda
-//! @{
    template <typename T>
    __device__ __forceinline__ T shfl(T val, int srcLane, int width = warpSize)
    {
@@ -142,7 +146,8 @@ namespace cv { namespace cuda { namespace device
        return 0.0;
    #endif
    }
-//! @}
 }}}
+//! @endcond
 #endif // __OPENCV_CUDA_WARP_SHUFFLE_HPP__
--- a/modules/core/include/opencv2/core/cuda_stream_accessor.hpp
+++ b/modules/core/include/opencv2/core/cuda_stream_accessor.hpp
@@ -47,10 +47,9 @@
 #  error cuda_stream_accessor.hpp header must be compiled as C++
 #endif
-// This is only header file that depends on Cuda. All other headers are independent.
+/** @file cuda_stream_accessor.hpp
-// So if you use OpenCV binaries you do noot need to install Cuda Toolkit.
+ * This is the only header file that depends on the CUDA Runtime API. All other headers are independent.
-// But of you wanna use CUDA by yourself, may get cuda stream instance using the class below.
+ */
-// In this case you have to install Cuda Toolkit.
 #include <cuda_runtime.h>
 #include "opencv2/core/cvdef.h"
@@ -60,22 +59,21 @@ namespace cv
    namespace cuda
    {
-//! @addtogroup cuda_struct
+//! @addtogroup cudacore_struct
 //! @{
        class Stream;
        class Event;
        /** @brief Class that enables getting cudaStream_t from cuda::Stream
-        because it is the only public header that depends on the CUDA Runtime API. Including it
-        brings a dependency to your code.
         */
        struct StreamAccessor
        {
            CV_EXPORTS static cudaStream_t getStream(const Stream& stream);
        };
+        /** @brief Class that enables getting cudaEvent_t from cuda::Event
+         */
        struct EventAccessor
        {
            CV_EXPORTS static cudaEvent_t getEvent(const Event& event);

--- a/modules/core/include/opencv2/core/cuda_types.hpp
+++ b/modules/core/include/opencv2/core/cuda_types.hpp
@@ -47,6 +47,12 @@
 #  error cuda_types.hpp header must be compiled as C++
 #endif
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+//! @cond IGNORED
 #ifdef __CUDACC__
    #define __CV_CUDA_HOST_DEVICE__ __host__ __device__ __forceinline__
 #else
@@ -58,9 +64,6 @@ namespace cv
    namespace cuda
    {
-//! @addtogroup cuda_struct
-//! @{
        // Simple lightweight structures that encapsulates information about an image on device.
        // It is intended to pass to nvcc-compiled code. GpuMat depends on headers that nvcc can't compile
@@ -89,17 +92,11 @@ namespace cv
            size_t size;
        };
-        /** @brief Structure similar to cuda::PtrStepSz but containing only a pointer and row step.
-        Width and height fields are excluded due to performance reasons. The structure is intended
-        for internal use or for users who write device code.
-         */
        template <typename T> struct PtrStep : public DevPtr<T>
        {
            __CV_CUDA_HOST_DEVICE__ PtrStep() : step(0) {}
            __CV_CUDA_HOST_DEVICE__ PtrStep(T* data_, size_t step_) : DevPtr<T>(data_), step(step_) {}
-            //! stride between two consecutive rows in bytes. Step is stored always and everywhere in bytes!!!
            size_t step;
            __CV_CUDA_HOST_DEVICE__       T* ptr(int y = 0)       { return (      T*)( (      char*)DevPtr<T>::data + y * step); }
@@ -109,12 +106,6 @@ namespace cv
            __CV_CUDA_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; }
        };
-        /** @brief Lightweight class encapsulating pitched memory on a GPU and passed to nvcc-compiled code (CUDA
-        kernels).
-        Typically, it is used internally by OpenCV and by users who write device code. You can call
-        its members from both host and device code.
-         */
        template <typename T> struct PtrStepSz : public PtrStep<T>
        {
            __CV_CUDA_HOST_DEVICE__ PtrStepSz() : cols(0), rows(0) {}
@@ -136,9 +127,9 @@ namespace cv
        typedef PtrStep<float> PtrStepf;
        typedef PtrStep<int> PtrStepi;
-//! @}
    }
 }
+//! @endcond
 #endif /* __OPENCV_CORE_CUDA_TYPES_HPP__ */
--- a/modules/cuda/include/opencv2/cuda.hpp
+++ b/modules/cuda/include/opencv2/cuda.hpp
@@ -50,15 +50,11 @@
 #include "opencv2/core/cuda.hpp"
 /**
-@defgroup cuda CUDA-accelerated Computer Vision
+    @addtogroup cuda
-    @ref cuda_intro "Introduction page"
    @{
-        @defgroup cuda_init Initalization and Information
-        @defgroup cuda_struct Data Structures
        @defgroup cuda_calib3d Camera Calibration and 3D Reconstruction
        @defgroup cuda_objdetect Object Detection
    @}
 */
 namespace cv { namespace cuda {

--- a/modules/cudev/include/opencv2/cudev/ptr2d/glob.hpp
+++ b/modules/cudev/include/opencv2/cudev/ptr2d/glob.hpp
@@ -54,12 +54,19 @@ namespace cv { namespace cudev {
 //! @addtogroup cudev
 //! @{
+/** @brief Structure similar to cv::cudev::GlobPtrSz but containing only a pointer and row step.
+Width and height fields are excluded for performance reasons. The structure is intended
+for internal use or for users who write device code.
+ */
 template <typename T> struct GlobPtr
 {
    typedef T   value_type;
    typedef int index_type;
    T* data;
+    //! stride between two consecutive rows in bytes. Step is stored always and everywhere in bytes!!!
    size_t step;
    __device__ __forceinline__       T* row(int y)       { return (      T*)( (      uchar*)data + y * step); }
@@ -69,6 +76,12 @@ template <typename T> struct GlobPtr
    __device__ __forceinline__ const T& operator ()(int y, int x) const { return row(y)[x]; }
 };
+/** @brief Lightweight class encapsulating pitched memory on a GPU and passed to nvcc-compiled code (CUDA
+kernels).
+Typically, it is used internally by OpenCV and by users who write device code. You can call
+its members from both host and device code.
+ */
 template <typename T> struct GlobPtrSz : GlobPtr<T>
 {
    int rows, cols;