Commit fcfa7208 authored by Vladislav Vinogradov

moved GpuMat and DevMem2D to core module, some code refactoring

parent 8a148e39
...@@ -90,6 +90,10 @@ class Mat; ...@@ -90,6 +90,10 @@ class Mat;
class SparseMat; class SparseMat;
typedef Mat MatND; typedef Mat MatND;
namespace gpu {
class GpuMat;
}
class CV_EXPORTS MatExpr; class CV_EXPORTS MatExpr;
class CV_EXPORTS MatOp_Base; class CV_EXPORTS MatOp_Base;
class CV_EXPORTS MatArg; class CV_EXPORTS MatArg;
...@@ -1627,6 +1631,10 @@ public: ...@@ -1627,6 +1631,10 @@ public:
template<typename _Tp> explicit Mat(const Point3_<_Tp>& pt, bool copyData=true); template<typename _Tp> explicit Mat(const Point3_<_Tp>& pt, bool copyData=true);
//! builds matrix from comma initializer //! builds matrix from comma initializer
template<typename _Tp> explicit Mat(const MatCommaInitializer_<_Tp>& commaInitializer); template<typename _Tp> explicit Mat(const MatCommaInitializer_<_Tp>& commaInitializer);
//! download data from GpuMat
explicit Mat(const gpu::GpuMat& m);
//! destructor - calls release() //! destructor - calls release()
~Mat(); ~Mat();
//! assignment operators //! assignment operators
......
...@@ -40,103 +40,118 @@ ...@@ -40,103 +40,118 @@
// //
//M*/ //M*/
#ifndef __OPENCV_GPU_MATRIX_OPERATIONS_HPP__ #ifndef __OPENCV_CORE_DevMem2D_HPP__
#define __OPENCV_GPU_MATRIX_OPERATIONS_HPP__ #define __OPENCV_CORE_DevMem2D_HPP__
namespace cv #ifdef __CUDACC__
{ #define __CV_GPU_HOST_DEVICE__ __host__ __device__ __forceinline__
#else
namespace gpu #define __CV_GPU_HOST_DEVICE__
{ #endif
///////////////////////////////////////////////////////////////////////
//////////////////////////////// CudaMem ////////////////////////////////
///////////////////////////////////////////////////////////////////////
inline CudaMem::CudaMem() : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(0) {}
inline CudaMem::CudaMem(int _rows, int _cols, int _type, int _alloc_type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(0)
{
if( _rows > 0 && _cols > 0 )
create( _rows, _cols, _type, _alloc_type);
}
inline CudaMem::CudaMem(Size _size, int _type, int _alloc_type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(0)
{
if( _size.height > 0 && _size.width > 0 )
create( _size.height, _size.width, _type, _alloc_type);
}
inline CudaMem::CudaMem(const CudaMem& m) : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend), alloc_type(m.alloc_type)
{
if( refcount )
CV_XADD(refcount, 1);
}
inline CudaMem::CudaMem(const Mat& m, int _alloc_type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(0)
{
if( m.rows > 0 && m.cols > 0 )
create( m.size(), m.type(), _alloc_type);
Mat tmp = createMatHeader();
m.copyTo(tmp);
}
inline CudaMem::~CudaMem()
{
release();
}
inline CudaMem& CudaMem::operator = (const CudaMem& m) namespace cv
{ {
if( this != &m ) namespace gpu
{ {
if( m.refcount ) // Simple lightweight structures that encapsulates information about an image on device.
CV_XADD(m.refcount, 1); // It is intended to pass to nvcc-compiled code. GpuMat depends on headers that nvcc can't compile
release();
flags = m.flags; template <bool expr> struct StaticAssert;
rows = m.rows; cols = m.cols; template <> struct StaticAssert<true> {static __CV_GPU_HOST_DEVICE__ void check(){}};
step = m.step; data = m.data;
datastart = m.datastart; template<typename T> struct DevPtr
dataend = m.dataend; {
refcount = m.refcount; typedef T elem_type;
alloc_type = m.alloc_type; typedef int index_type;
}
return *this; enum { elem_size = sizeof(elem_type) };
}
T* data;
inline CudaMem CudaMem::clone() const
{ __CV_GPU_HOST_DEVICE__ DevPtr() : data(0) {}
CudaMem m(size(), type(), alloc_type); __CV_GPU_HOST_DEVICE__ DevPtr(T* data_) : data(data_) {}
Mat to = m;
Mat from = *this; __CV_GPU_HOST_DEVICE__ size_t elemSize() const { return elem_size; }
from.copyTo(to); __CV_GPU_HOST_DEVICE__ operator T*() { return data; }
return m; __CV_GPU_HOST_DEVICE__ operator const T*() const { return data; }
};
template<typename T> struct PtrSz : public DevPtr<T>
{
__CV_GPU_HOST_DEVICE__ PtrSz() : size(0) {}
__CV_GPU_HOST_DEVICE__ PtrSz(T* data_, size_t size_) : DevPtr<T>(data_), size(size_) {}
size_t size;
};
template<typename T> struct PtrStep : public DevPtr<T>
{
__CV_GPU_HOST_DEVICE__ PtrStep() : step(0) {}
__CV_GPU_HOST_DEVICE__ PtrStep(T* data_, size_t step_) : DevPtr<T>(data_), step(step_) {}
/** \brief stride between two consecutive rows in bytes. Step is stored always and everywhere in bytes!!! */
size_t step;
__CV_GPU_HOST_DEVICE__ T* ptr(int y = 0) { return ( T*)( ( char*)DevPtr<T>::data + y * step); }
__CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const { return (const T*)( (const char*)DevPtr<T>::data + y * step); }
__CV_GPU_HOST_DEVICE__ T& operator ()(int y, int x) { return ptr(y)[x]; }
__CV_GPU_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; }
};
template <typename T> struct PtrStepSz : public PtrStep<T>
{
__CV_GPU_HOST_DEVICE__ PtrStepSz() : cols(0), rows(0) {}
__CV_GPU_HOST_DEVICE__ PtrStepSz(int rows_, int cols_, T* data_, size_t step_)
: PtrStep<T>(data_, step_), cols(cols_), rows(rows_) {}
int cols;
int rows;
};
template <typename T> struct DevMem2D_ : public PtrStepSz<T>
{
DevMem2D_() {}
DevMem2D_(int rows_, int cols_, T* data_, size_t step_) : PtrStepSz<T>(rows_, cols_, data_, step_) {}
template <typename U>
explicit DevMem2D_(const DevMem2D_<U>& d) : PtrStepSz<T>(d.rows, d.cols, (T*)d.data, d.step) {}
};
template<typename T> struct PtrElemStep_ : public PtrStep<T>
{
PtrElemStep_(const DevMem2D_<T>& mem) : PtrStep<T>(mem.data, mem.step)
{
StaticAssert<256 % sizeof(T) == 0>::check();
PtrStep<T>::step /= PtrStep<T>::elem_size;
}
__CV_GPU_HOST_DEVICE__ T* ptr(int y = 0) { return PtrStep<T>::data + y * PtrStep<T>::step; }
__CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const { return PtrStep<T>::data + y * PtrStep<T>::step; }
__CV_GPU_HOST_DEVICE__ T& operator ()(int y, int x) { return ptr(y)[x]; }
__CV_GPU_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; }
};
template<typename T> struct PtrStep_ : public PtrStep<T>
{
PtrStep_() {}
PtrStep_(const DevMem2D_<T>& mem) : PtrStep<T>(mem.data, mem.step) {}
};
typedef DevMem2D_<unsigned char> DevMem2Db;
typedef DevMem2Db DevMem2D;
typedef DevMem2D_<float> DevMem2Df;
typedef DevMem2D_<int> DevMem2Di;
typedef PtrStep<unsigned char> PtrStepb;
typedef PtrStep<float> PtrStepf;
typedef PtrStep<int> PtrStepi;
typedef PtrElemStep_<unsigned char> PtrElemStep;
typedef PtrElemStep_<float> PtrElemStepf;
typedef PtrElemStep_<int> PtrElemStepi;
}
} }
inline void CudaMem::create(Size _size, int _type, int _alloc_type) { create(_size.height, _size.width, _type, _alloc_type); } #endif /* __OPENCV_GPU_DevMem2D_HPP__ */
//CCP void CudaMem::create(int _rows, int _cols, int _type, int _alloc_type);
//CPP void CudaMem::release();
inline Mat CudaMem::createMatHeader() const { return Mat(size(), type(), data, step); }
inline CudaMem::operator Mat() const { return createMatHeader(); }
inline CudaMem::operator GpuMat() const { return createGpuMatHeader(); }
//CPP GpuMat CudaMem::createGpuMatHeader() const;
inline bool CudaMem::isContinuous() const { return (flags & Mat::CONTINUOUS_FLAG) != 0; }
inline size_t CudaMem::elemSize() const { return CV_ELEM_SIZE(flags); }
inline size_t CudaMem::elemSize1() const { return CV_ELEM_SIZE1(flags); }
inline int CudaMem::type() const { return CV_MAT_TYPE(flags); }
inline int CudaMem::depth() const { return CV_MAT_DEPTH(flags); }
inline int CudaMem::channels() const { return CV_MAT_CN(flags); }
inline size_t CudaMem::step1() const { return step/elemSize1(); }
inline Size CudaMem::size() const { return Size(cols, rows); }
inline bool CudaMem::empty() const { return data == 0; }
} /* end of namespace gpu */
} /* end of namespace cv */
#endif /* __OPENCV_GPU_MATRIX_OPERATIONS_HPP__ */
This diff is collapsed.
...@@ -3,7 +3,8 @@ set(name "gpu") ...@@ -3,7 +3,8 @@ set(name "gpu")
set(the_target "opencv_${name}") set(the_target "opencv_${name}")
project(${the_target}) project(${the_target})
set(DEPS "opencv_core" "opencv_imgproc" "opencv_objdetect" "opencv_features2d" "opencv_flann" "opencv_calib3d") #"opencv_features2d" "opencv_flann" "opencv_objdetect" - only headers needed set(DEPS "opencv_core" "opencv_imgproc" "opencv_calib3d" "opencv_objdetect")
set(DEPS_HEADER ${DEPS} "opencv_features2d" "opencv_flann")
set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} opencv_gpu) set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} opencv_gpu)
include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include" include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include"
...@@ -27,6 +28,13 @@ file(GLOB lib_device_hdrs_detail "src/opencv2/gpu/device/detail/*.h*") ...@@ -27,6 +28,13 @@ file(GLOB lib_device_hdrs_detail "src/opencv2/gpu/device/detail/*.h*")
source_group("Device" FILES ${lib_device_hdrs}) source_group("Device" FILES ${lib_device_hdrs})
source_group("Device\\Detail" FILES ${lib_device_hdrs_detail}) source_group("Device\\Detail" FILES ${lib_device_hdrs_detail})
foreach(d ${DEPS_HEADER})
if(${d} MATCHES "opencv_")
string(REPLACE "opencv_" "${CMAKE_CURRENT_SOURCE_DIR}/../" d_dir ${d})
include_directories("${d_dir}/include")
endif()
endforeach()
if (HAVE_CUDA) if (HAVE_CUDA)
file(GLOB_RECURSE ncv_srcs "src/nvidia/*.cpp") file(GLOB_RECURSE ncv_srcs "src/nvidia/*.cpp")
file(GLOB_RECURSE ncv_cuda "src/nvidia/*.cu") file(GLOB_RECURSE ncv_cuda "src/nvidia/*.cu")
...@@ -50,7 +58,6 @@ if (HAVE_CUDA) ...@@ -50,7 +58,6 @@ if (HAVE_CUDA)
if (APPLE) if (APPLE)
set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler;-fno-finite-math-only;") set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler;-fno-finite-math-only;")
endif() endif()
string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}") string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
...@@ -60,7 +67,7 @@ if (HAVE_CUDA) ...@@ -60,7 +67,7 @@ if (HAVE_CUDA)
#string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") #string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
#string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}") #string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
#string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}") #string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4211 /wd4201 /wd4100 /wd4505 /wd4408") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4211 /wd4201 /wd4100 /wd4505 /wd4408 /wd4251")
string(REPLACE "/EHsc-" "/EHs" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") string(REPLACE "/EHsc-" "/EHs" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
string(REPLACE "/EHsc-" "/EHs" CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE}") string(REPLACE "/EHsc-" "/EHs" CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE}")
...@@ -69,22 +76,19 @@ if (HAVE_CUDA) ...@@ -69,22 +76,19 @@ if (HAVE_CUDA)
string(REPLACE "/EHsc-" "/EHs" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}") string(REPLACE "/EHsc-" "/EHs" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
string(REPLACE "/EHsc-" "/EHs" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}") string(REPLACE "/EHsc-" "/EHs" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}")
endif() endif()
if (BUILD_SHARED_LIBS) if (BUILD_SHARED_LIBS)
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler;-DCVAPI_EXPORTS") set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler;-DCVAPI_EXPORTS")
endif() endif()
if(MSVC)
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler;/wd4251")
endif()
CUDA_COMPILE(cuda_objs ${lib_cuda} ${ncv_cuda}) CUDA_COMPILE(cuda_objs ${lib_cuda} ${ncv_cuda})
#CUDA_BUILD_CLEAN_TARGET() #CUDA_BUILD_CLEAN_TARGET()
endif() endif()
foreach(d ${DEPS})
if(${d} MATCHES "opencv_")
string(REPLACE "opencv_" "${CMAKE_CURRENT_SOURCE_DIR}/../" d_dir ${d})
include_directories("${d_dir}/include")
endif()
endforeach()
add_library(${the_target} ${lib_srcs} ${lib_hdrs} ${lib_int_hdrs} ${lib_cuda} ${lib_cuda_hdrs} ${lib_device_hdrs} ${lib_device_hdrs_detail} ${ncv_srcs} ${ncv_hdrs} ${ncv_cuda} ${cuda_objs}) add_library(${the_target} ${lib_srcs} ${lib_hdrs} ${lib_int_hdrs} ${lib_cuda} ${lib_cuda_hdrs} ${lib_device_hdrs} ${lib_device_hdrs_detail} ${ncv_srcs} ${ncv_hdrs} ${ncv_cuda} ${cuda_objs})
# For dynamic link numbering convenions # For dynamic link numbering convenions
......
...@@ -40,122 +40,4 @@ ...@@ -40,122 +40,4 @@
// //
//M*/ //M*/
#ifndef __OPENCV_GPU_DevMem2D_HPP__ #include "opencv2/core/devmem2d.hpp"
#define __OPENCV_GPU_DevMem2D_HPP__
namespace cv
{
namespace gpu
{
// Simple lightweight structures that encapsulates information about an image on device.
// It is intended to pass to nvcc-compiled code. GpuMat depends on headers that nvcc can't compile
#if defined(__CUDACC__)
#define __CV_GPU_HOST_DEVICE__ __host__ __device__ __forceinline__
#else
#define __CV_GPU_HOST_DEVICE__
#endif
template <bool expr> struct StaticAssert;
template <> struct StaticAssert<true> {static __CV_GPU_HOST_DEVICE__ void check(){}};
template<typename T> struct DevPtr
{
typedef T elem_type;
typedef int index_type;
enum { elem_size = sizeof(elem_type) };
T* data;
__CV_GPU_HOST_DEVICE__ DevPtr() : data(0) {}
__CV_GPU_HOST_DEVICE__ DevPtr(T* data_) : data(data_) {}
__CV_GPU_HOST_DEVICE__ size_t elemSize() const { return elem_size; }
__CV_GPU_HOST_DEVICE__ operator T*() { return data; }
__CV_GPU_HOST_DEVICE__ operator const T*() const { return data; }
};
template<typename T> struct PtrSz : public DevPtr<T>
{
__CV_GPU_HOST_DEVICE__ PtrSz() : size(0) {}
__CV_GPU_HOST_DEVICE__ PtrSz(T* data_, size_t size_) : DevPtr<T>(data_), size(size_) {}
size_t size;
};
template<typename T> struct PtrStep : public DevPtr<T>
{
__CV_GPU_HOST_DEVICE__ PtrStep() : step(0) {}
__CV_GPU_HOST_DEVICE__ PtrStep(T* data_, size_t step_) : DevPtr<T>(data_), step(step_) {}
/** \brief stride between two consecutive rows in bytes. Step is stored always and everywhere in bytes!!! */
size_t step;
__CV_GPU_HOST_DEVICE__ T* ptr(int y = 0) { return ( T*)( ( char*)DevPtr<T>::data + y * step); }
__CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const { return (const T*)( (const char*)DevPtr<T>::data + y * step); }
__CV_GPU_HOST_DEVICE__ T& operator ()(int y, int x) { return ptr(y)[x]; }
__CV_GPU_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; }
};
template <typename T> struct PtrStepSz : public PtrStep<T>
{
__CV_GPU_HOST_DEVICE__ PtrStepSz() : cols(0), rows(0) {}
__CV_GPU_HOST_DEVICE__ PtrStepSz(int rows_, int cols_, T* data_, size_t step_)
: PtrStep<T>(data_, step_), cols(cols_), rows(rows_) {}
int cols;
int rows;
};
template <typename T> struct DevMem2D_ : public PtrStepSz<T>
{
DevMem2D_() {}
DevMem2D_(int rows_, int cols_, T *data_, size_t step_) : PtrStepSz<T>(rows_, cols_, data_, step_) {}
template <typename U>
explicit DevMem2D_(const DevMem2D_<U>& d) : PtrStepSz<T>(d.rows, d.cols, (T*)d.data, d.step) {}
};
template<typename T> struct PtrElemStep_ : public PtrStep<T>
{
PtrElemStep_(const DevMem2D_<T>& mem) : PtrStep<T>(mem.data, mem.step)
{
StaticAssert<256 % sizeof(T) == 0>::check();
PtrStep<T>::step /= PtrStep<T>::elem_size;
}
__CV_GPU_HOST_DEVICE__ T* ptr(int y = 0) { return PtrStep<T>::data + y * PtrStep<T>::step; }
__CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const { return PtrStep<T>::data + y * PtrStep<T>::step; }
__CV_GPU_HOST_DEVICE__ T& operator ()(int y, int x) { return ptr(y)[x]; }
__CV_GPU_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; }
};
template<typename T> struct PtrStep_ : public PtrStep<T>
{
PtrStep_() {}
PtrStep_(const DevMem2D_<T>& mem) : PtrStep<T>(mem.data, mem.step) {}
};
#undef __CV_GPU_HOST_DEVICE__
typedef DevMem2D_<unsigned char> DevMem2Db;
typedef DevMem2Db DevMem2D;
typedef DevMem2D_<float> DevMem2Df;
typedef DevMem2D_<int> DevMem2Di;
typedef PtrStep<unsigned char> PtrStepb;
typedef PtrStep<float> PtrStepf;
typedef PtrStep<int> PtrStepi;
typedef PtrElemStep_<unsigned char> PtrElemStep;
typedef PtrElemStep_<float> PtrElemStepf;
typedef PtrElemStep_<int> PtrElemStepi;
}
}
#endif /* __OPENCV_GPU_DevMem2D_HPP__ */
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
...@@ -24,7 +24,7 @@ PERF_TEST_P(DevInfo_Size_MatType, transpose, testing::Combine(testing::ValuesIn( ...@@ -24,7 +24,7 @@ PERF_TEST_P(DevInfo_Size_MatType, transpose, testing::Combine(testing::ValuesIn(
transpose(src, dst); transpose(src, dst);
} }
Mat dst_host = dst; Mat dst_host(dst);
SANITY_CHECK(dst_host); SANITY_CHECK(dst_host);
} }
...@@ -55,7 +55,7 @@ PERF_TEST_P(DevInfo_Size_MatType_FlipCode, flip, testing::Combine(testing::Value ...@@ -55,7 +55,7 @@ PERF_TEST_P(DevInfo_Size_MatType_FlipCode, flip, testing::Combine(testing::Value
flip(src, dst, flipCode); flip(src, dst, flipCode);
} }
Mat dst_host = dst; Mat dst_host(dst);
SANITY_CHECK(dst_host); SANITY_CHECK(dst_host);
} }
...@@ -85,7 +85,7 @@ PERF_TEST_P(DevInfo_Size_MatType, LUT, testing::Combine(testing::ValuesIn(device ...@@ -85,7 +85,7 @@ PERF_TEST_P(DevInfo_Size_MatType, LUT, testing::Combine(testing::ValuesIn(device
LUT(src, lut, dst); LUT(src, lut, dst);
} }
Mat dst_host = dst; Mat dst_host(dst);
SANITY_CHECK(dst_host); SANITY_CHECK(dst_host);
} }
...@@ -115,8 +115,8 @@ PERF_TEST_P(DevInfo_Size, cartToPolar, testing::Combine(testing::ValuesIn(device ...@@ -115,8 +115,8 @@ PERF_TEST_P(DevInfo_Size, cartToPolar, testing::Combine(testing::ValuesIn(device
cartToPolar(x, y, magnitude, angle); cartToPolar(x, y, magnitude, angle);
} }
Mat magnitude_host = magnitude; Mat magnitude_host(magnitude);
Mat angle_host = angle; Mat angle_host(angle);
SANITY_CHECK(magnitude_host); SANITY_CHECK(magnitude_host);
SANITY_CHECK(angle_host); SANITY_CHECK(angle_host);
...@@ -147,8 +147,8 @@ PERF_TEST_P(DevInfo_Size, polarToCart, testing::Combine(testing::ValuesIn(device ...@@ -147,8 +147,8 @@ PERF_TEST_P(DevInfo_Size, polarToCart, testing::Combine(testing::ValuesIn(device
polarToCart(magnitude, angle, x, y); polarToCart(magnitude, angle, x, y);
} }
Mat x_host = x; Mat x_host(x);
Mat y_host = angle; Mat y_host(y);
SANITY_CHECK(x_host); SANITY_CHECK(x_host);
SANITY_CHECK(y_host); SANITY_CHECK(y_host);
...@@ -180,7 +180,7 @@ PERF_TEST_P(DevInfo_Size_MatType, addMat, testing::Combine(testing::ValuesIn(dev ...@@ -180,7 +180,7 @@ PERF_TEST_P(DevInfo_Size_MatType, addMat, testing::Combine(testing::ValuesIn(dev
add(a, b, c); add(a, b, c);
} }
Mat c_host = c; Mat c_host(c);
SANITY_CHECK(c_host); SANITY_CHECK(c_host);
} }
...@@ -210,7 +210,7 @@ PERF_TEST_P(DevInfo_Size_MatType, addScalar, testing::Combine(testing::ValuesIn( ...@@ -210,7 +210,7 @@ PERF_TEST_P(DevInfo_Size_MatType, addScalar, testing::Combine(testing::ValuesIn(
add(a, b, c); add(a, b, c);
} }
Mat c_host = c; Mat c_host(c);
SANITY_CHECK(c_host); SANITY_CHECK(c_host);
} }
...@@ -241,7 +241,7 @@ PERF_TEST_P(DevInfo_Size_MatType, subtractMat, testing::Combine(testing::ValuesI ...@@ -241,7 +241,7 @@ PERF_TEST_P(DevInfo_Size_MatType, subtractMat, testing::Combine(testing::ValuesI
subtract(a, b, c); subtract(a, b, c);
} }
Mat c_host = c; Mat c_host(c);
SANITY_CHECK(c_host); SANITY_CHECK(c_host);
} }
...@@ -270,7 +270,7 @@ PERF_TEST_P(DevInfo_Size, multiplyMat, testing::Combine(testing::ValuesIn(device ...@@ -270,7 +270,7 @@ PERF_TEST_P(DevInfo_Size, multiplyMat, testing::Combine(testing::ValuesIn(device
multiply(a, b, c); multiply(a, b, c);
} }
Mat c_host = c; Mat c_host(c);
SANITY_CHECK(c_host); SANITY_CHECK(c_host);
} }
...@@ -300,7 +300,7 @@ PERF_TEST_P(DevInfo_Size_MatType, multiplyScalar, testing::Combine(testing::Valu ...@@ -300,7 +300,7 @@ PERF_TEST_P(DevInfo_Size_MatType, multiplyScalar, testing::Combine(testing::Valu
multiply(a, b, c); multiply(a, b, c);
} }
Mat c_host = c; Mat c_host(c);
SANITY_CHECK(c_host); SANITY_CHECK(c_host);
} }
...@@ -327,7 +327,7 @@ PERF_TEST_P(DevInfo_Size, exp, testing::Combine(testing::ValuesIn(devices()), ...@@ -327,7 +327,7 @@ PERF_TEST_P(DevInfo_Size, exp, testing::Combine(testing::ValuesIn(devices()),
exp(a, b); exp(a, b);
} }
Mat b_host = b; Mat b_host(b);
SANITY_CHECK(b_host); SANITY_CHECK(b_host);
} }
...@@ -356,7 +356,7 @@ PERF_TEST_P(DevInfo_Size_MatType, pow, testing::Combine(testing::ValuesIn(device ...@@ -356,7 +356,7 @@ PERF_TEST_P(DevInfo_Size_MatType, pow, testing::Combine(testing::ValuesIn(device
pow(src, 2.0, dst); pow(src, 2.0, dst);
} }
Mat dst_host = dst; Mat dst_host(dst);
SANITY_CHECK(dst_host); SANITY_CHECK(dst_host);
} }
...@@ -389,7 +389,7 @@ PERF_TEST_P(DevInfo_Size_MatType_CmpOp, compare, testing::Combine(testing::Value ...@@ -389,7 +389,7 @@ PERF_TEST_P(DevInfo_Size_MatType_CmpOp, compare, testing::Combine(testing::Value
compare(src1, src2, dst, cmpop); compare(src1, src2, dst, cmpop);
} }
Mat dst_host = dst; Mat dst_host(dst);
SANITY_CHECK(dst_host); SANITY_CHECK(dst_host);
} }
...@@ -418,7 +418,7 @@ PERF_TEST_P(DevInfo_Size_MatType, bitwise_not, testing::Combine(testing::ValuesI ...@@ -418,7 +418,7 @@ PERF_TEST_P(DevInfo_Size_MatType, bitwise_not, testing::Combine(testing::ValuesI
bitwise_not(src, dst); bitwise_not(src, dst);
} }
Mat dst_host = dst; Mat dst_host(dst);
SANITY_CHECK(dst_host); SANITY_CHECK(dst_host);
} }
...@@ -449,7 +449,7 @@ PERF_TEST_P(DevInfo_Size_MatType, bitwise_and, testing::Combine(testing::ValuesI ...@@ -449,7 +449,7 @@ PERF_TEST_P(DevInfo_Size_MatType, bitwise_and, testing::Combine(testing::ValuesI
bitwise_and(src1, src2, dst); bitwise_and(src1, src2, dst);
} }
Mat dst_host = dst; Mat dst_host(dst);
SANITY_CHECK(dst_host); SANITY_CHECK(dst_host);
} }
...@@ -480,7 +480,7 @@ PERF_TEST_P(DevInfo_Size_MatType, min, testing::Combine(testing::ValuesIn(device ...@@ -480,7 +480,7 @@ PERF_TEST_P(DevInfo_Size_MatType, min, testing::Combine(testing::ValuesIn(device
min(src1, src2, dst); min(src1, src2, dst);
} }
Mat dst_host = dst; Mat dst_host(dst);
SANITY_CHECK(dst_host); SANITY_CHECK(dst_host);
} }
...@@ -712,7 +712,7 @@ PERF_TEST_P(DevInfo_Size_MatType, addWeighted, testing::Combine(testing::ValuesI ...@@ -712,7 +712,7 @@ PERF_TEST_P(DevInfo_Size_MatType, addWeighted, testing::Combine(testing::ValuesI
addWeighted(src1, 0.5, src2, 0.5, 0.0, dst); addWeighted(src1, 0.5, src2, 0.5, 0.0, dst);
} }
Mat dst_host = dst; Mat dst_host(dst);
SANITY_CHECK(dst_host); SANITY_CHECK(dst_host);
} }
...@@ -743,7 +743,7 @@ PERF_TEST_P(DevInfo_Size_MatType_FlipCode, reduce, testing::Combine(testing::Val ...@@ -743,7 +743,7 @@ PERF_TEST_P(DevInfo_Size_MatType_FlipCode, reduce, testing::Combine(testing::Val
reduce(src, dst, dim, CV_REDUCE_MIN); reduce(src, dst, dim, CV_REDUCE_MIN);
} }
Mat dst_host = dst; Mat dst_host(dst);
SANITY_CHECK(dst_host); SANITY_CHECK(dst_host);
} }
...@@ -774,7 +774,7 @@ PERF_TEST_P(DevInfo_Size, gemm, testing::Combine(testing::ValuesIn(devices()), ...@@ -774,7 +774,7 @@ PERF_TEST_P(DevInfo_Size, gemm, testing::Combine(testing::ValuesIn(devices()),
gemm(src1, src2, 1.0, src3, 1.0, dst); gemm(src1, src2, 1.0, src3, 1.0, dst);
} }
Mat dst_host = dst; Mat dst_host(dst);
SANITY_CHECK(dst_host); SANITY_CHECK(dst_host);
} }
...@@ -20,7 +20,7 @@ PERF_TEST_P(DevInfo, transformPoints, testing::ValuesIn(devices())) ...@@ -20,7 +20,7 @@ PERF_TEST_P(DevInfo, transformPoints, testing::ValuesIn(devices()))
transformPoints(src, Mat::ones(1, 3, CV_32FC1), Mat::ones(1, 3, CV_32FC1), dst); transformPoints(src, Mat::ones(1, 3, CV_32FC1), Mat::ones(1, 3, CV_32FC1), dst);
} }
Mat dst_host = dst; Mat dst_host(dst);
SANITY_CHECK(dst_host); SANITY_CHECK(dst_host);
} }
...@@ -45,7 +45,7 @@ PERF_TEST_P(DevInfo, projectPoints, testing::ValuesIn(devices())) ...@@ -45,7 +45,7 @@ PERF_TEST_P(DevInfo, projectPoints, testing::ValuesIn(devices()))
projectPoints(src, Mat::ones(1, 3, CV_32FC1), Mat::ones(1, 3, CV_32FC1), Mat::ones(3, 3, CV_32FC1), Mat(), dst); projectPoints(src, Mat::ones(1, 3, CV_32FC1), Mat::ones(1, 3, CV_32FC1), Mat::ones(3, 3, CV_32FC1), Mat(), dst);
} }
Mat dst_host = dst; Mat dst_host(dst);
SANITY_CHECK(dst_host); SANITY_CHECK(dst_host);
} }
......
...@@ -28,7 +28,7 @@ PERF_TEST_P(DevInfo_Size_MatType_KernelSize, boxFilter, testing::Combine(testing ...@@ -28,7 +28,7 @@ PERF_TEST_P(DevInfo_Size_MatType_KernelSize, boxFilter, testing::Combine(testing
filter->apply(src, dst); filter->apply(src, dst);
} }
Mat dst_host = dst; Mat dst_host(dst);
SANITY_CHECK(dst_host); SANITY_CHECK(dst_host);
} }
...@@ -63,7 +63,7 @@ PERF_TEST_P(DevInfo_Size_MatType_MorphOp_KernelSize, morphologyFilter, testing:: ...@@ -63,7 +63,7 @@ PERF_TEST_P(DevInfo_Size_MatType_MorphOp_KernelSize, morphologyFilter, testing::
filter->apply(src, dst); filter->apply(src, dst);
} }
Mat dst_host = dst; Mat dst_host(dst);
SANITY_CHECK(dst_host); SANITY_CHECK(dst_host);
} }
...@@ -96,7 +96,7 @@ PERF_TEST_P(DevInfo_Size_MatType_KernelSize, linearFilter, testing::Combine(test ...@@ -96,7 +96,7 @@ PERF_TEST_P(DevInfo_Size_MatType_KernelSize, linearFilter, testing::Combine(test
filter->apply(src, dst); filter->apply(src, dst);
} }
Mat dst_host = dst; Mat dst_host(dst);
SANITY_CHECK(dst_host); SANITY_CHECK(dst_host);
} }
...@@ -130,7 +130,7 @@ PERF_TEST_P(DevInfo_Size_MatType_KernelSize, separableLinearFilter, testing::Com ...@@ -130,7 +130,7 @@ PERF_TEST_P(DevInfo_Size_MatType_KernelSize, separableLinearFilter, testing::Com
filter->apply(src, dst, Rect(0, 0, src.cols, src.rows)); filter->apply(src, dst, Rect(0, 0, src.cols, src.rows));
} }
Mat dst_host = dst; Mat dst_host(dst);
SANITY_CHECK(dst_host); SANITY_CHECK(dst_host);
} }
...@@ -36,7 +36,7 @@ PERF_TEST_P(DevInfo_Size_MatType_Interpolation_BorderMode, remap, testing::Combi ...@@ -36,7 +36,7 @@ PERF_TEST_P(DevInfo_Size_MatType_Interpolation_BorderMode, remap, testing::Combi
remap(src, dst, xmap, ymap, interpolation, borderMode); remap(src, dst, xmap, ymap, interpolation, borderMode);
} }
Mat dst_host = dst; Mat dst_host(dst);
SANITY_CHECK(dst_host); SANITY_CHECK(dst_host);
} }
...@@ -63,7 +63,7 @@ PERF_TEST_P(DevInfo, meanShiftFiltering, testing::ValuesIn(devices())) ...@@ -63,7 +63,7 @@ PERF_TEST_P(DevInfo, meanShiftFiltering, testing::ValuesIn(devices()))
meanShiftFiltering(src, dst, 50, 50); meanShiftFiltering(src, dst, 50, 50);
} }
Mat dst_host = dst; Mat dst_host(dst);
SANITY_CHECK(dst_host); SANITY_CHECK(dst_host);
} }
...@@ -91,8 +91,8 @@ PERF_TEST_P(DevInfo, meanShiftProc, testing::ValuesIn(devices())) ...@@ -91,8 +91,8 @@ PERF_TEST_P(DevInfo, meanShiftProc, testing::ValuesIn(devices()))
meanShiftProc(src, dstr, dstsp, 50, 50); meanShiftProc(src, dstr, dstsp, 50, 50);
} }
Mat dstr_host = dstr; Mat dstr_host(dstr);
Mat dstsp_host = dstsp; Mat dstsp_host(dstsp);
SANITY_CHECK(dstr_host); SANITY_CHECK(dstr_host);
SANITY_CHECK(dstsp_host); SANITY_CHECK(dstsp_host);
......
...@@ -25,7 +25,7 @@ PERF_TEST_P(DevInfo_Size_MatType, merge, testing::Combine(testing::ValuesIn(devi ...@@ -25,7 +25,7 @@ PERF_TEST_P(DevInfo_Size_MatType, merge, testing::Combine(testing::ValuesIn(devi
merge(src, dst); merge(src, dst);
} }
Mat dst_host = dst; Mat dst_host(dst);
SANITY_CHECK(dst_host); SANITY_CHECK(dst_host);
} }
...@@ -82,7 +82,7 @@ PERF_TEST_P(DevInfo_Size_MatType, setTo, testing::Combine(testing::ValuesIn(devi ...@@ -82,7 +82,7 @@ PERF_TEST_P(DevInfo_Size_MatType, setTo, testing::Combine(testing::ValuesIn(devi
src.setTo(val); src.setTo(val);
} }
Mat src_host = src; Mat src_host(src);
SANITY_CHECK(src_host); SANITY_CHECK(src_host);
} }
...@@ -115,7 +115,7 @@ PERF_TEST_P(DevInfo_Size_MatType, setToMasked, testing::Combine(testing::ValuesI ...@@ -115,7 +115,7 @@ PERF_TEST_P(DevInfo_Size_MatType, setToMasked, testing::Combine(testing::ValuesI
src.setTo(val, mask); src.setTo(val, mask);
} }
src_host = src; src.download(src_host);
SANITY_CHECK(src_host); SANITY_CHECK(src_host);
} }
...@@ -148,7 +148,7 @@ PERF_TEST_P(DevInfo_Size_MatType, copyToMasked, testing::Combine(testing::Values ...@@ -148,7 +148,7 @@ PERF_TEST_P(DevInfo_Size_MatType, copyToMasked, testing::Combine(testing::Values
src.copyTo(dst, mask); src.copyTo(dst, mask);
} }
Mat dst_host = dst; Mat dst_host(dst);
SANITY_CHECK(dst_host); SANITY_CHECK(dst_host);
} }
...@@ -182,7 +182,7 @@ PERF_TEST_P(DevInfo_Size_MatType_MatType, convertTo, testing::Combine(testing::V ...@@ -182,7 +182,7 @@ PERF_TEST_P(DevInfo_Size_MatType_MatType, convertTo, testing::Combine(testing::V
src.convertTo(dst, type2, a, b); src.convertTo(dst, type2, a, b);
} }
Mat dst_host = dst; Mat dst_host(dst);
SANITY_CHECK(dst_host); SANITY_CHECK(dst_host);
} }
...@@ -425,16 +425,22 @@ void cv::gpu::magnitudeSqr(const GpuMat& src, GpuMat& dst, Stream& stream) ...@@ -425,16 +425,22 @@ void cv::gpu::magnitudeSqr(const GpuMat& src, GpuMat& dst, Stream& stream)
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
// Polar <-> Cart // Polar <-> Cart
namespace cv { namespace gpu { namespace mathfunc BEGIN_OPENCV_DEVICE_NAMESPACE
namespace mathfunc
{ {
void cartToPolar_gpu(const DevMem2Df& x, const DevMem2Df& y, const DevMem2Df& mag, bool magSqr, const DevMem2Df& angle, bool angleInDegrees, cudaStream_t stream); void cartToPolar_gpu(DevMem2Df x, DevMem2Df y, DevMem2Df mag, bool magSqr, DevMem2Df angle, bool angleInDegrees, cudaStream_t stream);
void polarToCart_gpu(const DevMem2Df& mag, const DevMem2Df& angle, const DevMem2Df& x, const DevMem2Df& y, bool angleInDegrees, cudaStream_t stream); void polarToCart_gpu(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y, bool angleInDegrees, cudaStream_t stream);
}}} }
END_OPENCV_DEVICE_NAMESPACE
namespace namespace
{ {
inline void cartToPolar_caller(const GpuMat& x, const GpuMat& y, GpuMat* mag, bool magSqr, GpuMat* angle, bool angleInDegrees, cudaStream_t stream) inline void cartToPolar_caller(const GpuMat& x, const GpuMat& y, GpuMat* mag, bool magSqr, GpuMat* angle, bool angleInDegrees, cudaStream_t stream)
{ {
using namespace OPENCV_DEVICE_NAMESPACE_ mathfunc;
CV_DbgAssert(x.size() == y.size() && x.type() == y.type()); CV_DbgAssert(x.size() == y.size() && x.type() == y.type());
CV_Assert(x.depth() == CV_32F); CV_Assert(x.depth() == CV_32F);
...@@ -448,11 +454,13 @@ namespace ...@@ -448,11 +454,13 @@ namespace
GpuMat mag1cn = mag ? mag->reshape(1) : GpuMat(); GpuMat mag1cn = mag ? mag->reshape(1) : GpuMat();
GpuMat angle1cn = angle ? angle->reshape(1) : GpuMat(); GpuMat angle1cn = angle ? angle->reshape(1) : GpuMat();
mathfunc::cartToPolar_gpu(x1cn, y1cn, mag1cn, magSqr, angle1cn, angleInDegrees, stream); cartToPolar_gpu(x1cn, y1cn, mag1cn, magSqr, angle1cn, angleInDegrees, stream);
} }
inline void polarToCart_caller(const GpuMat& mag, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees, cudaStream_t stream) inline void polarToCart_caller(const GpuMat& mag, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees, cudaStream_t stream)
{ {
using namespace OPENCV_DEVICE_NAMESPACE_ mathfunc;
CV_DbgAssert((mag.empty() || mag.size() == angle.size()) && mag.type() == angle.type()); CV_DbgAssert((mag.empty() || mag.size() == angle.size()) && mag.type() == angle.type());
CV_Assert(mag.depth() == CV_32F); CV_Assert(mag.depth() == CV_32F);
...@@ -464,34 +472,33 @@ namespace ...@@ -464,34 +472,33 @@ namespace
GpuMat x1cn = x.reshape(1); GpuMat x1cn = x.reshape(1);
GpuMat y1cn = y.reshape(1); GpuMat y1cn = y.reshape(1);
mathfunc::polarToCart_gpu(mag1cn, angle1cn, x1cn, y1cn, angleInDegrees, stream); polarToCart_gpu(mag1cn, angle1cn, x1cn, y1cn, angleInDegrees, stream);
} }
} }
void cv::gpu::magnitude(const GpuMat& x, const GpuMat& y, GpuMat& dst, Stream& stream) void cv::gpu::magnitude(const GpuMat& x, const GpuMat& y, GpuMat& dst, Stream& stream)
{ {
::cartToPolar_caller(x, y, &dst, false, 0, false, StreamAccessor::getStream(stream)); cartToPolar_caller(x, y, &dst, false, 0, false, StreamAccessor::getStream(stream));
} }
void cv::gpu::magnitudeSqr(const GpuMat& x, const GpuMat& y, GpuMat& dst, Stream& stream) void cv::gpu::magnitudeSqr(const GpuMat& x, const GpuMat& y, GpuMat& dst, Stream& stream)
{ {
::cartToPolar_caller(x, y, &dst, true, 0, false, StreamAccessor::getStream(stream)); cartToPolar_caller(x, y, &dst, true, 0, false, StreamAccessor::getStream(stream));
} }
void cv::gpu::phase(const GpuMat& x, const GpuMat& y, GpuMat& angle, bool angleInDegrees, Stream& stream) void cv::gpu::phase(const GpuMat& x, const GpuMat& y, GpuMat& angle, bool angleInDegrees, Stream& stream)
{ {
::cartToPolar_caller(x, y, 0, false, &angle, angleInDegrees, StreamAccessor::getStream(stream)); cartToPolar_caller(x, y, 0, false, &angle, angleInDegrees, StreamAccessor::getStream(stream));
} }
void cv::gpu::cartToPolar(const GpuMat& x, const GpuMat& y, GpuMat& mag, GpuMat& angle, bool angleInDegrees, Stream& stream) void cv::gpu::cartToPolar(const GpuMat& x, const GpuMat& y, GpuMat& mag, GpuMat& angle, bool angleInDegrees, Stream& stream)
{ {
::cartToPolar_caller(x, y, &mag, false, &angle, angleInDegrees, StreamAccessor::getStream(stream)); cartToPolar_caller(x, y, &mag, false, &angle, angleInDegrees, StreamAccessor::getStream(stream));
} }
void cv::gpu::polarToCart(const GpuMat& magnitude, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees, Stream& stream) void cv::gpu::polarToCart(const GpuMat& magnitude, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees, Stream& stream)
{ {
::polarToCart_caller(magnitude, angle, x, y, angleInDegrees, StreamAccessor::getStream(stream)); polarToCart_caller(magnitude, angle, x, y, angleInDegrees, StreamAccessor::getStream(stream));
} }
#endif /* !defined (HAVE_CUDA) */ #endif /* !defined (HAVE_CUDA) */
...@@ -55,13 +55,19 @@ void cv::gpu::DisparityBilateralFilter::operator()(const GpuMat&, const GpuMat&, ...@@ -55,13 +55,19 @@ void cv::gpu::DisparityBilateralFilter::operator()(const GpuMat&, const GpuMat&,
#else /* !defined (HAVE_CUDA) */ #else /* !defined (HAVE_CUDA) */
namespace cv { namespace gpu { namespace bf BEGIN_OPENCV_DEVICE_NAMESPACE
namespace bilateral_filter
{ {
void load_constants(float* table_color, const DevMem2Df& table_space, int ndisp, int radius, short edge_disc, short max_disc); void load_constants(float* table_color, DevMem2Df table_space, int ndisp, int radius, short edge_disc, short max_disc);
void bilateral_filter_gpu(DevMem2Db disp, DevMem2Db img, int channels, int iters, cudaStream_t stream);
void bilateral_filter_gpu(DevMem2D_<short> disp, DevMem2Db img, int channels, int iters, cudaStream_t stream);
}
END_OPENCV_DEVICE_NAMESPACE
void bilateral_filter_gpu(const DevMem2Db& disp, const DevMem2Db& img, int channels, int iters, cudaStream_t stream); using namespace OPENCV_DEVICE_NAMESPACE_ bilateral_filter;
void bilateral_filter_gpu(const DevMem2D_<short>& disp, const DevMem2Db& img, int channels, int iters, cudaStream_t stream);
}}}
namespace namespace
{ {
...@@ -105,7 +111,7 @@ namespace ...@@ -105,7 +111,7 @@ namespace
short edge_disc = max<short>(short(1), short(ndisp * edge_threshold + 0.5)); short edge_disc = max<short>(short(1), short(ndisp * edge_threshold + 0.5));
short max_disc = short(ndisp * max_disc_threshold + 0.5); short max_disc = short(ndisp * max_disc_threshold + 0.5);
bf::load_constants(table_color.ptr<float>(), table_space, ndisp, radius, edge_disc, max_disc); load_constants(table_color.ptr<float>(), table_space, ndisp, radius, edge_disc, max_disc);
if (&dst != &disp) if (&dst != &disp)
{ {
...@@ -115,7 +121,7 @@ namespace ...@@ -115,7 +121,7 @@ namespace
disp.copyTo(dst); disp.copyTo(dst);
} }
bf::bilateral_filter_gpu((DevMem2D_<T>)dst, img, img.channels(), iters, StreamAccessor::getStream(stream)); bilateral_filter_gpu((DevMem2D_<T>)dst, img, img.channels(), iters, StreamAccessor::getStream(stream));
} }
typedef void (*bilateral_filter_operator_t)(int ndisp, int radius, int iters, float edge_threshold, float max_disc_threshold, typedef void (*bilateral_filter_operator_t)(int ndisp, int radius, int iters, float edge_threshold, float max_disc_threshold,
......
...@@ -52,15 +52,19 @@ void cv::gpu::blendLinear(const GpuMat&, const GpuMat&, const GpuMat&, const Gpu ...@@ -52,15 +52,19 @@ void cv::gpu::blendLinear(const GpuMat&, const GpuMat&, const GpuMat&, const Gpu
#else #else
namespace cv { namespace gpu BEGIN_OPENCV_DEVICE_NAMESPACE
namespace blend
{ {
template <typename T> template <typename T>
void blendLinearCaller(int rows, int cols, int cn, const PtrStep<T>& img1, const PtrStep<T>& img2, void blendLinearCaller(int rows, int cols, int cn, PtrStep<T> img1, PtrStep<T> img2, PtrStepf weights1, PtrStepf weights2, PtrStep<T> result, cudaStream_t stream);
const PtrStepf& weights1, const PtrStepf& weights2, PtrStep<T> result, cudaStream_t stream);
void blendLinearCaller8UC4(int rows, int cols, PtrStepb img1, PtrStepb img2, PtrStepf weights1, PtrStepf weights2, PtrStepb result, cudaStream_t stream);
}
END_OPENCV_DEVICE_NAMESPACE
void blendLinearCaller8UC4(int rows, int cols, const PtrStepb& img1, const PtrStepb& img2, using namespace OPENCV_DEVICE_NAMESPACE_ blend;
const PtrStepf& weights1, const PtrStepf& weights2, PtrStepb result, cudaStream_t stream);
}}
void cv::gpu::blendLinear(const GpuMat& img1, const GpuMat& img2, const GpuMat& weights1, const GpuMat& weights2, void cv::gpu::blendLinear(const GpuMat& img1, const GpuMat& img2, const GpuMat& weights1, const GpuMat& weights2,
GpuMat& result, Stream& stream) GpuMat& result, Stream& stream)
......
...@@ -82,7 +82,9 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat&, vector< vec ...@@ -82,7 +82,9 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat&, vector< vec
#else /* !defined (HAVE_CUDA) */ #else /* !defined (HAVE_CUDA) */
namespace cv { namespace gpu { namespace bf_match BEGIN_OPENCV_DEVICE_NAMESPACE
namespace bf_match
{ {
template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask, template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask,
const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Di& trainIdx, const DevMem2Df& distance,
...@@ -103,9 +105,9 @@ namespace cv { namespace gpu { namespace bf_match ...@@ -103,9 +105,9 @@ namespace cv { namespace gpu { namespace bf_match
template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
int cc, cudaStream_t stream); int cc, cudaStream_t stream);
}}} }
namespace cv { namespace gpu { namespace bf_knnmatch namespace bf_knnmatch
{ {
template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask, template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask,
const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,
...@@ -126,9 +128,9 @@ namespace cv { namespace gpu { namespace bf_knnmatch ...@@ -126,9 +128,9 @@ namespace cv { namespace gpu { namespace bf_knnmatch
template <typename T> void match2Hamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, template <typename T> void match2Hamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,
int cc, cudaStream_t stream); int cc, cudaStream_t stream);
}}} }
namespace cv { namespace gpu { namespace bf_radius_match namespace bf_radius_match
{ {
template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask, template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask,
const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
...@@ -151,15 +153,17 @@ namespace cv { namespace gpu { namespace bf_radius_match ...@@ -151,15 +153,17 @@ namespace cv { namespace gpu { namespace bf_radius_match
template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks,
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
int cc, cudaStream_t stream); int cc, cudaStream_t stream);
}}}
cv::gpu::BruteForceMatcher_GPU_base::BruteForceMatcher_GPU_base(DistType distType_) : distType(distType_)
{
} }
END_OPENCV_DEVICE_NAMESPACE
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
// Train collection // Train collection
cv::gpu::BruteForceMatcher_GPU_base::BruteForceMatcher_GPU_base(DistType distType_) : distType(distType_)
{
}
void cv::gpu::BruteForceMatcher_GPU_base::add(const vector<GpuMat>& descCollection) void cv::gpu::BruteForceMatcher_GPU_base::add(const vector<GpuMat>& descCollection)
{ {
trainDescCollection.insert(trainDescCollection.end(), descCollection.begin(), descCollection.end()); trainDescCollection.insert(trainDescCollection.end(), descCollection.begin(), descCollection.end());
...@@ -195,7 +199,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchSingle(const GpuMat& query, const ...@@ -195,7 +199,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchSingle(const GpuMat& query, const
if (query.empty() || train.empty()) if (query.empty() || train.empty())
return; return;
using namespace cv::gpu::bf_match; using namespace OPENCV_DEVICE_NAMESPACE_ bf_match;
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask, typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask,
const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Di& trainIdx, const DevMem2Df& distance,
...@@ -242,8 +246,8 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchDownload(const GpuMat& trainIdx, ...@@ -242,8 +246,8 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchDownload(const GpuMat& trainIdx,
if (trainIdx.empty() || distance.empty()) if (trainIdx.empty() || distance.empty())
return; return;
Mat trainIdxCPU = trainIdx; Mat trainIdxCPU(trainIdx);
Mat distanceCPU = distance; Mat distanceCPU(distance);
matchConvert(trainIdxCPU, distanceCPU, matches); matchConvert(trainIdxCPU, distanceCPU, matches);
} }
...@@ -337,7 +341,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchCollection(const GpuMat& query, c ...@@ -337,7 +341,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchCollection(const GpuMat& query, c
if (query.empty() || trainCollection.empty()) if (query.empty() || trainCollection.empty())
return; return;
using namespace cv::gpu::bf_match; using namespace OPENCV_DEVICE_NAMESPACE_ bf_match;
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
...@@ -384,9 +388,9 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchDownload(const GpuMat& trainIdx, ...@@ -384,9 +388,9 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchDownload(const GpuMat& trainIdx,
if (trainIdx.empty() || imgIdx.empty() || distance.empty()) if (trainIdx.empty() || imgIdx.empty() || distance.empty())
return; return;
Mat trainIdxCPU = trainIdx; Mat trainIdxCPU(trainIdx);
Mat imgIdxCPU = imgIdx; Mat imgIdxCPU(imgIdx);
Mat distanceCPU = distance; Mat distanceCPU(distance);
matchConvert(trainIdxCPU, imgIdxCPU, distanceCPU, matches); matchConvert(trainIdxCPU, imgIdxCPU, distanceCPU, matches);
} }
...@@ -448,7 +452,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatchSingle(const GpuMat& query, co ...@@ -448,7 +452,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatchSingle(const GpuMat& query, co
if (query.empty() || train.empty()) if (query.empty() || train.empty())
return; return;
using namespace cv::gpu::bf_knnmatch; using namespace OPENCV_DEVICE_NAMESPACE_ bf_knnmatch;
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask, typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask,
const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,
...@@ -511,8 +515,8 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatchDownload(const GpuMat& trainId ...@@ -511,8 +515,8 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatchDownload(const GpuMat& trainId
if (trainIdx.empty() || distance.empty()) if (trainIdx.empty() || distance.empty())
return; return;
Mat trainIdxCPU = trainIdx; Mat trainIdxCPU(trainIdx);
Mat distanceCPU = distance; Mat distanceCPU(distance);
knnMatchConvert(trainIdxCPU, distanceCPU, matches, compactResult); knnMatchConvert(trainIdxCPU, distanceCPU, matches, compactResult);
} }
...@@ -577,7 +581,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch2Collection(const GpuMat& quer ...@@ -577,7 +581,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch2Collection(const GpuMat& quer
if (query.empty() || trainCollection.empty()) if (query.empty() || trainCollection.empty())
return; return;
using namespace cv::gpu::bf_knnmatch; using namespace OPENCV_DEVICE_NAMESPACE_ bf_knnmatch;
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,
...@@ -630,9 +634,9 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch2Download(const GpuMat& trainI ...@@ -630,9 +634,9 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch2Download(const GpuMat& trainI
if (trainIdx.empty() || imgIdx.empty() || distance.empty()) if (trainIdx.empty() || imgIdx.empty() || distance.empty())
return; return;
Mat trainIdxCPU = trainIdx; Mat trainIdxCPU(trainIdx);
Mat imgIdxCPU = imgIdx; Mat imgIdxCPU(imgIdx);
Mat distanceCPU = distance; Mat distanceCPU(distance);
knnMatch2Convert(trainIdxCPU, imgIdxCPU, distanceCPU, matches, compactResult); knnMatch2Convert(trainIdxCPU, imgIdxCPU, distanceCPU, matches, compactResult);
} }
...@@ -758,7 +762,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchSingle(const GpuMat& query, ...@@ -758,7 +762,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchSingle(const GpuMat& query,
if (query.empty() || train.empty()) if (query.empty() || train.empty())
return; return;
using namespace cv::gpu::bf_radius_match; using namespace OPENCV_DEVICE_NAMESPACE_ bf_radius_match;
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask, typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask,
const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
...@@ -819,9 +823,9 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchDownload(const GpuMat& trai ...@@ -819,9 +823,9 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchDownload(const GpuMat& trai
if (trainIdx.empty() || distance.empty() || nMatches.empty()) if (trainIdx.empty() || distance.empty() || nMatches.empty())
return; return;
Mat trainIdxCPU = trainIdx; Mat trainIdxCPU(trainIdx);
Mat distanceCPU = distance; Mat distanceCPU(distance);
Mat nMatchesCPU = nMatches; Mat nMatchesCPU(nMatches);
radiusMatchConvert(trainIdxCPU, distanceCPU, nMatchesCPU, matches, compactResult); radiusMatchConvert(trainIdxCPU, distanceCPU, nMatchesCPU, matches, compactResult);
} }
...@@ -889,7 +893,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchCollection(const GpuMat& qu ...@@ -889,7 +893,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchCollection(const GpuMat& qu
if (query.empty() || empty()) if (query.empty() || empty())
return; return;
using namespace cv::gpu::bf_radius_match; using namespace OPENCV_DEVICE_NAMESPACE_ bf_radius_match;
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks,
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
...@@ -953,10 +957,10 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchDownload(const GpuMat& trai ...@@ -953,10 +957,10 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchDownload(const GpuMat& trai
if (trainIdx.empty() || imgIdx.empty() || distance.empty() || nMatches.empty()) if (trainIdx.empty() || imgIdx.empty() || distance.empty() || nMatches.empty())
return; return;
Mat trainIdxCPU = trainIdx; Mat trainIdxCPU(trainIdx);
Mat imgIdxCPU = imgIdx; Mat imgIdxCPU(imgIdx);
Mat distanceCPU = distance; Mat distanceCPU(distance);
Mat nMatchesCPU = nMatches; Mat nMatchesCPU(nMatches);
radiusMatchConvert(trainIdxCPU, imgIdxCPU, distanceCPU, nMatchesCPU, matches, compactResult); radiusMatchConvert(trainIdxCPU, imgIdxCPU, distanceCPU, nMatchesCPU, matches, compactResult);
} }
......
...@@ -42,6 +42,10 @@ ...@@ -42,6 +42,10 @@
#include "precomp.hpp" #include "precomp.hpp"
using namespace cv;
using namespace cv::gpu;
using namespace std;
#if !defined(HAVE_CUDA) #if !defined(HAVE_CUDA)
void cv::gpu::transformPoints(const GpuMat&, const Mat&, const Mat&, GpuMat&, Stream&) { throw_nogpu(); } void cv::gpu::transformPoints(const GpuMat&, const Mat&, const Mat&, GpuMat&, Stream&) { throw_nogpu(); }
...@@ -52,13 +56,31 @@ void cv::gpu::solvePnPRansac(const Mat&, const Mat&, const Mat&, const Mat&, Mat ...@@ -52,13 +56,31 @@ void cv::gpu::solvePnPRansac(const Mat&, const Mat&, const Mat&, const Mat&, Mat
#else #else
using namespace cv; BEGIN_OPENCV_DEVICE_NAMESPACE
using namespace cv::gpu;
namespace cv { namespace gpu { namespace transform_points namespace transform_points
{ {
void call(const DevMem2D_<float3> src, const float* rot, const float* transl, DevMem2D_<float3> dst, cudaStream_t stream); void call(const DevMem2D_<float3> src, const float* rot, const float* transl, DevMem2D_<float3> dst, cudaStream_t stream);
}}} }
namespace project_points
{
void call(const DevMem2D_<float3> src, const float* rot, const float* transl, const float* proj, DevMem2D_<float2> dst, cudaStream_t stream);
}
namespace solve_pnp_ransac
{
int maxNumIters();
void computeHypothesisScores(
const int num_hypotheses, const int num_points, const float* rot_matrices,
const float3* transl_vectors, const float3* object, const float2* image,
const float dist_threshold, int* hypothesis_scores);
}
END_OPENCV_DEVICE_NAMESPACE
using namespace OPENCV_DEVICE_NAMESPACE;
namespace namespace
{ {
...@@ -79,15 +101,9 @@ namespace ...@@ -79,15 +101,9 @@ namespace
void cv::gpu::transformPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec, GpuMat& dst, Stream& stream) void cv::gpu::transformPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec, GpuMat& dst, Stream& stream)
{ {
::transformPointsCaller(src, rvec, tvec, dst, StreamAccessor::getStream(stream)); transformPointsCaller(src, rvec, tvec, dst, StreamAccessor::getStream(stream));
} }
namespace cv { namespace gpu { namespace project_points
{
void call(const DevMem2D_<float3> src, const float* rot, const float* transl, const float* proj, DevMem2D_<float2> dst, cudaStream_t stream);
}}}
namespace namespace
{ {
void projectPointsCaller(const GpuMat& src, const Mat& rvec, const Mat& tvec, const Mat& camera_mat, const Mat& dist_coef, GpuMat& dst, cudaStream_t stream) void projectPointsCaller(const GpuMat& src, const Mat& rvec, const Mat& tvec, const Mat& camera_mat, const Mat& dist_coef, GpuMat& dst, cudaStream_t stream)
...@@ -109,20 +125,9 @@ namespace ...@@ -109,20 +125,9 @@ namespace
void cv::gpu::projectPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec, const Mat& camera_mat, const Mat& dist_coef, GpuMat& dst, Stream& stream) void cv::gpu::projectPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec, const Mat& camera_mat, const Mat& dist_coef, GpuMat& dst, Stream& stream)
{ {
::projectPointsCaller(src, rvec, tvec, camera_mat, dist_coef, dst, StreamAccessor::getStream(stream)); projectPointsCaller(src, rvec, tvec, camera_mat, dist_coef, dst, StreamAccessor::getStream(stream));
} }
namespace cv { namespace gpu { namespace solve_pnp_ransac
{
int maxNumIters();
void computeHypothesisScores(
const int num_hypotheses, const int num_points, const float* rot_matrices,
const float3* transl_vectors, const float3* object, const float2* image,
const float dist_threshold, int* hypothesis_scores);
}}}
namespace namespace
{ {
// Selects subset_size random different points from [0, num_points - 1] range // Selects subset_size random different points from [0, num_points - 1] range
......
...@@ -46,7 +46,6 @@ using namespace cv; ...@@ -46,7 +46,6 @@ using namespace cv;
using namespace cv::gpu; using namespace cv::gpu;
using namespace std; using namespace std;
#if !defined (HAVE_CUDA) #if !defined (HAVE_CUDA)
cv::gpu::CascadeClassifier_GPU::CascadeClassifier_GPU() { throw_nogpu(); } cv::gpu::CascadeClassifier_GPU::CascadeClassifier_GPU() { throw_nogpu(); }
......
This diff is collapsed.
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
...@@ -43,10 +43,9 @@ ...@@ -43,10 +43,9 @@
#include "internal_shared.hpp" #include "internal_shared.hpp"
#include "opencv2/gpu/device/vec_math.hpp" #include "opencv2/gpu/device/vec_math.hpp"
using namespace cv::gpu; BEGIN_OPENCV_DEVICE_NAMESPACE
using namespace cv::gpu::device;
namespace cv { namespace gpu { namespace imgproc { namespace match_template {
__device__ __forceinline__ float sum(float v) { return v; } __device__ __forceinline__ float sum(float v) { return v; }
__device__ __forceinline__ float sum(float2 v) { return v.x + v.y; } __device__ __forceinline__ float sum(float2 v) { return v.x + v.y; }
...@@ -266,9 +265,9 @@ void matchTemplatePrepared_SQDIFF_8U(int w, int h, const DevMem2D_<unsigned long ...@@ -266,9 +265,9 @@ void matchTemplatePrepared_SQDIFF_8U(int w, int h, const DevMem2D_<unsigned long
__device__ float normAcc(float num, float denum) __device__ float normAcc(float num, float denum)
{ {
if (fabs(num) < denum) if (::fabs(num) < denum)
return num / denum; return num / denum;
if (fabs(num) < denum * 1.125f) if (::fabs(num) < denum * 1.125f)
return num > 0 ? 1 : -1; return num > 0 ? 1 : -1;
return 0; return 0;
} }
...@@ -276,9 +275,9 @@ __device__ float normAcc(float num, float denum) ...@@ -276,9 +275,9 @@ __device__ float normAcc(float num, float denum)
__device__ float normAcc_SQDIFF(float num, float denum) __device__ float normAcc_SQDIFF(float num, float denum)
{ {
if (fabs(num) < denum) if (::fabs(num) < denum)
return num / denum; return num / denum;
if (fabs(num) < denum * 1.125f) if (::fabs(num) < denum * 1.125f)
return num > 0 ? 1 : -1; return num > 0 ? 1 : -1;
return 1; return 1;
} }
...@@ -906,4 +905,7 @@ void extractFirstChannel_32F(const DevMem2Db image, DevMem2Df result, int cn, cu ...@@ -906,4 +905,7 @@ void extractFirstChannel_32F(const DevMem2Db image, DevMem2Df result, int cn, cu
if (stream == 0) if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaDeviceSynchronize() );
} }
}}}
} //namespace match_template
END_OPENCV_DEVICE_NAMESPACE
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
...@@ -71,16 +71,20 @@ cv::gpu::Stream::operator bool() const { throw_nogpu(); return false; } ...@@ -71,16 +71,20 @@ cv::gpu::Stream::operator bool() const { throw_nogpu(); return false; }
#include "opencv2/gpu/stream_accessor.hpp" #include "opencv2/gpu/stream_accessor.hpp"
namespace cv { namespace gpu { namespace device { BEGIN_OPENCV_DEVICE_NAMESPACE
void copy_to_with_mask(const DevMem2Db& src, DevMem2Db dst, int depth, const DevMem2Db& mask, int channels, const cudaStream_t & stream = 0);
template <typename T> void copy_to_with_mask(const DevMem2Db& src, DevMem2Db dst, int depth, const DevMem2Db& mask, int channels, const cudaStream_t & stream = 0);
void set_to_gpu(const DevMem2Db& mat, const T* scalar, int channels, cudaStream_t stream);
template <typename T> template <typename T>
void set_to_gpu(const DevMem2Db& mat, const T* scalar, const DevMem2Db& mask, int channels, cudaStream_t stream); void set_to_gpu(const DevMem2Db& mat, const T* scalar, int channels, cudaStream_t stream);
template <typename T>
void set_to_gpu(const DevMem2Db& mat, const T* scalar, const DevMem2Db& mask, int channels, cudaStream_t stream);
void convert_gpu(const DevMem2Db& src, int sdepth, const DevMem2Db& dst, int ddepth, double alpha, double beta, cudaStream_t stream = 0);
END_OPENCV_DEVICE_NAMESPACE
void convert_gpu(const DevMem2Db& src, int sdepth, const DevMem2Db& dst, int ddepth, double alpha, double beta, cudaStream_t stream = 0); using namespace OPENCV_DEVICE_NAMESPACE;
}}}
struct Stream::Impl struct Stream::Impl
{ {
...@@ -101,14 +105,14 @@ namespace ...@@ -101,14 +105,14 @@ namespace
void kernelSet(GpuMat& src, const Scalar& s, cudaStream_t stream) void kernelSet(GpuMat& src, const Scalar& s, cudaStream_t stream)
{ {
Scalar_<T> sf = s; Scalar_<T> sf = s;
device::set_to_gpu(src, sf.val, src.channels(), stream); set_to_gpu(src, sf.val, src.channels(), stream);
} }
template <typename T> template <typename T>
void kernelSetMask(GpuMat& src, const Scalar& s, const GpuMat& mask, cudaStream_t stream) void kernelSetMask(GpuMat& src, const Scalar& s, const GpuMat& mask, cudaStream_t stream)
{ {
Scalar_<T> sf = s; Scalar_<T> sf = s;
device::set_to_gpu(src, sf.val, mask, src.channels(), stream); set_to_gpu(src, sf.val, mask, src.channels(), stream);
} }
} }
...@@ -255,7 +259,7 @@ void cv::gpu::Stream::enqueueConvert(const GpuMat& src, GpuMat& dst, int rtype, ...@@ -255,7 +259,7 @@ void cv::gpu::Stream::enqueueConvert(const GpuMat& src, GpuMat& dst, int rtype,
psrc = &(temp = src); psrc = &(temp = src);
dst.create( src.size(), rtype ); dst.create( src.size(), rtype );
device::convert_gpu(psrc->reshape(1), sdepth, dst.reshape(1), ddepth, alpha, beta, impl->stream); convert_gpu(psrc->reshape(1), sdepth, dst.reshape(1), ddepth, alpha, beta, impl->stream);
} }
cv::gpu::Stream::operator bool() const cv::gpu::Stream::operator bool() const
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
...@@ -549,8 +549,8 @@ TEST_P(MorphEx, Accuracy) ...@@ -549,8 +549,8 @@ TEST_P(MorphEx, Accuracy)
cv::gpu::GpuMat dev_dst_rgba; cv::gpu::GpuMat dev_dst_rgba;
cv::gpu::GpuMat dev_dst_gray; cv::gpu::GpuMat dev_dst_gray;
cv::gpu::morphologyEx(cv::gpu::GpuMat(img_rgba), dev_dst_rgba, morphOps[morphOpsIdx], cv::gpu::GpuMat(kernel)); cv::gpu::morphologyEx(cv::gpu::GpuMat(img_rgba), dev_dst_rgba, morphOps[morphOpsIdx], kernel);
cv::gpu::morphologyEx(cv::gpu::GpuMat(img_gray), dev_dst_gray, morphOps[morphOpsIdx], cv::gpu::GpuMat(kernel)); cv::gpu::morphologyEx(cv::gpu::GpuMat(img_gray), dev_dst_gray, morphOps[morphOpsIdx], kernel);
dev_dst_rgba.download(dst_rgba); dev_dst_rgba.download(dst_rgba);
dev_dst_gray.download(dst_gray); dev_dst_gray.download(dst_gray);
......
This diff is collapsed.
...@@ -3897,7 +3897,7 @@ static void testC2C(const std::string& hint, int cols, int rows, int flags, bool ...@@ -3897,7 +3897,7 @@ static void testC2C(const std::string& hint, int cols, int rows, int flags, bool
EXPECT_TRUE(!inplace || d_b.ptr() == d_b_data.ptr()); EXPECT_TRUE(!inplace || d_b.ptr() == d_b_data.ptr());
ASSERT_EQ(CV_32F, d_b.depth()); ASSERT_EQ(CV_32F, d_b.depth());
ASSERT_EQ(2, d_b.channels()); ASSERT_EQ(2, d_b.channels());
EXPECT_MAT_NEAR(b_gold, d_b, rows * cols * 1e-4); EXPECT_MAT_NEAR(b_gold, cv::Mat(d_b), rows * cols * 1e-4);
} }
TEST_P(Dft, C2C) TEST_P(Dft, C2C)
......
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment