Commit fcfa7208 authored by Vladislav Vinogradov

moved GpuMat and DevMem2D to core module, some code refactoring

parent 8a148e39
@@ -90,6 +90,10 @@ class Mat;
 class SparseMat;
 typedef Mat MatND;
 
+namespace gpu {
+    class GpuMat;
+}
+
 class CV_EXPORTS MatExpr;
 class CV_EXPORTS MatOp_Base;
 class CV_EXPORTS MatArg;
@@ -1627,6 +1631,10 @@ public:
     template<typename _Tp> explicit Mat(const Point3_<_Tp>& pt, bool copyData=true);
     //! builds matrix from comma initializer
     template<typename _Tp> explicit Mat(const MatCommaInitializer_<_Tp>& commaInitializer);
+
+    //! download data from GpuMat
+    explicit Mat(const gpu::GpuMat& m);
+
     //! destructor - calls release()
     ~Mat();
     //! assignment operators
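Because this constructor is explicit, every implicit `Mat m = gpuMat;` conversion in the tree becomes an explicit constructor call (see the perf-test hunks below). A minimal usage sketch, assuming the 2.x-era headers; variable names are illustrative:

    #include <opencv2/core/core.hpp>
    #include <opencv2/gpu/gpu.hpp>

    int main()
    {
        cv::Mat h_src(480, 640, CV_8UC1, cv::Scalar(0));
        cv::gpu::GpuMat d_src(h_src);   // upload to the device

        cv::Mat h_copy(d_src);          // new explicit constructor: allocate + download
        // cv::Mat h_copy = d_src;      // the old implicit form this commit retires

        return 0;
    }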
@@ -40,103 +40,118 @@
 //
 //M*/
 
-#ifndef __OPENCV_GPU_MATRIX_OPERATIONS_HPP__
-#define __OPENCV_GPU_MATRIX_OPERATIONS_HPP__
-
-namespace cv
-{
-
-namespace gpu
-{
-
-///////////////////////////////////////////////////////////////////////
-//////////////////////////////// CudaMem ////////////////////////////////
-///////////////////////////////////////////////////////////////////////
-
-inline CudaMem::CudaMem() : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(0) {}
-
-inline CudaMem::CudaMem(int _rows, int _cols, int _type, int _alloc_type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(0)
-{
-    if( _rows > 0 && _cols > 0 )
-        create( _rows, _cols, _type, _alloc_type);
-}
-
-inline CudaMem::CudaMem(Size _size, int _type, int _alloc_type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(0)
-{
-    if( _size.height > 0 && _size.width > 0 )
-        create( _size.height, _size.width, _type, _alloc_type);
-}
-
-inline CudaMem::CudaMem(const CudaMem& m) : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend), alloc_type(m.alloc_type)
-{
-    if( refcount )
-        CV_XADD(refcount, 1);
-}
-
-inline CudaMem::CudaMem(const Mat& m, int _alloc_type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(0)
-{
-    if( m.rows > 0 && m.cols > 0 )
-        create( m.size(), m.type(), _alloc_type);
-
-    Mat tmp = createMatHeader();
-    m.copyTo(tmp);
-}
-
-inline CudaMem::~CudaMem()
-{
-    release();
-}
-
-inline CudaMem& CudaMem::operator = (const CudaMem& m)
-{
-    if( this != &m )
-    {
-        if( m.refcount )
-            CV_XADD(m.refcount, 1);
-        release();
-        flags = m.flags;
-        rows = m.rows; cols = m.cols;
-        step = m.step; data = m.data;
-        datastart = m.datastart;
-        dataend = m.dataend;
-        refcount = m.refcount;
-        alloc_type = m.alloc_type;
-    }
-    return *this;
-}
-
-inline CudaMem CudaMem::clone() const
-{
-    CudaMem m(size(), type(), alloc_type);
-    Mat to = m;
-    Mat from = *this;
-    from.copyTo(to);
-    return m;
-}
-
-inline void CudaMem::create(Size _size, int _type, int _alloc_type) { create(_size.height, _size.width, _type, _alloc_type); }
-
-//CCP void CudaMem::create(int _rows, int _cols, int _type, int _alloc_type);
-//CPP void CudaMem::release();
-
-inline Mat CudaMem::createMatHeader() const { return Mat(size(), type(), data, step); }
-inline CudaMem::operator Mat() const { return createMatHeader(); }
-
-inline CudaMem::operator GpuMat() const { return createGpuMatHeader(); }
-//CPP GpuMat CudaMem::createGpuMatHeader() const;
-
-inline bool CudaMem::isContinuous() const { return (flags & Mat::CONTINUOUS_FLAG) != 0; }
-inline size_t CudaMem::elemSize() const { return CV_ELEM_SIZE(flags); }
-inline size_t CudaMem::elemSize1() const { return CV_ELEM_SIZE1(flags); }
-inline int CudaMem::type() const { return CV_MAT_TYPE(flags); }
-inline int CudaMem::depth() const { return CV_MAT_DEPTH(flags); }
-inline int CudaMem::channels() const { return CV_MAT_CN(flags); }
-inline size_t CudaMem::step1() const { return step/elemSize1(); }
-inline Size CudaMem::size() const { return Size(cols, rows); }
-inline bool CudaMem::empty() const { return data == 0; }
-
-} /* end of namespace gpu */
-
-} /* end of namespace cv */
-
-#endif /* __OPENCV_GPU_MATRIX_OPERATIONS_HPP__ */
+#ifndef __OPENCV_CORE_DevMem2D_HPP__
+#define __OPENCV_CORE_DevMem2D_HPP__
+
+#ifdef __CUDACC__
+    #define __CV_GPU_HOST_DEVICE__ __host__ __device__ __forceinline__
+#else
+    #define __CV_GPU_HOST_DEVICE__
+#endif
+
+namespace cv
+{
+    namespace gpu
+    {
+        // Simple lightweight structures that encapsulate information about an image on the device.
+        // They are intended to be passed to nvcc-compiled code: GpuMat depends on headers that nvcc can't compile.
+
+        template <bool expr> struct StaticAssert;
+        template <> struct StaticAssert<true> {static __CV_GPU_HOST_DEVICE__ void check(){}};
+
+        template<typename T> struct DevPtr
+        {
+            typedef T elem_type;
+            typedef int index_type;
+
+            enum { elem_size = sizeof(elem_type) };
+
+            T* data;
+
+            __CV_GPU_HOST_DEVICE__ DevPtr() : data(0) {}
+            __CV_GPU_HOST_DEVICE__ DevPtr(T* data_) : data(data_) {}
+
+            __CV_GPU_HOST_DEVICE__ size_t elemSize() const { return elem_size; }
+            __CV_GPU_HOST_DEVICE__ operator       T*()       { return data; }
+            __CV_GPU_HOST_DEVICE__ operator const T*() const { return data; }
+        };
+
+        template<typename T> struct PtrSz : public DevPtr<T>
+        {
+            __CV_GPU_HOST_DEVICE__ PtrSz() : size(0) {}
+            __CV_GPU_HOST_DEVICE__ PtrSz(T* data_, size_t size_) : DevPtr<T>(data_), size(size_) {}
+
+            size_t size;
+        };
+
+        template<typename T> struct PtrStep : public DevPtr<T>
+        {
+            __CV_GPU_HOST_DEVICE__ PtrStep() : step(0) {}
+            __CV_GPU_HOST_DEVICE__ PtrStep(T* data_, size_t step_) : DevPtr<T>(data_), step(step_) {}
+
+            /** \brief stride between two consecutive rows in bytes. Step is stored always and everywhere in bytes!!! */
+            size_t step;
+
+            __CV_GPU_HOST_DEVICE__       T* ptr(int y = 0)       { return (      T*)( (      char*)DevPtr<T>::data + y * step); }
+            __CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const { return (const T*)( (const char*)DevPtr<T>::data + y * step); }
+
+            __CV_GPU_HOST_DEVICE__       T& operator ()(int y, int x)       { return ptr(y)[x]; }
+            __CV_GPU_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; }
+        };
+
+        template <typename T> struct PtrStepSz : public PtrStep<T>
+        {
+            __CV_GPU_HOST_DEVICE__ PtrStepSz() : cols(0), rows(0) {}
+            __CV_GPU_HOST_DEVICE__ PtrStepSz(int rows_, int cols_, T* data_, size_t step_)
+                : PtrStep<T>(data_, step_), cols(cols_), rows(rows_) {}
+
+            int cols;
+            int rows;
+        };
+
+        template <typename T> struct DevMem2D_ : public PtrStepSz<T>
+        {
+            DevMem2D_() {}
+            DevMem2D_(int rows_, int cols_, T* data_, size_t step_) : PtrStepSz<T>(rows_, cols_, data_, step_) {}
+
+            template <typename U>
+            explicit DevMem2D_(const DevMem2D_<U>& d) : PtrStepSz<T>(d.rows, d.cols, (T*)d.data, d.step) {}
+        };
+
+        template<typename T> struct PtrElemStep_ : public PtrStep<T>
+        {
+            PtrElemStep_(const DevMem2D_<T>& mem) : PtrStep<T>(mem.data, mem.step)
+            {
+                StaticAssert<256 % sizeof(T) == 0>::check();
+
+                PtrStep<T>::step /= PtrStep<T>::elem_size;
+            }
+
+            __CV_GPU_HOST_DEVICE__ T* ptr(int y = 0) { return PtrStep<T>::data + y * PtrStep<T>::step; }
+            __CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const { return PtrStep<T>::data + y * PtrStep<T>::step; }
+
+            __CV_GPU_HOST_DEVICE__ T& operator ()(int y, int x) { return ptr(y)[x]; }
+            __CV_GPU_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; }
+        };
+
+        template<typename T> struct PtrStep_ : public PtrStep<T>
+        {
+            PtrStep_() {}
+            PtrStep_(const DevMem2D_<T>& mem) : PtrStep<T>(mem.data, mem.step) {}
+        };
+
+        typedef DevMem2D_<unsigned char> DevMem2Db;
+        typedef DevMem2Db DevMem2D;
+        typedef DevMem2D_<float> DevMem2Df;
+        typedef DevMem2D_<int> DevMem2Di;
+
+        typedef PtrStep<unsigned char> PtrStepb;
+        typedef PtrStep<float> PtrStepf;
+        typedef PtrStep<int> PtrStepi;
+
+        typedef PtrElemStep_<unsigned char> PtrElemStep;
+        typedef PtrElemStep_<float> PtrElemStepf;
+        typedef PtrElemStep_<int> PtrElemStepi;
+    }
+}
+
+#endif /* __OPENCV_CORE_DevMem2D_HPP__ */
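These PODs are the hand-off format between host code and .cu files; GpuMat provides conversions to them. A minimal sketch of the intended pattern, assuming a CUDA translation unit; `scale_kernel` and `scale_gpu` are illustrative names, not part of the commit:

    // scale.cu -- hypothetical example of passing image data to device code.
    #include <cuda_runtime.h>
    #include "opencv2/core/devmem2d.hpp"

    __global__ void scale_kernel(cv::gpu::PtrStepSz<float> img, float s)
    {
        const int x = blockIdx.x * blockDim.x + threadIdx.x;
        const int y = blockIdx.y * blockDim.y + threadIdx.y;

        if (x < img.cols && y < img.rows)
            img(y, x) *= s;   // operator() resolves the byte-based step
    }

    void scale_gpu(cv::gpu::DevMem2Df img, float s, cudaStream_t stream)
    {
        const dim3 block(32, 8);
        const dim3 grid((img.cols + block.x - 1) / block.x,
                        (img.rows + block.y - 1) / block.y);

        scale_kernel<<<grid, block, 0, stream>>>(img, s);
    }

On the host side only scale_gpu's declaration needs to be visible to the C++ compiler, which is exactly why these structures carry no methods that nvcc cannot compile.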
@@ -3,7 +3,8 @@ set(name "gpu")
 set(the_target "opencv_${name}")
 project(${the_target})
 
-set(DEPS "opencv_core" "opencv_imgproc" "opencv_objdetect" "opencv_features2d" "opencv_flann" "opencv_calib3d") #"opencv_features2d" "opencv_flann" "opencv_objdetect" - only headers needed
+set(DEPS "opencv_core" "opencv_imgproc" "opencv_calib3d" "opencv_objdetect")
+set(DEPS_HEADER ${DEPS} "opencv_features2d" "opencv_flann")
 set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} opencv_gpu)
 
 include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include"
@@ -27,6 +28,13 @@ file(GLOB lib_device_hdrs_detail "src/opencv2/gpu/device/detail/*.h*")
 source_group("Device" FILES ${lib_device_hdrs})
 source_group("Device\\Detail" FILES ${lib_device_hdrs_detail})
 
+foreach(d ${DEPS_HEADER})
+    if(${d} MATCHES "opencv_")
+        string(REPLACE "opencv_" "${CMAKE_CURRENT_SOURCE_DIR}/../" d_dir ${d})
+        include_directories("${d_dir}/include")
+    endif()
+endforeach()
+
 if (HAVE_CUDA)
     file(GLOB_RECURSE ncv_srcs "src/nvidia/*.cpp")
     file(GLOB_RECURSE ncv_cuda "src/nvidia/*.cu")
@@ -51,7 +59,6 @@ if (HAVE_CUDA)
         set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler;-fno-finite-math-only;")
     endif()
 
-    string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
     string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
     string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}")
@@ -60,7 +67,7 @@ if (HAVE_CUDA)
     #string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
     #string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
     #string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4211 /wd4201 /wd4100 /wd4505 /wd4408")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4211 /wd4201 /wd4100 /wd4505 /wd4408 /wd4251")
 
     string(REPLACE "/EHsc-" "/EHs" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
     string(REPLACE "/EHsc-" "/EHs" CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE}")
@@ -74,17 +81,14 @@ if (HAVE_CUDA)
         set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler;-DCVAPI_EXPORTS")
     endif()
 
+    if(MSVC)
+        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler;/wd4251")
+    endif()
+
     CUDA_COMPILE(cuda_objs ${lib_cuda} ${ncv_cuda})
     #CUDA_BUILD_CLEAN_TARGET()
 endif()
 
-foreach(d ${DEPS})
-    if(${d} MATCHES "opencv_")
-        string(REPLACE "opencv_" "${CMAKE_CURRENT_SOURCE_DIR}/../" d_dir ${d})
-        include_directories("${d_dir}/include")
-    endif()
-endforeach()
-
 add_library(${the_target} ${lib_srcs} ${lib_hdrs} ${lib_int_hdrs} ${lib_cuda} ${lib_cuda_hdrs} ${lib_device_hdrs} ${lib_device_hdrs_detail} ${ncv_srcs} ${ncv_hdrs} ${ncv_cuda} ${cuda_objs})
 
 # For dynamic link numbering conventions
@@ -40,122 +40,4 @@
 //
 //M*/
 
-#ifndef __OPENCV_GPU_DevMem2D_HPP__
-#define __OPENCV_GPU_DevMem2D_HPP__
-
-namespace cv
-{
-    namespace gpu
-    {
-        // Simple lightweight structures that encapsulates information about an image on device.
-        // It is intended to pass to nvcc-compiled code. GpuMat depends on headers that nvcc can't compile
-
-        #if defined(__CUDACC__)
-            #define __CV_GPU_HOST_DEVICE__ __host__ __device__ __forceinline__
-        #else
-            #define __CV_GPU_HOST_DEVICE__
-        #endif
-
-        template <bool expr> struct StaticAssert;
-        template <> struct StaticAssert<true> {static __CV_GPU_HOST_DEVICE__ void check(){}};
-
-        template<typename T> struct DevPtr
-        {
-            typedef T elem_type;
-            typedef int index_type;
-
-            enum { elem_size = sizeof(elem_type) };
-
-            T* data;
-
-            __CV_GPU_HOST_DEVICE__ DevPtr() : data(0) {}
-            __CV_GPU_HOST_DEVICE__ DevPtr(T* data_) : data(data_) {}
-
-            __CV_GPU_HOST_DEVICE__ size_t elemSize() const { return elem_size; }
-            __CV_GPU_HOST_DEVICE__ operator       T*()       { return data; }
-            __CV_GPU_HOST_DEVICE__ operator const T*() const { return data; }
-        };
-
-        template<typename T> struct PtrSz : public DevPtr<T>
-        {
-            __CV_GPU_HOST_DEVICE__ PtrSz() : size(0) {}
-            __CV_GPU_HOST_DEVICE__ PtrSz(T* data_, size_t size_) : DevPtr<T>(data_), size(size_) {}
-
-            size_t size;
-        };
-
-        template<typename T> struct PtrStep : public DevPtr<T>
-        {
-            __CV_GPU_HOST_DEVICE__ PtrStep() : step(0) {}
-            __CV_GPU_HOST_DEVICE__ PtrStep(T* data_, size_t step_) : DevPtr<T>(data_), step(step_) {}
-
-            /** \brief stride between two consecutive rows in bytes. Step is stored always and everywhere in bytes!!! */
-            size_t step;
-
-            __CV_GPU_HOST_DEVICE__       T* ptr(int y = 0)       { return (      T*)( (      char*)DevPtr<T>::data + y * step); }
-            __CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const { return (const T*)( (const char*)DevPtr<T>::data + y * step); }
-
-            __CV_GPU_HOST_DEVICE__       T& operator ()(int y, int x)       { return ptr(y)[x]; }
-            __CV_GPU_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; }
-        };
-
-        template <typename T> struct PtrStepSz : public PtrStep<T>
-        {
-            __CV_GPU_HOST_DEVICE__ PtrStepSz() : cols(0), rows(0) {}
-            __CV_GPU_HOST_DEVICE__ PtrStepSz(int rows_, int cols_, T* data_, size_t step_)
-                : PtrStep<T>(data_, step_), cols(cols_), rows(rows_) {}
-
-            int cols;
-            int rows;
-        };
-
-        template <typename T> struct DevMem2D_ : public PtrStepSz<T>
-        {
-            DevMem2D_() {}
-            DevMem2D_(int rows_, int cols_, T *data_, size_t step_) : PtrStepSz<T>(rows_, cols_, data_, step_) {}
-
-            template <typename U>
-            explicit DevMem2D_(const DevMem2D_<U>& d) : PtrStepSz<T>(d.rows, d.cols, (T*)d.data, d.step) {}
-        };
-
-        template<typename T> struct PtrElemStep_ : public PtrStep<T>
-        {
-            PtrElemStep_(const DevMem2D_<T>& mem) : PtrStep<T>(mem.data, mem.step)
-            {
-                StaticAssert<256 % sizeof(T) == 0>::check();
-
-                PtrStep<T>::step /= PtrStep<T>::elem_size;
-            }
-
-            __CV_GPU_HOST_DEVICE__ T* ptr(int y = 0) { return PtrStep<T>::data + y * PtrStep<T>::step; }
-            __CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const { return PtrStep<T>::data + y * PtrStep<T>::step; }
-
-            __CV_GPU_HOST_DEVICE__ T& operator ()(int y, int x) { return ptr(y)[x]; }
-            __CV_GPU_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; }
-        };
-
-        template<typename T> struct PtrStep_ : public PtrStep<T>
-        {
-            PtrStep_() {}
-            PtrStep_(const DevMem2D_<T>& mem) : PtrStep<T>(mem.data, mem.step) {}
-        };
-
-        #undef __CV_GPU_HOST_DEVICE__
-
-        typedef DevMem2D_<unsigned char> DevMem2Db;
-        typedef DevMem2Db DevMem2D;
-        typedef DevMem2D_<float> DevMem2Df;
-        typedef DevMem2D_<int> DevMem2Di;
-
-        typedef PtrStep<unsigned char> PtrStepb;
-        typedef PtrStep<float> PtrStepf;
-        typedef PtrStep<int> PtrStepi;
-
-        typedef PtrElemStep_<unsigned char> PtrElemStep;
-        typedef PtrElemStep_<float> PtrElemStepf;
-        typedef PtrElemStep_<int> PtrElemStepi;
-    }
-}
-
-#endif /* __OPENCV_GPU_DevMem2D_HPP__ */
+#include "opencv2/core/devmem2d.hpp"
@@ -24,7 +24,7 @@ PERF_TEST_P(DevInfo_Size_MatType, transpose, testing::Combine(testing::ValuesIn(
         transpose(src, dst);
     }
 
-    Mat dst_host = dst;
+    Mat dst_host(dst);
 
     SANITY_CHECK(dst_host);
 }
@@ -55,7 +55,7 @@ PERF_TEST_P(DevInfo_Size_MatType_FlipCode, flip, testing::Combine(testing::Value
         flip(src, dst, flipCode);
     }
 
-    Mat dst_host = dst;
+    Mat dst_host(dst);
 
     SANITY_CHECK(dst_host);
 }
@@ -85,7 +85,7 @@ PERF_TEST_P(DevInfo_Size_MatType, LUT, testing::Combine(testing::ValuesIn(device
         LUT(src, lut, dst);
     }
 
-    Mat dst_host = dst;
+    Mat dst_host(dst);
 
    SANITY_CHECK(dst_host);
 }
@@ -115,8 +115,8 @@ PERF_TEST_P(DevInfo_Size, cartToPolar, testing::Combine(testing::ValuesIn(device
         cartToPolar(x, y, magnitude, angle);
     }
 
-    Mat magnitude_host = magnitude;
-    Mat angle_host = angle;
+    Mat magnitude_host(magnitude);
+    Mat angle_host(angle);
 
     SANITY_CHECK(magnitude_host);
     SANITY_CHECK(angle_host);
@@ -147,8 +147,8 @@ PERF_TEST_P(DevInfo_Size, polarToCart, testing::Combine(testing::ValuesIn(device
         polarToCart(magnitude, angle, x, y);
     }
 
-    Mat x_host = x;
-    Mat y_host = angle;
+    Mat x_host(x);
+    Mat y_host(y);
 
     SANITY_CHECK(x_host);
     SANITY_CHECK(y_host);
@@ -180,7 +180,7 @@ PERF_TEST_P(DevInfo_Size_MatType, addMat, testing::Combine(testing::ValuesIn(dev
         add(a, b, c);
     }
 
-    Mat c_host = c;
+    Mat c_host(c);
 
     SANITY_CHECK(c_host);
 }
@@ -210,7 +210,7 @@ PERF_TEST_P(DevInfo_Size_MatType, addScalar, testing::Combine(testing::ValuesIn(
         add(a, b, c);
     }
 
-    Mat c_host = c;
+    Mat c_host(c);
 
     SANITY_CHECK(c_host);
 }
@@ -241,7 +241,7 @@ PERF_TEST_P(DevInfo_Size_MatType, subtractMat, testing::Combine(testing::ValuesI
         subtract(a, b, c);
     }
 
-    Mat c_host = c;
+    Mat c_host(c);
 
     SANITY_CHECK(c_host);
 }
@@ -270,7 +270,7 @@ PERF_TEST_P(DevInfo_Size, multiplyMat, testing::Combine(testing::ValuesIn(device
         multiply(a, b, c);
     }
 
-    Mat c_host = c;
+    Mat c_host(c);
 
     SANITY_CHECK(c_host);
 }
@@ -300,7 +300,7 @@ PERF_TEST_P(DevInfo_Size_MatType, multiplyScalar, testing::Combine(testing::Valu
         multiply(a, b, c);
     }
 
-    Mat c_host = c;
+    Mat c_host(c);
 
     SANITY_CHECK(c_host);
 }
@@ -327,7 +327,7 @@ PERF_TEST_P(DevInfo_Size, exp, testing::Combine(testing::ValuesIn(devices()),
         exp(a, b);
     }
 
-    Mat b_host = b;
+    Mat b_host(b);
 
     SANITY_CHECK(b_host);
 }
@@ -356,7 +356,7 @@ PERF_TEST_P(DevInfo_Size_MatType, pow, testing::Combine(testing::ValuesIn(device
         pow(src, 2.0, dst);
     }
 
-    Mat dst_host = dst;
+    Mat dst_host(dst);
 
     SANITY_CHECK(dst_host);
 }
@@ -389,7 +389,7 @@ PERF_TEST_P(DevInfo_Size_MatType_CmpOp, compare, testing::Combine(testing::Value
         compare(src1, src2, dst, cmpop);
     }
 
-    Mat dst_host = dst;
+    Mat dst_host(dst);
 
     SANITY_CHECK(dst_host);
 }
@@ -418,7 +418,7 @@ PERF_TEST_P(DevInfo_Size_MatType, bitwise_not, testing::Combine(testing::ValuesI
         bitwise_not(src, dst);
     }
 
-    Mat dst_host = dst;
+    Mat dst_host(dst);
 
     SANITY_CHECK(dst_host);
 }
@@ -449,7 +449,7 @@ PERF_TEST_P(DevInfo_Size_MatType, bitwise_and, testing::Combine(testing::ValuesI
         bitwise_and(src1, src2, dst);
     }
 
-    Mat dst_host = dst;
+    Mat dst_host(dst);
 
    SANITY_CHECK(dst_host);
 }
@@ -480,7 +480,7 @@ PERF_TEST_P(DevInfo_Size_MatType, min, testing::Combine(testing::ValuesIn(device
         min(src1, src2, dst);
     }
 
-    Mat dst_host = dst;
+    Mat dst_host(dst);
 
     SANITY_CHECK(dst_host);
 }
@@ -712,7 +712,7 @@ PERF_TEST_P(DevInfo_Size_MatType, addWeighted, testing::Combine(testing::ValuesI
         addWeighted(src1, 0.5, src2, 0.5, 0.0, dst);
     }
 
-    Mat dst_host = dst;
+    Mat dst_host(dst);
 
     SANITY_CHECK(dst_host);
 }
@@ -743,7 +743,7 @@ PERF_TEST_P(DevInfo_Size_MatType_FlipCode, reduce, testing::Combine(testing::Val
         reduce(src, dst, dim, CV_REDUCE_MIN);
     }
 
-    Mat dst_host = dst;
+    Mat dst_host(dst);
 
     SANITY_CHECK(dst_host);
 }
@@ -774,7 +774,7 @@ PERF_TEST_P(DevInfo_Size, gemm, testing::Combine(testing::ValuesIn(devices()),
         gemm(src1, src2, 1.0, src3, 1.0, dst);
     }
 
-    Mat dst_host = dst;
+    Mat dst_host(dst);
 
     SANITY_CHECK(dst_host);
 }
@@ -20,7 +20,7 @@ PERF_TEST_P(DevInfo, transformPoints, testing::ValuesIn(devices()))
         transformPoints(src, Mat::ones(1, 3, CV_32FC1), Mat::ones(1, 3, CV_32FC1), dst);
     }
 
-    Mat dst_host = dst;
+    Mat dst_host(dst);
 
     SANITY_CHECK(dst_host);
 }
@@ -45,7 +45,7 @@ PERF_TEST_P(DevInfo, projectPoints, testing::ValuesIn(devices()))
         projectPoints(src, Mat::ones(1, 3, CV_32FC1), Mat::ones(1, 3, CV_32FC1), Mat::ones(3, 3, CV_32FC1), Mat(), dst);
     }
 
-    Mat dst_host = dst;
+    Mat dst_host(dst);
 
     SANITY_CHECK(dst_host);
 }
@@ -28,7 +28,7 @@ PERF_TEST_P(DevInfo_Size_MatType_KernelSize, boxFilter, testing::Combine(testing
         filter->apply(src, dst);
     }
 
-    Mat dst_host = dst;
+    Mat dst_host(dst);
 
     SANITY_CHECK(dst_host);
 }
@@ -63,7 +63,7 @@ PERF_TEST_P(DevInfo_Size_MatType_MorphOp_KernelSize, morphologyFilter, testing::
         filter->apply(src, dst);
     }
 
-    Mat dst_host = dst;
+    Mat dst_host(dst);
 
     SANITY_CHECK(dst_host);
 }
@@ -96,7 +96,7 @@ PERF_TEST_P(DevInfo_Size_MatType_KernelSize, linearFilter, testing::Combine(test
         filter->apply(src, dst);
     }
 
-    Mat dst_host = dst;
+    Mat dst_host(dst);
 
     SANITY_CHECK(dst_host);
 }
@@ -130,7 +130,7 @@ PERF_TEST_P(DevInfo_Size_MatType_KernelSize, separableLinearFilter, testing::Com
         filter->apply(src, dst, Rect(0, 0, src.cols, src.rows));
     }
 
-    Mat dst_host = dst;
+    Mat dst_host(dst);
 
     SANITY_CHECK(dst_host);
 }
@@ -36,7 +36,7 @@ PERF_TEST_P(DevInfo_Size_MatType_Interpolation_BorderMode, remap, testing::Combi
         remap(src, dst, xmap, ymap, interpolation, borderMode);
     }
 
-    Mat dst_host = dst;
+    Mat dst_host(dst);
 
     SANITY_CHECK(dst_host);
 }
@@ -63,7 +63,7 @@ PERF_TEST_P(DevInfo, meanShiftFiltering, testing::ValuesIn(devices()))
         meanShiftFiltering(src, dst, 50, 50);
     }
 
-    Mat dst_host = dst;
+    Mat dst_host(dst);
 
     SANITY_CHECK(dst_host);
 }
@@ -91,8 +91,8 @@ PERF_TEST_P(DevInfo, meanShiftProc, testing::ValuesIn(devices()))
         meanShiftProc(src, dstr, dstsp, 50, 50);
     }
 
-    Mat dstr_host = dstr;
-    Mat dstsp_host = dstsp;
+    Mat dstr_host(dstr);
+    Mat dstsp_host(dstsp);
 
     SANITY_CHECK(dstr_host);
     SANITY_CHECK(dstsp_host);
@@ -25,7 +25,7 @@ PERF_TEST_P(DevInfo_Size_MatType, merge, testing::Combine(testing::ValuesIn(devi
         merge(src, dst);
     }
 
-    Mat dst_host = dst;
+    Mat dst_host(dst);
 
     SANITY_CHECK(dst_host);
 }
@@ -82,7 +82,7 @@ PERF_TEST_P(DevInfo_Size_MatType, setTo, testing::Combine(testing::ValuesIn(devi
         src.setTo(val);
     }
 
-    Mat src_host = src;
+    Mat src_host(src);
 
     SANITY_CHECK(src_host);
 }
@@ -115,7 +115,7 @@ PERF_TEST_P(DevInfo_Size_MatType, setToMasked, testing::Combine(testing::ValuesI
         src.setTo(val, mask);
     }
 
-    src_host = src;
+    src.download(src_host);
 
     SANITY_CHECK(src_host);
 }
@@ -148,7 +148,7 @@ PERF_TEST_P(DevInfo_Size_MatType, copyToMasked, testing::Combine(testing::Values
         src.copyTo(dst, mask);
     }
 
-    Mat dst_host = dst;
+    Mat dst_host(dst);
 
     SANITY_CHECK(dst_host);
 }
@@ -182,7 +182,7 @@ PERF_TEST_P(DevInfo_Size_MatType_MatType, convertTo, testing::Combine(testing::V
         src.convertTo(dst, type2, a, b);
    }
 
-    Mat dst_host = dst;
+    Mat dst_host(dst);
 
     SANITY_CHECK(dst_host);
 }
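The setToMasked hunk is the one spot where a plain rebind does not work: src_host already exists, so once the implicit conversion is gone an assignment from a GpuMat no longer compiles, and the test downloads into the existing header instead. The same two options apply anywhere a device result needs to reach the host (sketch; `src` is any GpuMat result):

    cv::gpu::GpuMat src;            // device-side result
    cv::Mat src_host;

    // src_host = src;              // ill-formed once the implicit conversion is removed
    src.download(src_host);         // explicit copy into an existing Mat
    cv::Mat fresh(src);             // or construct a new Mat, allocating + downloading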
@@ -425,16 +425,22 @@ void cv::gpu::magnitudeSqr(const GpuMat& src, GpuMat& dst, Stream& stream)
 ////////////////////////////////////////////////////////////////////////
 // Polar <-> Cart
 
-namespace cv { namespace gpu { namespace mathfunc
-{
-    void cartToPolar_gpu(const DevMem2Df& x, const DevMem2Df& y, const DevMem2Df& mag, bool magSqr, const DevMem2Df& angle, bool angleInDegrees, cudaStream_t stream);
-    void polarToCart_gpu(const DevMem2Df& mag, const DevMem2Df& angle, const DevMem2Df& x, const DevMem2Df& y, bool angleInDegrees, cudaStream_t stream);
-}}}
+BEGIN_OPENCV_DEVICE_NAMESPACE
+
+namespace mathfunc
+{
+    void cartToPolar_gpu(DevMem2Df x, DevMem2Df y, DevMem2Df mag, bool magSqr, DevMem2Df angle, bool angleInDegrees, cudaStream_t stream);
+    void polarToCart_gpu(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y, bool angleInDegrees, cudaStream_t stream);
+}
+
+END_OPENCV_DEVICE_NAMESPACE
 
 namespace
 {
     inline void cartToPolar_caller(const GpuMat& x, const GpuMat& y, GpuMat* mag, bool magSqr, GpuMat* angle, bool angleInDegrees, cudaStream_t stream)
     {
+        using namespace OPENCV_DEVICE_NAMESPACE_ mathfunc;
+
         CV_DbgAssert(x.size() == y.size() && x.type() == y.type());
         CV_Assert(x.depth() == CV_32F);
@@ -448,11 +454,13 @@ namespace
         GpuMat mag1cn = mag ? mag->reshape(1) : GpuMat();
         GpuMat angle1cn = angle ? angle->reshape(1) : GpuMat();
 
-        mathfunc::cartToPolar_gpu(x1cn, y1cn, mag1cn, magSqr, angle1cn, angleInDegrees, stream);
+        cartToPolar_gpu(x1cn, y1cn, mag1cn, magSqr, angle1cn, angleInDegrees, stream);
     }
 
     inline void polarToCart_caller(const GpuMat& mag, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees, cudaStream_t stream)
     {
+        using namespace OPENCV_DEVICE_NAMESPACE_ mathfunc;
+
         CV_DbgAssert((mag.empty() || mag.size() == angle.size()) && mag.type() == angle.type());
         CV_Assert(mag.depth() == CV_32F);
@@ -464,34 +472,33 @@ namespace
         GpuMat x1cn = x.reshape(1);
         GpuMat y1cn = y.reshape(1);
 
-        mathfunc::polarToCart_gpu(mag1cn, angle1cn, x1cn, y1cn, angleInDegrees, stream);
+        polarToCart_gpu(mag1cn, angle1cn, x1cn, y1cn, angleInDegrees, stream);
     }
 }
 
 void cv::gpu::magnitude(const GpuMat& x, const GpuMat& y, GpuMat& dst, Stream& stream)
 {
-    ::cartToPolar_caller(x, y, &dst, false, 0, false, StreamAccessor::getStream(stream));
+    cartToPolar_caller(x, y, &dst, false, 0, false, StreamAccessor::getStream(stream));
 }
 
 void cv::gpu::magnitudeSqr(const GpuMat& x, const GpuMat& y, GpuMat& dst, Stream& stream)
 {
-    ::cartToPolar_caller(x, y, &dst, true, 0, false, StreamAccessor::getStream(stream));
+    cartToPolar_caller(x, y, &dst, true, 0, false, StreamAccessor::getStream(stream));
 }
 
 void cv::gpu::phase(const GpuMat& x, const GpuMat& y, GpuMat& angle, bool angleInDegrees, Stream& stream)
 {
-    ::cartToPolar_caller(x, y, 0, false, &angle, angleInDegrees, StreamAccessor::getStream(stream));
+    cartToPolar_caller(x, y, 0, false, &angle, angleInDegrees, StreamAccessor::getStream(stream));
 }
 
 void cv::gpu::cartToPolar(const GpuMat& x, const GpuMat& y, GpuMat& mag, GpuMat& angle, bool angleInDegrees, Stream& stream)
 {
-    ::cartToPolar_caller(x, y, &mag, false, &angle, angleInDegrees, StreamAccessor::getStream(stream));
+    cartToPolar_caller(x, y, &mag, false, &angle, angleInDegrees, StreamAccessor::getStream(stream));
 }
 
 void cv::gpu::polarToCart(const GpuMat& magnitude, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees, Stream& stream)
 {
-    ::polarToCart_caller(magnitude, angle, x, y, angleInDegrees, StreamAccessor::getStream(stream));
+    polarToCart_caller(magnitude, angle, x, y, angleInDegrees, StreamAccessor::getStream(stream));
 }
 
 #endif /* !defined (HAVE_CUDA) */
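The BEGIN/END macros and the OPENCV_DEVICE_NAMESPACE_ prefix recur through the rest of the commit. Their definitions are not shown in this diff; they presumably live in internal_shared.hpp along these lines (an assumption, reconstructed from how they are used):

    // Presumed macro definitions (not part of this diff):
    #define OPENCV_DEVICE_NAMESPACE        ::cv::gpu::device
    #define OPENCV_DEVICE_NAMESPACE_       ::cv::gpu::device::
    #define BEGIN_OPENCV_DEVICE_NAMESPACE  namespace cv { namespace gpu { namespace device {
    #define END_OPENCV_DEVICE_NAMESPACE    }}}

    // So the pair above expands to
    //   namespace cv { namespace gpu { namespace device { namespace mathfunc { ... }}}}
    // and `using namespace OPENCV_DEVICE_NAMESPACE_ mathfunc;` becomes
    //   using namespace ::cv::gpu::device::mathfunc;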
@@ -55,13 +55,19 @@ void cv::gpu::DisparityBilateralFilter::operator()(const GpuMat&, const GpuMat&,
 
 #else /* !defined (HAVE_CUDA) */
 
-namespace cv { namespace gpu { namespace bf
-{
-    void load_constants(float* table_color, const DevMem2Df& table_space, int ndisp, int radius, short edge_disc, short max_disc);
-
-    void bilateral_filter_gpu(const DevMem2Db& disp, const DevMem2Db& img, int channels, int iters, cudaStream_t stream);
-    void bilateral_filter_gpu(const DevMem2D_<short>& disp, const DevMem2Db& img, int channels, int iters, cudaStream_t stream);
-}}}
+BEGIN_OPENCV_DEVICE_NAMESPACE
+
+namespace bilateral_filter
+{
+    void load_constants(float* table_color, DevMem2Df table_space, int ndisp, int radius, short edge_disc, short max_disc);
+
+    void bilateral_filter_gpu(DevMem2Db disp, DevMem2Db img, int channels, int iters, cudaStream_t stream);
+    void bilateral_filter_gpu(DevMem2D_<short> disp, DevMem2Db img, int channels, int iters, cudaStream_t stream);
+}
+
+END_OPENCV_DEVICE_NAMESPACE
+
+using namespace OPENCV_DEVICE_NAMESPACE_ bilateral_filter;
 
 namespace
 {
@@ -105,7 +111,7 @@ namespace
         short edge_disc = max<short>(short(1), short(ndisp * edge_threshold + 0.5));
         short max_disc = short(ndisp * max_disc_threshold + 0.5);
 
-        bf::load_constants(table_color.ptr<float>(), table_space, ndisp, radius, edge_disc, max_disc);
+        load_constants(table_color.ptr<float>(), table_space, ndisp, radius, edge_disc, max_disc);
 
         if (&dst != &disp)
         {
@@ -115,7 +121,7 @@ namespace
             disp.copyTo(dst);
         }
 
-        bf::bilateral_filter_gpu((DevMem2D_<T>)dst, img, img.channels(), iters, StreamAccessor::getStream(stream));
+        bilateral_filter_gpu((DevMem2D_<T>)dst, img, img.channels(), iters, StreamAccessor::getStream(stream));
     }
 
     typedef void (*bilateral_filter_operator_t)(int ndisp, int radius, int iters, float edge_threshold, float max_disc_threshold,
@@ -52,15 +52,19 @@ void cv::gpu::blendLinear(const GpuMat&, const GpuMat&, const GpuMat&, const Gpu
 
 #else
 
-namespace cv { namespace gpu
-{
-    template <typename T>
-    void blendLinearCaller(int rows, int cols, int cn, const PtrStep<T>& img1, const PtrStep<T>& img2,
-                           const PtrStepf& weights1, const PtrStepf& weights2, PtrStep<T> result, cudaStream_t stream);
-
-    void blendLinearCaller8UC4(int rows, int cols, const PtrStepb& img1, const PtrStepb& img2,
-                               const PtrStepf& weights1, const PtrStepf& weights2, PtrStepb result, cudaStream_t stream);
-}}
+BEGIN_OPENCV_DEVICE_NAMESPACE
+
+namespace blend
+{
+    template <typename T>
+    void blendLinearCaller(int rows, int cols, int cn, PtrStep<T> img1, PtrStep<T> img2, PtrStepf weights1, PtrStepf weights2, PtrStep<T> result, cudaStream_t stream);
+
+    void blendLinearCaller8UC4(int rows, int cols, PtrStepb img1, PtrStepb img2, PtrStepf weights1, PtrStepf weights2, PtrStepb result, cudaStream_t stream);
+}
+
+END_OPENCV_DEVICE_NAMESPACE
+
+using namespace OPENCV_DEVICE_NAMESPACE_ blend;
 
 void cv::gpu::blendLinear(const GpuMat& img1, const GpuMat& img2, const GpuMat& weights1, const GpuMat& weights2,
                           GpuMat& result, Stream& stream)
@@ -82,7 +82,9 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat&, vector< vec
 
 #else /* !defined (HAVE_CUDA) */
 
-namespace cv { namespace gpu { namespace bf_match
+BEGIN_OPENCV_DEVICE_NAMESPACE
+
+namespace bf_match
 {
     template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask,
         const DevMem2Di& trainIdx, const DevMem2Df& distance,
@@ -103,9 +105,9 @@ namespace cv { namespace gpu { namespace bf_match
     template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
         const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
         int cc, cudaStream_t stream);
-}}}
+}
 
-namespace cv { namespace gpu { namespace bf_knnmatch
+namespace bf_knnmatch
 {
     template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask,
         const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,
@@ -126,9 +128,9 @@ namespace cv { namespace gpu { namespace bf_knnmatch
     template <typename T> void match2Hamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
         const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,
         int cc, cudaStream_t stream);
-}}}
+}
 
-namespace cv { namespace gpu { namespace bf_radius_match
+namespace bf_radius_match
 {
     template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask,
         const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
@@ -151,15 +153,17 @@ namespace cv { namespace gpu { namespace bf_radius_match
     template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks,
         const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
         int cc, cudaStream_t stream);
-}}}
+}
 
-cv::gpu::BruteForceMatcher_GPU_base::BruteForceMatcher_GPU_base(DistType distType_) : distType(distType_)
-{
-}
+END_OPENCV_DEVICE_NAMESPACE
 
 ////////////////////////////////////////////////////////////////////
 // Train collection
 
+cv::gpu::BruteForceMatcher_GPU_base::BruteForceMatcher_GPU_base(DistType distType_) : distType(distType_)
+{
+}
+
 void cv::gpu::BruteForceMatcher_GPU_base::add(const vector<GpuMat>& descCollection)
 {
     trainDescCollection.insert(trainDescCollection.end(), descCollection.begin(), descCollection.end());
@@ -195,7 +199,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchSingle(const GpuMat& query, const
     if (query.empty() || train.empty())
         return;
 
-    using namespace cv::gpu::bf_match;
+    using namespace OPENCV_DEVICE_NAMESPACE_ bf_match;
 
     typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask,
                              const DevMem2Di& trainIdx, const DevMem2Df& distance,
@@ -242,8 +246,8 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchDownload(const GpuMat& trainIdx,
     if (trainIdx.empty() || distance.empty())
         return;
 
-    Mat trainIdxCPU = trainIdx;
-    Mat distanceCPU = distance;
+    Mat trainIdxCPU(trainIdx);
+    Mat distanceCPU(distance);
 
     matchConvert(trainIdxCPU, distanceCPU, matches);
 }
@@ -337,7 +341,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchCollection(const GpuMat& query, c
     if (query.empty() || trainCollection.empty())
         return;
 
-    using namespace cv::gpu::bf_match;
+    using namespace OPENCV_DEVICE_NAMESPACE_ bf_match;
 
     typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
                              const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
@@ -384,9 +388,9 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchDownload(const GpuMat& trainIdx,
     if (trainIdx.empty() || imgIdx.empty() || distance.empty())
         return;
 
-    Mat trainIdxCPU = trainIdx;
-    Mat imgIdxCPU = imgIdx;
-    Mat distanceCPU = distance;
+    Mat trainIdxCPU(trainIdx);
+    Mat imgIdxCPU(imgIdx);
+    Mat distanceCPU(distance);
 
     matchConvert(trainIdxCPU, imgIdxCPU, distanceCPU, matches);
 }
@@ -448,7 +452,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatchSingle(const GpuMat& query, co
     if (query.empty() || train.empty())
         return;
 
-    using namespace cv::gpu::bf_knnmatch;
+    using namespace OPENCV_DEVICE_NAMESPACE_ bf_knnmatch;
 
     typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask,
                              const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,
@@ -511,8 +515,8 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatchDownload(const GpuMat& trainId
     if (trainIdx.empty() || distance.empty())
         return;
 
-    Mat trainIdxCPU = trainIdx;
-    Mat distanceCPU = distance;
+    Mat trainIdxCPU(trainIdx);
+    Mat distanceCPU(distance);
 
     knnMatchConvert(trainIdxCPU, distanceCPU, matches, compactResult);
 }
@@ -577,7 +581,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch2Collection(const GpuMat& quer
     if (query.empty() || trainCollection.empty())
         return;
 
-    using namespace cv::gpu::bf_knnmatch;
+    using namespace OPENCV_DEVICE_NAMESPACE_ bf_knnmatch;
 
     typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
                              const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,
@@ -630,9 +634,9 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch2Download(const GpuMat& trainI
     if (trainIdx.empty() || imgIdx.empty() || distance.empty())
         return;
 
-    Mat trainIdxCPU = trainIdx;
-    Mat imgIdxCPU = imgIdx;
-    Mat distanceCPU = distance;
+    Mat trainIdxCPU(trainIdx);
+    Mat imgIdxCPU(imgIdx);
+    Mat distanceCPU(distance);
 
     knnMatch2Convert(trainIdxCPU, imgIdxCPU, distanceCPU, matches, compactResult);
 }
@@ -758,7 +762,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchSingle(const GpuMat& query,
     if (query.empty() || train.empty())
         return;
 
-    using namespace cv::gpu::bf_radius_match;
+    using namespace OPENCV_DEVICE_NAMESPACE_ bf_radius_match;
 
     typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask,
                              const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
@@ -819,9 +823,9 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchDownload(const GpuMat& trai
     if (trainIdx.empty() || distance.empty() || nMatches.empty())
         return;
 
-    Mat trainIdxCPU = trainIdx;
-    Mat distanceCPU = distance;
-    Mat nMatchesCPU = nMatches;
+    Mat trainIdxCPU(trainIdx);
+    Mat distanceCPU(distance);
+    Mat nMatchesCPU(nMatches);
 
     radiusMatchConvert(trainIdxCPU, distanceCPU, nMatchesCPU, matches, compactResult);
 }
@@ -889,7 +893,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchCollection(const GpuMat& qu
     if (query.empty() || empty())
         return;
 
-    using namespace cv::gpu::bf_radius_match;
+    using namespace OPENCV_DEVICE_NAMESPACE_ bf_radius_match;
 
     typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks,
                              const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
@@ -953,10 +957,10 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchDownload(const GpuMat& trai
     if (trainIdx.empty() || imgIdx.empty() || distance.empty() || nMatches.empty())
         return;
 
-    Mat trainIdxCPU = trainIdx;
-    Mat imgIdxCPU = imgIdx;
-    Mat distanceCPU = distance;
-    Mat nMatchesCPU = nMatches;
+    Mat trainIdxCPU(trainIdx);
+    Mat imgIdxCPU(imgIdx);
+    Mat distanceCPU(distance);
+    Mat nMatchesCPU(nMatches);
 
     radiusMatchConvert(trainIdxCPU, imgIdxCPU, distanceCPU, nMatchesCPU, matches, compactResult);
 }
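The download helpers edited above are what the convenience wrappers call after the device kernels finish. A usage sketch under the 2.3-era API (an assumption; BruteForceMatcher_GPU and the L2 distance functor come from the gpu and features2d headers of that period):

    #include <opencv2/gpu/gpu.hpp>
    #include <opencv2/features2d/features2d.hpp>
    #include <vector>

    int main()
    {
        cv::Mat query_h(100, 64, CV_32F), train_h(500, 64, CV_32F);
        cv::randu(query_h, cv::Scalar::all(0), cv::Scalar::all(1));
        cv::randu(train_h, cv::Scalar::all(0), cv::Scalar::all(1));

        cv::gpu::GpuMat query(query_h), train(train_h);

        cv::gpu::BruteForceMatcher_GPU< cv::L2<float> > matcher;
        std::vector<cv::DMatch> matches;
        matcher.match(query, train, matches);   // matchSingle + matchDownload + matchConvert
        return 0;
    }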
@@ -42,6 +42,10 @@
 #include "precomp.hpp"
 
+using namespace cv;
+using namespace cv::gpu;
+using namespace std;
+
 #if !defined(HAVE_CUDA)
 
 void cv::gpu::transformPoints(const GpuMat&, const Mat&, const Mat&, GpuMat&, Stream&) { throw_nogpu(); }
@@ -52,13 +56,31 @@ void cv::gpu::solvePnPRansac(const Mat&, const Mat&, const Mat&, const Mat&, Mat
 
 #else
 
-using namespace cv;
-using namespace cv::gpu;
+BEGIN_OPENCV_DEVICE_NAMESPACE
 
-namespace cv { namespace gpu { namespace transform_points
+namespace transform_points
 {
     void call(const DevMem2D_<float3> src, const float* rot, const float* transl, DevMem2D_<float3> dst, cudaStream_t stream);
-}}}
+}
+
+namespace project_points
+{
+    void call(const DevMem2D_<float3> src, const float* rot, const float* transl, const float* proj, DevMem2D_<float2> dst, cudaStream_t stream);
+}
+
+namespace solve_pnp_ransac
+{
+    int maxNumIters();
+
+    void computeHypothesisScores(
+        const int num_hypotheses, const int num_points, const float* rot_matrices,
+        const float3* transl_vectors, const float3* object, const float2* image,
+        const float dist_threshold, int* hypothesis_scores);
+}
+
+END_OPENCV_DEVICE_NAMESPACE
+
+using namespace OPENCV_DEVICE_NAMESPACE;
 
 namespace
 {
@@ -79,15 +101,9 @@ namespace
 
 void cv::gpu::transformPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec, GpuMat& dst, Stream& stream)
 {
-    ::transformPointsCaller(src, rvec, tvec, dst, StreamAccessor::getStream(stream));
+    transformPointsCaller(src, rvec, tvec, dst, StreamAccessor::getStream(stream));
 }
 
-namespace cv { namespace gpu { namespace project_points
-{
-    void call(const DevMem2D_<float3> src, const float* rot, const float* transl, const float* proj, DevMem2D_<float2> dst, cudaStream_t stream);
-}}}
-
 namespace
 {
     void projectPointsCaller(const GpuMat& src, const Mat& rvec, const Mat& tvec, const Mat& camera_mat, const Mat& dist_coef, GpuMat& dst, cudaStream_t stream)
@@ -109,20 +125,9 @@ namespace
 
 void cv::gpu::projectPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec, const Mat& camera_mat, const Mat& dist_coef, GpuMat& dst, Stream& stream)
 {
-    ::projectPointsCaller(src, rvec, tvec, camera_mat, dist_coef, dst, StreamAccessor::getStream(stream));
+    projectPointsCaller(src, rvec, tvec, camera_mat, dist_coef, dst, StreamAccessor::getStream(stream));
 }
 
-namespace cv { namespace gpu { namespace solve_pnp_ransac
-{
-    int maxNumIters();
-
-    void computeHypothesisScores(
-        const int num_hypotheses, const int num_points, const float* rot_matrices,
-        const float3* transl_vectors, const float3* object, const float2* image,
-        const float dist_threshold, int* hypothesis_scores);
-}}}
-
 namespace
 {
     // Selects subset_size random different points from [0, num_points - 1] range
@@ -46,7 +46,6 @@ using namespace cv;
 using namespace cv::gpu;
 using namespace std;
 
 #if !defined (HAVE_CUDA)
 
 cv::gpu::CascadeClassifier_GPU::CascadeClassifier_GPU() { throw_nogpu(); }
@@ -43,65 +43,58 @@
 #include "internal_shared.hpp"
 #include "opencv2/gpu/device/limits.hpp"
 
-using namespace cv::gpu;
-using namespace cv::gpu::device;
+BEGIN_OPENCV_DEVICE_NAMESPACE
 
-namespace bf_krnls
-{
-    __constant__ float* ctable_color;
-    __constant__ float* ctable_space;
-    __constant__ size_t ctable_space_step;
+namespace bilateral_filter {
 
-    __constant__ int cndisp;
-    __constant__ int cradius;
+__constant__ float* ctable_color;
+__constant__ float* ctable_space;
+__constant__ size_t ctable_space_step;
 
-    __constant__ short cedge_disc;
-    __constant__ short cmax_disc;
-}
+__constant__ int cndisp;
+__constant__ int cradius;
 
-namespace cv { namespace gpu { namespace bf
-{
-    void load_constants(float* table_color, const DevMem2Df& table_space, int ndisp, int radius, short edge_disc, short max_disc)
-    {
-        cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::ctable_color, &table_color, sizeof(table_color)) );
-        cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::ctable_space, &table_space.data, sizeof(table_space.data)) );
+__constant__ short cedge_disc;
+__constant__ short cmax_disc;
 
-        size_t table_space_step = table_space.step / sizeof(float);
-        cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::ctable_space_step, &table_space_step, sizeof(size_t)) );
+void load_constants(float* table_color, DevMem2Df table_space, int ndisp, int radius, short edge_disc, short max_disc)
+{
+    cudaSafeCall( cudaMemcpyToSymbol(ctable_color, &table_color, sizeof(table_color)) );
+    cudaSafeCall( cudaMemcpyToSymbol(ctable_space, &table_space.data, sizeof(table_space.data)) );
 
-        cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::cndisp, &ndisp, sizeof(int)) );
-        cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::cradius, &radius, sizeof(int)) );
+    size_t table_space_step = table_space.step / sizeof(float);
+    cudaSafeCall( cudaMemcpyToSymbol(ctable_space_step, &table_space_step, sizeof(size_t)) );
 
-        cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::cedge_disc, &edge_disc, sizeof(short)) );
-        cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::cmax_disc, &max_disc, sizeof(short)) );
-    }
-}}}
+    cudaSafeCall( cudaMemcpyToSymbol(cndisp, &ndisp, sizeof(int)) );
+    cudaSafeCall( cudaMemcpyToSymbol(cradius, &radius, sizeof(int)) );
 
-namespace bf_krnls
-{
-    template <int channels>
-    struct DistRgbMax
-    {
-        static __device__ __forceinline__ uchar calc(const uchar* a, const uchar* b)
-        {
-            uchar x = abs(a[0] - b[0]);
-            uchar y = abs(a[1] - b[1]);
-            uchar z = abs(a[2] - b[2]);
-            return (max(max(x, y), z));
-        }
-    };
+    cudaSafeCall( cudaMemcpyToSymbol(cedge_disc, &edge_disc, sizeof(short)) );
+    cudaSafeCall( cudaMemcpyToSymbol(cmax_disc, &max_disc, sizeof(short)) );
+}
 
-    template <>
-    struct DistRgbMax<1>
-    {
-        static __device__ __forceinline__ uchar calc(const uchar* a, const uchar* b)
-        {
-            return abs(a[0] - b[0]);
-        }
-    };
+template <int channels>
+struct DistRgbMax
+{
+    static __device__ __forceinline__ uchar calc(const uchar* a, const uchar* b)
+    {
+        uchar x = ::abs(a[0] - b[0]);
+        uchar y = ::abs(a[1] - b[1]);
+        uchar z = ::abs(a[2] - b[2]);
+        return (::max(::max(x, y), z));
+    }
+};
 
-    template <int channels, typename T>
-    __global__ void bilateral_filter(int t, T* disp, size_t disp_step, const uchar* img, size_t img_step, int h, int w)
-    {
+template <>
+struct DistRgbMax<1>
+{
+    static __device__ __forceinline__ uchar calc(const uchar* a, const uchar* b)
+    {
+        return ::abs(a[0] - b[0]);
+    }
+};
+
+template <int channels, typename T>
+__global__ void bilateral_filter(int t, T* disp, size_t disp_step, const uchar* img, size_t img_step, int h, int w)
+{
     const int y = blockIdx.y * blockDim.y + threadIdx.y;
     const int x = ((blockIdx.x * blockDim.x + threadIdx.x) << 1) + ((y + t) & 1);
@@ -115,12 +108,12 @@ namespace bf_krnls
         dp[3] = *(disp + (y+1) * disp_step + x + 0);
         dp[4] = *(disp + (y  ) * disp_step + x + 1);
 
-        if(abs(dp[1] - dp[0]) >= cedge_disc || abs(dp[2] - dp[0]) >= cedge_disc || abs(dp[3] - dp[0]) >= cedge_disc || abs(dp[4] - dp[0]) >= cedge_disc)
+        if(::abs(dp[1] - dp[0]) >= cedge_disc || ::abs(dp[2] - dp[0]) >= cedge_disc || ::abs(dp[3] - dp[0]) >= cedge_disc || ::abs(dp[4] - dp[0]) >= cedge_disc)
         {
-            const int ymin = max(0, y - cradius);
-            const int xmin = max(0, x - cradius);
-            const int ymax = min(h - 1, y + cradius);
-            const int xmax = min(w - 1, x + cradius);
+            const int ymin = ::max(0, y - cradius);
+            const int xmin = ::max(0, x - cradius);
+            const int ymax = ::min(h - 1, y + cradius);
+            const int xmax = ::min(w - 1, x + cradius);
 
             float cost[] = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
@@ -136,15 +129,15 @@ namespace bf_krnls
                     uchar dist_rgb = DistRgbMax<channels>::calc(in, ic);
 
-                    const float weight = ctable_color[dist_rgb] * (ctable_space + abs(y-yi)* ctable_space_step)[abs(x-xi)];
+                    const float weight = ctable_color[dist_rgb] * (ctable_space + ::abs(y-yi)* ctable_space_step)[::abs(x-xi)];
 
                     const T disp_reg = disp_y[xi];
 
-                    cost[0] += min(cmax_disc, abs(disp_reg - dp[0])) * weight;
-                    cost[1] += min(cmax_disc, abs(disp_reg - dp[1])) * weight;
-                    cost[2] += min(cmax_disc, abs(disp_reg - dp[2])) * weight;
-                    cost[3] += min(cmax_disc, abs(disp_reg - dp[3])) * weight;
-                    cost[4] += min(cmax_disc, abs(disp_reg - dp[4])) * weight;
+                    cost[0] += ::min(cmax_disc, ::abs(disp_reg - dp[0])) * weight;
+                    cost[1] += ::min(cmax_disc, ::abs(disp_reg - dp[1])) * weight;
+                    cost[2] += ::min(cmax_disc, ::abs(disp_reg - dp[2])) * weight;
+                    cost[3] += ::min(cmax_disc, ::abs(disp_reg - dp[3])) * weight;
+                    cost[4] += ::min(cmax_disc, ::abs(disp_reg - dp[4])) * weight;
                 }
             }
@@ -180,14 +173,11 @@ namespace bf_krnls
             *(disp + y * disp_step + x) = dp[id];
         }
     }
-    }
-}
+}
 
-namespace cv { namespace gpu { namespace bf
-{
-    template <typename T>
-    void bilateral_filter_caller(const DevMem2D_<T>& disp, const DevMem2Db& img, int channels, int iters, cudaStream_t stream)
-    {
+template <typename T>
+void bilateral_filter_caller(DevMem2D_<T> disp, DevMem2Db img, int channels, int iters, cudaStream_t stream)
+{
     dim3 threads(32, 8, 1);
     dim3 grid(1, 1, 1);
     grid.x = divUp(disp.cols, threads.x << 1);
@@ -198,18 +188,20 @@ namespace cv { namespace gpu { namespace bf
         case 1:
             for (int i = 0; i < iters; ++i)
             {
-                bf_krnls::bilateral_filter<1><<<grid, threads, 0, stream>>>(0, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols);
+                bilateral_filter<1><<<grid, threads, 0, stream>>>(0, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols);
                 cudaSafeCall( cudaGetLastError() );
 
-                bf_krnls::bilateral_filter<1><<<grid, threads, 0, stream>>>(1, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols);
+                bilateral_filter<1><<<grid, threads, 0, stream>>>(1, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols);
                 cudaSafeCall( cudaGetLastError() );
             }
             break;
         case 3:
             for (int i = 0; i < iters; ++i)
            {
-                bf_krnls::bilateral_filter<3><<<grid, threads, 0, stream>>>(0, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols);
+                bilateral_filter<3><<<grid, threads, 0, stream>>>(0, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols);
                 cudaSafeCall( cudaGetLastError() );
 
-                bf_krnls::bilateral_filter<3><<<grid, threads, 0, stream>>>(1, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols);
+                bilateral_filter<3><<<grid, threads, 0, stream>>>(1, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols);
                 cudaSafeCall( cudaGetLastError() );
             }
             break;
@@ -219,15 +211,18 @@ namespace cv { namespace gpu { namespace bf
         if (stream != 0)
             cudaSafeCall( cudaDeviceSynchronize() );
     }
-    }
 
-    void bilateral_filter_gpu(const DevMem2Db& disp, const DevMem2Db& img, int channels, int iters, cudaStream_t stream)
-    {
-        bilateral_filter_caller(disp, img, channels, iters, stream);
-    }
+void bilateral_filter_gpu(DevMem2Db disp, DevMem2Db img, int channels, int iters, cudaStream_t stream)
+{
+    bilateral_filter_caller(disp, img, channels, iters, stream);
+}
 
-    void bilateral_filter_gpu(const DevMem2D_<short>& disp, const DevMem2Db& img, int channels, int iters, cudaStream_t stream)
-    {
-        bilateral_filter_caller(disp, img, channels, iters, stream);
-    }
-}}}
+void bilateral_filter_gpu(DevMem2D_<short> disp, DevMem2Db img, int channels, int iters, cudaStream_t stream)
+{
+    bilateral_filter_caller(disp, img, channels, iters, stream);
+}
+
+} // namespace bilateral_filter
+
+END_OPENCV_DEVICE_NAMESPACE
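The caller launches the kernel twice per iteration (t = 0, then t = 1), updating the two colors of a checkerboard in turn, which is why grid.x is sized over half the image width. divUp is OpenCV's ceiling-division helper from internal_shared.hpp; its presumed definition and the resulting grid computation are sketched below for reference:

    #include <cuda_runtime.h>

    // Presumed helper (as in internal_shared.hpp): ceiling integer division.
    static inline int divUp(int total, int grain) { return (total + grain - 1) / grain; }

    // Grid sizing as used by bilateral_filter_caller above; each launch covers one
    // checkerboard color, so a row of `cols` pixels needs only cols/2 threads.
    dim3 makeGrid(int rows, int cols, const dim3& threads)
    {
        return dim3(divUp(cols, threads.x << 1), divUp(rows, threads.y), 1);
    }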