Commit 0f53f299 authored by Vladislav Vinogradov

removed BEGIN_OPENCV_DEVICE_NAMESPACE macros

parent d9265413
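The macros removed by this commit were thin wrappers around the cv::gpu::device namespace (their definitions are deleted in one of the hunks below), so every hunk applies the same mechanical rewrite. A minimal sketch of the before/after pattern, using a made-up namespace and declaration (feature, foo_gpu) rather than any real OpenCV symbol:

    // Before: device code wrapped in the helper macros (illustrative only)
    BEGIN_OPENCV_DEVICE_NAMESPACE        // expanded to: namespace cv { namespace gpu { namespace device {
    namespace feature
    {
        void foo_gpu(const float* src, int n);   // hypothetical declaration
    }
    END_OPENCV_DEVICE_NAMESPACE          // expanded to: }}}

    using namespace OPENCV_DEVICE_NAMESPACE_ feature;   // expanded to: using namespace ::cv::gpu::device::feature;

    // After this commit: the expansion is written out explicitly
    namespace cv { namespace gpu { namespace device
    {
        namespace feature
        {
            void foo_gpu(const float* src, int n);   // hypothetical declaration
        }
    }}}

    using namespace ::cv::gpu::device::feature;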
@@ -425,21 +425,20 @@ void cv::gpu::magnitudeSqr(const GpuMat& src, GpuMat& dst, Stream& stream)
 ////////////////////////////////////////////////////////////////////////
 // Polar <-> Cart

-BEGIN_OPENCV_DEVICE_NAMESPACE
+namespace cv { namespace gpu { namespace device
+{
     namespace mathfunc
     {
         void cartToPolar_gpu(DevMem2Df x, DevMem2Df y, DevMem2Df mag, bool magSqr, DevMem2Df angle, bool angleInDegrees, cudaStream_t stream);
         void polarToCart_gpu(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y, bool angleInDegrees, cudaStream_t stream);
     }
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 namespace
 {
     inline void cartToPolar_caller(const GpuMat& x, const GpuMat& y, GpuMat* mag, bool magSqr, GpuMat* angle, bool angleInDegrees, cudaStream_t stream)
     {
-        using namespace OPENCV_DEVICE_NAMESPACE_ mathfunc;
+        using namespace ::cv::gpu::device::mathfunc;

         CV_DbgAssert(x.size() == y.size() && x.type() == y.type());
         CV_Assert(x.depth() == CV_32F);

@@ -459,7 +458,7 @@ namespace
     inline void polarToCart_caller(const GpuMat& mag, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees, cudaStream_t stream)
     {
-        using namespace OPENCV_DEVICE_NAMESPACE_ mathfunc;
+        using namespace ::cv::gpu::device::mathfunc;

         CV_DbgAssert((mag.empty() || mag.size() == angle.size()) && mag.type() == angle.type());
         CV_Assert(mag.depth() == CV_32F);
...
@@ -55,19 +55,18 @@ void cv::gpu::DisparityBilateralFilter::operator()(const GpuMat&, const GpuMat&,
 #else /* !defined (HAVE_CUDA) */

-BEGIN_OPENCV_DEVICE_NAMESPACE
+namespace cv { namespace gpu { namespace device
+{
     namespace bilateral_filter
     {
         void load_constants(float* table_color, DevMem2Df table_space, int ndisp, int radius, short edge_disc, short max_disc);

         void bilateral_filter_gpu(DevMem2Db disp, DevMem2Db img, int channels, int iters, cudaStream_t stream);
         void bilateral_filter_gpu(DevMem2D_<short> disp, DevMem2Db img, int channels, int iters, cudaStream_t stream);
     }
-END_OPENCV_DEVICE_NAMESPACE
+}}}

-using namespace OPENCV_DEVICE_NAMESPACE_ bilateral_filter;
+using namespace ::cv::gpu::device::bilateral_filter;

 namespace
 {
...
@@ -52,19 +52,18 @@ void cv::gpu::blendLinear(const GpuMat&, const GpuMat&, const GpuMat&, const Gpu
 #else

-BEGIN_OPENCV_DEVICE_NAMESPACE
+namespace cv { namespace gpu { namespace device
+{
     namespace blend
     {
         template <typename T>
         void blendLinearCaller(int rows, int cols, int cn, PtrStep<T> img1, PtrStep<T> img2, PtrStepf weights1, PtrStepf weights2, PtrStep<T> result, cudaStream_t stream);

         void blendLinearCaller8UC4(int rows, int cols, PtrStepb img1, PtrStepb img2, PtrStepf weights1, PtrStepf weights2, PtrStepb result, cudaStream_t stream);
     }
-END_OPENCV_DEVICE_NAMESPACE
+}}}

-using namespace OPENCV_DEVICE_NAMESPACE_ blend;
+using namespace ::cv::gpu::device::blend;

 void cv::gpu::blendLinear(const GpuMat& img1, const GpuMat& img2, const GpuMat& weights1, const GpuMat& weights2,
                           GpuMat& result, Stream& stream)
...
@@ -82,10 +82,10 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat&, vector< vec
 #else /* !defined (HAVE_CUDA) */

-BEGIN_OPENCV_DEVICE_NAMESPACE
+namespace cv { namespace gpu { namespace device
+{
     namespace bf_match
     {
         template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask,
             const DevMem2Di& trainIdx, const DevMem2Df& distance,
             int cc, cudaStream_t stream);
@@ -105,10 +105,10 @@ namespace bf_match
         template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
             const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
             int cc, cudaStream_t stream);
     }

     namespace bf_knnmatch
     {
         template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask,
             const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,
             int cc, cudaStream_t stream);
@@ -128,10 +128,10 @@ namespace bf_knnmatch
         template <typename T> void match2Hamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
             const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,
             int cc, cudaStream_t stream);
     }

     namespace bf_radius_match
     {
         template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask,
             const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
             int cc, cudaStream_t stream);
@@ -153,9 +153,8 @@ namespace bf_radius_match
         template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks,
             const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
             int cc, cudaStream_t stream);
     }
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 ////////////////////////////////////////////////////////////////////
 // Train collection

@@ -199,7 +198,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchSingle(const GpuMat& query, const
     if (query.empty() || train.empty())
         return;

-    using namespace OPENCV_DEVICE_NAMESPACE_ bf_match;
+    using namespace ::cv::gpu::device::bf_match;

     typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask,
                              const DevMem2Di& trainIdx, const DevMem2Df& distance,

@@ -341,7 +340,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchCollection(const GpuMat& query, c
     if (query.empty() || trainCollection.empty())
         return;

-    using namespace OPENCV_DEVICE_NAMESPACE_ bf_match;
+    using namespace ::cv::gpu::device::bf_match;

     typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
                              const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,

@@ -452,7 +451,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatchSingle(const GpuMat& query, co
     if (query.empty() || train.empty())
         return;

-    using namespace OPENCV_DEVICE_NAMESPACE_ bf_knnmatch;
+    using namespace ::cv::gpu::device::bf_knnmatch;

     typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask,
                              const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,

@@ -581,7 +580,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch2Collection(const GpuMat& quer
     if (query.empty() || trainCollection.empty())
         return;

-    using namespace OPENCV_DEVICE_NAMESPACE_ bf_knnmatch;
+    using namespace ::cv::gpu::device::bf_knnmatch;

     typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
                              const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,

@@ -762,7 +761,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchSingle(const GpuMat& query,
     if (query.empty() || train.empty())
         return;

-    using namespace OPENCV_DEVICE_NAMESPACE_ bf_radius_match;
+    using namespace ::cv::gpu::device::bf_radius_match;

     typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask,
                              const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,

@@ -893,7 +892,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchCollection(const GpuMat& qu
     if (query.empty() || empty())
         return;

-    using namespace OPENCV_DEVICE_NAMESPACE_ bf_radius_match;
+    using namespace ::cv::gpu::device::bf_radius_match;

     typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks,
                              const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
...
@@ -56,31 +56,30 @@ void cv::gpu::solvePnPRansac(const Mat&, const Mat&, const Mat&, const Mat&, Mat
 #else

-BEGIN_OPENCV_DEVICE_NAMESPACE
+namespace cv { namespace gpu { namespace device
+{
     namespace transform_points
     {
         void call(const DevMem2D_<float3> src, const float* rot, const float* transl, DevMem2D_<float3> dst, cudaStream_t stream);
     }

     namespace project_points
     {
         void call(const DevMem2D_<float3> src, const float* rot, const float* transl, const float* proj, DevMem2D_<float2> dst, cudaStream_t stream);
     }

     namespace solve_pnp_ransac
     {
         int maxNumIters();

         void computeHypothesisScores(
             const int num_hypotheses, const int num_points, const float* rot_matrices,
             const float3* transl_vectors, const float3* object, const float2* image,
             const float dist_threshold, int* hypothesis_scores);
     }
-END_OPENCV_DEVICE_NAMESPACE
+}}}

-using namespace OPENCV_DEVICE_NAMESPACE;
+using namespace ::cv::gpu::device;

 namespace
 {
...
@@ -43,22 +43,22 @@
 #include "internal_shared.hpp"
 #include "opencv2/gpu/device/limits.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
+namespace cv { namespace gpu { namespace device
+{
     namespace bilateral_filter
     {
         __constant__ float* ctable_color;
         __constant__ float* ctable_space;
         __constant__ size_t ctable_space_step;

         __constant__ int cndisp;
         __constant__ int cradius;

         __constant__ short cedge_disc;
         __constant__ short cmax_disc;

         void load_constants(float* table_color, DevMem2Df table_space, int ndisp, int radius, short edge_disc, short max_disc)
         {
             cudaSafeCall( cudaMemcpyToSymbol(ctable_color, &table_color, sizeof(table_color)) );
             cudaSafeCall( cudaMemcpyToSymbol(ctable_space, &table_space.data, sizeof(table_space.data)) );
             size_t table_space_step = table_space.step / sizeof(float);
@@ -69,11 +69,11 @@ void load_constants(float* table_color, DevMem2Df table_space, int ndisp, int ra
             cudaSafeCall( cudaMemcpyToSymbol(cedge_disc, &edge_disc, sizeof(short)) );
             cudaSafeCall( cudaMemcpyToSymbol(cmax_disc, &max_disc, sizeof(short)) );
         }

         template <int channels>
         struct DistRgbMax
         {
             static __device__ __forceinline__ uchar calc(const uchar* a, const uchar* b)
             {
                 uchar x = ::abs(a[0] - b[0]);
@@ -81,20 +81,20 @@ struct DistRgbMax
                 uchar z = ::abs(a[2] - b[2]);
                 return (::max(::max(x, y), z));
             }
         };

         template <>
         struct DistRgbMax<1>
         {
             static __device__ __forceinline__ uchar calc(const uchar* a, const uchar* b)
             {
                 return ::abs(a[0] - b[0]);
             }
         };

         template <int channels, typename T>
         __global__ void bilateral_filter(int t, T* disp, size_t disp_step, const uchar* img, size_t img_step, int h, int w)
         {
             const int y = blockIdx.y * blockDim.y + threadIdx.y;
             const int x = ((blockIdx.x * blockDim.x + threadIdx.x) << 1) + ((y + t) & 1);
@@ -173,11 +173,11 @@ __global__ void bilateral_filter(int t, T* disp, size_t disp_step, const uchar*
                     *(disp + y * disp_step + x) = dp[id];
                 }
             }
         }

         template <typename T>
         void bilateral_filter_caller(DevMem2D_<T> disp, DevMem2Db img, int channels, int iters, cudaStream_t stream)
         {
             dim3 threads(32, 8, 1);
             dim3 grid(1, 1, 1);
             grid.x = divUp(disp.cols, threads.x << 1);
@@ -211,18 +211,16 @@ void bilateral_filter_caller(DevMem2D_<T> disp, DevMem2Db img, int channels, int
             if (stream != 0)
                 cudaSafeCall( cudaDeviceSynchronize() );
         }

         void bilateral_filter_gpu(DevMem2Db disp, DevMem2Db img, int channels, int iters, cudaStream_t stream)
         {
             bilateral_filter_caller(disp, img, channels, iters, stream);
         }

         void bilateral_filter_gpu(DevMem2D_<short> disp, DevMem2Db img, int channels, int iters, cudaStream_t stream)
         {
             bilateral_filter_caller(disp, img, channels, iters, stream);
         }
     } // namespace bilateral_filter
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device
@@ -42,14 +42,14 @@
 #include "internal_shared.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
+namespace cv { namespace gpu { namespace device
+{
     namespace blend
     {
         template <typename T>
         __global__ void blendLinearKernel(int rows, int cols, int cn, const PtrStep<T> img1, const PtrStep<T> img2,
                                           const PtrStepf weights1, const PtrStepf weights2, PtrStep<T> result)
         {
             int x = blockIdx.x * blockDim.x + threadIdx.x;
             int y = blockIdx.y * blockDim.y + threadIdx.y;
@@ -62,11 +62,11 @@ __global__ void blendLinearKernel(int rows, int cols, int cn, const PtrStep<T> i
                 T p2 = img2.ptr(y)[x];
                 result.ptr(y)[x] = (p1 * w1 + p2 * w2) / (w1 + w2 + 1e-5f);
             }
         }

         template <typename T>
         void blendLinearCaller(int rows, int cols, int cn, PtrStep<T> img1, PtrStep<T> img2, PtrStepf weights1, PtrStepf weights2, PtrStep<T> result, cudaStream_t stream)
         {
             dim3 threads(16, 16);
             dim3 grid(divUp(cols * cn, threads.x), divUp(rows, threads.y));
@@ -75,15 +75,15 @@ void blendLinearCaller(int rows, int cols, int cn, PtrStep<T> img1, PtrStep<T> i
             if (stream == 0)
                 cudaSafeCall(cudaDeviceSynchronize());
         }

         template void blendLinearCaller<uchar>(int, int, int, PtrStep<uchar>, PtrStep<uchar>, PtrStepf, PtrStepf, PtrStep<uchar>, cudaStream_t stream);
         template void blendLinearCaller<float>(int, int, int, PtrStep<float>, PtrStep<float>, PtrStepf, PtrStepf, PtrStep<float>, cudaStream_t stream);

         __global__ void blendLinearKernel8UC4(int rows, int cols, const PtrStepb img1, const PtrStepb img2,
                                               const PtrStepf weights1, const PtrStepf weights2, PtrStepb result)
         {
             int x = blockIdx.x * blockDim.x + threadIdx.x;
             int y = blockIdx.y * blockDim.y + threadIdx.y;
@@ -99,10 +99,10 @@ __global__ void blendLinearKernel8UC4(int rows, int cols, const PtrStepb img1, c
                 ((uchar4*)result.ptr(y))[x] = make_uchar4(p1.x * w1 + p2.x * w2, p1.y * w1 + p2.y * w2,
                                                           p1.z * w1 + p2.z * w2, p1.w * w1 + p2.w * w2);
             }
         }

         void blendLinearCaller8UC4(int rows, int cols, PtrStepb img1, PtrStepb img2, PtrStepf weights1, PtrStepf weights2, PtrStepb result, cudaStream_t stream)
         {
             dim3 threads(16, 16);
             dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));
@@ -111,8 +111,6 @@ void blendLinearCaller8UC4(int rows, int cols, PtrStepb img1, PtrStepb img2, Ptr
             if (stream == 0)
                 cudaSafeCall(cudaDeviceSynchronize());
         }
     } // namespace blend
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device
@@ -44,12 +44,12 @@
 #include "opencv2/gpu/device/transform.hpp"
 #include "opencv2/gpu/device/functional.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
+namespace cv { namespace gpu { namespace device
+{
     #define SOLVE_PNP_RANSAC_MAX_NUM_ITERS 200

     namespace transform_points
     {
         __constant__ float3 crot0;
         __constant__ float3 crot1;
         __constant__ float3 crot2;
@@ -74,12 +74,12 @@ namespace transform_points
             cudaSafeCall(cudaMemcpyToSymbol(crot1, rot + 3, sizeof(float) * 3));
             cudaSafeCall(cudaMemcpyToSymbol(crot2, rot + 6, sizeof(float) * 3));
             cudaSafeCall(cudaMemcpyToSymbol(ctransl, transl, sizeof(float) * 3));
-            OPENCV_DEVICE_NAMESPACE_ transform(src, dst, TransformOp(), stream);
+            ::cv::gpu::device::transform(src, dst, TransformOp(), stream);
         }
     } // namespace transform_points

     namespace project_points
     {
         __constant__ float3 crot0;
         __constant__ float3 crot1;
         __constant__ float3 crot2;
@@ -113,12 +113,12 @@ namespace project_points
             cudaSafeCall(cudaMemcpyToSymbol(ctransl, transl, sizeof(float) * 3));
             cudaSafeCall(cudaMemcpyToSymbol(cproj0, proj, sizeof(float) * 3));
             cudaSafeCall(cudaMemcpyToSymbol(cproj1, proj + 3, sizeof(float) * 3));
-            OPENCV_DEVICE_NAMESPACE_ transform(src, dst, ProjectOp(), stream);
+            ::cv::gpu::device::transform(src, dst, ProjectOp(), stream);
         }
     } // namespace project_points

     namespace solve_pnp_ransac
     {
         __constant__ float3 crot_matrices[SOLVE_PNP_RANSAC_MAX_NUM_ITERS * 3];
         __constant__ float3 ctransl_vectors[SOLVE_PNP_RANSAC_MAX_NUM_ITERS];
@@ -187,6 +187,5 @@ namespace solve_pnp_ransac
             cudaSafeCall( cudaDeviceSynchronize() );
         }
     } // namespace solvepnp_ransac
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device
@@ -44,12 +44,12 @@
 #include <algorithm>
 #include "internal_shared.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
+namespace cv { namespace gpu { namespace device
+{
     namespace canny
     {
         __global__ void calcSobelRowPass(const PtrStepb src, PtrStepi dx_buf, PtrStepi dy_buf, int rows, int cols)
         {
             __shared__ int smem[16][18];

             const int j = blockIdx.x * blockDim.x + threadIdx.x;
@@ -71,10 +71,10 @@ __global__ void calcSobelRowPass(const PtrStepb src, PtrStepi dx_buf, PtrStepi d
                 dy_buf.ptr(i)[j] = smem[threadIdx.y][threadIdx.x] + 2 * smem[threadIdx.y][threadIdx.x + 1] + smem[threadIdx.y][threadIdx.x + 2];
             }
         }

         void calcSobelRowPass_gpu(PtrStepb src, PtrStepi dx_buf, PtrStepi dy_buf, int rows, int cols)
         {
             dim3 block(16, 16, 1);
             dim3 grid(divUp(cols, block.x), divUp(rows, block.y), 1);
@@ -82,26 +82,26 @@ void calcSobelRowPass_gpu(PtrStepb src, PtrStepi dx_buf, PtrStepi dy_buf, int ro
             cudaSafeCall( cudaGetLastError() );
             cudaSafeCall(cudaThreadSynchronize());
         }

         struct L1
         {
             static __device__ __forceinline__ float calc(int x, int y)
             {
                 return ::abs(x) + ::abs(y);
             }
         };

         struct L2
         {
             static __device__ __forceinline__ float calc(int x, int y)
             {
                 return ::sqrtf(x * x + y * y);
             }
         };

         template <typename Norm> __global__ void calcMagnitude(const PtrStepi dx_buf, const PtrStepi dy_buf,
             PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols)
         {
             __shared__ int sdx[18][16];
             __shared__ int sdy[18][16];
@@ -133,10 +133,10 @@ template <typename Norm> __global__ void calcMagnitude(const PtrStepi dx_buf, co
                 mag.ptr(i + 1)[j + 1] = Norm::calc(x, y);
             }
         }

         void calcMagnitude_gpu(PtrStepi dx_buf, PtrStepi dy_buf, PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols, bool L2Grad)
         {
             dim3 block(16, 16, 1);
             dim3 grid(divUp(cols, block.x), divUp(rows, block.y), 1);
@@ -148,19 +148,19 @@ void calcMagnitude_gpu(PtrStepi dx_buf, PtrStepi dy_buf, PtrStepi dx, PtrStepi d
             cudaSafeCall( cudaGetLastError() );
             cudaSafeCall(cudaThreadSynchronize());
         }

         template <typename Norm> __global__ void calcMagnitude(PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols)
         {
             const int j = blockIdx.x * blockDim.x + threadIdx.x;
             const int i = blockIdx.y * blockDim.y + threadIdx.y;

             if (i < rows && j < cols)
                 mag.ptr(i + 1)[j + 1] = Norm::calc(dx.ptr(i)[j], dy.ptr(i)[j]);
         }

         void calcMagnitude_gpu(PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols, bool L2Grad)
         {
             dim3 block(16, 16, 1);
             dim3 grid(divUp(cols, block.x), divUp(rows, block.y), 1);
@@ -172,15 +172,15 @@ void calcMagnitude_gpu(PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int col
             cudaSafeCall( cudaGetLastError() );
             cudaSafeCall(cudaThreadSynchronize());
         }

         //////////////////////////////////////////////////////////////////////////////////////////

         #define CANNY_SHIFT 15
         #define TG22 (int)(0.4142135623730950488016887242097*(1<<CANNY_SHIFT) + 0.5)

         __global__ void calcMap(const PtrStepi dx, const PtrStepi dy, const PtrStepf mag, PtrStepi map, int rows, int cols, float low_thresh, float high_thresh)
         {
             __shared__ float smem[18][18];

             const int j = blockIdx.x * 16 + threadIdx.x;
@@ -239,13 +239,13 @@ __global__ void calcMap(const PtrStepi dx, const PtrStepi dy, const PtrStepf mag
                 map.ptr(i + 1)[j + 1] = edge_type;
             }
         }

         #undef CANNY_SHIFT
         #undef TG22

         void calcMap_gpu(PtrStepi dx, PtrStepi dy, PtrStepf mag, PtrStepi map, int rows, int cols, float low_thresh, float high_thresh)
         {
             dim3 block(16, 16, 1);
             dim3 grid(divUp(cols, block.x), divUp(rows, block.y), 1);
@@ -253,14 +253,14 @@ void calcMap_gpu(PtrStepi dx, PtrStepi dy, PtrStepf mag, PtrStepi map, int rows,
             cudaSafeCall( cudaGetLastError() );
             cudaSafeCall(cudaThreadSynchronize());
         }

         //////////////////////////////////////////////////////////////////////////////////////////

         __device__ unsigned int counter = 0;

         __global__ void edgesHysteresisLocal(PtrStepi map, ushort2* st, int rows, int cols)
         {
             #if __CUDA_ARCH__ >= 120

             __shared__ int smem[18][18];
@@ -335,10 +335,10 @@ __global__ void edgesHysteresisLocal(PtrStepi map, ushort2* st, int rows, int co
             }

             #endif
         }

         void edgesHysteresisLocal_gpu(PtrStepi map, ushort2* st1, int rows, int cols)
         {
             dim3 block(16, 16, 1);
             dim3 grid(divUp(cols, block.x), divUp(rows, block.y), 1);
@@ -346,13 +346,13 @@ void edgesHysteresisLocal_gpu(PtrStepi map, ushort2* st1, int rows, int cols)
             cudaSafeCall( cudaGetLastError() );
             cudaSafeCall(cudaThreadSynchronize());
         }

         __constant__ int c_dx[8] = {-1, 0, 1, -1, 1, -1, 0, 1};
         __constant__ int c_dy[8] = {-1, -1, -1, 0, 0, 1, 1, 1};

         __global__ void edgesHysteresisGlobal(PtrStepi map, ushort2* st1, ushort2* st2, int rows, int cols, int count)
         {
             #if __CUDA_ARCH__ >= 120

             const int stack_size = 512;
@@ -441,10 +441,10 @@ __global__ void edgesHysteresisGlobal(PtrStepi map, ushort2* st1, ushort2* st2,
             }

             #endif
         }

         void edgesHysteresisGlobal_gpu(PtrStepi map, ushort2* st1, ushort2* st2, int rows, int cols)
         {
             void* counter_ptr;
             cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, counter) );
@@ -466,19 +466,19 @@ void edgesHysteresisGlobal_gpu(PtrStepi map, ushort2* st1, ushort2* st2, int row
                 std::swap(st1, st2);
             }
         }

         __global__ void getEdges(PtrStepi map, PtrStepb dst, int rows, int cols)
         {
             const int j = blockIdx.x * 16 + threadIdx.x;
             const int i = blockIdx.y * 16 + threadIdx.y;

             if (i < rows && j < cols)
                 dst.ptr(i)[j] = (uchar)(-(map.ptr(i + 1)[j + 1] >> 1));
         }

         void getEdges_gpu(PtrStepi map, PtrStepb dst, int rows, int cols)
         {
             dim3 block(16, 16, 1);
             dim3 grid(divUp(cols, block.x), divUp(rows, block.y), 1);
@@ -486,8 +486,6 @@ void getEdges_gpu(PtrStepi map, PtrStepb dst, int rows, int cols)
             cudaSafeCall( cudaGetLastError() );
             cudaSafeCall(cudaThreadSynchronize());
         }
     } // namespace canny
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device
@@ -47,26 +47,26 @@
 #include "opencv2/gpu/device/limits.hpp"
 #include "opencv2/gpu/device/border_interpolate.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
+namespace cv { namespace gpu { namespace device
+{
     #define MAX_KERNEL_SIZE 16
     #define BLOCK_DIM_X 16
     #define BLOCK_DIM_Y 4
     #define RESULT_STEPS 8
     #define HALO_STEPS 1

     namespace column_filter
     {
         __constant__ float c_kernel[MAX_KERNEL_SIZE];

         void loadKernel(const float kernel[], int ksize)
         {
             cudaSafeCall( cudaMemcpyToSymbol(c_kernel, kernel, ksize * sizeof(float)) );
         }

         template <int KERNEL_SIZE, typename T, typename D, typename B>
         __global__ void linearColumnFilter(const DevMem2D_<T> src, PtrStep<D> dst, int anchor, const B b)
         {
             typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type sum_t;

             __shared__ T smem[BLOCK_DIM_X][(RESULT_STEPS + 2 * HALO_STEPS) * BLOCK_DIM_Y + 1];
@@ -111,11 +111,11 @@ __global__ void linearColumnFilter(const DevMem2D_<T> src, PtrStep<D> dst, int a
                 dst.ptr(dstY)[x] = saturate_cast<D>(sum);
             }
         }

         template <int ksize, typename T, typename D, template<typename> class B>
         void linearColumnFilter_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor, cudaStream_t stream)
         {
             const dim3 block(BLOCK_DIM_X, BLOCK_DIM_Y);
             const dim3 grid(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, RESULT_STEPS * BLOCK_DIM_Y));
@@ -126,11 +126,11 @@ void linearColumnFilter_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst,
             if (stream == 0)
                 cudaSafeCall( cudaDeviceSynchronize() );
         }

         template <typename T, typename D>
         void linearColumnFilter_gpu(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream)
         {
             typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor, cudaStream_t stream);

             static const caller_t callers[5][17] =
             {
@@ -234,16 +234,14 @@ void linearColumnFilter_gpu(const DevMem2Db& src, const DevMem2Db& dst, const fl
             loadKernel(kernel, ksize);

             callers[brd_type][ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor, stream);
         }

         template void linearColumnFilter_gpu<float , uchar >(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
         template void linearColumnFilter_gpu<float4, uchar4>(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
         //template void linearColumnFilter_gpu<float , short >(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
         //template void linearColumnFilter_gpu<float2, short2>(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
         template void linearColumnFilter_gpu<float3, short3>(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
         template void linearColumnFilter_gpu<float , int >(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
         template void linearColumnFilter_gpu<float , float >(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
     } // namespace column_filter
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device
@@ -45,29 +45,29 @@
 #include "opencv2/gpu/device/utility.hpp"
 #include "opencv2/gpu/device/saturate_cast.hpp"

-BEGIN_OPENCV_DEVICE_NAMESPACE
+namespace cv { namespace gpu { namespace device
+{
     #define UINT_BITS 32U

     //Warps == subhistograms per threadblock
     #define WARP_COUNT 6

     //Threadblock size
     #define HISTOGRAM256_THREADBLOCK_SIZE (WARP_COUNT * OPENCV_GPU_WARP_SIZE)
     #define HISTOGRAM256_BIN_COUNT 256

     //Shared memory per threadblock
     #define HISTOGRAM256_THREADBLOCK_MEMORY (WARP_COUNT * HISTOGRAM256_BIN_COUNT)

     #define PARTIAL_HISTOGRAM256_COUNT 240

     #define MERGE_THREADBLOCK_SIZE 256

     #define USE_SMEM_ATOMICS (__CUDA_ARCH__ >= 120)

     namespace hist
     {
         #if (!USE_SMEM_ATOMICS)

             #define TAG_MASK ( (1U << (UINT_BITS - OPENCV_GPU_LOG_WARP_SIZE)) - 1U )
@@ -82,7 +82,7 @@ namespace hist {
                 } while (s_WarpHist[data] != count);
             }

         #else

             #define TAG_MASK 0xFFFFFFFFU
@@ -91,20 +91,20 @@ namespace hist {
                 atomicAdd(s_WarpHist + data, 1);
             }

         #endif

         __forceinline__ __device__ void addWord(uint* s_WarpHist, uint data, uint tag, uint pos_x, uint cols)
         {
             uint x = pos_x << 2;

             if (x + 0 < cols) addByte(s_WarpHist, (data >> 0) & 0xFFU, tag);
             if (x + 1 < cols) addByte(s_WarpHist, (data >> 8) & 0xFFU, tag);
             if (x + 2 < cols) addByte(s_WarpHist, (data >> 16) & 0xFFU, tag);
             if (x + 3 < cols) addByte(s_WarpHist, (data >> 24) & 0xFFU, tag);
         }

         __global__ void histogram256(const PtrStep<uint> d_Data, uint* d_PartialHistograms, uint dataCount, uint cols)
         {
             //Per-warp subhistogram storage
             __shared__ uint s_Hist[HISTOGRAM256_THREADBLOCK_MEMORY];
             uint* s_WarpHist= s_Hist + (threadIdx.x >> OPENCV_GPU_LOG_WARP_SIZE) * HISTOGRAM256_BIN_COUNT;
@@ -138,17 +138,17 @@ __global__ void histogram256(const PtrStep<uint> d_Data, uint* d_PartialHistogra
                 d_PartialHistograms[blockIdx.x * HISTOGRAM256_BIN_COUNT + bin] = sum;
             }
         }

         ////////////////////////////////////////////////////////////////////////////////
         // Merge histogram256() output
         // Run one threadblock per bin; each threadblock adds up the same bin counter
         // from every partial histogram. Reads are uncoalesced, but mergeHistogram256
         // takes only a fraction of total processing time
         ////////////////////////////////////////////////////////////////////////////////

         __global__ void mergeHistogram256(const uint* d_PartialHistograms, int* d_Histogram)
         {
             uint sum = 0;

             #pragma unroll
@@ -167,10 +167,10 @@ __global__ void mergeHistogram256(const uint* d_PartialHistograms, int* d_Histog
             if(threadIdx.x == 0)
                 d_Histogram[blockIdx.x] = saturate_cast<int>(data[0]);
         }

         void histogram256_gpu(DevMem2Db src, int* hist, uint* buf, cudaStream_t stream)
         {
             histogram256<<<PARTIAL_HISTOGRAM256_COUNT, HISTOGRAM256_THREADBLOCK_SIZE, 0, stream>>>(
                 DevMem2D_<uint>(src),
                 buf,
@@ -185,12 +185,12 @@ void histogram256_gpu(DevMem2Db src, int* hist, uint* buf, cudaStream_t stream)
             if (stream == 0)
                 cudaSafeCall( cudaDeviceSynchronize() );
         }

         __constant__ int c_lut[256];

         __global__ void equalizeHist(const DevMem2Db src, PtrStepb dst)
         {
             const int x = blockIdx.x * blockDim.x + threadIdx.x;
             const int y = blockIdx.y * blockDim.y + threadIdx.y;
@@ -200,10 +200,10 @@ __global__ void equalizeHist(const DevMem2Db src, PtrStepb dst)
                 const int lut = c_lut[val];
                 dst.ptr(y)[x] = __float2int_rn(255.0f / (src.cols * src.rows) * lut);
             }
         }

         void equalizeHist_gpu(DevMem2Db src, DevMem2Db dst, const int* lut, cudaStream_t stream)
         {
             dim3 block(16, 16);
             dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
@@ -214,8 +214,6 @@ void equalizeHist_gpu(DevMem2Db src, DevMem2Db dst, const int* lut, cudaStream_t
             if (stream == 0)
                 cudaSafeCall( cudaDeviceSynchronize() );
         }
     } // namespace hist
-END_OPENCV_DEVICE_NAMESPACE
+}}} // namespace cv { namespace gpu { namespace device
@@ -50,7 +50,7 @@
 #include "safe_call.hpp"

 #ifndef CV_PI
-#define CV_PI 3.1415926535897932384626433832795f
+#define CV_PI 3.1415926535897932384626433832795
 #endif

 #ifndef CV_PI_F
@@ -61,27 +61,21 @@
 #endif
 #endif

-#define BEGIN_OPENCV_DEVICE_NAMESPACE namespace cv { namespace gpu { namespace device {
-#define END_OPENCV_DEVICE_NAMESPACE }}}
-#define OPENCV_DEVICE_NAMESPACE ::cv::gpu::device
-#define OPENCV_DEVICE_NAMESPACE_ ::cv::gpu::device::

 #ifdef __CUDACC__

-BEGIN_OPENCV_DEVICE_NAMESPACE
+namespace cv { namespace gpu { namespace device
+{
     typedef unsigned char uchar;
     typedef unsigned short ushort;
     typedef signed char schar;
     typedef unsigned int uint;

     template<class T> static inline void bindTexture(const textureReference* tex, const DevMem2D_<T>& img)
     {
         cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
         cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
     }
-END_OPENCV_DEVICE_NAMESPACE
+}}}

 #endif
@@ -102,87 +96,6 @@ namespace cv { namespace gpu
 static inline int divUp(int total, int grain) { return (total + grain - 1) / grain; }

-/*template<class T> static inline void uploadConstant(const char* name, const T& value)
-{
-    cudaSafeCall( cudaMemcpyToSymbol(name, &value, sizeof(T)) );
-}
-
-template<class T> static inline void uploadConstant(const char* name, const T& value, cudaStream_t stream)
-{
-    cudaSafeCall( cudaMemcpyToSymbolAsync(name, &value, sizeof(T), 0, cudaMemcpyHostToDevice, stream) );
-} */
-
-//template<class T> static inline void bindTexture(const char* name, const DevMem2D_<T>& img)
-//{
-//    //!!!! const_cast is disabled!
-//    //!!!! Please use constructor of 'class texture' instead.
-//
-//    //textureReference* tex;
-//    //cudaSafeCall( cudaGetTextureReference((const textureReference**)&tex, name) );
-//    //tex->normalized = normalized;
-//    //tex->filterMode = filterMode;
-//    //tex->addressMode[0] = addrMode;
-//    //tex->addressMode[1] = addrMode;
-//
-//    const textureReference* tex;
-//    cudaSafeCall( cudaGetTextureReference(&tex, name) );
-//
-//    cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
-//    cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
-//}
-
-//static inline void unbindTexture(const char *name)
-//{
-//    const textureReference* tex;
-//    cudaSafeCall( cudaGetTextureReference(&tex, name) );
-//    cudaSafeCall( cudaUnbindTexture(tex) );
-//}
-
-//class TextureBinder
-//{
-//public:
-//    TextureBinder() : tex_(0) {}
-//    template <typename T> TextureBinder(const textureReference* tex, const DevMem2D_<T>& img) : tex_(0)
-//    {
-//        bind(tex, img);
-//    }
-//    template <typename T> TextureBinder(const char* tex_name, const DevMem2D_<T>& img) : tex_(0)
-//    {
-//        bind(tex_name, img);
-//    }
-//    ~TextureBinder() { unbind(); }
-//
-//    template <typename T> void bind(const textureReference* tex, const DevMem2D_<T>& img)
-//    {
-//        unbind();
-//
-//        cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
-//        cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
-//
-//        tex_ = tex;
-//    }
-//    template <typename T> void bind(const char* tex_name, const DevMem2D_<T>& img)
-//    {
-//        const textureReference* tex;
-//        cudaSafeCall( cudaGetTextureReference(&tex, tex_name) );
-//        bind(tex, img);
-//    }
-//
-//    void unbind()
-//    {
-//        if (tex_)
-//        {
-//            cudaUnbindTexture(tex_);
-//            tex_ = 0;
-//        }
-//    }
-//
-//private:
-//    const textureReference* tex_;
-//};

 class NppStreamHandler
 {
 public:
...
...@@ -42,46 +42,46 @@ ...@@ -42,46 +42,46 @@
#include "internal_shared.hpp" #include "internal_shared.hpp"
BEGIN_OPENCV_DEVICE_NAMESPACE namespace cv { namespace gpu { namespace device
namespace mathfunc {
//////////////////////////////////////////////////////////////////////////////////////
// Cart <-> Polar
struct Nothing
{ {
namespace mathfunc
{
//////////////////////////////////////////////////////////////////////////////////////
// Cart <-> Polar
struct Nothing
{
static __device__ __forceinline__ void calc(int, int, float, float, float*, size_t, float) static __device__ __forceinline__ void calc(int, int, float, float, float*, size_t, float)
{ {
} }
}; };
struct Magnitude struct Magnitude
{ {
static __device__ __forceinline__ void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float) static __device__ __forceinline__ void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float)
{ {
dst[y * dst_step + x] = ::sqrtf(x_data * x_data + y_data * y_data); dst[y * dst_step + x] = ::sqrtf(x_data * x_data + y_data * y_data);
} }
}; };
struct MagnitudeSqr struct MagnitudeSqr
{ {
static __device__ __forceinline__ void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float) static __device__ __forceinline__ void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float)
{ {
dst[y * dst_step + x] = x_data * x_data + y_data * y_data; dst[y * dst_step + x] = x_data * x_data + y_data * y_data;
} }
}; };
struct Atan2 struct Atan2
{ {
static __device__ __forceinline__ void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float scale) static __device__ __forceinline__ void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float scale)
{ {
float angle = ::atan2f(y_data, x_data); float angle = ::atan2f(y_data, x_data);
angle += (angle < 0) * 2.0 * CV_PI; angle += (angle < 0) * 2.0 * CV_PI;
dst[y * dst_step + x] = scale * angle; dst[y * dst_step + x] = scale * angle;
} }
}; };
template <typename Mag, typename Angle> template <typename Mag, typename Angle>
__global__ void cartToPolar(const float* xptr, size_t x_step, const float* yptr, size_t y_step, __global__ void cartToPolar(const float* xptr, size_t x_step, const float* yptr, size_t y_step,
float* mag, size_t mag_step, float* angle, size_t angle_step, float scale, int width, int height) float* mag, size_t mag_step, float* angle, size_t angle_step, float scale, int width, int height)
{ {
const int x = blockDim.x * blockIdx.x + threadIdx.x; const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y; const int y = blockDim.y * blockIdx.y + threadIdx.y;
...@@ -93,26 +93,26 @@ __global__ void cartToPolar(const float* xptr, size_t x_step, const float* yptr, ...@@ -93,26 +93,26 @@ __global__ void cartToPolar(const float* xptr, size_t x_step, const float* yptr,
Mag::calc(x, y, x_data, y_data, mag, mag_step, scale); Mag::calc(x, y, x_data, y_data, mag, mag_step, scale);
Angle::calc(x, y, x_data, y_data, angle, angle_step, scale); Angle::calc(x, y, x_data, y_data, angle, angle_step, scale);
} }
} }
struct NonEmptyMag struct NonEmptyMag
{ {
static __device__ __forceinline__ float get(const float* mag, size_t mag_step, int x, int y) static __device__ __forceinline__ float get(const float* mag, size_t mag_step, int x, int y)
{ {
return mag[y * mag_step + x]; return mag[y * mag_step + x];
} }
}; };
struct EmptyMag struct EmptyMag
{ {
static __device__ __forceinline__ float get(const float*, size_t, int, int) static __device__ __forceinline__ float get(const float*, size_t, int, int)
{ {
return 1.0f; return 1.0f;
} }
}; };
template <typename Mag> template <typename Mag>
__global__ void polarToCart(const float* mag, size_t mag_step, const float* angle, size_t angle_step, float scale, __global__ void polarToCart(const float* mag, size_t mag_step, const float* angle, size_t angle_step, float scale,
float* xptr, size_t x_step, float* yptr, size_t y_step, int width, int height) float* xptr, size_t x_step, float* yptr, size_t y_step, int width, int height)
{ {
const int x = blockDim.x * blockIdx.x + threadIdx.x; const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y; const int y = blockDim.y * blockIdx.y + threadIdx.y;
...@@ -127,11 +127,11 @@ __global__ void polarToCart(const float* mag, size_t mag_step, const float* angl ...@@ -127,11 +127,11 @@ __global__ void polarToCart(const float* mag, size_t mag_step, const float* angl
xptr[y * x_step + x] = mag_data * cos_a; xptr[y * x_step + x] = mag_data * cos_a;
yptr[y * y_step + x] = mag_data * sin_a; yptr[y * y_step + x] = mag_data * sin_a;
} }
} }
template <typename Mag, typename Angle> template <typename Mag, typename Angle>
void cartToPolar_caller(DevMem2Df x, DevMem2Df y, DevMem2Df mag, DevMem2Df angle, bool angleInDegrees, cudaStream_t stream) void cartToPolar_caller(DevMem2Df x, DevMem2Df y, DevMem2Df mag, DevMem2Df angle, bool angleInDegrees, cudaStream_t stream)
{ {
dim3 threads(32, 8, 1); dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1); dim3 grid(1, 1, 1);
...@@ -147,10 +147,10 @@ void cartToPolar_caller(DevMem2Df x, DevMem2Df y, DevMem2Df mag, DevMem2Df angle ...@@ -147,10 +147,10 @@ void cartToPolar_caller(DevMem2Df x, DevMem2Df y, DevMem2Df mag, DevMem2Df angle
if (stream == 0) if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaDeviceSynchronize() );
} }
void cartToPolar_gpu(DevMem2Df x, DevMem2Df y, DevMem2Df mag, bool magSqr, DevMem2Df angle, bool angleInDegrees, cudaStream_t stream) void cartToPolar_gpu(DevMem2Df x, DevMem2Df y, DevMem2Df mag, bool magSqr, DevMem2Df angle, bool angleInDegrees, cudaStream_t stream)
{ {
typedef void (*caller_t)(DevMem2Df x, DevMem2Df y, DevMem2Df mag, DevMem2Df angle, bool angleInDegrees, cudaStream_t stream); typedef void (*caller_t)(DevMem2Df x, DevMem2Df y, DevMem2Df mag, DevMem2Df angle, bool angleInDegrees, cudaStream_t stream);
static const caller_t callers[2][2][2] = static const caller_t callers[2][2][2] =
{ {
...@@ -177,11 +177,11 @@ void cartToPolar_gpu(DevMem2Df x, DevMem2Df y, DevMem2Df mag, bool magSqr, DevMe ...@@ -177,11 +177,11 @@ void cartToPolar_gpu(DevMem2Df x, DevMem2Df y, DevMem2Df mag, bool magSqr, DevMe
}; };
callers[mag.data == 0][magSqr][angle.data == 0](x, y, mag, angle, angleInDegrees, stream); callers[mag.data == 0][magSqr][angle.data == 0](x, y, mag, angle, angleInDegrees, stream);
} }
template <typename Mag> template <typename Mag>
void polarToCart_caller(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y, bool angleInDegrees, cudaStream_t stream) void polarToCart_caller(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y, bool angleInDegrees, cudaStream_t stream)
{ {
dim3 threads(32, 8, 1); dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1); dim3 grid(1, 1, 1);
...@@ -196,10 +196,10 @@ void polarToCart_caller(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y ...@@ -196,10 +196,10 @@ void polarToCart_caller(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y
if (stream == 0) if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaDeviceSynchronize() );
} }
void polarToCart_gpu(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y, bool angleInDegrees, cudaStream_t stream) void polarToCart_gpu(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y, bool angleInDegrees, cudaStream_t stream)
{ {
typedef void (*caller_t)(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y, bool angleInDegrees, cudaStream_t stream); typedef void (*caller_t)(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y, bool angleInDegrees, cudaStream_t stream);
static const caller_t callers[2] = static const caller_t callers[2] =
{ {
...@@ -208,8 +208,6 @@ void polarToCart_gpu(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y, b ...@@ -208,8 +208,6 @@ void polarToCart_gpu(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y, b
}; };
callers[mag.data == 0](mag, angle, x, y, angleInDegrees, stream); callers[mag.data == 0](mag, angle, x, y, angleInDegrees, stream);
} }
} // namespace mathfunc
} // namespace mathfunc }}} // namespace cv { namespace gpu { namespace device
END_OPENCV_DEVICE_NAMESPACE
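For orientation, a minimal host-side sketch of how the two exported entry points above might be driven; the wrapper name is hypothetical and the DevMem2Df arguments are assumed to be pre-allocated CV_32F buffers of identical size. Note how the dispatch tables key on empty outputs: an empty mag or angle selects the Nothing functor, and in polarToCart an empty mag makes EmptyMag substitute 1.0f for every magnitude.
static void cartPolarRoundTrip(cv::gpu::DevMem2Df x, cv::gpu::DevMem2Df y,
                               cv::gpu::DevMem2Df mag, cv::gpu::DevMem2Df angle)
{
    using namespace cv::gpu::device::mathfunc;

    // forward: plain (non-squared) magnitude plus angle in degrees, default stream
    cartToPolar_gpu(x, y, mag, /*magSqr=*/false, angle, /*angleInDegrees=*/true, 0);

    // inverse: rebuild x and y from mag and angle
    polarToCart_gpu(mag, angle, x, y, /*angleInDegrees=*/true, 0);
}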
...@@ -46,12 +46,12 @@ ...@@ -46,12 +46,12 @@
#include "opencv2/gpu/device/vec_math.hpp" #include "opencv2/gpu/device/vec_math.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp" #include "opencv2/gpu/device/saturate_cast.hpp"
BEGIN_OPENCV_DEVICE_NAMESPACE namespace cv { namespace gpu { namespace device
namespace imgproc {
template <typename T, typename B> __global__ void pyrDown(const PtrStep<T> src, PtrStep<T> dst, const B b, int dst_cols)
{ {
namespace imgproc
{
template <typename T, typename B> __global__ void pyrDown(const PtrStep<T> src, PtrStep<T> dst, const B b, int dst_cols)
{
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type value_type; typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type value_type;
const int x = blockIdx.x * blockDim.x + threadIdx.x; const int x = blockIdx.x * blockDim.x + threadIdx.x;
...@@ -122,10 +122,10 @@ template <typename T, typename B> __global__ void pyrDown(const PtrStep<T> src, ...@@ -122,10 +122,10 @@ template <typename T, typename B> __global__ void pyrDown(const PtrStep<T> src,
if (dst_x < dst_cols) if (dst_x < dst_cols)
dst.ptr(y)[dst_x] = saturate_cast<T>(sum); dst.ptr(y)[dst_x] = saturate_cast<T>(sum);
} }
} }
template <typename T, template <typename> class B> void pyrDown_caller(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, cudaStream_t stream) template <typename T, template <typename> class B> void pyrDown_caller(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, cudaStream_t stream)
{ {
const dim3 block(256); const dim3 block(256);
const dim3 grid(divUp(src.cols, block.x), dst.rows); const dim3 grid(divUp(src.cols, block.x), dst.rows);
...@@ -136,10 +136,10 @@ template <typename T, template <typename> class B> void pyrDown_caller(const Dev ...@@ -136,10 +136,10 @@ template <typename T, template <typename> class B> void pyrDown_caller(const Dev
if (stream == 0) if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaDeviceSynchronize() );
} }
template <typename T, int cn> void pyrDown_gpu(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream) template <typename T, int cn> void pyrDown_gpu(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream)
{ {
typedef typename TypeVec<T, cn>::vec_type type; typedef typename TypeVec<T, cn>::vec_type type;
typedef void (*caller_t)(const DevMem2D_<type>& src, const DevMem2D_<type>& dst, cudaStream_t stream); typedef void (*caller_t)(const DevMem2D_<type>& src, const DevMem2D_<type>& dst, cudaStream_t stream);
...@@ -150,38 +150,36 @@ template <typename T, int cn> void pyrDown_gpu(const DevMem2Db& src, const DevMe ...@@ -150,38 +150,36 @@ template <typename T, int cn> void pyrDown_gpu(const DevMem2Db& src, const DevMe
}; };
callers[borderType](static_cast< DevMem2D_<type> >(src), static_cast< DevMem2D_<type> >(dst), stream); callers[borderType](static_cast< DevMem2D_<type> >(src), static_cast< DevMem2D_<type> >(dst), stream);
} }
template void pyrDown_gpu<uchar, 1>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<uchar, 2>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<uchar, 3>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<uchar, 4>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<schar, 1>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<schar, 2>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<schar, 3>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<schar, 4>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<ushort, 1>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<ushort, 2>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<ushort, 3>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<ushort, 4>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<short, 1>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<short, 2>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<short, 3>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<short, 4>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<int, 1>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<int, 2>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<int, 3>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<int, 4>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<float, 1>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<float, 2>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<float, 3>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<float, 4>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
} // namespace imgproc
END_OPENCV_DEVICE_NAMESPACE template void pyrDown_gpu<uchar, 1>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<uchar, 2>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<uchar, 3>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<uchar, 4>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<schar, 1>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<schar, 2>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<schar, 3>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<schar, 4>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<ushort, 1>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<ushort, 2>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<ushort, 3>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<ushort, 4>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<short, 1>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<short, 2>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<short, 3>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<short, 4>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<int, 1>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<int, 2>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<int, 3>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<int, 4>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<float, 1>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<float, 2>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<float, 3>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<float, 4>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
} // namespace imgproc
}}} // namespace cv { namespace gpu { namespace device