Commit 0f53f299 authored by Vladislav Vinogradov

removed BEGIN_OPENCV_DEVICE_NAMESPACE macros

parent d9265413
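The whole commit applies one mechanical pattern across the GPU module: the macro-based namespace wrappers are expanded into explicit nested namespaces, and the OPENCV_DEVICE_NAMESPACE_-qualified using-directives are spelled out in full. A minimal sketch of the before/after shape (the macro definitions quoted in the comment are the ones deleted from internal_shared.hpp further down; cartToPolar_gpu stands in for any device function, parameters elided):

// Before: scopes hidden behind preprocessor macros.
//   #define BEGIN_OPENCV_DEVICE_NAMESPACE namespace cv { namespace gpu { namespace device {
//   #define END_OPENCV_DEVICE_NAMESPACE   }}}
//   #define OPENCV_DEVICE_NAMESPACE_      ::cv::gpu::device::
//
//   BEGIN_OPENCV_DEVICE_NAMESPACE
//   namespace mathfunc
//   {
//       void cartToPolar_gpu(/* ... */);
//   }
//   END_OPENCV_DEVICE_NAMESPACE
//
//   using namespace OPENCV_DEVICE_NAMESPACE_ mathfunc;

// After: the same scopes spelled out explicitly.
namespace cv { namespace gpu { namespace device
{
    namespace mathfunc
    {
        void cartToPolar_gpu(/* ... */);
    }
}}}

using namespace ::cv::gpu::device::mathfunc;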
@@ -425,21 +425,20 @@ void cv::gpu::magnitudeSqr(const GpuMat& src, GpuMat& dst, Stream& stream)
////////////////////////////////////////////////////////////////////////
// Polar <-> Cart
BEGIN_OPENCV_DEVICE_NAMESPACE
namespace mathfunc
namespace cv { namespace gpu { namespace device
{
namespace mathfunc
{
void cartToPolar_gpu(DevMem2Df x, DevMem2Df y, DevMem2Df mag, bool magSqr, DevMem2Df angle, bool angleInDegrees, cudaStream_t stream);
void polarToCart_gpu(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y, bool angleInDegrees, cudaStream_t stream);
}
END_OPENCV_DEVICE_NAMESPACE
}
}}}
namespace
{
inline void cartToPolar_caller(const GpuMat& x, const GpuMat& y, GpuMat* mag, bool magSqr, GpuMat* angle, bool angleInDegrees, cudaStream_t stream)
{
using namespace OPENCV_DEVICE_NAMESPACE_ mathfunc;
using namespace ::cv::gpu::device::mathfunc;
CV_DbgAssert(x.size() == y.size() && x.type() == y.type());
CV_Assert(x.depth() == CV_32F);
@@ -459,7 +458,7 @@ namespace
inline void polarToCart_caller(const GpuMat& mag, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees, cudaStream_t stream)
{
using namespace OPENCV_DEVICE_NAMESPACE_ mathfunc;
using namespace ::cv::gpu::device::mathfunc;
CV_DbgAssert((mag.empty() || mag.size() == angle.size()) && mag.type() == angle.type());
CV_Assert(mag.depth() == CV_32F);
......
@@ -55,19 +55,18 @@ void cv::gpu::DisparityBilateralFilter::operator()(const GpuMat&, const GpuMat&,
#else /* !defined (HAVE_CUDA) */
BEGIN_OPENCV_DEVICE_NAMESPACE
namespace bilateral_filter
namespace cv { namespace gpu { namespace device
{
namespace bilateral_filter
{
void load_constants(float* table_color, DevMem2Df table_space, int ndisp, int radius, short edge_disc, short max_disc);
void bilateral_filter_gpu(DevMem2Db disp, DevMem2Db img, int channels, int iters, cudaStream_t stream);
void bilateral_filter_gpu(DevMem2D_<short> disp, DevMem2Db img, int channels, int iters, cudaStream_t stream);
}
END_OPENCV_DEVICE_NAMESPACE
}
}}}
using namespace OPENCV_DEVICE_NAMESPACE_ bilateral_filter;
using namespace ::cv::gpu::device::bilateral_filter;
namespace
{
......
@@ -52,19 +52,18 @@ void cv::gpu::blendLinear(const GpuMat&, const GpuMat&, const GpuMat&, const Gpu
#else
BEGIN_OPENCV_DEVICE_NAMESPACE
namespace blend
namespace cv { namespace gpu { namespace device
{
namespace blend
{
template <typename T>
void blendLinearCaller(int rows, int cols, int cn, PtrStep<T> img1, PtrStep<T> img2, PtrStepf weights1, PtrStepf weights2, PtrStep<T> result, cudaStream_t stream);
void blendLinearCaller8UC4(int rows, int cols, PtrStepb img1, PtrStepb img2, PtrStepf weights1, PtrStepf weights2, PtrStepb result, cudaStream_t stream);
}
END_OPENCV_DEVICE_NAMESPACE
}
}}}
using namespace OPENCV_DEVICE_NAMESPACE_ blend;
using namespace ::cv::gpu::device::blend;
void cv::gpu::blendLinear(const GpuMat& img1, const GpuMat& img2, const GpuMat& weights1, const GpuMat& weights2,
GpuMat& result, Stream& stream)
......
@@ -82,10 +82,10 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat&, vector< vec
#else /* !defined (HAVE_CUDA) */
BEGIN_OPENCV_DEVICE_NAMESPACE
namespace bf_match
namespace cv { namespace gpu { namespace device
{
namespace bf_match
{
template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask,
const DevMem2Di& trainIdx, const DevMem2Df& distance,
int cc, cudaStream_t stream);
@@ -105,10 +105,10 @@ namespace bf_match
template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
int cc, cudaStream_t stream);
}
}
namespace bf_knnmatch
{
namespace bf_knnmatch
{
template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask,
const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,
int cc, cudaStream_t stream);
@@ -128,10 +128,10 @@ namespace bf_knnmatch
template <typename T> void match2Hamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,
int cc, cudaStream_t stream);
}
}
namespace bf_radius_match
{
namespace bf_radius_match
{
template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask,
const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
int cc, cudaStream_t stream);
@@ -153,9 +153,8 @@ namespace bf_radius_match
template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks,
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
int cc, cudaStream_t stream);
}
END_OPENCV_DEVICE_NAMESPACE
}
}}}
////////////////////////////////////////////////////////////////////
// Train collection
@@ -199,7 +198,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchSingle(const GpuMat& query, const
if (query.empty() || train.empty())
return;
using namespace OPENCV_DEVICE_NAMESPACE_ bf_match;
using namespace ::cv::gpu::device::bf_match;
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask,
const DevMem2Di& trainIdx, const DevMem2Df& distance,
@@ -341,7 +340,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchCollection(const GpuMat& query, c
if (query.empty() || trainCollection.empty())
return;
using namespace OPENCV_DEVICE_NAMESPACE_ bf_match;
using namespace ::cv::gpu::device::bf_match;
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
@@ -452,7 +451,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatchSingle(const GpuMat& query, co
if (query.empty() || train.empty())
return;
using namespace OPENCV_DEVICE_NAMESPACE_ bf_knnmatch;
using namespace ::cv::gpu::device::bf_knnmatch;
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask,
const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,
@@ -581,7 +580,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch2Collection(const GpuMat& quer
if (query.empty() || trainCollection.empty())
return;
using namespace OPENCV_DEVICE_NAMESPACE_ bf_knnmatch;
using namespace ::cv::gpu::device::bf_knnmatch;
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,
@@ -762,7 +761,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchSingle(const GpuMat& query,
if (query.empty() || train.empty())
return;
using namespace OPENCV_DEVICE_NAMESPACE_ bf_radius_match;
using namespace ::cv::gpu::device::bf_radius_match;
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask,
const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
@@ -893,7 +892,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchCollection(const GpuMat& qu
if (query.empty() || empty())
return;
using namespace OPENCV_DEVICE_NAMESPACE_ bf_radius_match;
using namespace ::cv::gpu::device::bf_radius_match;
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks,
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
......
@@ -56,31 +56,30 @@ void cv::gpu::solvePnPRansac(const Mat&, const Mat&, const Mat&, const Mat&, Mat
#else
BEGIN_OPENCV_DEVICE_NAMESPACE
namespace transform_points
namespace cv { namespace gpu { namespace device
{
namespace transform_points
{
void call(const DevMem2D_<float3> src, const float* rot, const float* transl, DevMem2D_<float3> dst, cudaStream_t stream);
}
}
namespace project_points
{
namespace project_points
{
void call(const DevMem2D_<float3> src, const float* rot, const float* transl, const float* proj, DevMem2D_<float2> dst, cudaStream_t stream);
}
}
namespace solve_pnp_ransac
{
namespace solve_pnp_ransac
{
int maxNumIters();
void computeHypothesisScores(
const int num_hypotheses, const int num_points, const float* rot_matrices,
const float3* transl_vectors, const float3* object, const float2* image,
const float dist_threshold, int* hypothesis_scores);
}
END_OPENCV_DEVICE_NAMESPACE
}
}}}
using namespace OPENCV_DEVICE_NAMESPACE;
using namespace ::cv::gpu::device;
namespace
{
......
This diff is collapsed.
@@ -43,22 +43,22 @@
#include "internal_shared.hpp"
#include "opencv2/gpu/device/limits.hpp"
BEGIN_OPENCV_DEVICE_NAMESPACE
namespace bilateral_filter {
__constant__ float* ctable_color;
__constant__ float* ctable_space;
__constant__ size_t ctable_space_step;
namespace cv { namespace gpu { namespace device
{
namespace bilateral_filter
{
__constant__ float* ctable_color;
__constant__ float* ctable_space;
__constant__ size_t ctable_space_step;
__constant__ int cndisp;
__constant__ int cradius;
__constant__ int cndisp;
__constant__ int cradius;
__constant__ short cedge_disc;
__constant__ short cmax_disc;
__constant__ short cedge_disc;
__constant__ short cmax_disc;
void load_constants(float* table_color, DevMem2Df table_space, int ndisp, int radius, short edge_disc, short max_disc)
{
void load_constants(float* table_color, DevMem2Df table_space, int ndisp, int radius, short edge_disc, short max_disc)
{
cudaSafeCall( cudaMemcpyToSymbol(ctable_color, &table_color, sizeof(table_color)) );
cudaSafeCall( cudaMemcpyToSymbol(ctable_space, &table_space.data, sizeof(table_space.data)) );
size_t table_space_step = table_space.step / sizeof(float);
@@ -69,11 +69,11 @@ void load_constants(float* table_color, DevMem2Df table_space, int ndisp, int ra
cudaSafeCall( cudaMemcpyToSymbol(cedge_disc, &edge_disc, sizeof(short)) );
cudaSafeCall( cudaMemcpyToSymbol(cmax_disc, &max_disc, sizeof(short)) );
}
}
template <int channels>
struct DistRgbMax
{
template <int channels>
struct DistRgbMax
{
static __device__ __forceinline__ uchar calc(const uchar* a, const uchar* b)
{
uchar x = ::abs(a[0] - b[0]);
@@ -81,20 +81,20 @@ struct DistRgbMax
uchar z = ::abs(a[2] - b[2]);
return (::max(::max(x, y), z));
}
};
};
template <>
struct DistRgbMax<1>
{
template <>
struct DistRgbMax<1>
{
static __device__ __forceinline__ uchar calc(const uchar* a, const uchar* b)
{
return ::abs(a[0] - b[0]);
}
};
};
template <int channels, typename T>
__global__ void bilateral_filter(int t, T* disp, size_t disp_step, const uchar* img, size_t img_step, int h, int w)
{
template <int channels, typename T>
__global__ void bilateral_filter(int t, T* disp, size_t disp_step, const uchar* img, size_t img_step, int h, int w)
{
const int y = blockIdx.y * blockDim.y + threadIdx.y;
const int x = ((blockIdx.x * blockDim.x + threadIdx.x) << 1) + ((y + t) & 1);
@@ -173,11 +173,11 @@ __global__ void bilateral_filter(int t, T* disp, size_t disp_step, const uchar*
*(disp + y * disp_step + x) = dp[id];
}
}
}
}
template <typename T>
void bilateral_filter_caller(DevMem2D_<T> disp, DevMem2Db img, int channels, int iters, cudaStream_t stream)
{
template <typename T>
void bilateral_filter_caller(DevMem2D_<T> disp, DevMem2Db img, int channels, int iters, cudaStream_t stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);
grid.x = divUp(disp.cols, threads.x << 1);
@@ -211,18 +211,16 @@ void bilateral_filter_caller(DevMem2D_<T> disp, DevMem2Db img, int channels, int
if (stream != 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
}
void bilateral_filter_gpu(DevMem2Db disp, DevMem2Db img, int channels, int iters, cudaStream_t stream)
{
void bilateral_filter_gpu(DevMem2Db disp, DevMem2Db img, int channels, int iters, cudaStream_t stream)
{
bilateral_filter_caller(disp, img, channels, iters, stream);
}
}
void bilateral_filter_gpu(DevMem2D_<short> disp, DevMem2Db img, int channels, int iters, cudaStream_t stream)
{
void bilateral_filter_gpu(DevMem2D_<short> disp, DevMem2Db img, int channels, int iters, cudaStream_t stream)
{
bilateral_filter_caller(disp, img, channels, iters, stream);
}
} // namespace bilateral_filter
END_OPENCV_DEVICE_NAMESPACE
}
} // namespace bilateral_filter
}}} // namespace cv { namespace gpu { namespace device
@@ -42,14 +42,14 @@
#include "internal_shared.hpp"
BEGIN_OPENCV_DEVICE_NAMESPACE
namespace blend {
template <typename T>
__global__ void blendLinearKernel(int rows, int cols, int cn, const PtrStep<T> img1, const PtrStep<T> img2,
const PtrStepf weights1, const PtrStepf weights2, PtrStep<T> result)
namespace cv { namespace gpu { namespace device
{
namespace blend
{
template <typename T>
__global__ void blendLinearKernel(int rows, int cols, int cn, const PtrStep<T> img1, const PtrStep<T> img2,
const PtrStepf weights1, const PtrStepf weights2, PtrStep<T> result)
{
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;
@@ -62,11 +62,11 @@ __global__ void blendLinearKernel(int rows, int cols, int cn, const PtrStep<T> i
T p2 = img2.ptr(y)[x];
result.ptr(y)[x] = (p1 * w1 + p2 * w2) / (w1 + w2 + 1e-5f);
}
}
}
template <typename T>
void blendLinearCaller(int rows, int cols, int cn, PtrStep<T> img1, PtrStep<T> img2, PtrStepf weights1, PtrStepf weights2, PtrStep<T> result, cudaStream_t stream)
{
template <typename T>
void blendLinearCaller(int rows, int cols, int cn, PtrStep<T> img1, PtrStep<T> img2, PtrStepf weights1, PtrStepf weights2, PtrStep<T> result, cudaStream_t stream)
{
dim3 threads(16, 16);
dim3 grid(divUp(cols * cn, threads.x), divUp(rows, threads.y));
@@ -75,15 +75,15 @@ void blendLinearCaller(int rows, int cols, int cn, PtrStep<T> img1, PtrStep<T> i
if (stream == 0)
cudaSafeCall(cudaDeviceSynchronize());
}
}
template void blendLinearCaller<uchar>(int, int, int, PtrStep<uchar>, PtrStep<uchar>, PtrStepf, PtrStepf, PtrStep<uchar>, cudaStream_t stream);
template void blendLinearCaller<float>(int, int, int, PtrStep<float>, PtrStep<float>, PtrStepf, PtrStepf, PtrStep<float>, cudaStream_t stream);
template void blendLinearCaller<uchar>(int, int, int, PtrStep<uchar>, PtrStep<uchar>, PtrStepf, PtrStepf, PtrStep<uchar>, cudaStream_t stream);
template void blendLinearCaller<float>(int, int, int, PtrStep<float>, PtrStep<float>, PtrStepf, PtrStepf, PtrStep<float>, cudaStream_t stream);
__global__ void blendLinearKernel8UC4(int rows, int cols, const PtrStepb img1, const PtrStepb img2,
__global__ void blendLinearKernel8UC4(int rows, int cols, const PtrStepb img1, const PtrStepb img2,
const PtrStepf weights1, const PtrStepf weights2, PtrStepb result)
{
{
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;
@@ -99,10 +99,10 @@ __global__ void blendLinearKernel8UC4(int rows, int cols, const PtrStepb img1, c
((uchar4*)result.ptr(y))[x] = make_uchar4(p1.x * w1 + p2.x * w2, p1.y * w1 + p2.y * w2,
p1.z * w1 + p2.z * w2, p1.w * w1 + p2.w * w2);
}
}
}
void blendLinearCaller8UC4(int rows, int cols, PtrStepb img1, PtrStepb img2, PtrStepf weights1, PtrStepf weights2, PtrStepb result, cudaStream_t stream)
{
void blendLinearCaller8UC4(int rows, int cols, PtrStepb img1, PtrStepb img2, PtrStepf weights1, PtrStepf weights2, PtrStepb result, cudaStream_t stream)
{
dim3 threads(16, 16);
dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));
@@ -111,8 +111,6 @@ void blendLinearCaller8UC4(int rows, int cols, PtrStepb img1, PtrStepb img2, Ptr
if (stream == 0)
cudaSafeCall(cudaDeviceSynchronize());
}
} // namespace blend
END_OPENCV_DEVICE_NAMESPACE
}
} // namespace blend
}}} // namespace cv { namespace gpu { namespace device
@@ -44,12 +44,12 @@
#include "opencv2/gpu/device/transform.hpp"
#include "opencv2/gpu/device/functional.hpp"
BEGIN_OPENCV_DEVICE_NAMESPACE
#define SOLVE_PNP_RANSAC_MAX_NUM_ITERS 200
namespace transform_points
namespace cv { namespace gpu { namespace device
{
#define SOLVE_PNP_RANSAC_MAX_NUM_ITERS 200
namespace transform_points
{
__constant__ float3 crot0;
__constant__ float3 crot1;
__constant__ float3 crot2;
@@ -74,12 +74,12 @@ namespace transform_points
cudaSafeCall(cudaMemcpyToSymbol(crot1, rot + 3, sizeof(float) * 3));
cudaSafeCall(cudaMemcpyToSymbol(crot2, rot + 6, sizeof(float) * 3));
cudaSafeCall(cudaMemcpyToSymbol(ctransl, transl, sizeof(float) * 3));
OPENCV_DEVICE_NAMESPACE_ transform(src, dst, TransformOp(), stream);
::cv::gpu::device::transform(src, dst, TransformOp(), stream);
}
} // namespace transform_points
} // namespace transform_points
namespace project_points
{
namespace project_points
{
__constant__ float3 crot0;
__constant__ float3 crot1;
__constant__ float3 crot2;
@@ -113,12 +113,12 @@ namespace project_points
cudaSafeCall(cudaMemcpyToSymbol(ctransl, transl, sizeof(float) * 3));
cudaSafeCall(cudaMemcpyToSymbol(cproj0, proj, sizeof(float) * 3));
cudaSafeCall(cudaMemcpyToSymbol(cproj1, proj + 3, sizeof(float) * 3));
OPENCV_DEVICE_NAMESPACE_ transform(src, dst, ProjectOp(), stream);
::cv::gpu::device::transform(src, dst, ProjectOp(), stream);
}
} // namespace project_points
} // namespace project_points
namespace solve_pnp_ransac
{
namespace solve_pnp_ransac
{
__constant__ float3 crot_matrices[SOLVE_PNP_RANSAC_MAX_NUM_ITERS * 3];
__constant__ float3 ctransl_vectors[SOLVE_PNP_RANSAC_MAX_NUM_ITERS];
@@ -187,6 +187,5 @@ namespace solve_pnp_ransac
cudaSafeCall( cudaDeviceSynchronize() );
}
} // namespace solve_pnp_ransac
END_OPENCV_DEVICE_NAMESPACE
} // namespace solve_pnp_ransac
}}} // namespace cv { namespace gpu { namespace device
@@ -44,12 +44,12 @@
#include <algorithm>
#include "internal_shared.hpp"
BEGIN_OPENCV_DEVICE_NAMESPACE
namespace canny {
__global__ void calcSobelRowPass(const PtrStepb src, PtrStepi dx_buf, PtrStepi dy_buf, int rows, int cols)
namespace cv { namespace gpu { namespace device
{
namespace canny
{
__global__ void calcSobelRowPass(const PtrStepb src, PtrStepi dx_buf, PtrStepi dy_buf, int rows, int cols)
{
__shared__ int smem[16][18];
const int j = blockIdx.x * blockDim.x + threadIdx.x;
@@ -71,10 +71,10 @@ __global__ void calcSobelRowPass(const PtrStepb src, PtrStepi dx_buf, PtrStepi d
dy_buf.ptr(i)[j] = smem[threadIdx.y][threadIdx.x] + 2 * smem[threadIdx.y][threadIdx.x + 1] + smem[threadIdx.y][threadIdx.x + 2];
}
}
}
}
void calcSobelRowPass_gpu(PtrStepb src, PtrStepi dx_buf, PtrStepi dy_buf, int rows, int cols)
{
void calcSobelRowPass_gpu(PtrStepb src, PtrStepi dx_buf, PtrStepi dy_buf, int rows, int cols)
{
dim3 block(16, 16, 1);
dim3 grid(divUp(cols, block.x), divUp(rows, block.y), 1);
@@ -82,26 +82,26 @@ void calcSobelRowPass_gpu(PtrStepb src, PtrStepi dx_buf, PtrStepi dy_buf, int ro
cudaSafeCall( cudaGetLastError() );
cudaSafeCall(cudaThreadSynchronize());
}
}
struct L1
{
struct L1
{
static __device__ __forceinline__ float calc(int x, int y)
{
return ::abs(x) + ::abs(y);
}
};
struct L2
{
};
struct L2
{
static __device__ __forceinline__ float calc(int x, int y)
{
return ::sqrtf(x * x + y * y);
}
};
};
template <typename Norm> __global__ void calcMagnitude(const PtrStepi dx_buf, const PtrStepi dy_buf,
template <typename Norm> __global__ void calcMagnitude(const PtrStepi dx_buf, const PtrStepi dy_buf,
PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols)
{
{
__shared__ int sdx[18][16];
__shared__ int sdy[18][16];
@@ -133,10 +133,10 @@ template <typename Norm> __global__ void calcMagnitude(const PtrStepi dx_buf, co
mag.ptr(i + 1)[j + 1] = Norm::calc(x, y);
}
}
}
}
void calcMagnitude_gpu(PtrStepi dx_buf, PtrStepi dy_buf, PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols, bool L2Grad)
{
void calcMagnitude_gpu(PtrStepi dx_buf, PtrStepi dy_buf, PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols, bool L2Grad)
{
dim3 block(16, 16, 1);
dim3 grid(divUp(cols, block.x), divUp(rows, block.y), 1);
@@ -148,19 +148,19 @@ void calcMagnitude_gpu(PtrStepi dx_buf, PtrStepi dy_buf, PtrStepi dx, PtrStepi d
cudaSafeCall( cudaGetLastError() );
cudaSafeCall(cudaThreadSynchronize());
}
}
template <typename Norm> __global__ void calcMagnitude(PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols)
{
template <typename Norm> __global__ void calcMagnitude(PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols)
{
const int j = blockIdx.x * blockDim.x + threadIdx.x;
const int i = blockIdx.y * blockDim.y + threadIdx.y;
if (i < rows && j < cols)
mag.ptr(i + 1)[j + 1] = Norm::calc(dx.ptr(i)[j], dy.ptr(i)[j]);
}
}
void calcMagnitude_gpu(PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols, bool L2Grad)
{
void calcMagnitude_gpu(PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols, bool L2Grad)
{
dim3 block(16, 16, 1);
dim3 grid(divUp(cols, block.x), divUp(rows, block.y), 1);
......@@ -172,15 +172,15 @@ void calcMagnitude_gpu(PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int col
cudaSafeCall( cudaGetLastError() );
cudaSafeCall(cudaThreadSynchronize());
}
}
//////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////
#define CANNY_SHIFT 15
#define TG22 (int)(0.4142135623730950488016887242097*(1<<CANNY_SHIFT) + 0.5)
#define CANNY_SHIFT 15
#define TG22 (int)(0.4142135623730950488016887242097*(1<<CANNY_SHIFT) + 0.5)
__global__ void calcMap(const PtrStepi dx, const PtrStepi dy, const PtrStepf mag, PtrStepi map, int rows, int cols, float low_thresh, float high_thresh)
{
__global__ void calcMap(const PtrStepi dx, const PtrStepi dy, const PtrStepf mag, PtrStepi map, int rows, int cols, float low_thresh, float high_thresh)
{
__shared__ float smem[18][18];
const int j = blockIdx.x * 16 + threadIdx.x;
@@ -239,13 +239,13 @@ __global__ void calcMap(const PtrStepi dx, const PtrStepi dy, const PtrStepf mag
map.ptr(i + 1)[j + 1] = edge_type;
}
}
}
#undef CANNY_SHIFT
#undef TG22
#undef CANNY_SHIFT
#undef TG22
void calcMap_gpu(PtrStepi dx, PtrStepi dy, PtrStepf mag, PtrStepi map, int rows, int cols, float low_thresh, float high_thresh)
{
void calcMap_gpu(PtrStepi dx, PtrStepi dy, PtrStepf mag, PtrStepi map, int rows, int cols, float low_thresh, float high_thresh)
{
dim3 block(16, 16, 1);
dim3 grid(divUp(cols, block.x), divUp(rows, block.y), 1);
@@ -253,14 +253,14 @@ void calcMap_gpu(PtrStepi dx, PtrStepi dy, PtrStepf mag, PtrStepi map, int rows,
cudaSafeCall( cudaGetLastError() );
cudaSafeCall(cudaThreadSynchronize());
}
}
//////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////
__device__ unsigned int counter = 0;
__device__ unsigned int counter = 0;
__global__ void edgesHysteresisLocal(PtrStepi map, ushort2* st, int rows, int cols)
{
__global__ void edgesHysteresisLocal(PtrStepi map, ushort2* st, int rows, int cols)
{
#if __CUDA_ARCH__ >= 120
__shared__ int smem[18][18];
@@ -335,10 +335,10 @@
}
#endif
}
}
void edgesHysteresisLocal_gpu(PtrStepi map, ushort2* st1, int rows, int cols)
{
void edgesHysteresisLocal_gpu(PtrStepi map, ushort2* st1, int rows, int cols)
{
dim3 block(16, 16, 1);
dim3 grid(divUp(cols, block.x), divUp(rows, block.y), 1);
@@ -346,13 +346,13 @@ void edgesHysteresisLocal_gpu(PtrStepi map, ushort2* st1, int rows, int cols)
cudaSafeCall( cudaGetLastError() );
cudaSafeCall(cudaThreadSynchronize());
}
}
__constant__ int c_dx[8] = {-1, 0, 1, -1, 1, -1, 0, 1};
__constant__ int c_dy[8] = {-1, -1, -1, 0, 0, 1, 1, 1};
__constant__ int c_dx[8] = {-1, 0, 1, -1, 1, -1, 0, 1};
__constant__ int c_dy[8] = {-1, -1, -1, 0, 0, 1, 1, 1};
__global__ void edgesHysteresisGlobal(PtrStepi map, ushort2* st1, ushort2* st2, int rows, int cols, int count)
{
__global__ void edgesHysteresisGlobal(PtrStepi map, ushort2* st1, ushort2* st2, int rows, int cols, int count)
{
#if __CUDA_ARCH__ >= 120
const int stack_size = 512;
......@@ -441,10 +441,10 @@ __global__ void edgesHysteresisGlobal(PtrStepi map, ushort2* st1, ushort2* st2,
}
#endif
}
}
void edgesHysteresisGlobal_gpu(PtrStepi map, ushort2* st1, ushort2* st2, int rows, int cols)
{
void edgesHysteresisGlobal_gpu(PtrStepi map, ushort2* st1, ushort2* st2, int rows, int cols)
{
void* counter_ptr;
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, counter) );
@@ -466,19 +466,19 @@ void edgesHysteresisGlobal_gpu(PtrStepi map, ushort2* st1, ushort2* st2, int row
std::swap(st1, st2);
}
}
}
__global__ void getEdges(PtrStepi map, PtrStepb dst, int rows, int cols)
{
__global__ void getEdges(PtrStepi map, PtrStepb dst, int rows, int cols)
{
const int j = blockIdx.x * 16 + threadIdx.x;
const int i = blockIdx.y * 16 + threadIdx.y;
if (i < rows && j < cols)
dst.ptr(i)[j] = (uchar)(-(map.ptr(i + 1)[j + 1] >> 1));
}
}
void getEdges_gpu(PtrStepi map, PtrStepb dst, int rows, int cols)
{
void getEdges_gpu(PtrStepi map, PtrStepb dst, int rows, int cols)
{
dim3 block(16, 16, 1);
dim3 grid(divUp(cols, block.x), divUp(rows, block.y), 1);
@@ -486,8 +486,6 @@ void getEdges_gpu(PtrStepi map, PtrStepb dst, int rows, int cols)
cudaSafeCall( cudaGetLastError() );
cudaSafeCall(cudaThreadSynchronize());
}
} // namespace canny
END_OPENCV_DEVICE_NAMESPACE
}
} // namespace canny
}}} // namespace cv { namespace gpu { namespace device
This diff is collapsed.
@@ -47,26 +47,26 @@
#include "opencv2/gpu/device/limits.hpp"
#include "opencv2/gpu/device/border_interpolate.hpp"
BEGIN_OPENCV_DEVICE_NAMESPACE
#define MAX_KERNEL_SIZE 16
#define BLOCK_DIM_X 16
#define BLOCK_DIM_Y 4
#define RESULT_STEPS 8
#define HALO_STEPS 1
namespace column_filter {
namespace cv { namespace gpu { namespace device
{
#define MAX_KERNEL_SIZE 16
#define BLOCK_DIM_X 16
#define BLOCK_DIM_Y 4
#define RESULT_STEPS 8
#define HALO_STEPS 1
__constant__ float c_kernel[MAX_KERNEL_SIZE];
namespace column_filter
{
__constant__ float c_kernel[MAX_KERNEL_SIZE];
void loadKernel(const float kernel[], int ksize)
{
void loadKernel(const float kernel[], int ksize)
{
cudaSafeCall( cudaMemcpyToSymbol(c_kernel, kernel, ksize * sizeof(float)) );
}
}
template <int KERNEL_SIZE, typename T, typename D, typename B>
__global__ void linearColumnFilter(const DevMem2D_<T> src, PtrStep<D> dst, int anchor, const B b)
{
template <int KERNEL_SIZE, typename T, typename D, typename B>
__global__ void linearColumnFilter(const DevMem2D_<T> src, PtrStep<D> dst, int anchor, const B b)
{
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type sum_t;
__shared__ T smem[BLOCK_DIM_X][(RESULT_STEPS + 2 * HALO_STEPS) * BLOCK_DIM_Y + 1];
@@ -111,11 +111,11 @@ __global__ void linearColumnFilter(const DevMem2D_<T> src, PtrStep<D> dst, int a
dst.ptr(dstY)[x] = saturate_cast<D>(sum);
}
}
}
}
template <int ksize, typename T, typename D, template<typename> class B>
void linearColumnFilter_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor, cudaStream_t stream)
{
template <int ksize, typename T, typename D, template<typename> class B>
void linearColumnFilter_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor, cudaStream_t stream)
{
const dim3 block(BLOCK_DIM_X, BLOCK_DIM_Y);
const dim3 grid(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, RESULT_STEPS * BLOCK_DIM_Y));
@@ -126,11 +126,11 @@ void linearColumnFilter_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst,
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
}
template <typename T, typename D>
void linearColumnFilter_gpu(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream)
{
template <typename T, typename D>
void linearColumnFilter_gpu(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream)
{
typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor, cudaStream_t stream);
static const caller_t callers[5][17] =
{
@@ -234,16 +234,14 @@ void linearColumnFilter_gpu(const DevMem2Db& src, const DevMem2Db& dst, const fl
loadKernel(kernel, ksize);
callers[brd_type][ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor, stream);
}
template void linearColumnFilter_gpu<float , uchar >(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
template void linearColumnFilter_gpu<float4, uchar4>(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
//template void linearColumnFilter_gpu<float , short >(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
//template void linearColumnFilter_gpu<float2, short2>(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
template void linearColumnFilter_gpu<float3, short3>(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
template void linearColumnFilter_gpu<float , int >(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
template void linearColumnFilter_gpu<float , float >(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
} // namespace column_filter
}
END_OPENCV_DEVICE_NAMESPACE
template void linearColumnFilter_gpu<float , uchar >(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
template void linearColumnFilter_gpu<float4, uchar4>(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
//template void linearColumnFilter_gpu<float , short >(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
//template void linearColumnFilter_gpu<float2, short2>(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
template void linearColumnFilter_gpu<float3, short3>(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
template void linearColumnFilter_gpu<float , int >(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
template void linearColumnFilter_gpu<float , float >(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
} // namespace column_filter
}}} // namespace cv { namespace gpu { namespace device
This diff is collapsed.
@@ -45,29 +45,29 @@
#include "opencv2/gpu/device/utility.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
BEGIN_OPENCV_DEVICE_NAMESPACE
#define UINT_BITS 32U
//Warps == subhistograms per threadblock
#define WARP_COUNT 6
namespace cv { namespace gpu { namespace device
{
#define UINT_BITS 32U
//Threadblock size
#define HISTOGRAM256_THREADBLOCK_SIZE (WARP_COUNT * OPENCV_GPU_WARP_SIZE)
#define HISTOGRAM256_BIN_COUNT 256
//Warps == subhistograms per threadblock
#define WARP_COUNT 6
//Shared memory per threadblock
#define HISTOGRAM256_THREADBLOCK_MEMORY (WARP_COUNT * HISTOGRAM256_BIN_COUNT)
//Threadblock size
#define HISTOGRAM256_THREADBLOCK_SIZE (WARP_COUNT * OPENCV_GPU_WARP_SIZE)
#define HISTOGRAM256_BIN_COUNT 256
#define PARTIAL_HISTOGRAM256_COUNT 240
//Shared memory per threadblock
#define HISTOGRAM256_THREADBLOCK_MEMORY (WARP_COUNT * HISTOGRAM256_BIN_COUNT)
#define MERGE_THREADBLOCK_SIZE 256
#define PARTIAL_HISTOGRAM256_COUNT 240
#define USE_SMEM_ATOMICS (__CUDA_ARCH__ >= 120)
#define MERGE_THREADBLOCK_SIZE 256
namespace hist {
#define USE_SMEM_ATOMICS (__CUDA_ARCH__ >= 120)
#if (!USE_SMEM_ATOMICS)
namespace hist
{
#if (!USE_SMEM_ATOMICS)
#define TAG_MASK ( (1U << (UINT_BITS - OPENCV_GPU_LOG_WARP_SIZE)) - 1U )
@@ -82,7 +82,7 @@ namespace hist {
} while (s_WarpHist[data] != count);
}
#else
#else
#define TAG_MASK 0xFFFFFFFFU
@@ -91,20 +91,20 @@ namespace hist {
atomicAdd(s_WarpHist + data, 1);
}
#endif
#endif
__forceinline__ __device__ void addWord(uint* s_WarpHist, uint data, uint tag, uint pos_x, uint cols)
{
__forceinline__ __device__ void addWord(uint* s_WarpHist, uint data, uint tag, uint pos_x, uint cols)
{
uint x = pos_x << 2;
if (x + 0 < cols) addByte(s_WarpHist, (data >> 0) & 0xFFU, tag);
if (x + 1 < cols) addByte(s_WarpHist, (data >> 8) & 0xFFU, tag);
if (x + 2 < cols) addByte(s_WarpHist, (data >> 16) & 0xFFU, tag);
if (x + 3 < cols) addByte(s_WarpHist, (data >> 24) & 0xFFU, tag);
}
}
__global__ void histogram256(const PtrStep<uint> d_Data, uint* d_PartialHistograms, uint dataCount, uint cols)
{
__global__ void histogram256(const PtrStep<uint> d_Data, uint* d_PartialHistograms, uint dataCount, uint cols)
{
//Per-warp subhistogram storage
__shared__ uint s_Hist[HISTOGRAM256_THREADBLOCK_MEMORY];
uint* s_WarpHist= s_Hist + (threadIdx.x >> OPENCV_GPU_LOG_WARP_SIZE) * HISTOGRAM256_BIN_COUNT;
@@ -138,17 +138,17 @@ __global__ void histogram256(const PtrStep<uint> d_Data, uint* d_PartialHistogra
d_PartialHistograms[blockIdx.x * HISTOGRAM256_BIN_COUNT + bin] = sum;
}
}
}
////////////////////////////////////////////////////////////////////////////////
// Merge histogram256() output
// Run one threadblock per bin; each threadblock adds up the same bin counter
// from every partial histogram. Reads are uncoalesced, but mergeHistogram256
// takes only a fraction of total processing time
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// Merge histogram256() output
// Run one threadblock per bin; each threadblock adds up the same bin counter
// from every partial histogram. Reads are uncoalesced, but mergeHistogram256
// takes only a fraction of total processing time
////////////////////////////////////////////////////////////////////////////////
__global__ void mergeHistogram256(const uint* d_PartialHistograms, int* d_Histogram)
{
__global__ void mergeHistogram256(const uint* d_PartialHistograms, int* d_Histogram)
{
uint sum = 0;
#pragma unroll
@@ -167,10 +167,10 @@ __global__ void mergeHistogram256(const uint* d_PartialHistograms, int* d_Histog
if(threadIdx.x == 0)
d_Histogram[blockIdx.x] = saturate_cast<int>(data[0]);
}
}
void histogram256_gpu(DevMem2Db src, int* hist, uint* buf, cudaStream_t stream)
{
void histogram256_gpu(DevMem2Db src, int* hist, uint* buf, cudaStream_t stream)
{
histogram256<<<PARTIAL_HISTOGRAM256_COUNT, HISTOGRAM256_THREADBLOCK_SIZE, 0, stream>>>(
DevMem2D_<uint>(src),
buf,
@@ -185,12 +185,12 @@ void histogram256_gpu(DevMem2Db src, int* hist, uint* buf, cudaStream_t stream)
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
}
__constant__ int c_lut[256];
__constant__ int c_lut[256];
__global__ void equalizeHist(const DevMem2Db src, PtrStepb dst)
{
__global__ void equalizeHist(const DevMem2Db src, PtrStepb dst)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
@@ -200,10 +200,10 @@ __global__ void equalizeHist(const DevMem2Db src, PtrStepb dst)
const int lut = c_lut[val];
dst.ptr(y)[x] = __float2int_rn(255.0f / (src.cols * src.rows) * lut);
}
}
}
void equalizeHist_gpu(DevMem2Db src, DevMem2Db dst, const int* lut, cudaStream_t stream)
{
void equalizeHist_gpu(DevMem2Db src, DevMem2Db dst, const int* lut, cudaStream_t stream)
{
dim3 block(16, 16);
dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
@@ -214,8 +214,6 @@ void equalizeHist_gpu(DevMem2Db src, DevMem2Db dst, const int* lut, cudaStream_t
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
} // namespace hist
END_OPENCV_DEVICE_NAMESPACE
}
} // namespace hist
}}} // namespace cv { namespace gpu { namespace device
This diff is collapsed.
@@ -50,7 +50,7 @@
#include "safe_call.hpp"
#ifndef CV_PI
#define CV_PI 3.1415926535897932384626433832795f
#define CV_PI 3.1415926535897932384626433832795
#endif
#ifndef CV_PI_F
@@ -61,27 +61,21 @@
#endif
#endif
#define BEGIN_OPENCV_DEVICE_NAMESPACE namespace cv { namespace gpu { namespace device {
#define END_OPENCV_DEVICE_NAMESPACE }}}
#define OPENCV_DEVICE_NAMESPACE ::cv::gpu::device
#define OPENCV_DEVICE_NAMESPACE_ ::cv::gpu::device::
#ifdef __CUDACC__
BEGIN_OPENCV_DEVICE_NAMESPACE
typedef unsigned char uchar;
typedef unsigned short ushort;
typedef signed char schar;
typedef unsigned int uint;
template<class T> static inline void bindTexture(const textureReference* tex, const DevMem2D_<T>& img)
namespace cv { namespace gpu { namespace device
{
typedef unsigned char uchar;
typedef unsigned short ushort;
typedef signed char schar;
typedef unsigned int uint;
template<class T> static inline void bindTexture(const textureReference* tex, const DevMem2D_<T>& img)
{
cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
}
END_OPENCV_DEVICE_NAMESPACE
}
}}}
#endif
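As a side note, a hypothetical usage sketch of the bindTexture helper kept above; tex_src and bindSrc are illustrative names, not OpenCV API, and the snippet assumes a .cu translation unit with internal_shared.hpp included (the helper only exists under __CUDACC__):

// Hypothetical sketch, not part of this commit. A CUDA texture<> object
// derives from textureReference, so its address converts to the
// const textureReference* that bindTexture expects.
texture<unsigned char, 2, cudaReadModeElementType> tex_src;

void bindSrc(const cv::gpu::DevMem2D_<unsigned char>& img)
{
    // bindTexture builds the channel descriptor for the element type and
    // forwards everything to cudaBindTexture2D.
    ::cv::gpu::device::bindTexture(&tex_src, img);
}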
@@ -102,87 +96,6 @@ namespace cv { namespace gpu
static inline int divUp(int total, int grain) { return (total + grain - 1) / grain; }
/*template<class T> static inline void uploadConstant(const char* name, const T& value)
{
cudaSafeCall( cudaMemcpyToSymbol(name, &value, sizeof(T)) );
}
template<class T> static inline void uploadConstant(const char* name, const T& value, cudaStream_t stream)
{
cudaSafeCall( cudaMemcpyToSymbolAsync(name, &value, sizeof(T), 0, cudaMemcpyHostToDevice, stream) );
} */
//template<class T> static inline void bindTexture(const char* name, const DevMem2D_<T>& img)
//{
// //!!!! const_cast is disabled!
// //!!!! Please use constructor of 'class texture' instead.
//
// //textureReference* tex;
// //cudaSafeCall( cudaGetTextureReference((const textureReference**)&tex, name) );
// //tex->normalized = normalized;
// //tex->filterMode = filterMode;
// //tex->addressMode[0] = addrMode;
// //tex->addressMode[1] = addrMode;
//
// const textureReference* tex;
// cudaSafeCall( cudaGetTextureReference(&tex, name) );
//
// cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
// cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
//}
//static inline void unbindTexture(const char *name)
//{
// const textureReference* tex;
// cudaSafeCall( cudaGetTextureReference(&tex, name) );
// cudaSafeCall( cudaUnbindTexture(tex) );
//}
//class TextureBinder
//{
//public:
// TextureBinder() : tex_(0) {}
// template <typename T> TextureBinder(const textureReference* tex, const DevMem2D_<T>& img) : tex_(0)
// {
// bind(tex, img);
// }
// template <typename T> TextureBinder(const char* tex_name, const DevMem2D_<T>& img) : tex_(0)
// {
// bind(tex_name, img);
// }
// ~TextureBinder() { unbind(); }
//
// template <typename T> void bind(const textureReference* tex, const DevMem2D_<T>& img)
// {
// unbind();
//
// cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
// cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
//
// tex_ = tex;
// }
// template <typename T> void bind(const char* tex_name, const DevMem2D_<T>& img)
// {
// const textureReference* tex;
// cudaSafeCall( cudaGetTextureReference(&tex, tex_name) );
// bind(tex, img);
// }
//
// void unbind()
// {
// if (tex_)
// {
// cudaUnbindTexture(tex_);
// tex_ = 0;
// }
// }
//
//private:
// const textureReference* tex_;
//};
class NppStreamHandler
{
public:
......
This diff is collapsed.
@@ -42,46 +42,46 @@
#include "internal_shared.hpp"
BEGIN_OPENCV_DEVICE_NAMESPACE
namespace mathfunc {
//////////////////////////////////////////////////////////////////////////////////////
// Cart <-> Polar
struct Nothing
namespace cv { namespace gpu { namespace device
{
namespace mathfunc
{
//////////////////////////////////////////////////////////////////////////////////////
// Cart <-> Polar
struct Nothing
{
static __device__ __forceinline__ void calc(int, int, float, float, float*, size_t, float)
{
}
};
struct Magnitude
{
};
struct Magnitude
{
static __device__ __forceinline__ void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float)
{
dst[y * dst_step + x] = ::sqrtf(x_data * x_data + y_data * y_data);
}
};
struct MagnitudeSqr
{
};
struct MagnitudeSqr
{
static __device__ __forceinline__ void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float)
{
dst[y * dst_step + x] = x_data * x_data + y_data * y_data;
}
};
struct Atan2
{
};
struct Atan2
{
static __device__ __forceinline__ void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float scale)
{
float angle = ::atan2f(y_data, x_data);
angle += (angle < 0) * 2.0 * CV_PI;
dst[y * dst_step + x] = scale * angle;
}
};
template <typename Mag, typename Angle>
__global__ void cartToPolar(const float* xptr, size_t x_step, const float* yptr, size_t y_step,
};
template <typename Mag, typename Angle>
__global__ void cartToPolar(const float* xptr, size_t x_step, const float* yptr, size_t y_step,
float* mag, size_t mag_step, float* angle, size_t angle_step, float scale, int width, int height)
{
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
@@ -93,26 +93,26 @@ __global__ void cartToPolar(const float* xptr, size_t x_step, const float* yptr,
Mag::calc(x, y, x_data, y_data, mag, mag_step, scale);
Angle::calc(x, y, x_data, y_data, angle, angle_step, scale);
}
}
}
struct NonEmptyMag
{
struct NonEmptyMag
{
static __device__ __forceinline__ float get(const float* mag, size_t mag_step, int x, int y)
{
return mag[y * mag_step + x];
}
};
struct EmptyMag
{
};
struct EmptyMag
{
static __device__ __forceinline__ float get(const float*, size_t, int, int)
{
return 1.0f;
}
};
template <typename Mag>
__global__ void polarToCart(const float* mag, size_t mag_step, const float* angle, size_t angle_step, float scale,
};
template <typename Mag>
__global__ void polarToCart(const float* mag, size_t mag_step, const float* angle, size_t angle_step, float scale,
float* xptr, size_t x_step, float* yptr, size_t y_step, int width, int height)
{
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
@@ -127,11 +127,11 @@ __global__ void polarToCart(const float* mag, size_t mag_step, const float* angl
xptr[y * x_step + x] = mag_data * cos_a;
yptr[y * y_step + x] = mag_data * sin_a;
}
}
}
template <typename Mag, typename Angle>
void cartToPolar_caller(DevMem2Df x, DevMem2Df y, DevMem2Df mag, DevMem2Df angle, bool angleInDegrees, cudaStream_t stream)
{
template <typename Mag, typename Angle>
void cartToPolar_caller(DevMem2Df x, DevMem2Df y, DevMem2Df mag, DevMem2Df angle, bool angleInDegrees, cudaStream_t stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);
@@ -147,10 +147,10 @@ void cartToPolar_caller(DevMem2Df x, DevMem2Df y, DevMem2Df mag, DevMem2Df angle
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
}
void cartToPolar_gpu(DevMem2Df x, DevMem2Df y, DevMem2Df mag, bool magSqr, DevMem2Df angle, bool angleInDegrees, cudaStream_t stream)
{
void cartToPolar_gpu(DevMem2Df x, DevMem2Df y, DevMem2Df mag, bool magSqr, DevMem2Df angle, bool angleInDegrees, cudaStream_t stream)
{
typedef void (*caller_t)(DevMem2Df x, DevMem2Df y, DevMem2Df mag, DevMem2Df angle, bool angleInDegrees, cudaStream_t stream);
static const caller_t callers[2][2][2] =
{
@@ -177,11 +177,11 @@ void cartToPolar_gpu(DevMem2Df x, DevMem2Df y, DevMem2Df mag, bool magSqr, DevMe
};
callers[mag.data == 0][magSqr][angle.data == 0](x, y, mag, angle, angleInDegrees, stream);
}
}
template <typename Mag>
void polarToCart_caller(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y, bool angleInDegrees, cudaStream_t stream)
{
template <typename Mag>
void polarToCart_caller(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y, bool angleInDegrees, cudaStream_t stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);
@@ -196,10 +196,10 @@ void polarToCart_caller(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
}
void polarToCart_gpu(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y, bool angleInDegrees, cudaStream_t stream)
{
void polarToCart_gpu(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y, bool angleInDegrees, cudaStream_t stream)
{
typedef void (*caller_t)(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y, bool angleInDegrees, cudaStream_t stream);
static const caller_t callers[2] =
{
@@ -208,8 +208,6 @@ void polarToCart_gpu(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y, b
};
callers[mag.data == 0](mag, angle, x, y, angleInDegrees, stream);
}
} // namespace mathfunc
END_OPENCV_DEVICE_NAMESPACE
}
} // namespace mathfunc
}}} // namespace cv { namespace gpu { namespace device
This diff is collapsed.
@@ -46,12 +46,12 @@
#include "opencv2/gpu/device/vec_math.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
BEGIN_OPENCV_DEVICE_NAMESPACE
namespace imgproc {
template <typename T, typename B> __global__ void pyrDown(const PtrStep<T> src, PtrStep<T> dst, const B b, int dst_cols)
namespace cv { namespace gpu { namespace device
{
namespace imgproc
{
template <typename T, typename B> __global__ void pyrDown(const PtrStep<T> src, PtrStep<T> dst, const B b, int dst_cols)
{
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type value_type;
const int x = blockIdx.x * blockDim.x + threadIdx.x;
@@ -122,10 +122,10 @@ template <typename T, typename B> __global__ void pyrDown(const PtrStep<T> src,
if (dst_x < dst_cols)
dst.ptr(y)[dst_x] = saturate_cast<T>(sum);
}
}
}
template <typename T, template <typename> class B> void pyrDown_caller(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, cudaStream_t stream)
{
template <typename T, template <typename> class B> void pyrDown_caller(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, cudaStream_t stream)
{
const dim3 block(256);
const dim3 grid(divUp(src.cols, block.x), dst.rows);
@@ -136,10 +136,10 @@ template <typename T, template <typename> class B> void pyrDown_caller(const Dev
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
}
template <typename T, int cn> void pyrDown_gpu(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream)
{
template <typename T, int cn> void pyrDown_gpu(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream)
{
typedef typename TypeVec<T, cn>::vec_type type;
typedef void (*caller_t)(const DevMem2D_<type>& src, const DevMem2D_<type>& dst, cudaStream_t stream);
@@ -150,38 +150,36 @@ template <typename T, int cn> void pyrDown_gpu(const DevMem2Db& src, const DevMe
};
callers[borderType](static_cast< DevMem2D_<type> >(src), static_cast< DevMem2D_<type> >(dst), stream);
}
template void pyrDown_gpu<uchar, 1>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<uchar, 2>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<uchar, 3>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<uchar, 4>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<schar, 1>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<schar, 2>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<schar, 3>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<schar, 4>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<ushort, 1>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<ushort, 2>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<ushort, 3>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<ushort, 4>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<short, 1>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<short, 2>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<short, 3>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<short, 4>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<int, 1>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<int, 2>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<int, 3>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<int, 4>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<float, 1>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<float, 2>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<float, 3>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<float, 4>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
} // namespace imgproc
}
END_OPENCV_DEVICE_NAMESPACE
template void pyrDown_gpu<uchar, 1>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<uchar, 2>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<uchar, 3>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<uchar, 4>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<schar, 1>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<schar, 2>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<schar, 3>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<schar, 4>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<ushort, 1>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<ushort, 2>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<ushort, 3>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<ushort, 4>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<short, 1>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<short, 2>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<short, 3>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<short, 4>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<int, 1>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<int, 2>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<int, 3>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<int, 4>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<float, 1>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<float, 2>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<float, 3>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<float, 4>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
} // namespace imgproc
}}} // namespace cv { namespace gpu { namespace device
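A note on the explicit instantiation lists that close this file (and column_filter above): the host-side wrappers are compiled as plain C++ and only see declarations of pyrDown_gpu, so every specialization they call must be instantiated in the CUDA translation unit for the linker to find it. A minimal sketch of that split, using a hypothetical stand-in type rather than OpenCV's real DevMem2Db:

#include <cuda_runtime.h>

struct DevMem2Db { };  // hypothetical stand-in, not OpenCV's type

// pyr_down.cu: the template definition plus explicit instantiations that
// emit linkable symbols for the host side.
template <typename T, int cn>
void pyrDown_gpu(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream)
{
    // select the kernel for T/cn, configure grid and block, launch ...
}

template void pyrDown_gpu<unsigned char, 1>(const DevMem2Db&, const DevMem2Db&, int, cudaStream_t);

// imgproc.cpp: a declaration is enough here; the linker resolves the call
// against the instantiation emitted in the .cu file.
template <typename T, int cn>
void pyrDown_gpu(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);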
This diff is collapsed.