Commit 70204a8e authored by Anatoly Baksheev

Removed PtrElemStep, marked DevMem2D as deprecated; PtrStepSz should be used instead

parent 9be63c50
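
The rename is mechanical: DevMem2D_<T> becomes PtrStepSz<T> (pointer, step, and size), PtrStep_<T> becomes PtrStep<T> (pointer and step only), and PtrElemStep_ is removed outright. A minimal before/after sketch of what this means for downstream launcher declarations (the function name is illustrative, not taken from this diff):

    // before this commit
    void myFilter_gpu(cv::gpu::DevMem2Db src, cv::gpu::DevMem2Db dst, cudaStream_t stream);

    // after this commit: same memory layout, new name
    void myFilter_gpu(cv::gpu::PtrStepSzb src, cv::gpu::PtrStepSzb dst, cudaStream_t stream);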
......@@ -46,7 +46,7 @@
#ifdef __cplusplus
#include "opencv2/core/core.hpp"
#include "opencv2/core/devmem2d.hpp"
#include "opencv2/core/cuda_devptrs.hpp"
namespace cv { namespace gpu
{
......@@ -268,10 +268,14 @@ namespace cv { namespace gpu
template<typename _Tp> _Tp* ptr(int y = 0);
template<typename _Tp> const _Tp* ptr(int y = 0) const;
template <typename _Tp> operator DevMem2D_<_Tp>() const;
template <typename _Tp> operator PtrStep_<_Tp>() const;
template <typename _Tp> operator PtrStepSz<_Tp>() const;
template <typename _Tp> operator PtrStep<_Tp>() const;
// Deprecated function
__CV_GPU_DEPR_BEFORE__ template <typename _Tp> operator DevMem2D_<_Tp>() const __CV_GPU_DEPR_AFTER__;
#undef __CV_GPU_DEPR_BEFORE__
#undef __CV_GPU_DEPR_AFTER__
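// For reference, deprecation markers of this kind are typically defined along
// these lines (an assumed sketch based on common practice; the actual
// definitions, presumably in cuda_devptrs.hpp, are not shown in this hunk):
//
// #if defined(__GNUC__)
//     #define __CV_GPU_DEPR_BEFORE__
//     #define __CV_GPU_DEPR_AFTER__ __attribute__ ((deprecated))
// #elif defined(_MSC_VER)
//     #define __CV_GPU_DEPR_BEFORE__ __declspec(deprecated)
//     #define __CV_GPU_DEPR_AFTER__
// #else
//     #define __CV_GPU_DEPR_BEFORE__
//     #define __CV_GPU_DEPR_AFTER__
// #endif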
/*! includes several bit-fields:
- the magic signature
- continuity flag
......@@ -502,19 +506,19 @@ namespace cv { namespace gpu
return *this;
}
template <class T> inline GpuMat::operator DevMem2D_<T>() const
template <class T> inline GpuMat::operator PtrStepSz<T>() const
{
return DevMem2D_<T>(rows, cols, (T*)data, step);
return PtrStepSz<T>(rows, cols, (T*)data, step);
}
template <class T> inline GpuMat::operator PtrStep_<T>() const
template <class T> inline GpuMat::operator PtrStep<T>() const
{
return PtrStep_<T>(static_cast< DevMem2D_<T> >(*this));
return PtrStep<T>((T*)data, step);
}
template <class T> inline GpuMat::operator PtrStep<T>() const
template <class T> inline GpuMat::operator DevMem2D_<T>() const
{
return PtrStep<T>((T*)data, step);
return DevMem2D_<T>(rows, cols, (T*)data, step);
}
inline GpuMat createContinuous(int rows, int cols, int type)
......
......@@ -44,6 +44,18 @@
#include "opencv2/gpu/device/transform.hpp"
#include "opencv2/gpu/device/functional.hpp"
namespace cv { namespace gpu { namespace device
{
void writeScalar(const uchar*);
void writeScalar(const schar*);
void writeScalar(const ushort*);
void writeScalar(const short int*);
void writeScalar(const int*);
void writeScalar(const float*);
void writeScalar(const double*);
void convert_gpu(PtrStepSzb, int, PtrStepSzb, int, double, double, cudaStream_t);
}}}
namespace cv { namespace gpu { namespace device
{
template <typename T> struct shift_and_sizeof;
......@@ -59,17 +71,17 @@ namespace cv { namespace gpu { namespace device
////////////////////////////////// CopyTo /////////////////////////////////
///////////////////////////////////////////////////////////////////////////
template <typename T> void copyToWithMask(DevMem2Db src, DevMem2Db dst, int cn, DevMem2Db mask, bool colorMask, cudaStream_t stream)
template <typename T> void copyToWithMask(PtrStepSzb src, PtrStepSzb dst, int cn, PtrStepSzb mask, bool colorMask, cudaStream_t stream)
{
if (colorMask)
cv::gpu::device::transform((DevMem2D_<T>)src, (DevMem2D_<T>)dst, identity<T>(), SingleMask(mask), stream);
cv::gpu::device::transform((PtrStepSz<T>)src, (PtrStepSz<T>)dst, identity<T>(), SingleMask(mask), stream);
else
cv::gpu::device::transform((DevMem2D_<T>)src, (DevMem2D_<T>)dst, identity<T>(), SingleMaskChannels(mask, cn), stream);
cv::gpu::device::transform((PtrStepSz<T>)src, (PtrStepSz<T>)dst, identity<T>(), SingleMaskChannels(mask, cn), stream);
}
void copyToWithMask_gpu(DevMem2Db src, DevMem2Db dst, size_t elemSize1, int cn, DevMem2Db mask, bool colorMask, cudaStream_t stream)
void copyToWithMask_gpu(PtrStepSzb src, PtrStepSzb dst, size_t elemSize1, int cn, PtrStepSzb mask, bool colorMask, cudaStream_t stream)
{
typedef void (*func_t)(DevMem2Db src, DevMem2Db dst, int cn, DevMem2Db mask, bool colorMask, cudaStream_t stream);
typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, int cn, PtrStepSzb mask, bool colorMask, cudaStream_t stream);
static func_t tab[] =
{
......@@ -164,7 +176,7 @@ namespace cv { namespace gpu { namespace device
}
}
template <typename T>
void set_to_gpu(DevMem2Db mat, const T* scalar, DevMem2Db mask, int channels, cudaStream_t stream)
void set_to_gpu(PtrStepSzb mat, const T* scalar, PtrStepSzb mask, int channels, cudaStream_t stream)
{
writeScalar(scalar);
......@@ -178,16 +190,16 @@ namespace cv { namespace gpu { namespace device
cudaSafeCall ( cudaDeviceSynchronize() );
}
template void set_to_gpu<uchar >(DevMem2Db mat, const uchar* scalar, DevMem2Db mask, int channels, cudaStream_t stream);
template void set_to_gpu<schar >(DevMem2Db mat, const schar* scalar, DevMem2Db mask, int channels, cudaStream_t stream);
template void set_to_gpu<ushort>(DevMem2Db mat, const ushort* scalar, DevMem2Db mask, int channels, cudaStream_t stream);
template void set_to_gpu<short >(DevMem2Db mat, const short* scalar, DevMem2Db mask, int channels, cudaStream_t stream);
template void set_to_gpu<int >(DevMem2Db mat, const int* scalar, DevMem2Db mask, int channels, cudaStream_t stream);
template void set_to_gpu<float >(DevMem2Db mat, const float* scalar, DevMem2Db mask, int channels, cudaStream_t stream);
template void set_to_gpu<double>(DevMem2Db mat, const double* scalar, DevMem2Db mask, int channels, cudaStream_t stream);
template void set_to_gpu<uchar >(PtrStepSzb mat, const uchar* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
template void set_to_gpu<schar >(PtrStepSzb mat, const schar* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
template void set_to_gpu<ushort>(PtrStepSzb mat, const ushort* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
template void set_to_gpu<short >(PtrStepSzb mat, const short* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
template void set_to_gpu<int >(PtrStepSzb mat, const int* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
template void set_to_gpu<float >(PtrStepSzb mat, const float* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
template void set_to_gpu<double>(PtrStepSzb mat, const double* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
template <typename T>
void set_to_gpu(DevMem2Db mat, const T* scalar, int channels, cudaStream_t stream)
void set_to_gpu(PtrStepSzb mat, const T* scalar, int channels, cudaStream_t stream)
{
writeScalar(scalar);
......@@ -201,13 +213,13 @@ namespace cv { namespace gpu { namespace device
cudaSafeCall ( cudaDeviceSynchronize() );
}
template void set_to_gpu<uchar >(DevMem2Db mat, const uchar* scalar, int channels, cudaStream_t stream);
template void set_to_gpu<schar >(DevMem2Db mat, const schar* scalar, int channels, cudaStream_t stream);
template void set_to_gpu<ushort>(DevMem2Db mat, const ushort* scalar, int channels, cudaStream_t stream);
template void set_to_gpu<short >(DevMem2Db mat, const short* scalar, int channels, cudaStream_t stream);
template void set_to_gpu<int >(DevMem2Db mat, const int* scalar, int channels, cudaStream_t stream);
template void set_to_gpu<float >(DevMem2Db mat, const float* scalar, int channels, cudaStream_t stream);
template void set_to_gpu<double>(DevMem2Db mat, const double* scalar, int channels, cudaStream_t stream);
template void set_to_gpu<uchar >(PtrStepSzb mat, const uchar* scalar, int channels, cudaStream_t stream);
template void set_to_gpu<schar >(PtrStepSzb mat, const schar* scalar, int channels, cudaStream_t stream);
template void set_to_gpu<ushort>(PtrStepSzb mat, const ushort* scalar, int channels, cudaStream_t stream);
template void set_to_gpu<short >(PtrStepSzb mat, const short* scalar, int channels, cudaStream_t stream);
template void set_to_gpu<int >(PtrStepSzb mat, const int* scalar, int channels, cudaStream_t stream);
template void set_to_gpu<float >(PtrStepSzb mat, const float* scalar, int channels, cudaStream_t stream);
template void set_to_gpu<double>(PtrStepSzb mat, const double* scalar, int channels, cudaStream_t stream);
///////////////////////////////////////////////////////////////////////////
//////////////////////////////// ConvertTo ////////////////////////////////
......@@ -274,12 +286,12 @@ namespace cv { namespace gpu { namespace device
};
template<typename T, typename D>
void cvt_(DevMem2Db src, DevMem2Db dst, double alpha, double beta, cudaStream_t stream)
void cvt_(PtrStepSzb src, PtrStepSzb dst, double alpha, double beta, cudaStream_t stream)
{
cudaSafeCall( cudaSetDoubleForDevice(&alpha) );
cudaSafeCall( cudaSetDoubleForDevice(&beta) );
Convertor<T, D> op(alpha, beta);
cv::gpu::device::transform((DevMem2D_<T>)src, (DevMem2D_<D>)dst, op, WithOutMask(), stream);
cv::gpu::device::transform((PtrStepSz<T>)src, (PtrStepSz<D>)dst, op, WithOutMask(), stream);
}
#if defined __clang__
......@@ -287,9 +299,9 @@ namespace cv { namespace gpu { namespace device
# pragma clang diagnostic ignored "-Wmissing-declarations"
#endif
void convert_gpu(DevMem2Db src, int sdepth, DevMem2Db dst, int ddepth, double alpha, double beta, cudaStream_t stream)
void convert_gpu(PtrStepSzb src, int sdepth, PtrStepSzb dst, int ddepth, double alpha, double beta, cudaStream_t stream)
{
typedef void (*caller_t)(DevMem2Db src, DevMem2Db dst, double alpha, double beta, cudaStream_t stream);
typedef void (*caller_t)(PtrStepSzb src, PtrStepSzb dst, double alpha, double beta, cudaStream_t stream);
static const caller_t tab[8][8] =
{
......
......@@ -761,15 +761,15 @@ namespace
namespace cv { namespace gpu { namespace device
{
void copyToWithMask_gpu(DevMem2Db src, DevMem2Db dst, size_t elemSize1, int cn, DevMem2Db mask, bool colorMask, cudaStream_t stream);
void copyToWithMask_gpu(PtrStepSzb src, PtrStepSzb dst, size_t elemSize1, int cn, PtrStepSzb mask, bool colorMask, cudaStream_t stream);
template <typename T>
void set_to_gpu(DevMem2Db mat, const T* scalar, int channels, cudaStream_t stream);
void set_to_gpu(PtrStepSzb mat, const T* scalar, int channels, cudaStream_t stream);
template <typename T>
void set_to_gpu(DevMem2Db mat, const T* scalar, DevMem2Db mask, int channels, cudaStream_t stream);
void set_to_gpu(PtrStepSzb mat, const T* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
void convert_gpu(DevMem2Db src, int sdepth, DevMem2Db dst, int ddepth, double alpha, double beta, cudaStream_t stream);
void convert_gpu(PtrStepSzb src, int sdepth, PtrStepSzb dst, int ddepth, double alpha, double beta, cudaStream_t stream);
}}}
namespace
......@@ -787,9 +787,22 @@ namespace
}
}
namespace cv { namespace gpu
{
CV_EXPORTS void copyWithMask(const cv::gpu::GpuMat&, cv::gpu::GpuMat&, const cv::gpu::GpuMat&, CUstream_st*);
CV_EXPORTS void convertTo(const cv::gpu::GpuMat&, cv::gpu::GpuMat&);
CV_EXPORTS void convertTo(const cv::gpu::GpuMat&, cv::gpu::GpuMat&, double, double, CUstream_st*);
CV_EXPORTS void setTo(cv::gpu::GpuMat&, cv::Scalar, CUstream_st*);
CV_EXPORTS void setTo(cv::gpu::GpuMat&, cv::Scalar, const cv::gpu::GpuMat&, CUstream_st*);
CV_EXPORTS void setTo(cv::gpu::GpuMat&, cv::Scalar);
CV_EXPORTS void setTo(cv::gpu::GpuMat&, cv::Scalar, const cv::gpu::GpuMat&);
}}
namespace cv { namespace gpu
{
CV_EXPORTS void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream = 0)
void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream = 0)
{
CV_Assert(src.size() == dst.size() && src.type() == dst.type());
CV_Assert(src.size() == mask.size() && mask.depth() == CV_8U && (mask.channels() == 1 || mask.channels() == src.channels()));
......@@ -797,17 +810,17 @@ namespace cv { namespace gpu
cv::gpu::device::copyToWithMask_gpu(src.reshape(1), dst.reshape(1), src.elemSize1(), src.channels(), mask.reshape(1), mask.channels() != 1, stream);
}
CV_EXPORTS void convertTo(const GpuMat& src, GpuMat& dst)
void convertTo(const GpuMat& src, GpuMat& dst)
{
cv::gpu::device::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), 1.0, 0.0, 0);
}
CV_EXPORTS void convertTo(const GpuMat& src, GpuMat& dst, double alpha, double beta, cudaStream_t stream = 0)
void convertTo(const GpuMat& src, GpuMat& dst, double alpha, double beta, cudaStream_t stream = 0)
{
cv::gpu::device::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), alpha, beta, stream);
}
CV_EXPORTS void setTo(GpuMat& src, Scalar s, cudaStream_t stream)
void setTo(GpuMat& src, Scalar s, cudaStream_t stream)
{
typedef void (*caller_t)(GpuMat& src, Scalar s, cudaStream_t stream);
......@@ -820,7 +833,7 @@ namespace cv { namespace gpu
callers[src.depth()](src, s, stream);
}
CV_EXPORTS void setTo(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream)
void setTo(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream)
{
typedef void (*caller_t)(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream);
......@@ -833,12 +846,12 @@ namespace cv { namespace gpu
callers[src.depth()](src, s, mask, stream);
}
CV_EXPORTS void setTo(GpuMat& src, Scalar s)
void setTo(GpuMat& src, Scalar s)
{
setTo(src, s, 0);
}
CV_EXPORTS void setTo(GpuMat& src, Scalar s, const GpuMat& mask)
void setTo(GpuMat& src, Scalar s, const GpuMat& mask)
{
setTo(src, s, mask, 0);
}
......
......@@ -5,24 +5,24 @@ Data Structures
gpu::DevMem2D\_
gpu::PtrStepSz
---------------
.. ocv:class:: gpu::DevMem2D\_
.. ocv:class:: gpu::PtrStepSz
Lightweight class encapsulating pitched memory on a GPU and passed to nvcc-compiled code (CUDA kernels). Typically, it is used internally by OpenCV and by users who write device code. You can call its members from both host and device code. ::
template <typename T> struct DevMem2D_
template <typename T> struct PtrStepSz
{
int cols;
int rows;
T* data;
size_t step;
DevMem2D_() : cols(0), rows(0), data(0), step(0){};
DevMem2D_(int rows, int cols, T *data, size_t step);
PtrStepSz() : cols(0), rows(0), data(0), step(0){};
PtrStepSz(int rows, int cols, T *data, size_t step);
template <typename U>
explicit DevMem2D_(const DevMem2D_<U>& d);
explicit PtrStepSz(const PtrStepSz<U>& d);
typedef T elem_type;
enum { elem_size = sizeof(elem_type) };
......@@ -34,25 +34,25 @@ Lightweight class encapsulating pitched memory on a GPU and passed to nvcc-compi
__CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const;
};
typedef DevMem2D_<unsigned char> DevMem2D;
typedef DevMem2D_<float> DevMem2Df;
typedef DevMem2D_<int> DevMem2Di;
typedef PtrStepSz<unsigned char> PtrStepSzb;
typedef PtrStepSz<float> PtrStepSzf;
typedef PtrStepSz<int> PtrStepSzi;
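
For illustration, a minimal kernel sketch using the renamed type (a hedged example relying only on the ``cols``, ``rows``, and ``ptr`` members shown above; it is not part of the library)::

    __global__ void threshold(cv::gpu::PtrStepSzb img, unsigned char thresh)
    {
        const int x = blockIdx.x * blockDim.x + threadIdx.x;
        const int y = blockIdx.y * blockDim.y + threadIdx.y;

        // cols/rows travel with the pointer, so the bounds check needs no extra arguments
        if (x < img.cols && y < img.rows)
            img.ptr(y)[x] = img.ptr(y)[x] > thresh ? 255 : 0;
    }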
gpu::PtrStep\_
gpu::PtrStep
--------------
.. ocv:class:: gpu::PtrStep\_
.. ocv:class:: gpu::PtrStep
Structure similar to :ocv:class:`gpu::DevMem2D_` but containing only a pointer and row step. Width and height fields are excluded due to performance reasons. The structure is intended for internal use or for users who write device code. ::
Structure similar to :ocv:class:`gpu::PtrStepSz` but containing only a pointer and row step. Width and height fields are excluded due to performance reasons. The structure is intended for internal use or for users who write device code. ::
template<typename T> struct PtrStep_
template<typename T> struct PtrStep
{
T* data;
size_t step;
PtrStep_();
PtrStep_(const DevMem2D_<T>& mem);
PtrStep();
PtrStep(const PtrStepSz<T>& mem);
typedef T elem_type;
enum { elem_size = sizeof(elem_type) };
......@@ -62,25 +62,9 @@ Structure similar to :ocv:class:`gpu::DevMem2D_` but containing only a pointer a
__CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const;
};
typedef PtrStep_<unsigned char> PtrStep;
typedef PtrStep_<float> PtrStepf;
typedef PtrStep_<int> PtrStepi;
gpu::PtrElemStep\_
------------------
.. ocv:class:: gpu::PtrElemStep\_
Structure similar to :ocv:class:`gpu::DevMem2D_` but containing only a pointer and a row step in elements. Width and height fields are excluded due to performance reasons. This class can only be constructed if ``sizeof(T)`` is a multiple of 256. The structure is intended for internal use or for users who write device code. ::
template<typename T> struct PtrElemStep_ : public PtrStep_<T>
{
PtrElemStep_(const DevMem2D_<T>& mem);
__CV_GPU_HOST_DEVICE__ T* ptr(int y = 0);
__CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const;
};
typedef PtrStep<unsigned char> PtrStep;
typedef PtrStep<float> PtrStepf;
typedef PtrStep<int> PtrStepi;
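
A hedged sketch of how the size-less variant is typically used: the caller keeps the bounds and passes only the pointer and step (an illustrative example, not taken from the library)::

    __global__ void add(cv::gpu::PtrStepf dst, cv::gpu::PtrStepf a, cv::gpu::PtrStepf b,
                        int rows, int cols)
    {
        const int x = blockIdx.x * blockDim.x + threadIdx.x;
        const int y = blockIdx.y * blockDim.y + threadIdx.y;

        // rows/cols arrive separately because PtrStep carries no size
        if (x < cols && y < rows)
            dst.ptr(y)[x] = a.ptr(y)[x] + b.ptr(y)[x];
    }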
gpu::GpuMat
......@@ -93,7 +77,7 @@ Base storage class for GPU memory with reference counting. Its interface matches
* no functions that return references to their data (because references on GPU are not valid for CPU)
* no expression templates technique support
Beware that the latter limitation may lead to overloaded matrix operators that cause memory allocations. The ``GpuMat`` class is convertible to :ocv:class:`gpu::DevMem2D_` and :ocv:class:`gpu::PtrStep_` so it can be passed directly to the kernel.
Beware that the latter limitation may lead to overloaded matrix operators that cause memory allocations. The ``GpuMat`` class is convertible to :ocv:class:`gpu::PtrStepSz` and :ocv:class:`gpu::PtrStep` so it can be passed directly to the kernel.
.. note:: In contrast with :ocv:class:`Mat`, in most cases ``GpuMat::isContinuous() == false`` . This means that rows are aligned to a size depending on the hardware. Single-row ``GpuMat`` is always a continuous matrix.
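
Since ``GpuMat`` converts implicitly to both wrapper types, kernels like the sketches above can take a ``GpuMat`` directly at the launch site (a hedged example; ``divUp`` is assumed to be the usual grid-rounding helper)::

    cv::gpu::GpuMat img(480, 640, CV_8UC1);

    dim3 block(32, 8);
    dim3 grid(cv::gpu::divUp(img.cols, block.x), cv::gpu::divUp(img.rows, block.y));

    // GpuMat -> PtrStepSzb conversion happens implicitly at the call
    threshold<<<grid, block>>>(img, 128);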
......@@ -113,10 +97,10 @@ Beware that the latter limitation may lead to overloaded matrix operators that c
//! builds GpuMat from Mat. Blocks uploading to device.
explicit GpuMat (const Mat& m);
//! returns lightweight DevMem2D_ structure for passing
//! returns lightweight PtrStepSz structure for passing
//to nvcc-compiled code. Contains size, data ptr and step.
template <class T> operator DevMem2D_<T>() const;
template <class T> operator PtrStep_<T>() const;
template <class T> operator PtrStepSz<T>() const;
template <class T> operator PtrStep<T>() const;
//! blocks uploading data to GpuMat.
void upload(const cv::Mat& m);
......
......@@ -40,4 +40,4 @@
//
//M*/
#include "opencv2/core/devmem2d.hpp"
#include "opencv2/core/cuda_devptrs.hpp"
......@@ -454,8 +454,8 @@ namespace cv { namespace gpu { namespace device
{
namespace mathfunc
{
void cartToPolar_gpu(DevMem2Df x, DevMem2Df y, DevMem2Df mag, bool magSqr, DevMem2Df angle, bool angleInDegrees, cudaStream_t stream);
void polarToCart_gpu(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y, bool angleInDegrees, cudaStream_t stream);
void cartToPolar_gpu(PtrStepSzf x, PtrStepSzf y, PtrStepSzf mag, bool magSqr, PtrStepSzf angle, bool angleInDegrees, cudaStream_t stream);
void polarToCart_gpu(PtrStepSzf mag, PtrStepSzf angle, PtrStepSzf x, PtrStepSzf y, bool angleInDegrees, cudaStream_t stream);
}
}}}
......
......@@ -58,7 +58,7 @@ namespace cv { namespace gpu { namespace device {
float decisionThreshold, int maxFeatures, int numInitializationFrames);
template <typename SrcT>
void update_gpu(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures,
void update_gpu(PtrStepSzb frame, PtrStepb fgmask, PtrStepSzi colors, PtrStepf weights, PtrStepi nfeatures,
int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
}
}}}
......@@ -109,7 +109,7 @@ void cv::gpu::GMG_GPU::operator ()(const cv::gpu::GpuMat& frame, cv::gpu::GpuMat
{
using namespace cv::gpu::device::bgfg_gmg;
typedef void (*func_t)(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures,
typedef void (*func_t)(PtrStepSzb frame, PtrStepb fgmask, PtrStepSzi colors, PtrStepf weights, PtrStepi nfeatures,
int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
static const func_t funcs[6][4] =
{
......
......@@ -62,14 +62,14 @@ namespace cv { namespace gpu { namespace device
{
namespace mog
{
void mog_gpu(DevMem2Db frame, int cn, DevMem2Db fgmask, DevMem2Df weight, DevMem2Df sortKey, DevMem2Db mean, DevMem2Db var,
void mog_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzf weight, PtrStepSzf sortKey, PtrStepSzb mean, PtrStepSzb var,
int nmixtures, float varThreshold, float learningRate, float backgroundRatio, float noiseSigma,
cudaStream_t stream);
void getBackgroundImage_gpu(int cn, DevMem2Df weight, DevMem2Db mean, DevMem2Db dst, int nmixtures, float backgroundRatio, cudaStream_t stream);
void getBackgroundImage_gpu(int cn, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, int nmixtures, float backgroundRatio, cudaStream_t stream);
void loadConstants(int nmixtures, float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau, unsigned char shadowVal);
void mog2_gpu(DevMem2Db frame, int cn, DevMem2Db fgmask, DevMem2Db modesUsed, DevMem2Df weight, DevMem2Df variance, DevMem2Db mean, float alphaT, float prune, bool detectShadows, cudaStream_t stream);
void getBackgroundImage2_gpu(int cn, DevMem2Db modesUsed, DevMem2Df weight, DevMem2Db mean, DevMem2Db dst, cudaStream_t stream);
void mog2_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, float alphaT, float prune, bool detectShadows, cudaStream_t stream);
void getBackgroundImage2_gpu(int cn, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream);
}
}}}
......
......@@ -57,9 +57,9 @@ namespace cv { namespace gpu { namespace device
{
void loadConstants(int nbSamples, int reqMatches, int radius, int subsamplingFactor);
void init_gpu(DevMem2Db frame, int cn, DevMem2Db samples, DevMem2D_<unsigned int> randStates, cudaStream_t stream);
void init_gpu(PtrStepSzb frame, int cn, PtrStepSzb samples, PtrStepSz<unsigned int> randStates, cudaStream_t stream);
void update_gpu(DevMem2Db frame, int cn, DevMem2Db fgmask, DevMem2Db samples, DevMem2D_<unsigned int> randStates, cudaStream_t stream);
void update_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb samples, PtrStepSz<unsigned int> randStates, cudaStream_t stream);
}
}}}
......
......@@ -59,10 +59,10 @@ namespace cv { namespace gpu { namespace device
{
namespace bilateral_filter
{
void load_constants(float* table_color, DevMem2Df table_space, int ndisp, int radius, short edge_disc, short max_disc);
void load_constants(float* table_color, PtrStepSzf table_space, int ndisp, int radius, short edge_disc, short max_disc);
void bilateral_filter_gpu(DevMem2Db disp, DevMem2Db img, int channels, int iters, cudaStream_t stream);
void bilateral_filter_gpu(DevMem2D_<short> disp, DevMem2Db img, int channels, int iters, cudaStream_t stream);
void bilateral_filter_gpu(PtrStepSzb disp, PtrStepSzb img, int channels, int iters, cudaStream_t stream);
void bilateral_filter_gpu(PtrStepSz<short> disp, PtrStepSzb img, int channels, int iters, cudaStream_t stream);
}
}}}
......@@ -120,7 +120,7 @@ namespace
disp.copyTo(dst);
}
bilateral_filter_gpu((DevMem2D_<T>)dst, img, img.channels(), iters, StreamAccessor::getStream(stream));
bilateral_filter_gpu((PtrStepSz<T>)dst, img, img.channels(), iters, StreamAccessor::getStream(stream));
}
typedef void (*bilateral_filter_operator_t)(int ndisp, int radius, int iters, float edge_threshold, float max_disc_threshold,
......
......@@ -60,12 +60,12 @@ namespace cv { namespace gpu { namespace device
{
namespace transform_points
{
void call(const DevMem2D_<float3> src, const float* rot, const float* transl, DevMem2D_<float3> dst, cudaStream_t stream);
void call(const PtrStepSz<float3> src, const float* rot, const float* transl, PtrStepSz<float3> dst, cudaStream_t stream);
}
namespace project_points
{
void call(const DevMem2D_<float3> src, const float* rot, const float* transl, const float* proj, DevMem2D_<float2> dst, cudaStream_t stream);
void call(const PtrStepSz<float3> src, const float* rot, const float* transl, const float* proj, PtrStepSz<float2> dst, cudaStream_t stream);
}
namespace solve_pnp_ransac
......
......@@ -352,18 +352,18 @@ namespace cv { namespace gpu { namespace device
float initalScale,
float factor,
int total,
const DevMem2Db& mstages,
const PtrStepSzb& mstages,
const int nstages,
const DevMem2Di& mnodes,
const DevMem2Df& mleaves,
const DevMem2Di& msubsets,
const DevMem2Db& mfeatures,
const PtrStepSzi& mnodes,
const PtrStepSzf& mleaves,
const PtrStepSzi& msubsets,
const PtrStepSzb& mfeatures,
const int subsetSize,
DevMem2D_<int4> objects,
PtrStepSz<int4> objects,
unsigned int* classified,
DevMem2Di integral);
PtrStepSzi integral);
void connectedConmonents(DevMem2D_<int4> candidates, int ncandidates, DevMem2D_<int4> objects,int groupThreshold, float grouping_eps, unsigned int* nclasses);
void connectedConmonents(PtrStepSz<int4> candidates, int ncandidates, PtrStepSz<int4> objects,int groupThreshold, float grouping_eps, unsigned int* nclasses);
}
}}}
......
......@@ -59,9 +59,9 @@ namespace cv { namespace gpu {
namespace device
{
template <int cn>
void Bayer2BGR_8u_gpu(DevMem2Db src, DevMem2Db dst, bool blue_last, bool start_with_green, cudaStream_t stream);
void Bayer2BGR_8u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
template <int cn>
void Bayer2BGR_16u_gpu(DevMem2Db src, DevMem2Db dst, bool blue_last, bool start_with_green, cudaStream_t stream);
void Bayer2BGR_16u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
}
}}
......@@ -69,7 +69,7 @@ using namespace ::cv::gpu::device;
namespace
{
typedef void (*gpu_func_t)(const DevMem2Db& src, const DevMem2Db& dst, cudaStream_t stream);
typedef void (*gpu_func_t)(const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream);
void bgr_to_rgb(const GpuMat& src, GpuMat& dst, int, Stream& stream)
{
......@@ -1336,7 +1336,7 @@ namespace
void bayer_to_bgr(const GpuMat& src, GpuMat& dst, int dcn, bool blue_last, bool start_with_green, Stream& stream)
{
typedef void (*func_t)(DevMem2Db src, DevMem2Db dst, bool blue_last, bool start_with_green, cudaStream_t stream);
typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
static const func_t funcs[3][4] =
{
{0,0,Bayer2BGR_8u_gpu<3>, Bayer2BGR_8u_gpu<4>},
......
......@@ -191,7 +191,7 @@ namespace cv { namespace gpu { namespace device {
dstImage[y * dstImagePitch + x + 1 ] = RGBAPACK_10bit(red[1], green[1], blue[1], constAlpha);
}
void NV12ToARGB_gpu(const PtrStepb decodedFrame, DevMem2D_<uint> interopFrame, cudaStream_t stream)
void NV12ToARGB_gpu(const PtrStepb decodedFrame, PtrStepSz<uint> interopFrame, cudaStream_t stream)
{
dim3 block(32, 8);
dim3 grid(divUp(interopFrame.cols, 2 * block.x), divUp(interopFrame.rows, block.y));
......
......@@ -168,7 +168,7 @@ namespace cv { namespace gpu { namespace device {
template <typename T> struct Quantization : detail::Quantization<VecTraits<T>::cn> {};
template <typename SrcT>
__global__ void update(const PtrStep_<SrcT> frame, PtrStepb fgmask, PtrStepi colors_, PtrStepf weights_, PtrStepi nfeatures_,
__global__ void update(const PtrStep<SrcT> frame, PtrStepb fgmask, PtrStepi colors_, PtrStepf weights_, PtrStepi nfeatures_,
const int frameNum, const float learningRate, const bool updateBackgroundModel)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
......@@ -222,7 +222,7 @@ namespace cv { namespace gpu { namespace device {
}
template <typename SrcT>
void update_gpu(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures,
void update_gpu(PtrStepSzb frame, PtrStepb fgmask, PtrStepSzi colors, PtrStepf weights, PtrStepi nfeatures,
int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream)
{
const dim3 block(32, 8);
......@@ -230,7 +230,7 @@ namespace cv { namespace gpu { namespace device {
cudaSafeCall( cudaFuncSetCacheConfig(update<SrcT>, cudaFuncCachePreferL1) );
update<SrcT><<<grid, block, 0, stream>>>((DevMem2D_<SrcT>) frame, fgmask, colors, weights, nfeatures, frameNum, learningRate, updateBackgroundModel);
update<SrcT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask, colors, weights, nfeatures, frameNum, learningRate, updateBackgroundModel);
cudaSafeCall( cudaGetLastError() );
......@@ -238,16 +238,16 @@ namespace cv { namespace gpu { namespace device {
cudaSafeCall( cudaDeviceSynchronize() );
}
template void update_gpu<uchar >(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
template void update_gpu<uchar3 >(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
template void update_gpu<uchar4 >(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
template void update_gpu<uchar >(PtrStepSzb frame, PtrStepb fgmask, PtrStepSzi colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
template void update_gpu<uchar3 >(PtrStepSzb frame, PtrStepb fgmask, PtrStepSzi colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
template void update_gpu<uchar4 >(PtrStepSzb frame, PtrStepb fgmask, PtrStepSzi colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
template void update_gpu<ushort >(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
template void update_gpu<ushort3>(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
template void update_gpu<ushort4>(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
template void update_gpu<ushort >(PtrStepSzb frame, PtrStepb fgmask, PtrStepSzi colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
template void update_gpu<ushort3>(PtrStepSzb frame, PtrStepb fgmask, PtrStepSzi colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
template void update_gpu<ushort4>(PtrStepSzb frame, PtrStepb fgmask, PtrStepSzi colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
template void update_gpu<float >(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
template void update_gpu<float3 >(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
template void update_gpu<float4 >(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
template void update_gpu<float >(PtrStepSzb frame, PtrStepb fgmask, PtrStepSzi colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
template void update_gpu<float3 >(PtrStepSzb frame, PtrStepb fgmask, PtrStepSzi colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
template void update_gpu<float4 >(PtrStepSzb frame, PtrStepb fgmask, PtrStepSzi colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
}
}}}
......@@ -90,7 +90,7 @@ namespace cv { namespace gpu { namespace device
}
template <typename SrcT, typename SampleT>
__global__ void init(const DevMem2D_<SrcT> frame, PtrStep_<SampleT> samples, PtrStep_<uint> randStates)
__global__ void init(const PtrStepSz<SrcT> frame, PtrStep<SampleT> samples, PtrStep<uint> randStates)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
......@@ -116,23 +116,23 @@ namespace cv { namespace gpu { namespace device
}
template <typename SrcT, typename SampleT>
void init_caller(DevMem2Db frame, DevMem2Db samples, DevMem2D_<uint> randStates, cudaStream_t stream)
void init_caller(PtrStepSzb frame, PtrStepSzb samples, PtrStepSz<uint> randStates, cudaStream_t stream)
{
dim3 block(32, 8);
dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y));
cudaSafeCall( cudaFuncSetCacheConfig(init<SrcT, SampleT>, cudaFuncCachePreferL1) );
init<SrcT, SampleT><<<grid, block, 0, stream>>>((DevMem2D_<SrcT>) frame, (DevMem2D_<SampleT>) samples, randStates);
init<SrcT, SampleT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, (PtrStepSz<SampleT>) samples, randStates);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
void init_gpu(DevMem2Db frame, int cn, DevMem2Db samples, DevMem2D_<uint> randStates, cudaStream_t stream)
void init_gpu(PtrStepSzb frame, int cn, PtrStepSzb samples, PtrStepSz<uint> randStates, cudaStream_t stream)
{
typedef void (*func_t)(DevMem2Db frame, DevMem2Db samples, DevMem2D_<uint> randStates, cudaStream_t stream);
typedef void (*func_t)(PtrStepSzb frame, PtrStepSzb samples, PtrStepSz<uint> randStates, cudaStream_t stream);
static const func_t funcs[] =
{
0, init_caller<uchar, uchar>, 0, init_caller<uchar3, uchar4>, init_caller<uchar4, uchar4>
......@@ -155,7 +155,7 @@ namespace cv { namespace gpu { namespace device
}
template <typename SrcT, typename SampleT>
__global__ void update(const DevMem2D_<SrcT> frame, PtrStepb fgmask, PtrStep_<SampleT> samples, PtrStep_<uint> randStates)
__global__ void update(const PtrStepSz<SrcT> frame, PtrStepb fgmask, PtrStep<SampleT> samples, PtrStep<uint> randStates)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
......@@ -225,23 +225,23 @@ namespace cv { namespace gpu { namespace device
}
template <typename SrcT, typename SampleT>
void update_caller(DevMem2Db frame, DevMem2Db fgmask, DevMem2Db samples, DevMem2D_<uint> randStates, cudaStream_t stream)
void update_caller(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb samples, PtrStepSz<uint> randStates, cudaStream_t stream)
{
dim3 block(32, 8);
dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y));
cudaSafeCall( cudaFuncSetCacheConfig(update<SrcT, SampleT>, cudaFuncCachePreferL1) );
update<SrcT, SampleT><<<grid, block, 0, stream>>>((DevMem2D_<SrcT>) frame, fgmask, (DevMem2D_<SampleT>) samples, randStates);
update<SrcT, SampleT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask, (PtrStepSz<SampleT>) samples, randStates);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
void update_gpu(DevMem2Db frame, int cn, DevMem2Db fgmask, DevMem2Db samples, DevMem2D_<uint> randStates, cudaStream_t stream)
void update_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb samples, PtrStepSz<uint> randStates, cudaStream_t stream)
{
typedef void (*func_t)(DevMem2Db frame, DevMem2Db fgmask, DevMem2Db samples, DevMem2D_<uint> randStates, cudaStream_t stream);
typedef void (*func_t)(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb samples, PtrStepSz<uint> randStates, cudaStream_t stream);
static const func_t funcs[] =
{
0, update_caller<uchar, uchar>, 0, update_caller<uchar3, uchar4>, update_caller<uchar4, uchar4>
......
......@@ -57,7 +57,7 @@ namespace cv { namespace gpu { namespace device
__constant__ short cedge_disc;
__constant__ short cmax_disc;
void load_constants(float* table_color, DevMem2Df table_space, int ndisp, int radius, short edge_disc, short max_disc)
void load_constants(float* table_color, PtrStepSzf table_space, int ndisp, int radius, short edge_disc, short max_disc)
{
cudaSafeCall( cudaMemcpyToSymbol(ctable_color, &table_color, sizeof(table_color)) );
cudaSafeCall( cudaMemcpyToSymbol(ctable_space, &table_space.data, sizeof(table_space.data)) );
......@@ -176,7 +176,7 @@ namespace cv { namespace gpu { namespace device
}
template <typename T>
void bilateral_filter_caller(DevMem2D_<T> disp, DevMem2Db img, int channels, int iters, cudaStream_t stream)
void bilateral_filter_caller(PtrStepSz<T> disp, PtrStepSzb img, int channels, int iters, cudaStream_t stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);
......@@ -213,12 +213,12 @@ namespace cv { namespace gpu { namespace device
cudaSafeCall( cudaDeviceSynchronize() );
}
void bilateral_filter_gpu(DevMem2Db disp, DevMem2Db img, int channels, int iters, cudaStream_t stream)
void bilateral_filter_gpu(PtrStepSzb disp, PtrStepSzb img, int channels, int iters, cudaStream_t stream)
{
bilateral_filter_caller(disp, img, channels, iters, stream);
}
void bilateral_filter_gpu(DevMem2D_<short> disp, DevMem2Db img, int channels, int iters, cudaStream_t stream)
void bilateral_filter_gpu(PtrStepSz<short> disp, PtrStepSzb img, int channels, int iters, cudaStream_t stream)
{
bilateral_filter_caller(disp, img, channels, iters, stream);
}
......
......@@ -66,8 +66,8 @@ namespace cv { namespace gpu { namespace device
}
};
void call(const DevMem2D_<float3> src, const float* rot,
const float* transl, DevMem2D_<float3> dst,
void call(const PtrStepSz<float3> src, const float* rot,
const float* transl, PtrStepSz<float3> dst,
cudaStream_t stream)
{
cudaSafeCall(cudaMemcpyToSymbol(crot0, rot, sizeof(float) * 3));
......@@ -103,8 +103,8 @@ namespace cv { namespace gpu { namespace device
}
};
void call(const DevMem2D_<float3> src, const float* rot,
const float* transl, const float* proj, DevMem2D_<float2> dst,
void call(const PtrStepSz<float3> src, const float* rot,
const float* transl, const float* proj, PtrStepSz<float2> dst,
cudaStream_t stream)
{
cudaSafeCall(cudaMemcpyToSymbol(crot0, rot, sizeof(float) * 3));
......
......@@ -176,7 +176,7 @@ namespace cv { namespace gpu { namespace device
template<typename T, typename F>
__global__ void computeConnectivity(const DevMem2D_<T> image, DevMem2D components, F connected)
__global__ void computeConnectivity(const PtrStepSz<T> image, PtrStepSzb components, F connected)
{
int x = threadIdx.x + blockIdx.x * blockDim.x;
int y = threadIdx.y + blockIdx.y * blockDim.y;
......@@ -202,7 +202,7 @@ namespace cv { namespace gpu { namespace device
}
template< typename T>
void computeEdges(const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream)
void computeEdges(const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream)
{
dim3 block(CTA_SIZE_X, CTA_SIZE_Y);
dim3 grid(divUp(image.cols, block.x), divUp(image.rows, block.y));
......@@ -210,23 +210,23 @@ namespace cv { namespace gpu { namespace device
typedef InInterval<typename IntervalsTraits<T>::dist_type, IntervalsTraits<T>::ch> Int_t;
Int_t inInt(lo, hi);
computeConnectivity<T, Int_t><<<grid, block, 0, stream>>>(static_cast<const DevMem2D_<T> >(image), edges, inInt);
computeConnectivity<T, Int_t><<<grid, block, 0, stream>>>(static_cast<const PtrStepSz<T> >(image), edges, inInt);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template void computeEdges<uchar> (const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream);
template void computeEdges<uchar3> (const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream);
template void computeEdges<uchar4> (const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream);
template void computeEdges<ushort> (const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream);
template void computeEdges<ushort3>(const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream);
template void computeEdges<ushort4>(const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream);
template void computeEdges<int> (const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream);
template void computeEdges<float> (const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream);
template void computeEdges<uchar> (const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream);
template void computeEdges<uchar3> (const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream);
template void computeEdges<uchar4> (const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream);
template void computeEdges<ushort> (const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream);
template void computeEdges<ushort3>(const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream);
template void computeEdges<ushort4>(const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream);
template void computeEdges<int> (const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream);
template void computeEdges<float> (const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream);
__global__ void lableTiles(const DevMem2D edges, DevMem2Di comps)
__global__ void lableTiles(const PtrStepSzb edges, PtrStepSzi comps)
{
int x = threadIdx.x + blockIdx.x * TILE_COLS;
int y = threadIdx.y + blockIdx.y * TILE_ROWS;
......@@ -360,7 +360,7 @@ namespace cv { namespace gpu { namespace device
}
}
__device__ __forceinline__ int root(const DevMem2Di& comps, int label)
__device__ __forceinline__ int root(const PtrStepSzi& comps, int label)
{
while(1)
{
......@@ -376,7 +376,7 @@ namespace cv { namespace gpu { namespace device
return label;
}
__device__ __forceinline__ void isConnected(DevMem2Di& comps, int l1, int l2, bool& changed)
__device__ __forceinline__ void isConnected(PtrStepSzi& comps, int l1, int l2, bool& changed)
{
int r1 = root(comps, l1);
int r2 = root(comps, l2);
......@@ -394,7 +394,7 @@ namespace cv { namespace gpu { namespace device
}
__global__ void crossMerge(const int tilesNumY, const int tilesNumX, int tileSizeY, int tileSizeX,
const DevMem2D edges, DevMem2Di comps, const int yIncomplete, int xIncomplete)
const PtrStepSzb edges, PtrStepSzi comps, const int yIncomplete, int xIncomplete)
{
int tid = threadIdx.y * blockDim.x + threadIdx.x;
int stride = blockDim.y * blockDim.x;
......@@ -482,7 +482,7 @@ namespace cv { namespace gpu { namespace device
} while (Emulation::syncthreadsOr(changed));
}
__global__ void flatten(const DevMem2D edges, DevMem2Di comps)
__global__ void flatten(const PtrStepSzb edges, PtrStepSzi comps)
{
int x = threadIdx.x + blockIdx.x * blockDim.x;
int y = threadIdx.y + blockIdx.y * blockDim.y;
......@@ -493,7 +493,7 @@ namespace cv { namespace gpu { namespace device
enum {CC_NO_COMPACT = 0, CC_COMPACT_LABELS = 1};
void labelComponents(const DevMem2D& edges, DevMem2Di comps, int flags, cudaStream_t stream)
void labelComponents(const PtrStepSzb& edges, PtrStepSzi comps, int flags, cudaStream_t stream)
{
dim3 block(CTA_SIZE_X, CTA_SIZE_Y);
dim3 grid(divUp(edges.cols, TILE_COLS), divUp(edges.rows, TILE_ROWS));
......
......@@ -222,12 +222,12 @@ namespace cv { namespace gpu { namespace device
};
#define OPENCV_GPU_IMPLEMENT_CVTCOLOR(name, traits) \
void name(const DevMem2Db& src, const DevMem2Db& dst, cudaStream_t stream) \
void name(const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream) \
{ \
traits::functor_type functor = traits::create_functor(); \
typedef typename traits::functor_type::argument_type src_t; \
typedef typename traits::functor_type::result_type dst_t; \
cv::gpu::device::transform((DevMem2D_<src_t>)src, (DevMem2D_<dst_t>)dst, functor, WithOutMask(), stream); \
cv::gpu::device::transform((PtrStepSz<src_t>)src, (PtrStepSz<dst_t>)dst, functor, WithOutMask(), stream); \
}
#define OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(name) \
......
......@@ -62,7 +62,7 @@ namespace cv { namespace gpu { namespace device
}
template <int KSIZE, typename T, typename D, typename B>
__global__ void linearColumnFilter(const DevMem2D_<T> src, PtrStep<D> dst, const int anchor, const B brd)
__global__ void linearColumnFilter(const PtrStepSz<T> src, PtrStep<D> dst, const int anchor, const B brd)
{
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
const int BLOCK_DIM_X = 16;
......@@ -125,7 +125,7 @@ namespace cv { namespace gpu { namespace device
}
template <int KSIZE, typename T, typename D, template<typename> class B>
void linearColumnFilter_caller(DevMem2D_<T> src, DevMem2D_<D> dst, int anchor, int cc, cudaStream_t stream)
void linearColumnFilter_caller(PtrStepSz<T> src, PtrStepSz<D> dst, int anchor, int cc, cudaStream_t stream)
{
int BLOCK_DIM_X;
int BLOCK_DIM_Y;
......@@ -158,9 +158,9 @@ namespace cv { namespace gpu { namespace device
}
template <typename T, typename D>
void linearColumnFilter_gpu(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream)
void linearColumnFilter_gpu(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream)
{
typedef void (*caller_t)(DevMem2D_<T> src, DevMem2D_<D> dst, int anchor, int cc, cudaStream_t stream);
typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<D> dst, int anchor, int cc, cudaStream_t stream);
static const caller_t callers[5][33] =
{
......@@ -343,13 +343,13 @@ namespace cv { namespace gpu { namespace device
loadKernel(kernel, ksize);
callers[brd_type][ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor, cc, stream);
callers[brd_type][ksize]((PtrStepSz<T>)src, (PtrStepSz<D>)dst, anchor, cc, stream);
}
template void linearColumnFilter_gpu<float , uchar >(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearColumnFilter_gpu<float4, uchar4>(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearColumnFilter_gpu<float3, short3>(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearColumnFilter_gpu<float , int >(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearColumnFilter_gpu<float , float >(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearColumnFilter_gpu<float , uchar >(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearColumnFilter_gpu<float4, uchar4>(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearColumnFilter_gpu<float3, short3>(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearColumnFilter_gpu<float , int >(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
template void linearColumnFilter_gpu<float , float >(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
} // namespace column_filter
}}} // namespace cv { namespace gpu { namespace device
......@@ -49,7 +49,7 @@ namespace cv { namespace gpu {
namespace device
{
template <typename D>
__global__ void Bayer2BGR_8u(const PtrStepb src, DevMem2D_<D> dst, const bool blue_last, const bool start_with_green)
__global__ void Bayer2BGR_8u(const PtrStepb src, PtrStepSz<D> dst, const bool blue_last, const bool start_with_green)
{
const int s_x = blockIdx.x * blockDim.x + threadIdx.x;
int s_y = blockIdx.y * blockDim.y + threadIdx.y;
......@@ -193,7 +193,7 @@ namespace cv { namespace gpu {
}
template <typename D>
__global__ void Bayer2BGR_16u(const PtrStepb src, DevMem2D_<D> dst, const bool blue_last, const bool start_with_green)
__global__ void Bayer2BGR_16u(const PtrStepb src, PtrStepSz<D> dst, const bool blue_last, const bool start_with_green)
{
const int s_x = blockIdx.x * blockDim.x + threadIdx.x;
int s_y = blockIdx.y * blockDim.y + threadIdx.y;
......@@ -287,7 +287,7 @@ namespace cv { namespace gpu {
}
template <int cn>
void Bayer2BGR_8u_gpu(DevMem2Db src, DevMem2Db dst, bool blue_last, bool start_with_green, cudaStream_t stream)
void Bayer2BGR_8u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream)
{
typedef typename TypeVec<uchar, cn>::vec_type dst_t;
......@@ -296,14 +296,14 @@ namespace cv { namespace gpu {
cudaSafeCall( cudaFuncSetCacheConfig(Bayer2BGR_8u<dst_t>, cudaFuncCachePreferL1) );
Bayer2BGR_8u<dst_t><<<grid, block, 0, stream>>>(src, (DevMem2D_<dst_t>)dst, blue_last, start_with_green);
Bayer2BGR_8u<dst_t><<<grid, block, 0, stream>>>(src, (PtrStepSz<dst_t>)dst, blue_last, start_with_green);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template <int cn>
void Bayer2BGR_16u_gpu(DevMem2Db src, DevMem2Db dst, bool blue_last, bool start_with_green, cudaStream_t stream)
void Bayer2BGR_16u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream)
{
typedef typename TypeVec<ushort, cn>::vec_type dst_t;
......@@ -312,16 +312,16 @@ namespace cv { namespace gpu {
cudaSafeCall( cudaFuncSetCacheConfig(Bayer2BGR_16u<dst_t>, cudaFuncCachePreferL1) );
Bayer2BGR_16u<dst_t><<<grid, block, 0, stream>>>(src, (DevMem2D_<dst_t>)dst, blue_last, start_with_green);
Bayer2BGR_16u<dst_t><<<grid, block, 0, stream>>>(src, (PtrStepSz<dst_t>)dst, blue_last, start_with_green);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template void Bayer2BGR_8u_gpu<3>(DevMem2Db src, DevMem2Db dst, bool blue_last, bool start_with_green, cudaStream_t stream);
template void Bayer2BGR_8u_gpu<4>(DevMem2Db src, DevMem2Db dst, bool blue_last, bool start_with_green, cudaStream_t stream);
template void Bayer2BGR_16u_gpu<3>(DevMem2Db src, DevMem2Db dst, bool blue_last, bool start_with_green, cudaStream_t stream);
template void Bayer2BGR_16u_gpu<4>(DevMem2Db src, DevMem2Db dst, bool blue_last, bool start_with_green, cudaStream_t stream);
template void Bayer2BGR_8u_gpu<3>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
template void Bayer2BGR_8u_gpu<4>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
template void Bayer2BGR_16u_gpu<3>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
template void Bayer2BGR_16u_gpu<4>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
}
}}
......@@ -221,7 +221,7 @@ namespace cv { namespace gpu { namespace device
}
template <bool calcScore, class Mask>
__global__ void calcKeypoints(const DevMem2Db img, const Mask mask, short2* kpLoc, const unsigned int maxKeypoints, PtrStepi score, const int threshold)
__global__ void calcKeypoints(const PtrStepSzb img, const Mask mask, short2* kpLoc, const unsigned int maxKeypoints, PtrStepi score, const int threshold)
{
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)
......@@ -282,7 +282,7 @@ namespace cv { namespace gpu { namespace device
#endif
}
int calcKeypoints_gpu(DevMem2Db img, DevMem2Db mask, short2* kpLoc, int maxKeypoints, DevMem2Di score, int threshold)
int calcKeypoints_gpu(PtrStepSzb img, PtrStepSzb mask, short2* kpLoc, int maxKeypoints, PtrStepSzi score, int threshold)
{
void* counter_ptr;
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );
......@@ -323,7 +323,7 @@ namespace cv { namespace gpu { namespace device
///////////////////////////////////////////////////////////////////////////
// nonmaxSupression
__global__ void nonmaxSupression(const short2* kpLoc, int count, const DevMem2Di scoreMat, short2* locFinal, float* responseFinal)
__global__ void nonmaxSupression(const short2* kpLoc, int count, const PtrStepSzi scoreMat, short2* locFinal, float* responseFinal)
{
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)
......@@ -359,7 +359,7 @@ namespace cv { namespace gpu { namespace device
#endif
}
int nonmaxSupression_gpu(const short2* kpLoc, int count, DevMem2Di score, short2* loc, float* response)
int nonmaxSupression_gpu(const short2* kpLoc, int count, PtrStepSzi score, short2* loc, float* response)
{
void* counter_ptr;
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );
......
#ifndef __FGD_BGFG_COMMON_HPP__
#define __FGD_BGFG_COMMON_HPP__
#include "opencv2/core/devmem2d.hpp"
#include "opencv2/core/cuda_devptrs.hpp"
namespace bgfg
{
......@@ -122,24 +122,24 @@ namespace bgfg
const int HISTOGRAM_BIN_COUNT = 256;
template <typename PT, typename CT>
void calcDiffHistogram_gpu(cv::gpu::DevMem2Db prevFrame, cv::gpu::DevMem2Db curFrame,
void calcDiffHistogram_gpu(cv::gpu::PtrStepSzb prevFrame, cv::gpu::PtrStepSzb curFrame,
unsigned int* hist0, unsigned int* hist1, unsigned int* hist2,
unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2,
int cc, cudaStream_t stream);
template <typename PT, typename CT>
void calcDiffThreshMask_gpu(cv::gpu::DevMem2Db prevFrame, cv::gpu::DevMem2Db curFrame, uchar3 bestThres, cv::gpu::DevMem2Db changeMask, cudaStream_t stream);
void calcDiffThreshMask_gpu(cv::gpu::PtrStepSzb prevFrame, cv::gpu::PtrStepSzb curFrame, uchar3 bestThres, cv::gpu::PtrStepSzb changeMask, cudaStream_t stream);
void setBGPixelStat(const BGPixelStat& stat);
template <typename PT, typename CT, typename OT>
void bgfgClassification_gpu(cv::gpu::DevMem2Db prevFrame, cv::gpu::DevMem2Db curFrame,
cv::gpu::DevMem2Db Ftd, cv::gpu::DevMem2Db Fbd, cv::gpu::DevMem2Db foreground,
void bgfgClassification_gpu(cv::gpu::PtrStepSzb prevFrame, cv::gpu::PtrStepSzb curFrame,
cv::gpu::PtrStepSzb Ftd, cv::gpu::PtrStepSzb Fbd, cv::gpu::PtrStepSzb foreground,
int deltaC, int deltaCC, float alpha2, int N1c, int N1cc, cudaStream_t stream);
template <typename PT, typename CT, typename OT>
void updateBackgroundModel_gpu(cv::gpu::DevMem2Db prevFrame, cv::gpu::DevMem2Db curFrame,
cv::gpu::DevMem2Db Ftd, cv::gpu::DevMem2Db Fbd, cv::gpu::DevMem2Db foreground, cv::gpu::DevMem2Db background,
void updateBackgroundModel_gpu(cv::gpu::PtrStepSzb prevFrame, cv::gpu::PtrStepSzb curFrame,
cv::gpu::PtrStepSzb Ftd, cv::gpu::PtrStepSzb Fbd, cv::gpu::PtrStepSzb foreground, cv::gpu::PtrStepSzb background,
int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T,
cudaStream_t stream);
}
......
......@@ -97,7 +97,7 @@ namespace cv { namespace gpu { namespace device
#endif // __CUDA_ARCH__ >= 110
}
int findCorners_gpu(DevMem2Df eig, float threshold, DevMem2Db mask, float2* corners, int max_count)
int findCorners_gpu(PtrStepSzf eig, float threshold, PtrStepSzb mask, float2* corners, int max_count)
{
void* counter_ptr;
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );
......@@ -134,7 +134,7 @@ namespace cv { namespace gpu { namespace device
};
void sortCorners_gpu(DevMem2Df eig, float2* corners, int count)
void sortCorners_gpu(PtrStepSzf eig, float2* corners, int count)
{
bindTexture(&eigTex, eig);
......
......@@ -68,7 +68,7 @@ int compactPoints(int N, float *points0, float *points1, const uchar *mask)
__global__ void calcWobbleSuppressionMapsKernel(
const int left, const int idx, const int right, const int width, const int height,
PtrElemStepf mapx, PtrElemStepf mapy)
PtrStepf mapx, PtrStepf mapy)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
......@@ -97,7 +97,7 @@ __global__ void calcWobbleSuppressionMapsKernel(
void calcWobbleSuppressionMaps(
int left, int idx, int right, int width, int height,
const float *ml, const float *mr, DevMem2Df mapx, DevMem2Df mapy)
const float *ml, const float *mr, PtrStepSzf mapx, PtrStepSzf mapy)
{
cudaSafeCall(cudaMemcpyToSymbol(cml, ml, 9*sizeof(float)));
cudaSafeCall(cudaMemcpyToSymbol(cmr, mr, 9*sizeof(float)));
......
......@@ -169,10 +169,10 @@ namespace cv { namespace gpu { namespace device
d_Histogram[blockIdx.x] = saturate_cast<int>(data[0]);
}
void histogram256_gpu(DevMem2Db src, int* hist, uint* buf, cudaStream_t stream)
void histogram256_gpu(PtrStepSzb src, int* hist, uint* buf, cudaStream_t stream)
{
histogram256<<<PARTIAL_HISTOGRAM256_COUNT, HISTOGRAM256_THREADBLOCK_SIZE, 0, stream>>>(
DevMem2D_<uint>(src),
PtrStepSz<uint>(src),
buf,
static_cast<uint>(src.rows * src.step / sizeof(uint)),
src.cols);
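Reinterpreting the byte image as PtrStepSz<uint> lets each thread pull four pixels in one coalesced 32-bit load; src.rows * src.step / sizeof(uint) is the total word count passed above. Inside the kernel each word is unpacked with shifts, roughly (a sketch, with s_Hist standing in for the per-block shared histogram):

    const uint data = d_Data[pos];                  // four packed 8-bit pixels
    atomicAdd(s_Hist + ((data >>  0) & 0xFFU), 1);
    atomicAdd(s_Hist + ((data >>  8) & 0xFFU), 1);
    atomicAdd(s_Hist + ((data >> 16) & 0xFFU), 1);
    atomicAdd(s_Hist + ((data >> 24) & 0xFFU), 1);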
......@@ -189,7 +189,7 @@ namespace cv { namespace gpu { namespace device
__constant__ int c_lut[256];
__global__ void equalizeHist(const DevMem2Db src, PtrStepb dst)
__global__ void equalizeHist(const PtrStepSzb src, PtrStepb dst)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
......@@ -202,7 +202,7 @@ namespace cv { namespace gpu { namespace device
}
}
void equalizeHist_gpu(DevMem2Db src, DevMem2Db dst, const int* lut, cudaStream_t stream)
void equalizeHist_gpu(PtrStepSzb src, PtrStepSzb dst, const int* lut, cudaStream_t stream)
{
dim3 block(16, 16);
dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
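divUp is the usual integer ceiling division, sizing the grid so a partial tile at the image border still gets a block:

    static inline int divUp(int total, int grain) { return (total + grain - 1) / grain; }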
......
......@@ -110,8 +110,8 @@ namespace cv { namespace gpu { namespace device
template <int nblocks> // Number of histogram blocks processed by single GPU thread block
__global__ void compute_hists_kernel_many_blocks(const int img_block_width, const PtrElemStepf grad,
const PtrElemStep qangle, float scale, float* block_hists)
__global__ void compute_hists_kernel_many_blocks(const int img_block_width, const PtrStepf grad,
const PtrStepb qangle, float scale, float* block_hists)
{
const int block_x = threadIdx.z;
const int cell_x = threadIdx.x / 16;
......@@ -149,8 +149,8 @@ namespace cv { namespace gpu { namespace device
float2 vote = *(const float2*)grad_ptr;
uchar2 bin = *(const uchar2*)qangle_ptr;
grad_ptr += grad.step;
qangle_ptr += qangle.step;
grad_ptr += grad.step/grad.elemSize();
qangle_ptr += qangle.step/qangle.elemSize();
int dist_center_y = dist_y - 4 * (1 - 2 * cell_y);
int dist_center_x = dist_x - 4 * (1 - 2 * cell_x);
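This hunk is the one behavioral change in the rename: PtrElemStep counted its step in elements, whereas PtrStep keeps it in bytes (matching GpuMat::step), so typed pointer arithmetic must now divide by the element size. Equivalently, assuming the wrapper's data/step members:

    float* row0 = grad.data;                         // first row
    float* row1 = row0 + grad.step / sizeof(float);  // byte step converted to elements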
......@@ -188,8 +188,8 @@ namespace cv { namespace gpu { namespace device
void compute_hists(int nbins, int block_stride_x, int block_stride_y,
int height, int width, const DevMem2Df& grad,
const DevMem2Db& qangle, float sigma, float* block_hists)
int height, int width, const PtrStepSzf& grad,
const PtrStepSzb& qangle, float sigma, float* block_hists)
{
const int nblocks = 1;
......@@ -512,7 +512,7 @@ namespace cv { namespace gpu { namespace device
template <int nthreads>
__global__ void extract_descrs_by_rows_kernel(const int img_block_width, const int win_block_stride_x, const int win_block_stride_y,
const float* block_hists, PtrElemStepf descriptors)
const float* block_hists, PtrStepf descriptors)
{
// Get left top corner of the window in src
const float* hist = block_hists + (blockIdx.y * win_block_stride_y * img_block_width +
......@@ -532,7 +532,7 @@ namespace cv { namespace gpu { namespace device
void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x, int win_stride_y, int win_stride_x,
int height, int width, float* block_hists, DevMem2Df descriptors)
int height, int width, float* block_hists, PtrStepSzf descriptors)
{
const int nthreads = 256;
......@@ -555,7 +555,7 @@ namespace cv { namespace gpu { namespace device
template <int nthreads>
__global__ void extract_descrs_by_cols_kernel(const int img_block_width, const int win_block_stride_x,
const int win_block_stride_y, const float* block_hists,
PtrElemStepf descriptors)
PtrStepf descriptors)
{
// Get left top corner of the window in src
const float* hist = block_hists + (blockIdx.y * win_block_stride_y * img_block_width +
......@@ -581,7 +581,7 @@ namespace cv { namespace gpu { namespace device
void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x,
int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
DevMem2Df descriptors)
PtrStepSzf descriptors)
{
const int nthreads = 256;
......@@ -605,8 +605,8 @@ namespace cv { namespace gpu { namespace device
template <int nthreads, int correct_gamma>
__global__ void compute_gradients_8UC4_kernel(int height, int width, const PtrElemStep img,
float angle_scale, PtrElemStepf grad, PtrElemStep qangle)
__global__ void compute_gradients_8UC4_kernel(int height, int width, const PtrStepb img,
float angle_scale, PtrStepf grad, PtrStepb qangle)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
......@@ -707,8 +707,8 @@ namespace cv { namespace gpu { namespace device
}
void compute_gradients_8UC4(int nbins, int height, int width, const DevMem2Db& img,
float angle_scale, DevMem2Df grad, DevMem2Db qangle, bool correct_gamma)
void compute_gradients_8UC4(int nbins, int height, int width, const PtrStepSzb& img,
float angle_scale, PtrStepSzf grad, PtrStepSzb qangle, bool correct_gamma)
{
(void)nbins;
const int nthreads = 256;
......@@ -727,8 +727,8 @@ namespace cv { namespace gpu { namespace device
}
template <int nthreads, int correct_gamma>
__global__ void compute_gradients_8UC1_kernel(int height, int width, const PtrElemStep img,
float angle_scale, PtrElemStepf grad, PtrElemStep qangle)
__global__ void compute_gradients_8UC1_kernel(int height, int width, const PtrStepb img,
float angle_scale, PtrStepf grad, PtrStepb qangle)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
......@@ -780,8 +780,8 @@ namespace cv { namespace gpu { namespace device
}
void compute_gradients_8UC1(int nbins, int height, int width, const DevMem2Db& img,
float angle_scale, DevMem2Df grad, DevMem2Db qangle, bool correct_gamma)
void compute_gradients_8UC1(int nbins, int height, int width, const PtrStepSzb& img,
float angle_scale, PtrStepSzf grad, PtrStepSzb qangle, bool correct_gamma)
{
(void)nbins;
const int nthreads = 256;
......@@ -807,7 +807,7 @@ namespace cv { namespace gpu { namespace device
texture<uchar4, 2, cudaReadModeNormalizedFloat> resize8UC4_tex;
texture<uchar, 2, cudaReadModeNormalizedFloat> resize8UC1_tex;
__global__ void resize_for_hog_kernel(float sx, float sy, DevMem2D_<uchar> dst, int colOfs)
__global__ void resize_for_hog_kernel(float sx, float sy, PtrStepSz<uchar> dst, int colOfs)
{
unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
......@@ -816,7 +816,7 @@ namespace cv { namespace gpu { namespace device
dst.ptr(y)[x] = tex2D(resize8UC1_tex, x * sx + colOfs, y * sy) * 255;
}
__global__ void resize_for_hog_kernel(float sx, float sy, DevMem2D_<uchar4> dst, int colOfs)
__global__ void resize_for_hog_kernel(float sx, float sy, PtrStepSz<uchar4> dst, int colOfs)
{
unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
......@@ -829,7 +829,7 @@ namespace cv { namespace gpu { namespace device
}
template<class T, class TEX>
static void resize_for_hog(const DevMem2Db& src, DevMem2Db dst, TEX& tex)
static void resize_for_hog(const PtrStepSzb& src, PtrStepSzb dst, TEX& tex)
{
tex.filterMode = cudaFilterModeLinear;
......@@ -852,7 +852,7 @@ namespace cv { namespace gpu { namespace device
float sx = static_cast<float>(src.cols) / dst.cols;
float sy = static_cast<float>(src.rows) / dst.rows;
resize_for_hog_kernel<<<grid, threads>>>(sx, sy, (DevMem2D_<T>)dst, colOfs);
resize_for_hog_kernel<<<grid, threads>>>(sx, sy, (PtrStepSz<T>)dst, colOfs);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
......@@ -860,7 +860,7 @@ namespace cv { namespace gpu { namespace device
cudaSafeCall( cudaUnbindTexture(tex) );
}
void resize_8UC1(const DevMem2Db& src, DevMem2Db dst) { resize_for_hog<uchar> (src, dst, resize8UC1_tex); }
void resize_8UC4(const DevMem2Db& src, DevMem2Db dst) { resize_for_hog<uchar4>(src, dst, resize8UC4_tex); }
void resize_8UC1(const PtrStepSzb& src, PtrStepSzb dst) { resize_for_hog<uchar> (src, dst, resize8UC1_tex); }
void resize_8UC4(const PtrStepSzb& src, PtrStepSzb dst) { resize_for_hog<uchar4>(src, dst, resize8UC4_tex); }
} // namespace hog
}}} // namespace cv { namespace gpu { namespace device
......@@ -55,7 +55,7 @@ namespace cv { namespace gpu { namespace device
const int PIXELS_PER_THREAD = 16;
__global__ void buildPointList(const DevMem2Db src, unsigned int* list)
__global__ void buildPointList(const PtrStepSzb src, unsigned int* list)
{
__shared__ unsigned int s_queues[4][32 * PIXELS_PER_THREAD];
__shared__ int s_qsize[4];
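buildPointList compacts the non-zero pixels of src into a flat list. The shared arrays above form a per-row staging queue: threads append locally with cheap shared-memory atomics, and only one global atomic per block reserves the output range. A hedged sketch of the idiom (g_counter and the coordinate packing are illustrative):

    const int qidx = atomicAdd(&s_qsize[threadIdx.y], 1);   // shared-memory atomic
    s_queues[threadIdx.y][qidx] = (y << 16) | x;            // pack the coordinate

    __syncthreads();

    // one thread then reserves a contiguous range in the global list:
    // if (threadIdx.x == 0 && threadIdx.y == 0)
    //     s_globStart = atomicAdd(&g_counter, s_totalSize);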
......@@ -112,7 +112,7 @@ namespace cv { namespace gpu { namespace device
list[gidx] = s_queues[threadIdx.y][i];
}
int buildPointList_gpu(DevMem2Db src, unsigned int* list)
int buildPointList_gpu(PtrStepSzb src, unsigned int* list)
{
void* counterPtr;
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
......@@ -206,7 +206,7 @@ namespace cv { namespace gpu { namespace device
accumRow[i] = smem[i];
}
void linesAccum_gpu(const unsigned int* list, int count, DevMem2Di accum, float rho, float theta, size_t sharedMemPerBlock, bool has20)
void linesAccum_gpu(const unsigned int* list, int count, PtrStepSzi accum, float rho, float theta, size_t sharedMemPerBlock, bool has20)
{
const dim3 block(has20 ? 1024 : 512);
const dim3 grid(accum.rows - 2);
......@@ -226,7 +226,7 @@ namespace cv { namespace gpu { namespace device
////////////////////////////////////////////////////////////////////////
// linesGetResult
__global__ void linesGetResult(const DevMem2Di accum, float2* out, int* votes, const int maxSize, const float rho, const float theta, const int threshold, const int numrho)
__global__ void linesGetResult(const PtrStepSzi accum, float2* out, int* votes, const int maxSize, const float rho, const float theta, const int threshold, const int numrho)
{
const int r = blockIdx.x * blockDim.x + threadIdx.x;
const int n = blockIdx.y * blockDim.y + threadIdx.y;
......@@ -254,7 +254,7 @@ namespace cv { namespace gpu { namespace device
}
}
int linesGetResult_gpu(DevMem2Di accum, float2* out, int* votes, int maxSize, float rho, float theta, int threshold, bool doSort)
int linesGetResult_gpu(PtrStepSzi accum, float2* out, int* votes, int maxSize, float rho, float theta, int threshold, bool doSort)
{
void* counterPtr;
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
......@@ -341,7 +341,7 @@ namespace cv { namespace gpu { namespace device
}
}
void circlesAccumCenters_gpu(const unsigned int* list, int count, PtrStepi dx, PtrStepi dy, DevMem2Di accum, int minRadius, int maxRadius, float idp)
void circlesAccumCenters_gpu(const unsigned int* list, int count, PtrStepi dx, PtrStepi dy, PtrStepSzi accum, int minRadius, int maxRadius, float idp)
{
const dim3 block(256);
const dim3 grid(divUp(count, block.x));
......@@ -357,7 +357,7 @@ namespace cv { namespace gpu { namespace device
////////////////////////////////////////////////////////////////////////
// buildCentersList
__global__ void buildCentersList(const DevMem2Di accum, unsigned int* centers, const int threshold)
__global__ void buildCentersList(const PtrStepSzi accum, unsigned int* centers, const int threshold)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
......@@ -381,7 +381,7 @@ namespace cv { namespace gpu { namespace device
}
}
int buildCentersList_gpu(DevMem2Di accum, unsigned int* centers, int threshold)
int buildCentersList_gpu(PtrStepSzi accum, unsigned int* centers, int threshold)
{
void* counterPtr;
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
......@@ -467,7 +467,7 @@ namespace cv { namespace gpu { namespace device
const dim3 block(has20 ? 1024 : 512);
const dim3 grid(centersCount);
const int histSize = ::ceil(maxRadius - minRadius + 1);
const int histSize = maxRadius - minRadius + 1;
size_t smemSize = (histSize + 2) * sizeof(int);
circlesAccumRadius<<<grid, block, smemSize>>>(centers, list, count, circles, maxCircles, dp, minRadius, maxRadius, histSize, threshold);
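The third launch-configuration argument requests smemSize bytes of dynamic shared memory per block, sized at run time from the radius range; the kernel maps it with an extern declaration, along these lines (a sketch):

    extern __shared__ int smem[];   // histSize + 2 ints, per the launch above
    int* hist = smem;               // one vote counter per candidate radius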
......
......@@ -57,7 +57,7 @@ namespace cv { namespace gpu { namespace device
return bytes;
}
__global__ void shfl_integral_horizontal(const PtrStep_<uint4> img, PtrStep_<uint4> integral)
__global__ void shfl_integral_horizontal(const PtrStep<uint4> img, PtrStep<uint4> integral)
{
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 300)
__shared__ int sums[128];
......@@ -297,7 +297,7 @@ namespace cv { namespace gpu { namespace device
// The final set of sums from the block is then propagated, with the block
// computing "down" the image and adding the running sum to the local
// block sums.
__global__ void shfl_integral_vertical(DevMem2D_<unsigned int> integral)
__global__ void shfl_integral_vertical(PtrStepSz<unsigned int> integral)
{
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 300)
__shared__ unsigned int sums[32][9];
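The shfl_ kernels replace shared-memory scans with register shuffles, which is why they are gated on __CUDA_ARCH__ >= 300. A minimal warp-level inclusive scan of the kind they build on (a sketch; __shfl_up is the pre-__shfl_up_sync intrinsic of CUDA toolkits of this era):

    __device__ unsigned int warpInclusiveScan(unsigned int val)
    {
        const unsigned int lane = threadIdx.x & 31;

        #pragma unroll
        for (int offset = 1; offset < 32; offset *= 2)
        {
            const int n = __shfl_up((int)val, offset);
            if (lane >= offset)
                val += (unsigned int)n;
        }
        return val;   // lane i now holds the sum of lanes 0..i
    }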
......@@ -355,7 +355,7 @@ namespace cv { namespace gpu { namespace device
#endif
}
void shfl_integral_gpu(DevMem2Db img, DevMem2D_<unsigned int> integral, cudaStream_t stream)
void shfl_integral_gpu(PtrStepSzb img, PtrStepSz<unsigned int> integral, cudaStream_t stream)
{
{
// each thread handles 16 values, use 1 block/row
......@@ -366,7 +366,7 @@ namespace cv { namespace gpu { namespace device
cudaSafeCall( cudaFuncSetCacheConfig(shfl_integral_horizontal, cudaFuncCachePreferL1) );
shfl_integral_horizontal<<<grid, block, 0, stream>>>((DevMem2D_<uint4>) img, (DevMem2D_<uint4>) integral);
shfl_integral_horizontal<<<grid, block, 0, stream>>>((PtrStepSz<uint4>) img, (PtrStepSz<uint4>) integral);
cudaSafeCall( cudaGetLastError() );
}
......
......@@ -181,7 +181,7 @@ namespace cv { namespace gpu { namespace device
}
}
void connectedConmonents(DevMem2D_<int4> candidates, int ncandidates, DevMem2D_<int4> objects, int groupThreshold, float grouping_eps, unsigned int* nclasses)
void connectedConmonents(PtrStepSz<int4> candidates, int ncandidates, PtrStepSz<int4> objects, int groupThreshold, float grouping_eps, unsigned int* nclasses)
{
int block = ncandidates;
int smem = block * ( sizeof(int) + sizeof(int4) );
......@@ -240,7 +240,7 @@ namespace cv { namespace gpu { namespace device
// stepShift, scale, width_k, sum_prev => y = sum_prev + tid_k / width_k, x = tid_k - (tid_k / width_k) * width_k
__global__ void lbp_cascade(const Cascade cascade, int frameW, int frameH, int windowW, int windowH, float scale, const float factor,
const int total, int* integral, const int pitch, DevMem2D_<int4> objects, unsigned int* classified)
const int total, int* integral, const int pitch, PtrStepSz<int4> objects, unsigned int* classified)
{
int ftid = blockIdx.x * blockDim.x + threadIdx.x;
if (ftid >= total) return;
......@@ -285,8 +285,8 @@ namespace cv { namespace gpu { namespace device
}
void classifyPyramid(int frameW, int frameH, int windowW, int windowH, float initialScale, float factor, int workAmount,
const DevMem2Db& mstages, const int nstages, const DevMem2Di& mnodes, const DevMem2Df& mleaves, const DevMem2Di& msubsets, const DevMem2Db& mfeatures,
const int subsetSize, DevMem2D_<int4> objects, unsigned int* classified, DevMem2Di integral)
const PtrStepSzb& mstages, const int nstages, const PtrStepSzi& mnodes, const PtrStepSzf& mleaves, const PtrStepSzi& msubsets, const PtrStepSzb& mfeatures,
const int subsetSize, PtrStepSz<int4> objects, unsigned int* classified, PtrStepSzi integral)
{
const int block = 128;
int grid = divUp(workAmount, block);
......
......@@ -130,7 +130,7 @@ namespace cv { namespace gpu { namespace device
}
template <typename Mag, typename Angle>
void cartToPolar_caller(DevMem2Df x, DevMem2Df y, DevMem2Df mag, DevMem2Df angle, bool angleInDegrees, cudaStream_t stream)
void cartToPolar_caller(PtrStepSzf x, PtrStepSzf y, PtrStepSzf mag, PtrStepSzf angle, bool angleInDegrees, cudaStream_t stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);
......@@ -149,9 +149,9 @@ namespace cv { namespace gpu { namespace device
cudaSafeCall( cudaDeviceSynchronize() );
}
void cartToPolar_gpu(DevMem2Df x, DevMem2Df y, DevMem2Df mag, bool magSqr, DevMem2Df angle, bool angleInDegrees, cudaStream_t stream)
void cartToPolar_gpu(PtrStepSzf x, PtrStepSzf y, PtrStepSzf mag, bool magSqr, PtrStepSzf angle, bool angleInDegrees, cudaStream_t stream)
{
typedef void (*caller_t)(DevMem2Df x, DevMem2Df y, DevMem2Df mag, DevMem2Df angle, bool angleInDegrees, cudaStream_t stream);
typedef void (*caller_t)(PtrStepSzf x, PtrStepSzf y, PtrStepSzf mag, PtrStepSzf angle, bool angleInDegrees, cudaStream_t stream);
static const caller_t callers[2][2][2] =
{
{
......@@ -180,7 +180,7 @@ namespace cv { namespace gpu { namespace device
}
template <typename Mag>
void polarToCart_caller(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y, bool angleInDegrees, cudaStream_t stream)
void polarToCart_caller(PtrStepSzf mag, PtrStepSzf angle, PtrStepSzf x, PtrStepSzf y, bool angleInDegrees, cudaStream_t stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);
......@@ -198,9 +198,9 @@ namespace cv { namespace gpu { namespace device
cudaSafeCall( cudaDeviceSynchronize() );
}
void polarToCart_gpu(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y, bool angleInDegrees, cudaStream_t stream)
void polarToCart_gpu(PtrStepSzf mag, PtrStepSzf angle, PtrStepSzf x, PtrStepSzf y, bool angleInDegrees, cudaStream_t stream)
{
typedef void (*caller_t)(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y, bool angleInDegrees, cudaStream_t stream);
typedef void (*caller_t)(PtrStepSzf mag, PtrStepSzf angle, PtrStepSzf x, PtrStepSzf y, bool angleInDegrees, cudaStream_t stream);
static const caller_t callers[2] =
{
polarToCart_caller<NonEmptyMag>,
......
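cartToPolar_gpu and polarToCart_gpu pick a pre-instantiated caller out of a small table indexed by the boolean options instead of branching per pixel; the shape of the pattern, reduced to one flag (illustrative, not code from this commit):

    typedef void (*caller_t)(PtrStepSzf src, PtrStepSzf dst, cudaStream_t stream);

    template <bool angleInDegrees>
    static void caller(PtrStepSzf src, PtrStepSzf dst, cudaStream_t stream)
    {
        // launch the kernel compiled for this option set ...
    }

    static const caller_t callers[2] = { caller<false>, caller<true> };
    // callers[angleInDegrees ? 1 : 0](src, dst, stream);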
......@@ -49,7 +49,7 @@ namespace cv { namespace gpu { namespace device
#define NEEDLE_MAP_SCALE 16
#define NUM_VERTS_PER_ARROW 6
__global__ void NeedleMapAverageKernel(const DevMem2Df u, const PtrStepf v, PtrStepf u_avg, PtrStepf v_avg)
__global__ void NeedleMapAverageKernel(const PtrStepSzf u, const PtrStepf v, PtrStepf u_avg, PtrStepf v_avg)
{
__shared__ float smem[2 * NEEDLE_MAP_SCALE];
......@@ -111,7 +111,7 @@ namespace cv { namespace gpu { namespace device
}
}
void NeedleMapAverage_gpu(DevMem2Df u, DevMem2Df v, DevMem2Df u_avg, DevMem2Df v_avg)
void NeedleMapAverage_gpu(PtrStepSzf u, PtrStepSzf v, PtrStepSzf u_avg, PtrStepSzf v_avg)
{
const dim3 block(NEEDLE_MAP_SCALE);
const dim3 grid(u_avg.cols, u_avg.rows);
......@@ -122,7 +122,7 @@ namespace cv { namespace gpu { namespace device
cudaSafeCall( cudaDeviceSynchronize() );
}
__global__ void NeedleMapVertexKernel(const DevMem2Df u_avg, const PtrStepf v_avg, float* vertex_data, float* color_data, float max_flow, float xscale, float yscale)
__global__ void NeedleMapVertexKernel(const PtrStepSzf u_avg, const PtrStepf v_avg, float* vertex_data, float* color_data, float max_flow, float xscale, float yscale)
{
// test - just draw a triangle at each pixel
const int x = blockIdx.x * blockDim.x + threadIdx.x;
......@@ -202,7 +202,7 @@ namespace cv { namespace gpu { namespace device
}
}
void CreateOpticalFlowNeedleMap_gpu(DevMem2Df u_avg, DevMem2Df v_avg, float* vertex_buffer, float* color_data, float max_flow, float xscale, float yscale)
void CreateOpticalFlowNeedleMap_gpu(PtrStepSzf u_avg, PtrStepSzf v_avg, float* vertex_buffer, float* color_data, float max_flow, float xscale, float yscale)
{
const dim3 block(16);
const dim3 grid(divUp(u_avg.cols, block.x), divUp(u_avg.rows, block.y));
......
......@@ -135,7 +135,7 @@ namespace cv { namespace gpu { namespace device { namespace optflow_farneback
}
void polynomialExpansionGpu(const DevMem2Df &src, int polyN, DevMem2Df dst, cudaStream_t stream)
void polynomialExpansionGpu(const PtrStepSzf &src, int polyN, PtrStepSzf dst, cudaStream_t stream)
{
dim3 block(256);
dim3 grid(divUp(src.cols, block.x - 2*polyN), src.rows);
......@@ -251,8 +251,8 @@ namespace cv { namespace gpu { namespace device { namespace optflow_farneback
void updateMatricesGpu(
const DevMem2Df flowx, const DevMem2Df flowy, const DevMem2Df R0, const DevMem2Df R1,
DevMem2Df M, cudaStream_t stream)
const PtrStepSzf flowx, const PtrStepSzf flowy, const PtrStepSzf R0, const PtrStepSzf R1,
PtrStepSzf M, cudaStream_t stream)
{
dim3 block(32, 8);
dim3 grid(divUp(flowx.cols, block.x), divUp(flowx.rows, block.y));
......@@ -288,7 +288,7 @@ namespace cv { namespace gpu { namespace device { namespace optflow_farneback
}
void updateFlowGpu(const DevMem2Df M, DevMem2Df flowx, DevMem2Df flowy, cudaStream_t stream)
void updateFlowGpu(const PtrStepSzf M, PtrStepSzf flowx, PtrStepSzf flowy, cudaStream_t stream)
{
dim3 block(32, 8);
dim3 grid(divUp(flowx.cols, block.x), divUp(flowx.rows, block.y));
......@@ -340,7 +340,7 @@ namespace cv { namespace gpu { namespace device { namespace optflow_farneback
}
void boxFilterGpu(const DevMem2Df src, int ksizeHalf, DevMem2Df dst, cudaStream_t stream)
void boxFilterGpu(const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, cudaStream_t stream)
{
dim3 block(256);
dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
......@@ -414,7 +414,7 @@ namespace cv { namespace gpu { namespace device { namespace optflow_farneback
}
void boxFilter5Gpu(const DevMem2Df src, int ksizeHalf, DevMem2Df dst, cudaStream_t stream)
void boxFilter5Gpu(const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, cudaStream_t stream)
{
int height = src.rows / 5;
int width = src.cols;
......@@ -433,7 +433,7 @@ namespace cv { namespace gpu { namespace device { namespace optflow_farneback
}
void boxFilter5Gpu_CC11(const DevMem2Df src, int ksizeHalf, DevMem2Df dst, cudaStream_t stream)
void boxFilter5Gpu_CC11(const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, cudaStream_t stream)
{
int height = src.rows / 5;
int width = src.cols;
......@@ -501,7 +501,7 @@ namespace cv { namespace gpu { namespace device { namespace optflow_farneback
template <typename Border>
void gaussianBlurCaller(const DevMem2Df src, int ksizeHalf, DevMem2Df dst, cudaStream_t stream)
void gaussianBlurCaller(const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, cudaStream_t stream)
{
int height = src.rows;
int width = src.cols;
......@@ -521,9 +521,9 @@ namespace cv { namespace gpu { namespace device { namespace optflow_farneback
void gaussianBlurGpu(
const DevMem2Df src, int ksizeHalf, DevMem2Df dst, int borderMode, cudaStream_t stream)
const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, int borderMode, cudaStream_t stream)
{
typedef void (*caller_t)(const DevMem2Df, int, DevMem2Df, cudaStream_t);
typedef void (*caller_t)(const PtrStepSzf, int, PtrStepSzf, cudaStream_t);
static const caller_t callers[] =
{
......@@ -596,7 +596,7 @@ namespace cv { namespace gpu { namespace device { namespace optflow_farneback
template <typename Border, int blockDimX>
void gaussianBlur5Caller(
const DevMem2Df src, int ksizeHalf, DevMem2Df dst, cudaStream_t stream)
const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, cudaStream_t stream)
{
int height = src.rows / 5;
int width = src.cols;
......@@ -616,9 +616,9 @@ namespace cv { namespace gpu { namespace device { namespace optflow_farneback
void gaussianBlur5Gpu(
const DevMem2Df src, int ksizeHalf, DevMem2Df dst, int borderMode, cudaStream_t stream)
const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, int borderMode, cudaStream_t stream)
{
typedef void (*caller_t)(const DevMem2Df, int, DevMem2Df, cudaStream_t);
typedef void (*caller_t)(const PtrStepSzf, int, PtrStepSzf, cudaStream_t);
static const caller_t callers[] =
{
......@@ -630,9 +630,9 @@ namespace cv { namespace gpu { namespace device { namespace optflow_farneback
}
void gaussianBlur5Gpu_CC11(
const DevMem2Df src, int ksizeHalf, DevMem2Df dst, int borderMode, cudaStream_t stream)
const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, int borderMode, cudaStream_t stream)
{
typedef void (*caller_t)(const DevMem2Df, int, DevMem2Df, cudaStream_t);
typedef void (*caller_t)(const PtrStepSzf, int, PtrStepSzf, cudaStream_t);
static const caller_t callers[] =
{
......
......@@ -122,7 +122,7 @@ namespace cv { namespace gpu { namespace device
}
}
void HarrisResponses_gpu(DevMem2Db img, const short2* loc, float* response, const int npoints, int blockSize, float harris_k, cudaStream_t stream)
void HarrisResponses_gpu(PtrStepSzb img, const short2* loc, float* response, const int npoints, int blockSize, float harris_k, cudaStream_t stream)
{
dim3 block(32, 8);
......@@ -201,7 +201,7 @@ namespace cv { namespace gpu { namespace device
}
}
void IC_Angle_gpu(DevMem2Db image, const short2* loc, float* angle, int npoints, int half_k, cudaStream_t stream)
void IC_Angle_gpu(PtrStepSzb image, const short2* loc, float* angle, int npoints, int half_k, cudaStream_t stream)
{
dim3 block(32, 8);
......
......@@ -124,7 +124,7 @@ namespace cv { namespace gpu { namespace device
}
}
template <typename T, template <typename> class B> void pyrDown_caller(DevMem2D_<T> src, DevMem2D_<T> dst, cudaStream_t stream)
template <typename T, template <typename> class B> void pyrDown_caller(PtrStepSz<T> src, PtrStepSz<T> dst, cudaStream_t stream)
{
const dim3 block(256);
const dim3 grid(divUp(src.cols, block.x), dst.rows);
......@@ -138,39 +138,39 @@ namespace cv { namespace gpu { namespace device
cudaSafeCall( cudaDeviceSynchronize() );
}
template <typename T> void pyrDown_gpu(DevMem2Db src, DevMem2Db dst, cudaStream_t stream)
template <typename T> void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
{
pyrDown_caller<T, BrdReflect101>(static_cast< DevMem2D_<T> >(src), static_cast< DevMem2D_<T> >(dst), stream);
pyrDown_caller<T, BrdReflect101>(static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(dst), stream);
}
template void pyrDown_gpu<uchar>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
//template void pyrDown_gpu<uchar2>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
template void pyrDown_gpu<uchar3>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
template void pyrDown_gpu<uchar4>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
//template void pyrDown_gpu<schar>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
//template void pyrDown_gpu<char2>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
//template void pyrDown_gpu<char3>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
//template void pyrDown_gpu<char4>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
template void pyrDown_gpu<ushort>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
//template void pyrDown_gpu<ushort2>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
template void pyrDown_gpu<ushort3>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
template void pyrDown_gpu<ushort4>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
template void pyrDown_gpu<short>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
//template void pyrDown_gpu<short2>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
template void pyrDown_gpu<short3>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
template void pyrDown_gpu<short4>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
//template void pyrDown_gpu<int>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
//template void pyrDown_gpu<int2>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
//template void pyrDown_gpu<int3>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
//template void pyrDown_gpu<int4>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
template void pyrDown_gpu<float>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
//template void pyrDown_gpu<float2>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
template void pyrDown_gpu<float3>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
template void pyrDown_gpu<float4>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
template void pyrDown_gpu<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrDown_gpu<uchar2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrDown_gpu<uchar3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrDown_gpu<uchar4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrDown_gpu<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrDown_gpu<char2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrDown_gpu<char3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrDown_gpu<char4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrDown_gpu<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrDown_gpu<ushort2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrDown_gpu<ushort3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrDown_gpu<ushort4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrDown_gpu<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrDown_gpu<short2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrDown_gpu<short3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrDown_gpu<short4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrDown_gpu<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrDown_gpu<int2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrDown_gpu<int3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrDown_gpu<int4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrDown_gpu<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrDown_gpu<float2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrDown_gpu<float3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrDown_gpu<float4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
} // namespace imgproc
}}} // namespace cv { namespace gpu { namespace device
......@@ -50,7 +50,7 @@ namespace cv { namespace gpu { namespace device
{
namespace imgproc
{
template <typename T> __global__ void pyrUp(const DevMem2D_<T> src, DevMem2D_<T> dst)
template <typename T> __global__ void pyrUp(const PtrStepSz<T> src, PtrStepSz<T> dst)
{
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type sum_t;
......@@ -142,7 +142,7 @@ namespace cv { namespace gpu { namespace device
dst(y, x) = saturate_cast<T>(4.0f * sum);
}
template <typename T> void pyrUp_caller(DevMem2D_<T> src, DevMem2D_<T> dst, cudaStream_t stream)
template <typename T> void pyrUp_caller(PtrStepSz<T> src, PtrStepSz<T> dst, cudaStream_t stream)
{
const dim3 block(16, 16);
const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
......@@ -154,39 +154,39 @@ namespace cv { namespace gpu { namespace device
cudaSafeCall( cudaDeviceSynchronize() );
}
template <typename T> void pyrUp_gpu(DevMem2Db src, DevMem2Db dst, cudaStream_t stream)
template <typename T> void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
{
pyrUp_caller<T>(static_cast< DevMem2D_<T> >(src), static_cast< DevMem2D_<T> >(dst), stream);
pyrUp_caller<T>(static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(dst), stream);
}
template void pyrUp_gpu<uchar>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
//template void pyrUp_gpu<uchar2>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
template void pyrUp_gpu<uchar3>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
template void pyrUp_gpu<uchar4>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
//template void pyrUp_gpu<schar>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
//template void pyrUp_gpu<char2>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
//template void pyrUp_gpu<char3>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
//template void pyrUp_gpu<char4>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
template void pyrUp_gpu<ushort>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
//template void pyrUp_gpu<ushort2>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
template void pyrUp_gpu<ushort3>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
template void pyrUp_gpu<ushort4>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
template void pyrUp_gpu<short>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
//template void pyrUp_gpu<short2>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
template void pyrUp_gpu<short3>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
template void pyrUp_gpu<short4>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
//template void pyrUp_gpu<int>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
//template void pyrUp_gpu<int2>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
//template void pyrUp_gpu<int3>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
//template void pyrUp_gpu<int4>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
template void pyrUp_gpu<float>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
//template void pyrUp_gpu<float2>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
template void pyrUp_gpu<float3>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
template void pyrUp_gpu<float4>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
template void pyrUp_gpu<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrUp_gpu<uchar2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrUp_gpu<uchar3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrUp_gpu<uchar4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrUp_gpu<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrUp_gpu<char2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrUp_gpu<char3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrUp_gpu<char4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrUp_gpu<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrUp_gpu<ushort2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrUp_gpu<ushort3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrUp_gpu<ushort4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrUp_gpu<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrUp_gpu<short2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrUp_gpu<short3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrUp_gpu<short4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrUp_gpu<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrUp_gpu<int2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrUp_gpu<int3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrUp_gpu<int4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrUp_gpu<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//template void pyrUp_gpu<float2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrUp_gpu<float3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void pyrUp_gpu<float4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
} // namespace imgproc
}}} // namespace cv { namespace gpu { namespace device
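pyrDown_gpu/pyrUp_gpu are compiled once per element type here and exposed through the type-erased PtrStepSzb signature, so the host side can select an instantiation from a table indexed by depth and channel count, for example (a sketch of the conventional dispatch, not code from this commit):

    typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);

    static const func_t funcs[2][4] =
    {
        { pyrUp_gpu<uchar>, 0 /*uchar2: disabled*/, pyrUp_gpu<uchar3>, pyrUp_gpu<uchar4> },
        { pyrUp_gpu<float>, 0 /*float2: disabled*/, pyrUp_gpu<float3>, pyrUp_gpu<float4> }
    };
    // funcs[depthIdx][channels - 1](src, dst, stream);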
......@@ -46,7 +46,7 @@
namespace cv { namespace gpu { namespace device
{
#define OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name) \
void name(const DevMem2Db& src, const DevMem2Db& dst, cudaStream_t stream);
void name(const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream);
#define OPENCV_GPU_DECLARE_CVTCOLOR_ALL(name) \
OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name ## _8u) \
......
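For a given name (rgb_to_gray here, purely as an illustration) OPENCV_GPU_DECLARE_CVTCOLOR_ONE expands to a single type-erased declaration, and the _ALL variant stamps out one per source depth; the _8u case is visible above, and the remaining suffixes are an assumption about the truncated macro body:

    void rgb_to_gray_8u (const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream);
    void rgb_to_gray_16u(const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream);
    void rgb_to_gray_32f(const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream);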