Commit 43c75c64 authored by Vladislav Vinogradov's avatar Vladislav Vinogradov Committed by Alexander Smorkalov

disable NPP for GpuMat methods and for copyMakeBorder(cherry picked from commit…

disable NPP for GpuMat methods and for copyMakeBorder(cherry picked from commit 316d49fc0fb7a609ebb0a65efc207faea6b978a4)
parent c319625a
...@@ -129,15 +129,20 @@ public: ...@@ -129,15 +129,20 @@ public:
#if defined(USE_CUDA) #if defined(USE_CUDA)
#define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__, CV_Func) // Disable NPP for this file
#define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__, CV_Func) //#define USE_NPP
#undef USE_NPP
#define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__, CV_Func)
inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "") inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "")
{ {
if (cudaSuccess != err) if (cudaSuccess != err)
cv::gpu::error(cudaGetErrorString(err), file, line, func); cv::gpu::error(cudaGetErrorString(err), file, line, func);
} }
#ifdef USE_NPP
#define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__, CV_Func)
inline void ___nppSafeCall(int err, const char *file, const int line, const char *func = "") inline void ___nppSafeCall(int err, const char *file, const int line, const char *func = "")
{ {
if (err < 0) if (err < 0)
...@@ -148,6 +153,8 @@ inline void ___nppSafeCall(int err, const char *file, const int line, const char ...@@ -148,6 +153,8 @@ inline void ___nppSafeCall(int err, const char *file, const int line, const char
} }
} }
#endif
namespace cv { namespace gpu { namespace device namespace cv { namespace gpu { namespace device
{ {
void copyToWithMask_gpu(PtrStepSzb src, PtrStepSzb dst, size_t elemSize1, int cn, PtrStepSzb mask, bool colorMask, cudaStream_t stream); void copyToWithMask_gpu(PtrStepSzb src, PtrStepSzb dst, size_t elemSize1, int cn, PtrStepSzb mask, bool colorMask, cudaStream_t stream);
...@@ -173,6 +180,8 @@ template <typename T> void kernelSetCaller(GpuMat& src, Scalar s, const GpuMat& ...@@ -173,6 +180,8 @@ template <typename T> void kernelSetCaller(GpuMat& src, Scalar s, const GpuMat&
cv::gpu::device::set_to_gpu(src, sf.val, mask, src.channels(), stream); cv::gpu::device::set_to_gpu(src, sf.val, mask, src.channels(), stream);
} }
#ifdef USE_NPP
template<int n> struct NPPTypeTraits; template<int n> struct NPPTypeTraits;
template<> struct NPPTypeTraits<CV_8U> { typedef Npp8u npp_type; }; template<> struct NPPTypeTraits<CV_8U> { typedef Npp8u npp_type; };
template<> struct NPPTypeTraits<CV_8S> { typedef Npp8s npp_type; }; template<> struct NPPTypeTraits<CV_8S> { typedef Npp8s npp_type; };
...@@ -182,9 +191,13 @@ template<> struct NPPTypeTraits<CV_32S> { typedef Npp32s npp_type; }; ...@@ -182,9 +191,13 @@ template<> struct NPPTypeTraits<CV_32S> { typedef Npp32s npp_type; };
template<> struct NPPTypeTraits<CV_32F> { typedef Npp32f npp_type; }; template<> struct NPPTypeTraits<CV_32F> { typedef Npp32f npp_type; };
template<> struct NPPTypeTraits<CV_64F> { typedef Npp64f npp_type; }; template<> struct NPPTypeTraits<CV_64F> { typedef Npp64f npp_type; };
#endif
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
// Convert // Convert
#ifdef USE_NPP
template<int SDEPTH, int DDEPTH> struct NppConvertFunc template<int SDEPTH, int DDEPTH> struct NppConvertFunc
{ {
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t; typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
...@@ -232,9 +245,13 @@ template<int DDEPTH, typename NppConvertFunc<CV_32F, DDEPTH>::func_ptr func> str ...@@ -232,9 +245,13 @@ template<int DDEPTH, typename NppConvertFunc<CV_32F, DDEPTH>::func_ptr func> str
} }
}; };
#endif
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
// Set // Set
#ifdef USE_NPP
template<int SDEPTH, int SCN> struct NppSetFunc template<int SDEPTH, int SCN> struct NppSetFunc
{ {
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t; typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
...@@ -339,9 +356,13 @@ template<int SDEPTH, typename NppSetMaskFunc<SDEPTH, 1>::func_ptr func> struct N ...@@ -339,9 +356,13 @@ template<int SDEPTH, typename NppSetMaskFunc<SDEPTH, 1>::func_ptr func> struct N
} }
}; };
#endif
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
// CopyMasked // CopyMasked
#ifdef USE_NPP
template<int SDEPTH> struct NppCopyMaskedFunc template<int SDEPTH> struct NppCopyMaskedFunc
{ {
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t; typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
...@@ -365,6 +386,8 @@ template<int SDEPTH, typename NppCopyMaskedFunc<SDEPTH>::func_ptr func> struct N ...@@ -365,6 +386,8 @@ template<int SDEPTH, typename NppCopyMaskedFunc<SDEPTH>::func_ptr func> struct N
} }
}; };
#endif
template <typename T> static inline bool isAligned(const T* ptr, size_t size) template <typename T> static inline bool isAligned(const T* ptr, size_t size)
{ {
return reinterpret_cast<size_t>(ptr) % size == 0; return reinterpret_cast<size_t>(ptr) % size == 0;
...@@ -877,6 +900,8 @@ public: ...@@ -877,6 +900,8 @@ public:
} }
typedef void (*func_t)(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream); typedef void (*func_t)(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream);
#ifdef USE_NPP
static const func_t funcs[7][4] = static const func_t funcs[7][4] =
{ {
/* 8U */ {NppCopyMasked<CV_8U , nppiCopy_8u_C1MR >::call, cv::gpu::device::copyWithMask, NppCopyMasked<CV_8U , nppiCopy_8u_C3MR >::call, NppCopyMasked<CV_8U , nppiCopy_8u_C4MR >::call}, /* 8U */ {NppCopyMasked<CV_8U , nppiCopy_8u_C1MR >::call, cv::gpu::device::copyWithMask, NppCopyMasked<CV_8U , nppiCopy_8u_C3MR >::call, NppCopyMasked<CV_8U , nppiCopy_8u_C4MR >::call},
...@@ -889,6 +914,9 @@ public: ...@@ -889,6 +914,9 @@ public:
}; };
const func_t func = mask.channels() == src.channels() ? funcs[src.depth()][src.channels() - 1] : cv::gpu::device::copyWithMask; const func_t func = mask.channels() == src.channels() ? funcs[src.depth()][src.channels() - 1] : cv::gpu::device::copyWithMask;
#else
const func_t func = cv::gpu::device::copyWithMask;
#endif
func(src, dst, mask, 0); func(src, dst, mask, 0);
} }
...@@ -896,6 +924,8 @@ public: ...@@ -896,6 +924,8 @@ public:
void convert(const GpuMat& src, GpuMat& dst) const void convert(const GpuMat& src, GpuMat& dst) const
{ {
typedef void (*func_t)(const GpuMat& src, GpuMat& dst); typedef void (*func_t)(const GpuMat& src, GpuMat& dst);
#ifdef USE_NPP
static const func_t funcs[7][7][4] = static const func_t funcs[7][7][4] =
{ {
{ {
...@@ -962,6 +992,7 @@ public: ...@@ -962,6 +992,7 @@ public:
/* 64F -> 64F */ {0,0,0,0} /* 64F -> 64F */ {0,0,0,0}
} }
}; };
#endif
CV_Assert(src.depth() <= CV_64F && src.channels() <= 4); CV_Assert(src.depth() <= CV_64F && src.channels() <= 4);
CV_Assert(dst.depth() <= CV_64F); CV_Assert(dst.depth() <= CV_64F);
...@@ -980,8 +1011,12 @@ public: ...@@ -980,8 +1011,12 @@ public:
return; return;
} }
#ifdef USE_NPP
const func_t func = funcs[src.depth()][dst.depth()][src.channels() - 1]; const func_t func = funcs[src.depth()][dst.depth()][src.channels() - 1];
CV_DbgAssert(func != 0); CV_DbgAssert(func != 0);
#else
const func_t func = cv::gpu::device::convertTo;
#endif
func(src, dst); func(src, dst);
} }
...@@ -1023,6 +1058,8 @@ public: ...@@ -1023,6 +1058,8 @@ public:
} }
typedef void (*func_t)(GpuMat& src, Scalar s); typedef void (*func_t)(GpuMat& src, Scalar s);
#ifdef USE_NPP
static const func_t funcs[7][4] = static const func_t funcs[7][4] =
{ {
{NppSet<CV_8U , 1, nppiSet_8u_C1R >::call, cv::gpu::device::setTo , cv::gpu::device::setTo , NppSet<CV_8U , 4, nppiSet_8u_C4R >::call}, {NppSet<CV_8U , 1, nppiSet_8u_C1R >::call, cv::gpu::device::setTo , cv::gpu::device::setTo , NppSet<CV_8U , 4, nppiSet_8u_C4R >::call},
...@@ -1033,6 +1070,7 @@ public: ...@@ -1033,6 +1070,7 @@ public:
{NppSet<CV_32F, 1, nppiSet_32f_C1R>::call, cv::gpu::device::setTo , cv::gpu::device::setTo , NppSet<CV_32F, 4, nppiSet_32f_C4R>::call}, {NppSet<CV_32F, 1, nppiSet_32f_C1R>::call, cv::gpu::device::setTo , cv::gpu::device::setTo , NppSet<CV_32F, 4, nppiSet_32f_C4R>::call},
{cv::gpu::device::setTo , cv::gpu::device::setTo , cv::gpu::device::setTo , cv::gpu::device::setTo } {cv::gpu::device::setTo , cv::gpu::device::setTo , cv::gpu::device::setTo , cv::gpu::device::setTo }
}; };
#endif
CV_Assert(m.depth() <= CV_64F && m.channels() <= 4); CV_Assert(m.depth() <= CV_64F && m.channels() <= 4);
...@@ -1042,14 +1080,22 @@ public: ...@@ -1042,14 +1080,22 @@ public:
CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
} }
#ifdef USE_NPP
const func_t func = funcs[m.depth()][m.channels() - 1];
#else
const func_t func = cv::gpu::device::setTo;
#endif
if (stream) if (stream)
cv::gpu::device::setTo(m, s, stream); cv::gpu::device::setTo(m, s, stream);
else else
funcs[m.depth()][m.channels() - 1](m, s); func(m, s);
} }
else else
{ {
typedef void (*func_t)(GpuMat& src, Scalar s, const GpuMat& mask); typedef void (*func_t)(GpuMat& src, Scalar s, const GpuMat& mask);
#ifdef USE_NPP
static const func_t funcs[7][4] = static const func_t funcs[7][4] =
{ {
{NppSetMask<CV_8U , 1, nppiSet_8u_C1MR >::call, cv::gpu::device::setTo, cv::gpu::device::setTo, NppSetMask<CV_8U , 4, nppiSet_8u_C4MR >::call}, {NppSetMask<CV_8U , 1, nppiSet_8u_C1MR >::call, cv::gpu::device::setTo, cv::gpu::device::setTo, NppSetMask<CV_8U , 4, nppiSet_8u_C4MR >::call},
...@@ -1060,6 +1106,7 @@ public: ...@@ -1060,6 +1106,7 @@ public:
{NppSetMask<CV_32F, 1, nppiSet_32f_C1MR>::call, cv::gpu::device::setTo, cv::gpu::device::setTo, NppSetMask<CV_32F, 4, nppiSet_32f_C4MR>::call}, {NppSetMask<CV_32F, 1, nppiSet_32f_C1MR>::call, cv::gpu::device::setTo, cv::gpu::device::setTo, NppSetMask<CV_32F, 4, nppiSet_32f_C4MR>::call},
{cv::gpu::device::setTo , cv::gpu::device::setTo, cv::gpu::device::setTo, cv::gpu::device::setTo } {cv::gpu::device::setTo , cv::gpu::device::setTo, cv::gpu::device::setTo, cv::gpu::device::setTo }
}; };
#endif
CV_Assert(m.depth() <= CV_64F && m.channels() <= 4); CV_Assert(m.depth() <= CV_64F && m.channels() <= 4);
...@@ -1069,10 +1116,16 @@ public: ...@@ -1069,10 +1116,16 @@ public:
CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
} }
#ifdef USE_NPP
const func_t func = funcs[m.depth()][m.channels() - 1];
#else
const func_t func = cv::gpu::device::setTo;
#endif
if (stream) if (stream)
cv::gpu::device::setTo(m, s, mask, stream); cv::gpu::device::setTo(m, s, mask, stream);
else else
funcs[m.depth()][m.channels() - 1](m, s, mask); func(m, s, mask);
} }
} }
......
...@@ -244,6 +244,10 @@ void cv::gpu::reprojectImageTo3D(const GpuMat& disp, GpuMat& xyz, const Mat& Q, ...@@ -244,6 +244,10 @@ void cv::gpu::reprojectImageTo3D(const GpuMat& disp, GpuMat& xyz, const Mat& Q,
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
// copyMakeBorder // copyMakeBorder
// Disable NPP for this file
//#define USE_NPP
#undef USE_NPP
namespace cv { namespace gpu { namespace device namespace cv { namespace gpu { namespace device
{ {
namespace imgproc namespace imgproc
...@@ -279,6 +283,7 @@ void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom ...@@ -279,6 +283,7 @@ void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom
cudaStream_t stream = StreamAccessor::getStream(s); cudaStream_t stream = StreamAccessor::getStream(s);
#ifdef USE_NPP
if (borderType == BORDER_CONSTANT && (src.type() == CV_8UC1 || src.type() == CV_8UC4 || src.type() == CV_32SC1 || src.type() == CV_32FC1)) if (borderType == BORDER_CONSTANT && (src.type() == CV_8UC1 || src.type() == CV_8UC4 || src.type() == CV_32SC1 || src.type() == CV_32FC1))
{ {
NppiSize srcsz; NppiSize srcsz;
...@@ -328,6 +333,7 @@ void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom ...@@ -328,6 +333,7 @@ void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom
cudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaDeviceSynchronize() );
} }
else else
#endif
{ {
typedef void (*caller_t)(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderType, const Scalar& value, cudaStream_t stream); typedef void (*caller_t)(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderType, const Scalar& value, cudaStream_t stream);
static const caller_t callers[6][4] = static const caller_t callers[6][4] =
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment