Commit 40ee754e authored by Vladislav Vinogradov

added WITH_CUFFT and WITH_CUBLAS flags to cmake scripts

fixed gpu module error reporting
added asynchronous version of some functions
parent a73b509b
...@@ -452,8 +452,12 @@ set(WITH_EIGEN ON CACHE BOOL "Include Eigen2/Eigen3 support") ...@@ -452,8 +452,12 @@ set(WITH_EIGEN ON CACHE BOOL "Include Eigen2/Eigen3 support")
if( CMAKE_VERSION VERSION_GREATER "2.8") if( CMAKE_VERSION VERSION_GREATER "2.8")
set(WITH_CUDA ON CACHE BOOL "Include NVidia Cuda Runtime support") set(WITH_CUDA ON CACHE BOOL "Include NVidia Cuda Runtime support")
set(WITH_CUFFT ON CACHE BOOL "Include NVidia Cuda Fast Fourier Transform (FFT) library support")
set(WITH_CUBLAS OFF CACHE BOOL "Include NVidia Cuda Basic Linear Algebra Subprograms (BLAS) library support")
else() else()
set(WITH_CUDA OFF CACHE BOOL "Include NVidia Cuda Runtime support") set(WITH_CUDA OFF CACHE BOOL "Include NVidia Cuda Runtime support")
set(WITH_CUFFT OFF CACHE BOOL "Include NVidia Cuda Fast Fourier Transform (FFT) library support")
set(WITH_CUBLAS OFF CACHE BOOL "Include NVidia Cuda Basic Linear Algebra Subprograms (BLAS) library support")
endif() endif()
set(WITH_OPENNI OFF CACHE BOOL "Include OpenNI support") set(WITH_OPENNI OFF CACHE BOOL "Include OpenNI support")
...@@ -995,6 +999,15 @@ if(WITH_CUDA) ...@@ -995,6 +999,15 @@ if(WITH_CUDA)
if(CUDA_FOUND) if(CUDA_FOUND)
set(HAVE_CUDA 1) set(HAVE_CUDA 1)
if(WITH_CUFFT)
set(HAVE_CUFFT 1)
endif()
if(WITH_CUBLAS)
set(HAVE_CUBLAS 1)
endif()
message(STATUS "CUDA detected: " ${CUDA_VERSION}) message(STATUS "CUDA detected: " ${CUDA_VERSION})
set(CUDA_ARCH_BIN "1.1 1.2 1.3 2.0 2.1(2.0)" CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported") set(CUDA_ARCH_BIN "1.1 1.2 1.3 2.0 2.1(2.0)" CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported")
......
...@@ -172,6 +172,12 @@ ...@@ -172,6 +172,12 @@
/* NVidia Cuda Runtime API*/ /* NVidia Cuda Runtime API*/
#cmakedefine HAVE_CUDA #cmakedefine HAVE_CUDA
/* NVidia Cuda Fast Fourier Transform (FFT) API*/
#cmakedefine HAVE_CUFFT
/* NVidia Cuda Basic Linear Algebra Subprograms (BLAS) API*/
#cmakedefine HAVE_CUBLAS
/* Compile for 'real' NVIDIA GPU architectures */ /* Compile for 'real' NVIDIA GPU architectures */
#define CUDA_ARCH_BIN "${OPENCV_CUDA_ARCH_BIN}" #define CUDA_ARCH_BIN "${OPENCV_CUDA_ARCH_BIN}"
......
...@@ -250,9 +250,7 @@ enum { ...@@ -250,9 +250,7 @@ enum {
CV_StsBadMemBlock= -214, /* an allocated block has been corrupted */ CV_StsBadMemBlock= -214, /* an allocated block has been corrupted */
CV_StsAssert= -215, /* assertion failed */ CV_StsAssert= -215, /* assertion failed */
CV_GpuNotSupported= -216, CV_GpuNotSupported= -216,
CV_GpuApiCallError= -217, CV_GpuApiCallError= -217
CV_GpuNppCallError= -218,
CV_GpuCufftCallError= -219
}; };
/****************************************************************************************\ /****************************************************************************************\
......
...@@ -631,7 +631,6 @@ CV_IMPL const char* cvErrorStr( int status ) ...@@ -631,7 +631,6 @@ CV_IMPL const char* cvErrorStr( int status )
case CV_StsAssert : return "Assertion failed"; case CV_StsAssert : return "Assertion failed";
case CV_GpuNotSupported : return "No GPU support"; case CV_GpuNotSupported : return "No GPU support";
case CV_GpuApiCallError : return "Gpu Api call"; case CV_GpuApiCallError : return "Gpu Api call";
case CV_GpuNppCallError : return "Npp Api call";
}; };
sprintf(buf, "Unknown %s code %d", status >= 0 ? "status":"error", status); sprintf(buf, "Unknown %s code %d", status >= 0 ? "status":"error", status);
......
...@@ -121,11 +121,18 @@ target_link_libraries(${the_target} ${OPENCV_LINKER_LIBS} ${IPP_LIBS} ${DEPS} ) ...@@ -121,11 +121,18 @@ target_link_libraries(${the_target} ${OPENCV_LINKER_LIBS} ${IPP_LIBS} ${DEPS} )
if (HAVE_CUDA) if (HAVE_CUDA)
target_link_libraries(${the_target} ${CUDA_LIBRARIES}) target_link_libraries(${the_target} ${CUDA_LIBRARIES})
CUDA_ADD_CUFFT_TO_TARGET(${the_target})
unset(CUDA_npp_LIBRARY CACHE) unset(CUDA_npp_LIBRARY CACHE)
find_cuda_helper_libs(npp) find_cuda_helper_libs(npp)
target_link_libraries(${the_target} ${CUDA_npp_LIBRARY}) target_link_libraries(${the_target} ${CUDA_npp_LIBRARY})
if(HAVE_CUFFT)
CUDA_ADD_CUFFT_TO_TARGET(${the_target})
endif()
if(HAVE_CUBLAS)
CUDA_ADD_CUBLAS_TO_TARGET(${the_target})
endif()
endif() endif()
if(MSVC) if(MSVC)
......
...@@ -141,8 +141,8 @@ namespace cv ...@@ -141,8 +141,8 @@ namespace cv
//////////////////////////////// Error handling //////////////////////// //////////////////////////////// Error handling ////////////////////////
CV_EXPORTS void error(const char *error_string, const char *file, const int line, const char *func); //CV_EXPORTS void error(const char *error_string, const char *file, const int line, const char *func);
CV_EXPORTS void nppError( int err, const char *file, const int line, const char *func); //CV_EXPORTS void nppError( int err, const char *file, const int line, const char *func);
//////////////////////////////// CudaMem //////////////////////////////// //////////////////////////////// CudaMem ////////////////////////////////
// CudaMem is limited cv::Mat with page locked memory allocation. // CudaMem is limited cv::Mat with page locked memory allocation.
...@@ -628,11 +628,11 @@ namespace cv ...@@ -628,11 +628,11 @@ namespace cv
//! Does mean shift filtering on GPU. //! Does mean shift filtering on GPU.
CV_EXPORTS void meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr, CV_EXPORTS void meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr,
TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1)); TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1), Stream& stream = Stream::Null());
//! Does mean shift procedure on GPU. //! Does mean shift procedure on GPU.
CV_EXPORTS void meanShiftProc(const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int sp, int sr, CV_EXPORTS void meanShiftProc(const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int sp, int sr,
TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1)); TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1), Stream& stream = Stream::Null());
//! Does mean shift segmentation with elimination of small regions. //! Does mean shift segmentation with elimination of small regions.
CV_EXPORTS void meanShiftSegmentation(const GpuMat& src, Mat& dst, int sp, int sr, int minsize, CV_EXPORTS void meanShiftSegmentation(const GpuMat& src, Mat& dst, int sp, int sr, int minsize,
...@@ -683,10 +683,12 @@ namespace cv ...@@ -683,10 +683,12 @@ namespace cv
//! rotate 8bit single or four channel image //! rotate 8bit single or four channel image
//! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC //! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
//! supports CV_8UC1, CV_8UC4 types //! supports CV_8UC1, CV_8UC4 types
CV_EXPORTS void rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift = 0, double yShift = 0, int interpolation = INTER_LINEAR, Stream& stream = Stream::Null()); CV_EXPORTS void rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift = 0, double yShift = 0,
int interpolation = INTER_LINEAR, Stream& stream = Stream::Null());
//! copies 2D array to a larger destination array and pads borders with user-specifiable constant //! copies 2D array to a larger destination array and pads borders with user-specifiable constant
CV_EXPORTS void copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, int borderType, const Scalar& value = Scalar(), Stream& stream = Stream::Null()); CV_EXPORTS void copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, int borderType,
const Scalar& value = Scalar(), Stream& stream = Stream::Null());
//! computes the integral image //! computes the integral image
//! sum will have CV_32S type, but will contain unsigned int values //! sum will have CV_32S type, but will contain unsigned int values
...@@ -715,21 +717,26 @@ namespace cv ...@@ -715,21 +717,26 @@ namespace cv
CV_EXPORTS void rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, const Rect& rect, Stream& stream = Stream::Null()); CV_EXPORTS void rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, const Rect& rect, Stream& stream = Stream::Null());
//! computes Harris cornerness criteria at each image pixel //! computes Harris cornerness criteria at each image pixel
CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, double k, int borderType=BORDER_REFLECT101); CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, double k,
CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, double k, int borderType=BORDER_REFLECT101); int borderType = BORDER_REFLECT101);
CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, double k,
int borderType = BORDER_REFLECT101);
CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize, double k,
int borderType = BORDER_REFLECT101, Stream& stream = Stream::Null());
//! computes minimum eigen value of 2x2 derivative covariation matrix at each pixel - the cornerness criteria //! computes minimum eigen value of 2x2 derivative covariation matrix at each pixel - the cornerness criteria
CV_EXPORTS void cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, int borderType=BORDER_REFLECT101); CV_EXPORTS void cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, int borderType=BORDER_REFLECT101);
CV_EXPORTS void cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int borderType=BORDER_REFLECT101); CV_EXPORTS void cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int borderType=BORDER_REFLECT101);
CV_EXPORTS void cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize,
int borderType=BORDER_REFLECT101, Stream& stream = Stream::Null());
//! performs per-element multiplication of two full (not packed) Fourier spectrums //! performs per-element multiplication of two full (not packed) Fourier spectrums
//! supports 32FC2 matrixes only (interleaved format) //! supports 32FC2 matrixes only (interleaved format)
CV_EXPORTS void mulSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, bool conjB=false); CV_EXPORTS void mulSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, bool conjB=false, Stream& stream = Stream::Null());
//! performs per-element multiplication of two full (not packed) Fourier spectrums //! performs per-element multiplication of two full (not packed) Fourier spectrums
//! supports 32FC2 matrixes only (interleaved format) //! supports 32FC2 matrixes only (interleaved format)
CV_EXPORTS void mulAndScaleSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, CV_EXPORTS void mulAndScaleSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, float scale, bool conjB=false, Stream& stream = Stream::Null());
float scale, bool conjB=false);
//! Performs a forward or inverse discrete Fourier transform (1D or 2D) of floating point matrix. //! Performs a forward or inverse discrete Fourier transform (1D or 2D) of floating point matrix.
//! Param dft_size is the size of DFT transform. //! Param dft_size is the size of DFT transform.
...@@ -742,19 +749,14 @@ namespace cv ...@@ -742,19 +749,14 @@ namespace cv
//! in CUFFT's format. Result as full complex matrix for such kind of transform cannot be retrieved. //! in CUFFT's format. Result as full complex matrix for such kind of transform cannot be retrieved.
//! //!
//! For complex-to-real transform it is assumed that the source matrix is packed in CUFFT's format. //! For complex-to-real transform it is assumed that the source matrix is packed in CUFFT's format.
CV_EXPORTS void dft(const GpuMat& src, GpuMat& dst, Size dft_size, int flags=0); CV_EXPORTS void dft(const GpuMat& src, GpuMat& dst, Size dft_size, int flags=0, Stream& stream = Stream::Null());
//! computes convolution (or cross-correlation) of two images using discrete Fourier transform //! computes convolution (or cross-correlation) of two images using discrete Fourier transform
//! supports source images of 32FC1 type only //! supports source images of 32FC1 type only
//! result matrix will have 32FC1 type //! result matrix will have 32FC1 type
CV_EXPORTS void convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result,
bool ccorr=false);
struct CV_EXPORTS ConvolveBuf; struct CV_EXPORTS ConvolveBuf;
CV_EXPORTS void convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, bool ccorr = false);
//! buffered version CV_EXPORTS void convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, bool ccorr, ConvolveBuf& buf, Stream& stream = Stream::Null());
CV_EXPORTS void convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result,
bool ccorr, ConvolveBuf& buf);
struct CV_EXPORTS ConvolveBuf struct CV_EXPORTS ConvolveBuf
{ {
...@@ -766,7 +768,7 @@ namespace cv ...@@ -766,7 +768,7 @@ namespace cv
private: private:
static Size estimateBlockSize(Size result_size, Size templ_size); static Size estimateBlockSize(Size result_size, Size templ_size);
friend void convolve(const GpuMat&, const GpuMat&, GpuMat&, bool, ConvolveBuf&); friend void convolve(const GpuMat&, const GpuMat&, GpuMat&, bool, ConvolveBuf&, Stream& stream);
Size result_size; Size result_size;
Size block_size; Size block_size;
...@@ -778,7 +780,7 @@ namespace cv ...@@ -778,7 +780,7 @@ namespace cv
}; };
//! computes the proximity map for the raster template and the image where the template is searched for //! computes the proximity map for the raster template and the image where the template is searched for
CV_EXPORTS void matchTemplate(const GpuMat& image, const GpuMat& templ, GpuMat& result, int method); CV_EXPORTS void matchTemplate(const GpuMat& image, const GpuMat& templ, GpuMat& result, int method, Stream& stream = Stream::Null());
//! smoothes the source image and downsamples it //! smoothes the source image and downsamples it
CV_EXPORTS void pyrDown(const GpuMat& src, GpuMat& dst, int borderType = BORDER_DEFAULT, Stream& stream = Stream::Null()); CV_EXPORTS void pyrDown(const GpuMat& src, GpuMat& dst, int borderType = BORDER_DEFAULT, Stream& stream = Stream::Null());
......
...@@ -93,7 +93,7 @@ void cv::gpu::transpose(const GpuMat& src, GpuMat& dst, Stream& s) ...@@ -93,7 +93,7 @@ void cv::gpu::transpose(const GpuMat& src, GpuMat& dst, Stream& s)
sz.width = src.cols; sz.width = src.cols;
sz.height = src.rows; sz.height = src.rows;
nppSafeCall( nppiStTranspose_32u_C1R(const_cast<Ncv32u*>(src.ptr<Ncv32u>()), static_cast<int>(src.step), ncvSafeCall( nppiStTranspose_32u_C1R(const_cast<Ncv32u*>(src.ptr<Ncv32u>()), static_cast<int>(src.step),
dst.ptr<Ncv32u>(), static_cast<int>(dst.step), sz) ); dst.ptr<Ncv32u>(), static_cast<int>(dst.step), sz) );
} }
else // if (src.elemSize() == 8) else // if (src.elemSize() == 8)
...@@ -104,7 +104,7 @@ void cv::gpu::transpose(const GpuMat& src, GpuMat& dst, Stream& s) ...@@ -104,7 +104,7 @@ void cv::gpu::transpose(const GpuMat& src, GpuMat& dst, Stream& s)
sz.width = src.cols; sz.width = src.cols;
sz.height = src.rows; sz.height = src.rows;
nppSafeCall( nppiStTranspose_64u_C1R(const_cast<Ncv64u*>(src.ptr<Ncv64u>()), static_cast<int>(src.step), ncvSafeCall( nppiStTranspose_64u_C1R(const_cast<Ncv64u*>(src.ptr<Ncv64u>()), static_cast<int>(src.step),
dst.ptr<Ncv64u>(), static_cast<int>(dst.step), sz) ); dst.ptr<Ncv64u>(), static_cast<int>(dst.step), sz) );
} }
......
...@@ -66,10 +66,7 @@ struct cv::gpu::CascadeClassifier_GPU::CascadeClassifierImpl ...@@ -66,10 +66,7 @@ struct cv::gpu::CascadeClassifier_GPU::CascadeClassifierImpl
CascadeClassifierImpl(const string& filename) : lastAllocatedFrameSize(-1, -1) CascadeClassifierImpl(const string& filename) : lastAllocatedFrameSize(-1, -1)
{ {
ncvSetDebugOutputHandler(NCVDebugOutputHandler); ncvSetDebugOutputHandler(NCVDebugOutputHandler);
if (ncvStat != load(filename)) ncvSafeCall( load(filename) );
{
CV_Error(CV_GpuApiCallError, "Error in GPU cacade load");
}
} }
...@@ -287,11 +284,7 @@ int cv::gpu::CascadeClassifier_GPU::detectMultiScale( const GpuMat& image, GpuMa ...@@ -287,11 +284,7 @@ int cv::gpu::CascadeClassifier_GPU::detectMultiScale( const GpuMat& image, GpuMa
} }
unsigned int numDetections; unsigned int numDetections;
NCVStatus ncvStat = impl->process(image, objectsBuf, (float)scaleFactor, minNeighbors, findLargestObject, visualizeInPlace, ncvMinSize, numDetections); ncvSafeCall( impl->process(image, objectsBuf, (float)scaleFactor, minNeighbors, findLargestObject, visualizeInPlace, ncvMinSize, numDetections) );
if (ncvStat != NCV_SUCCESS)
{
CV_Error(CV_GpuApiCallError, "Error in face detectioln");
}
return numDetections; return numDetections;
} }
......
This diff is collapsed.
This diff is collapsed.
...@@ -45,16 +45,18 @@ ...@@ -45,16 +45,18 @@
#include "cuda_runtime_api.h" #include "cuda_runtime_api.h"
#include "cufft.h" #include "cufft.h"
//#include <nppdefs.h> #include "NCV.hpp"
#if defined(__GNUC__) #if defined(__GNUC__)
#define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__, __func__) #define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__, __func__)
#define cufftSafeCall(expr) ___cufftSafeCall(expr, __FILE__, __LINE__, __func__)
#define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__, __func__) #define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__, __func__)
#define ncvSafeCall(expr) ___ncvSafeCall(expr, __FILE__, __LINE__, __func__)
#define cufftSafeCall(expr) ___cufftSafeCall(expr, __FILE__, __LINE__, __func__)
#else /* defined(__CUDACC__) || defined(__MSVC__) */ #else /* defined(__CUDACC__) || defined(__MSVC__) */
#define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__) #define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__)
#define cufftSafeCall(expr) ___cufftSafeCall(expr, __FILE__, __LINE__)
#define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__) #define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__)
#define ncvSafeCall(expr) ___ncvSafeCall(expr, __FILE__, __LINE__)
#define cufftSafeCall(expr) ___cufftSafeCall(expr, __FILE__, __LINE__)
#endif #endif
namespace cv namespace cv
...@@ -63,6 +65,7 @@ namespace cv ...@@ -63,6 +65,7 @@ namespace cv
{ {
void error(const char *error_string, const char *file, const int line, const char *func = ""); void error(const char *error_string, const char *file, const int line, const char *func = "");
void nppError(int err, const char *file, const int line, const char *func = ""); void nppError(int err, const char *file, const int line, const char *func = "");
void ncvError(int err, const char *file, const int line, const char *func = "");
void cufftError(int err, const char *file, const int line, const char *func = ""); void cufftError(int err, const char *file, const int line, const char *func = "");
static inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "") static inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "")
...@@ -71,17 +74,23 @@ namespace cv ...@@ -71,17 +74,23 @@ namespace cv
cv::gpu::error(cudaGetErrorString(err), file, line, func); cv::gpu::error(cudaGetErrorString(err), file, line, func);
} }
static inline void ___cufftSafeCall(cufftResult_t err, const char *file, const int line, const char *func = "")
{
if (CUFFT_SUCCESS != err)
cv::gpu::cufftError(err, file, line, func);
}
static inline void ___nppSafeCall(int err, const char *file, const int line, const char *func = "") static inline void ___nppSafeCall(int err, const char *file, const int line, const char *func = "")
{ {
if (err < 0) if (err < 0)
cv::gpu::nppError(err, file, line, func); cv::gpu::nppError(err, file, line, func);
} }
static inline void ___ncvSafeCall(int err, const char *file, const int line, const char *func = "")
{
if (NCV_SUCCESS != err)
cv::gpu::ncvError(err, file, line, func);
}
static inline void ___cufftSafeCall(cufftResult_t err, const char *file, const int line, const char *func = "")
{
if (CUFFT_SUCCESS != err)
cv::gpu::cufftError(err, file, line, func);
}
} }
} }
......
...@@ -42,30 +42,45 @@ ...@@ -42,30 +42,45 @@
#include "precomp.hpp" #include "precomp.hpp"
using namespace cv; using namespace cv;
using namespace cv::gpu; using namespace cv::gpu;
using namespace std;
#ifdef HAVE_CUDA
#if !defined (HAVE_CUDA)
#else /* !defined (HAVE_CUDA) */
namespace namespace
{ {
#define error_entry(entry) { entry, #entry } #define error_entry(entry) { entry, #entry }
////////////////////////////////////////////////////////////////////////// struct ErrorEntry
// NPP errors
struct NppError
{ {
int error; int code;
string str; string str;
};
struct ErrorEntryComparer
{
int code;
ErrorEntryComparer(int code_) : code(code_) {};
bool operator()(const ErrorEntry& e) const { return e.code == code; }
};
string getErrorString(int code, const ErrorEntry* errors, size_t n)
{
size_t idx = find_if(errors, errors + n, ErrorEntryComparer(code)) - errors;
const string& msg = (idx != n) ? errors[idx].str : string("Unknown error code");
ostringstream ostr;
ostr << msg << " [Code = " << code << "]";
return ostr.str();
} }
npp_errors [] = //////////////////////////////////////////////////////////////////////////
// NPP errors
const ErrorEntry npp_errors [] =
{ {
error_entry( NPP_NOT_SUPPORTED_MODE_ERROR ), error_entry( NPP_NOT_SUPPORTED_MODE_ERROR ),
error_entry( NPP_ROUND_MODE_NOT_SUPPORTED_ERROR ), error_entry( NPP_ROUND_MODE_NOT_SUPPORTED_ERROR ),
...@@ -74,6 +89,7 @@ namespace ...@@ -74,6 +89,7 @@ namespace
#if defined (_MSC_VER) #if defined (_MSC_VER)
error_entry( NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY ), error_entry( NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY ),
#endif #endif
error_entry( NPP_BAD_ARG_ERROR ), error_entry( NPP_BAD_ARG_ERROR ),
error_entry( NPP_LUT_NUMBER_OF_LEVELS_ERROR ), error_entry( NPP_LUT_NUMBER_OF_LEVELS_ERROR ),
error_entry( NPP_TEXTURE_BIND_ERROR ), error_entry( NPP_TEXTURE_BIND_ERROR ),
...@@ -110,105 +126,115 @@ namespace ...@@ -110,105 +126,115 @@ namespace
error_entry( NPP_ODD_ROI_WARNING ) error_entry( NPP_ODD_ROI_WARNING )
}; };
const size_t error_num = sizeof(npp_errors) / sizeof(npp_errors[0]); const size_t npp_error_num = sizeof(npp_errors) / sizeof(npp_errors[0]);
struct Searcher
{
int err;
Searcher(int err_) : err(err_) {};
bool operator()(const NppError& e) const { return e.error == err; }
};
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
// CUFFT errors // NCV errors
struct CufftError const ErrorEntry ncv_errors [] =
{ {
int code; error_entry( NCV_SUCCESS ),
string message; error_entry( NCV_UNKNOWN_ERROR ),
error_entry( NCV_CUDA_ERROR ),
error_entry( NCV_NPP_ERROR ),
error_entry( NCV_FILE_ERROR ),
error_entry( NCV_NULL_PTR ),
error_entry( NCV_INCONSISTENT_INPUT ),
error_entry( NCV_TEXTURE_BIND_ERROR ),
error_entry( NCV_DIMENSIONS_INVALID ),
error_entry( NCV_INVALID_ROI ),
error_entry( NCV_INVALID_STEP ),
error_entry( NCV_INVALID_SCALE ),
error_entry( NCV_INVALID_SCALE ),
error_entry( NCV_ALLOCATOR_NOT_INITIALIZED ),
error_entry( NCV_ALLOCATOR_BAD_ALLOC ),
error_entry( NCV_ALLOCATOR_BAD_DEALLOC ),
error_entry( NCV_ALLOCATOR_INSUFFICIENT_CAPACITY ),
error_entry( NCV_ALLOCATOR_DEALLOC_ORDER ),
error_entry( NCV_ALLOCATOR_BAD_REUSE ),
error_entry( NCV_MEM_COPY_ERROR ),
error_entry( NCV_MEM_RESIDENCE_ERROR ),
error_entry( NCV_MEM_INSUFFICIENT_CAPACITY ),
error_entry( NCV_HAAR_INVALID_PIXEL_STEP ),
error_entry( NCV_HAAR_TOO_MANY_FEATURES_IN_CLASSIFIER ),
error_entry( NCV_HAAR_TOO_MANY_FEATURES_IN_CASCADE ),
error_entry( NCV_HAAR_TOO_LARGE_FEATURES ),
error_entry( NCV_HAAR_XML_LOADING_EXCEPTION ),
error_entry( NCV_NOIMPL_HAAR_TILTED_FEATURES ),
error_entry( NCV_WARNING_HAAR_DETECTIONS_VECTOR_OVERFLOW ),
error_entry( NPPST_SUCCESS ),
error_entry( NPPST_ERROR ),
error_entry( NPPST_CUDA_KERNEL_EXECUTION_ERROR ),
error_entry( NPPST_NULL_POINTER_ERROR ),
error_entry( NPPST_TEXTURE_BIND_ERROR ),
error_entry( NPPST_MEMCPY_ERROR ),
error_entry( NPPST_MEM_ALLOC_ERR ),
error_entry( NPPST_MEMFREE_ERR ),
error_entry( NPPST_INVALID_ROI ),
error_entry( NPPST_INVALID_STEP ),
error_entry( NPPST_INVALID_SCALE ),
error_entry( NPPST_MEM_INSUFFICIENT_BUFFER ),
error_entry( NPPST_MEM_RESIDENCE_ERROR ),
error_entry( NPPST_MEM_INTERNAL_ERROR )
}; };
// Number of entries in ncv_errors; must be derived from ncv_errors itself so that
// getErrorString() never scans past the end of (or stops short in) the NCV table.
const CufftError cufft_errors[] = const size_t ncv_error_num = sizeof(ncv_errors) / sizeof(ncv_errors[0]);
{
error_entry(CUFFT_INVALID_PLAN),
error_entry(CUFFT_ALLOC_FAILED),
error_entry(CUFFT_INVALID_TYPE),
error_entry(CUFFT_INVALID_VALUE),
error_entry(CUFFT_INTERNAL_ERROR),
error_entry(CUFFT_EXEC_FAILED),
error_entry(CUFFT_SETUP_FAILED),
error_entry(CUFFT_INVALID_SIZE),
error_entry(CUFFT_UNALIGNED_DATA)
};
struct CufftErrorComparer //////////////////////////////////////////////////////////////////////////
{ // CUFFT errors
CufftErrorComparer(int code_): code(code_) {}
bool operator()(const CufftError& other) const const ErrorEntry cufft_errors[] =
{ {
return other.code == code; error_entry( CUFFT_INVALID_PLAN ),
} error_entry( CUFFT_ALLOC_FAILED ),
int code; error_entry( CUFFT_INVALID_TYPE ),
error_entry( CUFFT_INVALID_VALUE ),
error_entry( CUFFT_INTERNAL_ERROR ),
error_entry( CUFFT_EXEC_FAILED ),
error_entry( CUFFT_SETUP_FAILED ),
error_entry( CUFFT_INVALID_SIZE ),
error_entry( CUFFT_UNALIGNED_DATA )
}; };
const int cufft_error_num = sizeof(cufft_errors) / sizeof(cufft_errors[0]); const int cufft_error_num = sizeof(cufft_errors) / sizeof(cufft_errors[0]);
} }
namespace cv namespace cv
{ {
namespace gpu namespace gpu
{ {
const string getNppErrorString( int err ) void error(const char *error_string, const char *file, const int line, const char *func)
{ {
size_t idx = std::find_if(npp_errors, npp_errors + error_num, Searcher(err)) - npp_errors; int code = CV_GpuApiCallError;
const string& msg = (idx != error_num) ? npp_errors[idx].str : string("Unknown error code");
std::stringstream interpreter; if (uncaught_exception())
interpreter << msg <<" [Code = " << err << "]"; {
const char* errorStr = cvErrorStr(code);
const char* function = func ? func : "unknown function";
return interpreter.str(); cerr << "OpenCV Error: " << errorStr << "(" << error_string << ") in " << function << ", file " << file << ", line " << line;
cerr.flush();
} }
else
void nppError( int err, const char *file, const int line, const char *func) cv::error( cv::Exception(code, error_string, func, file, line) );
{
cv::error( cv::Exception(CV_GpuNppCallError, getNppErrorString(err), func, file, line) );
} }
const string getCufftErrorString(int err_code) void nppError(int code, const char *file, const int line, const char *func)
{ {
const CufftError* cufft_error = std::find_if( string msg = getErrorString(code, npp_errors, npp_error_num);
cufft_errors, cufft_errors + cufft_error_num, cv::gpu::error(msg.c_str(), file, line, func);
CufftErrorComparer(err_code));
bool found = cufft_error != cufft_errors + cufft_error_num;
std::stringstream ss;
ss << (found ? cufft_error->message : "Unknown error code");
ss << " [Code = " << err_code << "]";
return ss.str();
} }
void cufftError(int err, const char *file, const int line, const char *func) void ncvError(int code, const char *file, const int line, const char *func)
{ {
cv::error(cv::Exception(CV_GpuCufftCallError, getCufftErrorString(err), func, file, line)); string msg = getErrorString(code, ncv_errors, ncv_error_num);
cv::gpu::error(msg.c_str(), file, line, func);
} }
void error(const char *error_string, const char *file, const int line, const char *func) void cufftError(int code, const char *file, const int line, const char *func)
{
int code = CV_GpuApiCallError;
if (std::uncaught_exception())
{ {
const char* errorStr = cvErrorStr(code); string msg = getErrorString(code, cufft_errors, cufft_error_num);
const char* function = func ? func : "unknown function"; cv::gpu::error(msg.c_str(), file, line, func);
std::cerr << "OpenCV Error: " << errorStr << "(" << error_string << ") in " << function << ", file " << file << ", line " << line;
std::cerr.flush();
}
else
cv::error( cv::Exception(code, error_string, func, file, line) );
} }
} }
} }
......
This diff is collapsed.
This diff is collapsed.
...@@ -59,10 +59,8 @@ namespace ...@@ -59,10 +59,8 @@ namespace
NCVMatrix<Ncv32f>& u, NCVMatrix<Ncv32f>& v, const cudaDeviceProp& devProp) NCVMatrix<Ncv32f>& u, NCVMatrix<Ncv32f>& v, const cudaDeviceProp& devProp)
{ {
NCVMemStackAllocator gpuCounter(static_cast<Ncv32u>(devProp.textureAlignment)); NCVMemStackAllocator gpuCounter(static_cast<Ncv32u>(devProp.textureAlignment));
CV_Assert(gpuCounter.isInitialized());
NCVStatus ncvStat = NCVBroxOpticalFlow(desc, gpuCounter, frame0, frame1, u, v, 0); ncvSafeCall( NCVBroxOpticalFlow(desc, gpuCounter, frame0, frame1, u, v, 0) );
CV_Assert(ncvStat == NCV_SUCCESS);
return gpuCounter.maxSize(); return gpuCounter.maxSize();
} }
...@@ -130,10 +128,8 @@ void cv::gpu::BroxOpticalFlow::operator ()(const GpuMat& frame0, const GpuMat& f ...@@ -130,10 +128,8 @@ void cv::gpu::BroxOpticalFlow::operator ()(const GpuMat& frame0, const GpuMat& f
ensureSizeIsEnough(1, bufSize, CV_8UC1, buf); ensureSizeIsEnough(1, bufSize, CV_8UC1, buf);
NCVMemStackAllocator gpuAllocator(NCVMemoryTypeDevice, bufSize, static_cast<Ncv32u>(devProp.textureAlignment), buf.ptr()); NCVMemStackAllocator gpuAllocator(NCVMemoryTypeDevice, bufSize, static_cast<Ncv32u>(devProp.textureAlignment), buf.ptr());
CV_Assert(gpuAllocator.isInitialized());
NCVStatus ncvStat = NCVBroxOpticalFlow(desc, gpuAllocator, frame0Mat, frame1Mat, uMat, vMat, stream); ncvSafeCall( NCVBroxOpticalFlow(desc, gpuAllocator, frame0Mat, frame1Mat, uMat, vMat, stream) );
CV_Assert(ncvStat == NCV_SUCCESS);
} }
void cv::gpu::interpolateFrames(const GpuMat& frame0, const GpuMat& frame1, const GpuMat& fu, const GpuMat& fv, const GpuMat& bu, const GpuMat& bv, void cv::gpu::interpolateFrames(const GpuMat& frame0, const GpuMat& frame1, const GpuMat& fu, const GpuMat& fv, const GpuMat& bu, const GpuMat& bv,
...@@ -189,7 +185,7 @@ void cv::gpu::interpolateFrames(const GpuMat& frame0, const GpuMat& frame1, cons ...@@ -189,7 +185,7 @@ void cv::gpu::interpolateFrames(const GpuMat& frame0, const GpuMat& frame1, cons
state.ppBuffers[4] = bui.ptr<Ncv32f>(); state.ppBuffers[4] = bui.ptr<Ncv32f>();
state.ppBuffers[5] = bvi.ptr<Ncv32f>(); state.ppBuffers[5] = bvi.ptr<Ncv32f>();
nppSafeCall( nppiStInterpolateFrames(&state) ); ncvSafeCall( nppiStInterpolateFrames(&state) );
if (stream == 0) if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaDeviceSynchronize() );
......
...@@ -39,15 +39,16 @@ ...@@ -39,15 +39,16 @@
// the use of this software, even if advised of the possibility of such damage. // the use of this software, even if advised of the possibility of such damage.
// //
//M*/ //M*/
#ifndef __OPENCV_PRECOMP_H__ #ifndef __OPENCV_PRECOMP_H__
#define __OPENCV_PRECOMP_H__ #define __OPENCV_PRECOMP_H__
#if _MSC_VER >= 1200 #if _MSC_VER >= 1200
#pragma warning( disable: 4251 4710 4711 4514 4996 ) #pragma warning( disable: 4251 4710 4711 4514 4996 )
#endif #endif
#ifdef HAVE_CVCONFIG_H #ifdef HAVE_CVCONFIG_H
#include "cvconfig.h" #include "cvconfig.h"
#endif #endif
#include <iostream> #include <iostream>
...@@ -65,33 +66,43 @@ ...@@ -65,33 +66,43 @@
#include "opencv2/calib3d/calib3d.hpp" #include "opencv2/calib3d/calib3d.hpp"
#include "opencv2/core/internal.hpp" #include "opencv2/core/internal.hpp"
#if defined(HAVE_CUDA) #define OPENCV_GPU_UNUSED(x) (void)x
#ifdef HAVE_CUDA
#include "internal_shared.hpp"
#include "cuda_runtime_api.h" #include "cuda_runtime_api.h"
#include "npp.h"
#ifdef HAVE_CUFFT
#include "cufft.h" #include "cufft.h"
#endif
#ifdef HAVE_CUBLAS
#include "cublas.h"
#endif
#include "internal_shared.hpp"
#include "opencv2/gpu/stream_accessor.hpp" #include "opencv2/gpu/stream_accessor.hpp"
#include "npp.h"
#include "nvidia/core/NCV.hpp" #include "nvidia/core/NCV.hpp"
#include "nvidia/NPP_staging/NPP_staging.hpp" #include "nvidia/NPP_staging/NPP_staging.hpp"
#include "nvidia/NCVHaarObjectDetection.hpp" #include "nvidia/NCVHaarObjectDetection.hpp"
#include "nvidia/NCVBroxOpticalFlow.hpp" #include "nvidia/NCVBroxOpticalFlow.hpp"
#define CUDART_MINIMUM_REQUIRED_VERSION 4000 #define CUDART_MINIMUM_REQUIRED_VERSION 4000
#define NPP_MINIMUM_REQUIRED_VERSION 4000 #define NPP_MINIMUM_REQUIRED_VERSION 4000
#if (CUDART_VERSION < CUDART_MINIMUM_REQUIRED_VERSION) #if (CUDART_VERSION < CUDART_MINIMUM_REQUIRED_VERSION)
#error "Insufficient Cuda Runtime library version, please update it." #error "Insufficient Cuda Runtime library version, please update it."
#endif #endif
#if (NPP_VERSION_MAJOR*1000+NPP_VERSION_MINOR*100+NPP_VERSION_BUILD < NPP_MINIMUM_REQUIRED_VERSION) #if (NPP_VERSION_MAJOR * 1000 + NPP_VERSION_MINOR * 100 + NPP_VERSION_BUILD < NPP_MINIMUM_REQUIRED_VERSION)
#error "Insufficient NPP version, please update it." #error "Insufficient NPP version, please update it."
#endif #endif
#if defined(CUDA_ARCH_BIN_OR_PTX_10) #if defined(CUDA_ARCH_BIN_OR_PTX_10)
#error "OpenCV GPU module doesn't support NVIDIA compute capability 1.0" #error "OpenCV GPU module doesn't support NVIDIA compute capability 1.0"
#endif #endif
static inline void throw_nogpu() { CV_Error(CV_GpuNotSupported, "The called functionality is disabled for current build or platform"); } static inline void throw_nogpu() { CV_Error(CV_GpuNotSupported, "The called functionality is disabled for current build or platform"); }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment