Commit 40ee754e authored by Vladislav Vinogradov

added WITH_CUFFT and WITH_CUBLAS flags to cmake scripts

fixed gpu module error reporting
added asynchronous version of some functions
parent a73b509b
......@@ -452,8 +452,12 @@ set(WITH_EIGEN ON CACHE BOOL "Include Eigen2/Eigen3 support")
if( CMAKE_VERSION VERSION_GREATER "2.8")
set(WITH_CUDA ON CACHE BOOL "Include NVidia Cuda Runtime support")
set(WITH_CUFFT ON CACHE BOOL "Include NVidia Cuda Fast Fourier Transform (FFT) library support")
set(WITH_CUBLAS OFF CACHE BOOL "Include NVidia Cuda Basic Linear Algebra Subprograms (BLAS) library support")
else()
set(WITH_CUDA OFF CACHE BOOL "Include NVidia Cuda Runtime support")
set(WITH_CUFFT OFF CACHE BOOL "Include NVidia Cuda Fast Fourier Transform (FFT) library support")
set(WITH_CUBLAS OFF CACHE BOOL "Include NVidia Cuda Basic Linear Algebra Subprograms (BLAS) library support")
endif()
set(WITH_OPENNI OFF CACHE BOOL "Include OpenNI support")
......@@ -995,6 +999,15 @@ if(WITH_CUDA)
if(CUDA_FOUND)
set(HAVE_CUDA 1)
if(WITH_CUFFT)
set(HAVE_CUFFT 1)
endif()
if(WITH_CUBLAS)
set(HAVE_CUBLAS 1)
endif()
message(STATUS "CUDA detected: " ${CUDA_VERSION})
set(CUDA_ARCH_BIN "1.1 1.2 1.3 2.0 2.1(2.0)" CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported")
......
......@@ -172,6 +172,12 @@
/* NVidia Cuda Runtime API*/
#cmakedefine HAVE_CUDA
/* NVidia Cuda Fast Fourier Transform (FFT) API*/
#cmakedefine HAVE_CUFFT
/* NVidia Cuda Basic Linear Algebra Subprograms (BLAS) API*/
#cmakedefine HAVE_CUBLAS
/* Compile for 'real' NVIDIA GPU architectures */
#define CUDA_ARCH_BIN "${OPENCV_CUDA_ARCH_BIN}"
......
......@@ -250,9 +250,7 @@ enum {
CV_StsBadMemBlock= -214, /* an allocated block has been corrupted */
CV_StsAssert= -215, /* assertion failed */
CV_GpuNotSupported= -216,
CV_GpuApiCallError= -217,
CV_GpuNppCallError= -218,
CV_GpuCufftCallError= -219
CV_GpuApiCallError= -217
};
/****************************************************************************************\
......
......@@ -629,9 +629,8 @@ CV_IMPL const char* cvErrorStr( int status )
case CV_StsNotImplemented : return "The function/feature is not implemented";
case CV_StsBadMemBlock : return "Memory block has been corrupted";
case CV_StsAssert : return "Assertion failed";
case CV_GpuNotSupported : return "No GPU support";
case CV_GpuApiCallError : return "Gpu Api call";
case CV_GpuNppCallError : return "Npp Api call";
case CV_GpuNotSupported : return "No GPU support";
case CV_GpuApiCallError : return "Gpu Api call";
};
sprintf(buf, "Unknown %s code %d", status >= 0 ? "status":"error", status);
......
......@@ -120,12 +120,19 @@ set_target_properties(${the_target} PROPERTIES
target_link_libraries(${the_target} ${OPENCV_LINKER_LIBS} ${IPP_LIBS} ${DEPS} )
if (HAVE_CUDA)
target_link_libraries(${the_target} ${CUDA_LIBRARIES})
CUDA_ADD_CUFFT_TO_TARGET(${the_target})
target_link_libraries(${the_target} ${CUDA_LIBRARIES})
unset(CUDA_npp_LIBRARY CACHE)
find_cuda_helper_libs(npp)
target_link_libraries(${the_target} ${CUDA_npp_LIBRARY})
if(HAVE_CUFFT)
CUDA_ADD_CUFFT_TO_TARGET(${the_target})
endif()
if(HAVE_CUBLAS)
CUDA_ADD_CUBLAS_TO_TARGET(${the_target})
endif()
endif()
if(MSVC)
......
......@@ -141,8 +141,8 @@ namespace cv
//////////////////////////////// Error handling ////////////////////////
CV_EXPORTS void error(const char *error_string, const char *file, const int line, const char *func);
CV_EXPORTS void nppError( int err, const char *file, const int line, const char *func);
//CV_EXPORTS void error(const char *error_string, const char *file, const int line, const char *func);
//CV_EXPORTS void nppError( int err, const char *file, const int line, const char *func);
//////////////////////////////// CudaMem ////////////////////////////////
// CudaMem is limited cv::Mat with page locked memory allocation.
......@@ -628,11 +628,11 @@ namespace cv
//! Does mean shift filtering on GPU.
CV_EXPORTS void meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr,
TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1), Stream& stream = Stream::Null());
//! Does mean shift procedure on GPU.
CV_EXPORTS void meanShiftProc(const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int sp, int sr,
TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1), Stream& stream = Stream::Null());
//! Does mean shift segmentation with elimination of small regions.
CV_EXPORTS void meanShiftSegmentation(const GpuMat& src, Mat& dst, int sp, int sr, int minsize,
......@@ -683,10 +683,12 @@ namespace cv
//! rotate 8bit single or four channel image
//! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC
//! supports CV_8UC1, CV_8UC4 types
CV_EXPORTS void rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift = 0, double yShift = 0, int interpolation = INTER_LINEAR, Stream& stream = Stream::Null());
CV_EXPORTS void rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift = 0, double yShift = 0,
int interpolation = INTER_LINEAR, Stream& stream = Stream::Null());
//! copies 2D array to a larger destination array and pads borders with user-specifiable constant
CV_EXPORTS void copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, int borderType, const Scalar& value = Scalar(), Stream& stream = Stream::Null());
CV_EXPORTS void copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, int borderType,
const Scalar& value = Scalar(), Stream& stream = Stream::Null());
//! computes the integral image
//! sum will have CV_32S type, but will contain unsigned int values
......@@ -715,21 +717,26 @@ namespace cv
CV_EXPORTS void rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, const Rect& rect, Stream& stream = Stream::Null());
//! computes Harris cornerness criteria at each image pixel
CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, double k, int borderType=BORDER_REFLECT101);
CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, double k, int borderType=BORDER_REFLECT101);
CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, double k,
int borderType = BORDER_REFLECT101);
CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, double k,
int borderType = BORDER_REFLECT101);
CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize, double k,
int borderType = BORDER_REFLECT101, Stream& stream = Stream::Null());
//! computes minimum eigen value of 2x2 derivative covariation matrix at each pixel - the cornerness criteria
CV_EXPORTS void cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, int borderType=BORDER_REFLECT101);
CV_EXPORTS void cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int borderType=BORDER_REFLECT101);
CV_EXPORTS void cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize,
int borderType=BORDER_REFLECT101, Stream& stream = Stream::Null());
//! performs per-element multiplication of two full (not packed) Fourier spectrums
//! supports 32FC2 matrixes only (interleaved format)
CV_EXPORTS void mulSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, bool conjB=false);
CV_EXPORTS void mulSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, bool conjB=false, Stream& stream = Stream::Null());
//! performs per-element multiplication of two full (not packed) Fourier spectrums and scales the result
//! supports 32FC2 matrixes only (interleaved format)
CV_EXPORTS void mulAndScaleSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags,
float scale, bool conjB=false);
CV_EXPORTS void mulAndScaleSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, float scale, bool conjB=false, Stream& stream = Stream::Null());
//! Performs a forward or inverse discrete Fourier transform (1D or 2D) of floating point matrix.
//! Param dft_size is the size of DFT transform.
......@@ -742,19 +749,14 @@ namespace cv
//! in CUFFT's format. Result as full complex matrix for such kind of transform cannot be retrieved.
//!
//! For complex-to-real transform it is assumed that the source matrix is packed in CUFFT's format.
CV_EXPORTS void dft(const GpuMat& src, GpuMat& dst, Size dft_size, int flags=0);
CV_EXPORTS void dft(const GpuMat& src, GpuMat& dst, Size dft_size, int flags=0, Stream& stream = Stream::Null());
//! computes convolution (or cross-correlation) of two images using discrete Fourier transform
//! supports source images of 32FC1 type only
//! result matrix will have 32FC1 type
CV_EXPORTS void convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result,
bool ccorr=false);
struct CV_EXPORTS ConvolveBuf;
//! buffered version
CV_EXPORTS void convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result,
bool ccorr, ConvolveBuf& buf);
CV_EXPORTS void convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, bool ccorr = false);
CV_EXPORTS void convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, bool ccorr, ConvolveBuf& buf, Stream& stream = Stream::Null());
struct CV_EXPORTS ConvolveBuf
{
......@@ -766,7 +768,7 @@ namespace cv
private:
static Size estimateBlockSize(Size result_size, Size templ_size);
friend void convolve(const GpuMat&, const GpuMat&, GpuMat&, bool, ConvolveBuf&);
friend void convolve(const GpuMat&, const GpuMat&, GpuMat&, bool, ConvolveBuf&, Stream& stream);
Size result_size;
Size block_size;
......@@ -778,7 +780,7 @@ namespace cv
};
//! computes the proximity map for the raster template and the image where the template is searched for
CV_EXPORTS void matchTemplate(const GpuMat& image, const GpuMat& templ, GpuMat& result, int method);
CV_EXPORTS void matchTemplate(const GpuMat& image, const GpuMat& templ, GpuMat& result, int method, Stream& stream = Stream::Null());
//! smoothes the source image and downsamples it
CV_EXPORTS void pyrDown(const GpuMat& src, GpuMat& dst, int borderType = BORDER_DEFAULT, Stream& stream = Stream::Null());
......
......@@ -93,7 +93,7 @@ void cv::gpu::transpose(const GpuMat& src, GpuMat& dst, Stream& s)
sz.width = src.cols;
sz.height = src.rows;
nppSafeCall( nppiStTranspose_32u_C1R(const_cast<Ncv32u*>(src.ptr<Ncv32u>()), static_cast<int>(src.step),
ncvSafeCall( nppiStTranspose_32u_C1R(const_cast<Ncv32u*>(src.ptr<Ncv32u>()), static_cast<int>(src.step),
dst.ptr<Ncv32u>(), static_cast<int>(dst.step), sz) );
}
else // if (src.elemSize() == 8)
......@@ -104,7 +104,7 @@ void cv::gpu::transpose(const GpuMat& src, GpuMat& dst, Stream& s)
sz.width = src.cols;
sz.height = src.rows;
nppSafeCall( nppiStTranspose_64u_C1R(const_cast<Ncv64u*>(src.ptr<Ncv64u>()), static_cast<int>(src.step),
ncvSafeCall( nppiStTranspose_64u_C1R(const_cast<Ncv64u*>(src.ptr<Ncv64u>()), static_cast<int>(src.step),
dst.ptr<Ncv64u>(), static_cast<int>(dst.step), sz) );
}
......
......@@ -66,10 +66,7 @@ struct cv::gpu::CascadeClassifier_GPU::CascadeClassifierImpl
CascadeClassifierImpl(const string& filename) : lastAllocatedFrameSize(-1, -1)
{
ncvSetDebugOutputHandler(NCVDebugOutputHandler);
if (ncvStat != load(filename))
{
CV_Error(CV_GpuApiCallError, "Error in GPU cacade load");
}
ncvSafeCall( load(filename) );
}
......@@ -287,11 +284,7 @@ int cv::gpu::CascadeClassifier_GPU::detectMultiScale( const GpuMat& image, GpuMa
}
unsigned int numDetections;
NCVStatus ncvStat = impl->process(image, objectsBuf, (float)scaleFactor, minNeighbors, findLargestObject, visualizeInPlace, ncvMinSize, numDetections);
if (ncvStat != NCV_SUCCESS)
{
CV_Error(CV_GpuApiCallError, "Error in face detectioln");
}
ncvSafeCall( impl->process(image, objectsBuf, (float)scaleFactor, minNeighbors, findLargestObject, visualizeInPlace, ncvMinSize, numDetections) );
return numDetections;
}
......
This diff is collapsed.
This diff is collapsed.
......@@ -45,16 +45,18 @@
#include "cuda_runtime_api.h"
#include "cufft.h"
//#include <nppdefs.h>
#include "NCV.hpp"
#if defined(__GNUC__)
#define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__, __func__)
#define cufftSafeCall(expr) ___cufftSafeCall(expr, __FILE__, __LINE__, __func__)
#define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__, __func__)
#define ncvSafeCall(expr) ___ncvSafeCall(expr, __FILE__, __LINE__, __func__)
#define cufftSafeCall(expr) ___cufftSafeCall(expr, __FILE__, __LINE__, __func__)
#else /* defined(__CUDACC__) || defined(__MSVC__) */
#define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__)
#define cufftSafeCall(expr) ___cufftSafeCall(expr, __FILE__, __LINE__)
#define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__)
#define ncvSafeCall(expr) ___ncvSafeCall(expr, __FILE__, __LINE__)
#define cufftSafeCall(expr) ___cufftSafeCall(expr, __FILE__, __LINE__)
#endif
namespace cv
......@@ -62,8 +64,9 @@ namespace cv
namespace gpu
{
void error(const char *error_string, const char *file, const int line, const char *func = "");
void nppError(int err, const char *file, const int line, const char *func = "");
void cufftError(int err, const char *file, const int line, const char *func = "");
void nppError(int err, const char *file, const int line, const char *func = "");
void ncvError(int err, const char *file, const int line, const char *func = "");
void cufftError(int err, const char *file, const int line, const char *func = "");
static inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "")
{
......@@ -71,17 +74,23 @@ namespace cv
cv::gpu::error(cudaGetErrorString(err), file, line, func);
}
static inline void ___cufftSafeCall(cufftResult_t err, const char *file, const int line, const char *func = "")
{
if (CUFFT_SUCCESS != err)
cv::gpu::cufftError(err, file, line, func);
}
// Checks an NPP status value: negative codes are handed to cv::gpu::nppError together
// with the call-site location; zero and positive codes are ignored.
static inline void ___nppSafeCall(int err, const char *file, const int line, const char *func = "")
{
    const bool failed = err < 0;
    if (failed)
    {
        cv::gpu::nppError(err, file, line, func);
    }
}
// Checks an NCV status value: anything other than NCV_SUCCESS is handed to
// cv::gpu::ncvError together with the call-site location.
static inline void ___ncvSafeCall(int err, const char *file, const int line, const char *func = "")
{
    if (err == NCV_SUCCESS)
        return;

    cv::gpu::ncvError(err, file, line, func);
}
// Checks a CUFFT result: anything other than CUFFT_SUCCESS is handed to
// cv::gpu::cufftError together with the call-site location.
static inline void ___cufftSafeCall(cufftResult_t err, const char *file, const int line, const char *func = "")
{
    if (err == CUFFT_SUCCESS)
        return;

    cv::gpu::cufftError(err, file, line, func);
}
}
}
......
......@@ -42,30 +42,45 @@
#include "precomp.hpp"
using namespace cv;
using namespace cv::gpu;
using namespace std;
#if !defined (HAVE_CUDA)
#else /* !defined (HAVE_CUDA) */
#ifdef HAVE_CUDA
namespace
{
#define error_entry(entry) { entry, #entry }
//////////////////////////////////////////////////////////////////////////
// NPP errors
struct NppError
struct ErrorEntry
{
int error;
int code;
string str;
}
};
struct ErrorEntryComparer
{
int code;
ErrorEntryComparer(int code_) : code(code_) {};
bool operator()(const ErrorEntry& e) const { return e.code == code; }
};
string getErrorString(int code, const ErrorEntry* errors, size_t n)
{
size_t idx = find_if(errors, errors + n, ErrorEntryComparer(code)) - errors;
const string& msg = (idx != n) ? errors[idx].str : string("Unknown error code");
ostringstream ostr;
ostr << msg << " [Code = " << code << "]";
return ostr.str();
}
//////////////////////////////////////////////////////////////////////////
// NPP errors
npp_errors [] =
const ErrorEntry npp_errors [] =
{
error_entry( NPP_NOT_SUPPORTED_MODE_ERROR ),
error_entry( NPP_ROUND_MODE_NOT_SUPPORTED_ERROR ),
......@@ -74,6 +89,7 @@ namespace
#if defined (_MSC_VER)
error_entry( NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY ),
#endif
error_entry( NPP_BAD_ARG_ERROR ),
error_entry( NPP_LUT_NUMBER_OF_LEVELS_ERROR ),
error_entry( NPP_TEXTURE_BIND_ERROR ),
......@@ -110,106 +126,116 @@ namespace
error_entry( NPP_ODD_ROI_WARNING )
};
const size_t error_num = sizeof(npp_errors) / sizeof(npp_errors[0]);
const size_t npp_error_num = sizeof(npp_errors) / sizeof(npp_errors[0]);
struct Searcher
//////////////////////////////////////////////////////////////////////////
// NCV errors
const ErrorEntry ncv_errors [] =
{
int err;
Searcher(int err_) : err(err_) {};
bool operator()(const NppError& e) const { return e.error == err; }
error_entry( NCV_SUCCESS ),
error_entry( NCV_UNKNOWN_ERROR ),
error_entry( NCV_CUDA_ERROR ),
error_entry( NCV_NPP_ERROR ),
error_entry( NCV_FILE_ERROR ),
error_entry( NCV_NULL_PTR ),
error_entry( NCV_INCONSISTENT_INPUT ),
error_entry( NCV_TEXTURE_BIND_ERROR ),
error_entry( NCV_DIMENSIONS_INVALID ),
error_entry( NCV_INVALID_ROI ),
error_entry( NCV_INVALID_STEP ),
error_entry( NCV_INVALID_SCALE ),
error_entry( NCV_INVALID_SCALE ),
error_entry( NCV_ALLOCATOR_NOT_INITIALIZED ),
error_entry( NCV_ALLOCATOR_BAD_ALLOC ),
error_entry( NCV_ALLOCATOR_BAD_DEALLOC ),
error_entry( NCV_ALLOCATOR_INSUFFICIENT_CAPACITY ),
error_entry( NCV_ALLOCATOR_DEALLOC_ORDER ),
error_entry( NCV_ALLOCATOR_BAD_REUSE ),
error_entry( NCV_MEM_COPY_ERROR ),
error_entry( NCV_MEM_RESIDENCE_ERROR ),
error_entry( NCV_MEM_INSUFFICIENT_CAPACITY ),
error_entry( NCV_HAAR_INVALID_PIXEL_STEP ),
error_entry( NCV_HAAR_TOO_MANY_FEATURES_IN_CLASSIFIER ),
error_entry( NCV_HAAR_TOO_MANY_FEATURES_IN_CASCADE ),
error_entry( NCV_HAAR_TOO_LARGE_FEATURES ),
error_entry( NCV_HAAR_XML_LOADING_EXCEPTION ),
error_entry( NCV_NOIMPL_HAAR_TILTED_FEATURES ),
error_entry( NCV_WARNING_HAAR_DETECTIONS_VECTOR_OVERFLOW ),
error_entry( NPPST_SUCCESS ),
error_entry( NPPST_ERROR ),
error_entry( NPPST_CUDA_KERNEL_EXECUTION_ERROR ),
error_entry( NPPST_NULL_POINTER_ERROR ),
error_entry( NPPST_TEXTURE_BIND_ERROR ),
error_entry( NPPST_MEMCPY_ERROR ),
error_entry( NPPST_MEM_ALLOC_ERR ),
error_entry( NPPST_MEMFREE_ERR ),
error_entry( NPPST_INVALID_ROI ),
error_entry( NPPST_INVALID_STEP ),
error_entry( NPPST_INVALID_SCALE ),
error_entry( NPPST_MEM_INSUFFICIENT_BUFFER ),
error_entry( NPPST_MEM_RESIDENCE_ERROR ),
error_entry( NPPST_MEM_INTERNAL_ERROR )
};
const size_t ncv_error_num = sizeof(npp_errors) / sizeof(npp_errors[0]);
//////////////////////////////////////////////////////////////////////////
// CUFFT errors
struct CufftError
{
int code;
string message;
};
const CufftError cufft_errors[] =
{
error_entry(CUFFT_INVALID_PLAN),
error_entry(CUFFT_ALLOC_FAILED),
error_entry(CUFFT_INVALID_TYPE),
error_entry(CUFFT_INVALID_VALUE),
error_entry(CUFFT_INTERNAL_ERROR),
error_entry(CUFFT_EXEC_FAILED),
error_entry(CUFFT_SETUP_FAILED),
error_entry(CUFFT_INVALID_SIZE),
error_entry(CUFFT_UNALIGNED_DATA)
};
struct CufftErrorComparer
const ErrorEntry cufft_errors[] =
{
CufftErrorComparer(int code_): code(code_) {}
bool operator()(const CufftError& other) const
{
return other.code == code;
}
int code;
error_entry( CUFFT_INVALID_PLAN ),
error_entry( CUFFT_ALLOC_FAILED ),
error_entry( CUFFT_INVALID_TYPE ),
error_entry( CUFFT_INVALID_VALUE ),
error_entry( CUFFT_INTERNAL_ERROR ),
error_entry( CUFFT_EXEC_FAILED ),
error_entry( CUFFT_SETUP_FAILED ),
error_entry( CUFFT_INVALID_SIZE ),
error_entry( CUFFT_UNALIGNED_DATA )
};
// Number of entries in cufft_errors. Declared as size_t so it matches npp_error_num and the
// size_t bound expected by getErrorString, and so the sizeof result is not narrowed to int.
const size_t cufft_error_num = sizeof(cufft_errors) / sizeof(cufft_errors[0]);
}
namespace cv
{
namespace gpu
{
const string getNppErrorString( int err )
{
size_t idx = std::find_if(npp_errors, npp_errors + error_num, Searcher(err)) - npp_errors;
const string& msg = (idx != error_num) ? npp_errors[idx].str : string("Unknown error code");
std::stringstream interpreter;
interpreter << msg <<" [Code = " << err << "]";
return interpreter.str();
}
void nppError( int err, const char *file, const int line, const char *func)
{
cv::error( cv::Exception(CV_GpuNppCallError, getNppErrorString(err), func, file, line) );
}
const string getCufftErrorString(int err_code)
{
const CufftError* cufft_error = std::find_if(
cufft_errors, cufft_errors + cufft_error_num,
CufftErrorComparer(err_code));
bool found = cufft_error != cufft_errors + cufft_error_num;
std::stringstream ss;
ss << (found ? cufft_error->message : "Unknown error code");
ss << " [Code = " << err_code << "]";
return ss.str();
}
void cufftError(int err, const char *file, const int line, const char *func)
{
cv::error(cv::Exception(CV_GpuCufftCallError, getCufftErrorString(err), func, file, line));
}
void error(const char *error_string, const char *file, const int line, const char *func)
{
int code = CV_GpuApiCallError;
if (std::uncaught_exception())
if (uncaught_exception())
{
const char* errorStr = cvErrorStr(code);
const char* function = func ? func : "unknown function";
std::cerr << "OpenCV Error: " << errorStr << "(" << error_string << ") in " << function << ", file " << file << ", line " << line;
std::cerr.flush();
cerr << "OpenCV Error: " << errorStr << "(" << error_string << ") in " << function << ", file " << file << ", line " << line;
cerr.flush();
}
else
cv::error( cv::Exception(code, error_string, func, file, line) );
}
void nppError(int code, const char *file, const int line, const char *func)
{
string msg = getErrorString(code, npp_errors, npp_error_num);
cv::gpu::error(msg.c_str(), file, line, func);
}
void ncvError(int code, const char *file, const int line, const char *func)
{
string msg = getErrorString(code, ncv_errors, ncv_error_num);
cv::gpu::error(msg.c_str(), file, line, func);
}
void cufftError(int code, const char *file, const int line, const char *func)
{
string msg = getErrorString(code, cufft_errors, cufft_error_num);
cv::gpu::error(msg.c_str(), file, line, func);
}
}
}
......
This diff is collapsed.
This diff is collapsed.
......@@ -59,10 +59,8 @@ namespace
NCVMatrix<Ncv32f>& u, NCVMatrix<Ncv32f>& v, const cudaDeviceProp& devProp)
{
NCVMemStackAllocator gpuCounter(static_cast<Ncv32u>(devProp.textureAlignment));
CV_Assert(gpuCounter.isInitialized());
NCVStatus ncvStat = NCVBroxOpticalFlow(desc, gpuCounter, frame0, frame1, u, v, 0);
CV_Assert(ncvStat == NCV_SUCCESS);
ncvSafeCall( NCVBroxOpticalFlow(desc, gpuCounter, frame0, frame1, u, v, 0) );
return gpuCounter.maxSize();
}
......@@ -130,10 +128,8 @@ void cv::gpu::BroxOpticalFlow::operator ()(const GpuMat& frame0, const GpuMat& f
ensureSizeIsEnough(1, bufSize, CV_8UC1, buf);
NCVMemStackAllocator gpuAllocator(NCVMemoryTypeDevice, bufSize, static_cast<Ncv32u>(devProp.textureAlignment), buf.ptr());
CV_Assert(gpuAllocator.isInitialized());
NCVStatus ncvStat = NCVBroxOpticalFlow(desc, gpuAllocator, frame0Mat, frame1Mat, uMat, vMat, stream);
CV_Assert(ncvStat == NCV_SUCCESS);
ncvSafeCall( NCVBroxOpticalFlow(desc, gpuAllocator, frame0Mat, frame1Mat, uMat, vMat, stream) );
}
void cv::gpu::interpolateFrames(const GpuMat& frame0, const GpuMat& frame1, const GpuMat& fu, const GpuMat& fv, const GpuMat& bu, const GpuMat& bv,
......@@ -189,7 +185,7 @@ void cv::gpu::interpolateFrames(const GpuMat& frame0, const GpuMat& frame1, cons
state.ppBuffers[4] = bui.ptr<Ncv32f>();
state.ppBuffers[5] = bvi.ptr<Ncv32f>();
nppSafeCall( nppiStInterpolateFrames(&state) );
ncvSafeCall( nppiStInterpolateFrames(&state) );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
......
......@@ -39,15 +39,16 @@
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_PRECOMP_H__
#define __OPENCV_PRECOMP_H__
#if _MSC_VER >= 1200
#pragma warning( disable: 4251 4710 4711 4514 4996 )
#pragma warning( disable: 4251 4710 4711 4514 4996 )
#endif
#ifdef HAVE_CVCONFIG_H
#include "cvconfig.h"
#include "cvconfig.h"
#endif
#include <iostream>
......@@ -65,33 +66,43 @@
#include "opencv2/calib3d/calib3d.hpp"
#include "opencv2/core/internal.hpp"
#if defined(HAVE_CUDA)
#define OPENCV_GPU_UNUSED(x) (void)x
#ifdef HAVE_CUDA
#include "internal_shared.hpp"
#include "cuda_runtime_api.h"
#include "cufft.h"
#include "npp.h"
#ifdef HAVE_CUFFT
#include "cufft.h"
#endif
#ifdef HAVE_CUBLAS
#include "cublas.h"
#endif
#include "internal_shared.hpp"
#include "opencv2/gpu/stream_accessor.hpp"
#include "npp.h"
#include "nvidia/core/NCV.hpp"
#include "nvidia/NPP_staging/NPP_staging.hpp"
#include "nvidia/NCVHaarObjectDetection.hpp"
#include "nvidia/NCVBroxOpticalFlow.hpp"
#define CUDART_MINIMUM_REQUIRED_VERSION 4000
#define NPP_MINIMUM_REQUIRED_VERSION 4000
#define CUDART_MINIMUM_REQUIRED_VERSION 4000
#define NPP_MINIMUM_REQUIRED_VERSION 4000
#if (CUDART_VERSION < CUDART_MINIMUM_REQUIRED_VERSION)
#error "Insufficient Cuda Runtime library version, please update it."
#endif
#if (CUDART_VERSION < CUDART_MINIMUM_REQUIRED_VERSION)
#error "Insufficient Cuda Runtime library version, please update it."
#endif
#if (NPP_VERSION_MAJOR*1000+NPP_VERSION_MINOR*100+NPP_VERSION_BUILD < NPP_MINIMUM_REQUIRED_VERSION)
#error "Insufficient NPP version, please update it."
#endif
#if (NPP_VERSION_MAJOR * 1000 + NPP_VERSION_MINOR * 100 + NPP_VERSION_BUILD < NPP_MINIMUM_REQUIRED_VERSION)
#error "Insufficient NPP version, please update it."
#endif
#if defined(CUDA_ARCH_BIN_OR_PTX_10)
#error "OpenCV GPU module doesn't support NVIDIA compute capability 1.0"
#endif
#if defined(CUDA_ARCH_BIN_OR_PTX_10)
#error "OpenCV GPU module doesn't support NVIDIA compute capability 1.0"
#endif
// Reports a CV_GpuNotSupported error through CV_Error with a fixed explanatory message.
static inline void throw_nogpu()
{
    CV_Error(CV_GpuNotSupported, "The called functionality is disabled for current build or platform");
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment