Commit 12f304ec authored by Roman Donchenko's avatar Roman Donchenko Committed by OpenCV Buildbot

Merge pull request #1146 from jet47:fix-gpu-without-cufft-cublas

parents c48d3ad7 ebe7ff99
...@@ -619,6 +619,7 @@ namespace cv { namespace gpu { namespace device ...@@ -619,6 +619,7 @@ namespace cv { namespace gpu { namespace device
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
// mulSpectrums // mulSpectrums
#ifdef HAVE_CUFFT
__global__ void mulSpectrumsKernel(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c) __global__ void mulSpectrumsKernel(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c)
{ {
const int x = blockIdx.x * blockDim.x + threadIdx.x; const int x = blockIdx.x * blockDim.x + threadIdx.x;
...@@ -642,11 +643,13 @@ namespace cv { namespace gpu { namespace device ...@@ -642,11 +643,13 @@ namespace cv { namespace gpu { namespace device
if (stream == 0) if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaDeviceSynchronize() );
} }
#endif
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
// mulSpectrums_CONJ // mulSpectrums_CONJ
#ifdef HAVE_CUFFT
__global__ void mulSpectrumsKernel_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c) __global__ void mulSpectrumsKernel_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c)
{ {
const int x = blockIdx.x * blockDim.x + threadIdx.x; const int x = blockIdx.x * blockDim.x + threadIdx.x;
...@@ -670,11 +673,13 @@ namespace cv { namespace gpu { namespace device ...@@ -670,11 +673,13 @@ namespace cv { namespace gpu { namespace device
if (stream == 0) if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaDeviceSynchronize() );
} }
#endif
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
// mulAndScaleSpectrums // mulAndScaleSpectrums
#ifdef HAVE_CUFFT
__global__ void mulAndScaleSpectrumsKernel(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c) __global__ void mulAndScaleSpectrumsKernel(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c)
{ {
const int x = blockIdx.x * blockDim.x + threadIdx.x; const int x = blockIdx.x * blockDim.x + threadIdx.x;
...@@ -699,11 +704,13 @@ namespace cv { namespace gpu { namespace device ...@@ -699,11 +704,13 @@ namespace cv { namespace gpu { namespace device
if (stream) if (stream)
cudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaDeviceSynchronize() );
} }
#endif
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
// mulAndScaleSpectrums_CONJ // mulAndScaleSpectrums_CONJ
#ifdef HAVE_CUFFT
__global__ void mulAndScaleSpectrumsKernel_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c) __global__ void mulAndScaleSpectrumsKernel_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c)
{ {
const int x = blockIdx.x * blockDim.x + threadIdx.x; const int x = blockIdx.x * blockDim.x + threadIdx.x;
...@@ -728,6 +735,7 @@ namespace cv { namespace gpu { namespace device ...@@ -728,6 +735,7 @@ namespace cv { namespace gpu { namespace device
if (stream == 0) if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaDeviceSynchronize() );
} }
#endif
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
// buildWarpMaps // buildWarpMaps
......
...@@ -43,53 +43,94 @@ ...@@ -43,53 +43,94 @@
#ifndef __OPENCV_CUDA_SAFE_CALL_HPP__ #ifndef __OPENCV_CUDA_SAFE_CALL_HPP__
#define __OPENCV_CUDA_SAFE_CALL_HPP__ #define __OPENCV_CUDA_SAFE_CALL_HPP__
#include "cvconfig.h"
#include <cuda_runtime_api.h> #include <cuda_runtime_api.h>
#include <cufft.h>
#include <cublas.h>
#include "NCV.hpp"
#if defined(__GNUC__) #ifdef HAVE_CUFFT
#define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__, __func__) # include <cufft.h>
#define ncvSafeCall(expr) ___ncvSafeCall(expr, __FILE__, __LINE__, __func__)
#define cufftSafeCall(expr) ___cufftSafeCall(expr, __FILE__, __LINE__, __func__)
#define cublasSafeCall(expr) ___cublasSafeCall(expr, __FILE__, __LINE__, __func__)
#else /* defined(__CUDACC__) || defined(__MSVC__) */
#define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__)
#define ncvSafeCall(expr) ___ncvSafeCall(expr, __FILE__, __LINE__)
#define cufftSafeCall(expr) ___cufftSafeCall(expr, __FILE__, __LINE__)
#define cublasSafeCall(expr) ___cublasSafeCall(expr, __FILE__, __LINE__)
#endif #endif
namespace cv { namespace gpu #ifdef HAVE_CUBLAS
{ # include <cublas.h>
void nppError(int err, const char *file, const int line, const char *func = ""); #endif
void ncvError(int err, const char *file, const int line, const char *func = "");
#include "NCV.hpp"
namespace cv { namespace gpu {
void nppError(int err, const char *file, const int line, const char *func = "");
void ncvError(int err, const char *file, const int line, const char *func = "");
#ifdef HAVE_CUFFT
void cufftError(int err, const char *file, const int line, const char *func = ""); void cufftError(int err, const char *file, const int line, const char *func = "");
#endif
#ifdef HAVE_CUBLAS
void cublasError(int err, const char *file, const int line, const char *func = ""); void cublasError(int err, const char *file, const int line, const char *func = "");
#endif
}} }}
// nppSafeCall
static inline void ___nppSafeCall(int err, const char *file, const int line, const char *func = "") static inline void ___nppSafeCall(int err, const char *file, const int line, const char *func = "")
{ {
if (err < 0) if (err < 0)
cv::gpu::nppError(err, file, line, func); cv::gpu::nppError(err, file, line, func);
} }
#if defined(__GNUC__)
#define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__, __func__)
#else
#define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__)
#endif
// ncvSafeCall
static inline void ___ncvSafeCall(int err, const char *file, const int line, const char *func = "") static inline void ___ncvSafeCall(int err, const char *file, const int line, const char *func = "")
{ {
if (NCV_SUCCESS != err) if (NCV_SUCCESS != err)
cv::gpu::ncvError(err, file, line, func); cv::gpu::ncvError(err, file, line, func);
} }
static inline void ___cufftSafeCall(cufftResult_t err, const char *file, const int line, const char *func = "") #if defined(__GNUC__)
{ #define ncvSafeCall(expr) ___ncvSafeCall(expr, __FILE__, __LINE__, __func__)
if (CUFFT_SUCCESS != err) #else
cv::gpu::cufftError(err, file, line, func); #define ncvSafeCall(expr) ___ncvSafeCall(expr, __FILE__, __LINE__)
} #endif
static inline void ___cublasSafeCall(cublasStatus_t err, const char *file, const int line, const char *func = "") // cufftSafeCall
{
if (CUBLAS_STATUS_SUCCESS != err) #ifdef HAVE_CUFFT
cv::gpu::cublasError(err, file, line, func); static inline void ___cufftSafeCall(cufftResult_t err, const char *file, const int line, const char *func = "")
} {
if (CUFFT_SUCCESS != err)
cv::gpu::cufftError(err, file, line, func);
}
#if defined(__GNUC__)
#define cufftSafeCall(expr) ___cufftSafeCall(expr, __FILE__, __LINE__, __func__)
#else
#define cufftSafeCall(expr) ___cufftSafeCall(expr, __FILE__, __LINE__)
#endif
#endif
// cublasSafeCall
#ifdef HAVE_CUBLAS
static inline void ___cublasSafeCall(cublasStatus_t err, const char *file, const int line, const char *func = "")
{
if (CUBLAS_STATUS_SUCCESS != err)
cv::gpu::cublasError(err, file, line, func);
}
#if defined(__GNUC__)
#define cublasSafeCall(expr) ___cublasSafeCall(expr, __FILE__, __LINE__, __func__)
#else
#define cublasSafeCall(expr) ___cublasSafeCall(expr, __FILE__, __LINE__)
#endif
#endif
#endif /* __OPENCV_CUDA_SAFE_CALL_HPP__ */ #endif /* __OPENCV_CUDA_SAFE_CALL_HPP__ */
...@@ -224,6 +224,7 @@ namespace ...@@ -224,6 +224,7 @@ namespace
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
// CUFFT errors // CUFFT errors
#ifdef HAVE_CUFFT
const ErrorEntry cufft_errors[] = const ErrorEntry cufft_errors[] =
{ {
error_entry( CUFFT_INVALID_PLAN ), error_entry( CUFFT_INVALID_PLAN ),
...@@ -238,10 +239,12 @@ namespace ...@@ -238,10 +239,12 @@ namespace
}; };
const int cufft_error_num = sizeof(cufft_errors) / sizeof(cufft_errors[0]); const int cufft_error_num = sizeof(cufft_errors) / sizeof(cufft_errors[0]);
#endif
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
// CUBLAS errors // CUBLAS errors
#ifdef HAVE_CUBLAS
const ErrorEntry cublas_errors[] = const ErrorEntry cublas_errors[] =
{ {
error_entry( CUBLAS_STATUS_SUCCESS ), error_entry( CUBLAS_STATUS_SUCCESS ),
...@@ -255,6 +258,7 @@ namespace ...@@ -255,6 +258,7 @@ namespace
}; };
const int cublas_error_num = sizeof(cublas_errors) / sizeof(cublas_errors[0]); const int cublas_error_num = sizeof(cublas_errors) / sizeof(cublas_errors[0]);
#endif
} }
namespace cv namespace cv
...@@ -273,17 +277,21 @@ namespace cv ...@@ -273,17 +277,21 @@ namespace cv
cv::gpu::error(msg.c_str(), file, line, func); cv::gpu::error(msg.c_str(), file, line, func);
} }
#ifdef HAVE_CUFFT
void cufftError(int code, const char *file, const int line, const char *func) void cufftError(int code, const char *file, const int line, const char *func)
{ {
string msg = getErrorString(code, cufft_errors, cufft_error_num); string msg = getErrorString(code, cufft_errors, cufft_error_num);
cv::gpu::error(msg.c_str(), file, line, func); cv::gpu::error(msg.c_str(), file, line, func);
} }
#endif
#ifdef HAVE_CUBLAS
void cublasError(int code, const char *file, const int line, const char *func) void cublasError(int code, const char *file, const int line, const char *func)
{ {
string msg = getErrorString(code, cublas_errors, cublas_error_num); string msg = getErrorString(code, cublas_errors, cublas_error_num);
cv::gpu::error(msg.c_str(), file, line, func); cv::gpu::error(msg.c_str(), file, line, func);
} }
#endif
} }
} }
......
...@@ -1136,6 +1136,8 @@ void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuM ...@@ -1136,6 +1136,8 @@ void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuM
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
// mulSpectrums // mulSpectrums
#ifdef HAVE_CUFFT
namespace cv { namespace gpu { namespace device namespace cv { namespace gpu { namespace device
{ {
namespace imgproc namespace imgproc
...@@ -1146,9 +1148,20 @@ namespace cv { namespace gpu { namespace device ...@@ -1146,9 +1148,20 @@ namespace cv { namespace gpu { namespace device
} }
}}} }}}
#endif
void cv::gpu::mulSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, bool conjB, Stream& stream) void cv::gpu::mulSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, bool conjB, Stream& stream)
{ {
(void)flags; #ifndef HAVE_CUFFT
(void) a;
(void) b;
(void) c;
(void) flags;
(void) conjB;
(void) stream;
throw_nogpu();
#else
(void) flags;
using namespace ::cv::gpu::device::imgproc; using namespace ::cv::gpu::device::imgproc;
typedef void (*Caller)(const PtrStep<cufftComplex>, const PtrStep<cufftComplex>, PtrStepSz<cufftComplex>, cudaStream_t stream); typedef void (*Caller)(const PtrStep<cufftComplex>, const PtrStep<cufftComplex>, PtrStepSz<cufftComplex>, cudaStream_t stream);
...@@ -1162,11 +1175,14 @@ void cv::gpu::mulSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flag ...@@ -1162,11 +1175,14 @@ void cv::gpu::mulSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flag
Caller caller = callers[(int)conjB]; Caller caller = callers[(int)conjB];
caller(a, b, c, StreamAccessor::getStream(stream)); caller(a, b, c, StreamAccessor::getStream(stream));
#endif
} }
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
// mulAndScaleSpectrums // mulAndScaleSpectrums
#ifdef HAVE_CUFFT
namespace cv { namespace gpu { namespace device namespace cv { namespace gpu { namespace device
{ {
namespace imgproc namespace imgproc
...@@ -1177,8 +1193,20 @@ namespace cv { namespace gpu { namespace device ...@@ -1177,8 +1193,20 @@ namespace cv { namespace gpu { namespace device
} }
}}} }}}
#endif
void cv::gpu::mulAndScaleSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, float scale, bool conjB, Stream& stream) void cv::gpu::mulAndScaleSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, float scale, bool conjB, Stream& stream)
{ {
#ifndef HAVE_CUFFT
(void) a;
(void) b;
(void) c;
(void) flags;
(void) scale;
(void) conjB;
(void) stream;
throw_nogpu();
#else
(void)flags; (void)flags;
using namespace ::cv::gpu::device::imgproc; using namespace ::cv::gpu::device::imgproc;
...@@ -1192,6 +1220,7 @@ void cv::gpu::mulAndScaleSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, ...@@ -1192,6 +1220,7 @@ void cv::gpu::mulAndScaleSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c,
Caller caller = callers[(int)conjB]; Caller caller = callers[(int)conjB];
caller(a, b, scale, c, StreamAccessor::getStream(stream)); caller(a, b, scale, c, StreamAccessor::getStream(stream));
#endif
} }
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment