Commit 75cf5cc4 authored by Roman Donchenko, committed by OpenCV Buildbot

Merge pull request #943 from jet47:cuda-5.5-support

Parents: 5237647f, bcf8bdb4
@@ -26,6 +26,15 @@ if(CUDA_FOUND)
     set(HAVE_CUBLAS 1)
   endif()
 
+  if(${CUDA_VERSION} VERSION_LESS "5.5")
+    find_cuda_helper_libs(npp)
+  else()
+    find_cuda_helper_libs(nppc)
+    find_cuda_helper_libs(nppi)
+    find_cuda_helper_libs(npps)
+    set(CUDA_npp_LIBRARY ${CUDA_nppc_LIBRARY} ${CUDA_nppi_LIBRARY} ${CUDA_npps_LIBRARY})
+  endif()
+
   if(WITH_NVCUVID)
     find_cuda_helper_libs(nvcuvid)
     set(HAVE_NVCUVID 1)
@@ -136,8 +145,6 @@ if(CUDA_FOUND)
   mark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD CUDA_SDK_ROOT_DIR)
 
-  find_cuda_helper_libs(npp)
-
   macro(ocv_cuda_compile VAR)
     foreach(var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG)
       set(${var}_backup_in_cuda_compile_ "${${var}}")
......
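Note on the CMake hunk above: CUDA 5.5 split the former monolithic NPP library (libnpp) into three components, nppc (core), nppi (image processing) and npps (signal processing). The branch keeps CUDA_npp_LIBRARY meaning "all of NPP", so downstream link lines do not have to change. A minimal sketch (mine, not part of the patch) of the same split expressed at compile time, assuming only that cuda.h defines CUDA_VERSION and nppversion.h defines the NPP version macros:

    // Sketch: which NPP library layout does this translation unit expect?
    #include <cuda.h>         // defines CUDA_VERSION (e.g. 5050 for CUDA 5.5)
    #include <nppversion.h>   // defines NPP_VERSION_MAJOR / _MINOR / _BUILD

    #if CUDA_VERSION < 5050
        // single monolithic library: link -lnpp
    #else
        // split libraries: link -lnppc -lnppi -lnpps
    #endif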
@@ -124,8 +124,8 @@ namespace cv { namespace gpu { namespace device
     struct WithOutMask
     {
-        __device__ __forceinline__ WithOutMask(){}
-        __device__ __forceinline__ WithOutMask(const WithOutMask& mask){}
+        __host__ __device__ __forceinline__ WithOutMask(){}
+        __host__ __device__ __forceinline__ WithOutMask(const WithOutMask&){}
 
         __device__ __forceinline__ void next() const
         {
......
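The recurring change in this and all the following .cu files is the same one-liner: the trivial default and copy constructors of device functors gain a __host__ qualifier. These functors are stateless, constructed on the host, and copied by value into a kernel's parameter space; nvcc in CUDA 5.5 became stricter about invoking __device__-only constructors from host code. A self-contained sketch of the pattern (mine, not from the patch):

    #include <cstdio>
    #include <cuda_runtime.h>

    struct Square
    {
        __device__ __forceinline__ int operator ()(int x) const { return x * x; }

        // Device-only constructors would make the host-side "Square()" below
        // ill-formed; __host__ __device__ keeps both sides callable. They are
        // empty because the functor has no state to copy.
        __host__ __device__ __forceinline__ Square() {}
        __host__ __device__ __forceinline__ Square(const Square&) {}
    };

    template <typename Op>
    __global__ void apply(const int* src, int* dst, int n, Op op)
    {
        const int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < n)
            dst[i] = op(src[i]);
    }

    int main()
    {
        const int n = 4;
        int h_src[n] = {1, 2, 3, 4};
        int h_dst[n] = {0, 0, 0, 0};

        int *d_src = 0, *d_dst = 0;
        cudaMalloc(&d_src, n * sizeof(int));
        cudaMalloc(&d_dst, n * sizeof(int));
        cudaMemcpy(d_src, h_src, n * sizeof(int), cudaMemcpyHostToDevice);

        // The functor is constructed on the host and copied by value to the
        // kernel -- this is what requires host-callable constructors.
        apply<<<1, n>>>(d_src, d_dst, n, Square());

        cudaMemcpy(h_dst, d_dst, n * sizeof(int), cudaMemcpyDeviceToHost);
        for (int i = 0; i < n; ++i)
            printf("%d ", h_dst[i]);   // prints: 1 4 9 16
        printf("\n");

        cudaFree(d_src);
        cudaFree(d_dst);
        return 0;
    }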
@@ -67,8 +67,8 @@ namespace cv { namespace gpu { namespace device
                               crot1.x * p.x + crot1.y * p.y + crot1.z * p.z + ctransl.y,
                               crot2.x * p.x + crot2.y * p.y + crot2.z * p.z + ctransl.z);
         }
-        __device__ __forceinline__ TransformOp() {}
-        __device__ __forceinline__ TransformOp(const TransformOp&) {}
+        __host__ __device__ __forceinline__ TransformOp() {}
+        __host__ __device__ __forceinline__ TransformOp(const TransformOp&) {}
     };
 
     void call(const PtrStepSz<float3> src, const float* rot,
@@ -106,8 +106,8 @@ namespace cv { namespace gpu { namespace device
                               (cproj0.x * t.x + cproj0.y * t.y) / t.z + cproj0.z,
                               (cproj1.x * t.x + cproj1.y * t.y) / t.z + cproj1.z);
         }
-        __device__ __forceinline__ ProjectOp() {}
-        __device__ __forceinline__ ProjectOp(const ProjectOp&) {}
+        __host__ __device__ __forceinline__ ProjectOp() {}
+        __host__ __device__ __forceinline__ ProjectOp(const ProjectOp&) {}
     };
 
     void call(const PtrStepSz<float3> src, const float* rot,
......
@@ -62,8 +62,8 @@ namespace canny
             return ::abs(x) + ::abs(y);
         }
 
-        __device__ __forceinline__ L1() {}
-        __device__ __forceinline__ L1(const L1&) {}
+        __host__ __device__ __forceinline__ L1() {}
+        __host__ __device__ __forceinline__ L1(const L1&) {}
     };
 
     struct L2 : binary_function<int, int, float>
     {
@@ -72,8 +72,8 @@ namespace canny
             return ::sqrtf(x * x + y * y);
         }
 
-        __device__ __forceinline__ L2() {}
-        __device__ __forceinline__ L2(const L2&) {}
+        __host__ __device__ __forceinline__ L2() {}
+        __host__ __device__ __forceinline__ L2(const L2&) {}
     };
 }
@@ -470,8 +470,8 @@ namespace canny
             return (uchar)(-(e >> 1));
         }
 
-        __device__ __forceinline__ GetEdges() {}
-        __device__ __forceinline__ GetEdges(const GetEdges&) {}
+        __host__ __device__ __forceinline__ GetEdges() {}
+        __host__ __device__ __forceinline__ GetEdges(const GetEdges&) {}
     };
 }
......
@@ -162,8 +162,8 @@ namespace arithm
             return vadd4(a, b);
         }
 
-        __device__ __forceinline__ VAdd4() {}
-        __device__ __forceinline__ VAdd4(const VAdd4& other) {}
+        __host__ __device__ __forceinline__ VAdd4() {}
+        __host__ __device__ __forceinline__ VAdd4(const VAdd4&) {}
     };
 
     ////////////////////////////////////
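For context, vadd4 (OpenCV defines it in opencv2/gpu/device/simd_functions.hpp) treats a uint as four packed 8-bit lanes and adds them lane-wise in one operation, using the GPU's SIMD video instructions where available. A portable sketch of the idea, assuming saturating per-lane semantics as 8-bit image addition requires:

    // Four independent 8-bit additions inside one 32-bit word; each lane is
    // clamped to 255 instead of carrying into its neighbor.
    __device__ __forceinline__ unsigned int vadd4_sketch(unsigned int a, unsigned int b)
    {
        unsigned int r = 0;
        for (int i = 0; i < 32; i += 8)
        {
            const unsigned int s = ((a >> i) & 0xffU) + ((b >> i) & 0xffU);
            r |= (s > 0xffU ? 0xffU : s) << i;   // saturate this 8-bit lane
        }
        return r;
    }
    // Example: 0xFF010203 + 0x02020202 -> 0xFF030405 (the 0xFF lane saturates)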
@@ -175,8 +175,8 @@ namespace arithm
             return vadd2(a, b);
         }
 
-        __device__ __forceinline__ VAdd2() {}
-        __device__ __forceinline__ VAdd2(const VAdd2& other) {}
+        __host__ __device__ __forceinline__ VAdd2() {}
+        __host__ __device__ __forceinline__ VAdd2(const VAdd2&) {}
     };
 
     ////////////////////////////////////
@@ -188,8 +188,8 @@ namespace arithm
             return saturate_cast<D>(a + b);
         }
 
-        __device__ __forceinline__ AddMat() {}
-        __device__ __forceinline__ AddMat(const AddMat& other) {}
+        __host__ __device__ __forceinline__ AddMat() {}
+        __host__ __device__ __forceinline__ AddMat(const AddMat&) {}
    };
 }
@@ -397,8 +397,8 @@ namespace arithm
             return vsub4(a, b);
         }
 
-        __device__ __forceinline__ VSub4() {}
-        __device__ __forceinline__ VSub4(const VSub4& other) {}
+        __host__ __device__ __forceinline__ VSub4() {}
+        __host__ __device__ __forceinline__ VSub4(const VSub4&) {}
     };
 
     ////////////////////////////////////
@@ -410,8 +410,8 @@ namespace arithm
             return vsub2(a, b);
         }
 
-        __device__ __forceinline__ VSub2() {}
-        __device__ __forceinline__ VSub2(const VSub2& other) {}
+        __host__ __device__ __forceinline__ VSub2() {}
+        __host__ __device__ __forceinline__ VSub2(const VSub2&) {}
     };
 
     ////////////////////////////////////
@@ -423,8 +423,8 @@ namespace arithm
             return saturate_cast<D>(a - b);
         }
 
-        __device__ __forceinline__ SubMat() {}
-        __device__ __forceinline__ SubMat(const SubMat& other) {}
+        __host__ __device__ __forceinline__ SubMat() {}
+        __host__ __device__ __forceinline__ SubMat(const SubMat&) {}
     };
 }
@@ -617,8 +617,8 @@ namespace arithm
             return res;
         }
 
-        __device__ __forceinline__ Mul_8uc4_32f() {}
-        __device__ __forceinline__ Mul_8uc4_32f(const Mul_8uc4_32f& other) {}
+        __host__ __device__ __forceinline__ Mul_8uc4_32f() {}
+        __host__ __device__ __forceinline__ Mul_8uc4_32f(const Mul_8uc4_32f&) {}
     };
 
     struct Mul_16sc4_32f : binary_function<short4, float, short4>
@@ -629,8 +629,8 @@ namespace arithm
                                saturate_cast<short>(a.z * b), saturate_cast<short>(a.w * b));
         }
 
-        __device__ __forceinline__ Mul_16sc4_32f() {}
-        __device__ __forceinline__ Mul_16sc4_32f(const Mul_16sc4_32f& other) {}
+        __host__ __device__ __forceinline__ Mul_16sc4_32f() {}
+        __host__ __device__ __forceinline__ Mul_16sc4_32f(const Mul_16sc4_32f&) {}
     };
 
     template <typename T, typename D> struct Mul : binary_function<T, T, D>
@@ -640,8 +640,8 @@ namespace arithm
             return saturate_cast<D>(a * b);
         }
 
-        __device__ __forceinline__ Mul() {}
-        __device__ __forceinline__ Mul(const Mul& other) {}
+        __host__ __device__ __forceinline__ Mul() {}
+        __host__ __device__ __forceinline__ Mul(const Mul&) {}
     };
 
     template <typename T, typename S, typename D> struct MulScale : binary_function<T, T, D>
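saturate_cast<D>, used throughout these functors, clamps a value to the destination type's range instead of letting the conversion wrap (the real device implementation lives in opencv2/gpu/device/saturate_cast.hpp and can use single-instruction saturating conversions). A plain sketch of the uchar case:

    // Clamping conversion: values outside [0, 255] are pinned to the boundary.
    __device__ __forceinline__ unsigned char saturate_cast_u8_sketch(int v)
    {
        return (unsigned char)(v < 0 ? 0 : v > 255 ? 255 : v);
    }
    // saturate_cast_u8_sketch(300) == 255 and saturate_cast_u8_sketch(-5) == 0,
    // whereas a raw (unsigned char) cast would wrap to 44 and 251.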
@@ -888,8 +888,8 @@ namespace arithm
             return b != 0 ? saturate_cast<D>(a / b) : 0;
         }
 
-        __device__ __forceinline__ Div() {}
-        __device__ __forceinline__ Div(const Div& other) {}
+        __host__ __device__ __forceinline__ Div() {}
+        __host__ __device__ __forceinline__ Div(const Div&) {}
     };
 
     template <typename T> struct Div<T, float> : binary_function<T, T, float>
     {
@@ -898,8 +898,8 @@ namespace arithm
             return b != 0 ? static_cast<float>(a) / b : 0;
         }
 
-        __device__ __forceinline__ Div() {}
-        __device__ __forceinline__ Div(const Div& other) {}
+        __host__ __device__ __forceinline__ Div() {}
+        __host__ __device__ __forceinline__ Div(const Div&) {}
     };
 
     template <typename T> struct Div<T, double> : binary_function<T, T, double>
     {
@@ -908,8 +908,8 @@ namespace arithm
             return b != 0 ? static_cast<double>(a) / b : 0;
         }
 
-        __device__ __forceinline__ Div() {}
-        __device__ __forceinline__ Div(const Div& other) {}
+        __host__ __device__ __forceinline__ Div() {}
+        __host__ __device__ __forceinline__ Div(const Div&) {}
     };
 
     template <typename T, typename S, typename D> struct DivScale : binary_function<T, T, D>
@@ -1196,8 +1196,8 @@ namespace arithm
             return vabsdiff4(a, b);
         }
 
-        __device__ __forceinline__ VAbsDiff4() {}
-        __device__ __forceinline__ VAbsDiff4(const VAbsDiff4& other) {}
+        __host__ __device__ __forceinline__ VAbsDiff4() {}
+        __host__ __device__ __forceinline__ VAbsDiff4(const VAbsDiff4&) {}
     };
 
     ////////////////////////////////////
@@ -1209,8 +1209,8 @@ namespace arithm
             return vabsdiff2(a, b);
         }
 
-        __device__ __forceinline__ VAbsDiff2() {}
-        __device__ __forceinline__ VAbsDiff2(const VAbsDiff2& other) {}
+        __host__ __device__ __forceinline__ VAbsDiff2() {}
+        __host__ __device__ __forceinline__ VAbsDiff2(const VAbsDiff2&) {}
     };
 
     ////////////////////////////////////
@@ -1235,8 +1235,8 @@ namespace arithm
             return saturate_cast<T>(_abs(a - b));
         }
 
-        __device__ __forceinline__ AbsDiffMat() {}
-        __device__ __forceinline__ AbsDiffMat(const AbsDiffMat& other) {}
+        __host__ __device__ __forceinline__ AbsDiffMat() {}
+        __host__ __device__ __forceinline__ AbsDiffMat(const AbsDiffMat&) {}
     };
 }
@@ -1370,8 +1370,8 @@ namespace arithm
             return saturate_cast<T>(x * x);
         }
 
-        __device__ __forceinline__ Sqr() {}
-        __device__ __forceinline__ Sqr(const Sqr& other) {}
+        __host__ __device__ __forceinline__ Sqr() {}
+        __host__ __device__ __forceinline__ Sqr(const Sqr&) {}
     };
 }
@@ -1466,8 +1466,8 @@ namespace arithm
             return saturate_cast<T>(f(x));
         }
 
-        __device__ __forceinline__ Exp() {}
-        __device__ __forceinline__ Exp(const Exp& other) {}
+        __host__ __device__ __forceinline__ Exp() {}
+        __host__ __device__ __forceinline__ Exp(const Exp&) {}
     };
 }
@@ -1507,8 +1507,8 @@ namespace arithm
             return vcmpeq4(a, b);
         }
 
-        __device__ __forceinline__ VCmpEq4() {}
-        __device__ __forceinline__ VCmpEq4(const VCmpEq4& other) {}
+        __host__ __device__ __forceinline__ VCmpEq4() {}
+        __host__ __device__ __forceinline__ VCmpEq4(const VCmpEq4&) {}
     };
 
     struct VCmpNe4 : binary_function<uint, uint, uint>
     {
@@ -1517,8 +1517,8 @@ namespace arithm
             return vcmpne4(a, b);
         }
 
-        __device__ __forceinline__ VCmpNe4() {}
-        __device__ __forceinline__ VCmpNe4(const VCmpNe4& other) {}
+        __host__ __device__ __forceinline__ VCmpNe4() {}
+        __host__ __device__ __forceinline__ VCmpNe4(const VCmpNe4&) {}
     };
 
     struct VCmpLt4 : binary_function<uint, uint, uint>
     {
@@ -1527,8 +1527,8 @@ namespace arithm
             return vcmplt4(a, b);
         }
 
-        __device__ __forceinline__ VCmpLt4() {}
-        __device__ __forceinline__ VCmpLt4(const VCmpLt4& other) {}
+        __host__ __device__ __forceinline__ VCmpLt4() {}
+        __host__ __device__ __forceinline__ VCmpLt4(const VCmpLt4&) {}
     };
 
     struct VCmpLe4 : binary_function<uint, uint, uint>
     {
@@ -1537,8 +1537,8 @@ namespace arithm
             return vcmple4(a, b);
         }
 
-        __device__ __forceinline__ VCmpLe4() {}
-        __device__ __forceinline__ VCmpLe4(const VCmpLe4& other) {}
+        __host__ __device__ __forceinline__ VCmpLe4() {}
+        __host__ __device__ __forceinline__ VCmpLe4(const VCmpLe4&) {}
     };
 
     ////////////////////////////////////
@@ -2008,8 +2008,8 @@ namespace arithm
             return vmin4(a, b);
         }
 
-        __device__ __forceinline__ VMin4() {}
-        __device__ __forceinline__ VMin4(const VMin4& other) {}
+        __host__ __device__ __forceinline__ VMin4() {}
+        __host__ __device__ __forceinline__ VMin4(const VMin4&) {}
     };
 
     ////////////////////////////////////
@@ -2021,8 +2021,8 @@ namespace arithm
             return vmin2(a, b);
         }
 
-        __device__ __forceinline__ VMin2() {}
-        __device__ __forceinline__ VMin2(const VMin2& other) {}
+        __host__ __device__ __forceinline__ VMin2() {}
+        __host__ __device__ __forceinline__ VMin2(const VMin2&) {}
     };
 }
@@ -2100,8 +2100,8 @@ namespace arithm
             return vmax4(a, b);
         }
 
-        __device__ __forceinline__ VMax4() {}
-        __device__ __forceinline__ VMax4(const VMax4& other) {}
+        __host__ __device__ __forceinline__ VMax4() {}
+        __host__ __device__ __forceinline__ VMax4(const VMax4&) {}
     };
 
     ////////////////////////////////////
@@ -2113,8 +2113,8 @@ namespace arithm
             return vmax2(a, b);
        }
 
-        __device__ __forceinline__ VMax2() {}
-        __device__ __forceinline__ VMax2(const VMax2& other) {}
+        __host__ __device__ __forceinline__ VMax2() {}
+        __host__ __device__ __forceinline__ VMax2(const VMax2&) {}
     };
 }
......
@@ -81,48 +81,90 @@ namespace
     const ErrorEntry npp_errors [] =
     {
-        error_entry( NPP_NOT_SUPPORTED_MODE_ERROR ),
-        error_entry( NPP_ROUND_MODE_NOT_SUPPORTED_ERROR ),
-        error_entry( NPP_RESIZE_NO_OPERATION_ERROR ),
-
 #if defined (_MSC_VER)
         error_entry( NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY ),
 #endif
 
+#if NPP_VERSION < 5500
         error_entry( NPP_BAD_ARG_ERROR ),
-        error_entry( NPP_LUT_NUMBER_OF_LEVELS_ERROR ),
-        error_entry( NPP_TEXTURE_BIND_ERROR ),
         error_entry( NPP_COEFF_ERROR ),
         error_entry( NPP_RECT_ERROR ),
         error_entry( NPP_QUAD_ERROR ),
-        error_entry( NPP_WRONG_INTERSECTION_ROI_ERROR ),
-        error_entry( NPP_NOT_EVEN_STEP_ERROR ),
-        error_entry( NPP_INTERPOLATION_ERROR ),
-        error_entry( NPP_RESIZE_FACTOR_ERROR ),
-        error_entry( NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR ),
         error_entry( NPP_MEMFREE_ERR ),
         error_entry( NPP_MEMSET_ERR ),
-        error_entry( NPP_MEMCPY_ERROR ),
         error_entry( NPP_MEM_ALLOC_ERR ),
         error_entry( NPP_HISTO_NUMBER_OF_LEVELS_ERROR ),
         error_entry( NPP_MIRROR_FLIP_ERR ),
         error_entry( NPP_INVALID_INPUT ),
+        error_entry( NPP_POINTER_ERROR ),
+        error_entry( NPP_WARNING ),
+        error_entry( NPP_ODD_ROI_WARNING ),
+#else
+        error_entry( NPP_INVALID_HOST_POINTER_ERROR ),
+        error_entry( NPP_INVALID_DEVICE_POINTER_ERROR ),
+        error_entry( NPP_LUT_PALETTE_BITSIZE_ERROR ),
+        error_entry( NPP_ZC_MODE_NOT_SUPPORTED_ERROR ),
+        error_entry( NPP_MEMFREE_ERROR ),
+        error_entry( NPP_MEMSET_ERROR ),
+        error_entry( NPP_QUALITY_INDEX_ERROR ),
+        error_entry( NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR ),
+        error_entry( NPP_CHANNEL_ORDER_ERROR ),
+        error_entry( NPP_ZERO_MASK_VALUE_ERROR ),
+        error_entry( NPP_QUADRANGLE_ERROR ),
+        error_entry( NPP_RECTANGLE_ERROR ),
+        error_entry( NPP_COEFFICIENT_ERROR ),
+        error_entry( NPP_NUMBER_OF_CHANNELS_ERROR ),
+        error_entry( NPP_COI_ERROR ),
+        error_entry( NPP_DIVISOR_ERROR ),
+        error_entry( NPP_CHANNEL_ERROR ),
+        error_entry( NPP_STRIDE_ERROR ),
+        error_entry( NPP_ANCHOR_ERROR ),
+        error_entry( NPP_MASK_SIZE_ERROR ),
+        error_entry( NPP_MIRROR_FLIP_ERROR ),
+        error_entry( NPP_MOMENT_00_ZERO_ERROR ),
+        error_entry( NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR ),
+        error_entry( NPP_THRESHOLD_ERROR ),
+        error_entry( NPP_CONTEXT_MATCH_ERROR ),
+        error_entry( NPP_FFT_FLAG_ERROR ),
+        error_entry( NPP_FFT_ORDER_ERROR ),
+        error_entry( NPP_SCALE_RANGE_ERROR ),
+        error_entry( NPP_DATA_TYPE_ERROR ),
+        error_entry( NPP_OUT_OFF_RANGE_ERROR ),
+        error_entry( NPP_DIVIDE_BY_ZERO_ERROR ),
+        error_entry( NPP_MEMORY_ALLOCATION_ERR ),
+        error_entry( NPP_RANGE_ERROR ),
+        error_entry( NPP_BAD_ARGUMENT_ERROR ),
+        error_entry( NPP_NO_MEMORY_ERROR ),
+        error_entry( NPP_ERROR_RESERVED ),
+        error_entry( NPP_NO_OPERATION_WARNING ),
+        error_entry( NPP_DIVIDE_BY_ZERO_WARNING ),
+        error_entry( NPP_WRONG_INTERSECTION_ROI_WARNING ),
+#endif
+
+        error_entry( NPP_NOT_SUPPORTED_MODE_ERROR ),
+        error_entry( NPP_ROUND_MODE_NOT_SUPPORTED_ERROR ),
+        error_entry( NPP_RESIZE_NO_OPERATION_ERROR ),
+        error_entry( NPP_LUT_NUMBER_OF_LEVELS_ERROR ),
+        error_entry( NPP_TEXTURE_BIND_ERROR ),
+        error_entry( NPP_WRONG_INTERSECTION_ROI_ERROR ),
+        error_entry( NPP_NOT_EVEN_STEP_ERROR ),
+        error_entry( NPP_INTERPOLATION_ERROR ),
+        error_entry( NPP_RESIZE_FACTOR_ERROR ),
+        error_entry( NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR ),
+        error_entry( NPP_MEMCPY_ERROR ),
         error_entry( NPP_ALIGNMENT_ERROR ),
         error_entry( NPP_STEP_ERROR ),
         error_entry( NPP_SIZE_ERROR ),
-        error_entry( NPP_POINTER_ERROR ),
         error_entry( NPP_NULL_POINTER_ERROR ),
         error_entry( NPP_CUDA_KERNEL_EXECUTION_ERROR ),
         error_entry( NPP_NOT_IMPLEMENTED_ERROR ),
         error_entry( NPP_ERROR ),
         error_entry( NPP_NO_ERROR ),
         error_entry( NPP_SUCCESS ),
-        error_entry( NPP_WARNING ),
         error_entry( NPP_WRONG_INTERSECTION_QUAD_WARNING ),
         error_entry( NPP_MISALIGNED_DST_ROI_WARNING ),
         error_entry( NPP_AFFINE_QUAD_INCORRECT_WARNING ),
-        error_entry( NPP_DOUBLE_SIZE_WARNING ),
-        error_entry( NPP_ODD_ROI_WARNING )
+        error_entry( NPP_DOUBLE_SIZE_WARNING )
     };
 
     const size_t npp_error_num = sizeof(npp_errors) / sizeof(npp_errors[0]);
......
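For context, error_entry is a small macro that pairs an NPP status code with its stringified name, so the table above can translate status codes into readable messages; NPP 5.5 renamed or removed many of the old enumerators, which is why the table is now split on NPP_VERSION. A sketch of the surrounding machinery (names approximate what the gpu module's error handling uses):

    struct ErrorEntry
    {
        int code;
        const char* str;
    };

    #define error_entry(entry) { entry, #entry }   // value plus its own name as a string

    // Linear lookup over the table built above (npp_errors / npp_error_num):
    const char* nppErrorString(int code, const ErrorEntry* table, size_t n)
    {
        for (size_t i = 0; i < n; ++i)
            if (table[i].code == code)
                return table[i].str;
        return "Unknown error code";
    }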
@@ -187,10 +187,20 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType)
     CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
     CV_Assert(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2);
 
-    typedef NppStatus (*npp_norm_diff_func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2,
-                                              NppiSize oSizeROI, Npp64f* pRetVal);
-
-    static const npp_norm_diff_func_t npp_norm_diff_func[] = {nppiNormDiff_Inf_8u_C1R, nppiNormDiff_L1_8u_C1R, nppiNormDiff_L2_8u_C1R};
+#if CUDA_VERSION < 5050
+    typedef NppStatus (*func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2, NppiSize oSizeROI, Npp64f* pRetVal);
+
+    static const func_t funcs[] = {nppiNormDiff_Inf_8u_C1R, nppiNormDiff_L1_8u_C1R, nppiNormDiff_L2_8u_C1R};
+#else
+    typedef NppStatus (*func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2,
+                                NppiSize oSizeROI, Npp64f* pRetVal, Npp8u * pDeviceBuffer);
+    typedef NppStatus (*buf_size_func_t)(NppiSize oSizeROI, int* hpBufferSize);
+
+    static const func_t funcs[] = {nppiNormDiff_Inf_8u_C1R, nppiNormDiff_L1_8u_C1R, nppiNormDiff_L2_8u_C1R};
+    static const buf_size_func_t buf_size_funcs[] = {nppiNormDiffInfGetBufferHostSize_8u_C1R, nppiNormDiffL1GetBufferHostSize_8u_C1R, nppiNormDiffL2GetBufferHostSize_8u_C1R};
+#endif
 
     NppiSize sz;
     sz.width  = src1.cols;
@@ -202,7 +212,16 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType)
 
     DeviceBuffer dbuf;
 
-    nppSafeCall( npp_norm_diff_func[funcIdx](src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step), sz, dbuf) );
+#if CUDA_VERSION < 5050
+    nppSafeCall( funcs[funcIdx](src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step), sz, dbuf) );
+#else
+    int bufSize;
+    buf_size_funcs[funcIdx](sz, &bufSize);
+
+    GpuMat buf(1, bufSize, CV_8UC1);
+
+    nppSafeCall( funcs[funcIdx](src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step), sz, dbuf, buf.data) );
+#endif
 
     cudaSafeCall( cudaDeviceSynchronize() );
......
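The second half of this hunk is the substantive change: from CUDA 5.5 on, NPP's reduction primitives no longer allocate their own scratch memory, so the caller must query the required size with the matching ...GetBufferHostSize... function and pass a device buffer in. A standalone sketch of that two-phase calling convention (d_src1/d_src2 and their steps are assumed device pointers; error checking omitted):

    #include <cuda_runtime.h>
    #include <npp.h>

    Npp64f normDiffL2(const Npp8u* d_src1, int step1,
                      const Npp8u* d_src2, int step2, int cols, int rows)
    {
        NppiSize sz;
        sz.width  = cols;
        sz.height = rows;

        int bufSize = 0;
        nppiNormDiffL2GetBufferHostSize_8u_C1R(sz, &bufSize);   // 1) query scratch size

        Npp8u* pScratch = 0;
        cudaMalloc((void**)&pScratch, bufSize);                 // 2) device scratch buffer

        Npp64f* dResult = 0;
        cudaMalloc((void**)&dResult, sizeof(Npp64f));           // result lands in device memory

        nppiNormDiff_L2_8u_C1R(d_src1, step1, d_src2, step2,    // 3) the reduction itself
                               sz, dResult, pScratch);

        Npp64f result = 0;
        cudaMemcpy(&result, dResult, sizeof(Npp64f), cudaMemcpyDeviceToHost);

        cudaFree(dResult);
        cudaFree(pScratch);
        return result;
    }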
@@ -116,11 +116,13 @@
 #define CUDART_MINIMUM_REQUIRED_VERSION 4010
 #define NPP_MINIMUM_REQUIRED_VERSION 4100
 
+#define NPP_VERSION (NPP_VERSION_MAJOR * 1000 + NPP_VERSION_MINOR * 100 + NPP_VERSION_BUILD)
+
 #if (CUDART_VERSION < CUDART_MINIMUM_REQUIRED_VERSION)
     #error "Insufficient Cuda Runtime library version, please update it."
 #endif
 
-#if (NPP_VERSION_MAJOR * 1000 + NPP_VERSION_MINOR * 100 + NPP_VERSION_BUILD < NPP_MINIMUM_REQUIRED_VERSION)
+#if (NPP_VERSION < NPP_MINIMUM_REQUIRED_VERSION)
     #error "Insufficient NPP version, please update it."
 #endif
......
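The new NPP_VERSION macro folds the three version components into one comparable number; it is the same encoding the NPP_VERSION < 5500 checks in the error table above rely on. A worked illustration of the arithmetic (my annotation, not part of the patch):

    // NPP_VERSION = MAJOR * 1000 + MINOR * 100 + BUILD
    // NPP 4.1.0 -> 4 * 1000 + 1 * 100 + 0 = 4100   (NPP_MINIMUM_REQUIRED_VERSION)
    // NPP 5.5.0 -> 5 * 1000 + 5 * 100 + 0 = 5500   (the CUDA 5.5 threshold)

    #if NPP_VERSION < 5500
        // pre-5.5 error codes; NPP reductions allocate their own scratch memory
    #else
        // 5.5+ error codes; reductions need an explicit device buffer
    #endif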