Commit 173442bb authored by Roman Donchenko, committed by OpenCV Buildbot

Merge pull request #964 from jet47:cuda-5.5-support

parents c9295471 4559d461
...@@ -60,6 +60,8 @@ ...@@ -60,6 +60,8 @@
# include "opencv2/core/stream_accessor.hpp" # include "opencv2/core/stream_accessor.hpp"
# include "opencv2/core/cuda/common.hpp" # include "opencv2/core/cuda/common.hpp"
# define NPP_VERSION (NPP_VERSION_MAJOR * 1000 + NPP_VERSION_MINOR * 100 + NPP_VERSION_BUILD)
# define CUDART_MINIMUM_REQUIRED_VERSION 4020 # define CUDART_MINIMUM_REQUIRED_VERSION 4020
# if (CUDART_VERSION < CUDART_MINIMUM_REQUIRED_VERSION) # if (CUDART_VERSION < CUDART_MINIMUM_REQUIRED_VERSION)
......
...@@ -1547,48 +1547,90 @@ namespace ...@@ -1547,48 +1547,90 @@ namespace
const ErrorEntry npp_errors [] = const ErrorEntry npp_errors [] =
{ {
error_entry( NPP_NOT_SUPPORTED_MODE_ERROR ), #if defined (_MSC_VER)
error_entry( NPP_ROUND_MODE_NOT_SUPPORTED_ERROR ),
error_entry( NPP_RESIZE_NO_OPERATION_ERROR ),
#if defined (_MSC_VER)
error_entry( NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY ), error_entry( NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY ),
#endif #endif
#if NPP_VERSION < 5500
error_entry( NPP_BAD_ARG_ERROR ), error_entry( NPP_BAD_ARG_ERROR ),
error_entry( NPP_LUT_NUMBER_OF_LEVELS_ERROR ),
error_entry( NPP_TEXTURE_BIND_ERROR ),
error_entry( NPP_COEFF_ERROR ), error_entry( NPP_COEFF_ERROR ),
error_entry( NPP_RECT_ERROR ), error_entry( NPP_RECT_ERROR ),
error_entry( NPP_QUAD_ERROR ), error_entry( NPP_QUAD_ERROR ),
error_entry( NPP_WRONG_INTERSECTION_ROI_ERROR ),
error_entry( NPP_NOT_EVEN_STEP_ERROR ),
error_entry( NPP_INTERPOLATION_ERROR ),
error_entry( NPP_RESIZE_FACTOR_ERROR ),
error_entry( NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR ),
error_entry( NPP_MEMFREE_ERR ), error_entry( NPP_MEMFREE_ERR ),
error_entry( NPP_MEMSET_ERR ), error_entry( NPP_MEMSET_ERR ),
error_entry( NPP_MEMCPY_ERROR ),
error_entry( NPP_MEM_ALLOC_ERR ), error_entry( NPP_MEM_ALLOC_ERR ),
error_entry( NPP_HISTO_NUMBER_OF_LEVELS_ERROR ), error_entry( NPP_HISTO_NUMBER_OF_LEVELS_ERROR ),
error_entry( NPP_MIRROR_FLIP_ERR ), error_entry( NPP_MIRROR_FLIP_ERR ),
error_entry( NPP_INVALID_INPUT ), error_entry( NPP_INVALID_INPUT ),
error_entry( NPP_POINTER_ERROR ),
error_entry( NPP_WARNING ),
error_entry( NPP_ODD_ROI_WARNING ),
#else
error_entry( NPP_INVALID_HOST_POINTER_ERROR ),
error_entry( NPP_INVALID_DEVICE_POINTER_ERROR ),
error_entry( NPP_LUT_PALETTE_BITSIZE_ERROR ),
error_entry( NPP_ZC_MODE_NOT_SUPPORTED_ERROR ),
error_entry( NPP_MEMFREE_ERROR ),
error_entry( NPP_MEMSET_ERROR ),
error_entry( NPP_QUALITY_INDEX_ERROR ),
error_entry( NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR ),
error_entry( NPP_CHANNEL_ORDER_ERROR ),
error_entry( NPP_ZERO_MASK_VALUE_ERROR ),
error_entry( NPP_QUADRANGLE_ERROR ),
error_entry( NPP_RECTANGLE_ERROR ),
error_entry( NPP_COEFFICIENT_ERROR ),
error_entry( NPP_NUMBER_OF_CHANNELS_ERROR ),
error_entry( NPP_COI_ERROR ),
error_entry( NPP_DIVISOR_ERROR ),
error_entry( NPP_CHANNEL_ERROR ),
error_entry( NPP_STRIDE_ERROR ),
error_entry( NPP_ANCHOR_ERROR ),
error_entry( NPP_MASK_SIZE_ERROR ),
error_entry( NPP_MIRROR_FLIP_ERROR ),
error_entry( NPP_MOMENT_00_ZERO_ERROR ),
error_entry( NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR ),
error_entry( NPP_THRESHOLD_ERROR ),
error_entry( NPP_CONTEXT_MATCH_ERROR ),
error_entry( NPP_FFT_FLAG_ERROR ),
error_entry( NPP_FFT_ORDER_ERROR ),
error_entry( NPP_SCALE_RANGE_ERROR ),
error_entry( NPP_DATA_TYPE_ERROR ),
error_entry( NPP_OUT_OFF_RANGE_ERROR ),
error_entry( NPP_DIVIDE_BY_ZERO_ERROR ),
error_entry( NPP_MEMORY_ALLOCATION_ERR ),
error_entry( NPP_RANGE_ERROR ),
error_entry( NPP_BAD_ARGUMENT_ERROR ),
error_entry( NPP_NO_MEMORY_ERROR ),
error_entry( NPP_ERROR_RESERVED ),
error_entry( NPP_NO_OPERATION_WARNING ),
error_entry( NPP_DIVIDE_BY_ZERO_WARNING ),
error_entry( NPP_WRONG_INTERSECTION_ROI_WARNING ),
#endif
error_entry( NPP_NOT_SUPPORTED_MODE_ERROR ),
error_entry( NPP_ROUND_MODE_NOT_SUPPORTED_ERROR ),
error_entry( NPP_RESIZE_NO_OPERATION_ERROR ),
error_entry( NPP_LUT_NUMBER_OF_LEVELS_ERROR ),
error_entry( NPP_TEXTURE_BIND_ERROR ),
error_entry( NPP_WRONG_INTERSECTION_ROI_ERROR ),
error_entry( NPP_NOT_EVEN_STEP_ERROR ),
error_entry( NPP_INTERPOLATION_ERROR ),
error_entry( NPP_RESIZE_FACTOR_ERROR ),
error_entry( NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR ),
error_entry( NPP_MEMCPY_ERROR ),
error_entry( NPP_ALIGNMENT_ERROR ), error_entry( NPP_ALIGNMENT_ERROR ),
error_entry( NPP_STEP_ERROR ), error_entry( NPP_STEP_ERROR ),
error_entry( NPP_SIZE_ERROR ), error_entry( NPP_SIZE_ERROR ),
error_entry( NPP_POINTER_ERROR ),
error_entry( NPP_NULL_POINTER_ERROR ), error_entry( NPP_NULL_POINTER_ERROR ),
error_entry( NPP_CUDA_KERNEL_EXECUTION_ERROR ), error_entry( NPP_CUDA_KERNEL_EXECUTION_ERROR ),
error_entry( NPP_NOT_IMPLEMENTED_ERROR ), error_entry( NPP_NOT_IMPLEMENTED_ERROR ),
error_entry( NPP_ERROR ), error_entry( NPP_ERROR ),
error_entry( NPP_NO_ERROR ), error_entry( NPP_NO_ERROR ),
error_entry( NPP_SUCCESS ), error_entry( NPP_SUCCESS ),
error_entry( NPP_WARNING ),
error_entry( NPP_WRONG_INTERSECTION_QUAD_WARNING ), error_entry( NPP_WRONG_INTERSECTION_QUAD_WARNING ),
error_entry( NPP_MISALIGNED_DST_ROI_WARNING ), error_entry( NPP_MISALIGNED_DST_ROI_WARNING ),
error_entry( NPP_AFFINE_QUAD_INCORRECT_WARNING ), error_entry( NPP_AFFINE_QUAD_INCORRECT_WARNING ),
error_entry( NPP_DOUBLE_SIZE_WARNING ), error_entry( NPP_DOUBLE_SIZE_WARNING )
error_entry( NPP_ODD_ROI_WARNING )
}; };
const size_t npp_error_num = sizeof(npp_errors) / sizeof(npp_errors[0]); const size_t npp_error_num = sizeof(npp_errors) / sizeof(npp_errors[0]);
......
...@@ -153,7 +153,7 @@ namespace cv { namespace gpu { namespace cudev ...@@ -153,7 +153,7 @@ namespace cv { namespace gpu { namespace cudev
template<typename I> __device__ __forceinline__ bool operator() (const I& a, const I& b) const template<typename I> __device__ __forceinline__ bool operator() (const I& a, const I& b) const
{ {
I d = a - b; I d = saturate_cast<I>(a - b);
return lo.x <= d.x && d.x <= hi.x && return lo.x <= d.x && d.x <= hi.x &&
lo.y <= d.y && d.y <= hi.y && lo.y <= d.y && d.y <= hi.y &&
lo.z <= d.z && d.z <= hi.z; lo.z <= d.z && d.z <= hi.z;
...@@ -169,7 +169,7 @@ namespace cv { namespace gpu { namespace cudev ...@@ -169,7 +169,7 @@ namespace cv { namespace gpu { namespace cudev
template<typename I> __device__ __forceinline__ bool operator() (const I& a, const I& b) const template<typename I> __device__ __forceinline__ bool operator() (const I& a, const I& b) const
{ {
I d = a - b; I d = saturate_cast<I>(a - b);
return lo.x <= d.x && d.x <= hi.x && return lo.x <= d.x && d.x <= hi.x &&
lo.y <= d.y && d.y <= hi.y && lo.y <= d.y && d.y <= hi.y &&
lo.z <= d.z && d.z <= hi.z && lo.z <= d.z && d.z <= hi.z &&
......
...@@ -62,8 +62,8 @@ namespace arithm ...@@ -62,8 +62,8 @@ namespace arithm
return vabsdiff4(a, b); return vabsdiff4(a, b);
} }
__device__ __forceinline__ VAbsDiff4() {} __host__ __device__ __forceinline__ VAbsDiff4() {}
__device__ __forceinline__ VAbsDiff4(const VAbsDiff4& other) {} __host__ __device__ __forceinline__ VAbsDiff4(const VAbsDiff4&) {}
}; };
struct VAbsDiff2 : binary_function<uint, uint, uint> struct VAbsDiff2 : binary_function<uint, uint, uint>
...@@ -73,8 +73,8 @@ namespace arithm ...@@ -73,8 +73,8 @@ namespace arithm
return vabsdiff2(a, b); return vabsdiff2(a, b);
} }
__device__ __forceinline__ VAbsDiff2() {} __host__ __device__ __forceinline__ VAbsDiff2() {}
__device__ __forceinline__ VAbsDiff2(const VAbsDiff2& other) {} __host__ __device__ __forceinline__ VAbsDiff2(const VAbsDiff2&) {}
}; };
__device__ __forceinline__ int _abs(int a) __device__ __forceinline__ int _abs(int a)
...@@ -97,8 +97,8 @@ namespace arithm ...@@ -97,8 +97,8 @@ namespace arithm
return saturate_cast<T>(_abs(a - b)); return saturate_cast<T>(_abs(a - b));
} }
__device__ __forceinline__ AbsDiffMat() {} __host__ __device__ __forceinline__ AbsDiffMat() {}
__device__ __forceinline__ AbsDiffMat(const AbsDiffMat& other) {} __host__ __device__ __forceinline__ AbsDiffMat(const AbsDiffMat&) {}
}; };
} }
......
...@@ -59,7 +59,7 @@ namespace arithm ...@@ -59,7 +59,7 @@ namespace arithm
{ {
S val; S val;
explicit AbsDiffScalar(S val_) : val(val_) {} __host__ explicit AbsDiffScalar(S val_) : val(val_) {}
__device__ __forceinline__ T operator ()(T a) const __device__ __forceinline__ T operator ()(T a) const
{ {
......
...@@ -62,8 +62,8 @@ namespace arithm ...@@ -62,8 +62,8 @@ namespace arithm
return vadd4(a, b); return vadd4(a, b);
} }
__device__ __forceinline__ VAdd4() {} __host__ __device__ __forceinline__ VAdd4() {}
__device__ __forceinline__ VAdd4(const VAdd4& other) {} __host__ __device__ __forceinline__ VAdd4(const VAdd4&) {}
}; };
struct VAdd2 : binary_function<uint, uint, uint> struct VAdd2 : binary_function<uint, uint, uint>
...@@ -73,8 +73,8 @@ namespace arithm ...@@ -73,8 +73,8 @@ namespace arithm
return vadd2(a, b); return vadd2(a, b);
} }
__device__ __forceinline__ VAdd2() {} __host__ __device__ __forceinline__ VAdd2() {}
__device__ __forceinline__ VAdd2(const VAdd2& other) {} __host__ __device__ __forceinline__ VAdd2(const VAdd2&) {}
}; };
template <typename T, typename D> struct AddMat : binary_function<T, T, D> template <typename T, typename D> struct AddMat : binary_function<T, T, D>
...@@ -84,8 +84,8 @@ namespace arithm ...@@ -84,8 +84,8 @@ namespace arithm
return saturate_cast<D>(a + b); return saturate_cast<D>(a + b);
} }
__device__ __forceinline__ AddMat() {} __host__ __device__ __forceinline__ AddMat() {}
__device__ __forceinline__ AddMat(const AddMat& other) {} __host__ __device__ __forceinline__ AddMat(const AddMat&) {}
}; };
} }
......
...@@ -59,7 +59,7 @@ namespace arithm ...@@ -59,7 +59,7 @@ namespace arithm
{ {
S val; S val;
explicit AddScalar(S val_) : val(val_) {} __host__ explicit AddScalar(S val_) : val(val_) {}
__device__ __forceinline__ D operator ()(T a) const __device__ __forceinline__ D operator ()(T a) const
{ {
......
...@@ -74,7 +74,7 @@ namespace arithm ...@@ -74,7 +74,7 @@ namespace arithm
float beta; float beta;
float gamma; float gamma;
AddWeighted_(double alpha_, double beta_, double gamma_) : alpha(static_cast<float>(alpha_)), beta(static_cast<float>(beta_)), gamma(static_cast<float>(gamma_)) {} __host__ AddWeighted_(double alpha_, double beta_, double gamma_) : alpha(static_cast<float>(alpha_)), beta(static_cast<float>(beta_)), gamma(static_cast<float>(gamma_)) {}
__device__ __forceinline__ D operator ()(T1 a, T2 b) const __device__ __forceinline__ D operator ()(T1 a, T2 b) const
{ {
...@@ -87,7 +87,7 @@ namespace arithm ...@@ -87,7 +87,7 @@ namespace arithm
double beta; double beta;
double gamma; double gamma;
AddWeighted_(double alpha_, double beta_, double gamma_) : alpha(alpha_), beta(beta_), gamma(gamma_) {} __host__ AddWeighted_(double alpha_, double beta_, double gamma_) : alpha(alpha_), beta(beta_), gamma(gamma_) {}
__device__ __forceinline__ D operator ()(T1 a, T2 b) const __device__ __forceinline__ D operator ()(T1 a, T2 b) const
{ {
......
...@@ -62,8 +62,8 @@ namespace arithm ...@@ -62,8 +62,8 @@ namespace arithm
return vcmpeq4(a, b); return vcmpeq4(a, b);
} }
__device__ __forceinline__ VCmpEq4() {} __host__ __device__ __forceinline__ VCmpEq4() {}
__device__ __forceinline__ VCmpEq4(const VCmpEq4& other) {} __host__ __device__ __forceinline__ VCmpEq4(const VCmpEq4&) {}
}; };
struct VCmpNe4 : binary_function<uint, uint, uint> struct VCmpNe4 : binary_function<uint, uint, uint>
{ {
...@@ -72,8 +72,8 @@ namespace arithm ...@@ -72,8 +72,8 @@ namespace arithm
return vcmpne4(a, b); return vcmpne4(a, b);
} }
__device__ __forceinline__ VCmpNe4() {} __host__ __device__ __forceinline__ VCmpNe4() {}
__device__ __forceinline__ VCmpNe4(const VCmpNe4& other) {} __host__ __device__ __forceinline__ VCmpNe4(const VCmpNe4&) {}
}; };
struct VCmpLt4 : binary_function<uint, uint, uint> struct VCmpLt4 : binary_function<uint, uint, uint>
{ {
...@@ -82,8 +82,8 @@ namespace arithm ...@@ -82,8 +82,8 @@ namespace arithm
return vcmplt4(a, b); return vcmplt4(a, b);
} }
__device__ __forceinline__ VCmpLt4() {} __host__ __device__ __forceinline__ VCmpLt4() {}
__device__ __forceinline__ VCmpLt4(const VCmpLt4& other) {} __host__ __device__ __forceinline__ VCmpLt4(const VCmpLt4&) {}
}; };
struct VCmpLe4 : binary_function<uint, uint, uint> struct VCmpLe4 : binary_function<uint, uint, uint>
{ {
...@@ -92,8 +92,8 @@ namespace arithm ...@@ -92,8 +92,8 @@ namespace arithm
return vcmple4(a, b); return vcmple4(a, b);
} }
__device__ __forceinline__ VCmpLe4() {} __host__ __device__ __forceinline__ VCmpLe4() {}
__device__ __forceinline__ VCmpLe4(const VCmpLe4& other) {} __host__ __device__ __forceinline__ VCmpLe4(const VCmpLe4&) {}
}; };
template <class Op, typename T> template <class Op, typename T>
......
...@@ -45,6 +45,7 @@ ...@@ -45,6 +45,7 @@
#include "opencv2/core/cuda/common.hpp" #include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/vec_traits.hpp" #include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp" #include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/reduce.hpp" #include "opencv2/core/cuda/reduce.hpp"
#include "opencv2/core/cuda/emulation.hpp" #include "opencv2/core/cuda/emulation.hpp"
......
...@@ -59,7 +59,7 @@ namespace arithm ...@@ -59,7 +59,7 @@ namespace arithm
{ {
S val; S val;
explicit DivInv(S val_) : val(val_) {} __host__ explicit DivInv(S val_) : val(val_) {}
__device__ __forceinline__ D operator ()(T a) const __device__ __forceinline__ D operator ()(T a) const
{ {
......
...@@ -91,8 +91,8 @@ namespace arithm ...@@ -91,8 +91,8 @@ namespace arithm
return b != 0 ? saturate_cast<D>(a / b) : 0; return b != 0 ? saturate_cast<D>(a / b) : 0;
} }
__device__ __forceinline__ Div() {} __host__ __device__ __forceinline__ Div() {}
__device__ __forceinline__ Div(const Div& other) {} __host__ __device__ __forceinline__ Div(const Div&) {}
}; };
template <typename T> struct Div<T, float> : binary_function<T, T, float> template <typename T> struct Div<T, float> : binary_function<T, T, float>
{ {
...@@ -101,8 +101,8 @@ namespace arithm ...@@ -101,8 +101,8 @@ namespace arithm
return b != 0 ? static_cast<float>(a) / b : 0; return b != 0 ? static_cast<float>(a) / b : 0;
} }
__device__ __forceinline__ Div() {} __host__ __device__ __forceinline__ Div() {}
__device__ __forceinline__ Div(const Div& other) {} __host__ __device__ __forceinline__ Div(const Div&) {}
}; };
template <typename T> struct Div<T, double> : binary_function<T, T, double> template <typename T> struct Div<T, double> : binary_function<T, T, double>
{ {
...@@ -111,15 +111,15 @@ namespace arithm ...@@ -111,15 +111,15 @@ namespace arithm
return b != 0 ? static_cast<double>(a) / b : 0; return b != 0 ? static_cast<double>(a) / b : 0;
} }
__device__ __forceinline__ Div() {} __host__ __device__ __forceinline__ Div() {}
__device__ __forceinline__ Div(const Div& other) {} __host__ __device__ __forceinline__ Div(const Div&) {}
}; };
template <typename T, typename S, typename D> struct DivScale : binary_function<T, T, D> template <typename T, typename S, typename D> struct DivScale : binary_function<T, T, D>
{ {
S scale; S scale;
explicit DivScale(S scale_) : scale(scale_) {} __host__ explicit DivScale(S scale_) : scale(scale_) {}
__device__ __forceinline__ D operator ()(T a, T b) const __device__ __forceinline__ D operator ()(T a, T b) const
{ {
......
...@@ -59,7 +59,7 @@ namespace arithm ...@@ -59,7 +59,7 @@ namespace arithm
{ {
S val; S val;
explicit DivScalar(S val_) : val(val_) {} __host__ explicit DivScalar(S val_) : val(val_) {}
__device__ __forceinline__ D operator ()(T a) const __device__ __forceinline__ D operator ()(T a) const
{ {
......
...@@ -94,8 +94,8 @@ namespace arithm ...@@ -94,8 +94,8 @@ namespace arithm
return saturate_cast<T>(x * x); return saturate_cast<T>(x * x);
} }
__device__ __forceinline__ Sqr() {} __host__ __device__ __forceinline__ Sqr() {}
__device__ __forceinline__ Sqr(const Sqr& other) {} __host__ __device__ __forceinline__ Sqr(const Sqr&) {}
}; };
} }
...@@ -190,8 +190,8 @@ namespace arithm ...@@ -190,8 +190,8 @@ namespace arithm
return saturate_cast<T>(f(x)); return saturate_cast<T>(f(x));
} }
__device__ __forceinline__ Exp() {} __host__ __device__ __forceinline__ Exp() {}
__device__ __forceinline__ Exp(const Exp& other) {} __host__ __device__ __forceinline__ Exp(const Exp&) {}
}; };
} }
...@@ -228,7 +228,7 @@ namespace arithm ...@@ -228,7 +228,7 @@ namespace arithm
{ {
float power; float power;
PowOp(double power_) : power(static_cast<float>(power_)) {} __host__ explicit PowOp(double power_) : power(static_cast<float>(power_)) {}
__device__ __forceinline__ T operator()(T e) const __device__ __forceinline__ T operator()(T e) const
{ {
...@@ -239,7 +239,7 @@ namespace arithm ...@@ -239,7 +239,7 @@ namespace arithm
{ {
float power; float power;
PowOp(double power_) : power(static_cast<float>(power_)) {} __host__ explicit PowOp(double power_) : power(static_cast<float>(power_)) {}
__device__ __forceinline__ T operator()(T e) const __device__ __forceinline__ T operator()(T e) const
{ {
...@@ -255,7 +255,7 @@ namespace arithm ...@@ -255,7 +255,7 @@ namespace arithm
{ {
float power; float power;
PowOp(double power_) : power(static_cast<float>(power_)) {} __host__ explicit PowOp(double power_) : power(static_cast<float>(power_)) {}
__device__ __forceinline__ float operator()(float e) const __device__ __forceinline__ float operator()(float e) const
{ {
...@@ -266,7 +266,7 @@ namespace arithm ...@@ -266,7 +266,7 @@ namespace arithm
{ {
double power; double power;
PowOp(double power_) : power(power_) {} __host__ explicit PowOp(double power_) : power(power_) {}
__device__ __forceinline__ double operator()(double e) const __device__ __forceinline__ double operator()(double e) const
{ {
......
...@@ -45,6 +45,7 @@ ...@@ -45,6 +45,7 @@
#include "opencv2/core/cuda/common.hpp" #include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/vec_traits.hpp" #include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp" #include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/reduce.hpp" #include "opencv2/core/cuda/reduce.hpp"
#include "opencv2/core/cuda/emulation.hpp" #include "opencv2/core/cuda/emulation.hpp"
#include "opencv2/core/cuda/limits.hpp" #include "opencv2/core/cuda/limits.hpp"
......
...@@ -65,8 +65,8 @@ namespace arithm ...@@ -65,8 +65,8 @@ namespace arithm
return vmin4(a, b); return vmin4(a, b);
} }
__device__ __forceinline__ VMin4() {} __host__ __device__ __forceinline__ VMin4() {}
__device__ __forceinline__ VMin4(const VMin4& other) {} __host__ __device__ __forceinline__ VMin4(const VMin4&) {}
}; };
struct VMin2 : binary_function<uint, uint, uint> struct VMin2 : binary_function<uint, uint, uint>
...@@ -76,8 +76,8 @@ namespace arithm ...@@ -76,8 +76,8 @@ namespace arithm
return vmin2(a, b); return vmin2(a, b);
} }
__device__ __forceinline__ VMin2() {} __host__ __device__ __forceinline__ VMin2() {}
__device__ __forceinline__ VMin2(const VMin2& other) {} __host__ __device__ __forceinline__ VMin2(const VMin2&) {}
}; };
} }
...@@ -151,8 +151,8 @@ namespace arithm ...@@ -151,8 +151,8 @@ namespace arithm
return vmax4(a, b); return vmax4(a, b);
} }
__device__ __forceinline__ VMax4() {} __host__ __device__ __forceinline__ VMax4() {}
__device__ __forceinline__ VMax4(const VMax4& other) {} __host__ __device__ __forceinline__ VMax4(const VMax4&) {}
}; };
struct VMax2 : binary_function<uint, uint, uint> struct VMax2 : binary_function<uint, uint, uint>
...@@ -162,8 +162,8 @@ namespace arithm ...@@ -162,8 +162,8 @@ namespace arithm
return vmax2(a, b); return vmax2(a, b);
} }
__device__ __forceinline__ VMax2() {} __host__ __device__ __forceinline__ VMax2() {}
__device__ __forceinline__ VMax2(const VMax2& other) {} __host__ __device__ __forceinline__ VMax2(const VMax2&) {}
}; };
} }
......
...@@ -45,6 +45,7 @@ ...@@ -45,6 +45,7 @@
#include "opencv2/core/cuda/common.hpp" #include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/vec_traits.hpp" #include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp" #include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/reduce.hpp" #include "opencv2/core/cuda/reduce.hpp"
#include "opencv2/core/cuda/emulation.hpp" #include "opencv2/core/cuda/emulation.hpp"
#include "opencv2/core/cuda/limits.hpp" #include "opencv2/core/cuda/limits.hpp"
......
...@@ -69,8 +69,8 @@ namespace arithm ...@@ -69,8 +69,8 @@ namespace arithm
return res; return res;
} }
__device__ __forceinline__ Mul_8uc4_32f() {} __host__ __device__ __forceinline__ Mul_8uc4_32f() {}
__device__ __forceinline__ Mul_8uc4_32f(const Mul_8uc4_32f& other) {} __host__ __device__ __forceinline__ Mul_8uc4_32f(const Mul_8uc4_32f&) {}
}; };
struct Mul_16sc4_32f : binary_function<short4, float, short4> struct Mul_16sc4_32f : binary_function<short4, float, short4>
...@@ -81,8 +81,8 @@ namespace arithm ...@@ -81,8 +81,8 @@ namespace arithm
saturate_cast<short>(a.z * b), saturate_cast<short>(a.w * b)); saturate_cast<short>(a.z * b), saturate_cast<short>(a.w * b));
} }
__device__ __forceinline__ Mul_16sc4_32f() {} __host__ __device__ __forceinline__ Mul_16sc4_32f() {}
__device__ __forceinline__ Mul_16sc4_32f(const Mul_16sc4_32f& other) {} __host__ __device__ __forceinline__ Mul_16sc4_32f(const Mul_16sc4_32f&) {}
}; };
template <typename T, typename D> struct Mul : binary_function<T, T, D> template <typename T, typename D> struct Mul : binary_function<T, T, D>
...@@ -92,15 +92,15 @@ namespace arithm ...@@ -92,15 +92,15 @@ namespace arithm
return saturate_cast<D>(a * b); return saturate_cast<D>(a * b);
} }
__device__ __forceinline__ Mul() {} __host__ __device__ __forceinline__ Mul() {}
__device__ __forceinline__ Mul(const Mul& other) {} __host__ __device__ __forceinline__ Mul(const Mul&) {}
}; };
template <typename T, typename S, typename D> struct MulScale : binary_function<T, T, D> template <typename T, typename S, typename D> struct MulScale : binary_function<T, T, D>
{ {
S scale; S scale;
explicit MulScale(S scale_) : scale(scale_) {} __host__ explicit MulScale(S scale_) : scale(scale_) {}
__device__ __forceinline__ D operator ()(T a, T b) const __device__ __forceinline__ D operator ()(T a, T b) const
{ {
......
...@@ -59,7 +59,7 @@ namespace arithm ...@@ -59,7 +59,7 @@ namespace arithm
{ {
S val; S val;
explicit MulScalar(S val_) : val(val_) {} __host__ explicit MulScalar(S val_) : val(val_) {}
__device__ __forceinline__ D operator ()(T a) const __device__ __forceinline__ D operator ()(T a) const
{ {
......
...@@ -46,6 +46,7 @@ ...@@ -46,6 +46,7 @@
#include "opencv2/core/cuda/saturate_cast.hpp" #include "opencv2/core/cuda/saturate_cast.hpp"
#include "opencv2/core/cuda/vec_traits.hpp" #include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp" #include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/reduce.hpp" #include "opencv2/core/cuda/reduce.hpp"
#include "opencv2/core/cuda/limits.hpp" #include "opencv2/core/cuda/limits.hpp"
...@@ -76,8 +77,8 @@ namespace reduce ...@@ -76,8 +77,8 @@ namespace reduce
return r; return r;
} }
__device__ __forceinline__ Sum() {} __host__ __device__ __forceinline__ Sum() {}
__device__ __forceinline__ Sum(const Sum&) {} __host__ __device__ __forceinline__ Sum(const Sum&) {}
}; };
struct Avg struct Avg
...@@ -100,8 +101,8 @@ namespace reduce ...@@ -100,8 +101,8 @@ namespace reduce
return r / sz; return r / sz;
} }
__device__ __forceinline__ Avg() {} __host__ __device__ __forceinline__ Avg() {}
__device__ __forceinline__ Avg(const Avg&) {} __host__ __device__ __forceinline__ Avg(const Avg&) {}
}; };
struct Min struct Min
...@@ -125,8 +126,8 @@ namespace reduce ...@@ -125,8 +126,8 @@ namespace reduce
return r; return r;
} }
__device__ __forceinline__ Min() {} __host__ __device__ __forceinline__ Min() {}
__device__ __forceinline__ Min(const Min&) {} __host__ __device__ __forceinline__ Min(const Min&) {}
}; };
struct Max struct Max
...@@ -150,8 +151,8 @@ namespace reduce ...@@ -150,8 +151,8 @@ namespace reduce
return r; return r;
} }
__device__ __forceinline__ Max() {} __host__ __device__ __forceinline__ Max() {}
__device__ __forceinline__ Max(const Max&) {} __host__ __device__ __forceinline__ Max(const Max&) {}
}; };
/////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////
......
...@@ -62,8 +62,8 @@ namespace arithm ...@@ -62,8 +62,8 @@ namespace arithm
return vsub4(a, b); return vsub4(a, b);
} }
__device__ __forceinline__ VSub4() {} __host__ __device__ __forceinline__ VSub4() {}
__device__ __forceinline__ VSub4(const VSub4& other) {} __host__ __device__ __forceinline__ VSub4(const VSub4&) {}
}; };
struct VSub2 : binary_function<uint, uint, uint> struct VSub2 : binary_function<uint, uint, uint>
...@@ -73,8 +73,8 @@ namespace arithm ...@@ -73,8 +73,8 @@ namespace arithm
return vsub2(a, b); return vsub2(a, b);
} }
__device__ __forceinline__ VSub2() {} __host__ __device__ __forceinline__ VSub2() {}
__device__ __forceinline__ VSub2(const VSub2& other) {} __host__ __device__ __forceinline__ VSub2(const VSub2&) {}
}; };
template <typename T, typename D> struct SubMat : binary_function<T, T, D> template <typename T, typename D> struct SubMat : binary_function<T, T, D>
...@@ -84,8 +84,8 @@ namespace arithm ...@@ -84,8 +84,8 @@ namespace arithm
return saturate_cast<D>(a - b); return saturate_cast<D>(a - b);
} }
__device__ __forceinline__ SubMat() {} __host__ __device__ __forceinline__ SubMat() {}
__device__ __forceinline__ SubMat(const SubMat& other) {} __host__ __device__ __forceinline__ SubMat(const SubMat&) {}
}; };
} }
......
...@@ -59,7 +59,7 @@ namespace arithm ...@@ -59,7 +59,7 @@ namespace arithm
{ {
S val; S val;
explicit SubScalar(S val_) : val(val_) {} __host__ explicit SubScalar(S val_) : val(val_) {}
__device__ __forceinline__ D operator ()(T a) const __device__ __forceinline__ D operator ()(T a) const
{ {
......
...@@ -45,6 +45,7 @@ ...@@ -45,6 +45,7 @@
#include "opencv2/core/cuda/common.hpp" #include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/vec_traits.hpp" #include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp" #include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/reduce.hpp" #include "opencv2/core/cuda/reduce.hpp"
#include "opencv2/core/cuda/emulation.hpp" #include "opencv2/core/cuda/emulation.hpp"
#include "opencv2/core/cuda/utility.hpp" #include "opencv2/core/cuda/utility.hpp"
......
...@@ -72,7 +72,7 @@ PERF_TEST_P(Sz_Type_KernelSz, Blur, ...@@ -72,7 +72,7 @@ PERF_TEST_P(Sz_Type_KernelSz, Blur,
TEST_CYCLE() cv::gpu::blur(d_src, dst, cv::Size(ksize, ksize)); TEST_CYCLE() cv::gpu::blur(d_src, dst, cv::Size(ksize, ksize));
GPU_SANITY_CHECK(dst); GPU_SANITY_CHECK(dst, 1);
} }
else else
{ {
......
...@@ -48,6 +48,7 @@ ...@@ -48,6 +48,7 @@
#include "opencv2/core/cuda/common.hpp" #include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/emulation.hpp" #include "opencv2/core/cuda/emulation.hpp"
#include "opencv2/core/cuda/vec_math.hpp" #include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/limits.hpp" #include "opencv2/core/cuda/limits.hpp"
#include "opencv2/core/cuda/dynamic_smem.hpp" #include "opencv2/core/cuda/dynamic_smem.hpp"
...@@ -811,7 +812,7 @@ namespace cv { namespace gpu { namespace cudev ...@@ -811,7 +812,7 @@ namespace cv { namespace gpu { namespace cudev
const int ind = ::atomicAdd(r_sizes + n, 1); const int ind = ::atomicAdd(r_sizes + n, 1);
if (ind < maxSize) if (ind < maxSize)
r_table(n, ind) = p - templCenter; r_table(n, ind) = saturate_cast<short2>(p - templCenter);
} }
void buildRTable_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount, void buildRTable_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
...@@ -855,7 +856,7 @@ namespace cv { namespace gpu { namespace cudev ...@@ -855,7 +856,7 @@ namespace cv { namespace gpu { namespace cudev
for (int j = 0; j < r_row_size; ++j) for (int j = 0; j < r_row_size; ++j)
{ {
short2 c = p - r_row[j]; int2 c = p - r_row[j];
c.x = __float2int_rn(c.x * idp); c.x = __float2int_rn(c.x * idp);
c.y = __float2int_rn(c.y * idp); c.y = __float2int_rn(c.y * idp);
......
...@@ -84,7 +84,7 @@ PERF_TEST_P(ImagePair, InterpolateFrames, ...@@ -84,7 +84,7 @@ PERF_TEST_P(ImagePair, InterpolateFrames,
TEST_CYCLE() cv::gpu::interpolateFrames(d_frame0, d_frame1, d_fu, d_fv, d_bu, d_bv, 0.5f, newFrame, d_buf); TEST_CYCLE() cv::gpu::interpolateFrames(d_frame0, d_frame1, d_fu, d_fv, d_bu, d_bv, 0.5f, newFrame, d_buf);
GPU_SANITY_CHECK(newFrame); GPU_SANITY_CHECK(newFrame, 1e-4);
} }
else else
{ {
...@@ -123,7 +123,7 @@ PERF_TEST_P(ImagePair, CreateOpticalFlowNeedleMap, ...@@ -123,7 +123,7 @@ PERF_TEST_P(ImagePair, CreateOpticalFlowNeedleMap,
TEST_CYCLE() cv::gpu::createOpticalFlowNeedleMap(u, v, vertex, colors); TEST_CYCLE() cv::gpu::createOpticalFlowNeedleMap(u, v, vertex, colors);
GPU_SANITY_CHECK(vertex); GPU_SANITY_CHECK(vertex, 1e-6);
GPU_SANITY_CHECK(colors); GPU_SANITY_CHECK(colors);
} }
else else
...@@ -161,8 +161,8 @@ PERF_TEST_P(ImagePair, BroxOpticalFlow, ...@@ -161,8 +161,8 @@ PERF_TEST_P(ImagePair, BroxOpticalFlow,
TEST_CYCLE() d_flow(d_frame0, d_frame1, u, v); TEST_CYCLE() d_flow(d_frame0, d_frame1, u, v);
GPU_SANITY_CHECK(u); GPU_SANITY_CHECK(u, 1e-1);
GPU_SANITY_CHECK(v); GPU_SANITY_CHECK(v, 1e-1);
} }
else else
{ {
......
...@@ -103,8 +103,8 @@ GPU_TEST_P(BroxOpticalFlow, Regression) ...@@ -103,8 +103,8 @@ GPU_TEST_P(BroxOpticalFlow, Regression)
for (int i = 0; i < v_gold.rows; ++i) for (int i = 0; i < v_gold.rows; ++i)
f.read(v_gold.ptr<char>(i), v_gold.cols * sizeof(float)); f.read(v_gold.ptr<char>(i), v_gold.cols * sizeof(float));
EXPECT_MAT_NEAR(u_gold, u, 0); EXPECT_MAT_SIMILAR(u_gold, u, 1e-3);
EXPECT_MAT_NEAR(v_gold, v, 0); EXPECT_MAT_SIMILAR(v_gold, v, 1e-3);
#else #else
std::ofstream f(fname.c_str(), std::ios_base::binary); std::ofstream f(fname.c_str(), std::ios_base::binary);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment