Commit 173442bb authored by Roman Donchenko, committed by OpenCV Buildbot

Merge pull request #964 from jet47:cuda-5.5-support

Parents: c9295471, 4559d461
......@@ -60,6 +60,8 @@
# include "opencv2/core/stream_accessor.hpp"
# include "opencv2/core/cuda/common.hpp"
# define NPP_VERSION (NPP_VERSION_MAJOR * 1000 + NPP_VERSION_MINOR * 100 + NPP_VERSION_BUILD)
# define CUDART_MINIMUM_REQUIRED_VERSION 4020
# if (CUDART_VERSION < CUDART_MINIMUM_REQUIRED_VERSION)
......
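The hunk above composes a single comparable NPP version number from the major/minor/build components and raises the minimum supported CUDA runtime. Below is a minimal sketch of how such composite version macros are typically checked at compile time; the DEMO_* names and the error message are illustrative, not the actual OpenCV header contents.

```cpp
// Minimal sketch (not the actual OpenCV header): build one comparable version
// number from major/minor/build, as the NPP_VERSION macro in this hunk does,
// and fail the build early when the CUDA runtime is older than the minimum.
#include <cuda_runtime.h>   // defines CUDART_VERSION, e.g. 5050 for CUDA 5.5
#include <npp.h>            // defines NPP_VERSION_MAJOR / _MINOR / _BUILD

#define DEMO_NPP_VERSION (NPP_VERSION_MAJOR * 1000 + NPP_VERSION_MINOR * 100 + NPP_VERSION_BUILD)
#define DEMO_CUDART_MINIMUM_REQUIRED_VERSION 4020  // CUDA 4.2

#if (CUDART_VERSION < DEMO_CUDART_MINIMUM_REQUIRED_VERSION)
#  error "CUDA runtime is older than the minimum supported version"
#endif

// With this encoding NPP 5.5.0 yields 5500, so code can branch on a plain
// "#if DEMO_NPP_VERSION < 5500", as the error table in the next file does.
```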
......@@ -1547,48 +1547,90 @@ namespace
const ErrorEntry npp_errors [] =
{
error_entry( NPP_NOT_SUPPORTED_MODE_ERROR ),
error_entry( NPP_ROUND_MODE_NOT_SUPPORTED_ERROR ),
error_entry( NPP_RESIZE_NO_OPERATION_ERROR ),
#if defined (_MSC_VER)
#if defined (_MSC_VER)
error_entry( NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY ),
#endif
#endif
#if NPP_VERSION < 5500
error_entry( NPP_BAD_ARG_ERROR ),
error_entry( NPP_LUT_NUMBER_OF_LEVELS_ERROR ),
error_entry( NPP_TEXTURE_BIND_ERROR ),
error_entry( NPP_COEFF_ERROR ),
error_entry( NPP_RECT_ERROR ),
error_entry( NPP_QUAD_ERROR ),
error_entry( NPP_WRONG_INTERSECTION_ROI_ERROR ),
error_entry( NPP_NOT_EVEN_STEP_ERROR ),
error_entry( NPP_INTERPOLATION_ERROR ),
error_entry( NPP_RESIZE_FACTOR_ERROR ),
error_entry( NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR ),
error_entry( NPP_MEMFREE_ERR ),
error_entry( NPP_MEMSET_ERR ),
error_entry( NPP_MEMCPY_ERROR ),
error_entry( NPP_MEM_ALLOC_ERR ),
error_entry( NPP_HISTO_NUMBER_OF_LEVELS_ERROR ),
error_entry( NPP_MIRROR_FLIP_ERR ),
error_entry( NPP_INVALID_INPUT ),
error_entry( NPP_POINTER_ERROR ),
error_entry( NPP_WARNING ),
error_entry( NPP_ODD_ROI_WARNING ),
#else
error_entry( NPP_INVALID_HOST_POINTER_ERROR ),
error_entry( NPP_INVALID_DEVICE_POINTER_ERROR ),
error_entry( NPP_LUT_PALETTE_BITSIZE_ERROR ),
error_entry( NPP_ZC_MODE_NOT_SUPPORTED_ERROR ),
error_entry( NPP_MEMFREE_ERROR ),
error_entry( NPP_MEMSET_ERROR ),
error_entry( NPP_QUALITY_INDEX_ERROR ),
error_entry( NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR ),
error_entry( NPP_CHANNEL_ORDER_ERROR ),
error_entry( NPP_ZERO_MASK_VALUE_ERROR ),
error_entry( NPP_QUADRANGLE_ERROR ),
error_entry( NPP_RECTANGLE_ERROR ),
error_entry( NPP_COEFFICIENT_ERROR ),
error_entry( NPP_NUMBER_OF_CHANNELS_ERROR ),
error_entry( NPP_COI_ERROR ),
error_entry( NPP_DIVISOR_ERROR ),
error_entry( NPP_CHANNEL_ERROR ),
error_entry( NPP_STRIDE_ERROR ),
error_entry( NPP_ANCHOR_ERROR ),
error_entry( NPP_MASK_SIZE_ERROR ),
error_entry( NPP_MIRROR_FLIP_ERROR ),
error_entry( NPP_MOMENT_00_ZERO_ERROR ),
error_entry( NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR ),
error_entry( NPP_THRESHOLD_ERROR ),
error_entry( NPP_CONTEXT_MATCH_ERROR ),
error_entry( NPP_FFT_FLAG_ERROR ),
error_entry( NPP_FFT_ORDER_ERROR ),
error_entry( NPP_SCALE_RANGE_ERROR ),
error_entry( NPP_DATA_TYPE_ERROR ),
error_entry( NPP_OUT_OFF_RANGE_ERROR ),
error_entry( NPP_DIVIDE_BY_ZERO_ERROR ),
error_entry( NPP_MEMORY_ALLOCATION_ERR ),
error_entry( NPP_RANGE_ERROR ),
error_entry( NPP_BAD_ARGUMENT_ERROR ),
error_entry( NPP_NO_MEMORY_ERROR ),
error_entry( NPP_ERROR_RESERVED ),
error_entry( NPP_NO_OPERATION_WARNING ),
error_entry( NPP_DIVIDE_BY_ZERO_WARNING ),
error_entry( NPP_WRONG_INTERSECTION_ROI_WARNING ),
#endif
error_entry( NPP_NOT_SUPPORTED_MODE_ERROR ),
error_entry( NPP_ROUND_MODE_NOT_SUPPORTED_ERROR ),
error_entry( NPP_RESIZE_NO_OPERATION_ERROR ),
error_entry( NPP_LUT_NUMBER_OF_LEVELS_ERROR ),
error_entry( NPP_TEXTURE_BIND_ERROR ),
error_entry( NPP_WRONG_INTERSECTION_ROI_ERROR ),
error_entry( NPP_NOT_EVEN_STEP_ERROR ),
error_entry( NPP_INTERPOLATION_ERROR ),
error_entry( NPP_RESIZE_FACTOR_ERROR ),
error_entry( NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR ),
error_entry( NPP_MEMCPY_ERROR ),
error_entry( NPP_ALIGNMENT_ERROR ),
error_entry( NPP_STEP_ERROR ),
error_entry( NPP_SIZE_ERROR ),
error_entry( NPP_POINTER_ERROR ),
error_entry( NPP_NULL_POINTER_ERROR ),
error_entry( NPP_CUDA_KERNEL_EXECUTION_ERROR ),
error_entry( NPP_NOT_IMPLEMENTED_ERROR ),
error_entry( NPP_ERROR ),
error_entry( NPP_NO_ERROR ),
error_entry( NPP_SUCCESS ),
error_entry( NPP_WARNING ),
error_entry( NPP_WRONG_INTERSECTION_QUAD_WARNING ),
error_entry( NPP_MISALIGNED_DST_ROI_WARNING ),
error_entry( NPP_AFFINE_QUAD_INCORRECT_WARNING ),
error_entry( NPP_DOUBLE_SIZE_WARNING ),
error_entry( NPP_ODD_ROI_WARNING )
error_entry( NPP_DOUBLE_SIZE_WARNING )
};
const size_t npp_error_num = sizeof(npp_errors) / sizeof(npp_errors[0]);
......
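The table above is now split by an `#if NPP_VERSION < 5500` guard because the pre-5.5 branch lists enumerators such as NPP_MEMFREE_ERR and NPP_BAD_ARG_ERROR, while the 5.5 branch lists NPP_MEMFREE_ERROR, NPP_BAD_ARGUMENT_ERROR, and many other new codes. Here is a self-contained sketch of the error_entry table-lookup pattern, with hypothetical pieces (the ErrorEntry layout, the demo enum, getErrorString) wherever the diff does not show them.

```cpp
// Sketch of the table-lookup pattern used above, with hypothetical names
// where the diff does not show them (ErrorEntry layout, getErrorString).
#include <cstddef>

struct ErrorEntry
{
    int         code;  // numeric status value
    const char* str;   // its symbolic name, for error messages
};

// Stringize the enumerator itself so the table stays in sync with the header.
#define error_entry(entry) { entry, #entry }

// Example table; the real one above is guarded with "#if NPP_VERSION < 5500"
// because NPP 5.5 renamed many of these enumerators.
enum { DEMO_NPP_SUCCESS = 0, DEMO_NPP_ERROR = -2 };

static const ErrorEntry demo_errors[] =
{
    error_entry( DEMO_NPP_SUCCESS ),
    error_entry( DEMO_NPP_ERROR )
};

static const size_t demo_error_num = sizeof(demo_errors) / sizeof(demo_errors[0]);

// Linear search is fine for a few dozen entries.
inline const char* getErrorString(int code)
{
    for (size_t i = 0; i < demo_error_num; ++i)
        if (demo_errors[i].code == code)
            return demo_errors[i].str;
    return "unknown error code";
}
```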
......@@ -153,7 +153,7 @@ namespace cv { namespace gpu { namespace cudev
template<typename I> __device__ __forceinline__ bool operator() (const I& a, const I& b) const
{
I d = a - b;
I d = saturate_cast<I>(a - b);
return lo.x <= d.x && d.x <= hi.x &&
lo.y <= d.y && d.y <= hi.y &&
lo.z <= d.z && d.z <= hi.z;
......@@ -169,7 +169,7 @@ namespace cv { namespace gpu { namespace cudev
template<typename I> __device__ __forceinline__ bool operator() (const I& a, const I& b) const
{
I d = a - b;
I d = saturate_cast<I>(a - b);
return lo.x <= d.x && d.x <= hi.x &&
lo.y <= d.y && d.y <= hi.y &&
lo.z <= d.z && d.z <= hi.z &&
......
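The two hunks above wrap the per-pixel difference in saturate_cast, clamping it back into the element type's range instead of relying on an implicit narrowing conversion. A standalone sketch of that idea for uchar4 pixels follows; it re-implements the per-channel clamp locally rather than using OpenCV's saturate_cast, so treat it as an illustration of the technique, not the library code.

```cpp
// Standalone sketch: compute per-channel differences in a wider type, then
// clamp back into the destination channel range instead of letting the
// subtraction wrap around. Not OpenCV's saturate_cast, just the same idea.
#include <cuda_runtime.h>

__device__ __forceinline__ unsigned char clamp_u8(int v)
{
    return (unsigned char)(v < 0 ? 0 : (v > 255 ? 255 : v));
}

// Per-channel saturated difference of two uchar4 pixels.
__device__ __forceinline__ uchar4 saturated_diff(uchar4 a, uchar4 b)
{
    return make_uchar4(clamp_u8((int)a.x - (int)b.x),
                       clamp_u8((int)a.y - (int)b.y),
                       clamp_u8((int)a.z - (int)b.z),
                       clamp_u8((int)a.w - (int)b.w));
}
```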
......@@ -62,8 +62,8 @@ namespace arithm
return vabsdiff4(a, b);
}
__device__ __forceinline__ VAbsDiff4() {}
__device__ __forceinline__ VAbsDiff4(const VAbsDiff4& other) {}
__host__ __device__ __forceinline__ VAbsDiff4() {}
__host__ __device__ __forceinline__ VAbsDiff4(const VAbsDiff4&) {}
};
struct VAbsDiff2 : binary_function<uint, uint, uint>
......@@ -73,8 +73,8 @@ namespace arithm
return vabsdiff2(a, b);
}
__device__ __forceinline__ VAbsDiff2() {}
__device__ __forceinline__ VAbsDiff2(const VAbsDiff2& other) {}
__host__ __device__ __forceinline__ VAbsDiff2() {}
__host__ __device__ __forceinline__ VAbsDiff2(const VAbsDiff2&) {}
};
__device__ __forceinline__ int _abs(int a)
......@@ -97,8 +97,8 @@ namespace arithm
return saturate_cast<T>(_abs(a - b));
}
__device__ __forceinline__ AbsDiffMat() {}
__device__ __forceinline__ AbsDiffMat(const AbsDiffMat& other) {}
__host__ __device__ __forceinline__ AbsDiffMat() {}
__host__ __device__ __forceinline__ AbsDiffMat(const AbsDiffMat&) {}
};
}
......
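Throughout this PR the empty default and copy constructors of the device functors (VAbsDiff4, VAbsDiff2, AbsDiffMat here, and the analogous VAdd/VSub/VCmp/VMin/VMax/Mul/Div/Sqr/Exp/Sum/Avg/Min/Max functors below) gain a __host__ qualifier, so the objects can be created and copied in host code before being handed to a kernel. The following is a minimal sketch of that pattern with a hypothetical functor and kernel, not OpenCV code; it assumes the functor is constructed on the host and passed to the kernel by value, as the gpu module's transform helpers do.

```cpp
// Minimal sketch (hypothetical functor and kernel): a functor that host code
// constructs and passes by value to a kernel needs constructors callable from
// the host, hence the __host__ __device__ qualifiers added in this PR.
#include <cuda_runtime.h>

struct VAbsDiffDemo
{
    __device__ __forceinline__ unsigned int operator ()(unsigned int a, unsigned int b) const
    {
        return a > b ? a - b : b - a;   // stand-in for the vabsdiff4 intrinsic
    }

    __host__ __device__ __forceinline__ VAbsDiffDemo() {}
    __host__ __device__ __forceinline__ VAbsDiffDemo(const VAbsDiffDemo&) {}
};

template <class Op>
__global__ void transformDemo(const unsigned int* a, const unsigned int* b,
                              unsigned int* dst, int n, Op op)
{
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
        dst[i] = op(a[i], b[i]);
}

void launchDemo(const unsigned int* a, const unsigned int* b, unsigned int* dst, int n)
{
    // The functor is created here, on the host, and copied into the kernel
    // argument -- which is why its constructors carry the __host__ qualifier.
    transformDemo<<<(n + 255) / 256, 256>>>(a, b, dst, n, VAbsDiffDemo());
}
```

The stateful scalar functors touched below (AbsDiffScalar, AddScalar, AddWeighted_, MulScalar, MulScale, DivScalar, DivScale, DivInv, PowOp) get the same treatment on their converting constructors, which likewise run on the host, plus explicit where it was missing.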
......@@ -59,7 +59,7 @@ namespace arithm
{
S val;
explicit AbsDiffScalar(S val_) : val(val_) {}
__host__ explicit AbsDiffScalar(S val_) : val(val_) {}
__device__ __forceinline__ T operator ()(T a) const
{
......
......@@ -62,8 +62,8 @@ namespace arithm
return vadd4(a, b);
}
__device__ __forceinline__ VAdd4() {}
__device__ __forceinline__ VAdd4(const VAdd4& other) {}
__host__ __device__ __forceinline__ VAdd4() {}
__host__ __device__ __forceinline__ VAdd4(const VAdd4&) {}
};
struct VAdd2 : binary_function<uint, uint, uint>
......@@ -73,8 +73,8 @@ namespace arithm
return vadd2(a, b);
}
__device__ __forceinline__ VAdd2() {}
__device__ __forceinline__ VAdd2(const VAdd2& other) {}
__host__ __device__ __forceinline__ VAdd2() {}
__host__ __device__ __forceinline__ VAdd2(const VAdd2&) {}
};
template <typename T, typename D> struct AddMat : binary_function<T, T, D>
......@@ -84,8 +84,8 @@ namespace arithm
return saturate_cast<D>(a + b);
}
__device__ __forceinline__ AddMat() {}
__device__ __forceinline__ AddMat(const AddMat& other) {}
__host__ __device__ __forceinline__ AddMat() {}
__host__ __device__ __forceinline__ AddMat(const AddMat&) {}
};
}
......
......@@ -59,7 +59,7 @@ namespace arithm
{
S val;
explicit AddScalar(S val_) : val(val_) {}
__host__ explicit AddScalar(S val_) : val(val_) {}
__device__ __forceinline__ D operator ()(T a) const
{
......
......@@ -74,7 +74,7 @@ namespace arithm
float beta;
float gamma;
AddWeighted_(double alpha_, double beta_, double gamma_) : alpha(static_cast<float>(alpha_)), beta(static_cast<float>(beta_)), gamma(static_cast<float>(gamma_)) {}
__host__ AddWeighted_(double alpha_, double beta_, double gamma_) : alpha(static_cast<float>(alpha_)), beta(static_cast<float>(beta_)), gamma(static_cast<float>(gamma_)) {}
__device__ __forceinline__ D operator ()(T1 a, T2 b) const
{
......@@ -87,7 +87,7 @@ namespace arithm
double beta;
double gamma;
AddWeighted_(double alpha_, double beta_, double gamma_) : alpha(alpha_), beta(beta_), gamma(gamma_) {}
__host__ AddWeighted_(double alpha_, double beta_, double gamma_) : alpha(alpha_), beta(beta_), gamma(gamma_) {}
__device__ __forceinline__ D operator ()(T1 a, T2 b) const
{
......
......@@ -62,8 +62,8 @@ namespace arithm
return vcmpeq4(a, b);
}
__device__ __forceinline__ VCmpEq4() {}
__device__ __forceinline__ VCmpEq4(const VCmpEq4& other) {}
__host__ __device__ __forceinline__ VCmpEq4() {}
__host__ __device__ __forceinline__ VCmpEq4(const VCmpEq4&) {}
};
struct VCmpNe4 : binary_function<uint, uint, uint>
{
......@@ -72,8 +72,8 @@ namespace arithm
return vcmpne4(a, b);
}
__device__ __forceinline__ VCmpNe4() {}
__device__ __forceinline__ VCmpNe4(const VCmpNe4& other) {}
__host__ __device__ __forceinline__ VCmpNe4() {}
__host__ __device__ __forceinline__ VCmpNe4(const VCmpNe4&) {}
};
struct VCmpLt4 : binary_function<uint, uint, uint>
{
......@@ -82,8 +82,8 @@ namespace arithm
return vcmplt4(a, b);
}
__device__ __forceinline__ VCmpLt4() {}
__device__ __forceinline__ VCmpLt4(const VCmpLt4& other) {}
__host__ __device__ __forceinline__ VCmpLt4() {}
__host__ __device__ __forceinline__ VCmpLt4(const VCmpLt4&) {}
};
struct VCmpLe4 : binary_function<uint, uint, uint>
{
......@@ -92,8 +92,8 @@ namespace arithm
return vcmple4(a, b);
}
__device__ __forceinline__ VCmpLe4() {}
__device__ __forceinline__ VCmpLe4(const VCmpLe4& other) {}
__host__ __device__ __forceinline__ VCmpLe4() {}
__host__ __device__ __forceinline__ VCmpLe4(const VCmpLe4&) {}
};
template <class Op, typename T>
......
......@@ -45,6 +45,7 @@
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/reduce.hpp"
#include "opencv2/core/cuda/emulation.hpp"
......
......@@ -59,7 +59,7 @@ namespace arithm
{
S val;
explicit DivInv(S val_) : val(val_) {}
__host__ explicit DivInv(S val_) : val(val_) {}
__device__ __forceinline__ D operator ()(T a) const
{
......
......@@ -91,8 +91,8 @@ namespace arithm
return b != 0 ? saturate_cast<D>(a / b) : 0;
}
__device__ __forceinline__ Div() {}
__device__ __forceinline__ Div(const Div& other) {}
__host__ __device__ __forceinline__ Div() {}
__host__ __device__ __forceinline__ Div(const Div&) {}
};
template <typename T> struct Div<T, float> : binary_function<T, T, float>
{
......@@ -101,8 +101,8 @@ namespace arithm
return b != 0 ? static_cast<float>(a) / b : 0;
}
__device__ __forceinline__ Div() {}
__device__ __forceinline__ Div(const Div& other) {}
__host__ __device__ __forceinline__ Div() {}
__host__ __device__ __forceinline__ Div(const Div&) {}
};
template <typename T> struct Div<T, double> : binary_function<T, T, double>
{
......@@ -111,15 +111,15 @@ namespace arithm
return b != 0 ? static_cast<double>(a) / b : 0;
}
__device__ __forceinline__ Div() {}
__device__ __forceinline__ Div(const Div& other) {}
__host__ __device__ __forceinline__ Div() {}
__host__ __device__ __forceinline__ Div(const Div&) {}
};
template <typename T, typename S, typename D> struct DivScale : binary_function<T, T, D>
{
S scale;
explicit DivScale(S scale_) : scale(scale_) {}
__host__ explicit DivScale(S scale_) : scale(scale_) {}
__device__ __forceinline__ D operator ()(T a, T b) const
{
......
......@@ -59,7 +59,7 @@ namespace arithm
{
S val;
explicit DivScalar(S val_) : val(val_) {}
__host__ explicit DivScalar(S val_) : val(val_) {}
__device__ __forceinline__ D operator ()(T a) const
{
......
......@@ -94,8 +94,8 @@ namespace arithm
return saturate_cast<T>(x * x);
}
__device__ __forceinline__ Sqr() {}
__device__ __forceinline__ Sqr(const Sqr& other) {}
__host__ __device__ __forceinline__ Sqr() {}
__host__ __device__ __forceinline__ Sqr(const Sqr&) {}
};
}
......@@ -190,8 +190,8 @@ namespace arithm
return saturate_cast<T>(f(x));
}
__device__ __forceinline__ Exp() {}
__device__ __forceinline__ Exp(const Exp& other) {}
__host__ __device__ __forceinline__ Exp() {}
__host__ __device__ __forceinline__ Exp(const Exp&) {}
};
}
......@@ -228,7 +228,7 @@ namespace arithm
{
float power;
PowOp(double power_) : power(static_cast<float>(power_)) {}
__host__ explicit PowOp(double power_) : power(static_cast<float>(power_)) {}
__device__ __forceinline__ T operator()(T e) const
{
......@@ -239,7 +239,7 @@ namespace arithm
{
float power;
PowOp(double power_) : power(static_cast<float>(power_)) {}
__host__ explicit PowOp(double power_) : power(static_cast<float>(power_)) {}
__device__ __forceinline__ T operator()(T e) const
{
......@@ -255,7 +255,7 @@ namespace arithm
{
float power;
PowOp(double power_) : power(static_cast<float>(power_)) {}
__host__ explicit PowOp(double power_) : power(static_cast<float>(power_)) {}
__device__ __forceinline__ float operator()(float e) const
{
......@@ -266,7 +266,7 @@ namespace arithm
{
double power;
PowOp(double power_) : power(power_) {}
__host__ explicit PowOp(double power_) : power(power_) {}
__device__ __forceinline__ double operator()(double e) const
{
......
......@@ -45,6 +45,7 @@
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/reduce.hpp"
#include "opencv2/core/cuda/emulation.hpp"
#include "opencv2/core/cuda/limits.hpp"
......
......@@ -65,8 +65,8 @@ namespace arithm
return vmin4(a, b);
}
__device__ __forceinline__ VMin4() {}
__device__ __forceinline__ VMin4(const VMin4& other) {}
__host__ __device__ __forceinline__ VMin4() {}
__host__ __device__ __forceinline__ VMin4(const VMin4&) {}
};
struct VMin2 : binary_function<uint, uint, uint>
......@@ -76,8 +76,8 @@ namespace arithm
return vmin2(a, b);
}
__device__ __forceinline__ VMin2() {}
__device__ __forceinline__ VMin2(const VMin2& other) {}
__host__ __device__ __forceinline__ VMin2() {}
__host__ __device__ __forceinline__ VMin2(const VMin2&) {}
};
}
......@@ -151,8 +151,8 @@ namespace arithm
return vmax4(a, b);
}
__device__ __forceinline__ VMax4() {}
__device__ __forceinline__ VMax4(const VMax4& other) {}
__host__ __device__ __forceinline__ VMax4() {}
__host__ __device__ __forceinline__ VMax4(const VMax4&) {}
};
struct VMax2 : binary_function<uint, uint, uint>
......@@ -162,8 +162,8 @@ namespace arithm
return vmax2(a, b);
}
__device__ __forceinline__ VMax2() {}
__device__ __forceinline__ VMax2(const VMax2& other) {}
__host__ __device__ __forceinline__ VMax2() {}
__host__ __device__ __forceinline__ VMax2(const VMax2&) {}
};
}
......
......@@ -45,6 +45,7 @@
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/reduce.hpp"
#include "opencv2/core/cuda/emulation.hpp"
#include "opencv2/core/cuda/limits.hpp"
......
......@@ -69,8 +69,8 @@ namespace arithm
return res;
}
__device__ __forceinline__ Mul_8uc4_32f() {}
__device__ __forceinline__ Mul_8uc4_32f(const Mul_8uc4_32f& other) {}
__host__ __device__ __forceinline__ Mul_8uc4_32f() {}
__host__ __device__ __forceinline__ Mul_8uc4_32f(const Mul_8uc4_32f&) {}
};
struct Mul_16sc4_32f : binary_function<short4, float, short4>
......@@ -81,8 +81,8 @@ namespace arithm
saturate_cast<short>(a.z * b), saturate_cast<short>(a.w * b));
}
__device__ __forceinline__ Mul_16sc4_32f() {}
__device__ __forceinline__ Mul_16sc4_32f(const Mul_16sc4_32f& other) {}
__host__ __device__ __forceinline__ Mul_16sc4_32f() {}
__host__ __device__ __forceinline__ Mul_16sc4_32f(const Mul_16sc4_32f&) {}
};
template <typename T, typename D> struct Mul : binary_function<T, T, D>
......@@ -92,15 +92,15 @@ namespace arithm
return saturate_cast<D>(a * b);
}
__device__ __forceinline__ Mul() {}
__device__ __forceinline__ Mul(const Mul& other) {}
__host__ __device__ __forceinline__ Mul() {}
__host__ __device__ __forceinline__ Mul(const Mul&) {}
};
template <typename T, typename S, typename D> struct MulScale : binary_function<T, T, D>
{
S scale;
explicit MulScale(S scale_) : scale(scale_) {}
__host__ explicit MulScale(S scale_) : scale(scale_) {}
__device__ __forceinline__ D operator ()(T a, T b) const
{
......
......@@ -59,7 +59,7 @@ namespace arithm
{
S val;
explicit MulScalar(S val_) : val(val_) {}
__host__ explicit MulScalar(S val_) : val(val_) {}
__device__ __forceinline__ D operator ()(T a) const
{
......
......@@ -46,6 +46,7 @@
#include "opencv2/core/cuda/saturate_cast.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/reduce.hpp"
#include "opencv2/core/cuda/limits.hpp"
......@@ -76,8 +77,8 @@ namespace reduce
return r;
}
__device__ __forceinline__ Sum() {}
__device__ __forceinline__ Sum(const Sum&) {}
__host__ __device__ __forceinline__ Sum() {}
__host__ __device__ __forceinline__ Sum(const Sum&) {}
};
struct Avg
......@@ -100,8 +101,8 @@ namespace reduce
return r / sz;
}
__device__ __forceinline__ Avg() {}
__device__ __forceinline__ Avg(const Avg&) {}
__host__ __device__ __forceinline__ Avg() {}
__host__ __device__ __forceinline__ Avg(const Avg&) {}
};
struct Min
......@@ -125,8 +126,8 @@ namespace reduce
return r;
}
__device__ __forceinline__ Min() {}
__device__ __forceinline__ Min(const Min&) {}
__host__ __device__ __forceinline__ Min() {}
__host__ __device__ __forceinline__ Min(const Min&) {}
};
struct Max
......@@ -150,8 +151,8 @@ namespace reduce
return r;
}
__device__ __forceinline__ Max() {}
__device__ __forceinline__ Max(const Max&) {}
__host__ __device__ __forceinline__ Max() {}
__host__ __device__ __forceinline__ Max(const Max&) {}
};
///////////////////////////////////////////////////////////
......
......@@ -62,8 +62,8 @@ namespace arithm
return vsub4(a, b);
}
__device__ __forceinline__ VSub4() {}
__device__ __forceinline__ VSub4(const VSub4& other) {}
__host__ __device__ __forceinline__ VSub4() {}
__host__ __device__ __forceinline__ VSub4(const VSub4&) {}
};
struct VSub2 : binary_function<uint, uint, uint>
......@@ -73,8 +73,8 @@ namespace arithm
return vsub2(a, b);
}
__device__ __forceinline__ VSub2() {}
__device__ __forceinline__ VSub2(const VSub2& other) {}
__host__ __device__ __forceinline__ VSub2() {}
__host__ __device__ __forceinline__ VSub2(const VSub2&) {}
};
template <typename T, typename D> struct SubMat : binary_function<T, T, D>
......@@ -84,8 +84,8 @@ namespace arithm
return saturate_cast<D>(a - b);
}
__device__ __forceinline__ SubMat() {}
__device__ __forceinline__ SubMat(const SubMat& other) {}
__host__ __device__ __forceinline__ SubMat() {}
__host__ __device__ __forceinline__ SubMat(const SubMat&) {}
};
}
......
......@@ -59,7 +59,7 @@ namespace arithm
{
S val;
explicit SubScalar(S val_) : val(val_) {}
__host__ explicit SubScalar(S val_) : val(val_) {}
__device__ __forceinline__ D operator ()(T a) const
{
......
......@@ -45,6 +45,7 @@
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/reduce.hpp"
#include "opencv2/core/cuda/emulation.hpp"
#include "opencv2/core/cuda/utility.hpp"
......
......@@ -72,7 +72,7 @@ PERF_TEST_P(Sz_Type_KernelSz, Blur,
TEST_CYCLE() cv::gpu::blur(d_src, dst, cv::Size(ksize, ksize));
GPU_SANITY_CHECK(dst);
GPU_SANITY_CHECK(dst, 1);
}
else
{
......
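The perf-test hunk above replaces the exact GPU_SANITY_CHECK(dst) with GPU_SANITY_CHECK(dst, 1), i.e. an absolute tolerance of one intensity level; the optical-flow perf tests further down are relaxed the same way (1e-4, 1e-6, 1e-1). As a hedged sketch of what such a tolerance check amounts to, here is an illustrative helper that compares a result against stored regression data by maximum absolute difference; GPU_SANITY_CHECK itself is the OpenCV perf-test macro, and checkMaxAbsDiff is a hypothetical stand-in, not its implementation.

```cpp
// Hedged sketch of a tolerance-based sanity check: compare a freshly computed
// result against stored regression data with an absolute epsilon instead of
// requiring exact equality.
#include <opencv2/core/core.hpp>
#include <cassert>

inline void checkMaxAbsDiff(const cv::Mat& actual, const cv::Mat& reference, double eps)
{
    cv::Mat diff;
    cv::absdiff(actual, reference, diff);

    double maxDiff = 0.0;
    cv::minMaxLoc(diff.reshape(1), 0, &maxDiff);  // fold all channels into one

    assert(maxDiff <= eps && "result drifted beyond the allowed tolerance");
}
```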
......@@ -48,6 +48,7 @@
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/emulation.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/limits.hpp"
#include "opencv2/core/cuda/dynamic_smem.hpp"
......@@ -811,7 +812,7 @@ namespace cv { namespace gpu { namespace cudev
const int ind = ::atomicAdd(r_sizes + n, 1);
if (ind < maxSize)
r_table(n, ind) = p - templCenter;
r_table(n, ind) = saturate_cast<short2>(p - templCenter);
}
void buildRTable_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
......@@ -855,7 +856,7 @@ namespace cv { namespace gpu { namespace cudev
for (int j = 0; j < r_row_size; ++j)
{
short2 c = p - r_row[j];
int2 c = p - r_row[j];
c.x = __float2int_rn(c.x * idp);
c.y = __float2int_rn(c.y * idp);
......
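In the Generalized Hough kernels above, the R-table write is now routed through saturate_cast<short2>, and the displacement is held in int2 rather than short2 before being scaled by the inverse grid resolution idp. Below is a standalone sketch of that widening step; the helper name and the short2 parameter types are hypothetical, chosen only to keep the example self-contained.

```cpp
// Standalone sketch of the widening in the hunk above: do the displacement
// arithmetic in int, scale by the inverse grid resolution, and round to the
// nearest integer, rather than carrying the intermediate values in short.
#include <cuda_runtime.h>

__device__ __forceinline__ int2 scaledDisplacement(short2 p, short2 r, float idp)
{
    // Widen to int before any arithmetic on the components.
    int2 c = make_int2((int)p.x - (int)r.x, (int)p.y - (int)r.y);

    c.x = __float2int_rn(c.x * idp);   // round-to-nearest, as in the kernel
    c.y = __float2int_rn(c.y * idp);
    return c;
}
```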
......@@ -84,7 +84,7 @@ PERF_TEST_P(ImagePair, InterpolateFrames,
TEST_CYCLE() cv::gpu::interpolateFrames(d_frame0, d_frame1, d_fu, d_fv, d_bu, d_bv, 0.5f, newFrame, d_buf);
GPU_SANITY_CHECK(newFrame);
GPU_SANITY_CHECK(newFrame, 1e-4);
}
else
{
......@@ -123,7 +123,7 @@ PERF_TEST_P(ImagePair, CreateOpticalFlowNeedleMap,
TEST_CYCLE() cv::gpu::createOpticalFlowNeedleMap(u, v, vertex, colors);
GPU_SANITY_CHECK(vertex);
GPU_SANITY_CHECK(vertex, 1e-6);
GPU_SANITY_CHECK(colors);
}
else
......@@ -161,8 +161,8 @@ PERF_TEST_P(ImagePair, BroxOpticalFlow,
TEST_CYCLE() d_flow(d_frame0, d_frame1, u, v);
GPU_SANITY_CHECK(u);
GPU_SANITY_CHECK(v);
GPU_SANITY_CHECK(u, 1e-1);
GPU_SANITY_CHECK(v, 1e-1);
}
else
{
......
......@@ -103,8 +103,8 @@ GPU_TEST_P(BroxOpticalFlow, Regression)
for (int i = 0; i < v_gold.rows; ++i)
f.read(v_gold.ptr<char>(i), v_gold.cols * sizeof(float));
EXPECT_MAT_NEAR(u_gold, u, 0);
EXPECT_MAT_NEAR(v_gold, v, 0);
EXPECT_MAT_SIMILAR(u_gold, u, 1e-3);
EXPECT_MAT_SIMILAR(v_gold, v, 1e-3);
#else
std::ofstream f(fname.c_str(), std::ios_base::binary);
......
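The regression test above switches from a bitwise EXPECT_MAT_NEAR(..., 0) comparison to EXPECT_MAT_SIMILAR(..., 1e-3), accepting results within a small relative error across CUDA versions. The helper below is an illustrative relative-L2 comparison under that assumption; it is not necessarily the metric the GPU test utilities actually use.

```cpp
// Hedged sketch of a "similar, not identical" matrix check: accept the result
// when its relative L2 distance to the gold data is below a small threshold.
#include <opencv2/core/core.hpp>

inline bool matsSimilar(const cv::Mat& gold, const cv::Mat& actual, double maxRelErr)
{
    const double num = cv::norm(gold, actual, cv::NORM_L2);  // ||gold - actual||
    const double den = cv::norm(gold, cv::NORM_L2) + 1e-12;  // avoid division by zero
    return num / den <= maxRelErr;
}
```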