Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
9c5da2ea
Commit
9c5da2ea
authored
Aug 23, 2013
by
Vladislav Vinogradov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
used new device layer for cv::gpu::add
parent
32d578f5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
268 additions
and
435 deletions
+268
-435
CMakeLists.txt
modules/cudaarithm/CMakeLists.txt
+1
-1
add_mat.cu
modules/cudaarithm/src/cuda/add_mat.cu
+147
-107
add_scalar.cu
modules/cudaarithm/src/cuda/add_scalar.cu
+116
-84
element_operations.cpp
modules/cudaarithm/src/element_operations.cpp
+2
-241
integral.hpp
modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp
+2
-2
No files found.
modules/cudaarithm/CMakeLists.txt
View file @
9c5da2ea
...
...
@@ -6,7 +6,7 @@ set(the_description "CUDA-accelerated Operations on Matrices")
ocv_warnings_disable
(
CMAKE_CXX_FLAGS /wd4127 /wd4324 /wd4512 -Wundef -Wmissing-declarations
)
ocv_add_module
(
cudaarithm opencv_core OPTIONAL opencv_cudalegacy
)
ocv_add_module
(
cudaarithm opencv_core OPTIONAL opencv_cud
ev opencv_cud
alegacy
)
ocv_module_include_directories
()
ocv_glob_module_sources
()
...
...
modules/cudaarithm/src/cuda/add_mat.cu
View file @
9c5da2ea
...
...
@@ -40,146 +40,186 @@
//
//M*/
#i
f !defined CUDA_DISABLER
#i
nclude "opencv2/opencv_modules.hpp"
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/transform.hpp"
#include "opencv2/core/cuda/saturate_cast.hpp"
#include "opencv2/core/cuda/simd_functions.hpp"
#ifndef HAVE_OPENCV_CUDEV
#
include "arithm_func_traits.hpp
"
#
error "opencv_cudev is required
"
using namespace cv::cuda;
using namespace cv::cuda::device;
#else
namespace arithm
#include "opencv2/cudev.hpp"
using namespace cv::cudev;
void addMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& _stream, int);
namespace
{
struct VAdd4 : binary_function<uint, uint, uint
>
template <typename T, typename D> struct AddOp1 : binary_function<T, T, D
>
{
__device__ __forceinline__
uint operator ()(uint a, uint
b) const
__device__ __forceinline__
D operator ()(T a, T
b) const
{
return
vadd4(a,
b);
return
saturate_cast<D>(a +
b);
}
__host__ __device__ __forceinline__ VAdd4() {}
__host__ __device__ __forceinline__ VAdd4(const VAdd4&) {}
};
struct VAdd2 : binary_function<uint, uint, uint>
// Generic add path: runs AddOp1<T, D> over src1 and src2 through
// gridTransformBinary and writes the result into dst.
//
// T is the element type the sources are viewed as, D the element type of dst.
// When mask.data is non-null the mask is forwarded to gridTransformBinary as
// a uchar view; otherwise the overload without a mask argument is used.
// The work is issued on the given stream.
template <typename T, typename D>
void addMat_v1(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, Stream& stream)
{
    // No mask supplied: use the unmasked overload and finish.
    if (!mask.data)
    {
        gridTransformBinary(globPtr<T>(src1), globPtr<T>(src2), globPtr<D>(dst), AddOp1<T, D>(), stream);
        return;
    }

    // Masked variant: the mask is passed along as an extra uchar pointer view.
    gridTransformBinary(globPtr<T>(src1), globPtr<T>(src2), globPtr<D>(dst), AddOp1<T, D>(), globPtr<uchar>(mask), stream);
}
struct AddOp2 : binary_function<uint, uint, uint>
{
__device__ __forceinline__ uint operator ()(uint a, uint b) const
{
return vadd2(a, b);
}
__host__ __device__ __forceinline__ VAdd2() {}
__host__ __device__ __forceinline__ VAdd2(const VAdd2&) {}
};
template <typename T, typename D> struct AddMat : binary_function<T, T, D>
void addMat_v2(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
{
__device__ __forceinline__ D operator ()(T a, T b) const
const int vcols = src1.cols >> 1;
GlobPtrSz<uint> src1_ = globPtr((uint*) src1.data, src1.step, src1.rows, vcols);
GlobPtrSz<uint> src2_ = globPtr((uint*) src2.data, src2.step, src1.rows, vcols);
GlobPtrSz<uint> dst_ = globPtr((uint*) dst.data, dst.step, src1.rows, vcols);
gridTransformBinary(src1_, src2_, dst_, AddOp2(), stream);
}
struct AddOp4 : binary_function<uint, uint, uint>
{
__device__ __forceinline__ uint operator ()(uint a, uint b) const
{
return
saturate_cast<D>(a +
b);
return
vadd4(a,
b);
}
__host__ __device__ __forceinline__ AddMat() {}
__host__ __device__ __forceinline__ AddMat(const AddMat&) {}
};
// Packed add path: views all three matrices as rows of uint and applies
// AddOp4 to every uint through gridTransformBinary on the given stream.
//
// The uint views use each matrix's own data pointer and step, but the row
// count and the packed column count are both taken from src1.
// NOTE(review): nothing here checks alignment or that src1.cols is a multiple
// of 4 - the caller is expected to have verified that; confirm at call sites.
void addMat_v4(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
{
    // Width of the uint view: a quarter of the source column count.
    const int packedCols = src1.cols >> 2;
    const int rowCount = src1.rows;

    GlobPtrSz<uint> lhsWords = globPtr((uint*) src1.data, src1.step, rowCount, packedCols);
    GlobPtrSz<uint> rhsWords = globPtr((uint*) src2.data, src2.step, rowCount, packedCols);
    GlobPtrSz<uint> outWords = globPtr((uint*) dst.data, dst.step, rowCount, packedCols);

    gridTransformBinary(lhsWords, rhsWords, outWords, AddOp4(), stream);
}
}
namespace cv { namespace cuda { namespace device
void addMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int)
{
template <> struct TransformFunctorTraits< arithm::VAdd4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, Stream& stream);
static const func_t funcs[7][7] =
{
{
addMat_v1<uchar, uchar>,
addMat_v1<uchar, schar>,
addMat_v1<uchar, ushort>,
addMat_v1<uchar, short>,
addMat_v1<uchar, int>,
addMat_v1<uchar, float>,
addMat_v1<uchar, double>
},
{
addMat_v1<schar, uchar>,
addMat_v1<schar, schar>,
addMat_v1<schar, ushort>,
addMat_v1<schar, short>,
addMat_v1<schar, int>,
addMat_v1<schar, float>,
addMat_v1<schar, double>
},
{
0 /*addMat_v1<ushort, uchar>*/,
0 /*addMat_v1<ushort, schar>*/,
addMat_v1<ushort, ushort>,
addMat_v1<ushort, short>,
addMat_v1<ushort, int>,
addMat_v1<ushort, float>,
addMat_v1<ushort, double>
},
{
0 /*addMat_v1<short, uchar>*/,
0 /*addMat_v1<short, schar>*/,
addMat_v1<short, ushort>,
addMat_v1<short, short>,
addMat_v1<short, int>,
addMat_v1<short, float>,
addMat_v1<short, double>
},
{
0 /*addMat_v1<int, uchar>*/,
0 /*addMat_v1<int, schar>*/,
0 /*addMat_v1<int, ushort>*/,
0 /*addMat_v1<int, short>*/,
addMat_v1<int, int>,
addMat_v1<int, float>,
addMat_v1<int, double>
},
{
0 /*addMat_v1<float, uchar>*/,
0 /*addMat_v1<float, schar>*/,
0 /*addMat_v1<float, ushort>*/,
0 /*addMat_v1<float, short>*/,
0 /*addMat_v1<float, int>*/,
addMat_v1<float, float>,
addMat_v1<float, double>
},
{
0 /*addMat_v1<double, uchar>*/,
0 /*addMat_v1<double, schar>*/,
0 /*addMat_v1<double, ushort>*/,
0 /*addMat_v1<double, short>*/,
0 /*addMat_v1<double, int>*/,
0 /*addMat_v1<double, float>*/,
addMat_v1<double, double>
}
};
template <> struct TransformFunctorTraits< arithm::VAdd2 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
{
};
const int sdepth = src1.depth();
const int ddepth = dst.depth();
template <typename T, typename D> struct TransformFunctorTraits< arithm::AddMat<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
{
};
}}}
CV_DbgAssert( sdepth < 7 && ddepth < 7 );
namespace arithm
{
void addMat_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
{
device::transform(src1, src2, dst, VAdd4(), WithOutMask(), stream);
}
GpuMat src1_ = src1.reshape(1);
GpuMat src2_ = src2.reshape(1);
GpuMat dst_ = dst.reshape(1);
void addMat_v2(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream
)
if (mask.empty() && (sdepth == CV_8U || sdepth == CV_16U) && ddepth == sdepth
)
{
device::transform(src1, src2, dst, VAdd2(), WithOutMask(), stream);
}
const intptr_t src1ptr = reinterpret_cast<intptr_t>(src1_.data);
const intptr_t src2ptr = reinterpret_cast<intptr_t>(src2_.data);
const intptr_t dstptr = reinterpret_cast<intptr_t>(dst_.data);
template <typename T, typename D>
void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
{
if (mask.data)
device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, AddMat<T, D>(), mask, stream);
else
device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, AddMat<T, D>(), WithOutMask(), stream);
const bool isAllAligned = (src1ptr & 31) == 0 && (src2ptr & 31) == 0 && (dstptr & 31) == 0;
if (isAllAligned)
{
if (sdepth == CV_8U && (src1_.cols & 3) == 0)
{
addMat_v4(src1_, src2_, dst_, stream);
return;
}
else if (sdepth == CV_16U && (src1_.cols & 1) == 0)
{
addMat_v2(src1_, src2_, dst_, stream);
return;
}
}
}
template void addMat<uchar, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addMat<uchar, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addMat<uchar, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addMat<uchar, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addMat<uchar, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addMat<uchar, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addMat<uchar, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addMat<schar, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addMat<schar, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addMat<schar, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addMat<schar, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addMat<schar, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addMat<schar, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addMat<schar, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void addMat<ushort, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void addMat<ushort, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addMat<ushort, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addMat<ushort, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addMat<ushort, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addMat<ushort, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addMat<ushort, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void addMat<short, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void addMat<short, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addMat<short, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addMat<short, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addMat<short, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addMat<short, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addMat<short, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void addMat<int, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void addMat<int, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void addMat<int, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void addMat<int, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addMat<int, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addMat<int, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addMat<int, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void addMat<float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void addMat<float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void addMat<float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void addMat<float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void addMat<float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addMat<float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addMat<float, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void addMat<double, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void addMat<double, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void addMat<double, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void addMat<double, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void addMat<double, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void addMat<double, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addMat<double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
const func_t func = funcs[sdepth][ddepth];
if (!func)
CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types");
func(src1_, src2_, dst_, mask, stream);
}
#endif
// CUDA_DISABLER
#endif
modules/cudaarithm/src/cuda/add_scalar.cu
View file @
9c5da2ea
...
...
@@ -40,109 +40,141 @@
//
//M*/
#i
f !defined CUDA_DISABLER
#i
nclude "opencv2/opencv_modules.hpp"
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/transform.hpp"
#include "opencv2/core/cuda/saturate_cast.hpp"
#include "opencv2/core/cuda/simd_functions.hpp"
#ifndef HAVE_OPENCV_CUDEV
#
include "arithm_func_traits.hpp
"
#
error "opencv_cudev is required
"
using namespace cv::cuda;
using namespace cv::cuda::device;
#else
namespace arithm
#include "opencv2/cudev.hpp"
using namespace cv::cudev;
void addScalar(const GpuMat& src, cv::Scalar val, bool, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int);
namespace
{
template <typename
T, typename S, typename D> struct AddScalar : unary_function<T, D
>
template <typename
SrcType, typename ScalarType, typename DstType> struct AddScalarOp : unary_function<SrcType, DstType
>
{
S val;
__host__ explicit AddScalar(S val_) : val(val_) {}
ScalarType val;
__device__ __forceinline__ D
operator ()(T
a) const
__device__ __forceinline__ D
stType operator ()(SrcType
a) const
{
return saturate_cast<D
>(a
+ val);
return saturate_cast<D
stType>(saturate_cast<ScalarType>(a)
+ val);
}
};
}
namespace cv { namespace cuda { namespace device
{
template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::AddScalar<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
// Launch policy selected by the scalar working depth.
// The primary template adds nothing to DefaultTransformPolicy.
template <typename ScalarDepth>
struct TransformPolicy : DefaultTransformPolicy
{
};

// Specialisation for double: declares its own `shift` constant with value 1.
// NOTE(review): presumably this hides a `shift` inherited from
// DefaultTransformPolicy to lower the per-thread element count for double -
// confirm against the cudev policy definition.
template <>
struct TransformPolicy<double> : DefaultTransformPolicy
{
    enum
    {
        shift = 1
    };
};
}}}
namespace arithm
{
template <typename T, typename S, typename D>
void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
template <typename SrcType, typename ScalarDepth, typename DstType>
void addScalarImpl(const GpuMat& src, cv::Scalar value, GpuMat& dst, const GpuMat& mask, Stream& stream)
{
AddScalar<T, S, D> op(static_cast<S>(val));
typedef typename MakeVec<ScalarDepth, VecTraits<SrcType>::cn>::type ScalarType;
cv::Scalar_<ScalarDepth> value_ = value;
AddScalarOp<SrcType, ScalarType, DstType> op;
op.val = VecTraits<ScalarType>::make(value_.val);
if (mask.data)
device::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, mask
, stream);
gridTransformUnary_< TransformPolicy<ScalarDepth> >(globPtr<SrcType>(src), globPtr<DstType>(dst), op, globPtr<uchar>(mask)
, stream);
else
device::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, WithOutMask()
, stream);
gridTransformUnary_< TransformPolicy<ScalarDepth> >(globPtr<SrcType>(src), globPtr<DstType>(dst), op
, stream);
}
}
void addScalar(const GpuMat& src, cv::Scalar val, bool, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int)
{
typedef void (*func_t)(const GpuMat& src, cv::Scalar val, GpuMat& dst, const GpuMat& mask, Stream& stream);
static const func_t funcs[7][7][4] =
{
{
{addScalarImpl<uchar, float, uchar>, addScalarImpl<uchar2, float, uchar2>, addScalarImpl<uchar3, float, uchar3>, addScalarImpl<uchar4, float, uchar4>},
{addScalarImpl<uchar, float, schar>, addScalarImpl<uchar2, float, char2>, addScalarImpl<uchar3, float, char3>, addScalarImpl<uchar4, float, char4>},
{addScalarImpl<uchar, float, ushort>, addScalarImpl<uchar2, float, ushort2>, addScalarImpl<uchar3, float, ushort3>, addScalarImpl<uchar4, float, ushort4>},
{addScalarImpl<uchar, float, short>, addScalarImpl<uchar2, float, short2>, addScalarImpl<uchar3, float, short3>, addScalarImpl<uchar4, float, short4>},
{addScalarImpl<uchar, float, int>, addScalarImpl<uchar2, float, int2>, addScalarImpl<uchar3, float, int3>, addScalarImpl<uchar4, float, int4>},
{addScalarImpl<uchar, float, float>, addScalarImpl<uchar2, float, float2>, addScalarImpl<uchar3, float, float3>, addScalarImpl<uchar4, float, float4>},
{addScalarImpl<uchar, double, double>, addScalarImpl<uchar2, double, double2>, addScalarImpl<uchar3, double, double3>, addScalarImpl<uchar4, double, double4>}
},
{
{addScalarImpl<schar, float, uchar>, addScalarImpl<char2, float, uchar2>, addScalarImpl<char3, float, uchar3>, addScalarImpl<char4, float, uchar4>},
{addScalarImpl<schar, float, schar>, addScalarImpl<char2, float, char2>, addScalarImpl<char3, float, char3>, addScalarImpl<char4, float, char4>},
{addScalarImpl<schar, float, ushort>, addScalarImpl<char2, float, ushort2>, addScalarImpl<char3, float, ushort3>, addScalarImpl<char4, float, ushort4>},
{addScalarImpl<schar, float, short>, addScalarImpl<char2, float, short2>, addScalarImpl<char3, float, short3>, addScalarImpl<char4, float, short4>},
{addScalarImpl<schar, float, int>, addScalarImpl<char2, float, int2>, addScalarImpl<char3, float, int3>, addScalarImpl<char4, float, int4>},
{addScalarImpl<schar, float, float>, addScalarImpl<char2, float, float2>, addScalarImpl<char3, float, float3>, addScalarImpl<char4, float, float4>},
{addScalarImpl<schar, double, double>, addScalarImpl<char2, double, double2>, addScalarImpl<char3, double, double3>, addScalarImpl<char4, double, double4>}
},
{
{0 /*addScalarImpl<ushort, float, uchar>*/, 0 /*addScalarImpl<ushort2, float, uchar2>*/, 0 /*addScalarImpl<ushort3, float, uchar3>*/, 0 /*addScalarImpl<ushort4, float, uchar4>*/},
{0 /*addScalarImpl<ushort, float, schar>*/, 0 /*addScalarImpl<ushort2, float, char2>*/, 0 /*addScalarImpl<ushort3, float, char3>*/, 0 /*addScalarImpl<ushort4, float, char4>*/},
{addScalarImpl<ushort, float, ushort>, addScalarImpl<ushort2, float, ushort2>, addScalarImpl<ushort3, float, ushort3>, addScalarImpl<ushort4, float, ushort4>},
{addScalarImpl<ushort, float, short>, addScalarImpl<ushort2, float, short2>, addScalarImpl<ushort3, float, short3>, addScalarImpl<ushort4, float, short4>},
{addScalarImpl<ushort, float, int>, addScalarImpl<ushort2, float, int2>, addScalarImpl<ushort3, float, int3>, addScalarImpl<ushort4, float, int4>},
{addScalarImpl<ushort, float, float>, addScalarImpl<ushort2, float, float2>, addScalarImpl<ushort3, float, float3>, addScalarImpl<ushort4, float, float4>},
{addScalarImpl<ushort, double, double>, addScalarImpl<ushort2, double, double2>, addScalarImpl<ushort3, double, double3>, addScalarImpl<ushort4, double, double4>}
},
{
{0 /*addScalarImpl<short, float, uchar>*/, 0 /*addScalarImpl<short2, float, uchar2>*/, 0 /*addScalarImpl<short3, float, uchar3>*/, 0 /*addScalarImpl<short4, float, uchar4>*/},
{0 /*addScalarImpl<short, float, schar>*/, 0 /*addScalarImpl<short2, float, char2>*/, 0 /*addScalarImpl<short3, float, char3>*/, 0 /*addScalarImpl<short4, float, char4>*/},
{addScalarImpl<short, float, ushort>, addScalarImpl<short2, float, ushort2>, addScalarImpl<short3, float, ushort3>, addScalarImpl<short4, float, ushort4>},
{addScalarImpl<short, float, short>, addScalarImpl<short2, float, short2>, addScalarImpl<short3, float, short3>, addScalarImpl<short4, float, short4>},
{addScalarImpl<short, float, int>, addScalarImpl<short2, float, int2>, addScalarImpl<short3, float, int3>, addScalarImpl<short4, float, int4>},
{addScalarImpl<short, float, float>, addScalarImpl<short2, float, float2>, addScalarImpl<short3, float, float3>, addScalarImpl<short4, float, float4>},
{addScalarImpl<short, double, double>, addScalarImpl<short2, double, double2>, addScalarImpl<short3, double, double3>, addScalarImpl<short4, double, double4>}
},
{
{0 /*addScalarImpl<int, float, uchar>*/, 0 /*addScalarImpl<int2, float, uchar2>*/, 0 /*addScalarImpl<int3, float, uchar3>*/, 0 /*addScalarImpl<int4, float, uchar4>*/},
{0 /*addScalarImpl<int, float, schar>*/, 0 /*addScalarImpl<int2, float, char2>*/, 0 /*addScalarImpl<int3, float, char3>*/, 0 /*addScalarImpl<int4, float, char4>*/},
{0 /*addScalarImpl<int, float, ushort>*/, 0 /*addScalarImpl<int2, float, ushort2>*/, 0 /*addScalarImpl<int3, float, ushort3>*/, 0 /*addScalarImpl<int4, float, ushort4>*/},
{0 /*addScalarImpl<int, float, short>*/, 0 /*addScalarImpl<int2, float, short2>*/, 0 /*addScalarImpl<int3, float, short3>*/, 0 /*addScalarImpl<int4, float, short4>*/},
{addScalarImpl<int, float, int>, addScalarImpl<int2, float, int2>, addScalarImpl<int3, float, int3>, addScalarImpl<int4, float, int4>},
{addScalarImpl<int, float, float>, addScalarImpl<int2, float, float2>, addScalarImpl<int3, float, float3>, addScalarImpl<int4, float, float4>},
{addScalarImpl<int, double, double>, addScalarImpl<int2, double, double2>, addScalarImpl<int3, double, double3>, addScalarImpl<int4, double, double4>}
},
{
{0 /*addScalarImpl<float, float, uchar>*/, 0 /*addScalarImpl<float2, float, uchar2>*/, 0 /*addScalarImpl<float3, float, uchar3>*/, 0 /*addScalarImpl<float4, float, uchar4>*/},
{0 /*addScalarImpl<float, float, schar>*/, 0 /*addScalarImpl<float2, float, char2>*/, 0 /*addScalarImpl<float3, float, char3>*/, 0 /*addScalarImpl<float4, float, char4>*/},
{0 /*addScalarImpl<float, float, ushort>*/, 0 /*addScalarImpl<float2, float, ushort2>*/, 0 /*addScalarImpl<float3, float, ushort3>*/, 0 /*addScalarImpl<float4, float, ushort4>*/},
{0 /*addScalarImpl<float, float, short>*/, 0 /*addScalarImpl<float2, float, short2>*/, 0 /*addScalarImpl<float3, float, short3>*/, 0 /*addScalarImpl<float4, float, short4>*/},
{0 /*addScalarImpl<float, float, int>*/, 0 /*addScalarImpl<float2, float, int2>*/, 0 /*addScalarImpl<float3, float, int3>*/, 0 /*addScalarImpl<float4, float, int4>*/},
{addScalarImpl<float, float, float>, addScalarImpl<float2, float, float2>, addScalarImpl<float3, float, float3>, addScalarImpl<float4, float, float4>},
{addScalarImpl<float, double, double>, addScalarImpl<float2, double, double2>, addScalarImpl<float3, double, double3>, addScalarImpl<float4, double, double4>}
},
{
{0 /*addScalarImpl<double, double, uchar>*/, 0 /*addScalarImpl<double2, double, uchar2>*/, 0 /*addScalarImpl<double3, double, uchar3>*/, 0 /*addScalarImpl<double4, double, uchar4>*/},
{0 /*addScalarImpl<double, double, schar>*/, 0 /*addScalarImpl<double2, double, char2>*/, 0 /*addScalarImpl<double3, double, char3>*/, 0 /*addScalarImpl<double4, double, char4>*/},
{0 /*addScalarImpl<double, double, ushort>*/, 0 /*addScalarImpl<double2, double, ushort2>*/, 0 /*addScalarImpl<double3, double, ushort3>*/, 0 /*addScalarImpl<double4, double, ushort4>*/},
{0 /*addScalarImpl<double, double, short>*/, 0 /*addScalarImpl<double2, double, short2>*/, 0 /*addScalarImpl<double3, double, short3>*/, 0 /*addScalarImpl<double4, double, short4>*/},
{0 /*addScalarImpl<double, double, int>*/, 0 /*addScalarImpl<double2, double, int2>*/, 0 /*addScalarImpl<double3, double, int3>*/, 0 /*addScalarImpl<double4, double, int4>*/},
{0 /*addScalarImpl<double, double, float>*/, 0 /*addScalarImpl<double2, double, float2>*/, 0 /*addScalarImpl<double3, double, float3>*/, 0 /*addScalarImpl<double4, double, float4>*/},
{addScalarImpl<double, double, double>, addScalarImpl<double2, double, double2>, addScalarImpl<double3, double, double3>, addScalarImpl<double4, double, double4>}
}
};
const int sdepth = src.depth();
const int ddepth = dst.depth();
const int cn = src.channels();
CV_DbgAssert( sdepth < 7 && ddepth < 7 && cn <= 4 );
const func_t func = funcs[sdepth][ddepth][cn - 1];
if (!func)
CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types");
template void addScalar<uchar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addScalar<uchar, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addScalar<uchar, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addScalar<uchar, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addScalar<uchar, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addScalar<uchar, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addScalar<uchar, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addScalar<schar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addScalar<schar, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addScalar<schar, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addScalar<schar, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addScalar<schar, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addScalar<schar, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addScalar<schar, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void addScalar<ushort, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void addScalar<ushort, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addScalar<ushort, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addScalar<ushort, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addScalar<ushort, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addScalar<ushort, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addScalar<ushort, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void addScalar<short, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void addScalar<short, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addScalar<short, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addScalar<short, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addScalar<short, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addScalar<short, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addScalar<short, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void addScalar<int, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void addScalar<int, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void addScalar<int, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void addScalar<int, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addScalar<int, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addScalar<int, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addScalar<int, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void addScalar<float, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void addScalar<float, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void addScalar<float, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void addScalar<float, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void addScalar<float, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addScalar<float, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addScalar<float, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void addScalar<double, double, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void addScalar<double, double, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void addScalar<double, double, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void addScalar<double, double, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void addScalar<double, double, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void addScalar<double, double, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void addScalar<double, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
func(src, val, dst, mask, stream);
}
#endif
// CUDA_DISABLER
#endif
modules/cudaarithm/src/element_operations.cpp
View file @
9c5da2ea
...
...
@@ -336,248 +336,9 @@ namespace
////////////////////////////////////////////////////////////////////////
// add
namespace
arithm
{
void
addMat_v4
(
PtrStepSz
<
unsigned
int
>
src1
,
PtrStepSz
<
unsigned
int
>
src2
,
PtrStepSz
<
unsigned
int
>
dst
,
cudaStream_t
stream
);
void
addMat_v2
(
PtrStepSz
<
unsigned
int
>
src1
,
PtrStepSz
<
unsigned
int
>
src2
,
PtrStepSz
<
unsigned
int
>
dst
,
cudaStream_t
stream
);
template
<
typename
T
,
typename
D
>
void
addMat
(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
PtrStepb
mask
,
cudaStream_t
stream
);
}
static
void
addMat
(
const
GpuMat
&
src1
,
const
GpuMat
&
src2
,
GpuMat
&
dst
,
const
GpuMat
&
mask
,
double
,
Stream
&
_stream
,
int
)
{
typedef
void
(
*
func_t
)(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
PtrStepb
mask
,
cudaStream_t
stream
);
static
const
func_t
funcs
[
7
][
7
]
=
{
{
arithm
::
addMat
<
unsigned
char
,
unsigned
char
>
,
arithm
::
addMat
<
unsigned
char
,
signed
char
>
,
arithm
::
addMat
<
unsigned
char
,
unsigned
short
>
,
arithm
::
addMat
<
unsigned
char
,
short
>
,
arithm
::
addMat
<
unsigned
char
,
int
>
,
arithm
::
addMat
<
unsigned
char
,
float
>
,
arithm
::
addMat
<
unsigned
char
,
double
>
},
{
arithm
::
addMat
<
signed
char
,
unsigned
char
>
,
arithm
::
addMat
<
signed
char
,
signed
char
>
,
arithm
::
addMat
<
signed
char
,
unsigned
short
>
,
arithm
::
addMat
<
signed
char
,
short
>
,
arithm
::
addMat
<
signed
char
,
int
>
,
arithm
::
addMat
<
signed
char
,
float
>
,
arithm
::
addMat
<
signed
char
,
double
>
},
{
0
/*arithm::addMat<unsigned short, unsigned char>*/
,
0
/*arithm::addMat<unsigned short, signed char>*/
,
arithm
::
addMat
<
unsigned
short
,
unsigned
short
>
,
arithm
::
addMat
<
unsigned
short
,
short
>
,
arithm
::
addMat
<
unsigned
short
,
int
>
,
arithm
::
addMat
<
unsigned
short
,
float
>
,
arithm
::
addMat
<
unsigned
short
,
double
>
},
{
0
/*arithm::addMat<short, unsigned char>*/
,
0
/*arithm::addMat<short, signed char>*/
,
arithm
::
addMat
<
short
,
unsigned
short
>
,
arithm
::
addMat
<
short
,
short
>
,
arithm
::
addMat
<
short
,
int
>
,
arithm
::
addMat
<
short
,
float
>
,
arithm
::
addMat
<
short
,
double
>
},
{
0
/*arithm::addMat<int, unsigned char>*/
,
0
/*arithm::addMat<int, signed char>*/
,
0
/*arithm::addMat<int, unsigned short>*/
,
0
/*arithm::addMat<int, short>*/
,
arithm
::
addMat
<
int
,
int
>
,
arithm
::
addMat
<
int
,
float
>
,
arithm
::
addMat
<
int
,
double
>
},
{
0
/*arithm::addMat<float, unsigned char>*/
,
0
/*arithm::addMat<float, signed char>*/
,
0
/*arithm::addMat<float, unsigned short>*/
,
0
/*arithm::addMat<float, short>*/
,
0
/*arithm::addMat<float, int>*/
,
arithm
::
addMat
<
float
,
float
>
,
arithm
::
addMat
<
float
,
double
>
},
{
0
/*arithm::addMat<double, unsigned char>*/
,
0
/*arithm::addMat<double, signed char>*/
,
0
/*arithm::addMat<double, unsigned short>*/
,
0
/*arithm::addMat<double, short>*/
,
0
/*arithm::addMat<double, int>*/
,
0
/*arithm::addMat<double, float>*/
,
arithm
::
addMat
<
double
,
double
>
}
};
const
int
sdepth
=
src1
.
depth
();
const
int
ddepth
=
dst
.
depth
();
const
int
cn
=
src1
.
channels
();
cudaStream_t
stream
=
StreamAccessor
::
getStream
(
_stream
);
PtrStepSzb
src1_
(
src1
.
rows
,
src1
.
cols
*
cn
,
src1
.
data
,
src1
.
step
);
PtrStepSzb
src2_
(
src1
.
rows
,
src1
.
cols
*
cn
,
src2
.
data
,
src2
.
step
);
PtrStepSzb
dst_
(
src1
.
rows
,
src1
.
cols
*
cn
,
dst
.
data
,
dst
.
step
);
if
(
mask
.
empty
()
&&
(
sdepth
==
CV_8U
||
sdepth
==
CV_16U
)
&&
ddepth
==
sdepth
)
{
const
intptr_t
src1ptr
=
reinterpret_cast
<
intptr_t
>
(
src1_
.
data
);
const
intptr_t
src2ptr
=
reinterpret_cast
<
intptr_t
>
(
src2_
.
data
);
const
intptr_t
dstptr
=
reinterpret_cast
<
intptr_t
>
(
dst_
.
data
);
const
bool
isAllAligned
=
(
src1ptr
&
31
)
==
0
&&
(
src2ptr
&
31
)
==
0
&&
(
dstptr
&
31
)
==
0
;
if
(
isAllAligned
)
{
if
(
sdepth
==
CV_8U
&&
(
src1_
.
cols
&
3
)
==
0
)
{
const
int
vcols
=
src1_
.
cols
>>
2
;
arithm
::
addMat_v4
(
PtrStepSz
<
unsigned
int
>
(
src1_
.
rows
,
vcols
,
(
unsigned
int
*
)
src1_
.
data
,
src1_
.
step
),
PtrStepSz
<
unsigned
int
>
(
src1_
.
rows
,
vcols
,
(
unsigned
int
*
)
src2_
.
data
,
src2_
.
step
),
PtrStepSz
<
unsigned
int
>
(
src1_
.
rows
,
vcols
,
(
unsigned
int
*
)
dst_
.
data
,
dst_
.
step
),
stream
);
return
;
}
else
if
(
sdepth
==
CV_16U
&&
(
src1_
.
cols
&
1
)
==
0
)
{
const
int
vcols
=
src1_
.
cols
>>
1
;
arithm
::
addMat_v2
(
PtrStepSz
<
unsigned
int
>
(
src1_
.
rows
,
vcols
,
(
unsigned
int
*
)
src1_
.
data
,
src1_
.
step
),
PtrStepSz
<
unsigned
int
>
(
src1_
.
rows
,
vcols
,
(
unsigned
int
*
)
src2_
.
data
,
src2_
.
step
),
PtrStepSz
<
unsigned
int
>
(
src1_
.
rows
,
vcols
,
(
unsigned
int
*
)
dst_
.
data
,
dst_
.
step
),
stream
);
return
;
}
}
}
const
func_t
func
=
funcs
[
sdepth
][
ddepth
];
if
(
!
func
)
CV_Error
(
cv
::
Error
::
StsUnsupportedFormat
,
"Unsupported combination of source and destination types"
);
func
(
src1_
,
src2_
,
dst_
,
mask
,
stream
);
}
namespace
arithm
{
template
<
typename
T
,
typename
S
,
typename
D
>
void
addScalar
(
PtrStepSzb
src1
,
double
val
,
PtrStepSzb
dst
,
PtrStepb
mask
,
cudaStream_t
stream
);
}
static
void
addScalar
(
const
GpuMat
&
src
,
Scalar
val
,
bool
,
GpuMat
&
dst
,
const
GpuMat
&
mask
,
double
,
Stream
&
_stream
,
int
)
{
typedef
void
(
*
func_t
)(
PtrStepSzb
src1
,
double
val
,
PtrStepSzb
dst
,
PtrStepb
mask
,
cudaStream_t
stream
);
static
const
func_t
funcs
[
7
][
7
]
=
{
{
arithm
::
addScalar
<
unsigned
char
,
float
,
unsigned
char
>
,
arithm
::
addScalar
<
unsigned
char
,
float
,
signed
char
>
,
arithm
::
addScalar
<
unsigned
char
,
float
,
unsigned
short
>
,
arithm
::
addScalar
<
unsigned
char
,
float
,
short
>
,
arithm
::
addScalar
<
unsigned
char
,
float
,
int
>
,
arithm
::
addScalar
<
unsigned
char
,
float
,
float
>
,
arithm
::
addScalar
<
unsigned
char
,
double
,
double
>
},
{
arithm
::
addScalar
<
signed
char
,
float
,
unsigned
char
>
,
arithm
::
addScalar
<
signed
char
,
float
,
signed
char
>
,
arithm
::
addScalar
<
signed
char
,
float
,
unsigned
short
>
,
arithm
::
addScalar
<
signed
char
,
float
,
short
>
,
arithm
::
addScalar
<
signed
char
,
float
,
int
>
,
arithm
::
addScalar
<
signed
char
,
float
,
float
>
,
arithm
::
addScalar
<
signed
char
,
double
,
double
>
},
{
0
/*arithm::addScalar<unsigned short, float, unsigned char>*/
,
0
/*arithm::addScalar<unsigned short, float, signed char>*/
,
arithm
::
addScalar
<
unsigned
short
,
float
,
unsigned
short
>
,
arithm
::
addScalar
<
unsigned
short
,
float
,
short
>
,
arithm
::
addScalar
<
unsigned
short
,
float
,
int
>
,
arithm
::
addScalar
<
unsigned
short
,
float
,
float
>
,
arithm
::
addScalar
<
unsigned
short
,
double
,
double
>
},
{
0
/*arithm::addScalar<short, float, unsigned char>*/
,
0
/*arithm::addScalar<short, float, signed char>*/
,
arithm
::
addScalar
<
short
,
float
,
unsigned
short
>
,
arithm
::
addScalar
<
short
,
float
,
short
>
,
arithm
::
addScalar
<
short
,
float
,
int
>
,
arithm
::
addScalar
<
short
,
float
,
float
>
,
arithm
::
addScalar
<
short
,
double
,
double
>
},
{
0
/*arithm::addScalar<int, float, unsigned char>*/
,
0
/*arithm::addScalar<int, float, signed char>*/
,
0
/*arithm::addScalar<int, float, unsigned short>*/
,
0
/*arithm::addScalar<int, float, short>*/
,
arithm
::
addScalar
<
int
,
float
,
int
>
,
arithm
::
addScalar
<
int
,
float
,
float
>
,
arithm
::
addScalar
<
int
,
double
,
double
>
},
{
0
/*arithm::addScalar<float, float, unsigned char>*/
,
0
/*arithm::addScalar<float, float, signed char>*/
,
0
/*arithm::addScalar<float, float, unsigned short>*/
,
0
/*arithm::addScalar<float, float, short>*/
,
0
/*arithm::addScalar<float, float, int>*/
,
arithm
::
addScalar
<
float
,
float
,
float
>
,
arithm
::
addScalar
<
float
,
double
,
double
>
},
{
0
/*arithm::addScalar<double, double, unsigned char>*/
,
0
/*arithm::addScalar<double, double, signed char>*/
,
0
/*arithm::addScalar<double, double, unsigned short>*/
,
0
/*arithm::addScalar<double, double, short>*/
,
0
/*arithm::addScalar<double, double, int>*/
,
0
/*arithm::addScalar<double, double, float>*/
,
arithm
::
addScalar
<
double
,
double
,
double
>
}
};
typedef
void
(
*
npp_func_t
)(
const
PtrStepSzb
src
,
Scalar
sc
,
PtrStepb
dst
,
cudaStream_t
stream
);
static
const
npp_func_t
npp_funcs
[
7
][
4
]
=
{
{
NppArithmScalar
<
CV_8U
,
1
,
nppiAddC_8u_C1RSfs
>::
call
,
0
,
NppArithmScalar
<
CV_8U
,
3
,
nppiAddC_8u_C3RSfs
>::
call
,
NppArithmScalar
<
CV_8U
,
4
,
nppiAddC_8u_C4RSfs
>::
call
},
{
0
,
0
,
0
,
0
},
{
NppArithmScalar
<
CV_16U
,
1
,
nppiAddC_16u_C1RSfs
>::
call
,
0
,
NppArithmScalar
<
CV_16U
,
3
,
nppiAddC_16u_C3RSfs
>::
call
,
NppArithmScalar
<
CV_16U
,
4
,
nppiAddC_16u_C4RSfs
>::
call
},
{
NppArithmScalar
<
CV_16S
,
1
,
nppiAddC_16s_C1RSfs
>::
call
,
NppArithmScalar
<
CV_16S
,
2
,
nppiAddC_16sc_C1RSfs
>::
call
,
NppArithmScalar
<
CV_16S
,
3
,
nppiAddC_16s_C3RSfs
>::
call
,
NppArithmScalar
<
CV_16S
,
4
,
nppiAddC_16s_C4RSfs
>::
call
},
{
NppArithmScalar
<
CV_32S
,
1
,
nppiAddC_32s_C1RSfs
>::
call
,
NppArithmScalar
<
CV_32S
,
2
,
nppiAddC_32sc_C1RSfs
>::
call
,
NppArithmScalar
<
CV_32S
,
3
,
nppiAddC_32s_C3RSfs
>::
call
,
0
},
{
NppArithmScalar
<
CV_32F
,
1
,
nppiAddC_32f_C1R
>::
call
,
NppArithmScalar
<
CV_32F
,
2
,
nppiAddC_32fc_C1R
>::
call
,
NppArithmScalar
<
CV_32F
,
3
,
nppiAddC_32f_C3R
>::
call
,
NppArithmScalar
<
CV_32F
,
4
,
nppiAddC_32f_C4R
>::
call
},
{
0
,
0
,
0
,
0
}
};
const
int
sdepth
=
src
.
depth
();
const
int
ddepth
=
dst
.
depth
();
const
int
cn
=
src
.
channels
();
cudaStream_t
stream
=
StreamAccessor
::
getStream
(
_stream
);
const
npp_func_t
npp_func
=
npp_funcs
[
sdepth
][
cn
-
1
];
if
(
ddepth
==
sdepth
&&
cn
>
1
&&
npp_func
!=
0
)
{
npp_func
(
src
,
val
,
dst
,
stream
);
return
;
}
void
addMat
(
const
GpuMat
&
src1
,
const
GpuMat
&
src2
,
GpuMat
&
dst
,
const
GpuMat
&
mask
,
double
,
Stream
&
_stream
,
int
);
CV_Assert
(
cn
==
1
);
const
func_t
func
=
funcs
[
sdepth
][
ddepth
];
if
(
!
func
)
CV_Error
(
cv
::
Error
::
StsUnsupportedFormat
,
"Unsupported combination of source and destination types"
);
func
(
src
,
val
[
0
],
dst
,
mask
,
stream
);
}
void
addScalar
(
const
GpuMat
&
src
,
Scalar
val
,
bool
,
GpuMat
&
dst
,
const
GpuMat
&
mask
,
double
,
Stream
&
stream
,
int
);
void
cv
::
cuda
::
add
(
InputArray
src1
,
InputArray
src2
,
OutputArray
dst
,
InputArray
mask
,
int
dtype
,
Stream
&
stream
)
{
...
...
modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp
View file @
9c5da2ea
...
...
@@ -594,7 +594,7 @@ namespace integral_detail
CV_CUDEV_SAFE_CALL
(
cudaDeviceSynchronize
()
);
}
__host__
static
void
integral
(
const
GlobPtr
<
uchar
>
src
,
GlobPtr
<
uint
>
dst
,
int
rows
,
int
cols
,
cudaStream_t
stream
)
__host__
static
void
integral
(
const
GlobPtr
<
uchar
>
&
src
,
const
GlobPtr
<
uint
>&
dst
,
int
rows
,
int
cols
,
cudaStream_t
stream
)
{
if
(
deviceSupports
(
FEATURE_SET_COMPUTE_30
)
&&
(
cols
%
16
==
0
)
...
...
@@ -614,7 +614,7 @@ namespace integral_detail
CV_CUDEV_SAFE_CALL
(
cudaDeviceSynchronize
()
);
}
__host__
static
void
integral
(
const
GlobPtr
<
uchar
>
src
,
GlobPtr
<
int
>
dst
,
int
rows
,
int
cols
,
cudaStream_t
stream
)
__host__
__forceinline__
void
integral
(
const
GlobPtr
<
uchar
>&
src
,
const
GlobPtr
<
int
>&
dst
,
int
rows
,
int
cols
,
cudaStream_t
stream
)
{
GlobPtr
<
uint
>
dstui
=
globPtr
((
uint
*
)
dst
.
data
,
dst
.
step
);
integral
(
src
,
dstui
,
rows
,
cols
,
stream
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment