Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
b11cccaa
Commit
b11cccaa
authored
Aug 26, 2013
by
Vladislav Vinogradov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
switched to new device layer in bitwize operations
parent
fdfffa52
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
244 additions
and
518 deletions
+244
-518
bitwise_mat.cu
modules/cudaarithm/src/cuda/bitwise_mat.cu
+92
-55
bitwise_scalar.cu
modules/cudaarithm/src/cuda/bitwise_scalar.cu
+106
-39
element_operations.cpp
modules/cudaarithm/src/element_operations.cpp
+10
-424
mask.hpp
modules/cudev/include/opencv2/cudev/ptr2d/mask.hpp
+36
-0
No files found.
modules/cudaarithm/src/cuda/bitwise_mat.cu
View file @
b11cccaa
...
...
@@ -40,87 +40,124 @@
//
//M*/
#i
f !defined CUDA_DISABLER
#i
nclude "opencv2/opencv_modules.hpp"
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/transform.hpp"
#include "opencv2/core/cuda/saturate_cast.hpp"
#include "opencv2/core/cuda/simd_functions.hpp"
#ifndef HAVE_OPENCV_CUDEV
#
include "arithm_func_traits.hpp
"
#
error "opencv_cudev is required
"
using namespace cv::cuda;
using namespace cv::cuda::device;
#else
namespace cv { namespace cuda { namespace device
{
template <typename T> struct TransformFunctorTraits< bit_not<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
{
};
#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp"
template <typename T> struct TransformFunctorTraits< bit_and<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
{
};
using namespace cv::cudev;
template <typename T> struct TransformFunctorTraits< bit_or<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
{
};
void bitMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int op);
template <typename T> struct TransformFunctorTraits< bit_xor<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
{
};
}}}
//////////////////////////////////////////////////////////////////////////////
/// bitwise_not
namespace
arithm
namespace
{
template <typename T> void bitMatNot(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
template <typename T>
void bitMatNot(const GpuMat& src, GpuMat& dst, const GpuMat& mask, Stream& stream)
{
GlobPtrSz<T> vsrc = globPtr((T*) src.data, src.step, src.rows, src.cols * src.channels());
GlobPtrSz<T> vdst = globPtr((T*) dst.data, dst.step, src.rows, src.cols * src.channels());
if (mask.data)
device::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, bit_not<T>(), mask
, stream);
gridTransformUnary(vsrc, vdst, bit_not<T>(), singleMaskChannels(globPtr<uchar>(mask), src.channels())
, stream);
else
device::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, bit_not<T>(), WithOutMask
(), stream);
gridTransformUnary(vsrc, vdst, bit_not<T>
(), stream);
}
}
template <typename T> void bitMatAnd(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
void cv::cuda::bitwise_not(InputArray _src, OutputArray _dst, InputArray _mask, Stream& stream)
{
GpuMat src = _src.getGpuMat();
GpuMat mask = _mask.getGpuMat();
const int depth = src.depth();
CV_DbgAssert( depth <= CV_32F );
CV_DbgAssert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) );
_dst.create(src.size(), src.type());
GpuMat dst = _dst.getGpuMat();
if (depth == CV_32F || depth == CV_32S)
{
if (mask.data)
device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_and<T>(), mask, stream);
else
device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_and<T>(), WithOutMask(), stream);
bitMatNot<uint>(src, dst, mask, stream);
}
template <typename T> void bitMatOr(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
else if (depth == CV_16S || depth == CV_16U)
{
if (mask.data)
device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_or<T>(), mask, stream);
else
device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_or<T>(), WithOutMask(), stream);
bitMatNot<ushort>(src, dst, mask, stream);
}
else
{
bitMatNot<uchar>(src, dst, mask, stream);
}
}
template <typename T> void bitMatXor(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
//////////////////////////////////////////////////////////////////////////////
/// Binary bitwise logical operations
namespace
{
template <template <typename> class Op, typename T>
void bitMatOp(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, Stream& stream)
{
GlobPtrSz<T> vsrc1 = globPtr((T*) src1.data, src1.step, src1.rows, src1.cols * src1.channels());
GlobPtrSz<T> vsrc2 = globPtr((T*) src2.data, src2.step, src1.rows, src1.cols * src1.channels());
GlobPtrSz<T> vdst = globPtr((T*) dst.data, dst.step, src1.rows, src1.cols * src1.channels());
if (mask.data)
device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_xor<T>(), mask
, stream);
gridTransformBinary(vsrc1, vsrc2, vdst, Op<T>(), singleMaskChannels(globPtr<uchar>(mask), src1.channels())
, stream);
else
device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_xor<T>(), WithOutMask
(), stream);
gridTransformBinary(vsrc1, vsrc2, vdst, Op<T>
(), stream);
}
}
template void bitMatNot<uchar>(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void bitMatNot<ushort>(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void bitMatNot<uint>(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
void bitMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int op)
{
typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, Stream& stream);
static const func_t funcs32[] =
{
bitMatOp<bit_and, uint>,
bitMatOp<bit_or, uint>,
bitMatOp<bit_xor, uint>
};
static const func_t funcs16[] =
{
bitMatOp<bit_and, ushort>,
bitMatOp<bit_or, ushort>,
bitMatOp<bit_xor, ushort>
};
static const func_t funcs8[] =
{
bitMatOp<bit_and, uchar>,
bitMatOp<bit_or, uchar>,
bitMatOp<bit_xor, uchar>
};
template void bitMatAnd<uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void bitMatAnd<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void bitMatAnd<uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
const int depth = src1.depth();
template void bitMatOr<uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void bitMatOr<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void bitMatOr<uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
CV_DbgAssert( depth <= CV_32F );
CV_DbgAssert( op >= 0 && op < 3 );
template void bitMatXor<uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void bitMatXor<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void bitMatXor<uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
if (depth == CV_32F || depth == CV_32S)
{
funcs32[op](src1, src2, dst, mask, stream);
}
else if (depth == CV_16S || depth == CV_16U)
{
funcs16[op](src1, src2, dst, mask, stream);
}
else
{
funcs8[op](src1, src2, dst, mask, stream);
}
}
#endif
// CUDA_DISABLER
#endif
modules/cudaarithm/src/cuda/bitwise_scalar.cu
View file @
b11cccaa
...
...
@@ -40,65 +40,132 @@
//
//M*/
#i
f !defined CUDA_DISABLER
#i
nclude "opencv2/opencv_modules.hpp"
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/transform.hpp"
#include "opencv2/core/cuda/saturate_cast.hpp"
#include "opencv2/core/cuda/simd_functions.hpp"
#ifndef HAVE_OPENCV_CUDEV
#
include "arithm_func_traits.hpp
"
#
error "opencv_cudev is required
"
using namespace cv::cuda;
using namespace cv::cuda::device;
#else
namespace cv { namespace cuda { namespace device
#include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv::cudev;
void bitScalar(const GpuMat& src, cv::Scalar value, bool, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int op);
namespace
{
template <typename T> struct TransformFunctorTraits< binder2nd< bit_and<T> > > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
template <template <typename> class Op, typename T>
void bitScalarOp(const GpuMat& src, uint value, GpuMat& dst, Stream& stream)
{
};
gridTransformUnary(globPtr<T>(src), globPtr<T>(dst), bind2nd(Op<T>(), value), stream);
}
typedef void (*bit_scalar_func_t)(const GpuMat& src, uint value, GpuMat& dst, Stream& stream);
template <typename T
> struct TransformFunctorTraits< binder2nd< bit_or<T> > > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
template <typename T
, bit_scalar_func_t func> struct BitScalar
{
static void call(const GpuMat& src, cv::Scalar value, GpuMat& dst, Stream& stream)
{
func(src, cv::saturate_cast<T>(value[0]), dst, stream);
}
};
template <
typename T> struct TransformFunctorTraits< binder2nd< bit_xor<T> > > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
template <
bit_scalar_func_t func> struct BitScalar4
{
static void call(const GpuMat& src, cv::Scalar value, GpuMat& dst, Stream& stream)
{
uint packedVal = 0;
packedVal |= cv::saturate_cast<uchar>(value[0]);
packedVal |= cv::saturate_cast<uchar>(value[1]) << 8;
packedVal |= cv::saturate_cast<uchar>(value[2]) << 16;
packedVal |= cv::saturate_cast<uchar>(value[3]) << 24;
func(src, packedVal, dst, stream);
}
};
}}}
namespace arithm
{
template <typename T> void bitScalarAnd(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream)
template <int DEPTH, int cn> struct NppBitwiseCFunc
{
device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::cuda::device::bind2nd(bit_and<T>(), src2), WithOutMask(), stream);
}
typedef typename NPPTypeTraits<DEPTH>::npp_type npp_type;
typedef NppStatus (*func_t)(const npp_type* pSrc1, int nSrc1Step, const npp_type* pConstants, npp_type* pDst, int nDstStep, NppiSize oSizeROI);
};
template <
typename T> void bitScalarOr(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream)
template <
int DEPTH, int cn, typename NppBitwiseCFunc<DEPTH, cn>::func_t func> struct NppBitwiseC
{
device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::cuda::device::bind2nd(bit_or<T>(), src2), WithOutMask(), stream);
}
typedef typename NppBitwiseCFunc<DEPTH, cn>::npp_type npp_type;
static void call(const GpuMat& src, cv::Scalar value, GpuMat& dst, Stream& _stream)
{
cudaStream_t stream = StreamAccessor::getStream(_stream);
NppStreamHandler h(stream);
NppiSize oSizeROI;
oSizeROI.width = src.cols;
oSizeROI.height = src.rows;
const npp_type pConstants[] =
{
cv::saturate_cast<npp_type>(value[0]),
cv::saturate_cast<npp_type>(value[1]),
cv::saturate_cast<npp_type>(value[2]),
cv::saturate_cast<npp_type>(value[3])
};
nppSafeCall( func(src.ptr<npp_type>(), static_cast<int>(src.step), pConstants, dst.ptr<npp_type>(), static_cast<int>(dst.step), oSizeROI) );
if (stream == 0)
CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
}
};
}
void bitScalar(const GpuMat& src, cv::Scalar value, bool, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int op)
{
(void) mask;
template <typename T> void bitScalarXor(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream)
typedef void (*func_t)(const GpuMat& src, cv::Scalar value, GpuMat& dst, Stream& stream);
static const func_t funcs[3][6][4] =
{
device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::cuda::device::bind2nd(bit_xor<T>(), src2), WithOutMask(), stream);
}
{
{BitScalar<uchar, bitScalarOp<bit_and, uchar> >::call , 0, NppBitwiseC<CV_8U , 3, nppiAndC_8u_C3R >::call, BitScalar4< bitScalarOp<bit_and, uint> >::call},
{BitScalar<uchar, bitScalarOp<bit_and, uchar> >::call , 0, NppBitwiseC<CV_8U , 3, nppiAndC_8u_C3R >::call, BitScalar4< bitScalarOp<bit_and, uint> >::call},
{BitScalar<ushort, bitScalarOp<bit_and, ushort> >::call, 0, NppBitwiseC<CV_16U, 3, nppiAndC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiAndC_16u_C4R>::call},
{BitScalar<ushort, bitScalarOp<bit_and, ushort> >::call, 0, NppBitwiseC<CV_16U, 3, nppiAndC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiAndC_16u_C4R>::call},
{BitScalar<uint, bitScalarOp<bit_and, uint> >::call , 0, NppBitwiseC<CV_32S, 3, nppiAndC_32s_C3R>::call, NppBitwiseC<CV_32S, 4, nppiAndC_32s_C4R>::call},
{BitScalar<uint, bitScalarOp<bit_and, uint> >::call , 0, NppBitwiseC<CV_32S, 3, nppiAndC_32s_C3R>::call, NppBitwiseC<CV_32S, 4, nppiAndC_32s_C4R>::call}
},
{
{BitScalar<uchar, bitScalarOp<bit_or, uchar> >::call , 0, NppBitwiseC<CV_8U , 3, nppiOrC_8u_C3R >::call, BitScalar4< bitScalarOp<bit_or, uint> >::call},
{BitScalar<uchar, bitScalarOp<bit_or, uchar> >::call , 0, NppBitwiseC<CV_8U , 3, nppiOrC_8u_C3R >::call, BitScalar4< bitScalarOp<bit_or, uint> >::call},
{BitScalar<ushort, bitScalarOp<bit_or, ushort> >::call, 0, NppBitwiseC<CV_16U, 3, nppiOrC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiOrC_16u_C4R>::call},
{BitScalar<ushort, bitScalarOp<bit_or, ushort> >::call, 0, NppBitwiseC<CV_16U, 3, nppiOrC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiOrC_16u_C4R>::call},
{BitScalar<uint, bitScalarOp<bit_or, uint> >::call , 0, NppBitwiseC<CV_32S, 3, nppiOrC_32s_C3R>::call, NppBitwiseC<CV_32S, 4, nppiOrC_32s_C4R>::call},
{BitScalar<uint, bitScalarOp<bit_or, uint> >::call , 0, NppBitwiseC<CV_32S, 3, nppiOrC_32s_C3R>::call, NppBitwiseC<CV_32S, 4, nppiOrC_32s_C4R>::call}
},
{
{BitScalar<uchar, bitScalarOp<bit_xor, uchar> >::call , 0, NppBitwiseC<CV_8U , 3, nppiXorC_8u_C3R >::call, BitScalar4< bitScalarOp<bit_xor, uint> >::call},
{BitScalar<uchar, bitScalarOp<bit_xor, uchar> >::call , 0, NppBitwiseC<CV_8U , 3, nppiXorC_8u_C3R >::call, BitScalar4< bitScalarOp<bit_xor, uint> >::call},
{BitScalar<ushort, bitScalarOp<bit_xor, ushort> >::call, 0, NppBitwiseC<CV_16U, 3, nppiXorC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiXorC_16u_C4R>::call},
{BitScalar<ushort, bitScalarOp<bit_xor, ushort> >::call, 0, NppBitwiseC<CV_16U, 3, nppiXorC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiXorC_16u_C4R>::call},
{BitScalar<uint, bitScalarOp<bit_xor, uint> >::call , 0, NppBitwiseC<CV_32S, 3, nppiXorC_32s_C3R>::call, NppBitwiseC<CV_32S, 4, nppiXorC_32s_C4R>::call},
{BitScalar<uint, bitScalarOp<bit_xor, uint> >::call , 0, NppBitwiseC<CV_32S, 3, nppiXorC_32s_C3R>::call, NppBitwiseC<CV_32S, 4, nppiXorC_32s_C4R>::call}
}
};
template void bitScalarAnd<uchar>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
template void bitScalarAnd<ushort>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
template void bitScalarAnd<int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
template void bitScalarAnd<unsigned int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
const int depth = src.depth();
const int cn = src.channels();
template void bitScalarOr<uchar>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream
);
template void bitScalarOr<ushort>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream
);
template void bitScalarOr<int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream
);
template void bitScalarOr<unsigned int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream
);
CV_DbgAssert( depth <= CV_32F
);
CV_DbgAssert( cn == 1 || cn == 3 || cn == 4
);
CV_DbgAssert( mask.empty()
);
CV_DbgAssert( op >= 0 && op < 3
);
template void bitScalarXor<uchar>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
template void bitScalarXor<ushort>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
template void bitScalarXor<int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
template void bitScalarXor<unsigned int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
funcs[op][depth][cn - 1](src, value, dst, stream);
}
#endif
// CUDA_DISABLER
#endif
modules/cudaarithm/src/element_operations.cpp
View file @
b11cccaa
...
...
@@ -159,180 +159,6 @@ namespace
}
}
////////////////////////////////////////////////////////////////////////
// Basic arithmetical operations (add subtract multiply divide)
namespace
{
template
<
int
DEPTH
>
struct
NppTypeTraits
;
template
<>
struct
NppTypeTraits
<
CV_8U
>
{
typedef
Npp8u
npp_t
;
};
template
<>
struct
NppTypeTraits
<
CV_8S
>
{
typedef
Npp8s
npp_t
;
};
template
<>
struct
NppTypeTraits
<
CV_16U
>
{
typedef
Npp16u
npp_t
;
};
template
<>
struct
NppTypeTraits
<
CV_16S
>
{
typedef
Npp16s
npp_t
;
typedef
Npp16sc
npp_complex_type
;
};
template
<>
struct
NppTypeTraits
<
CV_32S
>
{
typedef
Npp32s
npp_t
;
typedef
Npp32sc
npp_complex_type
;
};
template
<>
struct
NppTypeTraits
<
CV_32F
>
{
typedef
Npp32f
npp_t
;
typedef
Npp32fc
npp_complex_type
;
};
template
<>
struct
NppTypeTraits
<
CV_64F
>
{
typedef
Npp64f
npp_t
;
typedef
Npp64fc
npp_complex_type
;
};
template
<
int
DEPTH
,
int
cn
>
struct
NppArithmScalarFunc
{
typedef
typename
NppTypeTraits
<
DEPTH
>::
npp_t
npp_t
;
typedef
NppStatus
(
*
func_ptr
)(
const
npp_t
*
pSrc1
,
int
nSrc1Step
,
const
npp_t
*
pConstants
,
npp_t
*
pDst
,
int
nDstStep
,
NppiSize
oSizeROI
,
int
nScaleFactor
);
};
template
<
int
DEPTH
>
struct
NppArithmScalarFunc
<
DEPTH
,
1
>
{
typedef
typename
NppTypeTraits
<
DEPTH
>::
npp_t
npp_t
;
typedef
NppStatus
(
*
func_ptr
)(
const
npp_t
*
pSrc1
,
int
nSrc1Step
,
const
npp_t
pConstants
,
npp_t
*
pDst
,
int
nDstStep
,
NppiSize
oSizeROI
,
int
nScaleFactor
);
};
template
<
int
DEPTH
>
struct
NppArithmScalarFunc
<
DEPTH
,
2
>
{
typedef
typename
NppTypeTraits
<
DEPTH
>::
npp_complex_type
npp_complex_type
;
typedef
NppStatus
(
*
func_ptr
)(
const
npp_complex_type
*
pSrc1
,
int
nSrc1Step
,
const
npp_complex_type
pConstants
,
npp_complex_type
*
pDst
,
int
nDstStep
,
NppiSize
oSizeROI
,
int
nScaleFactor
);
};
template
<
int
cn
>
struct
NppArithmScalarFunc
<
CV_32F
,
cn
>
{
typedef
NppStatus
(
*
func_ptr
)(
const
Npp32f
*
pSrc1
,
int
nSrc1Step
,
const
Npp32f
*
pConstants
,
Npp32f
*
pDst
,
int
nDstStep
,
NppiSize
oSizeROI
);
};
template
<>
struct
NppArithmScalarFunc
<
CV_32F
,
1
>
{
typedef
NppStatus
(
*
func_ptr
)(
const
Npp32f
*
pSrc1
,
int
nSrc1Step
,
const
Npp32f
pConstants
,
Npp32f
*
pDst
,
int
nDstStep
,
NppiSize
oSizeROI
);
};
template
<>
struct
NppArithmScalarFunc
<
CV_32F
,
2
>
{
typedef
NppStatus
(
*
func_ptr
)(
const
Npp32fc
*
pSrc1
,
int
nSrc1Step
,
const
Npp32fc
pConstants
,
Npp32fc
*
pDst
,
int
nDstStep
,
NppiSize
oSizeROI
);
};
template
<
int
DEPTH
,
int
cn
,
typename
NppArithmScalarFunc
<
DEPTH
,
cn
>::
func_ptr
func
>
struct
NppArithmScalar
{
typedef
typename
NppTypeTraits
<
DEPTH
>::
npp_t
npp_t
;
static
void
call
(
const
PtrStepSzb
src
,
Scalar
sc
,
PtrStepb
dst
,
cudaStream_t
stream
)
{
NppStreamHandler
h
(
stream
);
NppiSize
sz
;
sz
.
width
=
src
.
cols
;
sz
.
height
=
src
.
rows
;
const
npp_t
pConstants
[]
=
{
saturate_cast
<
npp_t
>
(
sc
.
val
[
0
]),
saturate_cast
<
npp_t
>
(
sc
.
val
[
1
]),
saturate_cast
<
npp_t
>
(
sc
.
val
[
2
]),
saturate_cast
<
npp_t
>
(
sc
.
val
[
3
])
};
nppSafeCall
(
func
((
const
npp_t
*
)
src
.
data
,
static_cast
<
int
>
(
src
.
step
),
pConstants
,
(
npp_t
*
)
dst
.
data
,
static_cast
<
int
>
(
dst
.
step
),
sz
,
0
)
);
if
(
stream
==
0
)
cudaSafeCall
(
cudaDeviceSynchronize
()
);
}
};
template
<
int
DEPTH
,
typename
NppArithmScalarFunc
<
DEPTH
,
1
>::
func_ptr
func
>
struct
NppArithmScalar
<
DEPTH
,
1
,
func
>
{
typedef
typename
NppTypeTraits
<
DEPTH
>::
npp_t
npp_t
;
static
void
call
(
const
PtrStepSzb
src
,
Scalar
sc
,
PtrStepb
dst
,
cudaStream_t
stream
)
{
NppStreamHandler
h
(
stream
);
NppiSize
sz
;
sz
.
width
=
src
.
cols
;
sz
.
height
=
src
.
rows
;
nppSafeCall
(
func
((
const
npp_t
*
)
src
.
data
,
static_cast
<
int
>
(
src
.
step
),
saturate_cast
<
npp_t
>
(
sc
.
val
[
0
]),
(
npp_t
*
)
dst
.
data
,
static_cast
<
int
>
(
dst
.
step
),
sz
,
0
)
);
if
(
stream
==
0
)
cudaSafeCall
(
cudaDeviceSynchronize
()
);
}
};
template
<
int
DEPTH
,
typename
NppArithmScalarFunc
<
DEPTH
,
2
>::
func_ptr
func
>
struct
NppArithmScalar
<
DEPTH
,
2
,
func
>
{
typedef
typename
NppTypeTraits
<
DEPTH
>::
npp_t
npp_t
;
typedef
typename
NppTypeTraits
<
DEPTH
>::
npp_complex_type
npp_complex_type
;
static
void
call
(
const
PtrStepSzb
src
,
Scalar
sc
,
PtrStepb
dst
,
cudaStream_t
stream
)
{
NppStreamHandler
h
(
stream
);
NppiSize
sz
;
sz
.
width
=
src
.
cols
;
sz
.
height
=
src
.
rows
;
npp_complex_type
nConstant
;
nConstant
.
re
=
saturate_cast
<
npp_t
>
(
sc
.
val
[
0
]);
nConstant
.
im
=
saturate_cast
<
npp_t
>
(
sc
.
val
[
1
]);
nppSafeCall
(
func
((
const
npp_complex_type
*
)
src
.
data
,
static_cast
<
int
>
(
src
.
step
),
nConstant
,
(
npp_complex_type
*
)
dst
.
data
,
static_cast
<
int
>
(
dst
.
step
),
sz
,
0
)
);
if
(
stream
==
0
)
cudaSafeCall
(
cudaDeviceSynchronize
()
);
}
};
template
<
int
cn
,
typename
NppArithmScalarFunc
<
CV_32F
,
cn
>::
func_ptr
func
>
struct
NppArithmScalar
<
CV_32F
,
cn
,
func
>
{
typedef
typename
NppTypeTraits
<
CV_32F
>::
npp_t
npp_t
;
static
void
call
(
const
PtrStepSzb
src
,
Scalar
sc
,
PtrStepb
dst
,
cudaStream_t
stream
)
{
NppStreamHandler
h
(
stream
);
NppiSize
sz
;
sz
.
width
=
src
.
cols
;
sz
.
height
=
src
.
rows
;
const
Npp32f
pConstants
[]
=
{
saturate_cast
<
Npp32f
>
(
sc
.
val
[
0
]),
saturate_cast
<
Npp32f
>
(
sc
.
val
[
1
]),
saturate_cast
<
Npp32f
>
(
sc
.
val
[
2
]),
saturate_cast
<
Npp32f
>
(
sc
.
val
[
3
])
};
nppSafeCall
(
func
((
const
npp_t
*
)
src
.
data
,
static_cast
<
int
>
(
src
.
step
),
pConstants
,
(
npp_t
*
)
dst
.
data
,
static_cast
<
int
>
(
dst
.
step
),
sz
)
);
if
(
stream
==
0
)
cudaSafeCall
(
cudaDeviceSynchronize
()
);
}
};
template
<
typename
NppArithmScalarFunc
<
CV_32F
,
1
>::
func_ptr
func
>
struct
NppArithmScalar
<
CV_32F
,
1
,
func
>
{
typedef
typename
NppTypeTraits
<
CV_32F
>::
npp_t
npp_t
;
static
void
call
(
const
PtrStepSzb
src
,
Scalar
sc
,
PtrStepb
dst
,
cudaStream_t
stream
)
{
NppStreamHandler
h
(
stream
);
NppiSize
sz
;
sz
.
width
=
src
.
cols
;
sz
.
height
=
src
.
rows
;
nppSafeCall
(
func
((
const
npp_t
*
)
src
.
data
,
static_cast
<
int
>
(
src
.
step
),
saturate_cast
<
Npp32f
>
(
sc
.
val
[
0
]),
(
npp_t
*
)
dst
.
data
,
static_cast
<
int
>
(
dst
.
step
),
sz
)
);
if
(
stream
==
0
)
cudaSafeCall
(
cudaDeviceSynchronize
()
);
}
};
template
<
typename
NppArithmScalarFunc
<
CV_32F
,
2
>::
func_ptr
func
>
struct
NppArithmScalar
<
CV_32F
,
2
,
func
>
{
typedef
typename
NppTypeTraits
<
CV_32F
>::
npp_t
npp_t
;
typedef
typename
NppTypeTraits
<
CV_32F
>::
npp_complex_type
npp_complex_type
;
static
void
call
(
const
PtrStepSzb
src
,
Scalar
sc
,
PtrStepb
dst
,
cudaStream_t
stream
)
{
NppStreamHandler
h
(
stream
);
NppiSize
sz
;
sz
.
width
=
src
.
cols
;
sz
.
height
=
src
.
rows
;
Npp32fc
nConstant
;
nConstant
.
re
=
saturate_cast
<
Npp32f
>
(
sc
.
val
[
0
]);
nConstant
.
im
=
saturate_cast
<
Npp32f
>
(
sc
.
val
[
1
]);
nppSafeCall
(
func
((
const
npp_complex_type
*
)
src
.
data
,
static_cast
<
int
>
(
src
.
step
),
nConstant
,
(
npp_complex_type
*
)
dst
.
data
,
static_cast
<
int
>
(
dst
.
step
),
sz
)
);
if
(
stream
==
0
)
cudaSafeCall
(
cudaDeviceSynchronize
()
);
}
};
}
////////////////////////////////////////////////////////////////////////
// add
...
...
@@ -463,60 +289,6 @@ void cv::cuda::compare(InputArray src1, InputArray src2, OutputArray dst, int cm
arithm_op
(
src1
,
src2
,
dst
,
noArray
(),
1.0
,
CV_8U
,
stream
,
cmpMat
,
cmpScalar
,
cmpop
);
}
//////////////////////////////////////////////////////////////////////////////
// bitwise_not
namespace
arithm
{
template
<
typename
T
>
void
bitMatNot
(
PtrStepSzb
src
,
PtrStepSzb
dst
,
PtrStepb
mask
,
cudaStream_t
stream
);
}
void
cv
::
cuda
::
bitwise_not
(
InputArray
_src
,
OutputArray
_dst
,
InputArray
_mask
,
Stream
&
_stream
)
{
using
namespace
arithm
;
GpuMat
src
=
_src
.
getGpuMat
();
GpuMat
mask
=
_mask
.
getGpuMat
();
const
int
depth
=
src
.
depth
();
CV_Assert
(
depth
<=
CV_64F
);
CV_Assert
(
mask
.
empty
()
||
(
mask
.
type
()
==
CV_8UC1
&&
mask
.
size
()
==
src
.
size
())
);
_dst
.
create
(
src
.
size
(),
src
.
type
());
GpuMat
dst
=
_dst
.
getGpuMat
();
cudaStream_t
stream
=
StreamAccessor
::
getStream
(
_stream
);
const
int
bcols
=
(
int
)
(
src
.
cols
*
src
.
elemSize
());
if
((
bcols
&
3
)
==
0
)
{
const
int
vcols
=
bcols
>>
2
;
bitMatNot
<
unsigned
int
>
(
PtrStepSzb
(
src
.
rows
,
vcols
,
src
.
data
,
src
.
step
),
PtrStepSzb
(
src
.
rows
,
vcols
,
dst
.
data
,
dst
.
step
),
mask
,
stream
);
}
else
if
((
bcols
&
1
)
==
0
)
{
const
int
vcols
=
bcols
>>
1
;
bitMatNot
<
unsigned
short
>
(
PtrStepSzb
(
src
.
rows
,
vcols
,
src
.
data
,
src
.
step
),
PtrStepSzb
(
src
.
rows
,
vcols
,
dst
.
data
,
dst
.
step
),
mask
,
stream
);
}
else
{
bitMatNot
<
unsigned
char
>
(
PtrStepSzb
(
src
.
rows
,
bcols
,
src
.
data
,
src
.
step
),
PtrStepSzb
(
src
.
rows
,
bcols
,
dst
.
data
,
dst
.
step
),
mask
,
stream
);
}
}
//////////////////////////////////////////////////////////////////////////////
// Binary bitwise logical operations
...
...
@@ -530,195 +302,9 @@ namespace
};
}
namespace
arithm
{
template
<
typename
T
>
void
bitMatAnd
(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
PtrStepb
mask
,
cudaStream_t
stream
);
template
<
typename
T
>
void
bitMatOr
(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
PtrStepb
mask
,
cudaStream_t
stream
);
template
<
typename
T
>
void
bitMatXor
(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
PtrStepb
mask
,
cudaStream_t
stream
);
}
void
bitMat
(
const
GpuMat
&
src1
,
const
GpuMat
&
src2
,
GpuMat
&
dst
,
const
GpuMat
&
mask
,
double
,
Stream
&
stream
,
int
op
);
static
void
bitMat
(
const
GpuMat
&
src1
,
const
GpuMat
&
src2
,
GpuMat
&
dst
,
const
GpuMat
&
mask
,
double
,
Stream
&
_stream
,
int
op
)
{
using
namespace
arithm
;
typedef
void
(
*
func_t
)(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
PtrStepb
mask
,
cudaStream_t
stream
);
static
const
func_t
funcs32
[]
=
{
bitMatAnd
<
uint
>
,
bitMatOr
<
uint
>
,
bitMatXor
<
uint
>
};
static
const
func_t
funcs16
[]
=
{
bitMatAnd
<
ushort
>
,
bitMatOr
<
ushort
>
,
bitMatXor
<
ushort
>
};
static
const
func_t
funcs8
[]
=
{
bitMatAnd
<
uchar
>
,
bitMatOr
<
uchar
>
,
bitMatXor
<
uchar
>
};
cudaStream_t
stream
=
StreamAccessor
::
getStream
(
_stream
);
const
int
bcols
=
(
int
)
(
src1
.
cols
*
src1
.
elemSize
());
if
((
bcols
&
3
)
==
0
)
{
const
int
vcols
=
bcols
>>
2
;
funcs32
[
op
](
PtrStepSzb
(
src1
.
rows
,
vcols
,
src1
.
data
,
src1
.
step
),
PtrStepSzb
(
src1
.
rows
,
vcols
,
src2
.
data
,
src2
.
step
),
PtrStepSzb
(
src1
.
rows
,
vcols
,
dst
.
data
,
dst
.
step
),
mask
,
stream
);
}
else
if
((
bcols
&
1
)
==
0
)
{
const
int
vcols
=
bcols
>>
1
;
funcs16
[
op
](
PtrStepSzb
(
src1
.
rows
,
vcols
,
src1
.
data
,
src1
.
step
),
PtrStepSzb
(
src1
.
rows
,
vcols
,
src2
.
data
,
src2
.
step
),
PtrStepSzb
(
src1
.
rows
,
vcols
,
dst
.
data
,
dst
.
step
),
mask
,
stream
);
}
else
{
funcs8
[
op
](
PtrStepSzb
(
src1
.
rows
,
bcols
,
src1
.
data
,
src1
.
step
),
PtrStepSzb
(
src1
.
rows
,
bcols
,
src2
.
data
,
src2
.
step
),
PtrStepSzb
(
src1
.
rows
,
bcols
,
dst
.
data
,
dst
.
step
),
mask
,
stream
);
}
}
namespace
arithm
{
template
<
typename
T
>
void
bitScalarAnd
(
PtrStepSzb
src1
,
unsigned
int
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
<
typename
T
>
void
bitScalarOr
(
PtrStepSzb
src1
,
unsigned
int
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
<
typename
T
>
void
bitScalarXor
(
PtrStepSzb
src1
,
unsigned
int
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
}
namespace
{
typedef
void
(
*
bit_scalar_func_t
)(
PtrStepSzb
src1
,
unsigned
int
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
<
typename
T
,
bit_scalar_func_t
func
>
struct
BitScalar
{
static
void
call
(
const
GpuMat
&
src
,
Scalar
sc
,
GpuMat
&
dst
,
cudaStream_t
stream
)
{
func
(
src
,
saturate_cast
<
T
>
(
sc
.
val
[
0
]),
dst
,
stream
);
}
};
template
<
bit_scalar_func_t
func
>
struct
BitScalar4
{
static
void
call
(
const
GpuMat
&
src
,
Scalar
sc
,
GpuMat
&
dst
,
cudaStream_t
stream
)
{
unsigned
int
packedVal
=
0
;
packedVal
|=
(
saturate_cast
<
unsigned
char
>
(
sc
.
val
[
0
])
&
0xffff
);
packedVal
|=
(
saturate_cast
<
unsigned
char
>
(
sc
.
val
[
1
])
&
0xffff
)
<<
8
;
packedVal
|=
(
saturate_cast
<
unsigned
char
>
(
sc
.
val
[
2
])
&
0xffff
)
<<
16
;
packedVal
|=
(
saturate_cast
<
unsigned
char
>
(
sc
.
val
[
3
])
&
0xffff
)
<<
24
;
func
(
src
,
packedVal
,
dst
,
stream
);
}
};
template
<
int
DEPTH
,
int
cn
>
struct
NppBitwiseCFunc
{
typedef
typename
NppTypeTraits
<
DEPTH
>::
npp_t
npp_t
;
typedef
NppStatus
(
*
func_t
)(
const
npp_t
*
pSrc1
,
int
nSrc1Step
,
const
npp_t
*
pConstants
,
npp_t
*
pDst
,
int
nDstStep
,
NppiSize
oSizeROI
);
};
template
<
int
DEPTH
>
struct
NppBitwiseCFunc
<
DEPTH
,
1
>
{
typedef
typename
NppTypeTraits
<
DEPTH
>::
npp_t
npp_t
;
typedef
NppStatus
(
*
func_t
)(
const
npp_t
*
pSrc1
,
int
nSrc1Step
,
const
npp_t
pConstant
,
npp_t
*
pDst
,
int
nDstStep
,
NppiSize
oSizeROI
);
};
template
<
int
DEPTH
,
int
cn
,
typename
NppBitwiseCFunc
<
DEPTH
,
cn
>::
func_t
func
>
struct
NppBitwiseC
{
typedef
typename
NppBitwiseCFunc
<
DEPTH
,
cn
>::
npp_t
npp_t
;
static
void
call
(
const
GpuMat
&
src
,
Scalar
sc
,
GpuMat
&
dst
,
cudaStream_t
stream
)
{
NppStreamHandler
h
(
stream
);
NppiSize
oSizeROI
;
oSizeROI
.
width
=
src
.
cols
;
oSizeROI
.
height
=
src
.
rows
;
const
npp_t
pConstants
[]
=
{
saturate_cast
<
npp_t
>
(
sc
.
val
[
0
]),
saturate_cast
<
npp_t
>
(
sc
.
val
[
1
]),
saturate_cast
<
npp_t
>
(
sc
.
val
[
2
]),
saturate_cast
<
npp_t
>
(
sc
.
val
[
3
])};
nppSafeCall
(
func
(
src
.
ptr
<
npp_t
>
(),
static_cast
<
int
>
(
src
.
step
),
pConstants
,
dst
.
ptr
<
npp_t
>
(),
static_cast
<
int
>
(
dst
.
step
),
oSizeROI
)
);
if
(
stream
==
0
)
cudaSafeCall
(
cudaDeviceSynchronize
()
);
}
};
template
<
int
DEPTH
,
typename
NppBitwiseCFunc
<
DEPTH
,
1
>::
func_t
func
>
struct
NppBitwiseC
<
DEPTH
,
1
,
func
>
{
typedef
typename
NppBitwiseCFunc
<
DEPTH
,
1
>::
npp_t
npp_t
;
static
void
call
(
const
GpuMat
&
src
,
Scalar
sc
,
GpuMat
&
dst
,
cudaStream_t
stream
)
{
NppStreamHandler
h
(
stream
);
NppiSize
oSizeROI
;
oSizeROI
.
width
=
src
.
cols
;
oSizeROI
.
height
=
src
.
rows
;
nppSafeCall
(
func
(
src
.
ptr
<
npp_t
>
(),
static_cast
<
int
>
(
src
.
step
),
saturate_cast
<
npp_t
>
(
sc
.
val
[
0
]),
dst
.
ptr
<
npp_t
>
(),
static_cast
<
int
>
(
dst
.
step
),
oSizeROI
)
);
if
(
stream
==
0
)
cudaSafeCall
(
cudaDeviceSynchronize
()
);
}
};
}
static
void
bitScalar
(
const
GpuMat
&
src
,
Scalar
val
,
bool
,
GpuMat
&
dst
,
const
GpuMat
&
mask
,
double
,
Stream
&
stream
,
int
op
)
{
using
namespace
arithm
;
typedef
void
(
*
func_t
)(
const
GpuMat
&
src
,
Scalar
sc
,
GpuMat
&
dst
,
cudaStream_t
stream
);
static
const
func_t
funcs
[
3
][
5
][
4
]
=
{
{
{
BitScalar
<
unsigned
char
,
bitScalarAnd
<
unsigned
char
>
>::
call
,
0
,
NppBitwiseC
<
CV_8U
,
3
,
nppiAndC_8u_C3R
>::
call
,
BitScalar4
<
bitScalarAnd
<
unsigned
int
>
>::
call
},
{
0
,
0
,
0
,
0
},
{
BitScalar
<
unsigned
short
,
bitScalarAnd
<
unsigned
short
>
>::
call
,
0
,
NppBitwiseC
<
CV_16U
,
3
,
nppiAndC_16u_C3R
>::
call
,
NppBitwiseC
<
CV_16U
,
4
,
nppiAndC_16u_C4R
>::
call
},
{
0
,
0
,
0
,
0
},
{
BitScalar
<
int
,
bitScalarAnd
<
int
>
>::
call
,
0
,
NppBitwiseC
<
CV_32S
,
3
,
nppiAndC_32s_C3R
>::
call
,
NppBitwiseC
<
CV_32S
,
4
,
nppiAndC_32s_C4R
>::
call
}
},
{
{
BitScalar
<
unsigned
char
,
bitScalarOr
<
unsigned
char
>
>::
call
,
0
,
NppBitwiseC
<
CV_8U
,
3
,
nppiOrC_8u_C3R
>::
call
,
BitScalar4
<
bitScalarOr
<
unsigned
int
>
>::
call
},
{
0
,
0
,
0
,
0
},
{
BitScalar
<
unsigned
short
,
bitScalarOr
<
unsigned
short
>
>::
call
,
0
,
NppBitwiseC
<
CV_16U
,
3
,
nppiOrC_16u_C3R
>::
call
,
NppBitwiseC
<
CV_16U
,
4
,
nppiOrC_16u_C4R
>::
call
},
{
0
,
0
,
0
,
0
},
{
BitScalar
<
int
,
bitScalarOr
<
int
>
>::
call
,
0
,
NppBitwiseC
<
CV_32S
,
3
,
nppiOrC_32s_C3R
>::
call
,
NppBitwiseC
<
CV_32S
,
4
,
nppiOrC_32s_C4R
>::
call
}
},
{
{
BitScalar
<
unsigned
char
,
bitScalarXor
<
unsigned
char
>
>::
call
,
0
,
NppBitwiseC
<
CV_8U
,
3
,
nppiXorC_8u_C3R
>::
call
,
BitScalar4
<
bitScalarXor
<
unsigned
int
>
>::
call
},
{
0
,
0
,
0
,
0
},
{
BitScalar
<
unsigned
short
,
bitScalarXor
<
unsigned
short
>
>::
call
,
0
,
NppBitwiseC
<
CV_16U
,
3
,
nppiXorC_16u_C3R
>::
call
,
NppBitwiseC
<
CV_16U
,
4
,
nppiXorC_16u_C4R
>::
call
},
{
0
,
0
,
0
,
0
},
{
BitScalar
<
int
,
bitScalarXor
<
int
>
>::
call
,
0
,
NppBitwiseC
<
CV_32S
,
3
,
nppiXorC_32s_C3R
>::
call
,
NppBitwiseC
<
CV_32S
,
4
,
nppiXorC_32s_C4R
>::
call
}
}
};
const
int
depth
=
src
.
depth
();
const
int
cn
=
src
.
channels
();
CV_Assert
(
depth
==
CV_8U
||
depth
==
CV_16U
||
depth
==
CV_32S
);
CV_Assert
(
cn
==
1
||
cn
==
3
||
cn
==
4
);
CV_Assert
(
mask
.
empty
()
);
funcs
[
op
][
depth
][
cn
-
1
](
src
,
val
,
dst
,
StreamAccessor
::
getStream
(
stream
));
}
void
bitScalar
(
const
GpuMat
&
src
,
cv
::
Scalar
value
,
bool
,
GpuMat
&
dst
,
const
GpuMat
&
mask
,
double
,
Stream
&
stream
,
int
op
);
void
cv
::
cuda
::
bitwise_or
(
InputArray
src1
,
InputArray
src2
,
OutputArray
dst
,
InputArray
mask
,
Stream
&
stream
)
{
...
...
@@ -742,20 +328,20 @@ namespace
{
template
<
int
DEPTH
,
int
cn
>
struct
NppShiftFunc
{
typedef
typename
N
ppTypeTraits
<
DEPTH
>::
npp_t
npp_t
;
typedef
typename
N
PPTypeTraits
<
DEPTH
>::
npp_type
npp_type
;
typedef
NppStatus
(
*
func_t
)(
const
npp_t
*
pSrc1
,
int
nSrc1Step
,
const
Npp32u
*
pConstants
,
npp_t
*
pDst
,
int
nDstStep
,
NppiSize
oSizeROI
);
typedef
NppStatus
(
*
func_t
)(
const
npp_t
ype
*
pSrc1
,
int
nSrc1Step
,
const
Npp32u
*
pConstants
,
npp_type
*
pDst
,
int
nDstStep
,
NppiSize
oSizeROI
);
};
template
<
int
DEPTH
>
struct
NppShiftFunc
<
DEPTH
,
1
>
{
typedef
typename
N
ppTypeTraits
<
DEPTH
>::
npp_t
npp_t
;
typedef
typename
N
PPTypeTraits
<
DEPTH
>::
npp_type
npp_type
;
typedef
NppStatus
(
*
func_t
)(
const
npp_t
*
pSrc1
,
int
nSrc1Step
,
const
Npp32u
pConstants
,
npp_t
*
pDst
,
int
nDstStep
,
NppiSize
oSizeROI
);
typedef
NppStatus
(
*
func_t
)(
const
npp_t
ype
*
pSrc1
,
int
nSrc1Step
,
const
Npp32u
pConstants
,
npp_type
*
pDst
,
int
nDstStep
,
NppiSize
oSizeROI
);
};
template
<
int
DEPTH
,
int
cn
,
typename
NppShiftFunc
<
DEPTH
,
cn
>::
func_t
func
>
struct
NppShift
{
typedef
typename
N
ppTypeTraits
<
DEPTH
>::
npp_t
npp_t
;
typedef
typename
N
PPTypeTraits
<
DEPTH
>::
npp_type
npp_type
;
static
void
call
(
const
GpuMat
&
src
,
Scalar_
<
Npp32u
>
sc
,
GpuMat
&
dst
,
cudaStream_t
stream
)
{
...
...
@@ -765,7 +351,7 @@ namespace
oSizeROI
.
width
=
src
.
cols
;
oSizeROI
.
height
=
src
.
rows
;
nppSafeCall
(
func
(
src
.
ptr
<
npp_t
>
(),
static_cast
<
int
>
(
src
.
step
),
sc
.
val
,
dst
.
ptr
<
npp_t
>
(),
static_cast
<
int
>
(
dst
.
step
),
oSizeROI
)
);
nppSafeCall
(
func
(
src
.
ptr
<
npp_t
ype
>
(),
static_cast
<
int
>
(
src
.
step
),
sc
.
val
,
dst
.
ptr
<
npp_type
>
(),
static_cast
<
int
>
(
dst
.
step
),
oSizeROI
)
);
if
(
stream
==
0
)
cudaSafeCall
(
cudaDeviceSynchronize
()
);
...
...
@@ -773,7 +359,7 @@ namespace
};
template
<
int
DEPTH
,
typename
NppShiftFunc
<
DEPTH
,
1
>::
func_t
func
>
struct
NppShift
<
DEPTH
,
1
,
func
>
{
typedef
typename
N
ppTypeTraits
<
DEPTH
>::
npp_t
npp_t
;
typedef
typename
N
PPTypeTraits
<
DEPTH
>::
npp_type
npp_type
;
static
void
call
(
const
GpuMat
&
src
,
Scalar_
<
Npp32u
>
sc
,
GpuMat
&
dst
,
cudaStream_t
stream
)
{
...
...
@@ -783,7 +369,7 @@ namespace
oSizeROI
.
width
=
src
.
cols
;
oSizeROI
.
height
=
src
.
rows
;
nppSafeCall
(
func
(
src
.
ptr
<
npp_t
>
(),
static_cast
<
int
>
(
src
.
step
),
sc
.
val
[
0
],
dst
.
ptr
<
npp_t
>
(),
static_cast
<
int
>
(
dst
.
step
),
oSizeROI
)
);
nppSafeCall
(
func
(
src
.
ptr
<
npp_t
ype
>
(),
static_cast
<
int
>
(
src
.
step
),
sc
.
val
[
0
],
dst
.
ptr
<
npp_type
>
(),
static_cast
<
int
>
(
dst
.
step
),
oSizeROI
)
);
if
(
stream
==
0
)
cudaSafeCall
(
cudaDeviceSynchronize
()
);
...
...
modules/cudev/include/opencv2/cudev/ptr2d/mask.hpp
View file @
b11cccaa
...
...
@@ -62,6 +62,42 @@ struct WithOutMask
}
};
template
<
class
MaskPtr
>
struct
SingleMaskChannels
{
typedef
typename
PtrTraits
<
MaskPtr
>::
value_type
value_type
;
typedef
typename
PtrTraits
<
MaskPtr
>::
index_type
index_type
;
MaskPtr
mask
;
int
channels
;
__device__
__forceinline__
value_type
operator
()(
index_type
y
,
index_type
x
)
const
{
return
mask
(
y
,
x
/
channels
);
}
};
template
<
class
MaskPtr
>
struct
SingleMaskChannelsSz
:
SingleMaskChannels
<
MaskPtr
>
{
int
rows
,
cols
;
};
template
<
class
MaskPtr
>
__host__
SingleMaskChannelsSz
<
typename
PtrTraits
<
MaskPtr
>::
ptr_type
>
singleMaskChannels
(
const
MaskPtr
&
mask
,
int
channels
)
{
SingleMaskChannelsSz
<
typename
PtrTraits
<
MaskPtr
>::
ptr_type
>
ptr
;
ptr
.
mask
=
shrinkPtr
(
mask
);
ptr
.
channels
=
channels
;
ptr
.
rows
=
getRows
(
mask
);
ptr
.
cols
=
getCols
(
mask
)
*
channels
;
return
ptr
;
}
template
<
class
MaskPtr
>
struct
PtrTraits
<
SingleMaskChannelsSz
<
MaskPtr
>
>
:
PtrTraitsBase
<
SingleMaskChannelsSz
<
MaskPtr
>
,
SingleMaskChannels
<
MaskPtr
>
>
{
};
}}
#endif
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment