Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
b11cccaa
Commit
b11cccaa
authored
Aug 26, 2013
by
Vladislav Vinogradov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
switched to new device layer in bitwise operations
parent
fdfffa52
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
234 additions
and
94 deletions
+234
-94
bitwise_mat.cu
modules/cudaarithm/src/cuda/bitwise_mat.cu
+92
-55
bitwise_scalar.cu
modules/cudaarithm/src/cuda/bitwise_scalar.cu
+106
-39
element_operations.cpp
modules/cudaarithm/src/element_operations.cpp
+0
-0
mask.hpp
modules/cudev/include/opencv2/cudev/ptr2d/mask.hpp
+36
-0
No files found.
modules/cudaarithm/src/cuda/bitwise_mat.cu
View file @
b11cccaa
...
...
@@ -40,87 +40,124 @@
//
//M*/
#i
f !defined CUDA_DISABLER
#i
nclude "opencv2/opencv_modules.hpp"
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/transform.hpp"
#include "opencv2/core/cuda/saturate_cast.hpp"
#include "opencv2/core/cuda/simd_functions.hpp"
#ifndef HAVE_OPENCV_CUDEV
#
include "arithm_func_traits.hpp
"
#
error "opencv_cudev is required
"
using namespace cv::cuda;
using namespace cv::cuda::device;
#else
namespace cv { namespace cuda { namespace device
{
template <typename T> struct TransformFunctorTraits< bit_not<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
{
};
#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp"
template <typename T> struct TransformFunctorTraits< bit_and<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
{
};
using namespace cv::cudev;
template <typename T> struct TransformFunctorTraits< bit_or<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
{
};
void bitMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int op);
template <typename T> struct TransformFunctorTraits< bit_xor<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
{
};
}}}
//////////////////////////////////////////////////////////////////////////////
/// bitwise_not
namespace
arithm
namespace
{
template <typename T> void bitMatNot(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
template <typename T>
void bitMatNot(const GpuMat& src, GpuMat& dst, const GpuMat& mask, Stream& stream)
{
GlobPtrSz<T> vsrc = globPtr((T*) src.data, src.step, src.rows, src.cols * src.channels());
GlobPtrSz<T> vdst = globPtr((T*) dst.data, dst.step, src.rows, src.cols * src.channels());
if (mask.data)
device::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, bit_not<T>(), mask
, stream);
gridTransformUnary(vsrc, vdst, bit_not<T>(), singleMaskChannels(globPtr<uchar>(mask), src.channels())
, stream);
else
device::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, bit_not<T>(), WithOutMask
(), stream);
gridTransformUnary(vsrc, vdst, bit_not<T>
(), stream);
}
}
template <typename T> void bitMatAnd(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
void cv::cuda::bitwise_not(InputArray _src, OutputArray _dst, InputArray _mask, Stream& stream)
{
GpuMat src = _src.getGpuMat();
GpuMat mask = _mask.getGpuMat();
const int depth = src.depth();
CV_DbgAssert( depth <= CV_32F );
CV_DbgAssert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) );
_dst.create(src.size(), src.type());
GpuMat dst = _dst.getGpuMat();
if (depth == CV_32F || depth == CV_32S)
{
if (mask.data)
device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_and<T>(), mask, stream);
else
device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_and<T>(), WithOutMask(), stream);
bitMatNot<uint>(src, dst, mask, stream);
}
template <typename T> void bitMatOr(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
else if (depth == CV_16S || depth == CV_16U)
{
if (mask.data)
device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_or<T>(), mask, stream);
else
device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_or<T>(), WithOutMask(), stream);
bitMatNot<ushort>(src, dst, mask, stream);
}
else
{
bitMatNot<uchar>(src, dst, mask, stream);
}
}
template <typename T> void bitMatXor(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
//////////////////////////////////////////////////////////////////////////////
/// Binary bitwise logical operations
namespace
{
template <template <typename> class Op, typename T>
void bitMatOp(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, Stream& stream)
{
GlobPtrSz<T> vsrc1 = globPtr((T*) src1.data, src1.step, src1.rows, src1.cols * src1.channels());
GlobPtrSz<T> vsrc2 = globPtr((T*) src2.data, src2.step, src1.rows, src1.cols * src1.channels());
GlobPtrSz<T> vdst = globPtr((T*) dst.data, dst.step, src1.rows, src1.cols * src1.channels());
if (mask.data)
device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_xor<T>(), mask
, stream);
gridTransformBinary(vsrc1, vsrc2, vdst, Op<T>(), singleMaskChannels(globPtr<uchar>(mask), src1.channels())
, stream);
else
device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_xor<T>(), WithOutMask
(), stream);
gridTransformBinary(vsrc1, vsrc2, vdst, Op<T>
(), stream);
}
}
template void bitMatNot<uchar>(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void bitMatNot<ushort>(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void bitMatNot<uint>(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
void bitMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int op)
{
typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, Stream& stream);
static const func_t funcs32[] =
{
bitMatOp<bit_and, uint>,
bitMatOp<bit_or, uint>,
bitMatOp<bit_xor, uint>
};
static const func_t funcs16[] =
{
bitMatOp<bit_and, ushort>,
bitMatOp<bit_or, ushort>,
bitMatOp<bit_xor, ushort>
};
static const func_t funcs8[] =
{
bitMatOp<bit_and, uchar>,
bitMatOp<bit_or, uchar>,
bitMatOp<bit_xor, uchar>
};
template void bitMatAnd<uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void bitMatAnd<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void bitMatAnd<uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
const int depth = src1.depth();
template void bitMatOr<uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void bitMatOr<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void bitMatOr<uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
CV_DbgAssert( depth <= CV_32F );
CV_DbgAssert( op >= 0 && op < 3 );
template void bitMatXor<uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void bitMatXor<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void bitMatXor<uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
if (depth == CV_32F || depth == CV_32S)
{
funcs32[op](src1, src2, dst, mask, stream);
}
else if (depth == CV_16S || depth == CV_16U)
{
funcs16[op](src1, src2, dst, mask, stream);
}
else
{
funcs8[op](src1, src2, dst, mask, stream);
}
}
#endif
// CUDA_DISABLER
#endif
modules/cudaarithm/src/cuda/bitwise_scalar.cu
View file @
b11cccaa
...
...
@@ -40,65 +40,132 @@
//
//M*/
#i
f !defined CUDA_DISABLER
#i
nclude "opencv2/opencv_modules.hpp"
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/transform.hpp"
#include "opencv2/core/cuda/saturate_cast.hpp"
#include "opencv2/core/cuda/simd_functions.hpp"
#ifndef HAVE_OPENCV_CUDEV
#
include "arithm_func_traits.hpp
"
#
error "opencv_cudev is required
"
using namespace cv::cuda;
using namespace cv::cuda::device;
#else
namespace cv { namespace cuda { namespace device
#include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv::cudev;
void bitScalar(const GpuMat& src, cv::Scalar value, bool, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int op);
namespace
{
template <typename T> struct TransformFunctorTraits< binder2nd< bit_and<T> > > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
template <template <typename> class Op, typename T>
void bitScalarOp(const GpuMat& src, uint value, GpuMat& dst, Stream& stream)
{
};
gridTransformUnary(globPtr<T>(src), globPtr<T>(dst), bind2nd(Op<T>(), value), stream);
}
typedef void (*bit_scalar_func_t)(const GpuMat& src, uint value, GpuMat& dst, Stream& stream);
template <typename T
> struct TransformFunctorTraits< binder2nd< bit_or<T> > > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
// Adapter that lets a bit_scalar_func_t be called with a cv::Scalar:
// only the first scalar component is used, saturated to T, and handed
// to `func` together with the source/destination matrices and stream.
template <typename T, bit_scalar_func_t func>
struct BitScalar
{
    static void call(const GpuMat& src, cv::Scalar value, GpuMat& dst, Stream& stream)
    {
        // Saturate to the element type first; the conversion to the
        // `uint` parameter of `func` happens implicitly at the call.
        const T firstComponent = cv::saturate_cast<T>(value[0]);

        func(src, firstComponent, dst, stream);
    }
};
template <
typename T> struct TransformFunctorTraits< binder2nd< bit_xor<T> > > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
// Adapter that packs all four cv::Scalar components into a single 32-bit
// word and forwards it to `func`. Each component is saturated to uchar and
// placed in its own byte: value[0] in bits 0-7, value[1] in bits 8-15,
// value[2] in bits 16-23, value[3] in bits 24-31.
template <bit_scalar_func_t func> struct BitScalar4
{
    static void call(const GpuMat& src, cv::Scalar value, GpuMat& dst, Stream& stream)
    {
        // Widen every byte to uint BEFORE shifting. A bare uchar operand is
        // promoted to (signed) int, so `byte << 24` with byte >= 128 would
        // shift into the sign bit instead of producing a plain unsigned word.
        const uint b0 = cv::saturate_cast<uchar>(value[0]);
        const uint b1 = cv::saturate_cast<uchar>(value[1]);
        const uint b2 = cv::saturate_cast<uchar>(value[2]);
        const uint b3 = cv::saturate_cast<uchar>(value[3]);

        const uint packedVal = b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);

        func(src, packedVal, dst, stream);
    }
};
}}}
namespace arithm
{
template <typename T> void bitScalarAnd(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream)
template <int DEPTH, int cn> struct NppBitwiseCFunc
{
device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::cuda::device::bind2nd(bit_and<T>(), src2), WithOutMask(), stream);
}
typedef typename NPPTypeTraits<DEPTH>::npp_type npp_type;
typedef NppStatus (*func_t)(const npp_type* pSrc1, int nSrc1Step, const npp_type* pConstants, npp_type* pDst, int nDstStep, NppiSize oSizeROI);
};
template <
typename T> void bitScalarOr(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream)
template <
int DEPTH, int cn, typename NppBitwiseCFunc<DEPTH, cn>::func_t func> struct NppBitwiseC
{
device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::cuda::device::bind2nd(bit_or<T>(), src2), WithOutMask(), stream);
}
typedef typename NppBitwiseCFunc<DEPTH, cn>::npp_type npp_type;
static void call(const GpuMat& src, cv::Scalar value, GpuMat& dst, Stream& _stream)
{
cudaStream_t stream = StreamAccessor::getStream(_stream);
NppStreamHandler h(stream);
NppiSize oSizeROI;
oSizeROI.width = src.cols;
oSizeROI.height = src.rows;
const npp_type pConstants[] =
{
cv::saturate_cast<npp_type>(value[0]),
cv::saturate_cast<npp_type>(value[1]),
cv::saturate_cast<npp_type>(value[2]),
cv::saturate_cast<npp_type>(value[3])
};
nppSafeCall( func(src.ptr<npp_type>(), static_cast<int>(src.step), pConstants, dst.ptr<npp_type>(), static_cast<int>(dst.step), oSizeROI) );
if (stream == 0)
CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
}
};
}
void bitScalar(const GpuMat& src, cv::Scalar value, bool, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int op)
{
(void) mask;
template <typename T> void bitScalarXor(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream)
typedef void (*func_t)(const GpuMat& src, cv::Scalar value, GpuMat& dst, Stream& stream);
static const func_t funcs[3][6][4] =
{
device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::cuda::device::bind2nd(bit_xor<T>(), src2), WithOutMask(), stream);
}
{
{BitScalar<uchar, bitScalarOp<bit_and, uchar> >::call , 0, NppBitwiseC<CV_8U , 3, nppiAndC_8u_C3R >::call, BitScalar4< bitScalarOp<bit_and, uint> >::call},
{BitScalar<uchar, bitScalarOp<bit_and, uchar> >::call , 0, NppBitwiseC<CV_8U , 3, nppiAndC_8u_C3R >::call, BitScalar4< bitScalarOp<bit_and, uint> >::call},
{BitScalar<ushort, bitScalarOp<bit_and, ushort> >::call, 0, NppBitwiseC<CV_16U, 3, nppiAndC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiAndC_16u_C4R>::call},
{BitScalar<ushort, bitScalarOp<bit_and, ushort> >::call, 0, NppBitwiseC<CV_16U, 3, nppiAndC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiAndC_16u_C4R>::call},
{BitScalar<uint, bitScalarOp<bit_and, uint> >::call , 0, NppBitwiseC<CV_32S, 3, nppiAndC_32s_C3R>::call, NppBitwiseC<CV_32S, 4, nppiAndC_32s_C4R>::call},
{BitScalar<uint, bitScalarOp<bit_and, uint> >::call , 0, NppBitwiseC<CV_32S, 3, nppiAndC_32s_C3R>::call, NppBitwiseC<CV_32S, 4, nppiAndC_32s_C4R>::call}
},
{
{BitScalar<uchar, bitScalarOp<bit_or, uchar> >::call , 0, NppBitwiseC<CV_8U , 3, nppiOrC_8u_C3R >::call, BitScalar4< bitScalarOp<bit_or, uint> >::call},
{BitScalar<uchar, bitScalarOp<bit_or, uchar> >::call , 0, NppBitwiseC<CV_8U , 3, nppiOrC_8u_C3R >::call, BitScalar4< bitScalarOp<bit_or, uint> >::call},
{BitScalar<ushort, bitScalarOp<bit_or, ushort> >::call, 0, NppBitwiseC<CV_16U, 3, nppiOrC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiOrC_16u_C4R>::call},
{BitScalar<ushort, bitScalarOp<bit_or, ushort> >::call, 0, NppBitwiseC<CV_16U, 3, nppiOrC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiOrC_16u_C4R>::call},
{BitScalar<uint, bitScalarOp<bit_or, uint> >::call , 0, NppBitwiseC<CV_32S, 3, nppiOrC_32s_C3R>::call, NppBitwiseC<CV_32S, 4, nppiOrC_32s_C4R>::call},
{BitScalar<uint, bitScalarOp<bit_or, uint> >::call , 0, NppBitwiseC<CV_32S, 3, nppiOrC_32s_C3R>::call, NppBitwiseC<CV_32S, 4, nppiOrC_32s_C4R>::call}
},
{
{BitScalar<uchar, bitScalarOp<bit_xor, uchar> >::call , 0, NppBitwiseC<CV_8U , 3, nppiXorC_8u_C3R >::call, BitScalar4< bitScalarOp<bit_xor, uint> >::call},
{BitScalar<uchar, bitScalarOp<bit_xor, uchar> >::call , 0, NppBitwiseC<CV_8U , 3, nppiXorC_8u_C3R >::call, BitScalar4< bitScalarOp<bit_xor, uint> >::call},
{BitScalar<ushort, bitScalarOp<bit_xor, ushort> >::call, 0, NppBitwiseC<CV_16U, 3, nppiXorC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiXorC_16u_C4R>::call},
{BitScalar<ushort, bitScalarOp<bit_xor, ushort> >::call, 0, NppBitwiseC<CV_16U, 3, nppiXorC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiXorC_16u_C4R>::call},
{BitScalar<uint, bitScalarOp<bit_xor, uint> >::call , 0, NppBitwiseC<CV_32S, 3, nppiXorC_32s_C3R>::call, NppBitwiseC<CV_32S, 4, nppiXorC_32s_C4R>::call},
{BitScalar<uint, bitScalarOp<bit_xor, uint> >::call , 0, NppBitwiseC<CV_32S, 3, nppiXorC_32s_C3R>::call, NppBitwiseC<CV_32S, 4, nppiXorC_32s_C4R>::call}
}
};
template void bitScalarAnd<uchar>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
template void bitScalarAnd<ushort>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
template void bitScalarAnd<int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
template void bitScalarAnd<unsigned int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
const int depth = src.depth();
const int cn = src.channels();
template void bitScalarOr<uchar>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream
);
template void bitScalarOr<ushort>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream
);
template void bitScalarOr<int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream
);
template void bitScalarOr<unsigned int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream
);
CV_DbgAssert( depth <= CV_32F
);
CV_DbgAssert( cn == 1 || cn == 3 || cn == 4
);
CV_DbgAssert( mask.empty()
);
CV_DbgAssert( op >= 0 && op < 3
);
template void bitScalarXor<uchar>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
template void bitScalarXor<ushort>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
template void bitScalarXor<int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
template void bitScalarXor<unsigned int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
funcs[op][depth][cn - 1](src, value, dst, stream);
}
#endif
// CUDA_DISABLER
#endif
modules/cudaarithm/src/element_operations.cpp
View file @
b11cccaa
This diff is collapsed.
Click to expand it.
modules/cudev/include/opencv2/cudev/ptr2d/mask.hpp
View file @
b11cccaa
...
...
@@ -62,6 +62,42 @@ struct WithOutMask
}
};
// Mask accessor that wraps another mask pointer and a channel count.
// A lookup at (y, x) is answered by the wrapped mask at (y, x / channels),
// so `channels` consecutive x positions share one mask element.
template <class MaskPtr> struct SingleMaskChannels
{
    typedef typename PtrTraits<MaskPtr>::value_type value_type;
    typedef typename PtrTraits<MaskPtr>::index_type index_type;

    MaskPtr mask;   // wrapped mask accessor
    int channels;   // divisor applied to the x coordinate

    __device__ __forceinline__ value_type operator ()(index_type y, index_type x) const
    {
        return mask(y, x / channels);
    }
};
// SingleMaskChannels plus explicit extents (filled in by singleMaskChannels()).
template <class MaskPtr>
struct SingleMaskChannelsSz : SingleMaskChannels<MaskPtr>
{
    int rows;
    int cols;
};
// Host-side factory for SingleMaskChannelsSz.
//
// mask     - source mask accessor; it is passed through shrinkPtr() before
//            being stored, and its extents are read with getRows()/getCols().
// channels - channel count stored in the wrapper; the reported column count
//            is getCols(mask) * channels, the row count is unchanged.
template <class MaskPtr>
__host__ SingleMaskChannelsSz<typename PtrTraits<MaskPtr>::ptr_type>
singleMaskChannels(const MaskPtr& mask, int channels)
{
    typedef SingleMaskChannelsSz<typename PtrTraits<MaskPtr>::ptr_type> wrapper_type;

    wrapper_type wrapped;

    wrapped.mask     = shrinkPtr(mask);
    wrapped.channels = channels;
    wrapped.rows     = getRows(mask);
    wrapped.cols     = getCols(mask) * channels;

    return wrapped;
}
// PtrTraits specialization for the sized wrapper. It derives everything from
// PtrTraitsBase, naming SingleMaskChannels<MaskPtr> as the second argument
// (presumably the size-less pointer type used on the device -- confirm
// against PtrTraitsBase).
template <class MaskPtr>
struct PtrTraits< SingleMaskChannelsSz<MaskPtr> >
    : PtrTraitsBase< SingleMaskChannelsSz<MaskPtr>, SingleMaskChannels<MaskPtr> >
{
};
}}
#endif
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment