Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
7628e57f
Commit
7628e57f
authored
Jul 29, 2013
by
Vladislav Vinogradov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
used new device layer for cv::gpu::absdiff
parent
574ff471
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
146 additions
and
189 deletions
+146
-189
absdiff_mat.cu
modules/cudaarithm/src/cuda/absdiff_mat.cu
+100
-59
absdiff_scalar.cu
modules/cudaarithm/src/cuda/absdiff_scalar.cu
+44
-32
element_operations.cpp
modules/cudaarithm/src/element_operations.cpp
+2
-98
No files found.
modules/cudaarithm/src/cuda/absdiff_mat.cu
View file @
7628e57f
...
...
@@ -40,43 +40,22 @@
//
//M*/
#i
f !defined CUDA_DISABLER
#i
nclude "opencv2/opencv_modules.hpp"
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/transform.hpp"
#include "opencv2/core/cuda/saturate_cast.hpp"
#include "opencv2/core/cuda/simd_functions.hpp"
#ifndef HAVE_OPENCV_CUDEV
#
include "arithm_func_traits.hpp
"
#
error "opencv_cudev is required
"
using namespace cv::cuda;
using namespace cv::cuda::device;
#else
namespace arithm
{
struct VAbsDiff4 : binary_function<uint, uint, uint>
{
__device__ __forceinline__ uint operator ()(uint a, uint b) const
{
return vabsdiff4(a, b);
}
__host__ __device__ __forceinline__ VAbsDiff4() {}
__host__ __device__ __forceinline__ VAbsDiff4(const VAbsDiff4&) {}
};
#include "opencv2/cudev.hpp"
struct VAbsDiff2 : binary_function<uint, uint, uint>
{
__device__ __forceinline__ uint operator ()(uint a, uint b) const
{
return vabsdiff2(a, b);
}
using namespace cv::cudev;
__host__ __device__ __forceinline__ VAbsDiff2() {}
__host__ __device__ __forceinline__ VAbsDiff2(const VAbsDiff2&) {}
};
void absDiffMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double, Stream& stream, int);
namespace
{
__device__ __forceinline__ int _abs(int a)
{
return ::abs(a);
...
...
@@ -90,58 +69,120 @@ namespace arithm
return ::fabs(a);
}
template <typename T> struct AbsDiff
Mat
: binary_function<T, T, T>
template <typename T> struct AbsDiff
Op1
: binary_function<T, T, T>
{
__device__ __forceinline__ T operator ()(T a, T b) const
{
return saturate_cast<T>(_abs(a - b));
}
__host__ __device__ __forceinline__ AbsDiffMat() {}
__host__ __device__ __forceinline__ AbsDiffMat(const AbsDiffMat&) {}
};
}
namespace cv { namespace cuda { namespace device
{
template <> struct TransformFunctorTraits< arithm::VAbsDiff4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
template <typename ScalarDepth> struct TransformPolicy : DefaultTransformPolicy
{
};
template <> struct TransformFunctorTraits< arithm::VAbsDiff2 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
template <> struct TransformPolicy<double> : DefaultTransformPolicy
{
enum {
shift = 1
};
};
template <typename T> struct TransformFunctorTraits< arithm::AbsDiffMat<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
template <typename T>
void absDiffMat_v1(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
{
gridTransformBinary_< TransformPolicy<T> >(globPtr<T>(src1), globPtr<T>(src2), globPtr<T>(dst), AbsDiffOp1<T>(), stream);
}
struct AbsDiffOp2 : binary_function<uint, uint, uint>
{
__device__ __forceinline__ uint operator ()(uint a, uint b) const
{
return vabsdiff2(a, b);
}
};
}}}
namespace arithm
{
void absDiffMat_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
void absDiffMat_v2(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
{
device::transform(src1, src2, dst, VAbsDiff4(), WithOutMask(), stream);
const int vcols = src1.cols >> 1;
GlobPtrSz<uint> src1_ = globPtr((uint*) src1.data, src1.step, src1.rows, vcols);
GlobPtrSz<uint> src2_ = globPtr((uint*) src2.data, src2.step, src1.rows, vcols);
GlobPtrSz<uint> dst_ = globPtr((uint*) dst.data, dst.step, src1.rows, vcols);
gridTransformBinary(src1_, src2_, dst_, AbsDiffOp2(), stream);
}
void absDiffMat_v2(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
struct AbsDiffOp4 : binary_function<uint, uint, uint>
{
__device__ __forceinline__ uint operator ()(uint a, uint b) const
{
return vabsdiff4(a, b);
}
};
void absDiffMat_v4(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
{
device::transform(src1, src2, dst, VAbsDiff2(), WithOutMask(), stream);
const int vcols = src1.cols >> 2;
GlobPtrSz<uint> src1_ = globPtr((uint*) src1.data, src1.step, src1.rows, vcols);
GlobPtrSz<uint> src2_ = globPtr((uint*) src2.data, src2.step, src1.rows, vcols);
GlobPtrSz<uint> dst_ = globPtr((uint*) dst.data, dst.step, src1.rows, vcols);
gridTransformBinary(src1_, src2_, dst_, AbsDiffOp4(), stream);
}
}
template <typename T>
void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
void absDiffMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double, Stream& stream, int)
{
typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream);
static const func_t funcs[] =
{
absDiffMat_v1<uchar>,
absDiffMat_v1<schar>,
absDiffMat_v1<ushort>,
absDiffMat_v1<short>,
absDiffMat_v1<int>,
absDiffMat_v1<float>,
absDiffMat_v1<double>
};
const int depth = src1.depth();
CV_DbgAssert( depth < 7 );
GpuMat src1_ = src1.reshape(1);
GpuMat src2_ = src2.reshape(1);
GpuMat dst_ = dst.reshape(1);
if (depth == CV_8U || depth == CV_16U)
{
device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, AbsDiffMat<T>(), WithOutMask(), stream);
const intptr_t src1ptr = reinterpret_cast<intptr_t>(src1_.data);
const intptr_t src2ptr = reinterpret_cast<intptr_t>(src2_.data);
const intptr_t dstptr = reinterpret_cast<intptr_t>(dst_.data);
const bool isAllAligned = (src1ptr & 31) == 0 && (src2ptr & 31) == 0 && (dstptr & 31) == 0;
if (isAllAligned)
{
if (depth == CV_8U && (src1_.cols & 3) == 0)
{
absDiffMat_v4(src1_, src2_, dst_, stream);
return;
}
else if (depth == CV_16U && (src1_.cols & 1) == 0)
{
absDiffMat_v2(src1_, src2_, dst_, stream);
return;
}
}
}
template void absDiffMat<uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void absDiffMat<schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void absDiffMat<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void absDiffMat<short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void absDiffMat<int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void absDiffMat<float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void absDiffMat<double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
const func_t func = funcs[depth];
if (!func)
CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types");
func(src1_, src2_, dst_, stream);
}
#endif
// CUDA_DISABLER
#endif
modules/cudaarithm/src/cuda/absdiff_scalar.cu
View file @
7628e57f
...
...
@@ -40,59 +40,71 @@
//
//M*/
#i
f !defined CUDA_DISABLER
#i
nclude "opencv2/opencv_modules.hpp"
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/transform.hpp"
#include "opencv2/core/cuda/saturate_cast.hpp"
#include "opencv2/core/cuda/simd_functions.hpp"
#ifndef HAVE_OPENCV_CUDEV
#
include "arithm_func_traits.hpp
"
#
error "opencv_cudev is required
"
using namespace cv::cuda;
using namespace cv::cuda::device;
#else
namespace arithm
#include "opencv2/cudev.hpp"
using namespace cv::cudev;
void absDiffScalar(const GpuMat& src, cv::Scalar val, bool, GpuMat& dst, const GpuMat&, double, Stream& stream, int);
namespace
{
template <typename T, typename S> struct AbsDiffScalar : unary_function<T, T>
template <typename T, typename S> struct AbsDiffScalar
Op
: unary_function<T, T>
{
S val;
__host__ explicit AbsDiffScalar(S val_) : val(val_) {}
__device__ __forceinline__ T operator ()(T a) const
{
abs_func<S> f;
return saturate_cast<T>(f(a - val));
}
};
}
namespace cv { namespace cuda { namespace device
{
template <typename T, typename S> struct TransformFunctorTraits< arithm::AbsDiffScalar<T, S> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
template <typename ScalarDepth> struct TransformPolicy : DefaultTransformPolicy
{
};
template <> struct TransformPolicy<double> : DefaultTransformPolicy
{
enum {
shift = 1
};
};
}}}
namespace arithm
template <typename SrcType, typename ScalarDepth>
void absDiffScalarImpl(const GpuMat& src, double value, GpuMat& dst, Stream& stream)
{
AbsDiffScalarOp<SrcType, ScalarDepth> op;
op.val = static_cast<ScalarDepth>(value);
gridTransformUnary_< TransformPolicy<ScalarDepth> >(globPtr<SrcType>(src), globPtr<SrcType>(dst), op, stream);
}
}
void absDiffScalar(const GpuMat& src, cv::Scalar val, bool, GpuMat& dst, const GpuMat&, double, Stream& stream, int)
{
t
emplate <typename T, typename S>
void absDiffScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream)
t
ypedef void (*func_t)(const GpuMat& src, double val, GpuMat& dst, Stream& stream);
static const func_t funcs[] =
{
AbsDiffScalar<T, S> op(static_cast<S>(val));
absDiffScalarImpl<uchar, float>,
absDiffScalarImpl<schar, float>,
absDiffScalarImpl<ushort, float>,
absDiffScalarImpl<short, float>,
absDiffScalarImpl<int, float>,
absDiffScalarImpl<float, float>,
absDiffScalarImpl<double, double>
};
device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, op, WithOutMask(), stream);
}
const int depth = src.depth();
CV_DbgAssert( depth < 7 );
template void absDiffScalar<uchar, float>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
template void absDiffScalar<schar, float>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
template void absDiffScalar<ushort, float>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
template void absDiffScalar<short, float>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
template void absDiffScalar<int, float>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
template void absDiffScalar<float, float>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
template void absDiffScalar<double, double>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
funcs[depth](src, val[0], dst, stream);
}
#endif
// CUDA_DISABLER
#endif
modules/cudaarithm/src/element_operations.cpp
View file @
7628e57f
...
...
@@ -442,105 +442,9 @@ void cv::cuda::divide(InputArray _src1, InputArray _src2, OutputArray _dst, doub
//////////////////////////////////////////////////////////////////////////////
// absdiff
namespace
arithm
{
void
absDiffMat_v4
(
PtrStepSz
<
unsigned
int
>
src1
,
PtrStepSz
<
unsigned
int
>
src2
,
PtrStepSz
<
unsigned
int
>
dst
,
cudaStream_t
stream
);
void
absDiffMat_v2
(
PtrStepSz
<
unsigned
int
>
src1
,
PtrStepSz
<
unsigned
int
>
src2
,
PtrStepSz
<
unsigned
int
>
dst
,
cudaStream_t
stream
);
template
<
typename
T
>
void
absDiffMat
(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
}
static
void
absDiffMat
(
const
GpuMat
&
src1
,
const
GpuMat
&
src2
,
GpuMat
&
dst
,
const
GpuMat
&
,
double
,
Stream
&
_stream
,
int
)
{
typedef
void
(
*
func_t
)(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
static
const
func_t
funcs
[]
=
{
arithm
::
absDiffMat
<
unsigned
char
>
,
arithm
::
absDiffMat
<
signed
char
>
,
arithm
::
absDiffMat
<
unsigned
short
>
,
arithm
::
absDiffMat
<
short
>
,
arithm
::
absDiffMat
<
int
>
,
arithm
::
absDiffMat
<
float
>
,
arithm
::
absDiffMat
<
double
>
};
const
int
depth
=
src1
.
depth
();
const
int
cn
=
src1
.
channels
();
cudaStream_t
stream
=
StreamAccessor
::
getStream
(
_stream
);
PtrStepSzb
src1_
(
src1
.
rows
,
src1
.
cols
*
cn
,
src1
.
data
,
src1
.
step
);
PtrStepSzb
src2_
(
src1
.
rows
,
src1
.
cols
*
cn
,
src2
.
data
,
src2
.
step
);
PtrStepSzb
dst_
(
src1
.
rows
,
src1
.
cols
*
cn
,
dst
.
data
,
dst
.
step
);
if
(
depth
==
CV_8U
||
depth
==
CV_16U
)
{
const
intptr_t
src1ptr
=
reinterpret_cast
<
intptr_t
>
(
src1_
.
data
);
const
intptr_t
src2ptr
=
reinterpret_cast
<
intptr_t
>
(
src2_
.
data
);
const
intptr_t
dstptr
=
reinterpret_cast
<
intptr_t
>
(
dst_
.
data
);
const
bool
isAllAligned
=
(
src1ptr
&
31
)
==
0
&&
(
src2ptr
&
31
)
==
0
&&
(
dstptr
&
31
)
==
0
;
if
(
isAllAligned
)
{
if
(
depth
==
CV_8U
&&
(
src1_
.
cols
&
3
)
==
0
)
{
const
int
vcols
=
src1_
.
cols
>>
2
;
arithm
::
absDiffMat_v4
(
PtrStepSz
<
unsigned
int
>
(
src1_
.
rows
,
vcols
,
(
unsigned
int
*
)
src1_
.
data
,
src1_
.
step
),
PtrStepSz
<
unsigned
int
>
(
src1_
.
rows
,
vcols
,
(
unsigned
int
*
)
src2_
.
data
,
src2_
.
step
),
PtrStepSz
<
unsigned
int
>
(
src1_
.
rows
,
vcols
,
(
unsigned
int
*
)
dst_
.
data
,
dst_
.
step
),
stream
);
return
;
}
else
if
(
depth
==
CV_16U
&&
(
src1_
.
cols
&
1
)
==
0
)
{
const
int
vcols
=
src1_
.
cols
>>
1
;
arithm
::
absDiffMat_v2
(
PtrStepSz
<
unsigned
int
>
(
src1_
.
rows
,
vcols
,
(
unsigned
int
*
)
src1_
.
data
,
src1_
.
step
),
PtrStepSz
<
unsigned
int
>
(
src1_
.
rows
,
vcols
,
(
unsigned
int
*
)
src2_
.
data
,
src2_
.
step
),
PtrStepSz
<
unsigned
int
>
(
src1_
.
rows
,
vcols
,
(
unsigned
int
*
)
dst_
.
data
,
dst_
.
step
),
stream
);
void
absDiffMat
(
const
GpuMat
&
src1
,
const
GpuMat
&
src2
,
GpuMat
&
dst
,
const
GpuMat
&
,
double
,
Stream
&
stream
,
int
);
return
;
}
}
}
const
func_t
func
=
funcs
[
depth
];
if
(
!
func
)
CV_Error
(
cv
::
Error
::
StsUnsupportedFormat
,
"Unsupported combination of source and destination types"
);
func
(
src1_
,
src2_
,
dst_
,
stream
);
}
namespace
arithm
{
template
<
typename
T
,
typename
S
>
void
absDiffScalar
(
PtrStepSzb
src1
,
double
val
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
}
static
void
absDiffScalar
(
const
GpuMat
&
src
,
Scalar
val
,
bool
,
GpuMat
&
dst
,
const
GpuMat
&
,
double
,
Stream
&
stream
,
int
)
{
typedef
void
(
*
func_t
)(
PtrStepSzb
src1
,
double
val
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
static
const
func_t
funcs
[]
=
{
arithm
::
absDiffScalar
<
unsigned
char
,
float
>
,
arithm
::
absDiffScalar
<
signed
char
,
float
>
,
arithm
::
absDiffScalar
<
unsigned
short
,
float
>
,
arithm
::
absDiffScalar
<
short
,
float
>
,
arithm
::
absDiffScalar
<
int
,
float
>
,
arithm
::
absDiffScalar
<
float
,
float
>
,
arithm
::
absDiffScalar
<
double
,
double
>
};
const
int
depth
=
src
.
depth
();
funcs
[
depth
](
src
,
val
[
0
],
dst
,
StreamAccessor
::
getStream
(
stream
));
}
void
absDiffScalar
(
const
GpuMat
&
src
,
cv
::
Scalar
val
,
bool
,
GpuMat
&
dst
,
const
GpuMat
&
,
double
,
Stream
&
stream
,
int
);
void
cv
::
cuda
::
absdiff
(
InputArray
src1
,
InputArray
src2
,
OutputArray
dst
,
Stream
&
stream
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment