Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
5e9ae6b1
Commit
5e9ae6b1
authored
Aug 17, 2011
by
Vladislav Vinogradov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added TransformFunctorTraits, optimized some functions that use transform
parent
6ce2277c
Show whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
558 additions
and
279 deletions
+558
-279
element_operations.cu
modules/gpu/src/cuda/element_operations.cu
+102
-16
mathfunc.cu
modules/gpu/src/cuda/mathfunc.cu
+0
-5
matrix_operations.cu
modules/gpu/src/cuda/matrix_operations.cu
+49
-3
cudastream.cpp
modules/gpu/src/cudastream.cpp
+5
-12
element_operations.cpp
modules/gpu/src/element_operations.cpp
+46
-57
gpumat.cpp
modules/gpu/src/gpumat.cpp
+6
-6
transform.hpp
modules/gpu/src/opencv2/gpu/device/detail/transform.hpp
+79
-96
functional.hpp
modules/gpu/src/opencv2/gpu/device/functional.hpp
+84
-31
vec_math.hpp
modules/gpu/src/opencv2/gpu/device/vec_math.hpp
+68
-24
vec_traits.hpp
modules/gpu/src/opencv2/gpu/device/vec_traits.hpp
+97
-13
tests.cpp
samples/gpu/performance/tests.cpp
+22
-16
No files found.
modules/gpu/src/cuda/element_operations.cu
View file @
5e9ae6b1
...
@@ -47,37 +47,33 @@
...
@@ -47,37 +47,33 @@
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "internal_shared.hpp"
#include "internal_shared.hpp"
using namespace cv::gpu;
namespace cv { namespace gpu { namespace device
using namespace cv::gpu::device;
namespace cv { namespace gpu { namespace mathfunc
{
{
//////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////
// Compare
// Compare
template <typename T
1, typename T2> struct NotEqual : binary_function<T1, T2
, uchar>
template <typename T
> struct NotEqual : binary_function<T, T
, uchar>
{
{
__device__ __forceinline__ uchar operator()(
const T1& src1, const T2&
src2) const
__device__ __forceinline__ uchar operator()(
T src1, T
src2) const
{
{
return static_cast<uchar>(static_cast<int>(src1 != src2) * 255);
return static_cast<uchar>(static_cast<int>(src1 != src2) * 255);
}
}
};
};
template <typename T
1, typename T2
>
template <typename T>
inline void compare_ne(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst, cudaStream_t stream)
inline void compare_ne(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst, cudaStream_t stream)
{
{
NotEqual<T
1, T2
> op;
NotEqual<T> op;
transform(static_cast< DevMem2D_<T
1> >(src1), static_cast< DevMem2D_<T2
> >(src2), dst, op, stream);
transform(static_cast< DevMem2D_<T
> >(src1), static_cast< DevMem2D_<T
> >(src2), dst, op, stream);
}
}
void compare_ne_8uc4(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst, cudaStream_t stream)
void compare_ne_8uc4(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst, cudaStream_t stream)
{
{
compare_ne<uint
, uint
>(src1, src2, dst, stream);
compare_ne<uint>(src1, src2, dst, stream);
}
}
void compare_ne_32f(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst, cudaStream_t stream)
void compare_ne_32f(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst, cudaStream_t stream)
{
{
compare_ne<float
, float
>(src1, src2, dst, stream);
compare_ne<float>(src1, src2, dst, stream);
}
}
...
@@ -355,6 +351,35 @@ namespace cv { namespace gpu { namespace mathfunc
...
@@ -355,6 +351,35 @@ namespace cv { namespace gpu { namespace mathfunc
//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
// min/max
// min/max
namespace detail
{
template <size_t size, typename F> struct MinMaxTraits : DefaultTransformFunctorTraits<F>
{
};
template <typename F> struct MinMaxTraits<2, F> : DefaultTransformFunctorTraits<F>
{
enum { smart_shift = 4 };
};
template <typename F> struct MinMaxTraits<4, F> : DefaultTransformFunctorTraits<F>
{
enum { smart_block_dim_y = 4 };
enum { smart_shift = 4 };
};
}
template <typename T> struct TransformFunctorTraits< minimum<T> > : detail::MinMaxTraits< sizeof(T), minimum<T> >
{
};
template <typename T> struct TransformFunctorTraits< maximum<T> > : detail::MinMaxTraits< sizeof(T), maximum<T> >
{
};
template <typename T> struct TransformFunctorTraits< binder2nd< minimum<T> > > : detail::MinMaxTraits< sizeof(T), binder2nd< minimum<T> > >
{
};
template <typename T> struct TransformFunctorTraits< binder2nd< maximum<T> > > : detail::MinMaxTraits< sizeof(T), binder2nd< maximum<T> > >
{
};
template <typename T>
template <typename T>
void min_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream)
void min_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream)
{
{
...
@@ -415,6 +440,38 @@ namespace cv { namespace gpu { namespace mathfunc
...
@@ -415,6 +440,38 @@ namespace cv { namespace gpu { namespace mathfunc
//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
// threshold
// threshold
namespace detail
{
template <size_t size, typename F> struct ThresholdTraits : DefaultTransformFunctorTraits<F>
{
};
template <typename F> struct ThresholdTraits<2, F> : DefaultTransformFunctorTraits<F>
{
enum { smart_shift = 4 };
};
template <typename F> struct ThresholdTraits<4, F> : DefaultTransformFunctorTraits<F>
{
enum { smart_block_dim_y = 4 };
enum { smart_shift = 4 };
};
}
template <typename T> struct TransformFunctorTraits< thresh_binary_func<T> > : detail::ThresholdTraits< sizeof(T), thresh_binary_func<T> >
{
};
template <typename T> struct TransformFunctorTraits< thresh_binary_inv_func<T> > : detail::ThresholdTraits< sizeof(T), thresh_binary_inv_func<T> >
{
};
template <typename T> struct TransformFunctorTraits< thresh_trunc_func<T> > : detail::ThresholdTraits< sizeof(T), thresh_trunc_func<T> >
{
};
template <typename T> struct TransformFunctorTraits< thresh_to_zero_func<T> > : detail::ThresholdTraits< sizeof(T), thresh_to_zero_func<T> >
{
};
template <typename T> struct TransformFunctorTraits< thresh_to_zero_inv_func<T> > : detail::ThresholdTraits< sizeof(T), thresh_to_zero_inv_func<T> >
{
};
template <template <typename> class Op, typename T>
template <template <typename> class Op, typename T>
void threshold_caller(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, T thresh, T maxVal,
void threshold_caller(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, T thresh, T maxVal,
cudaStream_t stream)
cudaStream_t stream)
...
@@ -454,8 +511,13 @@ namespace cv { namespace gpu { namespace mathfunc
...
@@ -454,8 +511,13 @@ namespace cv { namespace gpu { namespace mathfunc
//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
// subtract
// subtract
template <typename T>
template <> struct TransformFunctorTraits< minus<short> > : DefaultTransformFunctorTraits< minus<short> >
void subtractCaller(const DevMem2D src1, const DevMem2D src2, DevMem2D dst, cudaStream_t stream)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
template <typename T> void subtractCaller(const DevMem2D src1, const DevMem2D src2, DevMem2D dst, cudaStream_t stream)
{
{
transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<T>)dst, minus<T>(), stream);
transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<T>)dst, minus<T>(), stream);
}
}
...
@@ -499,10 +561,35 @@ namespace cv { namespace gpu { namespace mathfunc
...
@@ -499,10 +561,35 @@ namespace cv { namespace gpu { namespace mathfunc
__device__ __forceinline__ float operator()(const float& e) const
__device__ __forceinline__ float operator()(const float& e) const
{
{
return __powf(fabs(e), power);
return __powf(
::
fabs(e), power);
}
}
};
};
namespace detail
{
template <size_t size, typename T> struct PowOpTraits : DefaultTransformFunctorTraits< PowOp<T> >
{
};
template <typename T> struct PowOpTraits<1, T> : DefaultTransformFunctorTraits< PowOp<T> >
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 8 };
};
template <typename T> struct PowOpTraits<2, T> : DefaultTransformFunctorTraits< PowOp<T> >
{
enum { smart_shift = 4 };
};
template <typename T> struct PowOpTraits<4, T> : DefaultTransformFunctorTraits< PowOp<T> >
{
enum { smart_block_dim_y = 4 };
enum { smart_shift = 4 };
};
}
template <typename T> struct TransformFunctorTraits< PowOp<T> > : detail::PowOpTraits<sizeof(T), T>
{
};
template<typename T>
template<typename T>
void pow_caller(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream)
void pow_caller(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream)
{
{
...
@@ -514,6 +601,5 @@ namespace cv { namespace gpu { namespace mathfunc
...
@@ -514,6 +601,5 @@ namespace cv { namespace gpu { namespace mathfunc
template void pow_caller<short>(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream);
template void pow_caller<short>(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream);
template void pow_caller<ushort>(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream);
template void pow_caller<ushort>(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream);
template void pow_caller<int>(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream);
template void pow_caller<int>(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream);
template void pow_caller<uint>(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream);
template void pow_caller<float>(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream);
template void pow_caller<float>(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream);
}}}
}}}
modules/gpu/src/cuda/mathfunc.cu
View file @
5e9ae6b1
...
@@ -40,14 +40,9 @@
...
@@ -40,14 +40,9 @@
//
//
//M*/
//M*/
#include "opencv2/gpu/device/limits.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/vec_math.hpp"
#include "opencv2/gpu/device/transform.hpp"
#include "internal_shared.hpp"
#include "internal_shared.hpp"
using namespace cv::gpu;
using namespace cv::gpu;
using namespace cv::gpu::device;
#ifndef CV_PI
#ifndef CV_PI
#define CV_PI 3.1415926535897932384626433832795f
#define CV_PI 3.1415926535897932384626433832795f
...
...
modules/gpu/src/cuda/matrix_operations.cu
View file @
5e9ae6b1
...
@@ -45,9 +45,7 @@
...
@@ -45,9 +45,7 @@
#include "opencv2/gpu/device/transform.hpp"
#include "opencv2/gpu/device/transform.hpp"
#include "opencv2/gpu/device/functional.hpp"
#include "opencv2/gpu/device/functional.hpp"
using namespace cv::gpu::device;
namespace cv { namespace gpu { namespace device {
namespace cv { namespace gpu { namespace matrix_operations {
template <typename T> struct shift_and_sizeof;
template <typename T> struct shift_and_sizeof;
template <> struct shift_and_sizeof<signed char> { enum { shift = 0 }; };
template <> struct shift_and_sizeof<signed char> { enum { shift = 0 }; };
...
@@ -250,6 +248,54 @@ namespace cv { namespace gpu { namespace matrix_operations {
...
@@ -250,6 +248,54 @@ namespace cv { namespace gpu { namespace matrix_operations {
const double alpha, beta;
const double alpha, beta;
};
};
namespace detail
{
template <size_t src_size, size_t dst_size, typename F> struct ConvertTraitsDispatcher : DefaultTransformFunctorTraits<F>
{
};
template <typename F> struct ConvertTraitsDispatcher<1, 1, F> : DefaultTransformFunctorTraits<F>
{
enum { smart_shift = 8 };
};
template <typename F> struct ConvertTraitsDispatcher<1, 2, F> : DefaultTransformFunctorTraits<F>
{
enum { smart_shift = 4 };
};
template <typename F> struct ConvertTraitsDispatcher<1, 4, F> : DefaultTransformFunctorTraits<F>
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
template <typename F> struct ConvertTraitsDispatcher<2, 2, F> : DefaultTransformFunctorTraits<F>
{
enum { smart_shift = 4 };
};
template <typename F> struct ConvertTraitsDispatcher<2, 4, F> : DefaultTransformFunctorTraits<F>
{
enum { smart_shift = 2 };
};
template <typename F> struct ConvertTraitsDispatcher<4, 2, F> : DefaultTransformFunctorTraits<F>
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
template <typename F> struct ConvertTraitsDispatcher<4, 4, F> : DefaultTransformFunctorTraits<F>
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 2 };
};
template <typename F> struct ConvertTraits : ConvertTraitsDispatcher<sizeof(typename F::argument_type), sizeof(typename F::result_type), F>
{
};
}
template <typename T, typename D> struct TransformFunctorTraits< Convertor<T, D> > : detail::ConvertTraits< Convertor<T, D> >
{
};
template<typename T, typename D>
template<typename T, typename D>
void cvt_(const DevMem2D& src, const DevMem2D& dst, double alpha, double beta, cudaStream_t stream)
void cvt_(const DevMem2D& src, const DevMem2D& dst, double alpha, double beta, cudaStream_t stream)
{
{
...
...
modules/gpu/src/cudastream.cpp
View file @
5e9ae6b1
...
@@ -71,12 +71,7 @@ cv::gpu::Stream::operator bool() const { throw_nogpu(); return false; }
...
@@ -71,12 +71,7 @@ cv::gpu::Stream::operator bool() const { throw_nogpu(); return false; }
#include "opencv2/gpu/stream_accessor.hpp"
#include "opencv2/gpu/stream_accessor.hpp"
namespace
cv
namespace
cv
{
namespace
gpu
{
namespace
device
{
{
namespace
gpu
{
namespace
matrix_operations
{
void
copy_to_with_mask
(
const
DevMem2D
&
src
,
DevMem2D
dst
,
int
depth
,
const
DevMem2D
&
mask
,
int
channels
,
const
cudaStream_t
&
stream
=
0
);
void
copy_to_with_mask
(
const
DevMem2D
&
src
,
DevMem2D
dst
,
int
depth
,
const
DevMem2D
&
mask
,
int
channels
,
const
cudaStream_t
&
stream
=
0
);
template
<
typename
T
>
template
<
typename
T
>
...
@@ -85,9 +80,7 @@ namespace cv
...
@@ -85,9 +80,7 @@ namespace cv
void
set_to_gpu
(
const
DevMem2D
&
mat
,
const
T
*
scalar
,
const
DevMem2D
&
mask
,
int
channels
,
cudaStream_t
stream
);
void
set_to_gpu
(
const
DevMem2D
&
mat
,
const
T
*
scalar
,
const
DevMem2D
&
mask
,
int
channels
,
cudaStream_t
stream
);
void
convert_gpu
(
const
DevMem2D
&
src
,
int
sdepth
,
const
DevMem2D
&
dst
,
int
ddepth
,
double
alpha
,
double
beta
,
cudaStream_t
stream
=
0
);
void
convert_gpu
(
const
DevMem2D
&
src
,
int
sdepth
,
const
DevMem2D
&
dst
,
int
ddepth
,
double
alpha
,
double
beta
,
cudaStream_t
stream
=
0
);
}
}}}
}
}
struct
Stream
::
Impl
struct
Stream
::
Impl
{
{
...
@@ -108,14 +101,14 @@ namespace
...
@@ -108,14 +101,14 @@ namespace
void
kernelSet
(
GpuMat
&
src
,
const
Scalar
&
s
,
cudaStream_t
stream
)
void
kernelSet
(
GpuMat
&
src
,
const
Scalar
&
s
,
cudaStream_t
stream
)
{
{
Scalar_
<
T
>
sf
=
s
;
Scalar_
<
T
>
sf
=
s
;
matrix_operations
::
set_to_gpu
(
src
,
sf
.
val
,
src
.
channels
(),
stream
);
device
::
set_to_gpu
(
src
,
sf
.
val
,
src
.
channels
(),
stream
);
}
}
template
<
typename
T
>
template
<
typename
T
>
void
kernelSetMask
(
GpuMat
&
src
,
const
Scalar
&
s
,
const
GpuMat
&
mask
,
cudaStream_t
stream
)
void
kernelSetMask
(
GpuMat
&
src
,
const
Scalar
&
s
,
const
GpuMat
&
mask
,
cudaStream_t
stream
)
{
{
Scalar_
<
T
>
sf
=
s
;
Scalar_
<
T
>
sf
=
s
;
matrix_operations
::
set_to_gpu
(
src
,
sf
.
val
,
mask
,
src
.
channels
(),
stream
);
device
::
set_to_gpu
(
src
,
sf
.
val
,
mask
,
src
.
channels
(),
stream
);
}
}
}
}
...
@@ -262,7 +255,7 @@ void cv::gpu::Stream::enqueueConvert(const GpuMat& src, GpuMat& dst, int rtype,
...
@@ -262,7 +255,7 @@ void cv::gpu::Stream::enqueueConvert(const GpuMat& src, GpuMat& dst, int rtype,
psrc
=
&
(
temp
=
src
);
psrc
=
&
(
temp
=
src
);
dst
.
create
(
src
.
size
(),
rtype
);
dst
.
create
(
src
.
size
(),
rtype
);
matrix_operations
::
convert_gpu
(
psrc
->
reshape
(
1
),
sdepth
,
dst
.
reshape
(
1
),
ddepth
,
alpha
,
beta
,
impl
->
stream
);
device
::
convert_gpu
(
psrc
->
reshape
(
1
),
sdepth
,
dst
.
reshape
(
1
),
ddepth
,
alpha
,
beta
,
impl
->
stream
);
}
}
cv
::
gpu
::
Stream
::
operator
bool
()
const
cv
::
gpu
::
Stream
::
operator
bool
()
const
...
...
modules/gpu/src/element_operations.cpp
View file @
5e9ae6b1
...
@@ -67,7 +67,6 @@ void cv::gpu::min(const GpuMat&, double, GpuMat&, Stream&) { throw_nogpu(); }
...
@@ -67,7 +67,6 @@ void cv::gpu::min(const GpuMat&, double, GpuMat&, Stream&) { throw_nogpu(); }
void
cv
::
gpu
::
max
(
const
GpuMat
&
,
const
GpuMat
&
,
GpuMat
&
,
Stream
&
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
max
(
const
GpuMat
&
,
const
GpuMat
&
,
GpuMat
&
,
Stream
&
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
max
(
const
GpuMat
&
,
double
,
GpuMat
&
,
Stream
&
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
max
(
const
GpuMat
&
,
double
,
GpuMat
&
,
Stream
&
)
{
throw_nogpu
();
}
double
cv
::
gpu
::
threshold
(
const
GpuMat
&
,
GpuMat
&
,
double
,
double
,
int
,
Stream
&
)
{
throw_nogpu
();
return
0.0
;}
double
cv
::
gpu
::
threshold
(
const
GpuMat
&
,
GpuMat
&
,
double
,
double
,
int
,
Stream
&
)
{
throw_nogpu
();
return
0.0
;}
void
cv
::
gpu
::
pow
(
const
GpuMat
&
,
double
,
GpuMat
&
,
Stream
&
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
pow
(
const
GpuMat
&
,
double
,
GpuMat
&
,
Stream
&
)
{
throw_nogpu
();
}
#else
#else
...
@@ -180,7 +179,7 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s
...
@@ -180,7 +179,7 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s
nppArithmCaller
(
src1
,
src2
,
dst
,
nppiAdd_8u_C1RSfs
,
nppiAdd_8u_C4RSfs
,
nppiAdd_32s_C1R
,
nppiAdd_32f_C1R
,
StreamAccessor
::
getStream
(
stream
));
nppArithmCaller
(
src1
,
src2
,
dst
,
nppiAdd_8u_C1RSfs
,
nppiAdd_8u_C4RSfs
,
nppiAdd_32s_C1R
,
nppiAdd_32f_C1R
,
StreamAccessor
::
getStream
(
stream
));
}
}
namespace
cv
{
namespace
gpu
{
namespace
mathfunc
namespace
cv
{
namespace
gpu
{
namespace
device
{
{
template
<
typename
T
>
template
<
typename
T
>
void
subtractCaller
(
const
DevMem2D
src1
,
const
DevMem2D
src2
,
DevMem2D
dst
,
cudaStream_t
stream
);
void
subtractCaller
(
const
DevMem2D
src1
,
const
DevMem2D
src2
,
DevMem2D
dst
,
cudaStream_t
stream
);
...
@@ -192,7 +191,7 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stre
...
@@ -192,7 +191,7 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stre
{
{
CV_Assert
(
src1
.
size
()
==
src2
.
size
());
CV_Assert
(
src1
.
size
()
==
src2
.
size
());
dst
.
create
(
src1
.
size
(),
src1
.
type
());
dst
.
create
(
src1
.
size
(),
src1
.
type
());
mathfunc
::
subtractCaller
<
short
>
(
src1
.
reshape
(
1
),
src2
.
reshape
(
1
),
dst
.
reshape
(
1
),
StreamAccessor
::
getStream
(
stream
));
device
::
subtractCaller
<
short
>
(
src1
.
reshape
(
1
),
src2
.
reshape
(
1
),
dst
.
reshape
(
1
),
StreamAccessor
::
getStream
(
stream
));
}
}
else
else
nppArithmCaller
(
src2
,
src1
,
dst
,
nppiSub_8u_C1RSfs
,
nppiSub_8u_C4RSfs
,
nppiSub_32s_C1R
,
nppiSub_32f_C1R
,
StreamAccessor
::
getStream
(
stream
));
nppArithmCaller
(
src2
,
src1
,
dst
,
nppiSub_8u_C1RSfs
,
nppiSub_8u_C4RSfs
,
nppiSub_32s_C1R
,
nppiSub_32f_C1R
,
StreamAccessor
::
getStream
(
stream
));
...
@@ -338,7 +337,7 @@ void cv::gpu::absdiff(const GpuMat& src1, const Scalar& src2, GpuMat& dst, Strea
...
@@ -338,7 +337,7 @@ void cv::gpu::absdiff(const GpuMat& src1, const Scalar& src2, GpuMat& dst, Strea
//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
// Comparison of two matrixes
// Comparison of two matrixes
namespace
cv
{
namespace
gpu
{
namespace
mathfunc
namespace
cv
{
namespace
gpu
{
namespace
device
{
{
void
compare_ne_8uc4
(
const
DevMem2D
&
src1
,
const
DevMem2D
&
src2
,
const
DevMem2D
&
dst
,
cudaStream_t
stream
);
void
compare_ne_8uc4
(
const
DevMem2D
&
src1
,
const
DevMem2D
&
src2
,
const
DevMem2D
&
dst
,
cudaStream_t
stream
);
void
compare_ne_32f
(
const
DevMem2D
&
src1
,
const
DevMem2D
&
src2
,
const
DevMem2D
&
dst
,
cudaStream_t
stream
);
void
compare_ne_32f
(
const
DevMem2D
&
src1
,
const
DevMem2D
&
src2
,
const
DevMem2D
&
dst
,
cudaStream_t
stream
);
...
@@ -375,7 +374,7 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c
...
@@ -375,7 +374,7 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c
}
}
else
else
{
{
mathfunc
::
compare_ne_8uc4
(
src1
,
src2
,
dst
,
stream
);
device
::
compare_ne_8uc4
(
src1
,
src2
,
dst
,
stream
);
}
}
}
}
else
else
...
@@ -393,7 +392,7 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c
...
@@ -393,7 +392,7 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c
}
}
else
else
{
{
mathfunc
::
compare_ne_32f
(
src1
,
src2
,
dst
,
stream
);
device
::
compare_ne_32f
(
src1
,
src2
,
dst
,
stream
);
}
}
}
}
}
}
...
@@ -402,7 +401,7 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c
...
@@ -402,7 +401,7 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c
//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
// Unary bitwise logical operations
// Unary bitwise logical operations
namespace
cv
{
namespace
gpu
{
namespace
mathfunc
namespace
cv
{
namespace
gpu
{
namespace
device
{
{
void
bitwiseNotCaller
(
int
rows
,
int
cols
,
size_t
elem_size1
,
int
cn
,
const
PtrStep
src
,
PtrStep
dst
,
cudaStream_t
stream
);
void
bitwiseNotCaller
(
int
rows
,
int
cols
,
size_t
elem_size1
,
int
cn
,
const
PtrStep
src
,
PtrStep
dst
,
cudaStream_t
stream
);
...
@@ -416,7 +415,7 @@ namespace
...
@@ -416,7 +415,7 @@ namespace
{
{
dst
.
create
(
src
.
size
(),
src
.
type
());
dst
.
create
(
src
.
size
(),
src
.
type
());
cv
::
gpu
::
mathfunc
::
bitwiseNotCaller
(
src
.
rows
,
src
.
cols
,
src
.
elemSize1
(),
cv
::
gpu
::
device
::
bitwiseNotCaller
(
src
.
rows
,
src
.
cols
,
src
.
elemSize1
(),
dst
.
channels
(),
src
,
dst
,
stream
);
dst
.
channels
(),
src
,
dst
,
stream
);
}
}
...
@@ -426,10 +425,10 @@ namespace
...
@@ -426,10 +425,10 @@ namespace
using
namespace
cv
::
gpu
;
using
namespace
cv
::
gpu
;
typedef
void
(
*
Caller
)(
int
,
int
,
int
,
const
PtrStep
,
const
PtrStep
,
PtrStep
,
cudaStream_t
);
typedef
void
(
*
Caller
)(
int
,
int
,
int
,
const
PtrStep
,
const
PtrStep
,
PtrStep
,
cudaStream_t
);
static
Caller
callers
[]
=
{
mathfunc
::
bitwiseMaskNotCaller
<
unsigned
char
>
,
mathfunc
::
bitwiseMaskNotCaller
<
unsigned
char
>
,
static
Caller
callers
[]
=
{
device
::
bitwiseMaskNotCaller
<
unsigned
char
>
,
device
::
bitwiseMaskNotCaller
<
unsigned
char
>
,
mathfunc
::
bitwiseMaskNotCaller
<
unsigned
short
>
,
mathfunc
::
bitwiseMaskNotCaller
<
unsigned
short
>
,
device
::
bitwiseMaskNotCaller
<
unsigned
short
>
,
device
::
bitwiseMaskNotCaller
<
unsigned
short
>
,
mathfunc
::
bitwiseMaskNotCaller
<
unsigned
int
>
,
mathfunc
::
bitwiseMaskNotCaller
<
unsigned
int
>
,
device
::
bitwiseMaskNotCaller
<
unsigned
int
>
,
device
::
bitwiseMaskNotCaller
<
unsigned
int
>
,
mathfunc
::
bitwiseMaskNotCaller
<
unsigned
int
>
};
device
::
bitwiseMaskNotCaller
<
unsigned
int
>
};
CV_Assert
(
mask
.
type
()
==
CV_8U
&&
mask
.
size
()
==
src
.
size
());
CV_Assert
(
mask
.
type
()
==
CV_8U
&&
mask
.
size
()
==
src
.
size
());
dst
.
create
(
src
.
size
(),
src
.
type
());
dst
.
create
(
src
.
size
(),
src
.
type
());
...
@@ -456,7 +455,7 @@ void cv::gpu::bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask, St
...
@@ -456,7 +455,7 @@ void cv::gpu::bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask, St
//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
// Binary bitwise logical operations
// Binary bitwise logical operations
namespace
cv
{
namespace
gpu
{
namespace
mathfunc
namespace
cv
{
namespace
gpu
{
namespace
device
{
{
void
bitwiseOrCaller
(
int
rows
,
int
cols
,
size_t
elem_size1
,
int
cn
,
const
PtrStep
src1
,
const
PtrStep
src2
,
PtrStep
dst
,
cudaStream_t
stream
);
void
bitwiseOrCaller
(
int
rows
,
int
cols
,
size_t
elem_size1
,
int
cn
,
const
PtrStep
src1
,
const
PtrStep
src2
,
PtrStep
dst
,
cudaStream_t
stream
);
...
@@ -482,7 +481,7 @@ namespace
...
@@ -482,7 +481,7 @@ namespace
CV_Assert
(
src1
.
size
()
==
src2
.
size
()
&&
src1
.
type
()
==
src2
.
type
());
CV_Assert
(
src1
.
size
()
==
src2
.
size
()
&&
src1
.
type
()
==
src2
.
type
());
dst
.
create
(
src1
.
size
(),
src1
.
type
());
dst
.
create
(
src1
.
size
(),
src1
.
type
());
cv
::
gpu
::
mathfunc
::
bitwiseOrCaller
(
dst
.
rows
,
dst
.
cols
,
dst
.
elemSize1
(),
cv
::
gpu
::
device
::
bitwiseOrCaller
(
dst
.
rows
,
dst
.
cols
,
dst
.
elemSize1
(),
dst
.
channels
(),
src1
,
src2
,
dst
,
stream
);
dst
.
channels
(),
src1
,
src2
,
dst
,
stream
);
}
}
...
@@ -492,10 +491,10 @@ namespace
...
@@ -492,10 +491,10 @@ namespace
using
namespace
cv
::
gpu
;
using
namespace
cv
::
gpu
;
typedef
void
(
*
Caller
)(
int
,
int
,
int
,
const
PtrStep
,
const
PtrStep
,
const
PtrStep
,
PtrStep
,
cudaStream_t
);
typedef
void
(
*
Caller
)(
int
,
int
,
int
,
const
PtrStep
,
const
PtrStep
,
const
PtrStep
,
PtrStep
,
cudaStream_t
);
static
Caller
callers
[]
=
{
mathfunc
::
bitwiseMaskOrCaller
<
unsigned
char
>
,
mathfunc
::
bitwiseMaskOrCaller
<
unsigned
char
>
,
static
Caller
callers
[]
=
{
device
::
bitwiseMaskOrCaller
<
unsigned
char
>
,
device
::
bitwiseMaskOrCaller
<
unsigned
char
>
,
mathfunc
::
bitwiseMaskOrCaller
<
unsigned
short
>
,
mathfunc
::
bitwiseMaskOrCaller
<
unsigned
short
>
,
device
::
bitwiseMaskOrCaller
<
unsigned
short
>
,
device
::
bitwiseMaskOrCaller
<
unsigned
short
>
,
mathfunc
::
bitwiseMaskOrCaller
<
unsigned
int
>
,
mathfunc
::
bitwiseMaskOrCaller
<
unsigned
int
>
,
device
::
bitwiseMaskOrCaller
<
unsigned
int
>
,
device
::
bitwiseMaskOrCaller
<
unsigned
int
>
,
mathfunc
::
bitwiseMaskOrCaller
<
unsigned
int
>
};
device
::
bitwiseMaskOrCaller
<
unsigned
int
>
};
CV_Assert
(
src1
.
size
()
==
src2
.
size
()
&&
src1
.
type
()
==
src2
.
type
());
CV_Assert
(
src1
.
size
()
==
src2
.
size
()
&&
src1
.
type
()
==
src2
.
type
());
dst
.
create
(
src1
.
size
(),
src1
.
type
());
dst
.
create
(
src1
.
size
(),
src1
.
type
());
...
@@ -513,7 +512,7 @@ namespace
...
@@ -513,7 +512,7 @@ namespace
CV_Assert
(
src1
.
size
()
==
src2
.
size
()
&&
src1
.
type
()
==
src2
.
type
());
CV_Assert
(
src1
.
size
()
==
src2
.
size
()
&&
src1
.
type
()
==
src2
.
type
());
dst
.
create
(
src1
.
size
(),
src1
.
type
());
dst
.
create
(
src1
.
size
(),
src1
.
type
());
cv
::
gpu
::
mathfunc
::
bitwiseAndCaller
(
dst
.
rows
,
dst
.
cols
,
dst
.
elemSize1
(),
cv
::
gpu
::
device
::
bitwiseAndCaller
(
dst
.
rows
,
dst
.
cols
,
dst
.
elemSize1
(),
dst
.
channels
(),
src1
,
src2
,
dst
,
stream
);
dst
.
channels
(),
src1
,
src2
,
dst
,
stream
);
}
}
...
@@ -523,10 +522,10 @@ namespace
...
@@ -523,10 +522,10 @@ namespace
using
namespace
cv
::
gpu
;
using
namespace
cv
::
gpu
;
typedef
void
(
*
Caller
)(
int
,
int
,
int
,
const
PtrStep
,
const
PtrStep
,
const
PtrStep
,
PtrStep
,
cudaStream_t
);
typedef
void
(
*
Caller
)(
int
,
int
,
int
,
const
PtrStep
,
const
PtrStep
,
const
PtrStep
,
PtrStep
,
cudaStream_t
);
static
Caller
callers
[]
=
{
mathfunc
::
bitwiseMaskAndCaller
<
unsigned
char
>
,
mathfunc
::
bitwiseMaskAndCaller
<
unsigned
char
>
,
static
Caller
callers
[]
=
{
device
::
bitwiseMaskAndCaller
<
unsigned
char
>
,
device
::
bitwiseMaskAndCaller
<
unsigned
char
>
,
mathfunc
::
bitwiseMaskAndCaller
<
unsigned
short
>
,
mathfunc
::
bitwiseMaskAndCaller
<
unsigned
short
>
,
device
::
bitwiseMaskAndCaller
<
unsigned
short
>
,
device
::
bitwiseMaskAndCaller
<
unsigned
short
>
,
mathfunc
::
bitwiseMaskAndCaller
<
unsigned
int
>
,
mathfunc
::
bitwiseMaskAndCaller
<
unsigned
int
>
,
device
::
bitwiseMaskAndCaller
<
unsigned
int
>
,
device
::
bitwiseMaskAndCaller
<
unsigned
int
>
,
mathfunc
::
bitwiseMaskAndCaller
<
unsigned
int
>
};
device
::
bitwiseMaskAndCaller
<
unsigned
int
>
};
CV_Assert
(
src1
.
size
()
==
src2
.
size
()
&&
src1
.
type
()
==
src2
.
type
());
CV_Assert
(
src1
.
size
()
==
src2
.
size
()
&&
src1
.
type
()
==
src2
.
type
());
dst
.
create
(
src1
.
size
(),
src1
.
type
());
dst
.
create
(
src1
.
size
(),
src1
.
type
());
...
@@ -544,7 +543,7 @@ namespace
...
@@ -544,7 +543,7 @@ namespace
CV_Assert
(
src1
.
size
()
==
src2
.
size
()
&&
src1
.
type
()
==
src2
.
type
());
CV_Assert
(
src1
.
size
()
==
src2
.
size
()
&&
src1
.
type
()
==
src2
.
type
());
dst
.
create
(
src1
.
size
(),
src1
.
type
());
dst
.
create
(
src1
.
size
(),
src1
.
type
());
cv
::
gpu
::
mathfunc
::
bitwiseXorCaller
(
dst
.
rows
,
dst
.
cols
,
dst
.
elemSize1
(),
cv
::
gpu
::
device
::
bitwiseXorCaller
(
dst
.
rows
,
dst
.
cols
,
dst
.
elemSize1
(),
dst
.
channels
(),
src1
,
src2
,
dst
,
stream
);
dst
.
channels
(),
src1
,
src2
,
dst
,
stream
);
}
}
...
@@ -554,10 +553,10 @@ namespace
...
@@ -554,10 +553,10 @@ namespace
using
namespace
cv
::
gpu
;
using
namespace
cv
::
gpu
;
typedef
void
(
*
Caller
)(
int
,
int
,
int
,
const
PtrStep
,
const
PtrStep
,
const
PtrStep
,
PtrStep
,
cudaStream_t
);
typedef
void
(
*
Caller
)(
int
,
int
,
int
,
const
PtrStep
,
const
PtrStep
,
const
PtrStep
,
PtrStep
,
cudaStream_t
);
static
Caller
callers
[]
=
{
mathfunc
::
bitwiseMaskXorCaller
<
unsigned
char
>
,
mathfunc
::
bitwiseMaskXorCaller
<
unsigned
char
>
,
static
Caller
callers
[]
=
{
device
::
bitwiseMaskXorCaller
<
unsigned
char
>
,
device
::
bitwiseMaskXorCaller
<
unsigned
char
>
,
mathfunc
::
bitwiseMaskXorCaller
<
unsigned
short
>
,
mathfunc
::
bitwiseMaskXorCaller
<
unsigned
short
>
,
device
::
bitwiseMaskXorCaller
<
unsigned
short
>
,
device
::
bitwiseMaskXorCaller
<
unsigned
short
>
,
mathfunc
::
bitwiseMaskXorCaller
<
unsigned
int
>
,
mathfunc
::
bitwiseMaskXorCaller
<
unsigned
int
>
,
device
::
bitwiseMaskXorCaller
<
unsigned
int
>
,
device
::
bitwiseMaskXorCaller
<
unsigned
int
>
,
mathfunc
::
bitwiseMaskXorCaller
<
unsigned
int
>
};
device
::
bitwiseMaskXorCaller
<
unsigned
int
>
};
CV_Assert
(
src1
.
size
()
==
src2
.
size
()
&&
src1
.
type
()
==
src2
.
type
());
CV_Assert
(
src1
.
size
()
==
src2
.
size
()
&&
src1
.
type
()
==
src2
.
type
());
dst
.
create
(
src1
.
size
(),
src1
.
type
());
dst
.
create
(
src1
.
size
(),
src1
.
type
());
...
@@ -601,7 +600,7 @@ void cv::gpu::bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, c
...
@@ -601,7 +600,7 @@ void cv::gpu::bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, c
//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
// Minimum and maximum operations
// Minimum and maximum operations
namespace
cv
{
namespace
gpu
{
namespace
mathfunc
namespace
cv
{
namespace
gpu
{
namespace
device
{
{
template
<
typename
T
>
template
<
typename
T
>
void
min_gpu
(
const
DevMem2D_
<
T
>&
src1
,
const
DevMem2D_
<
T
>&
src2
,
const
DevMem2D_
<
T
>&
dst
,
cudaStream_t
stream
);
void
min_gpu
(
const
DevMem2D_
<
T
>&
src1
,
const
DevMem2D_
<
T
>&
src2
,
const
DevMem2D_
<
T
>&
dst
,
cudaStream_t
stream
);
...
@@ -623,14 +622,14 @@ namespace
...
@@ -623,14 +622,14 @@ namespace
{
{
CV_Assert
(
src1
.
size
()
==
src2
.
size
()
&&
src1
.
type
()
==
src2
.
type
());
CV_Assert
(
src1
.
size
()
==
src2
.
size
()
&&
src1
.
type
()
==
src2
.
type
());
dst
.
create
(
src1
.
size
(),
src1
.
type
());
dst
.
create
(
src1
.
size
(),
src1
.
type
());
mathfunc
::
min_gpu
<
T
>
(
src1
.
reshape
(
1
),
src2
.
reshape
(
1
),
dst
.
reshape
(
1
),
stream
);
device
::
min_gpu
<
T
>
(
src1
.
reshape
(
1
),
src2
.
reshape
(
1
),
dst
.
reshape
(
1
),
stream
);
}
}
template
<
typename
T
>
template
<
typename
T
>
void
min_caller
(
const
GpuMat
&
src1
,
double
src2
,
GpuMat
&
dst
,
cudaStream_t
stream
)
void
min_caller
(
const
GpuMat
&
src1
,
double
src2
,
GpuMat
&
dst
,
cudaStream_t
stream
)
{
{
dst
.
create
(
src1
.
size
(),
src1
.
type
());
dst
.
create
(
src1
.
size
(),
src1
.
type
());
mathfunc
::
min_gpu
<
T
>
(
src1
.
reshape
(
1
),
saturate_cast
<
T
>
(
src2
),
dst
.
reshape
(
1
),
stream
);
device
::
min_gpu
<
T
>
(
src1
.
reshape
(
1
),
saturate_cast
<
T
>
(
src2
),
dst
.
reshape
(
1
),
stream
);
}
}
template
<
typename
T
>
template
<
typename
T
>
...
@@ -638,14 +637,14 @@ namespace
...
@@ -638,14 +637,14 @@ namespace
{
{
CV_Assert
(
src1
.
size
()
==
src2
.
size
()
&&
src1
.
type
()
==
src2
.
type
());
CV_Assert
(
src1
.
size
()
==
src2
.
size
()
&&
src1
.
type
()
==
src2
.
type
());
dst
.
create
(
src1
.
size
(),
src1
.
type
());
dst
.
create
(
src1
.
size
(),
src1
.
type
());
mathfunc
::
max_gpu
<
T
>
(
src1
.
reshape
(
1
),
src2
.
reshape
(
1
),
dst
.
reshape
(
1
),
stream
);
device
::
max_gpu
<
T
>
(
src1
.
reshape
(
1
),
src2
.
reshape
(
1
),
dst
.
reshape
(
1
),
stream
);
}
}
template
<
typename
T
>
template
<
typename
T
>
void
max_caller
(
const
GpuMat
&
src1
,
double
src2
,
GpuMat
&
dst
,
cudaStream_t
stream
)
void
max_caller
(
const
GpuMat
&
src1
,
double
src2
,
GpuMat
&
dst
,
cudaStream_t
stream
)
{
{
dst
.
create
(
src1
.
size
(),
src1
.
type
());
dst
.
create
(
src1
.
size
(),
src1
.
type
());
mathfunc
::
max_gpu
<
T
>
(
src1
.
reshape
(
1
),
saturate_cast
<
T
>
(
src2
),
dst
.
reshape
(
1
),
stream
);
device
::
max_gpu
<
T
>
(
src1
.
reshape
(
1
),
saturate_cast
<
T
>
(
src2
),
dst
.
reshape
(
1
),
stream
);
}
}
}
}
...
@@ -709,7 +708,7 @@ void cv::gpu::max(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream)
...
@@ -709,7 +708,7 @@ void cv::gpu::max(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream)
////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////
// threshold
// threshold
namespace
cv
{
namespace
gpu
{
namespace
mathfunc
namespace
cv
{
namespace
gpu
{
namespace
device
{
{
template
<
typename
T
>
template
<
typename
T
>
void
threshold_gpu
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
T
thresh
,
T
maxVal
,
int
type
,
void
threshold_gpu
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
T
thresh
,
T
maxVal
,
int
type
,
...
@@ -718,24 +717,25 @@ namespace cv { namespace gpu { namespace mathfunc
...
@@ -718,24 +717,25 @@ namespace cv { namespace gpu { namespace mathfunc
namespace
namespace
{
{
template
<
typename
T
>
template
<
typename
T
>
void
threshold_caller
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
double
thresh
,
double
maxVal
,
int
type
,
cudaStream_t
stream
)
void
threshold_caller
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
double
thresh
,
double
maxVal
,
int
type
,
cudaStream_t
stream
)
{
{
mathfunc
::
threshold_gpu
<
T
>
(
src
,
dst
,
saturate_cast
<
T
>
(
thresh
),
saturate_cast
<
T
>
(
maxVal
),
type
,
stream
);
device
::
threshold_gpu
<
T
>
(
src
,
dst
,
saturate_cast
<
T
>
(
thresh
),
saturate_cast
<
T
>
(
maxVal
),
type
,
stream
);
}
}
}
}
double
cv
::
gpu
::
threshold
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
double
thresh
,
double
maxVal
,
int
type
,
Stream
&
s
)
double
cv
::
gpu
::
threshold
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
double
thresh
,
double
maxVal
,
int
type
,
Stream
&
s
)
{
{
CV_Assert
(
src
.
channels
()
==
1
&&
src
.
depth
()
<=
CV_64F
);
CV_Assert
(
type
<=
THRESH_TOZERO_INV
);
dst
.
create
(
src
.
size
(),
src
.
type
());
cudaStream_t
stream
=
StreamAccessor
::
getStream
(
s
);
cudaStream_t
stream
=
StreamAccessor
::
getStream
(
s
);
if
(
src
.
type
()
==
CV_32FC1
&&
type
==
THRESH_TRUNC
)
if
(
src
.
type
()
==
CV_32FC1
&&
type
==
THRESH_TRUNC
)
{
{
NppStreamHandler
h
(
stream
);
NppStreamHandler
h
(
stream
);
dst
.
create
(
src
.
size
(),
src
.
type
());
NppiSize
sz
;
NppiSize
sz
;
sz
.
width
=
src
.
cols
;
sz
.
width
=
src
.
cols
;
sz
.
height
=
src
.
rows
;
sz
.
height
=
src
.
rows
;
...
@@ -761,12 +761,7 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double
...
@@ -761,12 +761,7 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double
threshold_caller
<
int
>
,
threshold_caller
<
float
>
,
threshold_caller
<
double
>
threshold_caller
<
int
>
,
threshold_caller
<
float
>
,
threshold_caller
<
double
>
};
};
CV_Assert
(
src
.
channels
()
==
1
&&
src
.
depth
()
<=
CV_64F
);
if
(
src
.
depth
()
!=
CV_32F
&&
src
.
depth
()
!=
CV_64F
)
CV_Assert
(
type
<=
THRESH_TOZERO_INV
);
dst
.
create
(
src
.
size
(),
src
.
type
());
if
(
src
.
depth
()
!=
CV_32F
)
{
{
thresh
=
cvFloor
(
thresh
);
thresh
=
cvFloor
(
thresh
);
maxVal
=
cvRound
(
maxVal
);
maxVal
=
cvRound
(
maxVal
);
...
@@ -781,17 +776,11 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double
...
@@ -781,17 +776,11 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double
////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////
// pow
// pow
namespace
cv
namespace
cv
{
namespace
gpu
{
namespace
device
{
{
namespace
gpu
{
namespace
mathfunc
{
template
<
typename
T
>
template
<
typename
T
>
void
pow_caller
(
const
DevMem2D
&
src
,
float
power
,
DevMem2D
dst
,
cudaStream_t
stream
);
void
pow_caller
(
const
DevMem2D
&
src
,
float
power
,
DevMem2D
dst
,
cudaStream_t
stream
);
}
}}}
}
}
void
cv
::
gpu
::
pow
(
const
GpuMat
&
src
,
double
power
,
GpuMat
&
dst
,
Stream
&
stream
)
void
cv
::
gpu
::
pow
(
const
GpuMat
&
src
,
double
power
,
GpuMat
&
dst
,
Stream
&
stream
)
{
{
...
@@ -802,9 +791,9 @@ void cv::gpu::pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream)
...
@@ -802,9 +791,9 @@ void cv::gpu::pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream)
static
const
caller_t
callers
[]
=
static
const
caller_t
callers
[]
=
{
{
mathfunc
::
pow_caller
<
unsigned
char
>
,
mathfunc
::
pow_caller
<
signed
char
>
,
device
::
pow_caller
<
unsigned
char
>
,
device
::
pow_caller
<
signed
char
>
,
mathfunc
::
pow_caller
<
unsigned
short
>
,
mathfunc
::
pow_caller
<
short
>
,
device
::
pow_caller
<
unsigned
short
>
,
device
::
pow_caller
<
short
>
,
mathfunc
::
pow_caller
<
int
>
,
mathfunc
::
pow_caller
<
float
>
device
::
pow_caller
<
int
>
,
device
::
pow_caller
<
float
>
};
};
callers
[
src
.
depth
()](
src
.
reshape
(
1
),
(
float
)
power
,
dst
.
reshape
(
1
),
StreamAccessor
::
getStream
(
stream
));
callers
[
src
.
depth
()](
src
.
reshape
(
1
),
(
float
)
power
,
dst
.
reshape
(
1
),
StreamAccessor
::
getStream
(
stream
));
...
...
modules/gpu/src/gpumat.cpp
View file @
5e9ae6b1
...
@@ -393,7 +393,7 @@ void cv::gpu::ensureSizeIsEnough(int, int, int, GpuMat&) { throw_nogpu(); }
...
@@ -393,7 +393,7 @@ void cv::gpu::ensureSizeIsEnough(int, int, int, GpuMat&) { throw_nogpu(); }
#else
/* !defined (HAVE_CUDA) */
#else
/* !defined (HAVE_CUDA) */
namespace
cv
{
namespace
gpu
{
namespace
matrix_operations
namespace
cv
{
namespace
gpu
{
namespace
device
{
{
void
copy_to_with_mask
(
const
DevMem2D
&
src
,
DevMem2D
dst
,
int
depth
,
const
DevMem2D
&
mask
,
int
channels
,
const
cudaStream_t
&
stream
=
0
);
void
copy_to_with_mask
(
const
DevMem2D
&
src
,
DevMem2D
dst
,
int
depth
,
const
DevMem2D
&
mask
,
int
channels
,
const
cudaStream_t
&
stream
=
0
);
...
@@ -449,7 +449,7 @@ void cv::gpu::GpuMat::copyTo(GpuMat& mat, const GpuMat& mask) const
...
@@ -449,7 +449,7 @@ void cv::gpu::GpuMat::copyTo(GpuMat& mat, const GpuMat& mask) const
else
else
{
{
mat
.
create
(
size
(),
type
());
mat
.
create
(
size
(),
type
());
cv
::
gpu
::
matrix_operations
::
copy_to_with_mask
(
*
this
,
mat
,
depth
(),
mask
,
channels
());
device
::
copy_to_with_mask
(
*
this
,
mat
,
depth
(),
mask
,
channels
());
}
}
}
}
...
@@ -508,7 +508,7 @@ namespace
...
@@ -508,7 +508,7 @@ namespace
void
convertToKernelCaller
(
const
GpuMat
&
src
,
GpuMat
&
dst
)
void
convertToKernelCaller
(
const
GpuMat
&
src
,
GpuMat
&
dst
)
{
{
matrix_operations
::
convert_gpu
(
src
.
reshape
(
1
),
src
.
depth
(),
dst
.
reshape
(
1
),
dst
.
depth
(),
1.0
,
0.0
);
device
::
convert_gpu
(
src
.
reshape
(
1
),
src
.
depth
(),
dst
.
reshape
(
1
),
dst
.
depth
(),
1.0
,
0.0
);
}
}
}
}
...
@@ -540,7 +540,7 @@ void cv::gpu::GpuMat::convertTo( GpuMat& dst, int rtype, double alpha, double be
...
@@ -540,7 +540,7 @@ void cv::gpu::GpuMat::convertTo( GpuMat& dst, int rtype, double alpha, double be
dst
.
create
(
size
(),
rtype
);
dst
.
create
(
size
(),
rtype
);
if
(
!
noScale
)
if
(
!
noScale
)
matrix_operations
::
convert_gpu
(
psrc
->
reshape
(
1
),
sdepth
,
dst
.
reshape
(
1
),
ddepth
,
alpha
,
beta
);
device
::
convert_gpu
(
psrc
->
reshape
(
1
),
sdepth
,
dst
.
reshape
(
1
),
ddepth
,
alpha
,
beta
);
else
else
{
{
typedef
void
(
*
convert_caller_t
)(
const
GpuMat
&
src
,
GpuMat
&
dst
);
typedef
void
(
*
convert_caller_t
)(
const
GpuMat
&
src
,
GpuMat
&
dst
);
...
@@ -681,7 +681,7 @@ namespace
...
@@ -681,7 +681,7 @@ namespace
void
kernelSet
(
GpuMat
&
src
,
const
Scalar
&
s
)
void
kernelSet
(
GpuMat
&
src
,
const
Scalar
&
s
)
{
{
Scalar_
<
T
>
sf
=
s
;
Scalar_
<
T
>
sf
=
s
;
matrix_operations
::
set_to_gpu
(
src
,
sf
.
val
,
src
.
channels
(),
0
);
device
::
set_to_gpu
(
src
,
sf
.
val
,
src
.
channels
(),
0
);
}
}
template
<
int
SDEPTH
,
int
SCN
>
struct
NppSetMaskFunc
template
<
int
SDEPTH
,
int
SCN
>
struct
NppSetMaskFunc
...
@@ -732,7 +732,7 @@ namespace
...
@@ -732,7 +732,7 @@ namespace
void
kernelSetMask
(
GpuMat
&
src
,
const
Scalar
&
s
,
const
GpuMat
&
mask
)
void
kernelSetMask
(
GpuMat
&
src
,
const
Scalar
&
s
,
const
GpuMat
&
mask
)
{
{
Scalar_
<
T
>
sf
=
s
;
Scalar_
<
T
>
sf
=
s
;
matrix_operations
::
set_to_gpu
(
src
,
sf
.
val
,
mask
,
src
.
channels
(),
0
);
device
::
set_to_gpu
(
src
,
sf
.
val
,
mask
,
src
.
channels
(),
0
);
}
}
}
}
...
...
modules/gpu/src/opencv2/gpu/device/detail/transform.hpp
View file @
5e9ae6b1
...
@@ -45,6 +45,7 @@
...
@@ -45,6 +45,7 @@
#include "internal_shared.hpp"
#include "internal_shared.hpp"
#include "../vec_traits.hpp"
#include "../vec_traits.hpp"
#include "../functional.hpp"
namespace
cv
{
namespace
gpu
{
namespace
device
namespace
cv
{
namespace
gpu
{
namespace
device
{
{
...
@@ -68,51 +69,17 @@ namespace cv { namespace gpu { namespace device
...
@@ -68,51 +69,17 @@ namespace cv { namespace gpu { namespace device
//! Read Write Traits
//! Read Write Traits
template
<
size_t
src_elem_size
,
size_t
dst_elem_size
>
template
<
typename
T
,
typename
D
,
int
shift
>
struct
UnaryReadWriteTraits
struct
UnReadWriteTraits_
{
{
enum
{
shift
=
1
};
};
template
<
size_t
src_elem_size
>
struct
UnReadWriteTraits_
<
src_elem_size
,
1
>
{
enum
{
shift
=
4
};
};
template
<
size_t
src_elem_size
>
struct
UnReadWriteTraits_
<
src_elem_size
,
2
>
{
enum
{
shift
=
2
};
};
template
<
typename
T
,
typename
D
>
struct
UnReadWriteTraits
{
enum
{
shift
=
UnReadWriteTraits_
<
sizeof
(
T
),
sizeof
(
D
)
>::
shift
};
typedef
typename
TypeVec
<
T
,
shift
>::
vec_type
read_type
;
typedef
typename
TypeVec
<
T
,
shift
>::
vec_type
read_type
;
typedef
typename
TypeVec
<
D
,
shift
>::
vec_type
write_type
;
typedef
typename
TypeVec
<
D
,
shift
>::
vec_type
write_type
;
};
};
template
<
size_t
src_elem_size1
,
size_t
src_elem_size2
,
size_t
dst_elem_size
>
template
<
typename
T1
,
typename
T2
,
typename
D
,
int
shift
>
struct
BinaryReadWriteTraits
struct
BinReadWriteTraits_
{
enum
{
shift
=
1
};
};
template
<
size_t
src_elem_size1
,
size_t
src_elem_size2
>
struct
BinReadWriteTraits_
<
src_elem_size1
,
src_elem_size2
,
1
>
{
enum
{
shift
=
4
};
};
template
<
size_t
src_elem_size1
,
size_t
src_elem_size2
>
struct
BinReadWriteTraits_
<
src_elem_size1
,
src_elem_size2
,
2
>
{
enum
{
shift
=
2
};
};
template
<
typename
T1
,
typename
T2
,
typename
D
>
struct
BinReadWriteTraits
{
{
enum
{
shift
=
BinReadWriteTraits_
<
sizeof
(
T1
),
sizeof
(
T2
),
sizeof
(
D
)
>::
shift
};
typedef
typename
TypeVec
<
T1
,
shift
>::
vec_type
read_type1
;
typedef
typename
TypeVec
<
T1
,
shift
>::
vec_type
read_type1
;
typedef
typename
TypeVec
<
T2
,
shift
>::
vec_type
read_type2
;
typedef
typename
TypeVec
<
T2
,
shift
>::
vec_type
read_type2
;
typedef
typename
TypeVec
<
D
,
shift
>::
vec_type
write_type
;
typedef
typename
TypeVec
<
D
,
shift
>::
vec_type
write_type
;
};
};
//! Transform kernels
//! Transform kernels
...
@@ -206,29 +173,73 @@ namespace cv { namespace gpu { namespace device
...
@@ -206,29 +173,73 @@ namespace cv { namespace gpu { namespace device
dst
.
w
=
op
(
src1
.
w
,
src2
.
w
);
dst
.
w
=
op
(
src1
.
w
,
src2
.
w
);
}
}
};
};
template
<>
struct
OpUnroller
<
8
>
{
template
<
typename
T
,
typename
D
,
typename
UnOp
,
typename
Mask
>
static
__device__
__forceinline__
void
unroll
(
const
T
&
src
,
D
&
dst
,
const
Mask
&
mask
,
const
UnOp
&
op
,
int
x_shifted
,
int
y
)
{
if
(
mask
(
y
,
x_shifted
))
dst
.
a0
=
op
(
src
.
a0
);
if
(
mask
(
y
,
x_shifted
+
1
))
dst
.
a1
=
op
(
src
.
a1
);
if
(
mask
(
y
,
x_shifted
+
2
))
dst
.
a2
=
op
(
src
.
a2
);
if
(
mask
(
y
,
x_shifted
+
3
))
dst
.
a3
=
op
(
src
.
a3
);
if
(
mask
(
y
,
x_shifted
+
4
))
dst
.
a4
=
op
(
src
.
a4
);
if
(
mask
(
y
,
x_shifted
+
5
))
dst
.
a5
=
op
(
src
.
a5
);
if
(
mask
(
y
,
x_shifted
+
6
))
dst
.
a6
=
op
(
src
.
a6
);
if
(
mask
(
y
,
x_shifted
+
7
))
dst
.
a7
=
op
(
src
.
a7
);
}
template
<
typename
T1
,
typename
T2
,
typename
D
,
typename
BinOp
,
typename
Mask
>
static
__device__
__forceinline__
void
unroll
(
const
T1
&
src1
,
const
T2
&
src2
,
D
&
dst
,
const
Mask
&
mask
,
const
BinOp
&
op
,
int
x_shifted
,
int
y
)
{
if
(
mask
(
y
,
x_shifted
))
dst
.
a0
=
op
(
src1
.
a0
,
src2
.
a0
);
if
(
mask
(
y
,
x_shifted
+
1
))
dst
.
a1
=
op
(
src1
.
a1
,
src2
.
a1
);
if
(
mask
(
y
,
x_shifted
+
2
))
dst
.
a2
=
op
(
src1
.
a2
,
src2
.
a2
);
if
(
mask
(
y
,
x_shifted
+
3
))
dst
.
a3
=
op
(
src1
.
a3
,
src2
.
a3
);
if
(
mask
(
y
,
x_shifted
+
4
))
dst
.
a4
=
op
(
src1
.
a4
,
src2
.
a4
);
if
(
mask
(
y
,
x_shifted
+
5
))
dst
.
a5
=
op
(
src1
.
a5
,
src2
.
a5
);
if
(
mask
(
y
,
x_shifted
+
6
))
dst
.
a6
=
op
(
src1
.
a6
,
src2
.
a6
);
if
(
mask
(
y
,
x_shifted
+
7
))
dst
.
a7
=
op
(
src1
.
a7
,
src2
.
a7
);
}
};
template
<
typename
T
,
typename
D
,
typename
UnOp
,
typename
Mask
>
template
<
typename
T
,
typename
D
,
typename
UnOp
,
typename
Mask
>
__global__
static
void
transformSmart
(
const
DevMem2D_
<
T
>
src_
,
PtrStep_
<
D
>
dst_
,
const
Mask
mask
,
const
UnOp
op
)
__global__
static
void
transformSmart
(
const
DevMem2D_
<
T
>
src_
,
PtrStep_
<
D
>
dst_
,
const
Mask
mask
,
const
UnOp
op
)
{
{
typedef
typename
UnReadWriteTraits
<
T
,
D
>::
read_type
read_type
;
typedef
TransformFunctorTraits
<
UnOp
>
ft
;
typedef
typename
Un
ReadWriteTraits
<
T
,
D
>::
write_type
write
_type
;
typedef
typename
Un
aryReadWriteTraits
<
T
,
D
,
ft
::
smart_shift
>::
read_type
read
_type
;
const
int
shift
=
UnReadWriteTraits
<
T
,
D
>::
shift
;
typedef
typename
UnaryReadWriteTraits
<
T
,
D
,
ft
::
smart_shift
>::
write_type
write_type
;
const
int
x
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
const
int
x
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
const
int
y
=
threadIdx
.
y
+
blockIdx
.
y
*
blockDim
.
y
;
const
int
y
=
threadIdx
.
y
+
blockIdx
.
y
*
blockDim
.
y
;
const
int
x_shifted
=
x
*
shift
;
const
int
x_shifted
=
x
*
ft
::
smart_
shift
;
if
(
y
<
src_
.
rows
)
if
(
y
<
src_
.
rows
)
{
{
const
T
*
src
=
src_
.
ptr
(
y
);
const
T
*
src
=
src_
.
ptr
(
y
);
D
*
dst
=
dst_
.
ptr
(
y
);
D
*
dst
=
dst_
.
ptr
(
y
);
if
(
x_shifted
+
shift
-
1
<
src_
.
cols
)
if
(
x_shifted
+
ft
::
smart_
shift
-
1
<
src_
.
cols
)
{
{
const
read_type
src_n_el
=
((
const
read_type
*
)
src
)[
x
];
const
read_type
src_n_el
=
((
const
read_type
*
)
src
)[
x
];
write_type
dst_n_el
;
write_type
dst_n_el
;
OpUnroller
<
shift
>::
unroll
(
src_n_el
,
dst_n_el
,
mask
,
op
,
x_shifted
,
y
);
OpUnroller
<
ft
::
smart_
shift
>::
unroll
(
src_n_el
,
dst_n_el
,
mask
,
op
,
x_shifted
,
y
);
((
write_type
*
)
dst
)[
x
]
=
dst_n_el
;
((
write_type
*
)
dst
)[
x
]
=
dst_n_el
;
}
}
...
@@ -259,14 +270,14 @@ namespace cv { namespace gpu { namespace device
...
@@ -259,14 +270,14 @@ namespace cv { namespace gpu { namespace device
__global__
static
void
transformSmart
(
const
DevMem2D_
<
T1
>
src1_
,
const
PtrStep_
<
T2
>
src2_
,
PtrStep_
<
D
>
dst_
,
__global__
static
void
transformSmart
(
const
DevMem2D_
<
T1
>
src1_
,
const
PtrStep_
<
T2
>
src2_
,
PtrStep_
<
D
>
dst_
,
const
Mask
mask
,
const
BinOp
op
)
const
Mask
mask
,
const
BinOp
op
)
{
{
typedef
typename
BinReadWriteTraits
<
T1
,
T2
,
D
>::
read_type1
read_type1
;
typedef
TransformFunctorTraits
<
BinOp
>
ft
;
typedef
typename
Bin
ReadWriteTraits
<
T1
,
T2
,
D
>::
read_type2
read_type2
;
typedef
typename
Bin
aryReadWriteTraits
<
T1
,
T2
,
D
,
ft
::
smart_shift
>::
read_type1
read_type1
;
typedef
typename
Bin
ReadWriteTraits
<
T1
,
T2
,
D
>::
write_type
write_type
;
typedef
typename
Bin
aryReadWriteTraits
<
T1
,
T2
,
D
,
ft
::
smart_shift
>::
read_type2
read_type2
;
const
int
shift
=
BinReadWriteTraits
<
T1
,
T2
,
D
>::
shift
;
typedef
typename
BinaryReadWriteTraits
<
T1
,
T2
,
D
,
ft
::
smart_shift
>::
write_type
write_type
;
const
int
x
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
const
int
x
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
const
int
y
=
threadIdx
.
y
+
blockIdx
.
y
*
blockDim
.
y
;
const
int
y
=
threadIdx
.
y
+
blockIdx
.
y
*
blockDim
.
y
;
const
int
x_shifted
=
x
*
shift
;
const
int
x_shifted
=
x
*
ft
::
smart_
shift
;
if
(
y
<
src1_
.
rows
)
if
(
y
<
src1_
.
rows
)
{
{
...
@@ -274,13 +285,13 @@ namespace cv { namespace gpu { namespace device
...
@@ -274,13 +285,13 @@ namespace cv { namespace gpu { namespace device
const
T2
*
src2
=
src2_
.
ptr
(
y
);
const
T2
*
src2
=
src2_
.
ptr
(
y
);
D
*
dst
=
dst_
.
ptr
(
y
);
D
*
dst
=
dst_
.
ptr
(
y
);
if
(
x_shifted
+
shift
-
1
<
src1_
.
cols
)
if
(
x_shifted
+
ft
::
smart_
shift
-
1
<
src1_
.
cols
)
{
{
const
read_type1
src1_n_el
=
((
const
read_type1
*
)
src1
)[
x
];
const
read_type1
src1_n_el
=
((
const
read_type1
*
)
src1
)[
x
];
const
read_type2
src2_n_el
=
((
const
read_type2
*
)
src2
)[
x
];
const
read_type2
src2_n_el
=
((
const
read_type2
*
)
src2
)[
x
];
write_type
dst_n_el
;
write_type
dst_n_el
;
OpUnroller
<
shift
>::
unroll
(
src1_n_el
,
src2_n_el
,
dst_n_el
,
mask
,
op
,
x_shifted
,
y
);
OpUnroller
<
ft
::
smart_
shift
>::
unroll
(
src1_n_el
,
src2_n_el
,
dst_n_el
,
mask
,
op
,
x_shifted
,
y
);
((
write_type
*
)
dst
)[
x
]
=
dst_n_el
;
((
write_type
*
)
dst
)[
x
]
=
dst_n_el
;
}
}
...
@@ -316,11 +327,10 @@ namespace cv { namespace gpu { namespace device
...
@@ -316,11 +327,10 @@ namespace cv { namespace gpu { namespace device
template
<
typename
T
,
typename
D
,
typename
UnOp
,
typename
Mask
>
template
<
typename
T
,
typename
D
,
typename
UnOp
,
typename
Mask
>
static
void
call
(
const
DevMem2D_
<
T
>&
src
,
const
DevMem2D_
<
D
>&
dst
,
const
UnOp
&
op
,
const
Mask
&
mask
,
cudaStream_t
stream
)
static
void
call
(
const
DevMem2D_
<
T
>&
src
,
const
DevMem2D_
<
D
>&
dst
,
const
UnOp
&
op
,
const
Mask
&
mask
,
cudaStream_t
stream
)
{
{
dim3
threads
(
16
,
16
,
1
);
typedef
TransformFunctorTraits
<
UnOp
>
ft
;
dim3
grid
(
1
,
1
,
1
);
grid
.
x
=
divUp
(
src
.
cols
,
threads
.
x
);
const
dim3
threads
(
ft
::
simple_block_dim_x
,
ft
::
simple_block_dim_y
,
1
);
grid
.
y
=
divUp
(
src
.
rows
,
threads
.
y
);
const
dim3
grid
(
divUp
(
src
.
cols
,
threads
.
x
),
divUp
(
src
.
rows
,
threads
.
y
),
1
);
transformSimple
<
T
,
D
><<<
grid
,
threads
,
0
,
stream
>>>
(
src
,
dst
,
mask
,
op
);
transformSimple
<
T
,
D
><<<
grid
,
threads
,
0
,
stream
>>>
(
src
,
dst
,
mask
,
op
);
cudaSafeCall
(
cudaGetLastError
()
);
cudaSafeCall
(
cudaGetLastError
()
);
...
@@ -332,11 +342,10 @@ namespace cv { namespace gpu { namespace device
...
@@ -332,11 +342,10 @@ namespace cv { namespace gpu { namespace device
template
<
typename
T1
,
typename
T2
,
typename
D
,
typename
BinOp
,
typename
Mask
>
template
<
typename
T1
,
typename
T2
,
typename
D
,
typename
BinOp
,
typename
Mask
>
static
void
call
(
const
DevMem2D_
<
T1
>&
src1
,
const
DevMem2D_
<
T2
>&
src2
,
const
DevMem2D_
<
D
>&
dst
,
const
BinOp
&
op
,
const
Mask
&
mask
,
cudaStream_t
stream
)
static
void
call
(
const
DevMem2D_
<
T1
>&
src1
,
const
DevMem2D_
<
T2
>&
src2
,
const
DevMem2D_
<
D
>&
dst
,
const
BinOp
&
op
,
const
Mask
&
mask
,
cudaStream_t
stream
)
{
{
dim3
threads
(
16
,
16
,
1
);
typedef
TransformFunctorTraits
<
BinOp
>
ft
;
dim3
grid
(
1
,
1
,
1
);
grid
.
x
=
divUp
(
src1
.
cols
,
threads
.
x
);
const
dim3
threads
(
ft
::
simple_block_dim_x
,
ft
::
simple_block_dim_y
,
1
);
grid
.
y
=
divUp
(
src1
.
rows
,
threads
.
y
);
const
dim3
grid
(
divUp
(
src1
.
cols
,
threads
.
x
),
divUp
(
src1
.
rows
,
threads
.
y
),
1
);
transformSimple
<
T1
,
T2
,
D
><<<
grid
,
threads
,
0
,
stream
>>>
(
src1
,
src2
,
dst
,
mask
,
op
);
transformSimple
<
T1
,
T2
,
D
><<<
grid
,
threads
,
0
,
stream
>>>
(
src1
,
src2
,
dst
,
mask
,
op
);
cudaSafeCall
(
cudaGetLastError
()
);
cudaSafeCall
(
cudaGetLastError
()
);
...
@@ -350,13 +359,12 @@ namespace cv { namespace gpu { namespace device
...
@@ -350,13 +359,12 @@ namespace cv { namespace gpu { namespace device
template
<
typename
T
,
typename
D
,
typename
UnOp
,
typename
Mask
>
template
<
typename
T
,
typename
D
,
typename
UnOp
,
typename
Mask
>
static
void
call
(
const
DevMem2D_
<
T
>&
src
,
const
DevMem2D_
<
D
>&
dst
,
const
UnOp
&
op
,
const
Mask
&
mask
,
cudaStream_t
stream
)
static
void
call
(
const
DevMem2D_
<
T
>&
src
,
const
DevMem2D_
<
D
>&
dst
,
const
UnOp
&
op
,
const
Mask
&
mask
,
cudaStream_t
stream
)
{
{
const
int
shift
=
UnReadWriteTraits
<
T
,
D
>::
shi
ft
;
typedef
TransformFunctorTraits
<
UnOp
>
ft
;
dim3
threads
(
16
,
16
,
1
);
StaticAssert
<
ft
::
smart_shift
!=
1
>::
check
();
dim3
grid
(
1
,
1
,
1
);
grid
.
x
=
divUp
(
src
.
cols
,
threads
.
x
*
shift
);
const
dim3
threads
(
ft
::
smart_block_dim_x
,
ft
::
smart_block_dim_y
,
1
);
grid
.
y
=
divUp
(
src
.
rows
,
threads
.
y
);
const
dim3
grid
(
divUp
(
src
.
cols
,
threads
.
x
*
ft
::
smart_shift
),
divUp
(
src
.
rows
,
threads
.
y
),
1
);
transformSmart
<
T
,
D
><<<
grid
,
threads
,
0
,
stream
>>>
(
src
,
dst
,
mask
,
op
);
transformSmart
<
T
,
D
><<<
grid
,
threads
,
0
,
stream
>>>
(
src
,
dst
,
mask
,
op
);
cudaSafeCall
(
cudaGetLastError
()
);
cudaSafeCall
(
cudaGetLastError
()
);
...
@@ -368,13 +376,12 @@ namespace cv { namespace gpu { namespace device
...
@@ -368,13 +376,12 @@ namespace cv { namespace gpu { namespace device
template
<
typename
T1
,
typename
T2
,
typename
D
,
typename
BinOp
,
typename
Mask
>
template
<
typename
T1
,
typename
T2
,
typename
D
,
typename
BinOp
,
typename
Mask
>
static
void
call
(
const
DevMem2D_
<
T1
>&
src1
,
const
DevMem2D_
<
T2
>&
src2
,
const
DevMem2D_
<
D
>&
dst
,
const
BinOp
&
op
,
const
Mask
&
mask
,
cudaStream_t
stream
)
static
void
call
(
const
DevMem2D_
<
T1
>&
src1
,
const
DevMem2D_
<
T2
>&
src2
,
const
DevMem2D_
<
D
>&
dst
,
const
BinOp
&
op
,
const
Mask
&
mask
,
cudaStream_t
stream
)
{
{
const
int
shift
=
BinReadWriteTraits
<
T1
,
T2
,
D
>::
shi
ft
;
typedef
TransformFunctorTraits
<
BinOp
>
ft
;
dim3
threads
(
16
,
16
,
1
);
StaticAssert
<
ft
::
smart_shift
!=
1
>::
check
();
dim3
grid
(
1
,
1
,
1
);
grid
.
x
=
divUp
(
src1
.
cols
,
threads
.
x
*
shift
);
const
dim3
threads
(
ft
::
smart_block_dim_x
,
ft
::
smart_block_dim_y
,
1
);
grid
.
y
=
divUp
(
src1
.
rows
,
threads
.
y
);
const
dim3
grid
(
divUp
(
src1
.
cols
,
threads
.
x
*
ft
::
smart_shift
),
divUp
(
src1
.
rows
,
threads
.
y
),
1
);
transformSmart
<
T1
,
T2
,
D
><<<
grid
,
threads
,
0
,
stream
>>>
(
src1
,
src2
,
dst
,
mask
,
op
);
transformSmart
<
T1
,
T2
,
D
><<<
grid
,
threads
,
0
,
stream
>>>
(
src1
,
src2
,
dst
,
mask
,
op
);
cudaSafeCall
(
cudaGetLastError
()
);
cudaSafeCall
(
cudaGetLastError
()
);
...
@@ -384,42 +391,18 @@ namespace cv { namespace gpu { namespace device
...
@@ -384,42 +391,18 @@ namespace cv { namespace gpu { namespace device
}
}
};
};
template
<
typename
T
,
typename
D
,
int
scn
,
int
dcn
>
struct
UseSmartUn_
{
static
const
bool
value
=
false
;
};
template
<
typename
T
,
typename
D
>
struct
UseSmartUn_
<
T
,
D
,
1
,
1
>
{
static
const
bool
value
=
UnReadWriteTraits
<
T
,
D
>::
shift
!=
1
;
};
template
<
typename
T
,
typename
D
>
struct
UseSmartUn
{
static
const
bool
value
=
UseSmartUn_
<
T
,
D
,
VecTraits
<
T
>::
cn
,
VecTraits
<
D
>::
cn
>::
value
;
};
template
<
typename
T1
,
typename
T2
,
typename
D
,
int
src1cn
,
int
src2cn
,
int
dstcn
>
struct
UseSmartBin_
{
static
const
bool
value
=
false
;
};
template
<
typename
T1
,
typename
T2
,
typename
D
>
struct
UseSmartBin_
<
T1
,
T2
,
D
,
1
,
1
,
1
>
{
static
const
bool
value
=
BinReadWriteTraits
<
T1
,
T2
,
D
>::
shift
!=
1
;
};
template
<
typename
T1
,
typename
T2
,
typename
D
>
struct
UseSmartBin
{
static
const
bool
value
=
UseSmartBin_
<
T1
,
T2
,
D
,
VecTraits
<
T1
>::
cn
,
VecTraits
<
T2
>::
cn
,
VecTraits
<
D
>::
cn
>::
value
;
};
template
<
typename
T
,
typename
D
,
typename
UnOp
,
typename
Mask
>
template
<
typename
T
,
typename
D
,
typename
UnOp
,
typename
Mask
>
static
void
transform_caller
(
const
DevMem2D_
<
T
>&
src
,
const
DevMem2D_
<
D
>&
dst
,
const
UnOp
&
op
,
const
Mask
&
mask
,
cudaStream_t
stream
)
static
void
transform_caller
(
const
DevMem2D_
<
T
>&
src
,
const
DevMem2D_
<
D
>&
dst
,
const
UnOp
&
op
,
const
Mask
&
mask
,
cudaStream_t
stream
)
{
{
TransformDispatcher
<
UseSmartUn
<
T
,
D
>::
value
>::
call
(
src
,
dst
,
op
,
mask
,
stream
);
typedef
TransformFunctorTraits
<
UnOp
>
ft
;
TransformDispatcher
<
VecTraits
<
T
>::
cn
==
1
&&
VecTraits
<
D
>::
cn
==
1
&&
ft
::
smart_shift
!=
1
>::
call
(
src
,
dst
,
op
,
mask
,
stream
);
}
}
template
<
typename
T1
,
typename
T2
,
typename
D
,
typename
BinOp
,
typename
Mask
>
template
<
typename
T1
,
typename
T2
,
typename
D
,
typename
BinOp
,
typename
Mask
>
static
void
transform_caller
(
const
DevMem2D_
<
T1
>&
src1
,
const
DevMem2D_
<
T2
>&
src2
,
const
DevMem2D_
<
D
>&
dst
,
const
BinOp
&
op
,
const
Mask
&
mask
,
cudaStream_t
stream
)
static
void
transform_caller
(
const
DevMem2D_
<
T1
>&
src1
,
const
DevMem2D_
<
T2
>&
src2
,
const
DevMem2D_
<
D
>&
dst
,
const
BinOp
&
op
,
const
Mask
&
mask
,
cudaStream_t
stream
)
{
{
TransformDispatcher
<
UseSmartBin
<
T1
,
T2
,
D
>::
value
>::
call
(
src1
,
src2
,
dst
,
op
,
mask
,
stream
);
typedef
TransformFunctorTraits
<
BinOp
>
ft
;
TransformDispatcher
<
VecTraits
<
T1
>::
cn
==
1
&&
VecTraits
<
T2
>::
cn
==
1
&&
VecTraits
<
D
>::
cn
==
1
&&
ft
::
smart_shift
!=
1
>::
call
(
src1
,
src2
,
dst
,
op
,
mask
,
stream
);
}
}
}
}
}}}
}}}
...
...
modules/gpu/src/opencv2/gpu/device/functional.hpp
View file @
5e9ae6b1
...
@@ -46,12 +46,17 @@
...
@@ -46,12 +46,17 @@
#include <thrust/functional.h>
#include <thrust/functional.h>
#include "internal_shared.hpp"
#include "internal_shared.hpp"
#include "saturate_cast.hpp"
#include "saturate_cast.hpp"
#include "vec_traits.hpp"
namespace
cv
{
namespace
gpu
{
namespace
device
namespace
cv
{
namespace
gpu
{
namespace
device
{
{
// Function Objects
using
thrust
::
unary_function
;
using
thrust
::
unary_function
;
using
thrust
::
binary_function
;
using
thrust
::
binary_function
;
// Arithmetic Operations
using
thrust
::
plus
;
using
thrust
::
plus
;
using
thrust
::
minus
;
using
thrust
::
minus
;
using
thrust
::
multiplies
;
using
thrust
::
multiplies
;
...
@@ -59,6 +64,8 @@ namespace cv { namespace gpu { namespace device
...
@@ -59,6 +64,8 @@ namespace cv { namespace gpu { namespace device
using
thrust
::
modulus
;
using
thrust
::
modulus
;
using
thrust
::
negate
;
using
thrust
::
negate
;
// Comparison Operations
using
thrust
::
equal_to
;
using
thrust
::
equal_to
;
using
thrust
::
not_equal_to
;
using
thrust
::
not_equal_to
;
using
thrust
::
greater
;
using
thrust
::
greater
;
...
@@ -66,10 +73,14 @@ namespace cv { namespace gpu { namespace device
...
@@ -66,10 +73,14 @@ namespace cv { namespace gpu { namespace device
using
thrust
::
greater_equal
;
using
thrust
::
greater_equal
;
using
thrust
::
less_equal
;
using
thrust
::
less_equal
;
// Logical Operations
using
thrust
::
logical_and
;
using
thrust
::
logical_and
;
using
thrust
::
logical_or
;
using
thrust
::
logical_or
;
using
thrust
::
logical_not
;
using
thrust
::
logical_not
;
// Bitwise Operations
using
thrust
::
bit_and
;
using
thrust
::
bit_and
;
using
thrust
::
bit_or
;
using
thrust
::
bit_or
;
using
thrust
::
bit_xor
;
using
thrust
::
bit_xor
;
...
@@ -78,7 +89,13 @@ namespace cv { namespace gpu { namespace device
...
@@ -78,7 +89,13 @@ namespace cv { namespace gpu { namespace device
__forceinline__
__device__
T
operator
()(
const
T
&
v
)
const
{
return
~
v
;}
__forceinline__
__device__
T
operator
()(
const
T
&
v
)
const
{
return
~
v
;}
};
};
// Generalized Identity Operations
using
thrust
::
identity
;
using
thrust
::
identity
;
using
thrust
::
project1st
;
using
thrust
::
project2nd
;
// Min/Max Operations
#define OPENCV_GPU_IMPLEMENT_MINMAX(name, type, op) \
#define OPENCV_GPU_IMPLEMENT_MINMAX(name, type, op) \
template
<>
struct
name
<
type
>
:
binary_function
<
type
,
type
,
type
>
\
template
<>
struct
name
<
type
>
:
binary_function
<
type
,
type
,
type
>
\
...
@@ -116,14 +133,7 @@ namespace cv { namespace gpu { namespace device
...
@@ -116,14 +133,7 @@ namespace cv { namespace gpu { namespace device
#undef OPENCV_GPU_IMPLEMENT_MINMAX
#undef OPENCV_GPU_IMPLEMENT_MINMAX
using
thrust
::
project1st
;
// Math functions
using
thrust
::
project2nd
;
using
thrust
::
unary_negate
;
using
thrust
::
not1
;
using
thrust
::
binary_negate
;
using
thrust
::
not2
;
#define OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(func) \
#define OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(func) \
template
<
typename
T
>
struct
func
##
_func
:
unary_function
<
T
,
float
>
\
template
<
typename
T
>
struct
func
##
_func
:
unary_function
<
T
,
float
>
\
...
@@ -192,6 +202,8 @@ namespace cv { namespace gpu { namespace device
...
@@ -192,6 +202,8 @@ namespace cv { namespace gpu { namespace device
}
}
};
};
// Saturate Cast Functor
template
<
typename
T
,
typename
D
>
struct
saturate_cast_func
:
unary_function
<
T
,
D
>
template
<
typename
T
,
typename
D
>
struct
saturate_cast_func
:
unary_function
<
T
,
D
>
{
{
__forceinline__
__device__
D
operator
()(
const
T
&
v
)
const
__forceinline__
__device__
D
operator
()(
const
T
&
v
)
const
...
@@ -200,6 +212,8 @@ namespace cv { namespace gpu { namespace device
...
@@ -200,6 +212,8 @@ namespace cv { namespace gpu { namespace device
}
}
};
};
// Threshold Functors
template
<
typename
T
>
struct
thresh_binary_func
:
unary_function
<
T
,
T
>
template
<
typename
T
>
struct
thresh_binary_func
:
unary_function
<
T
,
T
>
{
{
__forceinline__
__host__
__device__
thresh_binary_func
(
T
thresh_
,
T
maxVal_
)
:
thresh
(
thresh_
),
maxVal
(
maxVal_
)
{}
__forceinline__
__host__
__device__
thresh_binary_func
(
T
thresh_
,
T
maxVal_
)
:
thresh
(
thresh_
),
maxVal
(
maxVal_
)
{}
...
@@ -258,6 +272,14 @@ namespace cv { namespace gpu { namespace device
...
@@ -258,6 +272,14 @@ namespace cv { namespace gpu { namespace device
const
T
thresh
;
const
T
thresh
;
};
};
// Function Object Adaptors
using
thrust
::
unary_negate
;
using
thrust
::
not1
;
using
thrust
::
binary_negate
;
using
thrust
::
not2
;
template
<
typename
Op
>
struct
binder1st
:
unary_function
<
typename
Op
::
second_argument_type
,
typename
Op
::
result_type
>
template
<
typename
Op
>
struct
binder1st
:
unary_function
<
typename
Op
::
second_argument_type
,
typename
Op
::
result_type
>
{
{
__forceinline__
__host__
__device__
binder1st
(
const
Op
&
op_
,
const
typename
Op
::
first_argument_type
&
arg1_
)
:
op
(
op_
),
arg1
(
arg1_
)
{}
__forceinline__
__host__
__device__
binder1st
(
const
Op
&
op_
,
const
typename
Op
::
first_argument_type
&
arg1_
)
:
op
(
op_
),
arg1
(
arg1_
)
{}
...
@@ -291,46 +313,77 @@ namespace cv { namespace gpu { namespace device
...
@@ -291,46 +313,77 @@ namespace cv { namespace gpu { namespace device
return
binder2nd
<
Op
>
(
op
,
typename
Op
::
second_argument_type
(
x
));
return
binder2nd
<
Op
>
(
op
,
typename
Op
::
second_argument_type
(
x
));
}
}
template
<
typename
T1
,
typename
T2
>
struct
BinOpTraits
// Functor Traits
{
typedef
int
argument_type
;
template
<
typename
F
>
struct
IsUnaryFunction
};
template
<
typename
T
>
struct
BinOpTraits
<
T
,
T
>
{
{
typedef
T
argument_type
;
struct
Yes
{};
struct
No
{
Yes
a
[
2
];};
template
<
typename
T
,
typename
D
>
static
Yes
check
(
unary_function
<
T
,
D
>*
);
static
No
check
(...);
enum
{
value
=
(
sizeof
(
check
((
F
*
)
0
))
==
sizeof
(
Yes
))
};
};
};
template
<
typename
T
>
struct
BinOpTraits
<
T
,
double
>
template
<
typename
F
>
struct
IsBinaryFunction
{
{
typedef
double
argument_type
;
struct
Yes
{};
struct
No
{
Yes
a
[
2
];};
template
<
typename
T1
,
typename
T2
,
typename
D
>
static
Yes
check
(
binary_function
<
T1
,
T2
,
D
>*
);
static
No
check
(...);
enum
{
value
=
(
sizeof
(
check
((
F
*
)
0
))
==
sizeof
(
Yes
))
};
};
};
template
<
typename
T
>
struct
BinOpTraits
<
double
,
T
>
namespace
detail
{
{
typedef
double
argument_type
;
template
<
size_t
src_elem_size
,
size_t
dst_elem_size
>
struct
UnOpShift
{
enum
{
shift
=
1
};
};
};
template
<
size_t
src_elem_size
>
struct
UnOpShift
<
src_elem_size
,
1
>
{
enum
{
shift
=
4
};
};
template
<>
struct
BinOpTraits
<
double
,
double
>
template
<
size_t
src_elem_size
>
struct
UnOpShift
<
src_elem_size
,
2
>
{
enum
{
shift
=
2
};
};
template
<
typename
T
,
typename
D
>
struct
DefaultUnaryShift
{
{
typedef
double
argument_type
;
enum
{
shift
=
detail
::
UnOpShift
<
sizeof
(
T
),
sizeof
(
D
)
>::
shift
}
;
};
};
template
<
typename
T
>
struct
BinOpTraits
<
T
,
float
>
template
<
size_t
src_elem_size1
,
size_t
src_elem_size2
,
size_t
dst_elem_size
>
struct
BinOpShift
{
enum
{
shift
=
1
};
};
template
<
size_t
src_elem_size1
,
size_t
src_elem_size2
>
struct
BinOpShift
<
src_elem_size1
,
src_elem_size2
,
1
>
{
enum
{
shift
=
4
};
};
template
<
size_t
src_elem_size1
,
size_t
src_elem_size2
>
struct
BinOpShift
<
src_elem_size1
,
src_elem_size2
,
2
>
{
enum
{
shift
=
2
};
};
template
<
typename
T1
,
typename
T2
,
typename
D
>
struct
DefaultBinaryShift
{
{
typedef
float
argument_type
;
enum
{
shift
=
detail
::
BinOpShift
<
sizeof
(
T1
),
sizeof
(
T2
),
sizeof
(
D
)
>::
shift
}
;
};
};
template
<
typename
T
>
struct
BinOpTraits
<
float
,
T
>
template
<
typename
Func
,
bool
unary
=
IsUnaryFunction
<
Func
>::
value
>
struct
ShiftDispatcher
;
template
<
typename
Func
>
struct
ShiftDispatcher
<
Func
,
true
>
{
{
typedef
float
argument_type
;
enum
{
shift
=
DefaultUnaryShift
<
typename
Func
::
argument_type
,
typename
Func
::
result_type
>::
shift
}
;
};
};
template
<>
struct
BinOpTraits
<
float
,
float
>
template
<
typename
Func
>
struct
ShiftDispatcher
<
Func
,
false
>
{
{
typedef
float
argument_type
;
enum
{
shift
=
DefaultBinaryShift
<
typename
Func
::
first_argument_type
,
typename
Func
::
second_argument_type
,
typename
Func
::
result_type
>::
shift
}
;
};
};
template
<>
struct
BinOpTraits
<
double
,
float
>
}
template
<
typename
Func
>
struct
DefaultTransformShift
{
{
typedef
double
argument_type
;
enum
{
shift
=
detail
::
ShiftDispatcher
<
Func
>::
shift
}
;
};
};
template
<>
struct
BinOpTraits
<
float
,
double
>
template
<
typename
Func
>
struct
DefaultTransformFunctorTraits
{
{
typedef
double
argument_type
;
enum
{
simple_block_dim_x
=
16
};
enum
{
simple_block_dim_y
=
16
};
enum
{
smart_block_dim_x
=
16
};
enum
{
smart_block_dim_y
=
16
};
enum
{
smart_shift
=
DefaultTransformShift
<
Func
>::
shift
};
};
};
template
<
typename
Func
>
struct
TransformFunctorTraits
:
DefaultTransformFunctorTraits
<
Func
>
{};
}}}
}}}
#endif // __OPENCV_GPU_FUNCTIONAL_HPP__
#endif // __OPENCV_GPU_FUNCTIONAL_HPP__
modules/gpu/src/opencv2/gpu/device/vec_math.hpp
View file @
5e9ae6b1
...
@@ -150,6 +150,50 @@ namespace cv { namespace gpu { namespace device
...
@@ -150,6 +150,50 @@ namespace cv { namespace gpu { namespace device
return
VecTraits
<
TypeVec
<
func
<
type
>::
result_type
,
4
>::
vec_type
>::
make
(
f
(
a
.
x
),
f
(
a
.
y
),
f
(
a
.
z
),
f
(
a
.
w
));
\
return
VecTraits
<
TypeVec
<
func
<
type
>::
result_type
,
4
>::
vec_type
>::
make
(
f
(
a
.
x
),
f
(
a
.
y
),
f
(
a
.
z
),
f
(
a
.
w
));
\
}
}
namespace
detail
{
template
<
typename
T1
,
typename
T2
>
struct
BinOpTraits
{
typedef
int
argument_type
;
};
template
<
typename
T
>
struct
BinOpTraits
<
T
,
T
>
{
typedef
T
argument_type
;
};
template
<
typename
T
>
struct
BinOpTraits
<
T
,
double
>
{
typedef
double
argument_type
;
};
template
<
typename
T
>
struct
BinOpTraits
<
double
,
T
>
{
typedef
double
argument_type
;
};
template
<>
struct
BinOpTraits
<
double
,
double
>
{
typedef
double
argument_type
;
};
template
<
typename
T
>
struct
BinOpTraits
<
T
,
float
>
{
typedef
float
argument_type
;
};
template
<
typename
T
>
struct
BinOpTraits
<
float
,
T
>
{
typedef
float
argument_type
;
};
template
<>
struct
BinOpTraits
<
float
,
float
>
{
typedef
float
argument_type
;
};
template
<>
struct
BinOpTraits
<
double
,
float
>
{
typedef
double
argument_type
;
};
template
<>
struct
BinOpTraits
<
float
,
double
>
{
typedef
double
argument_type
;
};
}
#define OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, op, func) \
#define OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, op, func) \
static
__device__
TypeVec
<
func
<
type
>::
result_type
,
1
>::
vec_type
op
(
const
type
##
1
&
a
,
const
type
##
1
&
b
)
\
static
__device__
TypeVec
<
func
<
type
>::
result_type
,
1
>::
vec_type
op
(
const
type
##
1
&
a
,
const
type
##
1
&
b
)
\
{
\
{
\
...
@@ -157,16 +201,16 @@ namespace cv { namespace gpu { namespace device
...
@@ -157,16 +201,16 @@ namespace cv { namespace gpu { namespace device
return
VecTraits
<
TypeVec
<
func
<
type
>::
result_type
,
1
>::
vec_type
>::
make
(
f
(
a
.
x
,
b
.
x
));
\
return
VecTraits
<
TypeVec
<
func
<
type
>::
result_type
,
1
>::
vec_type
>::
make
(
f
(
a
.
x
,
b
.
x
));
\
}
\
}
\
template
<
typename
T
>
\
template
<
typename
T
>
\
static
__device__
typename
TypeVec
<
typename
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
1
>::
vec_type
op
(
const
type
##
1
&
v
,
T
s
)
\
static
__device__
typename
TypeVec
<
typename
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
1
>::
vec_type
op
(
const
type
##
1
&
v
,
T
s
)
\
{
\
{
\
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>
f
;
\
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>
f
;
\
return
VecTraits
<
typename
TypeVec
<
typename
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
1
>::
vec_type
>::
make
(
f
(
v
.
x
,
s
));
\
return
VecTraits
<
typename
TypeVec
<
typename
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
1
>::
vec_type
>::
make
(
f
(
v
.
x
,
s
));
\
}
\
}
\
template
<
typename
T
>
\
template
<
typename
T
>
\
static
__device__
typename
TypeVec
<
typename
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
1
>::
vec_type
op
(
T
s
,
const
type
##
1
&
v
)
\
static
__device__
typename
TypeVec
<
typename
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
1
>::
vec_type
op
(
T
s
,
const
type
##
1
&
v
)
\
{
\
{
\
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>
f
;
\
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>
f
;
\
return
VecTraits
<
typename
TypeVec
<
typename
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
1
>::
vec_type
>::
make
(
f
(
s
,
v
.
x
));
\
return
VecTraits
<
typename
TypeVec
<
typename
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
1
>::
vec_type
>::
make
(
f
(
s
,
v
.
x
));
\
}
\
}
\
static
__device__
TypeVec
<
func
<
type
>::
result_type
,
2
>::
vec_type
op
(
const
type
##
2
&
a
,
const
type
##
2
&
b
)
\
static
__device__
TypeVec
<
func
<
type
>::
result_type
,
2
>::
vec_type
op
(
const
type
##
2
&
a
,
const
type
##
2
&
b
)
\
{
\
{
\
...
@@ -174,16 +218,16 @@ namespace cv { namespace gpu { namespace device
...
@@ -174,16 +218,16 @@ namespace cv { namespace gpu { namespace device
return
VecTraits
<
TypeVec
<
func
<
type
>::
result_type
,
2
>::
vec_type
>::
make
(
f
(
a
.
x
,
b
.
x
),
f
(
a
.
y
,
b
.
y
));
\
return
VecTraits
<
TypeVec
<
func
<
type
>::
result_type
,
2
>::
vec_type
>::
make
(
f
(
a
.
x
,
b
.
x
),
f
(
a
.
y
,
b
.
y
));
\
}
\
}
\
template
<
typename
T
>
\
template
<
typename
T
>
\
static
__device__
typename
TypeVec
<
typename
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
2
>::
vec_type
op
(
const
type
##
2
&
v
,
T
s
)
\
static
__device__
typename
TypeVec
<
typename
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
2
>::
vec_type
op
(
const
type
##
2
&
v
,
T
s
)
\
{
\
{
\
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>
f
;
\
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>
f
;
\
return
VecTraits
<
typename
TypeVec
<
typename
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
2
>::
vec_type
>::
make
(
f
(
v
.
x
,
s
),
f
(
v
.
y
,
s
));
\
return
VecTraits
<
typename
TypeVec
<
typename
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
2
>::
vec_type
>::
make
(
f
(
v
.
x
,
s
),
f
(
v
.
y
,
s
));
\
}
\
}
\
template
<
typename
T
>
\
template
<
typename
T
>
\
static
__device__
typename
TypeVec
<
typename
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
2
>::
vec_type
op
(
T
s
,
const
type
##
2
&
v
)
\
static
__device__
typename
TypeVec
<
typename
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
2
>::
vec_type
op
(
T
s
,
const
type
##
2
&
v
)
\
{
\
{
\
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>
f
;
\
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>
f
;
\
return
VecTraits
<
typename
TypeVec
<
typename
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
2
>::
vec_type
>::
make
(
f
(
s
,
v
.
x
),
f
(
s
,
v
.
y
));
\
return
VecTraits
<
typename
TypeVec
<
typename
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
2
>::
vec_type
>::
make
(
f
(
s
,
v
.
x
),
f
(
s
,
v
.
y
));
\
}
\
}
\
static
__device__
TypeVec
<
func
<
type
>::
result_type
,
3
>::
vec_type
op
(
const
type
##
3
&
a
,
const
type
##
3
&
b
)
\
static
__device__
TypeVec
<
func
<
type
>::
result_type
,
3
>::
vec_type
op
(
const
type
##
3
&
a
,
const
type
##
3
&
b
)
\
{
\
{
\
...
@@ -191,16 +235,16 @@ namespace cv { namespace gpu { namespace device
...
@@ -191,16 +235,16 @@ namespace cv { namespace gpu { namespace device
return
VecTraits
<
TypeVec
<
func
<
type
>::
result_type
,
3
>::
vec_type
>::
make
(
f
(
a
.
x
,
b
.
x
),
f
(
a
.
y
,
b
.
y
),
f
(
a
.
z
,
b
.
z
));
\
return
VecTraits
<
TypeVec
<
func
<
type
>::
result_type
,
3
>::
vec_type
>::
make
(
f
(
a
.
x
,
b
.
x
),
f
(
a
.
y
,
b
.
y
),
f
(
a
.
z
,
b
.
z
));
\
}
\
}
\
template
<
typename
T
>
\
template
<
typename
T
>
\
static
__device__
typename
TypeVec
<
typename
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
3
>::
vec_type
op
(
const
type
##
3
&
v
,
T
s
)
\
static
__device__
typename
TypeVec
<
typename
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
3
>::
vec_type
op
(
const
type
##
3
&
v
,
T
s
)
\
{
\
{
\
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>
f
;
\
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>
f
;
\
return
VecTraits
<
typename
TypeVec
<
typename
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
3
>::
vec_type
>::
make
(
f
(
v
.
x
,
s
),
f
(
v
.
y
,
s
),
f
(
v
.
z
,
s
));
\
return
VecTraits
<
typename
TypeVec
<
typename
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
3
>::
vec_type
>::
make
(
f
(
v
.
x
,
s
),
f
(
v
.
y
,
s
),
f
(
v
.
z
,
s
));
\
}
\
}
\
template
<
typename
T
>
\
template
<
typename
T
>
\
static
__device__
typename
TypeVec
<
typename
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
3
>::
vec_type
op
(
T
s
,
const
type
##
3
&
v
)
\
static
__device__
typename
TypeVec
<
typename
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
3
>::
vec_type
op
(
T
s
,
const
type
##
3
&
v
)
\
{
\
{
\
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>
f
;
\
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>
f
;
\
return
VecTraits
<
typename
TypeVec
<
typename
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
3
>::
vec_type
>::
make
(
f
(
s
,
v
.
x
),
f
(
s
,
v
.
y
),
f
(
s
,
v
.
z
));
\
return
VecTraits
<
typename
TypeVec
<
typename
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
3
>::
vec_type
>::
make
(
f
(
s
,
v
.
x
),
f
(
s
,
v
.
y
),
f
(
s
,
v
.
z
));
\
}
\
}
\
static
__device__
TypeVec
<
func
<
type
>::
result_type
,
4
>::
vec_type
op
(
const
type
##
4
&
a
,
const
type
##
4
&
b
)
\
static
__device__
TypeVec
<
func
<
type
>::
result_type
,
4
>::
vec_type
op
(
const
type
##
4
&
a
,
const
type
##
4
&
b
)
\
{
\
{
\
...
@@ -208,16 +252,16 @@ namespace cv { namespace gpu { namespace device
...
@@ -208,16 +252,16 @@ namespace cv { namespace gpu { namespace device
return
VecTraits
<
TypeVec
<
func
<
type
>::
result_type
,
4
>::
vec_type
>::
make
(
f
(
a
.
x
,
b
.
x
),
f
(
a
.
y
,
b
.
y
),
f
(
a
.
z
,
b
.
z
),
f
(
a
.
w
,
b
.
w
));
\
return
VecTraits
<
TypeVec
<
func
<
type
>::
result_type
,
4
>::
vec_type
>::
make
(
f
(
a
.
x
,
b
.
x
),
f
(
a
.
y
,
b
.
y
),
f
(
a
.
z
,
b
.
z
),
f
(
a
.
w
,
b
.
w
));
\
}
\
}
\
template
<
typename
T
>
\
template
<
typename
T
>
\
static
__device__
typename
TypeVec
<
typename
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
4
>::
vec_type
op
(
const
type
##
4
&
v
,
T
s
)
\
static
__device__
typename
TypeVec
<
typename
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
4
>::
vec_type
op
(
const
type
##
4
&
v
,
T
s
)
\
{
\
{
\
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>
f
;
\
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>
f
;
\
return
VecTraits
<
typename
TypeVec
<
typename
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
4
>::
vec_type
>::
make
(
f
(
v
.
x
,
s
),
f
(
v
.
y
,
s
),
f
(
v
.
z
,
s
),
f
(
v
.
w
,
s
));
\
return
VecTraits
<
typename
TypeVec
<
typename
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
4
>::
vec_type
>::
make
(
f
(
v
.
x
,
s
),
f
(
v
.
y
,
s
),
f
(
v
.
z
,
s
),
f
(
v
.
w
,
s
));
\
}
\
}
\
template
<
typename
T
>
\
template
<
typename
T
>
\
static
__device__
typename
TypeVec
<
typename
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
4
>::
vec_type
op
(
T
s
,
const
type
##
4
&
v
)
\
static
__device__
typename
TypeVec
<
typename
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
4
>::
vec_type
op
(
T
s
,
const
type
##
4
&
v
)
\
{
\
{
\
func
<
typename
BinOpTraits
<
T
,
type
>::
argument_type
>
f
;
\
func
<
typename
detail
::
BinOpTraits
<
T
,
type
>::
argument_type
>
f
;
\
return
VecTraits
<
typename
TypeVec
<
typename
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
4
>::
vec_type
>::
make
(
f
(
s
,
v
.
x
),
f
(
s
,
v
.
y
),
f
(
s
,
v
.
z
),
f
(
s
,
v
.
w
));
\
return
VecTraits
<
typename
TypeVec
<
typename
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
4
>::
vec_type
>::
make
(
f
(
s
,
v
.
x
),
f
(
s
,
v
.
y
),
f
(
s
,
v
.
z
),
f
(
s
,
v
.
w
));
\
}
}
#define OPENCV_GPU_IMPLEMENT_VEC_OP(type) \
#define OPENCV_GPU_IMPLEMENT_VEC_OP(type) \
...
...
modules/gpu/src/opencv2/gpu/device/vec_traits.hpp
View file @
5e9ae6b1
...
@@ -49,6 +49,79 @@ namespace cv { namespace gpu { namespace device
...
@@ -49,6 +49,79 @@ namespace cv { namespace gpu { namespace device
{
{
template
<
typename
T
,
int
N
>
struct
TypeVec
;
template
<
typename
T
,
int
N
>
struct
TypeVec
;
struct
__align__
(
8
)
uchar8
{
uchar
a0
,
a1
,
a2
,
a3
,
a4
,
a5
,
a6
,
a7
;
};
static
__host__
__device__
__forceinline__
uchar8
make_uchar8
(
uchar
a0
,
uchar
a1
,
uchar
a2
,
uchar
a3
,
uchar
a4
,
uchar
a5
,
uchar
a6
,
uchar
a7
)
{
uchar8
val
=
{
a0
,
a1
,
a2
,
a3
,
a4
,
a5
,
a6
,
a7
};
return
val
;
}
struct
__align__
(
8
)
char8
{
schar
a0
,
a1
,
a2
,
a3
,
a4
,
a5
,
a6
,
a7
;
};
static
__host__
__device__
__forceinline__
char8
make_char8
(
schar
a0
,
schar
a1
,
schar
a2
,
schar
a3
,
schar
a4
,
schar
a5
,
schar
a6
,
schar
a7
)
{
char8
val
=
{
a0
,
a1
,
a2
,
a3
,
a4
,
a5
,
a6
,
a7
};
return
val
;
}
struct
__align__
(
16
)
ushort8
{
ushort
a0
,
a1
,
a2
,
a3
,
a4
,
a5
,
a6
,
a7
;
};
static
__host__
__device__
__forceinline__
ushort8
make_ushort8
(
ushort
a0
,
ushort
a1
,
ushort
a2
,
ushort
a3
,
ushort
a4
,
ushort
a5
,
ushort
a6
,
ushort
a7
)
{
ushort8
val
=
{
a0
,
a1
,
a2
,
a3
,
a4
,
a5
,
a6
,
a7
};
return
val
;
}
struct
__align__
(
16
)
short8
{
short
a0
,
a1
,
a2
,
a3
,
a4
,
a5
,
a6
,
a7
;
};
static
__host__
__device__
__forceinline__
short8
make_short8
(
short
a0
,
short
a1
,
short
a2
,
short
a3
,
short
a4
,
short
a5
,
short
a6
,
short
a7
)
{
short8
val
=
{
a0
,
a1
,
a2
,
a3
,
a4
,
a5
,
a6
,
a7
};
return
val
;
}
struct
__align__
(
32
)
uint8
{
uint
a0
,
a1
,
a2
,
a3
,
a4
,
a5
,
a6
,
a7
;
};
static
__host__
__device__
__forceinline__
uint8
make_uint8
(
uint
a0
,
uint
a1
,
uint
a2
,
uint
a3
,
uint
a4
,
uint
a5
,
uint
a6
,
uint
a7
)
{
uint8
val
=
{
a0
,
a1
,
a2
,
a3
,
a4
,
a5
,
a6
,
a7
};
return
val
;
}
struct
__align__
(
32
)
int8
{
int
a0
,
a1
,
a2
,
a3
,
a4
,
a5
,
a6
,
a7
;
};
static
__host__
__device__
__forceinline__
int8
make_int8
(
int
a0
,
int
a1
,
int
a2
,
int
a3
,
int
a4
,
int
a5
,
int
a6
,
int
a7
)
{
int8
val
=
{
a0
,
a1
,
a2
,
a3
,
a4
,
a5
,
a6
,
a7
};
return
val
;
}
struct
__align__
(
32
)
float8
{
float
a0
,
a1
,
a2
,
a3
,
a4
,
a5
,
a6
,
a7
;
};
static
__host__
__device__
__forceinline__
float8
make_float8
(
float
a0
,
float
a1
,
float
a2
,
float
a3
,
float
a4
,
float
a5
,
float
a6
,
float
a7
)
{
float8
val
=
{
a0
,
a1
,
a2
,
a3
,
a4
,
a5
,
a6
,
a7
};
return
val
;
}
struct
double8
{
double
a0
,
a1
,
a2
,
a3
,
a4
,
a5
,
a6
,
a7
;
};
static
__host__
__device__
__forceinline__
double8
make_double8
(
double
a0
,
double
a1
,
double
a2
,
double
a3
,
double
a4
,
double
a5
,
double
a6
,
double
a7
)
{
double8
val
=
{
a0
,
a1
,
a2
,
a3
,
a4
,
a5
,
a6
,
a7
};
return
val
;
}
#define OPENCV_GPU_IMPLEMENT_TYPE_VEC(type) \
#define OPENCV_GPU_IMPLEMENT_TYPE_VEC(type) \
template
<>
struct
TypeVec
<
type
,
1
>
{
typedef
type
vec_type
;
};
\
template
<>
struct
TypeVec
<
type
,
1
>
{
typedef
type
vec_type
;
};
\
template
<>
struct
TypeVec
<
type
##
1
,
1
>
{
typedef
type
##
1
vec_type
;
};
\
template
<>
struct
TypeVec
<
type
##
1
,
1
>
{
typedef
type
##
1
vec_type
;
};
\
...
@@ -57,7 +130,9 @@ namespace cv { namespace gpu { namespace device
...
@@ -57,7 +130,9 @@ namespace cv { namespace gpu { namespace device
template
<>
struct
TypeVec
<
type
,
3
>
{
typedef
type
##
3
vec_type
;
};
\
template
<>
struct
TypeVec
<
type
,
3
>
{
typedef
type
##
3
vec_type
;
};
\
template
<>
struct
TypeVec
<
type
##
3
,
3
>
{
typedef
type
##
3
vec_type
;
};
\
template
<>
struct
TypeVec
<
type
##
3
,
3
>
{
typedef
type
##
3
vec_type
;
};
\
template
<>
struct
TypeVec
<
type
,
4
>
{
typedef
type
##
4
vec_type
;
};
\
template
<>
struct
TypeVec
<
type
,
4
>
{
typedef
type
##
4
vec_type
;
};
\
template
<>
struct
TypeVec
<
type
##
4
,
4
>
{
typedef
type
##
4
vec_type
;
};
template
<>
struct
TypeVec
<
type
##
4
,
4
>
{
typedef
type
##
4
vec_type
;
};
\
template
<>
struct
TypeVec
<
type
,
8
>
{
typedef
type
##
8
vec_type
;
};
\
template
<>
struct
TypeVec
<
type
##
8
,
8
>
{
typedef
type
##
8
vec_type
;
};
OPENCV_GPU_IMPLEMENT_TYPE_VEC
(
uchar
)
OPENCV_GPU_IMPLEMENT_TYPE_VEC
(
uchar
)
OPENCV_GPU_IMPLEMENT_TYPE_VEC
(
char
)
OPENCV_GPU_IMPLEMENT_TYPE_VEC
(
char
)
...
@@ -74,11 +149,13 @@ namespace cv { namespace gpu { namespace device
...
@@ -74,11 +149,13 @@ namespace cv { namespace gpu { namespace device
template
<>
struct
TypeVec
<
schar
,
2
>
{
typedef
char2
vec_type
;
};
template
<>
struct
TypeVec
<
schar
,
2
>
{
typedef
char2
vec_type
;
};
template
<>
struct
TypeVec
<
schar
,
3
>
{
typedef
char3
vec_type
;
};
template
<>
struct
TypeVec
<
schar
,
3
>
{
typedef
char3
vec_type
;
};
template
<>
struct
TypeVec
<
schar
,
4
>
{
typedef
char4
vec_type
;
};
template
<>
struct
TypeVec
<
schar
,
4
>
{
typedef
char4
vec_type
;
};
template
<>
struct
TypeVec
<
schar
,
8
>
{
typedef
char8
vec_type
;
};
template
<>
struct
TypeVec
<
bool
,
1
>
{
typedef
uchar
vec_type
;
};
template
<>
struct
TypeVec
<
bool
,
1
>
{
typedef
uchar
vec_type
;
};
template
<>
struct
TypeVec
<
bool
,
2
>
{
typedef
uchar2
vec_type
;
};
template
<>
struct
TypeVec
<
bool
,
2
>
{
typedef
uchar2
vec_type
;
};
template
<>
struct
TypeVec
<
bool
,
3
>
{
typedef
uchar3
vec_type
;
};
template
<>
struct
TypeVec
<
bool
,
3
>
{
typedef
uchar3
vec_type
;
};
template
<>
struct
TypeVec
<
bool
,
4
>
{
typedef
uchar4
vec_type
;
};
template
<>
struct
TypeVec
<
bool
,
4
>
{
typedef
uchar4
vec_type
;
};
template
<>
struct
TypeVec
<
bool
,
8
>
{
typedef
uchar8
vec_type
;
};
template
<
typename
T
>
struct
VecTraits
;
template
<
typename
T
>
struct
VecTraits
;
...
@@ -87,36 +164,43 @@ namespace cv { namespace gpu { namespace device
...
@@ -87,36 +164,43 @@ namespace cv { namespace gpu { namespace device
{
\
{
\
typedef
type
elem_type
;
\
typedef
type
elem_type
;
\
enum
{
cn
=
1
};
\
enum
{
cn
=
1
};
\
static
__device__
__host__
type
all
(
type
v
)
{
return
v
;}
\
static
__device__
__host__
__forceinline__
type
all
(
type
v
)
{
return
v
;}
\
static
__device__
__host__
type
make
(
type
x
)
{
return
x
;}
\
static
__device__
__host__
__forceinline__
type
make
(
type
x
)
{
return
x
;}
\
};
\
};
\
template
<>
struct
VecTraits
<
type
##
1
>
\
template
<>
struct
VecTraits
<
type
##
1
>
\
{
\
{
\
typedef
type
elem_type
;
\
typedef
type
elem_type
;
\
enum
{
cn
=
1
};
\
enum
{
cn
=
1
};
\
static
__device__
__host__
type
##
1
all
(
type
v
)
{
return
make_
##
type
##
1
(
v
);}
\
static
__device__
__host__
__forceinline__
type
##
1
all
(
type
v
)
{
return
make_
##
type
##
1
(
v
);}
\
static
__device__
__host__
type
##
1
make
(
type
x
)
{
return
make_
##
type
##
1
(
x
);}
\
static
__device__
__host__
__forceinline__
type
##
1
make
(
type
x
)
{
return
make_
##
type
##
1
(
x
);}
\
};
\
};
\
template
<>
struct
VecTraits
<
type
##
2
>
\
template
<>
struct
VecTraits
<
type
##
2
>
\
{
\
{
\
typedef
type
elem_type
;
\
typedef
type
elem_type
;
\
enum
{
cn
=
2
};
\
enum
{
cn
=
2
};
\
static
__device__
__host__
type
##
2
all
(
type
v
)
{
return
make_
##
type
##
2
(
v
,
v
);}
\
static
__device__
__host__
__forceinline__
type
##
2
all
(
type
v
)
{
return
make_
##
type
##
2
(
v
,
v
);}
\
static
__device__
__host__
type
##
2
make
(
type
x
,
type
y
)
{
return
make_
##
type
##
2
(
x
,
y
);}
\
static
__device__
__host__
__forceinline__
type
##
2
make
(
type
x
,
type
y
)
{
return
make_
##
type
##
2
(
x
,
y
);}
\
};
\
};
\
template
<>
struct
VecTraits
<
type
##
3
>
\
template
<>
struct
VecTraits
<
type
##
3
>
\
{
\
{
\
typedef
type
elem_type
;
\
typedef
type
elem_type
;
\
enum
{
cn
=
3
};
\
enum
{
cn
=
3
};
\
static
__device__
__host__
type
##
3
all
(
type
v
)
{
return
make_
##
type
##
3
(
v
,
v
,
v
);}
\
static
__device__
__host__
__forceinline__
type
##
3
all
(
type
v
)
{
return
make_
##
type
##
3
(
v
,
v
,
v
);}
\
static
__device__
__host__
type
##
3
make
(
type
x
,
type
y
,
type
z
)
{
return
make_
##
type
##
3
(
x
,
y
,
z
);}
\
static
__device__
__host__
__forceinline__
type
##
3
make
(
type
x
,
type
y
,
type
z
)
{
return
make_
##
type
##
3
(
x
,
y
,
z
);}
\
};
\
};
\
template
<>
struct
VecTraits
<
type
##
4
>
\
template
<>
struct
VecTraits
<
type
##
4
>
\
{
\
{
\
typedef
type
elem_type
;
\
typedef
type
elem_type
;
\
enum
{
cn
=
4
};
\
enum
{
cn
=
4
};
\
static
__device__
__host__
type
##
4
all
(
type
v
)
{
return
make_
##
type
##
4
(
v
,
v
,
v
,
v
);}
\
static
__device__
__host__
__forceinline__
type
##
4
all
(
type
v
)
{
return
make_
##
type
##
4
(
v
,
v
,
v
,
v
);}
\
static
__device__
__host__
type
##
4
make
(
type
x
,
type
y
,
type
z
,
type
w
)
{
return
make_
##
type
##
4
(
x
,
y
,
z
,
w
);}
\
static
__device__
__host__
__forceinline__
type
##
4
make
(
type
x
,
type
y
,
type
z
,
type
w
)
{
return
make_
##
type
##
4
(
x
,
y
,
z
,
w
);}
\
};
\
template
<>
struct
VecTraits
<
type
##
8
>
\
{
\
typedef
type
elem_type
;
\
enum
{
cn
=
8
};
\
static
__device__
__host__
__forceinline__
type
##
8
all
(
type
v
)
{
return
make_
##
type
##
8
(
v
,
v
,
v
,
v
,
v
,
v
,
v
,
v
);}
\
static
__device__
__host__
__forceinline__
type
##
8
make
(
type
a0
,
type
a1
,
type
a2
,
type
a3
,
type
a4
,
type
a5
,
type
a6
,
type
a7
)
{
return
make_
##
type
##
8
(
a0
,
a1
,
a2
,
a3
,
a4
,
a5
,
a6
,
a7
);}
\
};
};
OPENCV_GPU_IMPLEMENT_VEC_TRAITS
(
uchar
)
OPENCV_GPU_IMPLEMENT_VEC_TRAITS
(
uchar
)
...
@@ -134,8 +218,8 @@ namespace cv { namespace gpu { namespace device
...
@@ -134,8 +218,8 @@ namespace cv { namespace gpu { namespace device
{
{
typedef
schar
elem_type
;
typedef
schar
elem_type
;
enum
{
cn
=
1
};
enum
{
cn
=
1
};
static
__device__
__host__
schar
all
(
schar
v
)
{
return
v
;}
static
__device__
__host__
__forceinline__
schar
all
(
schar
v
)
{
return
v
;}
static
__device__
__host__
schar
make
(
schar
x
)
{
return
x
;}
static
__device__
__host__
__forceinline__
schar
make
(
schar
x
)
{
return
x
;}
};
};
}}}
}}}
...
...
samples/gpu/performance/tests.cpp
View file @
5e9ae6b1
...
@@ -286,7 +286,7 @@ TEST(BruteForceMatcher)
...
@@ -286,7 +286,7 @@ TEST(BruteForceMatcher)
{
{
// Init CPU matcher
// Init CPU matcher
int
desc_len
=
64
;
int
desc_len
=
128
;
BruteForceMatcher
<
L2
<
float
>
>
matcher
;
BruteForceMatcher
<
L2
<
float
>
>
matcher
;
...
@@ -328,7 +328,7 @@ TEST(BruteForceMatcher)
...
@@ -328,7 +328,7 @@ TEST(BruteForceMatcher)
d_matcher
.
knnMatch
(
d_query
,
d_train
,
d_matches
,
knn
);
d_matcher
.
knnMatch
(
d_query
,
d_train
,
d_matches
,
knn
);
GPU_OFF
;
GPU_OFF
;
/*
SUBTEST << "radiusMatch";
SUBTEST
<<
"radiusMatch"
;
float
max_distance
=
3.8
f
;
float
max_distance
=
3.8
f
;
CPU_ON
;
CPU_ON
;
...
@@ -337,7 +337,7 @@ TEST(BruteForceMatcher)
...
@@ -337,7 +337,7 @@ TEST(BruteForceMatcher)
GPU_ON
;
GPU_ON
;
d_matcher
.
radiusMatch
(
d_query
,
d_train
,
d_matches
,
max_distance
);
d_matcher
.
radiusMatch
(
d_query
,
d_train
,
d_matches
,
max_distance
);
GPU_OFF;
*/
GPU_OFF
;
}
}
...
@@ -689,60 +689,66 @@ TEST(threshold)
...
@@ -689,60 +689,66 @@ TEST(threshold)
Mat
src
,
dst
;
Mat
src
,
dst
;
gpu
::
GpuMat
d_src
,
d_dst
;
gpu
::
GpuMat
d_src
,
d_dst
;
for
(
int
size
=
2
000
;
size
<=
4000
;
size
+=
1000
)
for
(
int
size
=
1
000
;
size
<=
4000
;
size
+=
1000
)
{
{
SUBTEST
<<
"size "
<<
size
<<
", 8U, THRESH_
TRUNC
"
;
SUBTEST
<<
"size "
<<
size
<<
", 8U, THRESH_
BINARY
"
;
gen
(
src
,
size
,
size
,
CV_8U
,
0
,
100
);
gen
(
src
,
size
,
size
,
CV_8U
,
0
,
100
);
dst
.
create
(
size
,
size
,
CV_8U
);
dst
.
create
(
size
,
size
,
CV_8U
);
CPU_ON
;
CPU_ON
;
threshold
(
src
,
dst
,
50.0
,
0.0
,
THRESH_
TRUNC
);
threshold
(
src
,
dst
,
50.0
,
0.0
,
THRESH_
BINARY
);
CPU_OFF
;
CPU_OFF
;
d_src
=
src
;
d_src
=
src
;
d_dst
.
create
(
size
,
size
,
CV_8U
);
d_dst
.
create
(
size
,
size
,
CV_8U
);
GPU_ON
;
GPU_ON
;
gpu
::
threshold
(
d_src
,
d_dst
,
50.0
,
0.0
,
THRESH_
TRUNC
);
gpu
::
threshold
(
d_src
,
d_dst
,
50.0
,
0.0
,
THRESH_
BINARY
);
GPU_OFF
;
GPU_OFF
;
}
}
for
(
int
size
=
2
000
;
size
<=
4000
;
size
+=
1000
)
for
(
int
size
=
1
000
;
size
<=
4000
;
size
+=
1000
)
{
{
SUBTEST
<<
"size "
<<
size
<<
",
8U
, THRESH_BINARY"
;
SUBTEST
<<
"size "
<<
size
<<
",
32F
, THRESH_BINARY"
;
gen
(
src
,
size
,
size
,
CV_
8U
,
0
,
100
);
gen
(
src
,
size
,
size
,
CV_
32F
,
0
,
100
);
dst
.
create
(
size
,
size
,
CV_
8U
);
dst
.
create
(
size
,
size
,
CV_
32F
);
CPU_ON
;
CPU_ON
;
threshold
(
src
,
dst
,
50.0
,
0.0
,
THRESH_BINARY
);
threshold
(
src
,
dst
,
50.0
,
0.0
,
THRESH_BINARY
);
CPU_OFF
;
CPU_OFF
;
d_src
=
src
;
d_src
=
src
;
d_dst
.
create
(
size
,
size
,
CV_
8U
);
d_dst
.
create
(
size
,
size
,
CV_
32F
);
GPU_ON
;
GPU_ON
;
gpu
::
threshold
(
d_src
,
d_dst
,
50.0
,
0.0
,
THRESH_BINARY
);
gpu
::
threshold
(
d_src
,
d_dst
,
50.0
,
0.0
,
THRESH_BINARY
);
GPU_OFF
;
GPU_OFF
;
}
}
}
for
(
int
size
=
2000
;
size
<=
4000
;
size
+=
1000
)
TEST
(
pow
)
{
Mat
src
,
dst
;
gpu
::
GpuMat
d_src
,
d_dst
;
for
(
int
size
=
1000
;
size
<=
4000
;
size
+=
1000
)
{
{
SUBTEST
<<
"size "
<<
size
<<
", 32F
, THRESH_TRUNC
"
;
SUBTEST
<<
"size "
<<
size
<<
", 32F"
;
gen
(
src
,
size
,
size
,
CV_32F
,
0
,
100
);
gen
(
src
,
size
,
size
,
CV_32F
,
0
,
100
);
dst
.
create
(
size
,
size
,
CV_32F
);
dst
.
create
(
size
,
size
,
CV_32F
);
CPU_ON
;
CPU_ON
;
threshold
(
src
,
dst
,
50.0
,
0.0
,
THRESH_TRUNC
);
pow
(
src
,
-
2.0
,
dst
);
CPU_OFF
;
CPU_OFF
;
d_src
=
src
;
d_src
=
src
;
d_dst
.
create
(
size
,
size
,
CV_32F
);
d_dst
.
create
(
size
,
size
,
CV_32F
);
GPU_ON
;
GPU_ON
;
gpu
::
threshold
(
d_src
,
d_dst
,
50.0
,
0.0
,
THRESH_TRUNC
);
gpu
::
pow
(
d_src
,
-
2.0
,
d_dst
);
GPU_OFF
;
GPU_OFF
;
}
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment