Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
5e9ae6b1
Commit
5e9ae6b1
authored
Aug 17, 2011
by
Vladislav Vinogradov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added TransformFunctorTraits, optimized some functions that use transform
parent
6ce2277c
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
452 additions
and
145 deletions
+452
-145
element_operations.cu
modules/gpu/src/cuda/element_operations.cu
+103
-17
mathfunc.cu
modules/gpu/src/cuda/mathfunc.cu
+0
-5
matrix_operations.cu
modules/gpu/src/cuda/matrix_operations.cu
+50
-4
cudastream.cpp
modules/gpu/src/cudastream.cpp
+11
-18
element_operations.cpp
modules/gpu/src/element_operations.cpp
+0
-0
gpumat.cpp
modules/gpu/src/gpumat.cpp
+6
-6
transform.hpp
modules/gpu/src/opencv2/gpu/device/detail/transform.hpp
+0
-0
functional.hpp
modules/gpu/src/opencv2/gpu/device/functional.hpp
+94
-41
vec_math.hpp
modules/gpu/src/opencv2/gpu/device/vec_math.hpp
+68
-24
vec_traits.hpp
modules/gpu/src/opencv2/gpu/device/vec_traits.hpp
+97
-13
tests.cpp
samples/gpu/performance/tests.cpp
+23
-17
No files found.
modules/gpu/src/cuda/element_operations.cu
View file @
5e9ae6b1
...
...
@@ -47,37 +47,33 @@
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "internal_shared.hpp"
using namespace cv::gpu;
using namespace cv::gpu::device;
namespace cv { namespace gpu { namespace mathfunc
namespace cv { namespace gpu { namespace device
{
//////////////////////////////////////////////////////////////////////////////////////
// Compare
template <typename T
1, typename T2> struct NotEqual : binary_function<T1, T2
, uchar>
template <typename T
> struct NotEqual : binary_function<T, T
, uchar>
{
__device__ __forceinline__ uchar operator()(
const T1& src1, const T2&
src2) const
__device__ __forceinline__ uchar operator()(
T src1, T
src2) const
{
return static_cast<uchar>(static_cast<int>(src1 != src2) * 255);
}
};
template <typename T
1, typename T2
>
template <typename T>
inline void compare_ne(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst, cudaStream_t stream)
{
NotEqual<T
1, T2
> op;
transform(static_cast< DevMem2D_<T
1> >(src1), static_cast< DevMem2D_<T2
> >(src2), dst, op, stream);
NotEqual<T> op;
transform(static_cast< DevMem2D_<T
> >(src1), static_cast< DevMem2D_<T
> >(src2), dst, op, stream);
}
void compare_ne_8uc4(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst, cudaStream_t stream)
{
compare_ne<uint
, uint
>(src1, src2, dst, stream);
compare_ne<uint>(src1, src2, dst, stream);
}
void compare_ne_32f(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst, cudaStream_t stream)
{
compare_ne<float
, float
>(src1, src2, dst, stream);
compare_ne<float>(src1, src2, dst, stream);
}
...
...
@@ -354,6 +350,35 @@ namespace cv { namespace gpu { namespace mathfunc
//////////////////////////////////////////////////////////////////////////
// min/max
namespace detail
{
template <size_t size, typename F> struct MinMaxTraits : DefaultTransformFunctorTraits<F>
{
};
template <typename F> struct MinMaxTraits<2, F> : DefaultTransformFunctorTraits<F>
{
enum { smart_shift = 4 };
};
template <typename F> struct MinMaxTraits<4, F> : DefaultTransformFunctorTraits<F>
{
enum { smart_block_dim_y = 4 };
enum { smart_shift = 4 };
};
}
template <typename T> struct TransformFunctorTraits< minimum<T> > : detail::MinMaxTraits< sizeof(T), minimum<T> >
{
};
template <typename T> struct TransformFunctorTraits< maximum<T> > : detail::MinMaxTraits< sizeof(T), maximum<T> >
{
};
template <typename T> struct TransformFunctorTraits< binder2nd< minimum<T> > > : detail::MinMaxTraits< sizeof(T), binder2nd< minimum<T> > >
{
};
template <typename T> struct TransformFunctorTraits< binder2nd< maximum<T> > > : detail::MinMaxTraits< sizeof(T), binder2nd< maximum<T> > >
{
};
template <typename T>
void min_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream)
...
...
@@ -413,7 +438,39 @@ namespace cv { namespace gpu { namespace mathfunc
//////////////////////////////////////////////////////////////////////////
// threshold
// threshold
namespace detail
{
template <size_t size, typename F> struct ThresholdTraits : DefaultTransformFunctorTraits<F>
{
};
template <typename F> struct ThresholdTraits<2, F> : DefaultTransformFunctorTraits<F>
{
enum { smart_shift = 4 };
};
template <typename F> struct ThresholdTraits<4, F> : DefaultTransformFunctorTraits<F>
{
enum { smart_block_dim_y = 4 };
enum { smart_shift = 4 };
};
}
template <typename T> struct TransformFunctorTraits< thresh_binary_func<T> > : detail::ThresholdTraits< sizeof(T), thresh_binary_func<T> >
{
};
template <typename T> struct TransformFunctorTraits< thresh_binary_inv_func<T> > : detail::ThresholdTraits< sizeof(T), thresh_binary_inv_func<T> >
{
};
template <typename T> struct TransformFunctorTraits< thresh_trunc_func<T> > : detail::ThresholdTraits< sizeof(T), thresh_trunc_func<T> >
{
};
template <typename T> struct TransformFunctorTraits< thresh_to_zero_func<T> > : detail::ThresholdTraits< sizeof(T), thresh_to_zero_func<T> >
{
};
template <typename T> struct TransformFunctorTraits< thresh_to_zero_inv_func<T> > : detail::ThresholdTraits< sizeof(T), thresh_to_zero_inv_func<T> >
{
};
template <template <typename> class Op, typename T>
void threshold_caller(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, T thresh, T maxVal,
...
...
@@ -454,8 +511,13 @@ namespace cv { namespace gpu { namespace mathfunc
//////////////////////////////////////////////////////////////////////////
// subtract
template <typename T>
void subtractCaller(const DevMem2D src1, const DevMem2D src2, DevMem2D dst, cudaStream_t stream)
template <> struct TransformFunctorTraits< minus<short> > : DefaultTransformFunctorTraits< minus<short> >
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
template <typename T> void subtractCaller(const DevMem2D src1, const DevMem2D src2, DevMem2D dst, cudaStream_t stream)
{
transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<T>)dst, minus<T>(), stream);
}
...
...
@@ -499,10 +561,35 @@ namespace cv { namespace gpu { namespace mathfunc
__device__ __forceinline__ float operator()(const float& e) const
{
return __powf(fabs(e), power);
return __powf(
::
fabs(e), power);
}
};
namespace detail
{
template <size_t size, typename T> struct PowOpTraits : DefaultTransformFunctorTraits< PowOp<T> >
{
};
template <typename T> struct PowOpTraits<1, T> : DefaultTransformFunctorTraits< PowOp<T> >
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 8 };
};
template <typename T> struct PowOpTraits<2, T> : DefaultTransformFunctorTraits< PowOp<T> >
{
enum { smart_shift = 4 };
};
template <typename T> struct PowOpTraits<4, T> : DefaultTransformFunctorTraits< PowOp<T> >
{
enum { smart_block_dim_y = 4 };
enum { smart_shift = 4 };
};
}
template <typename T> struct TransformFunctorTraits< PowOp<T> > : detail::PowOpTraits<sizeof(T), T>
{
};
template<typename T>
void pow_caller(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream)
{
...
...
@@ -514,6 +601,5 @@ namespace cv { namespace gpu { namespace mathfunc
template void pow_caller<short>(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream);
template void pow_caller<ushort>(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream);
template void pow_caller<int>(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream);
template void pow_caller<uint>(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream);
template void pow_caller<float>(const DevMem2D& src, float power, DevMem2D dst, cudaStream_t stream);
}}}
modules/gpu/src/cuda/mathfunc.cu
View file @
5e9ae6b1
...
...
@@ -40,14 +40,9 @@
//
//M*/
#include "opencv2/gpu/device/limits.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/vec_math.hpp"
#include "opencv2/gpu/device/transform.hpp"
#include "internal_shared.hpp"
using namespace cv::gpu;
using namespace cv::gpu::device;
#ifndef CV_PI
#define CV_PI 3.1415926535897932384626433832795f
...
...
modules/gpu/src/cuda/matrix_operations.cu
View file @
5e9ae6b1
...
...
@@ -45,9 +45,7 @@
#include "opencv2/gpu/device/transform.hpp"
#include "opencv2/gpu/device/functional.hpp"
using namespace cv::gpu::device;
namespace cv { namespace gpu { namespace matrix_operations {
namespace cv { namespace gpu { namespace device {
template <typename T> struct shift_and_sizeof;
template <> struct shift_and_sizeof<signed char> { enum { shift = 0 }; };
...
...
@@ -249,7 +247,55 @@ namespace cv { namespace gpu { namespace matrix_operations {
const double alpha, beta;
};
namespace detail
{
template <size_t src_size, size_t dst_size, typename F> struct ConvertTraitsDispatcher : DefaultTransformFunctorTraits<F>
{
};
template <typename F> struct ConvertTraitsDispatcher<1, 1, F> : DefaultTransformFunctorTraits<F>
{
enum { smart_shift = 8 };
};
template <typename F> struct ConvertTraitsDispatcher<1, 2, F> : DefaultTransformFunctorTraits<F>
{
enum { smart_shift = 4 };
};
template <typename F> struct ConvertTraitsDispatcher<1, 4, F> : DefaultTransformFunctorTraits<F>
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
template <typename F> struct ConvertTraitsDispatcher<2, 2, F> : DefaultTransformFunctorTraits<F>
{
enum { smart_shift = 4 };
};
template <typename F> struct ConvertTraitsDispatcher<2, 4, F> : DefaultTransformFunctorTraits<F>
{
enum { smart_shift = 2 };
};
template <typename F> struct ConvertTraitsDispatcher<4, 2, F> : DefaultTransformFunctorTraits<F>
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
template <typename F> struct ConvertTraitsDispatcher<4, 4, F> : DefaultTransformFunctorTraits<F>
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 2 };
};
template <typename F> struct ConvertTraits : ConvertTraitsDispatcher<sizeof(typename F::argument_type), sizeof(typename F::result_type), F>
{
};
}
template <typename T, typename D> struct TransformFunctorTraits< Convertor<T, D> > : detail::ConvertTraits< Convertor<T, D> >
{
};
template<typename T, typename D>
void cvt_(const DevMem2D& src, const DevMem2D& dst, double alpha, double beta, cudaStream_t stream)
{
...
...
modules/gpu/src/cudastream.cpp
View file @
5e9ae6b1
...
...
@@ -71,23 +71,16 @@ cv::gpu::Stream::operator bool() const { throw_nogpu(); return false; }
#include "opencv2/gpu/stream_accessor.hpp"
namespace
cv
{
namespace
gpu
{
namespace
matrix_operations
{
void
copy_to_with_mask
(
const
DevMem2D
&
src
,
DevMem2D
dst
,
int
depth
,
const
DevMem2D
&
mask
,
int
channels
,
const
cudaStream_t
&
stream
=
0
);
namespace
cv
{
namespace
gpu
{
namespace
device
{
void
copy_to_with_mask
(
const
DevMem2D
&
src
,
DevMem2D
dst
,
int
depth
,
const
DevMem2D
&
mask
,
int
channels
,
const
cudaStream_t
&
stream
=
0
);
template
<
typename
T
>
void
set_to_gpu
(
const
DevMem2D
&
mat
,
const
T
*
scalar
,
int
channels
,
cudaStream_t
stream
);
template
<
typename
T
>
void
set_to_gpu
(
const
DevMem2D
&
mat
,
const
T
*
scalar
,
const
DevMem2D
&
mask
,
int
channels
,
cudaStream_t
stream
);
template
<
typename
T
>
void
set_to_gpu
(
const
DevMem2D
&
mat
,
const
T
*
scalar
,
int
channels
,
cudaStream_t
stream
);
template
<
typename
T
>
void
set_to_gpu
(
const
DevMem2D
&
mat
,
const
T
*
scalar
,
const
DevMem2D
&
mask
,
int
channels
,
cudaStream_t
stream
);
void
convert_gpu
(
const
DevMem2D
&
src
,
int
sdepth
,
const
DevMem2D
&
dst
,
int
ddepth
,
double
alpha
,
double
beta
,
cudaStream_t
stream
=
0
);
}
}
}
void
convert_gpu
(
const
DevMem2D
&
src
,
int
sdepth
,
const
DevMem2D
&
dst
,
int
ddepth
,
double
alpha
,
double
beta
,
cudaStream_t
stream
=
0
);
}}}
struct
Stream
::
Impl
{
...
...
@@ -108,14 +101,14 @@ namespace
void
kernelSet
(
GpuMat
&
src
,
const
Scalar
&
s
,
cudaStream_t
stream
)
{
Scalar_
<
T
>
sf
=
s
;
matrix_operations
::
set_to_gpu
(
src
,
sf
.
val
,
src
.
channels
(),
stream
);
device
::
set_to_gpu
(
src
,
sf
.
val
,
src
.
channels
(),
stream
);
}
template
<
typename
T
>
void
kernelSetMask
(
GpuMat
&
src
,
const
Scalar
&
s
,
const
GpuMat
&
mask
,
cudaStream_t
stream
)
{
Scalar_
<
T
>
sf
=
s
;
matrix_operations
::
set_to_gpu
(
src
,
sf
.
val
,
mask
,
src
.
channels
(),
stream
);
device
::
set_to_gpu
(
src
,
sf
.
val
,
mask
,
src
.
channels
(),
stream
);
}
}
...
...
@@ -262,7 +255,7 @@ void cv::gpu::Stream::enqueueConvert(const GpuMat& src, GpuMat& dst, int rtype,
psrc
=
&
(
temp
=
src
);
dst
.
create
(
src
.
size
(),
rtype
);
matrix_operations
::
convert_gpu
(
psrc
->
reshape
(
1
),
sdepth
,
dst
.
reshape
(
1
),
ddepth
,
alpha
,
beta
,
impl
->
stream
);
device
::
convert_gpu
(
psrc
->
reshape
(
1
),
sdepth
,
dst
.
reshape
(
1
),
ddepth
,
alpha
,
beta
,
impl
->
stream
);
}
cv
::
gpu
::
Stream
::
operator
bool
()
const
...
...
modules/gpu/src/element_operations.cpp
View file @
5e9ae6b1
This diff is collapsed.
Click to expand it.
modules/gpu/src/gpumat.cpp
View file @
5e9ae6b1
...
...
@@ -393,7 +393,7 @@ void cv::gpu::ensureSizeIsEnough(int, int, int, GpuMat&) { throw_nogpu(); }
#else
/* !defined (HAVE_CUDA) */
namespace
cv
{
namespace
gpu
{
namespace
matrix_operations
namespace
cv
{
namespace
gpu
{
namespace
device
{
void
copy_to_with_mask
(
const
DevMem2D
&
src
,
DevMem2D
dst
,
int
depth
,
const
DevMem2D
&
mask
,
int
channels
,
const
cudaStream_t
&
stream
=
0
);
...
...
@@ -449,7 +449,7 @@ void cv::gpu::GpuMat::copyTo(GpuMat& mat, const GpuMat& mask) const
else
{
mat
.
create
(
size
(),
type
());
cv
::
gpu
::
matrix_operations
::
copy_to_with_mask
(
*
this
,
mat
,
depth
(),
mask
,
channels
());
device
::
copy_to_with_mask
(
*
this
,
mat
,
depth
(),
mask
,
channels
());
}
}
...
...
@@ -508,7 +508,7 @@ namespace
void
convertToKernelCaller
(
const
GpuMat
&
src
,
GpuMat
&
dst
)
{
matrix_operations
::
convert_gpu
(
src
.
reshape
(
1
),
src
.
depth
(),
dst
.
reshape
(
1
),
dst
.
depth
(),
1.0
,
0.0
);
device
::
convert_gpu
(
src
.
reshape
(
1
),
src
.
depth
(),
dst
.
reshape
(
1
),
dst
.
depth
(),
1.0
,
0.0
);
}
}
...
...
@@ -540,7 +540,7 @@ void cv::gpu::GpuMat::convertTo( GpuMat& dst, int rtype, double alpha, double be
dst
.
create
(
size
(),
rtype
);
if
(
!
noScale
)
matrix_operations
::
convert_gpu
(
psrc
->
reshape
(
1
),
sdepth
,
dst
.
reshape
(
1
),
ddepth
,
alpha
,
beta
);
device
::
convert_gpu
(
psrc
->
reshape
(
1
),
sdepth
,
dst
.
reshape
(
1
),
ddepth
,
alpha
,
beta
);
else
{
typedef
void
(
*
convert_caller_t
)(
const
GpuMat
&
src
,
GpuMat
&
dst
);
...
...
@@ -681,7 +681,7 @@ namespace
void
kernelSet
(
GpuMat
&
src
,
const
Scalar
&
s
)
{
Scalar_
<
T
>
sf
=
s
;
matrix_operations
::
set_to_gpu
(
src
,
sf
.
val
,
src
.
channels
(),
0
);
device
::
set_to_gpu
(
src
,
sf
.
val
,
src
.
channels
(),
0
);
}
template
<
int
SDEPTH
,
int
SCN
>
struct
NppSetMaskFunc
...
...
@@ -732,7 +732,7 @@ namespace
void
kernelSetMask
(
GpuMat
&
src
,
const
Scalar
&
s
,
const
GpuMat
&
mask
)
{
Scalar_
<
T
>
sf
=
s
;
matrix_operations
::
set_to_gpu
(
src
,
sf
.
val
,
mask
,
src
.
channels
(),
0
);
device
::
set_to_gpu
(
src
,
sf
.
val
,
mask
,
src
.
channels
(),
0
);
}
}
...
...
modules/gpu/src/opencv2/gpu/device/detail/transform.hpp
View file @
5e9ae6b1
This diff is collapsed.
Click to expand it.
modules/gpu/src/opencv2/gpu/device/functional.hpp
View file @
5e9ae6b1
...
...
@@ -46,18 +46,25 @@
#include <thrust/functional.h>
#include "internal_shared.hpp"
#include "saturate_cast.hpp"
#include "vec_traits.hpp"
namespace
cv
{
namespace
gpu
{
namespace
device
{
// Function Objects
using
thrust
::
unary_function
;
using
thrust
::
binary_function
;
// Arithmetic Operations
using
thrust
::
plus
;
using
thrust
::
minus
;
using
thrust
::
multiplies
;
using
thrust
::
divides
;
using
thrust
::
modulus
;
using
thrust
::
negate
;
// Comparison Operations
using
thrust
::
equal_to
;
using
thrust
::
not_equal_to
;
...
...
@@ -65,11 +72,15 @@ namespace cv { namespace gpu { namespace device
using
thrust
::
less
;
using
thrust
::
greater_equal
;
using
thrust
::
less_equal
;
// Logical Operations
using
thrust
::
logical_and
;
using
thrust
::
logical_or
;
using
thrust
::
logical_not
;
// Bitwise Operations
using
thrust
::
bit_and
;
using
thrust
::
bit_or
;
using
thrust
::
bit_xor
;
...
...
@@ -78,7 +89,13 @@ namespace cv { namespace gpu { namespace device
__forceinline__
__device__
T
operator
()(
const
T
&
v
)
const
{
return
~
v
;}
};
using
thrust
::
identity
;
// Generalized Identity Operations
using
thrust
::
identity
;
using
thrust
::
project1st
;
using
thrust
::
project2nd
;
// Min/Max Operations
#define OPENCV_GPU_IMPLEMENT_MINMAX(name, type, op) \
template
<>
struct
name
<
type
>
:
binary_function
<
type
,
type
,
type
>
\
...
...
@@ -115,15 +132,8 @@ namespace cv { namespace gpu { namespace device
OPENCV_GPU_IMPLEMENT_MINMAX
(
minimum
,
double
,
fmin
)
#undef OPENCV_GPU_IMPLEMENT_MINMAX
using
thrust
::
project1st
;
using
thrust
::
project2nd
;
using
thrust
::
unary_negate
;
using
thrust
::
not1
;
using
thrust
::
binary_negate
;
using
thrust
::
not2
;
// Math functions
#define OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(func) \
template
<
typename
T
>
struct
func
##
_func
:
unary_function
<
T
,
float
>
\
...
...
@@ -192,6 +202,8 @@ namespace cv { namespace gpu { namespace device
}
};
// Saturate Cast Functor
template
<
typename
T
,
typename
D
>
struct
saturate_cast_func
:
unary_function
<
T
,
D
>
{
__forceinline__
__device__
D
operator
()(
const
T
&
v
)
const
...
...
@@ -200,6 +212,8 @@ namespace cv { namespace gpu { namespace device
}
};
// Threshold Functors
template
<
typename
T
>
struct
thresh_binary_func
:
unary_function
<
T
,
T
>
{
__forceinline__
__host__
__device__
thresh_binary_func
(
T
thresh_
,
T
maxVal_
)
:
thresh
(
thresh_
),
maxVal
(
maxVal_
)
{}
...
...
@@ -256,7 +270,15 @@ namespace cv { namespace gpu { namespace device
}
const
T
thresh
;
};
};
// Function Object Adaptors
using
thrust
::
unary_negate
;
using
thrust
::
not1
;
using
thrust
::
binary_negate
;
using
thrust
::
not2
;
template
<
typename
Op
>
struct
binder1st
:
unary_function
<
typename
Op
::
second_argument_type
,
typename
Op
::
result_type
>
{
...
...
@@ -291,46 +313,77 @@ namespace cv { namespace gpu { namespace device
return
binder2nd
<
Op
>
(
op
,
typename
Op
::
second_argument_type
(
x
));
}
template
<
typename
T1
,
typename
T2
>
struct
BinOpTraits
{
typedef
int
argument_type
;
};
template
<
typename
T
>
struct
BinOpTraits
<
T
,
T
>
{
typedef
T
argument_type
;
};
template
<
typename
T
>
struct
BinOpTraits
<
T
,
double
>
{
typedef
double
argument_type
;
};
template
<
typename
T
>
struct
BinOpTraits
<
double
,
T
>
{
typedef
double
argument_type
;
};
template
<>
struct
BinOpTraits
<
double
,
double
>
{
typedef
double
argument_type
;
};
template
<
typename
T
>
struct
BinOpTraits
<
T
,
float
>
// Functor Traits
template
<
typename
F
>
struct
IsUnaryFunction
{
typedef
float
argument_type
;
struct
Yes
{};
struct
No
{
Yes
a
[
2
];};
template
<
typename
T
,
typename
D
>
static
Yes
check
(
unary_function
<
T
,
D
>*
);
static
No
check
(...);
enum
{
value
=
(
sizeof
(
check
((
F
*
)
0
))
==
sizeof
(
Yes
))
};
};
template
<
typename
T
>
struct
BinOpTraits
<
float
,
T
>
template
<
typename
F
>
struct
IsBinaryFunction
{
typedef
float
argument_type
;
struct
Yes
{};
struct
No
{
Yes
a
[
2
];};
template
<
typename
T1
,
typename
T2
,
typename
D
>
static
Yes
check
(
binary_function
<
T1
,
T2
,
D
>*
);
static
No
check
(...);
enum
{
value
=
(
sizeof
(
check
((
F
*
)
0
))
==
sizeof
(
Yes
))
};
};
template
<>
struct
BinOpTraits
<
float
,
float
>
namespace
detail
{
typedef
float
argument_type
;
};
template
<>
struct
BinOpTraits
<
double
,
float
>
template
<
size_t
src_elem_size
,
size_t
dst_elem_size
>
struct
UnOpShift
{
enum
{
shift
=
1
};
};
template
<
size_t
src_elem_size
>
struct
UnOpShift
<
src_elem_size
,
1
>
{
enum
{
shift
=
4
};
};
template
<
size_t
src_elem_size
>
struct
UnOpShift
<
src_elem_size
,
2
>
{
enum
{
shift
=
2
};
};
template
<
typename
T
,
typename
D
>
struct
DefaultUnaryShift
{
enum
{
shift
=
detail
::
UnOpShift
<
sizeof
(
T
),
sizeof
(
D
)
>::
shift
};
};
template
<
size_t
src_elem_size1
,
size_t
src_elem_size2
,
size_t
dst_elem_size
>
struct
BinOpShift
{
enum
{
shift
=
1
};
};
template
<
size_t
src_elem_size1
,
size_t
src_elem_size2
>
struct
BinOpShift
<
src_elem_size1
,
src_elem_size2
,
1
>
{
enum
{
shift
=
4
};
};
template
<
size_t
src_elem_size1
,
size_t
src_elem_size2
>
struct
BinOpShift
<
src_elem_size1
,
src_elem_size2
,
2
>
{
enum
{
shift
=
2
};
};
template
<
typename
T1
,
typename
T2
,
typename
D
>
struct
DefaultBinaryShift
{
enum
{
shift
=
detail
::
BinOpShift
<
sizeof
(
T1
),
sizeof
(
T2
),
sizeof
(
D
)
>::
shift
};
};
template
<
typename
Func
,
bool
unary
=
IsUnaryFunction
<
Func
>::
value
>
struct
ShiftDispatcher
;
template
<
typename
Func
>
struct
ShiftDispatcher
<
Func
,
true
>
{
enum
{
shift
=
DefaultUnaryShift
<
typename
Func
::
argument_type
,
typename
Func
::
result_type
>::
shift
};
};
template
<
typename
Func
>
struct
ShiftDispatcher
<
Func
,
false
>
{
enum
{
shift
=
DefaultBinaryShift
<
typename
Func
::
first_argument_type
,
typename
Func
::
second_argument_type
,
typename
Func
::
result_type
>::
shift
};
};
}
template
<
typename
Func
>
struct
DefaultTransformShift
{
typedef
double
argument_type
;
enum
{
shift
=
detail
::
ShiftDispatcher
<
Func
>::
shift
}
;
};
template
<>
struct
BinOpTraits
<
float
,
double
>
template
<
typename
Func
>
struct
DefaultTransformFunctorTraits
{
typedef
double
argument_type
;
enum
{
simple_block_dim_x
=
16
};
enum
{
simple_block_dim_y
=
16
};
enum
{
smart_block_dim_x
=
16
};
enum
{
smart_block_dim_y
=
16
};
enum
{
smart_shift
=
DefaultTransformShift
<
Func
>::
shift
};
};
template
<
typename
Func
>
struct
TransformFunctorTraits
:
DefaultTransformFunctorTraits
<
Func
>
{};
}}}
#endif // __OPENCV_GPU_FUNCTIONAL_HPP__
modules/gpu/src/opencv2/gpu/device/vec_math.hpp
View file @
5e9ae6b1
...
...
@@ -150,6 +150,50 @@ namespace cv { namespace gpu { namespace device
return
VecTraits
<
TypeVec
<
func
<
type
>::
result_type
,
4
>::
vec_type
>::
make
(
f
(
a
.
x
),
f
(
a
.
y
),
f
(
a
.
z
),
f
(
a
.
w
));
\
}
namespace
detail
{
template
<
typename
T1
,
typename
T2
>
struct
BinOpTraits
{
typedef
int
argument_type
;
};
template
<
typename
T
>
struct
BinOpTraits
<
T
,
T
>
{
typedef
T
argument_type
;
};
template
<
typename
T
>
struct
BinOpTraits
<
T
,
double
>
{
typedef
double
argument_type
;
};
template
<
typename
T
>
struct
BinOpTraits
<
double
,
T
>
{
typedef
double
argument_type
;
};
template
<>
struct
BinOpTraits
<
double
,
double
>
{
typedef
double
argument_type
;
};
template
<
typename
T
>
struct
BinOpTraits
<
T
,
float
>
{
typedef
float
argument_type
;
};
template
<
typename
T
>
struct
BinOpTraits
<
float
,
T
>
{
typedef
float
argument_type
;
};
template
<>
struct
BinOpTraits
<
float
,
float
>
{
typedef
float
argument_type
;
};
template
<>
struct
BinOpTraits
<
double
,
float
>
{
typedef
double
argument_type
;
};
template
<>
struct
BinOpTraits
<
float
,
double
>
{
typedef
double
argument_type
;
};
}
#define OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, op, func) \
static
__device__
TypeVec
<
func
<
type
>::
result_type
,
1
>::
vec_type
op
(
const
type
##
1
&
a
,
const
type
##
1
&
b
)
\
{
\
...
...
@@ -157,16 +201,16 @@ namespace cv { namespace gpu { namespace device
return
VecTraits
<
TypeVec
<
func
<
type
>::
result_type
,
1
>::
vec_type
>::
make
(
f
(
a
.
x
,
b
.
x
));
\
}
\
template
<
typename
T
>
\
static
__device__
typename
TypeVec
<
typename
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
1
>::
vec_type
op
(
const
type
##
1
&
v
,
T
s
)
\
static
__device__
typename
TypeVec
<
typename
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
1
>::
vec_type
op
(
const
type
##
1
&
v
,
T
s
)
\
{
\
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>
f
;
\
return
VecTraits
<
typename
TypeVec
<
typename
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
1
>::
vec_type
>::
make
(
f
(
v
.
x
,
s
));
\
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>
f
;
\
return
VecTraits
<
typename
TypeVec
<
typename
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
1
>::
vec_type
>::
make
(
f
(
v
.
x
,
s
));
\
}
\
template
<
typename
T
>
\
static
__device__
typename
TypeVec
<
typename
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
1
>::
vec_type
op
(
T
s
,
const
type
##
1
&
v
)
\
static
__device__
typename
TypeVec
<
typename
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
1
>::
vec_type
op
(
T
s
,
const
type
##
1
&
v
)
\
{
\
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>
f
;
\
return
VecTraits
<
typename
TypeVec
<
typename
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
1
>::
vec_type
>::
make
(
f
(
s
,
v
.
x
));
\
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>
f
;
\
return
VecTraits
<
typename
TypeVec
<
typename
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
1
>::
vec_type
>::
make
(
f
(
s
,
v
.
x
));
\
}
\
static
__device__
TypeVec
<
func
<
type
>::
result_type
,
2
>::
vec_type
op
(
const
type
##
2
&
a
,
const
type
##
2
&
b
)
\
{
\
...
...
@@ -174,16 +218,16 @@ namespace cv { namespace gpu { namespace device
return
VecTraits
<
TypeVec
<
func
<
type
>::
result_type
,
2
>::
vec_type
>::
make
(
f
(
a
.
x
,
b
.
x
),
f
(
a
.
y
,
b
.
y
));
\
}
\
template
<
typename
T
>
\
static
__device__
typename
TypeVec
<
typename
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
2
>::
vec_type
op
(
const
type
##
2
&
v
,
T
s
)
\
static
__device__
typename
TypeVec
<
typename
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
2
>::
vec_type
op
(
const
type
##
2
&
v
,
T
s
)
\
{
\
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>
f
;
\
return
VecTraits
<
typename
TypeVec
<
typename
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
2
>::
vec_type
>::
make
(
f
(
v
.
x
,
s
),
f
(
v
.
y
,
s
));
\
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>
f
;
\
return
VecTraits
<
typename
TypeVec
<
typename
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
2
>::
vec_type
>::
make
(
f
(
v
.
x
,
s
),
f
(
v
.
y
,
s
));
\
}
\
template
<
typename
T
>
\
static
__device__
typename
TypeVec
<
typename
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
2
>::
vec_type
op
(
T
s
,
const
type
##
2
&
v
)
\
static
__device__
typename
TypeVec
<
typename
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
2
>::
vec_type
op
(
T
s
,
const
type
##
2
&
v
)
\
{
\
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>
f
;
\
return
VecTraits
<
typename
TypeVec
<
typename
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
2
>::
vec_type
>::
make
(
f
(
s
,
v
.
x
),
f
(
s
,
v
.
y
));
\
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>
f
;
\
return
VecTraits
<
typename
TypeVec
<
typename
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
2
>::
vec_type
>::
make
(
f
(
s
,
v
.
x
),
f
(
s
,
v
.
y
));
\
}
\
static
__device__
TypeVec
<
func
<
type
>::
result_type
,
3
>::
vec_type
op
(
const
type
##
3
&
a
,
const
type
##
3
&
b
)
\
{
\
...
...
@@ -191,16 +235,16 @@ namespace cv { namespace gpu { namespace device
return
VecTraits
<
TypeVec
<
func
<
type
>::
result_type
,
3
>::
vec_type
>::
make
(
f
(
a
.
x
,
b
.
x
),
f
(
a
.
y
,
b
.
y
),
f
(
a
.
z
,
b
.
z
));
\
}
\
template
<
typename
T
>
\
static
__device__
typename
TypeVec
<
typename
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
3
>::
vec_type
op
(
const
type
##
3
&
v
,
T
s
)
\
static
__device__
typename
TypeVec
<
typename
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
3
>::
vec_type
op
(
const
type
##
3
&
v
,
T
s
)
\
{
\
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>
f
;
\
return
VecTraits
<
typename
TypeVec
<
typename
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
3
>::
vec_type
>::
make
(
f
(
v
.
x
,
s
),
f
(
v
.
y
,
s
),
f
(
v
.
z
,
s
));
\
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>
f
;
\
return
VecTraits
<
typename
TypeVec
<
typename
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
3
>::
vec_type
>::
make
(
f
(
v
.
x
,
s
),
f
(
v
.
y
,
s
),
f
(
v
.
z
,
s
));
\
}
\
template
<
typename
T
>
\
static
__device__
typename
TypeVec
<
typename
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
3
>::
vec_type
op
(
T
s
,
const
type
##
3
&
v
)
\
static
__device__
typename
TypeVec
<
typename
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
3
>::
vec_type
op
(
T
s
,
const
type
##
3
&
v
)
\
{
\
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>
f
;
\
return
VecTraits
<
typename
TypeVec
<
typename
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
3
>::
vec_type
>::
make
(
f
(
s
,
v
.
x
),
f
(
s
,
v
.
y
),
f
(
s
,
v
.
z
));
\
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>
f
;
\
return
VecTraits
<
typename
TypeVec
<
typename
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
3
>::
vec_type
>::
make
(
f
(
s
,
v
.
x
),
f
(
s
,
v
.
y
),
f
(
s
,
v
.
z
));
\
}
\
static
__device__
TypeVec
<
func
<
type
>::
result_type
,
4
>::
vec_type
op
(
const
type
##
4
&
a
,
const
type
##
4
&
b
)
\
{
\
...
...
@@ -208,16 +252,16 @@ namespace cv { namespace gpu { namespace device
return
VecTraits
<
TypeVec
<
func
<
type
>::
result_type
,
4
>::
vec_type
>::
make
(
f
(
a
.
x
,
b
.
x
),
f
(
a
.
y
,
b
.
y
),
f
(
a
.
z
,
b
.
z
),
f
(
a
.
w
,
b
.
w
));
\
}
\
template
<
typename
T
>
\
static
__device__
typename
TypeVec
<
typename
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
4
>::
vec_type
op
(
const
type
##
4
&
v
,
T
s
)
\
static
__device__
typename
TypeVec
<
typename
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
4
>::
vec_type
op
(
const
type
##
4
&
v
,
T
s
)
\
{
\
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>
f
;
\
return
VecTraits
<
typename
TypeVec
<
typename
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
4
>::
vec_type
>::
make
(
f
(
v
.
x
,
s
),
f
(
v
.
y
,
s
),
f
(
v
.
z
,
s
),
f
(
v
.
w
,
s
));
\
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>
f
;
\
return
VecTraits
<
typename
TypeVec
<
typename
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
4
>::
vec_type
>::
make
(
f
(
v
.
x
,
s
),
f
(
v
.
y
,
s
),
f
(
v
.
z
,
s
),
f
(
v
.
w
,
s
));
\
}
\
template
<
typename
T
>
\
static
__device__
typename
TypeVec
<
typename
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
4
>::
vec_type
op
(
T
s
,
const
type
##
4
&
v
)
\
static
__device__
typename
TypeVec
<
typename
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
4
>::
vec_type
op
(
T
s
,
const
type
##
4
&
v
)
\
{
\
func
<
typename
BinOpTraits
<
T
,
type
>::
argument_type
>
f
;
\
return
VecTraits
<
typename
TypeVec
<
typename
func
<
typename
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
4
>::
vec_type
>::
make
(
f
(
s
,
v
.
x
),
f
(
s
,
v
.
y
),
f
(
s
,
v
.
z
),
f
(
s
,
v
.
w
));
\
func
<
typename
detail
::
BinOpTraits
<
T
,
type
>::
argument_type
>
f
;
\
return
VecTraits
<
typename
TypeVec
<
typename
func
<
typename
detail
::
BinOpTraits
<
type
,
T
>::
argument_type
>::
result_type
,
4
>::
vec_type
>::
make
(
f
(
s
,
v
.
x
),
f
(
s
,
v
.
y
),
f
(
s
,
v
.
z
),
f
(
s
,
v
.
w
));
\
}
#define OPENCV_GPU_IMPLEMENT_VEC_OP(type) \
...
...
modules/gpu/src/opencv2/gpu/device/vec_traits.hpp
View file @
5e9ae6b1
...
...
@@ -49,6 +49,79 @@ namespace cv { namespace gpu { namespace device
{
template
<
typename
T
,
int
N
>
struct
TypeVec
;
// Pack of eight unsigned 8-bit elements (a0..a7), declared with an
// 8-byte alignment request.
// NOTE(review): presumably aligned so the whole pack can be moved as one
// wide memory transaction -- confirm against the transform kernels.
struct __align__(8) uchar8
{
    uchar a0;
    uchar a1;
    uchar a2;
    uchar a3;
    uchar a4;
    uchar a5;
    uchar a6;
    uchar a7;
};
// Builds a uchar8 from its eight components, in the same spirit as the
// make_<type>N helpers used elsewhere in this header.
// Arguments are stored in order: a0 -> .a0, ..., a7 -> .a7.
static __host__ __device__ __forceinline__ uchar8 make_uchar8(uchar a0, uchar a1, uchar a2, uchar a3,
                                                              uchar a4, uchar a5, uchar a6, uchar a7)
{
    uchar8 packed;
    packed.a0 = a0;
    packed.a1 = a1;
    packed.a2 = a2;
    packed.a3 = a3;
    packed.a4 = a4;
    packed.a5 = a5;
    packed.a6 = a6;
    packed.a7 = a7;
    return packed;
}
// Pack of eight signed 8-bit elements (a0..a7), declared with an
// 8-byte alignment request. The element type is schar even though the
// pack itself is named char8.
struct __align__(8) char8
{
    schar a0;
    schar a1;
    schar a2;
    schar a3;
    schar a4;
    schar a5;
    schar a6;
    schar a7;
};
// Builds a char8 from its eight schar components.
// Arguments are stored in order: a0 -> .a0, ..., a7 -> .a7.
static __host__ __device__ __forceinline__ char8 make_char8(schar a0, schar a1, schar a2, schar a3,
                                                            schar a4, schar a5, schar a6, schar a7)
{
    char8 packed;
    packed.a0 = a0;
    packed.a1 = a1;
    packed.a2 = a2;
    packed.a3 = a3;
    packed.a4 = a4;
    packed.a5 = a5;
    packed.a6 = a6;
    packed.a7 = a7;
    return packed;
}
// Pack of eight unsigned short elements (a0..a7), declared with a
// 16-byte alignment request.
struct __align__(16) ushort8
{
    ushort a0;
    ushort a1;
    ushort a2;
    ushort a3;
    ushort a4;
    ushort a5;
    ushort a6;
    ushort a7;
};
// Builds a ushort8 from its eight components.
// Arguments are stored in order: a0 -> .a0, ..., a7 -> .a7.
static __host__ __device__ __forceinline__ ushort8 make_ushort8(ushort a0, ushort a1, ushort a2, ushort a3,
                                                                ushort a4, ushort a5, ushort a6, ushort a7)
{
    ushort8 packed;
    packed.a0 = a0;
    packed.a1 = a1;
    packed.a2 = a2;
    packed.a3 = a3;
    packed.a4 = a4;
    packed.a5 = a5;
    packed.a6 = a6;
    packed.a7 = a7;
    return packed;
}
// Pack of eight short elements (a0..a7), declared with a 16-byte
// alignment request.
struct __align__(16) short8
{
    short a0;
    short a1;
    short a2;
    short a3;
    short a4;
    short a5;
    short a6;
    short a7;
};
// Builds a short8 from its eight components.
// Arguments are stored in order: a0 -> .a0, ..., a7 -> .a7.
static __host__ __device__ __forceinline__ short8 make_short8(short a0, short a1, short a2, short a3,
                                                              short a4, short a5, short a6, short a7)
{
    short8 packed;
    packed.a0 = a0;
    packed.a1 = a1;
    packed.a2 = a2;
    packed.a3 = a3;
    packed.a4 = a4;
    packed.a5 = a5;
    packed.a6 = a6;
    packed.a7 = a7;
    return packed;
}
// Pack of eight uint elements (a0..a7), declared with a 32-byte
// alignment request. Note: this is an 8-element vector type, not an
// 8-bit integer.
struct __align__(32) uint8
{
    uint a0;
    uint a1;
    uint a2;
    uint a3;
    uint a4;
    uint a5;
    uint a6;
    uint a7;
};
// Builds a uint8 (eight-element uint pack) from its components.
// Arguments are stored in order: a0 -> .a0, ..., a7 -> .a7.
static __host__ __device__ __forceinline__ uint8 make_uint8(uint a0, uint a1, uint a2, uint a3,
                                                            uint a4, uint a5, uint a6, uint a7)
{
    uint8 packed;
    packed.a0 = a0;
    packed.a1 = a1;
    packed.a2 = a2;
    packed.a3 = a3;
    packed.a4 = a4;
    packed.a5 = a5;
    packed.a6 = a6;
    packed.a7 = a7;
    return packed;
}
// Pack of eight int elements (a0..a7), declared with a 32-byte
// alignment request. Note: this is an 8-element vector type, not an
// 8-bit integer.
struct __align__(32) int8
{
    int a0;
    int a1;
    int a2;
    int a3;
    int a4;
    int a5;
    int a6;
    int a7;
};
// Builds an int8 (eight-element int pack) from its components.
// Arguments are stored in order: a0 -> .a0, ..., a7 -> .a7.
static __host__ __device__ __forceinline__ int8 make_int8(int a0, int a1, int a2, int a3,
                                                          int a4, int a5, int a6, int a7)
{
    int8 packed;
    packed.a0 = a0;
    packed.a1 = a1;
    packed.a2 = a2;
    packed.a3 = a3;
    packed.a4 = a4;
    packed.a5 = a5;
    packed.a6 = a6;
    packed.a7 = a7;
    return packed;
}
// Pack of eight float elements (a0..a7), declared with a 32-byte
// alignment request.
struct __align__(32) float8
{
    float a0;
    float a1;
    float a2;
    float a3;
    float a4;
    float a5;
    float a6;
    float a7;
};
// Builds a float8 from its eight components.
// Arguments are stored in order: a0 -> .a0, ..., a7 -> .a7.
static __host__ __device__ __forceinline__ float8 make_float8(float a0, float a1, float a2, float a3,
                                                              float a4, float a5, float a6, float a7)
{
    float8 packed;
    packed.a0 = a0;
    packed.a1 = a1;
    packed.a2 = a2;
    packed.a3 = a3;
    packed.a4 = a4;
    packed.a5 = a5;
    packed.a6 = a6;
    packed.a7 = a7;
    return packed;
}
// Pack of eight double elements (a0..a7). Unlike the other 8-element
// packs defined alongside it, this one carries no explicit __align__
// request and so uses the natural alignment of double.
struct double8
{
    double a0;
    double a1;
    double a2;
    double a3;
    double a4;
    double a5;
    double a6;
    double a7;
};
// Builds a double8 from its eight components.
// Arguments are stored in order: a0 -> .a0, ..., a7 -> .a7.
static __host__ __device__ __forceinline__ double8 make_double8(double a0, double a1, double a2, double a3,
                                                                double a4, double a5, double a6, double a7)
{
    double8 packed;
    packed.a0 = a0;
    packed.a1 = a1;
    packed.a2 = a2;
    packed.a3 = a3;
    packed.a4 = a4;
    packed.a5 = a5;
    packed.a6 = a6;
    packed.a7 = a7;
    return packed;
}
#define OPENCV_GPU_IMPLEMENT_TYPE_VEC(type) \
template
<>
struct
TypeVec
<
type
,
1
>
{
typedef
type
vec_type
;
};
\
template
<>
struct
TypeVec
<
type
##
1
,
1
>
{
typedef
type
##
1
vec_type
;
};
\
...
...
@@ -57,7 +130,9 @@ namespace cv { namespace gpu { namespace device
template
<>
struct
TypeVec
<
type
,
3
>
{
typedef
type
##
3
vec_type
;
};
\
template
<>
struct
TypeVec
<
type
##
3
,
3
>
{
typedef
type
##
3
vec_type
;
};
\
template
<>
struct
TypeVec
<
type
,
4
>
{
typedef
type
##
4
vec_type
;
};
\
template
<>
struct
TypeVec
<
type
##
4
,
4
>
{
typedef
type
##
4
vec_type
;
};
template
<>
struct
TypeVec
<
type
##
4
,
4
>
{
typedef
type
##
4
vec_type
;
};
\
template
<>
struct
TypeVec
<
type
,
8
>
{
typedef
type
##
8
vec_type
;
};
\
template
<>
struct
TypeVec
<
type
##
8
,
8
>
{
typedef
type
##
8
vec_type
;
};
OPENCV_GPU_IMPLEMENT_TYPE_VEC
(
uchar
)
OPENCV_GPU_IMPLEMENT_TYPE_VEC
(
char
)
...
...
@@ -74,11 +149,13 @@ namespace cv { namespace gpu { namespace device
// TypeVec specializations for schar: N signed-char elements map onto the
// charN vector types (char8 for N == 8).
template <>
struct TypeVec<schar, 2>
{
    typedef char2 vec_type;
};

template <>
struct TypeVec<schar, 3>
{
    typedef char3 vec_type;
};

template <>
struct TypeVec<schar, 4>
{
    typedef char4 vec_type;
};

template <>
struct TypeVec<schar, 8>
{
    typedef char8 vec_type;
};

// TypeVec specializations for bool: bool elements are carried in the
// unsigned-char vector types (plain uchar for N == 1).
template <>
struct TypeVec<bool, 1>
{
    typedef uchar vec_type;
};

template <>
struct TypeVec<bool, 2>
{
    typedef uchar2 vec_type;
};

template <>
struct TypeVec<bool, 3>
{
    typedef uchar3 vec_type;
};

template <>
struct TypeVec<bool, 4>
{
    typedef uchar4 vec_type;
};

template <>
struct TypeVec<bool, 8>
{
    typedef uchar8 vec_type;
};
template
<
typename
T
>
struct
VecTraits
;
...
...
@@ -87,36 +164,43 @@ namespace cv { namespace gpu { namespace device
{
\
typedef
type
elem_type
;
\
enum
{
cn
=
1
};
\
static
__device__
__host__
type
all
(
type
v
)
{
return
v
;}
\
static
__device__
__host__
type
make
(
type
x
)
{
return
x
;}
\
static
__device__
__host__
__forceinline__
type
all
(
type
v
)
{
return
v
;}
\
static
__device__
__host__
__forceinline__
type
make
(
type
x
)
{
return
x
;}
\
};
\
template
<>
struct
VecTraits
<
type
##
1
>
\
{
\
typedef
type
elem_type
;
\
enum
{
cn
=
1
};
\
static
__device__
__host__
type
##
1
all
(
type
v
)
{
return
make_
##
type
##
1
(
v
);}
\
static
__device__
__host__
type
##
1
make
(
type
x
)
{
return
make_
##
type
##
1
(
x
);}
\
static
__device__
__host__
__forceinline__
type
##
1
all
(
type
v
)
{
return
make_
##
type
##
1
(
v
);}
\
static
__device__
__host__
__forceinline__
type
##
1
make
(
type
x
)
{
return
make_
##
type
##
1
(
x
);}
\
};
\
template
<>
struct
VecTraits
<
type
##
2
>
\
{
\
typedef
type
elem_type
;
\
enum
{
cn
=
2
};
\
static
__device__
__host__
type
##
2
all
(
type
v
)
{
return
make_
##
type
##
2
(
v
,
v
);}
\
static
__device__
__host__
type
##
2
make
(
type
x
,
type
y
)
{
return
make_
##
type
##
2
(
x
,
y
);}
\
static
__device__
__host__
__forceinline__
type
##
2
all
(
type
v
)
{
return
make_
##
type
##
2
(
v
,
v
);}
\
static
__device__
__host__
__forceinline__
type
##
2
make
(
type
x
,
type
y
)
{
return
make_
##
type
##
2
(
x
,
y
);}
\
};
\
template
<>
struct
VecTraits
<
type
##
3
>
\
{
\
typedef
type
elem_type
;
\
enum
{
cn
=
3
};
\
static
__device__
__host__
type
##
3
all
(
type
v
)
{
return
make_
##
type
##
3
(
v
,
v
,
v
);}
\
static
__device__
__host__
type
##
3
make
(
type
x
,
type
y
,
type
z
)
{
return
make_
##
type
##
3
(
x
,
y
,
z
);}
\
static
__device__
__host__
__forceinline__
type
##
3
all
(
type
v
)
{
return
make_
##
type
##
3
(
v
,
v
,
v
);}
\
static
__device__
__host__
__forceinline__
type
##
3
make
(
type
x
,
type
y
,
type
z
)
{
return
make_
##
type
##
3
(
x
,
y
,
z
);}
\
};
\
template
<>
struct
VecTraits
<
type
##
4
>
\
{
\
typedef
type
elem_type
;
\
enum
{
cn
=
4
};
\
static
__device__
__host__
type
##
4
all
(
type
v
)
{
return
make_
##
type
##
4
(
v
,
v
,
v
,
v
);}
\
static
__device__
__host__
type
##
4
make
(
type
x
,
type
y
,
type
z
,
type
w
)
{
return
make_
##
type
##
4
(
x
,
y
,
z
,
w
);}
\
static
__device__
__host__
__forceinline__
type
##
4
all
(
type
v
)
{
return
make_
##
type
##
4
(
v
,
v
,
v
,
v
);}
\
static
__device__
__host__
__forceinline__
type
##
4
make
(
type
x
,
type
y
,
type
z
,
type
w
)
{
return
make_
##
type
##
4
(
x
,
y
,
z
,
w
);}
\
};
\
template
<>
struct
VecTraits
<
type
##
8
>
\
{
\
typedef
type
elem_type
;
\
enum
{
cn
=
8
};
\
static
__device__
__host__
__forceinline__
type
##
8
all
(
type
v
)
{
return
make_
##
type
##
8
(
v
,
v
,
v
,
v
,
v
,
v
,
v
,
v
);}
\
static
__device__
__host__
__forceinline__
type
##
8
make
(
type
a0
,
type
a1
,
type
a2
,
type
a3
,
type
a4
,
type
a5
,
type
a6
,
type
a7
)
{
return
make_
##
type
##
8
(
a0
,
a1
,
a2
,
a3
,
a4
,
a5
,
a6
,
a7
);}
\
};
OPENCV_GPU_IMPLEMENT_VEC_TRAITS
(
uchar
)
...
...
@@ -134,8 +218,8 @@ namespace cv { namespace gpu { namespace device
{
typedef
schar
elem_type
;
enum
{
cn
=
1
};
static
__device__
__host__
schar
all
(
schar
v
)
{
return
v
;}
static
__device__
__host__
schar
make
(
schar
x
)
{
return
x
;}
static
__device__
__host__
__forceinline__
schar
all
(
schar
v
)
{
return
v
;}
static
__device__
__host__
__forceinline__
schar
make
(
schar
x
)
{
return
x
;}
};
}}}
...
...
samples/gpu/performance/tests.cpp
View file @
5e9ae6b1
...
...
@@ -286,7 +286,7 @@ TEST(BruteForceMatcher)
{
// Init CPU matcher
int
desc_len
=
64
;
int
desc_len
=
128
;
BruteForceMatcher
<
L2
<
float
>
>
matcher
;
...
...
@@ -328,7 +328,7 @@ TEST(BruteForceMatcher)
d_matcher
.
knnMatch
(
d_query
,
d_train
,
d_matches
,
knn
);
GPU_OFF
;
/*
SUBTEST << "radiusMatch";
SUBTEST
<<
"radiusMatch"
;
float
max_distance
=
3.8
f
;
CPU_ON
;
...
...
@@ -337,7 +337,7 @@ TEST(BruteForceMatcher)
GPU_ON
;
d_matcher
.
radiusMatch
(
d_query
,
d_train
,
d_matches
,
max_distance
);
GPU_OFF;
*/
GPU_OFF
;
}
...
...
@@ -689,60 +689,66 @@ TEST(threshold)
Mat
src
,
dst
;
gpu
::
GpuMat
d_src
,
d_dst
;
for
(
int
size
=
2
000
;
size
<=
4000
;
size
+=
1000
)
for
(
int
size
=
1
000
;
size
<=
4000
;
size
+=
1000
)
{
SUBTEST
<<
"size "
<<
size
<<
", 8U, THRESH_
TRUNC
"
;
SUBTEST
<<
"size "
<<
size
<<
", 8U, THRESH_
BINARY
"
;
gen
(
src
,
size
,
size
,
CV_8U
,
0
,
100
);
dst
.
create
(
size
,
size
,
CV_8U
);
CPU_ON
;
threshold
(
src
,
dst
,
50.0
,
0.0
,
THRESH_
TRUNC
);
threshold
(
src
,
dst
,
50.0
,
0.0
,
THRESH_
BINARY
);
CPU_OFF
;
d_src
=
src
;
d_dst
.
create
(
size
,
size
,
CV_8U
);
GPU_ON
;
gpu
::
threshold
(
d_src
,
d_dst
,
50.0
,
0.0
,
THRESH_
TRUNC
);
gpu
::
threshold
(
d_src
,
d_dst
,
50.0
,
0.0
,
THRESH_
BINARY
);
GPU_OFF
;
}
for
(
int
size
=
2
000
;
size
<=
4000
;
size
+=
1000
)
for
(
int
size
=
1
000
;
size
<=
4000
;
size
+=
1000
)
{
SUBTEST
<<
"size "
<<
size
<<
",
8U
, THRESH_BINARY"
;
SUBTEST
<<
"size "
<<
size
<<
",
32F
, THRESH_BINARY"
;
gen
(
src
,
size
,
size
,
CV_
8U
,
0
,
100
);
dst
.
create
(
size
,
size
,
CV_
8U
);
gen
(
src
,
size
,
size
,
CV_
32F
,
0
,
100
);
dst
.
create
(
size
,
size
,
CV_
32F
);
CPU_ON
;
threshold
(
src
,
dst
,
50.0
,
0.0
,
THRESH_BINARY
);
CPU_OFF
;
d_src
=
src
;
d_dst
.
create
(
size
,
size
,
CV_
8U
);
d_dst
.
create
(
size
,
size
,
CV_
32F
);
GPU_ON
;
gpu
::
threshold
(
d_src
,
d_dst
,
50.0
,
0.0
,
THRESH_BINARY
);
GPU_OFF
;
}
}
for
(
int
size
=
2000
;
size
<=
4000
;
size
+=
1000
)
TEST
(
pow
)
{
Mat
src
,
dst
;
gpu
::
GpuMat
d_src
,
d_dst
;
for
(
int
size
=
1000
;
size
<=
4000
;
size
+=
1000
)
{
SUBTEST
<<
"size "
<<
size
<<
", 32F
, THRESH_TRUNC
"
;
SUBTEST
<<
"size "
<<
size
<<
", 32F"
;
gen
(
src
,
size
,
size
,
CV_32F
,
0
,
100
);
dst
.
create
(
size
,
size
,
CV_32F
);
CPU_ON
;
threshold
(
src
,
dst
,
50.0
,
0.0
,
THRESH_TRUNC
);
CPU_ON
;
pow
(
src
,
-
2.0
,
dst
);
CPU_OFF
;
d_src
=
src
;
d_dst
.
create
(
size
,
size
,
CV_32F
);
GPU_ON
;
gpu
::
threshold
(
d_src
,
d_dst
,
50.0
,
0.0
,
THRESH_TRUNC
);
gpu
::
pow
(
d_src
,
-
2.0
,
d_dst
);
GPU_OFF
;
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment