Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
1ef211b8
Commit
1ef211b8
authored
Aug 27, 2013
by
Vladislav Vinogradov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
used new device layer for cv::gpu::reduce
parent
31a78143
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
81 additions
and
190 deletions
+81
-190
perf_reductions.cpp
modules/cudaarithm/perf/perf_reductions.cpp
+2
-2
reduce.cu
modules/cudaarithm/src/cuda/reduce.cu
+0
-0
reductions.cpp
modules/cudaarithm/src/reductions.cpp
+0
-182
reduce_to_column.hpp
...ev/include/opencv2/cudev/grid/detail/reduce_to_column.hpp
+42
-2
reduce_to_vec.hpp
modules/cudev/include/opencv2/cudev/grid/reduce_to_vec.hpp
+25
-4
test_reduction.cu
modules/cudev/test/test_reduction.cu
+12
-0
No files found.
modules/cudaarithm/perf/perf_reductions.cpp
View file @
1ef211b8
...
...
@@ -373,7 +373,7 @@ PERF_TEST_P(Sz_Depth_Cn_Code_Dim, Reduce,
const
cv
::
cuda
::
GpuMat
d_src
(
src
);
cv
::
cuda
::
GpuMat
dst
;
TEST_CYCLE
()
cv
::
cuda
::
reduce
(
d_src
,
dst
,
dim
,
reduceOp
);
TEST_CYCLE
()
cv
::
cuda
::
reduce
(
d_src
,
dst
,
dim
,
reduceOp
,
CV_32F
);
CUDA_SANITY_CHECK
(
dst
);
}
...
...
@@ -381,7 +381,7 @@ PERF_TEST_P(Sz_Depth_Cn_Code_Dim, Reduce,
{
cv
::
Mat
dst
;
TEST_CYCLE
()
cv
::
reduce
(
src
,
dst
,
dim
,
reduceOp
);
TEST_CYCLE
()
cv
::
reduce
(
src
,
dst
,
dim
,
reduceOp
,
CV_32F
);
CPU_SANITY_CHECK
(
dst
);
}
...
...
modules/cudaarithm/src/cuda/reduce.cu
View file @
1ef211b8
This diff is collapsed.
Click to expand it.
modules/cudaarithm/src/reductions.cpp
View file @
1ef211b8
...
...
@@ -186,188 +186,6 @@ double cv::cuda::norm(InputArray _src1, InputArray _src2, GpuMat& buf, int normT
return
retVal
;
}
//////////////////////////////////////////////////////////////////////////////
// reduce
namespace
reduce
{
template
<
typename
T
,
typename
S
,
typename
D
>
void
rows
(
PtrStepSzb
src
,
void
*
dst
,
int
op
,
cudaStream_t
stream
);
template
<
typename
T
,
typename
S
,
typename
D
>
void
cols
(
PtrStepSzb
src
,
void
*
dst
,
int
cn
,
int
op
,
cudaStream_t
stream
);
}
void
cv
::
cuda
::
reduce
(
InputArray
_src
,
OutputArray
_dst
,
int
dim
,
int
reduceOp
,
int
dtype
,
Stream
&
stream
)
{
GpuMat
src
=
_src
.
getGpuMat
();
CV_Assert
(
src
.
channels
()
<=
4
);
CV_Assert
(
dim
==
0
||
dim
==
1
);
CV_Assert
(
reduceOp
==
REDUCE_SUM
||
reduceOp
==
REDUCE_AVG
||
reduceOp
==
REDUCE_MAX
||
reduceOp
==
REDUCE_MIN
);
if
(
dtype
<
0
)
dtype
=
src
.
depth
();
_dst
.
create
(
1
,
dim
==
0
?
src
.
cols
:
src
.
rows
,
CV_MAKE_TYPE
(
CV_MAT_DEPTH
(
dtype
),
src
.
channels
()));
GpuMat
dst
=
_dst
.
getGpuMat
();
if
(
dim
==
0
)
{
typedef
void
(
*
func_t
)(
PtrStepSzb
src
,
void
*
dst
,
int
op
,
cudaStream_t
stream
);
static
const
func_t
funcs
[
7
][
7
]
=
{
{
::
reduce
::
rows
<
unsigned
char
,
int
,
unsigned
char
>
,
0
/*::reduce::rows<unsigned char, int, signed char>*/
,
0
/*::reduce::rows<unsigned char, int, unsigned short>*/
,
0
/*::reduce::rows<unsigned char, int, short>*/
,
::
reduce
::
rows
<
unsigned
char
,
int
,
int
>
,
::
reduce
::
rows
<
unsigned
char
,
float
,
float
>
,
::
reduce
::
rows
<
unsigned
char
,
double
,
double
>
},
{
0
/*::reduce::rows<signed char, int, unsigned char>*/
,
0
/*::reduce::rows<signed char, int, signed char>*/
,
0
/*::reduce::rows<signed char, int, unsigned short>*/
,
0
/*::reduce::rows<signed char, int, short>*/
,
0
/*::reduce::rows<signed char, int, int>*/
,
0
/*::reduce::rows<signed char, float, float>*/
,
0
/*::reduce::rows<signed char, double, double>*/
},
{
0
/*::reduce::rows<unsigned short, int, unsigned char>*/
,
0
/*::reduce::rows<unsigned short, int, signed char>*/
,
::
reduce
::
rows
<
unsigned
short
,
int
,
unsigned
short
>
,
0
/*::reduce::rows<unsigned short, int, short>*/
,
::
reduce
::
rows
<
unsigned
short
,
int
,
int
>
,
::
reduce
::
rows
<
unsigned
short
,
float
,
float
>
,
::
reduce
::
rows
<
unsigned
short
,
double
,
double
>
},
{
0
/*::reduce::rows<short, int, unsigned char>*/
,
0
/*::reduce::rows<short, int, signed char>*/
,
0
/*::reduce::rows<short, int, unsigned short>*/
,
::
reduce
::
rows
<
short
,
int
,
short
>
,
::
reduce
::
rows
<
short
,
int
,
int
>
,
::
reduce
::
rows
<
short
,
float
,
float
>
,
::
reduce
::
rows
<
short
,
double
,
double
>
},
{
0
/*::reduce::rows<int, int, unsigned char>*/
,
0
/*::reduce::rows<int, int, signed char>*/
,
0
/*::reduce::rows<int, int, unsigned short>*/
,
0
/*::reduce::rows<int, int, short>*/
,
::
reduce
::
rows
<
int
,
int
,
int
>
,
::
reduce
::
rows
<
int
,
float
,
float
>
,
::
reduce
::
rows
<
int
,
double
,
double
>
},
{
0
/*::reduce::rows<float, float, unsigned char>*/
,
0
/*::reduce::rows<float, float, signed char>*/
,
0
/*::reduce::rows<float, float, unsigned short>*/
,
0
/*::reduce::rows<float, float, short>*/
,
0
/*::reduce::rows<float, float, int>*/
,
::
reduce
::
rows
<
float
,
float
,
float
>
,
::
reduce
::
rows
<
float
,
double
,
double
>
},
{
0
/*::reduce::rows<double, double, unsigned char>*/
,
0
/*::reduce::rows<double, double, signed char>*/
,
0
/*::reduce::rows<double, double, unsigned short>*/
,
0
/*::reduce::rows<double, double, short>*/
,
0
/*::reduce::rows<double, double, int>*/
,
0
/*::reduce::rows<double, double, float>*/
,
::
reduce
::
rows
<
double
,
double
,
double
>
}
};
const
func_t
func
=
funcs
[
src
.
depth
()][
dst
.
depth
()];
if
(
!
func
)
CV_Error
(
cv
::
Error
::
StsUnsupportedFormat
,
"Unsupported combination of input and output array formats"
);
func
(
src
.
reshape
(
1
),
dst
.
data
,
reduceOp
,
StreamAccessor
::
getStream
(
stream
));
}
else
{
typedef
void
(
*
func_t
)(
PtrStepSzb
src
,
void
*
dst
,
int
cn
,
int
op
,
cudaStream_t
stream
);
static
const
func_t
funcs
[
7
][
7
]
=
{
{
::
reduce
::
cols
<
unsigned
char
,
int
,
unsigned
char
>
,
0
/*::reduce::cols<unsigned char, int, signed char>*/
,
0
/*::reduce::cols<unsigned char, int, unsigned short>*/
,
0
/*::reduce::cols<unsigned char, int, short>*/
,
::
reduce
::
cols
<
unsigned
char
,
int
,
int
>
,
::
reduce
::
cols
<
unsigned
char
,
float
,
float
>
,
::
reduce
::
cols
<
unsigned
char
,
double
,
double
>
},
{
0
/*::reduce::cols<signed char, int, unsigned char>*/
,
0
/*::reduce::cols<signed char, int, signed char>*/
,
0
/*::reduce::cols<signed char, int, unsigned short>*/
,
0
/*::reduce::cols<signed char, int, short>*/
,
0
/*::reduce::cols<signed char, int, int>*/
,
0
/*::reduce::cols<signed char, float, float>*/
,
0
/*::reduce::cols<signed char, double, double>*/
},
{
0
/*::reduce::cols<unsigned short, int, unsigned char>*/
,
0
/*::reduce::cols<unsigned short, int, signed char>*/
,
::
reduce
::
cols
<
unsigned
short
,
int
,
unsigned
short
>
,
0
/*::reduce::cols<unsigned short, int, short>*/
,
::
reduce
::
cols
<
unsigned
short
,
int
,
int
>
,
::
reduce
::
cols
<
unsigned
short
,
float
,
float
>
,
::
reduce
::
cols
<
unsigned
short
,
double
,
double
>
},
{
0
/*::reduce::cols<short, int, unsigned char>*/
,
0
/*::reduce::cols<short, int, signed char>*/
,
0
/*::reduce::cols<short, int, unsigned short>*/
,
::
reduce
::
cols
<
short
,
int
,
short
>
,
::
reduce
::
cols
<
short
,
int
,
int
>
,
::
reduce
::
cols
<
short
,
float
,
float
>
,
::
reduce
::
cols
<
short
,
double
,
double
>
},
{
0
/*::reduce::cols<int, int, unsigned char>*/
,
0
/*::reduce::cols<int, int, signed char>*/
,
0
/*::reduce::cols<int, int, unsigned short>*/
,
0
/*::reduce::cols<int, int, short>*/
,
::
reduce
::
cols
<
int
,
int
,
int
>
,
::
reduce
::
cols
<
int
,
float
,
float
>
,
::
reduce
::
cols
<
int
,
double
,
double
>
},
{
0
/*::reduce::cols<float, float, unsigned char>*/
,
0
/*::reduce::cols<float, float, signed char>*/
,
0
/*::reduce::cols<float, float, unsigned short>*/
,
0
/*::reduce::cols<float, float, short>*/
,
0
/*::reduce::cols<float, float, int>*/
,
::
reduce
::
cols
<
float
,
float
,
float
>
,
::
reduce
::
cols
<
float
,
double
,
double
>
},
{
0
/*::reduce::cols<double, double, unsigned char>*/
,
0
/*::reduce::cols<double, double, signed char>*/
,
0
/*::reduce::cols<double, double, unsigned short>*/
,
0
/*::reduce::cols<double, double, short>*/
,
0
/*::reduce::cols<double, double, int>*/
,
0
/*::reduce::cols<double, double, float>*/
,
::
reduce
::
cols
<
double
,
double
,
double
>
}
};
const
func_t
func
=
funcs
[
src
.
depth
()][
dst
.
depth
()];
if
(
!
func
)
CV_Error
(
cv
::
Error
::
StsUnsupportedFormat
,
"Unsupported combination of input and output array formats"
);
func
(
src
,
dst
.
data
,
src
.
channels
(),
reduceOp
,
StreamAccessor
::
getStream
(
stream
));
}
}
////////////////////////////////////////////////////////////////////////
// meanStdDev
...
...
modules/cudev/include/opencv2/cudev/grid/detail/reduce_to_column.hpp
View file @
1ef211b8
...
...
@@ -54,12 +54,52 @@ namespace cv { namespace cudev {
namespace
grid_reduce_to_vec_detail
{
template
<
int
BLOCK_SIZE
,
typename
work_type
,
typename
work_elem_type
,
class
Reductor
,
int
cn
>
struct
Reduce
;
template
<
int
BLOCK_SIZE
,
typename
work_type
,
typename
work_elem_type
,
class
Reductor
>
struct
Reduce
<
BLOCK_SIZE
,
work_type
,
work_elem_type
,
Reductor
,
1
>
{
__device__
__forceinline__
static
void
call
(
work_elem_type
smem
[
1
][
BLOCK_SIZE
],
work_type
&
myVal
)
{
typename
Reductor
::
template
rebind
<
work_elem_type
>::
other
op
;
blockReduce
<
BLOCK_SIZE
>
(
smem
[
0
],
myVal
,
threadIdx
.
x
,
op
);
}
};
template
<
int
BLOCK_SIZE
,
typename
work_type
,
typename
work_elem_type
,
class
Reductor
>
struct
Reduce
<
BLOCK_SIZE
,
work_type
,
work_elem_type
,
Reductor
,
2
>
{
__device__
__forceinline__
static
void
call
(
work_elem_type
smem
[
2
][
BLOCK_SIZE
],
work_type
&
myVal
)
{
typename
Reductor
::
template
rebind
<
work_elem_type
>::
other
op
;
blockReduce
<
BLOCK_SIZE
>
(
smem_tuple
(
smem
[
0
],
smem
[
1
]),
tie
(
myVal
.
x
,
myVal
.
y
),
threadIdx
.
x
,
make_tuple
(
op
,
op
));
}
};
template
<
int
BLOCK_SIZE
,
typename
work_type
,
typename
work_elem_type
,
class
Reductor
>
struct
Reduce
<
BLOCK_SIZE
,
work_type
,
work_elem_type
,
Reductor
,
3
>
{
__device__
__forceinline__
static
void
call
(
work_elem_type
smem
[
3
][
BLOCK_SIZE
],
work_type
&
myVal
)
{
typename
Reductor
::
template
rebind
<
work_elem_type
>::
other
op
;
blockReduce
<
BLOCK_SIZE
>
(
smem_tuple
(
smem
[
0
],
smem
[
1
],
smem
[
2
]),
tie
(
myVal
.
x
,
myVal
.
y
,
myVal
.
z
),
threadIdx
.
x
,
make_tuple
(
op
,
op
,
op
));
}
};
template
<
int
BLOCK_SIZE
,
typename
work_type
,
typename
work_elem_type
,
class
Reductor
>
struct
Reduce
<
BLOCK_SIZE
,
work_type
,
work_elem_type
,
Reductor
,
4
>
{
__device__
__forceinline__
static
void
call
(
work_elem_type
smem
[
4
][
BLOCK_SIZE
],
work_type
&
myVal
)
{
typename
Reductor
::
template
rebind
<
work_elem_type
>::
other
op
;
blockReduce
<
BLOCK_SIZE
>
(
smem_tuple
(
smem
[
0
],
smem
[
1
],
smem
[
2
],
smem
[
3
]),
tie
(
myVal
.
x
,
myVal
.
y
,
myVal
.
z
,
myVal
.
w
),
threadIdx
.
x
,
make_tuple
(
op
,
op
,
op
,
op
));
}
};
template
<
class
Reductor
,
int
BLOCK_SIZE
,
class
SrcPtr
,
typename
ResType
,
class
MaskPtr
>
__global__
void
reduceToColumn
(
const
SrcPtr
src
,
ResType
*
dst
,
const
MaskPtr
mask
,
const
int
cols
)
{
typedef
typename
Reductor
::
work_type
work_type
;
typedef
typename
VecTraits
<
work_type
>::
elem_type
work_elem_type
;
const
int
cn
=
VecTraits
<
work_type
>::
cn
;
__shared__
work_
type
smem
[
BLOCK_SIZE
];
__shared__
work_
elem_type
smem
[
cn
]
[
BLOCK_SIZE
];
const
int
y
=
blockIdx
.
x
;
...
...
@@ -75,7 +115,7 @@ namespace grid_reduce_to_vec_detail
}
}
blockReduce
<
BLOCK_SIZE
>
(
smem
,
myVal
,
threadIdx
.
x
,
op
);
Reduce
<
BLOCK_SIZE
,
work_type
,
work_elem_type
,
Reductor
,
cn
>::
call
(
smem
,
myVal
);
if
(
threadIdx
.
x
==
0
)
dst
[
y
]
=
saturate_cast
<
ResType
>
(
Reductor
::
result
(
myVal
,
cols
));
...
...
modules/cudev/include/opencv2/cudev/grid/reduce_to_vec.hpp
View file @
1ef211b8
...
...
@@ -49,6 +49,7 @@
#include "../common.hpp"
#include "../util/vec_traits.hpp"
#include "../util/limits.hpp"
#include "../util/saturate_cast.hpp"
#include "../ptr2d/traits.hpp"
#include "../ptr2d/gpumat.hpp"
#include "../ptr2d/mask.hpp"
...
...
@@ -62,6 +63,11 @@ template <typename T> struct Sum : plus<T>
{
typedef
T
work_type
;
template
<
typename
U
>
struct
rebind
{
typedef
Sum
<
U
>
other
;
};
__device__
__forceinline__
static
T
initialValue
()
{
return
VecTraits
<
T
>::
all
(
0
);
...
...
@@ -77,14 +83,19 @@ template <typename T> struct Avg : plus<T>
{
typedef
T
work_type
;
template
<
typename
U
>
struct
rebind
{
typedef
Avg
<
U
>
other
;
};
__device__
__forceinline__
static
T
initialValue
()
{
return
VecTraits
<
T
>::
all
(
0
);
}
__device__
__forceinline__
static
T
result
(
T
r
,
in
t
sz
)
__device__
__forceinline__
static
T
result
(
T
r
,
floa
t
sz
)
{
return
r
/
sz
;
return
saturate_cast
<
T
>
(
r
/
sz
)
;
}
};
...
...
@@ -92,6 +103,11 @@ template <typename T> struct Min : minimum<T>
{
typedef
T
work_type
;
template
<
typename
U
>
struct
rebind
{
typedef
Min
<
U
>
other
;
};
__device__
__forceinline__
static
T
initialValue
()
{
return
VecTraits
<
T
>::
all
(
numeric_limits
<
typename
VecTraits
<
T
>::
elem_type
>::
max
());
...
...
@@ -107,6 +123,11 @@ template <typename T> struct Max : maximum<T>
{
typedef
T
work_type
;
template
<
typename
U
>
struct
rebind
{
typedef
Max
<
U
>
other
;
};
__device__
__forceinline__
static
T
initialValue
()
{
return
VecTraits
<
T
>::
all
(
-
numeric_limits
<
typename
VecTraits
<
T
>::
elem_type
>::
max
());
...
...
@@ -158,7 +179,7 @@ __host__ void gridReduceToColumn_(const SrcPtr& src, GpuMat_<ResType>& dst, cons
CV_Assert
(
getRows
(
mask
)
==
rows
&&
getCols
(
mask
)
==
cols
);
createContinuous
(
rows
,
1
,
DataType
<
ResType
>::
type
,
dst
);
dst
.
create
(
1
,
rows
);
grid_reduce_to_vec_detail
::
reduceToColumn
<
Reductor
,
Policy
>
(
shrinkPtr
(
src
),
dst
[
0
],
...
...
@@ -173,7 +194,7 @@ __host__ void gridReduceToColumn_(const SrcPtr& src, GpuMat_<ResType>& dst, Stre
const
int
rows
=
getRows
(
src
);
const
int
cols
=
getCols
(
src
);
createContinuous
(
rows
,
1
,
DataType
<
ResType
>::
type
,
dst
);
dst
.
create
(
1
,
rows
);
grid_reduce_to_vec_detail
::
reduceToColumn
<
Reductor
,
Policy
>
(
shrinkPtr
(
src
),
dst
[
0
],
...
...
modules/cudev/test/test_reduction.cu
View file @
1ef211b8
...
...
@@ -228,6 +228,9 @@ TEST(ReduceToColumn, Sum)
Mat dst_gold;
cv::reduce(src, dst_gold, 1, REDUCE_SUM, CV_32S);
dst_gold.cols = dst_gold.rows;
dst_gold.rows = 1;
dst_gold.step = dst_gold.cols * dst_gold.elemSize();
EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
}
...
...
@@ -244,6 +247,9 @@ TEST(ReduceToColumn, Avg)
Mat dst_gold;
cv::reduce(src, dst_gold, 1, REDUCE_AVG, CV_32F);
dst_gold.cols = dst_gold.rows;
dst_gold.rows = 1;
dst_gold.step = dst_gold.cols * dst_gold.elemSize();
EXPECT_MAT_NEAR(dst_gold, dst, 1e-4);
}
...
...
@@ -260,6 +266,9 @@ TEST(ReduceToColumn, Min)
Mat dst_gold;
cv::reduce(src, dst_gold, 1, REDUCE_MIN);
dst_gold.cols = dst_gold.rows;
dst_gold.rows = 1;
dst_gold.step = dst_gold.cols * dst_gold.elemSize();
EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
}
...
...
@@ -276,6 +285,9 @@ TEST(ReduceToColumn, Max)
Mat dst_gold;
cv::reduce(src, dst_gold, 1, REDUCE_MAX);
dst_gold.cols = dst_gold.rows;
dst_gold.rows = 1;
dst_gold.step = dst_gold.cols * dst_gold.elemSize();
EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment