Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
e8f9762e
Commit
e8f9762e
authored
Nov 12, 2012
by
Vladislav Vinogradov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
matrix reduction
parent
fbf3de43
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
252 additions
and
406 deletions
+252
-406
matrix_reductions.cu
modules/gpu/src/cuda/matrix_reductions.cu
+0
-0
matrix_reductions.cpp
modules/gpu/src/matrix_reductions.cpp
+241
-403
test_core.cpp
modules/gpu/test/test_core.cpp
+11
-3
No files found.
modules/gpu/src/cuda/matrix_reductions.cu
View file @
e8f9762e
This source diff could not be displayed because it is too large. You can
view the blob
instead.
modules/gpu/src/matrix_reductions.cpp
View file @
e8f9762e
...
...
@@ -204,34 +204,19 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType)
////////////////////////////////////////////////////////////////////////
// Sum
namespace
cv
{
namespace
gpu
{
namespace
device
namespace
sum
{
namespace
matrix_reductions
{
namespace
sum
{
template
<
typename
T
>
void
sumCaller
(
const
PtrStepSzb
src
,
PtrStepb
buf
,
double
*
sum
,
int
cn
);
template
<
typename
T
>
void
sumMultipassCaller
(
const
PtrStepSzb
src
,
PtrStepb
buf
,
double
*
sum
,
int
cn
);
template
<
typename
T
>
void
absSumCaller
(
const
PtrStepSzb
src
,
PtrStepb
buf
,
double
*
sum
,
int
cn
);
template
<
typename
T
>
void
absSumMultipassCaller
(
const
PtrStepSzb
src
,
PtrStepb
buf
,
double
*
sum
,
int
cn
);
void
getBufSize
(
int
cols
,
int
rows
,
int
cn
,
int
&
bufcols
,
int
&
bufrows
);
template
<
typename
T
>
void
sqrSumCaller
(
const
PtrStepSzb
src
,
PtrStepb
buf
,
double
*
sum
,
int
cn
);
template
<
typename
T
,
int
cn
>
void
run
(
PtrStepSzb
src
,
void
*
buf
,
double
*
sum
);
template
<
typename
T
>
void
sqrSumMultipassCaller
(
const
PtrStepSzb
src
,
PtrStepb
buf
,
double
*
sum
,
int
cn
);
template
<
typename
T
,
int
cn
>
void
runAbs
(
PtrStepSzb
src
,
void
*
buf
,
double
*
sum
);
void
getBufSizeRequired
(
int
cols
,
int
rows
,
int
cn
,
int
&
bufcols
,
int
&
bufrows
);
}
}
}}}
template
<
typename
T
,
int
cn
>
void
runSqr
(
PtrStepSzb
src
,
void
*
buf
,
double
*
sum
);
}
Scalar
cv
::
gpu
::
sum
(
const
GpuMat
&
src
)
{
...
...
@@ -239,159 +224,115 @@ Scalar cv::gpu::sum(const GpuMat& src)
return
sum
(
src
,
buf
);
}
Scalar
cv
::
gpu
::
sum
(
const
GpuMat
&
src
,
GpuMat
&
buf
)
{
using
namespace
cv
::
gpu
::
device
::
matrix_reductions
::
sum
;
typedef
void
(
*
Caller
)(
const
PtrStepSzb
,
PtrStepb
,
double
*
,
int
);
static
Caller
multipass_callers
[]
=
{
sumMultipassCaller
<
unsigned
char
>
,
sumMultipassCaller
<
char
>
,
sumMultipassCaller
<
unsigned
short
>
,
sumMultipassCaller
<
short
>
,
sumMultipassCaller
<
int
>
,
sumMultipassCaller
<
float
>
typedef
void
(
*
func_t
)(
PtrStepSzb
src
,
void
*
buf
,
double
*
sum
);
static
const
func_t
funcs
[
7
][
5
]
=
{
{
0
,
::
sum
::
run
<
uchar
,
1
>
,
::
sum
::
run
<
uchar
,
2
>
,
::
sum
::
run
<
uchar
,
3
>
,
::
sum
::
run
<
uchar
,
4
>
},
{
0
,
::
sum
::
run
<
schar
,
1
>
,
::
sum
::
run
<
schar
,
2
>
,
::
sum
::
run
<
schar
,
3
>
,
::
sum
::
run
<
schar
,
4
>
},
{
0
,
::
sum
::
run
<
ushort
,
1
>
,
::
sum
::
run
<
ushort
,
2
>
,
::
sum
::
run
<
ushort
,
3
>
,
::
sum
::
run
<
ushort
,
4
>
},
{
0
,
::
sum
::
run
<
short
,
1
>
,
::
sum
::
run
<
short
,
2
>
,
::
sum
::
run
<
short
,
3
>
,
::
sum
::
run
<
short
,
4
>
},
{
0
,
::
sum
::
run
<
int
,
1
>
,
::
sum
::
run
<
int
,
2
>
,
::
sum
::
run
<
int
,
3
>
,
::
sum
::
run
<
int
,
4
>
},
{
0
,
::
sum
::
run
<
float
,
1
>
,
::
sum
::
run
<
float
,
2
>
,
::
sum
::
run
<
float
,
3
>
,
::
sum
::
run
<
float
,
4
>
},
{
0
,
::
sum
::
run
<
double
,
1
>
,
::
sum
::
run
<
double
,
2
>
,
::
sum
::
run
<
double
,
3
>
,
::
sum
::
run
<
double
,
4
>
}
};
static
Caller
singlepass_callers
[]
=
{
sumCaller
<
unsigned
char
>
,
sumCaller
<
char
>
,
sumCaller
<
unsigned
short
>
,
sumCaller
<
short
>
,
sumCaller
<
int
>
,
sumCaller
<
float
>
};
CV_Assert
(
src
.
depth
()
<=
CV_32F
);
if
(
src
.
depth
()
==
CV_64F
)
{
if
(
!
TargetArchs
::
builtWith
(
NATIVE_DOUBLE
)
||
!
DeviceInfo
().
supports
(
NATIVE_DOUBLE
))
CV_Error
(
CV_StsUnsupportedFormat
,
"The device doesn't support double"
);
}
Size
buf_size
;
getBufSizeRequired
(
src
.
cols
,
src
.
rows
,
src
.
channels
(),
buf_size
.
width
,
buf_size
.
height
);
::
sum
::
getBufSize
(
src
.
cols
,
src
.
rows
,
src
.
channels
(),
buf_size
.
width
,
buf_size
.
height
);
ensureSizeIsEnough
(
buf_size
,
CV_8U
,
buf
);
buf
.
setTo
(
Scalar
::
all
(
0
));
Caller
*
callers
=
multipass_callers
;
if
(
TargetArchs
::
builtWith
(
GLOBAL_ATOMICS
)
&&
DeviceInfo
().
supports
(
GLOBAL_ATOMICS
))
callers
=
singlepass_callers
;
Caller
caller
=
callers
[
src
.
depth
()];
const
func_t
func
=
funcs
[
src
.
depth
()][
src
.
channels
()];
double
result
[
4
];
caller
(
src
,
buf
,
result
,
src
.
channels
());
func
(
src
,
buf
.
data
,
result
);
return
Scalar
(
result
[
0
],
result
[
1
],
result
[
2
],
result
[
3
]);
}
Scalar
cv
::
gpu
::
absSum
(
const
GpuMat
&
src
)
{
GpuMat
buf
;
return
absSum
(
src
,
buf
);
}
Scalar
cv
::
gpu
::
absSum
(
const
GpuMat
&
src
,
GpuMat
&
buf
)
{
using
namespace
cv
::
gpu
::
device
::
matrix_reductions
::
sum
;
typedef
void
(
*
Caller
)(
const
PtrStepSzb
,
PtrStepb
,
double
*
,
int
);
static
Caller
multipass_callers
[]
=
{
absSumMultipassCaller
<
unsigned
char
>
,
absSumMultipassCaller
<
char
>
,
absSumMultipassCaller
<
unsigned
short
>
,
absSumMultipassCaller
<
short
>
,
absSumMultipassCaller
<
int
>
,
absSumMultipassCaller
<
float
>
};
static
Caller
singlepass_callers
[]
=
{
absSumCaller
<
unsigned
char
>
,
absSumCaller
<
char
>
,
absSumCaller
<
unsigned
short
>
,
absSumCaller
<
short
>
,
absSumCaller
<
int
>
,
absSumCaller
<
float
>
typedef
void
(
*
func_t
)(
PtrStepSzb
src
,
void
*
buf
,
double
*
sum
);
static
const
func_t
funcs
[
7
][
5
]
=
{
{
0
,
::
sum
::
runAbs
<
uchar
,
1
>
,
::
sum
::
runAbs
<
uchar
,
2
>
,
::
sum
::
runAbs
<
uchar
,
3
>
,
::
sum
::
runAbs
<
uchar
,
4
>
},
{
0
,
::
sum
::
runAbs
<
schar
,
1
>
,
::
sum
::
runAbs
<
schar
,
2
>
,
::
sum
::
runAbs
<
schar
,
3
>
,
::
sum
::
runAbs
<
schar
,
4
>
},
{
0
,
::
sum
::
runAbs
<
ushort
,
1
>
,
::
sum
::
runAbs
<
ushort
,
2
>
,
::
sum
::
runAbs
<
ushort
,
3
>
,
::
sum
::
runAbs
<
ushort
,
4
>
},
{
0
,
::
sum
::
runAbs
<
short
,
1
>
,
::
sum
::
runAbs
<
short
,
2
>
,
::
sum
::
runAbs
<
short
,
3
>
,
::
sum
::
runAbs
<
short
,
4
>
},
{
0
,
::
sum
::
runAbs
<
int
,
1
>
,
::
sum
::
runAbs
<
int
,
2
>
,
::
sum
::
runAbs
<
int
,
3
>
,
::
sum
::
runAbs
<
int
,
4
>
},
{
0
,
::
sum
::
runAbs
<
float
,
1
>
,
::
sum
::
runAbs
<
float
,
2
>
,
::
sum
::
runAbs
<
float
,
3
>
,
::
sum
::
runAbs
<
float
,
4
>
},
{
0
,
::
sum
::
runAbs
<
double
,
1
>
,
::
sum
::
runAbs
<
double
,
2
>
,
::
sum
::
runAbs
<
double
,
3
>
,
::
sum
::
runAbs
<
double
,
4
>
}
};
CV_Assert
(
src
.
depth
()
<=
CV_32F
);
Size
buf_size
;
getBufSizeRequired
(
src
.
cols
,
src
.
rows
,
src
.
channels
(),
buf_size
.
width
,
buf_size
.
height
);
::
sum
::
getBufSize
(
src
.
cols
,
src
.
rows
,
src
.
channels
(),
buf_size
.
width
,
buf_size
.
height
);
ensureSizeIsEnough
(
buf_size
,
CV_8U
,
buf
);
buf
.
setTo
(
Scalar
::
all
(
0
));
Caller
*
callers
=
multipass_callers
;
if
(
TargetArchs
::
builtWith
(
GLOBAL_ATOMICS
)
&&
DeviceInfo
().
supports
(
GLOBAL_ATOMICS
))
callers
=
singlepass_callers
;
Caller
caller
=
callers
[
src
.
depth
()];
const
func_t
func
=
funcs
[
src
.
depth
()][
src
.
channels
()];
double
result
[
4
];
caller
(
src
,
buf
,
result
,
src
.
channels
());
func
(
src
,
buf
.
data
,
result
);
return
Scalar
(
result
[
0
],
result
[
1
],
result
[
2
],
result
[
3
]);
}
Scalar
cv
::
gpu
::
sqrSum
(
const
GpuMat
&
src
)
{
GpuMat
buf
;
return
sqrSum
(
src
,
buf
);
}
Scalar
cv
::
gpu
::
sqrSum
(
const
GpuMat
&
src
,
GpuMat
&
buf
)
{
using
namespace
cv
::
gpu
::
device
::
matrix_reductions
::
sum
;
typedef
void
(
*
Caller
)(
const
PtrStepSzb
,
PtrStepb
,
double
*
,
int
);
static
Caller
multipass_callers
[]
=
{
sqrSumMultipassCaller
<
unsigned
char
>
,
sqrSumMultipassCaller
<
char
>
,
sqrSumMultipassCaller
<
unsigned
short
>
,
sqrSumMultipassCaller
<
short
>
,
sqrSumMultipassCaller
<
int
>
,
sqrSumMultipassCaller
<
float
>
};
static
Caller
singlepass_callers
[
7
]
=
{
sqrSumCaller
<
unsigned
char
>
,
sqrSumCaller
<
char
>
,
sqrSumCaller
<
unsigned
short
>
,
sqrSumCaller
<
short
>
,
sqrSumCaller
<
int
>
,
sqrSumCaller
<
float
>
typedef
void
(
*
func_t
)(
PtrStepSzb
src
,
void
*
buf
,
double
*
sum
);
static
const
func_t
funcs
[
7
][
5
]
=
{
{
0
,
::
sum
::
runSqr
<
uchar
,
1
>
,
::
sum
::
runSqr
<
uchar
,
2
>
,
::
sum
::
runSqr
<
uchar
,
3
>
,
::
sum
::
runSqr
<
uchar
,
4
>
},
{
0
,
::
sum
::
runSqr
<
schar
,
1
>
,
::
sum
::
runSqr
<
schar
,
2
>
,
::
sum
::
runSqr
<
schar
,
3
>
,
::
sum
::
runSqr
<
schar
,
4
>
},
{
0
,
::
sum
::
runSqr
<
ushort
,
1
>
,
::
sum
::
runSqr
<
ushort
,
2
>
,
::
sum
::
runSqr
<
ushort
,
3
>
,
::
sum
::
runSqr
<
ushort
,
4
>
},
{
0
,
::
sum
::
runSqr
<
short
,
1
>
,
::
sum
::
runSqr
<
short
,
2
>
,
::
sum
::
runSqr
<
short
,
3
>
,
::
sum
::
runSqr
<
short
,
4
>
},
{
0
,
::
sum
::
runSqr
<
int
,
1
>
,
::
sum
::
runSqr
<
int
,
2
>
,
::
sum
::
runSqr
<
int
,
3
>
,
::
sum
::
runSqr
<
int
,
4
>
},
{
0
,
::
sum
::
runSqr
<
float
,
1
>
,
::
sum
::
runSqr
<
float
,
2
>
,
::
sum
::
runSqr
<
float
,
3
>
,
::
sum
::
runSqr
<
float
,
4
>
},
{
0
,
::
sum
::
runSqr
<
double
,
1
>
,
::
sum
::
runSqr
<
double
,
2
>
,
::
sum
::
runSqr
<
double
,
3
>
,
::
sum
::
runSqr
<
double
,
4
>
}
};
CV_Assert
(
src
.
depth
()
<=
CV_32F
);
Caller
*
callers
=
multipass_callers
;
if
(
TargetArchs
::
builtWith
(
GLOBAL_ATOMICS
)
&&
DeviceInfo
().
supports
(
GLOBAL_ATOMICS
))
callers
=
singlepass_callers
;
Size
buf_size
;
getBufSizeRequired
(
src
.
cols
,
src
.
rows
,
src
.
channels
(),
buf_size
.
width
,
buf_size
.
height
);
::
sum
::
getBufSize
(
src
.
cols
,
src
.
rows
,
src
.
channels
(),
buf_size
.
width
,
buf_size
.
height
);
ensureSizeIsEnough
(
buf_size
,
CV_8U
,
buf
);
buf
.
setTo
(
Scalar
::
all
(
0
));
Caller
caller
=
callers
[
src
.
depth
()];
const
func_t
func
=
funcs
[
src
.
depth
()][
src
.
channels
()];
double
result
[
4
];
caller
(
src
,
buf
,
result
,
src
.
channels
());
func
(
src
,
buf
.
data
,
result
);
return
Scalar
(
result
[
0
],
result
[
1
],
result
[
2
],
result
[
3
]);
}
////////////////////////////////////////////////////////////////////////
//
Find min or m
ax
//
minM
ax
namespace
cv
{
namespace
gpu
{
namespace
device
namespace
minMax
{
namespace
matrix_reductions
{
namespace
minmax
{
void
getBufSizeRequired
(
int
cols
,
int
rows
,
int
elem_size
,
int
&
bufcols
,
int
&
bufrows
);
template
<
typename
T
>
void
minMaxCaller
(
const
PtrStepSzb
src
,
double
*
minval
,
double
*
maxval
,
PtrStepb
buf
);
void
getBufSize
(
int
cols
,
int
rows
,
int
&
bufcols
,
int
&
bufrows
);
template
<
typename
T
>
void
minMaxMaskCaller
(
const
PtrStepSzb
src
,
const
PtrStepb
mask
,
double
*
minval
,
double
*
maxval
,
PtrStepb
buf
);
template
<
typename
T
>
void
minMaxMultipassCaller
(
const
PtrStepSzb
src
,
double
*
minval
,
double
*
maxval
,
PtrStepb
buf
);
template
<
typename
T
>
void
minMaxMaskMultipassCaller
(
const
PtrStepSzb
src
,
const
PtrStepb
mask
,
double
*
minval
,
double
*
maxval
,
PtrStepb
buf
);
}
}
}}}
void
run
(
const
PtrStepSzb
src
,
const
PtrStepb
mask
,
double
*
minval
,
double
*
maxval
,
PtrStepb
buf
);
}
void
cv
::
gpu
::
minMax
(
const
GpuMat
&
src
,
double
*
minVal
,
double
*
maxVal
,
const
GpuMat
&
mask
)
{
...
...
@@ -399,45 +340,22 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp
minMax
(
src
,
minVal
,
maxVal
,
mask
,
buf
);
}
void
cv
::
gpu
::
minMax
(
const
GpuMat
&
src
,
double
*
minVal
,
double
*
maxVal
,
const
GpuMat
&
mask
,
GpuMat
&
buf
)
{
using
namespace
::
cv
::
gpu
::
device
::
matrix_reductions
::
minmax
;
typedef
void
(
*
Caller
)(
const
PtrStepSzb
,
double
*
,
double
*
,
PtrStepb
);
typedef
void
(
*
MaskedCaller
)(
const
PtrStepSzb
,
const
PtrStepb
,
double
*
,
double
*
,
PtrStepb
);
static
Caller
multipass_callers
[]
=
{
minMaxMultipassCaller
<
unsigned
char
>
,
minMaxMultipassCaller
<
char
>
,
minMaxMultipassCaller
<
unsigned
short
>
,
minMaxMultipassCaller
<
short
>
,
minMaxMultipassCaller
<
int
>
,
minMaxMultipassCaller
<
float
>
,
0
};
static
Caller
singlepass_callers
[]
=
{
minMaxCaller
<
unsigned
char
>
,
minMaxCaller
<
char
>
,
minMaxCaller
<
unsigned
short
>
,
minMaxCaller
<
short
>
,
minMaxCaller
<
int
>
,
minMaxCaller
<
float
>
,
minMaxCaller
<
double
>
typedef
void
(
*
func_t
)(
const
PtrStepSzb
src
,
const
PtrStepb
mask
,
double
*
minval
,
double
*
maxval
,
PtrStepb
buf
);
static
const
func_t
funcs
[]
=
{
::
minMax
::
run
<
uchar
>
,
::
minMax
::
run
<
schar
>
,
::
minMax
::
run
<
ushort
>
,
::
minMax
::
run
<
short
>
,
::
minMax
::
run
<
int
>
,
::
minMax
::
run
<
float
>
,
::
minMax
::
run
<
double
>
};
static
MaskedCaller
masked_multipass_callers
[]
=
{
minMaxMaskMultipassCaller
<
unsigned
char
>
,
minMaxMaskMultipassCaller
<
char
>
,
minMaxMaskMultipassCaller
<
unsigned
short
>
,
minMaxMaskMultipassCaller
<
short
>
,
minMaxMaskMultipassCaller
<
int
>
,
minMaxMaskMultipassCaller
<
float
>
,
0
};
static
MaskedCaller
masked_singlepass_callers
[]
=
{
minMaxMaskCaller
<
unsigned
char
>
,
minMaxMaskCaller
<
char
>
,
minMaxMaskCaller
<
unsigned
short
>
,
minMaxMaskCaller
<
short
>
,
minMaxMaskCaller
<
int
>
,
minMaxMaskCaller
<
float
>
,
minMaxMaskCaller
<
double
>
};
CV_Assert
(
src
.
depth
()
<=
CV_64F
);
CV_Assert
(
src
.
channels
()
==
1
);
CV_Assert
(
mask
.
empty
()
||
(
mask
.
type
()
==
CV_8U
&&
src
.
size
()
==
mask
.
size
()));
CV_Assert
(
src
.
channels
()
==
1
);
CV_Assert
(
mask
.
empty
()
||
(
mask
.
size
()
==
src
.
size
()
&&
mask
.
type
()
==
CV_8U
)
);
if
(
src
.
depth
()
==
CV_64F
)
{
...
...
@@ -445,66 +363,26 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp
CV_Error
(
CV_StsUnsupportedFormat
,
"The device doesn't support double"
);
}
double
minVal_
;
if
(
!
minVal
)
minVal
=
&
minVal_
;
double
maxVal_
;
if
(
!
maxVal
)
maxVal
=
&
maxVal_
;
Size
buf_size
;
getBufSizeRequired
(
src
.
cols
,
src
.
rows
,
static_cast
<
int
>
(
src
.
elemSize
())
,
buf_size
.
width
,
buf_size
.
height
);
::
minMax
::
getBufSize
(
src
.
cols
,
src
.
rows
,
buf_size
.
width
,
buf_size
.
height
);
ensureSizeIsEnough
(
buf_size
,
CV_8U
,
buf
);
if
(
mask
.
empty
())
{
Caller
*
callers
=
multipass_callers
;
if
(
TargetArchs
::
builtWith
(
GLOBAL_ATOMICS
)
&&
DeviceInfo
().
supports
(
GLOBAL_ATOMICS
))
callers
=
singlepass_callers
;
Caller
caller
=
callers
[
src
.
type
()];
CV_Assert
(
caller
!=
0
);
caller
(
src
,
minVal
,
maxVal
,
buf
);
}
else
{
MaskedCaller
*
callers
=
masked_multipass_callers
;
if
(
TargetArchs
::
builtWith
(
GLOBAL_ATOMICS
)
&&
DeviceInfo
().
supports
(
GLOBAL_ATOMICS
))
callers
=
masked_singlepass_callers
;
const
func_t
func
=
funcs
[
src
.
depth
()];
MaskedCaller
caller
=
callers
[
src
.
type
()];
CV_Assert
(
caller
!=
0
);
caller
(
src
,
mask
,
minVal
,
maxVal
,
buf
);
}
double
temp1
,
temp2
;
func
(
src
,
mask
,
minVal
?
minVal
:
&
temp1
,
maxVal
?
maxVal
:
&
temp2
,
buf
);
}
////////////////////////////////////////////////////////////////////////
//
Locate min and max
//
minMaxLoc
namespace
cv
{
namespace
gpu
{
namespace
device
namespace
minMaxLoc
{
namespace
matrix_reductions
{
namespace
minmaxloc
{
void
getBufSizeRequired
(
int
cols
,
int
rows
,
int
elem_size
,
int
&
b1cols
,
int
&
b1rows
,
int
&
b2cols
,
int
&
b2rows
);
template
<
typename
T
>
void
minMaxLocCaller
(
const
PtrStepSzb
src
,
double
*
minval
,
double
*
maxval
,
int
minloc
[
2
],
int
maxloc
[
2
],
PtrStepb
valBuf
,
PtrStepb
locBuf
);
void
getBufSize
(
int
cols
,
int
rows
,
size_t
elem_size
,
int
&
b1cols
,
int
&
b1rows
,
int
&
b2cols
,
int
&
b2rows
);
template
<
typename
T
>
void
minMaxLocMaskCaller
(
const
PtrStepSzb
src
,
const
PtrStepb
mask
,
double
*
minval
,
double
*
maxval
,
int
minloc
[
2
],
int
maxloc
[
2
],
PtrStepb
valBuf
,
PtrStepb
locBuf
);
template
<
typename
T
>
void
minMaxLocMultipassCaller
(
const
PtrStepSzb
src
,
double
*
minval
,
double
*
maxval
,
int
minloc
[
2
],
int
maxloc
[
2
],
PtrStepb
valBuf
,
PtrStepb
locBuf
);
template
<
typename
T
>
void
minMaxLocMaskMultipassCaller
(
const
PtrStepSzb
src
,
const
PtrStepb
mask
,
double
*
minval
,
double
*
maxval
,
int
minloc
[
2
],
int
maxloc
[
2
],
PtrStepb
valBuf
,
PtrStepb
locBuf
);
}
}
}}}
void
run
(
const
PtrStepSzb
src
,
const
PtrStepb
mask
,
double
*
minval
,
double
*
maxval
,
int
*
minloc
,
int
*
maxloc
,
PtrStepb
valbuf
,
PtrStep
<
unsigned
int
>
locbuf
);
}
void
cv
::
gpu
::
minMaxLoc
(
const
GpuMat
&
src
,
double
*
minVal
,
double
*
maxVal
,
Point
*
minLoc
,
Point
*
maxLoc
,
const
GpuMat
&
mask
)
{
...
...
@@ -515,42 +393,20 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point
void
cv
::
gpu
::
minMaxLoc
(
const
GpuMat
&
src
,
double
*
minVal
,
double
*
maxVal
,
Point
*
minLoc
,
Point
*
maxLoc
,
const
GpuMat
&
mask
,
GpuMat
&
valBuf
,
GpuMat
&
locBuf
)
{
using
namespace
::
cv
::
gpu
::
device
::
matrix_reductions
::
minmaxloc
;
typedef
void
(
*
Caller
)(
const
PtrStepSzb
,
double
*
,
double
*
,
int
[
2
],
int
[
2
],
PtrStepb
,
PtrStepb
);
typedef
void
(
*
MaskedCaller
)(
const
PtrStepSzb
,
const
PtrStepb
,
double
*
,
double
*
,
int
[
2
],
int
[
2
],
PtrStepb
,
PtrStepb
);
static
Caller
multipass_callers
[]
=
{
minMaxLocMultipassCaller
<
unsigned
char
>
,
minMaxLocMultipassCaller
<
char
>
,
minMaxLocMultipassCaller
<
unsigned
short
>
,
minMaxLocMultipassCaller
<
shor
t
>
,
minMaxLocMultipassCaller
<
int
>
,
minMaxLocMultipassCaller
<
float
>
,
0
typedef
void
(
*
func_t
)(
const
PtrStepSzb
src
,
const
PtrStepb
mask
,
double
*
minval
,
double
*
maxval
,
int
*
minloc
,
int
*
maxloc
,
PtrStepb
valbuf
,
PtrStep
<
unsigned
int
>
locbuf
)
;
static
const
func_t
funcs
[]
=
{
::
minMaxLoc
::
run
<
uchar
>
,
::
minMaxLoc
::
run
<
schar
>
,
::
minMaxLoc
::
run
<
ushort
>
,
::
minMaxLoc
::
run
<
short
>
,
::
minMaxLoc
::
run
<
int
>
,
::
minMaxLoc
::
run
<
floa
t
>
,
::
minMaxLoc
::
run
<
double
>
};
static
Caller
singlepass_callers
[]
=
{
minMaxLocCaller
<
unsigned
char
>
,
minMaxLocCaller
<
char
>
,
minMaxLocCaller
<
unsigned
short
>
,
minMaxLocCaller
<
short
>
,
minMaxLocCaller
<
int
>
,
minMaxLocCaller
<
float
>
,
minMaxLocCaller
<
double
>
};
static
MaskedCaller
masked_multipass_callers
[]
=
{
minMaxLocMaskMultipassCaller
<
unsigned
char
>
,
minMaxLocMaskMultipassCaller
<
char
>
,
minMaxLocMaskMultipassCaller
<
unsigned
short
>
,
minMaxLocMaskMultipassCaller
<
short
>
,
minMaxLocMaskMultipassCaller
<
int
>
,
minMaxLocMaskMultipassCaller
<
float
>
,
0
};
static
MaskedCaller
masked_singlepass_callers
[]
=
{
minMaxLocMaskCaller
<
unsigned
char
>
,
minMaxLocMaskCaller
<
char
>
,
minMaxLocMaskCaller
<
unsigned
short
>
,
minMaxLocMaskCaller
<
short
>
,
minMaxLocMaskCaller
<
int
>
,
minMaxLocMaskCaller
<
float
>
,
minMaxLocMaskCaller
<
double
>
};
CV_Assert
(
src
.
depth
()
<=
CV_64F
);
CV_Assert
(
src
.
channels
()
==
1
);
CV_Assert
(
mask
.
empty
()
||
(
mask
.
type
()
==
CV_8U
&&
src
.
size
()
==
mask
.
size
()));
CV_Assert
(
src
.
channels
()
==
1
);
CV_Assert
(
mask
.
empty
()
||
(
mask
.
size
()
==
src
.
size
()
&&
mask
.
type
()
==
CV_8U
)
);
if
(
src
.
depth
()
==
CV_64F
)
{
...
...
@@ -558,61 +414,28 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point
CV_Error
(
CV_StsUnsupportedFormat
,
"The device doesn't support double"
);
}
double
minVal_
;
if
(
!
minVal
)
minVal
=
&
minVal_
;
double
maxVal_
;
if
(
!
maxVal
)
maxVal
=
&
maxVal_
;
int
minLoc_
[
2
];
int
maxLoc_
[
2
];
Size
valbuf_size
,
locbuf_size
;
getBufSizeRequired
(
src
.
cols
,
src
.
rows
,
static_cast
<
int
>
(
src
.
elemSize
()),
valbuf_size
.
width
,
valbuf_size
.
height
,
locbuf_size
.
width
,
locbuf_size
.
height
);
::
minMaxLoc
::
getBufSize
(
src
.
cols
,
src
.
rows
,
src
.
elemSize
(),
valbuf_size
.
width
,
valbuf_size
.
height
,
locbuf_size
.
width
,
locbuf_size
.
height
);
ensureSizeIsEnough
(
valbuf_size
,
CV_8U
,
valBuf
);
ensureSizeIsEnough
(
locbuf_size
,
CV_8U
,
locBuf
);
if
(
mask
.
empty
())
{
Caller
*
callers
=
multipass_callers
;
if
(
TargetArchs
::
builtWith
(
GLOBAL_ATOMICS
)
&&
DeviceInfo
().
supports
(
GLOBAL_ATOMICS
))
callers
=
singlepass_callers
;
const
func_t
func
=
funcs
[
src
.
depth
()];
Caller
caller
=
callers
[
src
.
type
()];
CV_Assert
(
caller
!=
0
);
caller
(
src
,
minVal
,
maxVal
,
minLoc_
,
maxLoc_
,
valBuf
,
locBuf
);
}
else
{
MaskedCaller
*
callers
=
masked_multipass_callers
;
if
(
TargetArchs
::
builtWith
(
GLOBAL_ATOMICS
)
&&
DeviceInfo
().
supports
(
GLOBAL_ATOMICS
))
callers
=
masked_singlepass_callers
;
MaskedCaller
caller
=
callers
[
src
.
type
()];
CV_Assert
(
caller
!=
0
);
caller
(
src
,
mask
,
minVal
,
maxVal
,
minLoc_
,
maxLoc_
,
valBuf
,
locBuf
);
}
if
(
minLoc
)
{
minLoc
->
x
=
minLoc_
[
0
];
minLoc
->
y
=
minLoc_
[
1
];
}
if
(
maxLoc
)
{
maxLoc
->
x
=
maxLoc_
[
0
];
maxLoc
->
y
=
maxLoc_
[
1
];
}
double
temp1
,
temp2
;
Point
temp3
,
temp4
;
func
(
src
,
mask
,
minVal
?
minVal
:
&
temp1
,
maxVal
?
maxVal
:
&
temp2
,
minLoc
?
&
minLoc
->
x
:
&
temp3
.
x
,
maxLoc
?
&
maxLoc
->
x
:
&
temp4
.
x
,
valBuf
,
locBuf
);
}
//////////////////////////////////////////////////////////////////////////////
//
Count non-zero elements
//
countNonZero
namespace
c
v
{
namespace
gpu
{
namespace
device
namespace
c
ountNonZero
{
namespace
matrix_reductions
{
namespace
countnonzero
{
void
getBufSizeRequired
(
int
cols
,
int
rows
,
int
&
bufcols
,
int
&
bufrows
);
template
<
typename
T
>
int
countNonZeroCaller
(
const
PtrStepSzb
src
,
PtrStepb
buf
);
void
getBufSize
(
int
cols
,
int
rows
,
int
&
bufcols
,
int
&
bufrows
);
template
<
typename
T
>
int
countNonZeroMultipassCaller
(
const
PtrStepSzb
src
,
PtrStepb
buf
);
}
}
}}}
int
run
(
const
PtrStepSzb
src
,
PtrStep
<
unsigned
int
>
buf
);
}
int
cv
::
gpu
::
countNonZero
(
const
GpuMat
&
src
)
{
...
...
@@ -620,27 +443,20 @@ int cv::gpu::countNonZero(const GpuMat& src)
return
countNonZero
(
src
,
buf
);
}
int
cv
::
gpu
::
countNonZero
(
const
GpuMat
&
src
,
GpuMat
&
buf
)
{
using
namespace
::
cv
::
gpu
::
device
::
matrix_reductions
::
countnonzero
;
typedef
int
(
*
Caller
)(
const
PtrStepSzb
src
,
PtrStepb
buf
);
static
Caller
multipass_callers
[
7
]
=
{
countNonZeroMultipassCaller
<
unsigned
char
>
,
countNonZeroMultipassCaller
<
char
>
,
countNonZeroMultipassCaller
<
unsigned
short
>
,
countNonZeroMultipassCaller
<
short
>
,
countNonZeroMultipassCaller
<
int
>
,
countNonZeroMultipassCaller
<
float
>
,
0
typedef
int
(
*
func_t
)(
const
PtrStepSzb
src
,
PtrStep
<
unsigned
int
>
buf
);
static
const
func_t
funcs
[]
=
{
::
countNonZero
::
run
<
uchar
>
,
::
countNonZero
::
run
<
schar
>
,
::
countNonZero
::
run
<
ushort
>
,
::
countNonZero
::
run
<
short
>
,
::
countNonZero
::
run
<
int
>
,
::
countNonZero
::
run
<
float
>
,
::
countNonZero
::
run
<
double
>
};
static
Caller
singlepass_callers
[
7
]
=
{
countNonZeroCaller
<
unsigned
char
>
,
countNonZeroCaller
<
char
>
,
countNonZeroCaller
<
unsigned
short
>
,
countNonZeroCaller
<
short
>
,
countNonZeroCaller
<
int
>
,
countNonZeroCaller
<
float
>
,
countNonZeroCaller
<
double
>
};
CV_Assert
(
src
.
depth
()
<=
CV_64F
);
CV_Assert
(
src
.
channels
()
==
1
);
if
(
src
.
depth
()
==
CV_64F
)
...
...
@@ -650,168 +466,190 @@ int cv::gpu::countNonZero(const GpuMat& src, GpuMat& buf)
}
Size
buf_size
;
getBufSizeRequired
(
src
.
cols
,
src
.
rows
,
buf_size
.
width
,
buf_size
.
height
);
::
countNonZero
::
getBufSize
(
src
.
cols
,
src
.
rows
,
buf_size
.
width
,
buf_size
.
height
);
ensureSizeIsEnough
(
buf_size
,
CV_8U
,
buf
);
Caller
*
callers
=
multipass_callers
;
if
(
TargetArchs
::
builtWith
(
GLOBAL_ATOMICS
)
&&
DeviceInfo
().
supports
(
GLOBAL_ATOMICS
))
callers
=
singlepass_callers
;
const
func_t
func
=
funcs
[
src
.
depth
()];
Caller
caller
=
callers
[
src
.
type
()];
CV_Assert
(
caller
!=
0
);
return
caller
(
src
,
buf
);
return
func
(
src
,
buf
);
}
//////////////////////////////////////////////////////////////////////////////
// reduce
namespace
cv
{
namespace
gpu
{
namespace
devi
ce
namespace
redu
ce
{
namespace
matrix_reductions
{
template
<
typename
T
,
typename
S
,
typename
D
>
void
reduceRows_gpu
(
const
PtrStepSzb
&
src
,
const
PtrStepSzb
&
dst
,
int
reduceOp
,
cudaStream_t
stream
);
template
<
typename
T
,
typename
S
,
typename
D
>
void
reduceCols_gpu
(
const
PtrStepSzb
&
src
,
int
cn
,
const
PtrStepSzb
&
dst
,
int
reduceOp
,
cudaStream_t
stream
);
}
}
}}
template
<
typename
T
,
typename
S
,
typename
D
>
void
rows
(
PtrStepSzb
src
,
void
*
dst
,
int
op
,
cudaStream_t
stream
);
template
<
typename
T
,
typename
S
,
typename
D
>
void
cols
(
PtrStepSzb
src
,
void
*
dst
,
int
cn
,
int
op
,
cudaStream_t
stream
);
}
void
cv
::
gpu
::
reduce
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
int
dim
,
int
reduceOp
,
int
dtype
,
Stream
&
stream
)
{
using
namespace
::
cv
::
gpu
::
device
::
matrix_reductions
;
CV_Assert
(
src
.
depth
()
<=
CV_32F
&&
src
.
channels
()
<=
4
&&
dtype
<=
CV_32F
);
CV_Assert
(
dim
==
0
||
dim
==
1
);
CV_Assert
(
reduceOp
==
CV_REDUCE_SUM
||
reduceOp
==
CV_REDUCE_AVG
||
reduceOp
==
CV_REDUCE_MAX
||
reduceOp
==
CV_REDUCE_MIN
);
CV_Assert
(
src
.
channels
()
<=
4
);
CV_Assert
(
dim
==
0
||
dim
==
1
);
CV_Assert
(
reduceOp
==
CV_REDUCE_SUM
||
reduceOp
==
CV_REDUCE_AVG
||
reduceOp
==
CV_REDUCE_MAX
||
reduceOp
==
CV_REDUCE_MIN
);
if
(
dtype
<
0
)
dtype
=
src
.
depth
();
dst
.
create
(
1
,
dim
==
0
?
src
.
cols
:
src
.
rows
,
CV_MAKE
TYPE
(
dtype
,
src
.
channels
()));
dst
.
create
(
1
,
dim
==
0
?
src
.
cols
:
src
.
rows
,
CV_MAKE
_TYPE
(
CV_MAT_DEPTH
(
dtype
)
,
src
.
channels
()));
if
(
dim
==
0
)
{
typedef
void
(
*
caller_t
)(
const
PtrStepSzb
&
src
,
const
PtrStepSzb
&
dst
,
int
reduceO
p
,
cudaStream_t
stream
);
static
const
caller_t
callers
[
6
][
6
]
=
typedef
void
(
*
func_t
)(
PtrStepSzb
src
,
void
*
dst
,
int
o
p
,
cudaStream_t
stream
);
static
const
func_t
funcs
[
7
][
7
]
=
{
{
::
reduce
::
rows
<
unsigned
char
,
int
,
unsigned
char
>
,
0
/*::reduce::rows<unsigned char, int, signed char>*/
,
0
/*::reduce::rows<unsigned char, int, unsigned short>*/
,
0
/*::reduce::rows<unsigned char, int, short>*/
,
::
reduce
::
rows
<
unsigned
char
,
int
,
int
>
,
::
reduce
::
rows
<
unsigned
char
,
float
,
float
>
,
::
reduce
::
rows
<
unsigned
char
,
double
,
double
>
},
{
reduceRows_gpu
<
unsigned
char
,
int
,
unsigned
char
>
,
0
/*reduceRows_gpu<unsigned char, int, signed char>*/
,
0
/*reduceRows_gpu<unsigned char, int, unsigned short>*/
,
0
/*reduceRows_gpu<unsigned char, int, short>*/
,
reduceRows_gpu
<
unsigned
char
,
int
,
int
>
,
reduceRows_gpu
<
unsigned
char
,
int
,
float
>
0
/*::reduce::rows<signed char, int, unsigned char>*/
,
0
/*::reduce::rows<signed char, int, signed char>*/
,
0
/*::reduce::rows<signed char, int, unsigned short>*/
,
0
/*::reduce::rows<signed char, int, short>*/
,
0
/*::reduce::rows<signed char, int, int>*/
,
0
/*::reduce::rows<signed char, float, float>*/
,
0
/*::reduce::rows<signed char, double, double>*/
},
{
0
/*reduceRows_gpu<signed char, int, unsigned char>*/
,
0
/*reduceRows_gpu<signed char, int, signed char>*/
,
0
/*reduceRows_gpu<signed char, int, unsigned short>*/
,
0
/*reduceRows_gpu<signed char, int, short>*/
,
0
/*reduceRows_gpu<signed char, int, int>*/
,
0
/*reduceRows_gpu<signed char, int, float>*/
0
/*::reduce::rows<unsigned short, int, unsigned char>*/
,
0
/*::reduce::rows<unsigned short, int, signed char>*/
,
::
reduce
::
rows
<
unsigned
short
,
int
,
unsigned
short
>
,
0
/*::reduce::rows<unsigned short, int, short>*/
,
::
reduce
::
rows
<
unsigned
short
,
int
,
int
>
,
::
reduce
::
rows
<
unsigned
short
,
float
,
float
>
,
::
reduce
::
rows
<
unsigned
short
,
double
,
double
>
},
{
0
/*reduceRows_gpu<unsigned short, int, unsigned char>*/
,
0
/*reduceRows_gpu<unsigned short, int, signed char>*/
,
reduceRows_gpu
<
unsigned
short
,
int
,
unsigned
short
>
,
0
/*reduceRows_gpu<unsigned short, int, short>*/
,
reduceRows_gpu
<
unsigned
short
,
int
,
int
>
,
reduceRows_gpu
<
unsigned
short
,
int
,
float
>
0
/*::reduce::rows<short, int, unsigned char>*/
,
0
/*::reduce::rows<short, int, signed char>*/
,
0
/*::reduce::rows<short, int, unsigned short>*/
,
::
reduce
::
rows
<
short
,
int
,
short
>
,
::
reduce
::
rows
<
short
,
int
,
int
>
,
::
reduce
::
rows
<
short
,
float
,
float
>
,
::
reduce
::
rows
<
short
,
double
,
double
>
},
{
0
/*reduceRows_gpu<short, int, unsigned char>*/
,
0
/*reduceRows_gpu<short, int, signed char>*/
,
0
/*reduceRows_gpu<short, int, unsigned short>*/
,
reduceRows_gpu
<
short
,
int
,
short
>
,
reduceRows_gpu
<
short
,
int
,
int
>
,
reduceRows_gpu
<
short
,
int
,
float
>
0
/*::reduce::rows<int, int, unsigned char>*/
,
0
/*::reduce::rows<int, int, signed char>*/
,
0
/*::reduce::rows<int, int, unsigned short>*/
,
0
/*::reduce::rows<int, int, short>*/
,
::
reduce
::
rows
<
int
,
int
,
int
>
,
::
reduce
::
rows
<
int
,
float
,
float
>
,
::
reduce
::
rows
<
int
,
double
,
double
>
},
{
0
/*reduceRows_gpu<int, int, unsigned char>*/
,
0
/*reduceRows_gpu<int, int, signed char>*/
,
0
/*reduceRows_gpu<int, int, unsigned short>*/
,
0
/*reduceRows_gpu<int, int, short>*/
,
reduceRows_gpu
<
int
,
int
,
int
>
,
reduceRows_gpu
<
int
,
int
,
float
>
0
/*::reduce::rows<float, float, unsigned char>*/
,
0
/*::reduce::rows<float, float, signed char>*/
,
0
/*::reduce::rows<float, float, unsigned short>*/
,
0
/*::reduce::rows<float, float, short>*/
,
0
/*::reduce::rows<float, float, int>*/
,
::
reduce
::
rows
<
float
,
float
,
float
>
,
::
reduce
::
rows
<
float
,
double
,
double
>
},
{
0
/*reduceRows_gpu<float, float, unsigned char>*/
,
0
/*reduceRows_gpu<float, float, signed char>*/
,
0
/*reduceRows_gpu<float, float, unsigned short>*/
,
0
/*reduceRows_gpu<float, float, short>*/
,
0
/*reduceRows_gpu<float, float, int>*/
,
reduceRows_gpu
<
float
,
float
,
float
>
0
/*::reduce::rows<double, double, unsigned char>*/
,
0
/*::reduce::rows<double, double, signed char>*/
,
0
/*::reduce::rows<double, double, unsigned short>*/
,
0
/*::reduce::rows<double, double, short>*/
,
0
/*::reduce::rows<double, double, int>*/
,
0
/*::reduce::rows<double, double, float>*/
,
::
reduce
::
rows
<
double
,
double
,
double
>
}
};
const
caller_t
func
=
caller
s
[
src
.
depth
()][
dst
.
depth
()];
const
func_t
func
=
func
s
[
src
.
depth
()][
dst
.
depth
()];
if
(
!
func
)
CV_Error
(
CV_StsUnsupportedFormat
,
"Unsupported combination of input and output array formats"
);
func
(
src
.
reshape
(
1
),
dst
.
reshape
(
1
)
,
reduceOp
,
StreamAccessor
::
getStream
(
stream
));
func
(
src
.
reshape
(
1
),
dst
.
data
,
reduceOp
,
StreamAccessor
::
getStream
(
stream
));
}
else
{
typedef
void
(
*
caller_t
)(
const
PtrStepSzb
&
src
,
int
cn
,
const
PtrStepSzb
&
dst
,
int
reduceOp
,
cudaStream_t
stream
);
static
const
caller_t
callers
[
6
][
6
]
=
typedef
void
(
*
func_t
)(
PtrStepSzb
src
,
void
*
dst
,
int
cn
,
int
op
,
cudaStream_t
stream
);
static
const
func_t
funcs
[
7
][
7
]
=
{
{
reduceCols_gpu
<
unsigned
char
,
int
,
unsigned
char
>
,
0
/*reduceCols_gpu<unsigned char, int, signed char>*/
,
0
/*reduceCols_gpu<unsigned char, int, unsigned short>*/
,
0
/*reduceCols_gpu<unsigned char, int, short>*/
,
reduceCols_gpu
<
unsigned
char
,
int
,
int
>
,
reduceCols_gpu
<
unsigned
char
,
int
,
float
>
::
reduce
::
cols
<
unsigned
char
,
int
,
unsigned
char
>
,
0
/*::reduce::cols<unsigned char, int, signed char>*/
,
0
/*::reduce::cols<unsigned char, int, unsigned short>*/
,
0
/*::reduce::cols<unsigned char, int, short>*/
,
::
reduce
::
cols
<
unsigned
char
,
int
,
int
>
,
::
reduce
::
cols
<
unsigned
char
,
float
,
float
>
,
::
reduce
::
cols
<
unsigned
char
,
double
,
double
>
},
{
0
/*::reduce::cols<signed char, int, unsigned char>*/
,
0
/*::reduce::cols<signed char, int, signed char>*/
,
0
/*::reduce::cols<signed char, int, unsigned short>*/
,
0
/*::reduce::cols<signed char, int, short>*/
,
0
/*::reduce::cols<signed char, int, int>*/
,
0
/*::reduce::cols<signed char, float, float>*/
,
0
/*::reduce::cols<signed char, double, double>*/
},
{
0
/*reduceCols_gpu<signed char, int, unsigned char>*/
,
0
/*reduceCols_gpu<signed char, int, signed char>*/
,
0
/*reduceCols_gpu<signed char, int, unsigned short>*/
,
0
/*reduceCols_gpu<signed char, int, short>*/
,
0
/*reduceCols_gpu<signed char, int, int>*/
,
0
/*reduceCols_gpu<signed char, int, float>*/
0
/*::reduce::cols<unsigned short, int, unsigned char>*/
,
0
/*::reduce::cols<unsigned short, int, signed char>*/
,
::
reduce
::
cols
<
unsigned
short
,
int
,
unsigned
short
>
,
0
/*::reduce::cols<unsigned short, int, short>*/
,
::
reduce
::
cols
<
unsigned
short
,
int
,
int
>
,
::
reduce
::
cols
<
unsigned
short
,
float
,
float
>
,
::
reduce
::
cols
<
unsigned
short
,
double
,
double
>
},
{
0
/*reduceCols_gpu<unsigned short, int, unsigned char>*/
,
0
/*reduceCols_gpu<unsigned short, int, signed char>*/
,
reduceCols_gpu
<
unsigned
short
,
int
,
unsigned
short
>
,
0
/*reduceCols_gpu<unsigned short, int, short>*/
,
reduceCols_gpu
<
unsigned
short
,
int
,
int
>
,
reduceCols_gpu
<
unsigned
short
,
int
,
float
>
0
/*::reduce::cols<short, int, unsigned char>*/
,
0
/*::reduce::cols<short, int, signed char>*/
,
0
/*::reduce::cols<short, int, unsigned short>*/
,
::
reduce
::
cols
<
short
,
int
,
short
>
,
::
reduce
::
cols
<
short
,
int
,
int
>
,
::
reduce
::
cols
<
short
,
float
,
float
>
,
::
reduce
::
cols
<
short
,
double
,
double
>
},
{
0
/*reduceCols_gpu<short, int, unsigned char>*/
,
0
/*reduceCols_gpu<short, int, signed char>*/
,
0
/*reduceCols_gpu<short, int, unsigned short>*/
,
reduceCols_gpu
<
short
,
int
,
short
>
,
reduceCols_gpu
<
short
,
int
,
int
>
,
reduceCols_gpu
<
short
,
int
,
float
>
0
/*::reduce::cols<int, int, unsigned char>*/
,
0
/*::reduce::cols<int, int, signed char>*/
,
0
/*::reduce::cols<int, int, unsigned short>*/
,
0
/*::reduce::cols<int, int, short>*/
,
::
reduce
::
cols
<
int
,
int
,
int
>
,
::
reduce
::
cols
<
int
,
float
,
float
>
,
::
reduce
::
cols
<
int
,
double
,
double
>
},
{
0
/*reduceCols_gpu<int, int, unsigned char>*/
,
0
/*reduceCols_gpu<int, int, signed char>*/
,
0
/*reduceCols_gpu<int, int, unsigned short>*/
,
0
/*reduceCols_gpu<int, int, short>*/
,
reduceCols_gpu
<
int
,
int
,
int
>
,
reduceCols_gpu
<
int
,
int
,
float
>
0
/*::reduce::cols<float, float, unsigned char>*/
,
0
/*::reduce::cols<float, float, signed char>*/
,
0
/*::reduce::cols<float, float, unsigned short>*/
,
0
/*::reduce::cols<float, float, short>*/
,
0
/*::reduce::cols<float, float, int>*/
,
::
reduce
::
cols
<
float
,
float
,
float
>
,
::
reduce
::
cols
<
float
,
double
,
double
>
},
{
0
/*reduceCols_gpu<float, unsigned char>*/
,
0
/*reduceCols_gpu<float, signed char>*/
,
0
/*reduceCols_gpu<float, unsigned short>*/
,
0
/*reduceCols_gpu<float, short>*/
,
0
/*reduceCols_gpu<float, int>*/
,
reduceCols_gpu
<
float
,
float
,
float
>
0
/*::reduce::cols<double, double, unsigned char>*/
,
0
/*::reduce::cols<double, double, signed char>*/
,
0
/*::reduce::cols<double, double, unsigned short>*/
,
0
/*::reduce::cols<double, double, short>*/
,
0
/*::reduce::cols<double, double, int>*/
,
0
/*::reduce::cols<double, double, float>*/
,
::
reduce
::
cols
<
double
,
double
,
double
>
}
};
const
caller_t
func
=
caller
s
[
src
.
depth
()][
dst
.
depth
()];
const
func_t
func
=
func
s
[
src
.
depth
()][
dst
.
depth
()];
if
(
!
func
)
CV_Error
(
CV_StsUnsupportedFormat
,
"Unsupported combination of input and output array formats"
);
func
(
src
,
src
.
channels
(),
dst
,
reduceOp
,
StreamAccessor
::
getStream
(
stream
));
func
(
src
,
dst
.
data
,
src
.
channels
()
,
reduceOp
,
StreamAccessor
::
getStream
(
stream
));
}
}
...
...
modules/gpu/test/test_core.cpp
View file @
e8f9762e
...
...
@@ -2982,7 +2982,7 @@ TEST_P(Sum, Sqr)
INSTANTIATE_TEST_CASE_P
(
GPU_Core
,
Sum
,
testing
::
Combine
(
ALL_DEVICES
,
DIFFERENT_SIZES
,
TYPES
(
CV_8U
,
CV_
32
F
,
1
,
4
),
TYPES
(
CV_8U
,
CV_
64
F
,
1
,
4
),
WHOLE_SUBMAT
));
////////////////////////////////////////////////////////////////////////////////
...
...
@@ -3351,7 +3351,14 @@ PARAM_TEST_CASE(Reduce, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels, Reduc
cv
::
gpu
::
setDevice
(
devInfo
.
deviceID
());
type
=
CV_MAKE_TYPE
(
depth
,
channels
);
dst_depth
=
(
reduceOp
==
CV_REDUCE_MAX
||
reduceOp
==
CV_REDUCE_MIN
)
?
depth
:
CV_32F
;
if
(
reduceOp
==
CV_REDUCE_MAX
||
reduceOp
==
CV_REDUCE_MIN
)
dst_depth
=
depth
;
else
if
(
reduceOp
==
CV_REDUCE_SUM
)
dst_depth
=
depth
==
CV_8U
?
CV_32S
:
depth
<
CV_64F
?
CV_32F
:
depth
;
else
dst_depth
=
depth
<
CV_32F
?
CV_32F
:
depth
;
dst_type
=
CV_MAKE_TYPE
(
dst_depth
,
channels
);
}
...
...
@@ -3392,7 +3399,8 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, Reduce, testing::Combine(
testing
::
Values
(
MatDepth
(
CV_8U
),
MatDepth
(
CV_16U
),
MatDepth
(
CV_16S
),
MatDepth
(
CV_32F
)),
MatDepth
(
CV_32F
),
MatDepth
(
CV_64F
)),
ALL_CHANNELS
,
ALL_REDUCE_CODES
,
WHOLE_SUBMAT
));
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment