opencv / Commit b2603828
authored Nov 29, 2010 by Alexey Spizhevoy

    added masks support into gpu::minMaxLoc

parent 7c4cff99

Showing 4 changed files with 164 additions and 91 deletions (+164 -91)
modules/gpu/include/opencv2/gpu/gpu.hpp    +3   -2
modules/gpu/src/arithm.cpp                 +63  -68
modules/gpu/src/cuda/mathfunc.cu           +87  -16
tests/gpu/src/arithm.cpp                   +11  -5
modules/gpu/include/opencv2/gpu/gpu.hpp

@@ -431,11 +431,12 @@ namespace cv
     CV_EXPORTS void minMax(const GpuMat& src, double* minVal, double* maxVal, const GpuMat& mask, GpuMat& buf);
 
     //! finds global minimum and maximum array elements and returns their values with locations
-    CV_EXPORTS void minMaxLoc(const GpuMat& src, double* minVal, double* maxVal=0, Point* minLoc=0, Point* maxLoc=0);
+    CV_EXPORTS void minMaxLoc(const GpuMat& src, double* minVal, double* maxVal=0, Point* minLoc=0, Point* maxLoc=0,
+                              const GpuMat& mask=GpuMat());
 
     //! finds global minimum and maximum array elements and returns their values with locations
     CV_EXPORTS void minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc,
-                              GpuMat& valbuf, GpuMat& locbuf);
+                              const GpuMat& mask, GpuMat& valbuf, GpuMat& locbuf);
 
     //! counts non-zero array elements
     CV_EXPORTS int countNonZero(const GpuMat& src);
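The new mask parameter defaults to an empty GpuMat, so existing callers keep compiling; passing a CV_8U mask restricts the search to pixels where the mask is non-zero. Below is a minimal usage sketch of the two masked overloads declared above, assuming an OpenCV 2.x build with the gpu module and a CUDA-capable device; the image sizes and values are arbitrary and the snippet is not part of this commit.

    #include <cstdio>
    #include <opencv2/core/core.hpp>
    #include <opencv2/gpu/gpu.hpp>

    int main()
    {
        // Host data: a single-channel image and a CV_8U mask (non-zero = "consider this pixel").
        cv::Mat src_host(480, 640, CV_32F);
        cv::Mat mask_host(src_host.size(), CV_8U);
        cv::randu(src_host, cv::Scalar(0), cv::Scalar(100));
        cv::randu(mask_host, cv::Scalar(0), cv::Scalar(2));     // values 0 or 1

        cv::gpu::GpuMat src(src_host), mask(mask_host);

        double minVal, maxVal;
        cv::Point minLoc, maxLoc;

        // Simple form: temporary GPU buffers are allocated internally.
        cv::gpu::minMaxLoc(src, &minVal, &maxVal, &minLoc, &maxLoc, mask);

        // Buffered form: valbuf/locbuf can be reused across calls to avoid reallocation.
        cv::gpu::GpuMat valbuf, locbuf;
        cv::gpu::minMaxLoc(src, &minVal, &maxVal, &minLoc, &maxLoc, mask, valbuf, locbuf);

        std::printf("min %f at (%d,%d), max %f at (%d,%d)\n",
                    minVal, minLoc.x, minLoc.y, maxVal, maxLoc.x, maxLoc.y);
        return 0;
    }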
modules/gpu/src/arithm.cpp

@@ -67,8 +67,8 @@ void cv::gpu::flip(const GpuMat&, GpuMat&, int) { throw_nogpu(); }
 Scalar cv::gpu::sum(const GpuMat&) { throw_nogpu(); return Scalar(); }
 void cv::gpu::minMax(const GpuMat&, double*, double*, const GpuMat&) { throw_nogpu(); }
 void cv::gpu::minMax(const GpuMat&, double*, double*, const GpuMat&, GpuMat&) { throw_nogpu(); }
-void cv::gpu::minMaxLoc(const GpuMat&, double*, double*, Point*, Point*) { throw_nogpu(); }
-void cv::gpu::minMaxLoc(const GpuMat&, double*, double*, Point*, Point*, GpuMat&, GpuMat&) { throw_nogpu(); }
+void cv::gpu::minMaxLoc(const GpuMat&, double*, double*, Point*, Point*, const GpuMat&) { throw_nogpu(); }
+void cv::gpu::minMaxLoc(const GpuMat&, double*, double*, Point*, Point*, const GpuMat&, GpuMat&, GpuMat&) { throw_nogpu(); }
 int cv::gpu::countNonZero(const GpuMat&) { throw_nogpu(); return 0; }
 int cv::gpu::countNonZero(const GpuMat&, GpuMat&) { throw_nogpu(); return 0; }
 void cv::gpu::LUT(const GpuMat&, const Mat&, GpuMat&) { throw_nogpu(); }

@@ -523,6 +523,8 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp
     using namespace mathfunc::minmax;
 
     typedef void (*Caller)(const DevMem2D, double*, double*, PtrStep);
+    typedef void (*MaskedCaller)(const DevMem2D, const PtrStep, double*, double*, PtrStep);
+
     static const Caller callers[2][7] = 
     { { min_max_multipass_caller<unsigned char>, min_max_multipass_caller<char>, 
         min_max_multipass_caller<unsigned short>, min_max_multipass_caller<short>, 

@@ -531,7 +533,6 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp
         min_max_caller<unsigned short>, min_max_caller<short>, 
         min_max_caller<int>, min_max_caller<float>, min_max_caller<double>} };
 
-    typedef void (*MaskedCaller)(const DevMem2D, const PtrStep, double*, double*, PtrStep);
     static const MaskedCaller masked_callers[2][7] = 
     { { min_max_mask_multipass_caller<unsigned char>, min_max_mask_multipass_caller<char>, 
         min_max_mask_multipass_caller<unsigned short>, min_max_mask_multipass_caller<short>, 

@@ -580,23 +581,54 @@ namespace cv { namespace gpu { namespace mathfunc { namespace minmaxloc {
     void min_max_loc_caller(const DevMem2D src, double* minval, double* maxval, 
                             int minloc[2], int maxloc[2], PtrStep valbuf, PtrStep locbuf);
 
+    template <typename T> 
+    void min_max_loc_mask_caller(const DevMem2D src, const PtrStep mask, double* minval, double* maxval, 
+                                 int minloc[2], int maxloc[2], PtrStep valbuf, PtrStep locbuf);
+
     template <typename T> 
     void min_max_loc_multipass_caller(const DevMem2D src, double* minval, double* maxval, 
                                       int minloc[2], int maxloc[2], PtrStep valbuf, PtrStep locbuf);
+
+    template <typename T> 
+    void min_max_loc_mask_multipass_caller(const DevMem2D src, const PtrStep mask, double* minval, double* maxval, 
+                                           int minloc[2], int maxloc[2], PtrStep valbuf, PtrStep locbuf);
 }}}}
 
-void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc)
+void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc, const GpuMat& mask)
 {
     GpuMat valbuf, locbuf;
-    minMaxLoc(src, minVal, maxVal, minLoc, maxLoc, valbuf, locbuf);
+    minMaxLoc(src, minVal, maxVal, minLoc, maxLoc, mask, valbuf, locbuf);
 }
 
 void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc,
-                        GpuMat& valbuf, GpuMat& locbuf)
+                        const GpuMat& mask, GpuMat& valbuf, GpuMat& locbuf)
 {
     using namespace mathfunc::minmaxloc;
 
+    typedef void (*Caller)(const DevMem2D, double*, double*, int[2], int[2], PtrStep, PtrStep);
+    typedef void (*MaskedCaller)(const DevMem2D, const PtrStep, double*, double*, int[2], int[2], PtrStep, PtrStep);
+
+    static const Caller callers[2][7] = 
+    { { min_max_loc_multipass_caller<unsigned char>, min_max_loc_multipass_caller<char>, 
+        min_max_loc_multipass_caller<unsigned short>, min_max_loc_multipass_caller<short>, 
+        min_max_loc_multipass_caller<int>, min_max_loc_multipass_caller<float>, 0 },
+      { min_max_loc_caller<unsigned char>, min_max_loc_caller<char>, 
+        min_max_loc_caller<unsigned short>, min_max_loc_caller<short>, 
+        min_max_loc_caller<int>, min_max_loc_caller<float>, min_max_loc_caller<double> } };
+
+    static const MaskedCaller masked_callers[2][7] = 
+    { { min_max_loc_mask_multipass_caller<unsigned char>, min_max_loc_mask_multipass_caller<char>, 
+        min_max_loc_mask_multipass_caller<unsigned short>, min_max_loc_mask_multipass_caller<short>, 
+        min_max_loc_mask_multipass_caller<int>, min_max_loc_mask_multipass_caller<float>, 0 },
+      { min_max_loc_mask_caller<unsigned char>, min_max_loc_mask_caller<char>, 
+        min_max_loc_mask_caller<unsigned short>, min_max_loc_mask_caller<short>, 
+        min_max_loc_mask_caller<int>, min_max_loc_mask_caller<float>, min_max_loc_mask_caller<double> } };
+
     CV_Assert(src.channels() == 1);
+    CV_Assert(mask.empty() || (mask.type() == CV_8U && src.size() == mask.size()));
+    CV_Assert(src.type() != CV_64F || hasNativeDoubleSupport(getDevice()));
 
     double minVal_; if (!minVal) minVal = &minVal_;
     double maxVal_; if (!maxVal) maxVal = &maxVal_;

@@ -609,38 +641,17 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point
     valbuf.create(valbuf_size, CV_8U);
     locbuf.create(locbuf_size, CV_8U);
 
-    int device = getDevice();
-    if (hasAtomicsSupport(device))
+    if (mask.empty())
     {
-        switch (src.type())
-        {
-        case CV_8U: min_max_loc_caller<unsigned char>(src, minVal, maxVal, minLoc_, maxLoc_, valbuf, locbuf); break;
-        case CV_8S: min_max_loc_caller<char>(src, minVal, maxVal, minLoc_, maxLoc_, valbuf, locbuf); break;
-        case CV_16U: min_max_loc_caller<unsigned short>(src, minVal, maxVal, minLoc_, maxLoc_, valbuf, locbuf); break;
-        case CV_16S: min_max_loc_caller<short>(src, minVal, maxVal, minLoc_, maxLoc_, valbuf, locbuf); break;
-        case CV_32S: min_max_loc_caller<int>(src, minVal, maxVal, minLoc_, maxLoc_, valbuf, locbuf); break;
-        case CV_32F: min_max_loc_caller<float>(src, minVal, maxVal, minLoc_, maxLoc_, valbuf, locbuf); break;
-        case CV_64F: 
-            if (hasNativeDoubleSupport(device)) 
-            {
-                min_max_loc_caller<double>(src, minVal, maxVal, minLoc_, maxLoc_, valbuf, locbuf); 
-                break;
-            }
-        default: CV_Error(CV_StsBadArg, "minMaxLoc: unsupported type");
-        }
+        Caller caller = callers[hasAtomicsSupport(getDevice())][src.type()];
+        if (!caller) CV_Error(CV_StsBadArg, "minMaxLoc: unsupported type");
+        caller(src, minVal, maxVal, minLoc_, maxLoc_, valbuf, locbuf);
     }
     else
     {
-        switch (src.type())
-        {
-        case CV_8U: min_max_loc_multipass_caller<unsigned char>(src, minVal, maxVal, minLoc_, maxLoc_, valbuf, locbuf); break;
-        case CV_8S: min_max_loc_multipass_caller<char>(src, minVal, maxVal, minLoc_, maxLoc_, valbuf, locbuf); break;
-        case CV_16U: min_max_loc_multipass_caller<unsigned short>(src, minVal, maxVal, minLoc_, maxLoc_, valbuf, locbuf); break;
-        case CV_16S: min_max_loc_multipass_caller<short>(src, minVal, maxVal, minLoc_, maxLoc_, valbuf, locbuf); break;
-        case CV_32S: min_max_loc_multipass_caller<int>(src, minVal, maxVal, minLoc_, maxLoc_, valbuf, locbuf); break;
-        case CV_32F: min_max_loc_multipass_caller<float>(src, minVal, maxVal, minLoc_, maxLoc_, valbuf, locbuf); break;
-        default: CV_Error(CV_StsBadArg, "minMaxLoc: unsupported type");
-        }
+        MaskedCaller caller = masked_callers[hasAtomicsSupport(getDevice())][src.type()];
+        if (!caller) CV_Error(CV_StsBadArg, "minMaxLoc: unsupported type");
+        caller(src, mask, minVal, maxVal, minLoc_, maxLoc_, valbuf, locbuf);
     }
 
     if (minLoc) { minLoc->x = minLoc_[0]; minLoc->y = minLoc_[1]; }

@@ -671,43 +682,27 @@ int cv::gpu::countNonZero(const GpuMat& src)
 int cv::gpu::countNonZero(const GpuMat& src, GpuMat& buf)
 {
     using namespace mathfunc::countnonzero;
 
+    typedef int (*Caller)(const DevMem2D src, PtrStep buf);
+
+    static const Caller callers[2][7] = 
+    { { count_non_zero_multipass_caller<unsigned char>, count_non_zero_multipass_caller<char>,
+        count_non_zero_multipass_caller<unsigned short>, count_non_zero_multipass_caller<short>,
+        count_non_zero_multipass_caller<int>, count_non_zero_multipass_caller<float>, 0 },
+      { count_non_zero_caller<unsigned char>, count_non_zero_caller<char>,
+        count_non_zero_caller<unsigned short>, count_non_zero_caller<short>,
+        count_non_zero_caller<int>, count_non_zero_caller<float>, count_non_zero_caller<double> } };
+
     CV_Assert(src.channels() == 1);
+    CV_Assert(src.type() != CV_64F || hasNativeDoubleSupport(getDevice()));
 
     Size buf_size;
     get_buf_size_required(buf_size.width, buf_size.height);
     buf.create(buf_size, CV_8U);
 
-    int device = getDevice();
-    if (hasAtomicsSupport(device))
-    {
-        switch (src.type())
-        {
-        case CV_8U: return count_non_zero_caller<unsigned char>(src, buf);
-        case CV_8S: return count_non_zero_caller<char>(src, buf);
-        case CV_16U: return count_non_zero_caller<unsigned short>(src, buf);
-        case CV_16S: return count_non_zero_caller<short>(src, buf);
-        case CV_32S: return count_non_zero_caller<int>(src, buf);
-        case CV_32F: return count_non_zero_caller<float>(src, buf);
-        case CV_64F: 
-            if (hasNativeDoubleSupport(device)) 
-                return count_non_zero_caller<double>(src, buf);
-        }
-    }
-    else
-    {
-        switch (src.type())
-        {
-        case CV_8U: return count_non_zero_multipass_caller<unsigned char>(src, buf);
-        case CV_8S: return count_non_zero_multipass_caller<char>(src, buf);
-        case CV_16U: return count_non_zero_multipass_caller<unsigned short>(src, buf);
-        case CV_16S: return count_non_zero_multipass_caller<short>(src, buf);
-        case CV_32S: return count_non_zero_multipass_caller<int>(src, buf);
-        case CV_32F: return count_non_zero_multipass_caller<float>(src, buf);
-        }
-    }
-
-    CV_Error(CV_StsBadArg, "countNonZero: unsupported type");
-    return 0;
+    Caller caller = callers[hasAtomicsSupport(getDevice())][src.type()];
+    if (!caller) CV_Error(CV_StsBadArg, "countNonZero: unsupported type");
+    return caller(src, buf);
 }
 
 ////////////////////////////////////////////////////////////////////////
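Both minMaxLoc and countNonZero now pick their implementation from a static [2][7] table of function pointers indexed by hasAtomicsSupport(getDevice()) and src.type(), with a null entry marking an unsupported combination (for example, double on the multipass row). The sketch below illustrates that dispatch pattern in isolation; the names (process, dispatch, the type ids) are illustrative and this is not the OpenCV code itself.

    #include <cstdio>
    #include <stdexcept>

    // Hypothetical per-type routine standing in for the real callers
    // (min_max_loc_caller<T>, count_non_zero_caller<T>, ...).
    template <typename T>
    void process(const void* data, int count)
    {
        (void)data;
        std::printf("processed %d elements of %zu bytes each\n", count, sizeof(T));
    }

    typedef void (*Caller)(const void* data, int count);

    // Row 0: fallback path (no atomics), row 1: fast path. Column = type id 0..6
    // (8U, 8S, 16U, 16S, 32S, 32F, 64F). A null entry means "unsupported combination".
    static const Caller callers[2][7] =
    { { process<unsigned char>, process<char>, process<unsigned short>, process<short>,
        process<int>, process<float>, 0 },
      { process<unsigned char>, process<char>, process<unsigned short>, process<short>,
        process<int>, process<float>, process<double> } };

    void dispatch(bool has_atomics, int type_id, const void* data, int count)
    {
        Caller caller = callers[has_atomics][type_id];
        if (!caller)
            throw std::runtime_error("unsupported type");  // mirrors CV_Error(CV_StsBadArg, ...)
        caller(data, count);
    }

    int main()
    {
        int dummy[4] = {0, 1, 2, 3};
        dispatch(true, 4, dummy, 4);    // 32S on the fast row
        dispatch(false, 5, dummy, 4);   // 32F on the fallback row
        return 0;
    }

The table replaces two near-identical switch statements per function, and adding the masked variants only required a second table rather than another switch.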
modules/gpu/src/cuda/mathfunc.cu

@@ -248,10 +248,10 @@ namespace cv { namespace gpu { namespace mathfunc
     struct Mask8U
     {
         explicit Mask8U(PtrStep mask): mask(mask) {}
-        __device__ bool operator()(int y, int x) { return mask.ptr(y)[x]; }
+        __device__ bool operator()(int y, int x) const { return mask.ptr(y)[x]; }
         PtrStep mask;
     };
 
-    struct MaskTrue { __device__ bool operator()(int y, int x) { return true; } };
+    struct MaskTrue { __device__ bool operator()(int y, int x) const { return true; } };
 
     // Unary operations

@@ -788,8 +788,8 @@ namespace cv { namespace gpu { namespace mathfunc
     }
 
-    template <int nthreads, typename T>
-    __global__ void min_max_loc_kernel(const DevMem2D src, T* minval, T* maxval, 
+    template <int nthreads, typename T, typename Mask>
+    __global__ void min_max_loc_kernel(const DevMem2D src, Mask mask, T* minval, T* maxval, 
                                        unsigned int* minloc, unsigned int* maxloc)
     {
         typedef typename MinMaxTypeTraits<T>::best_type best_type;

@@ -814,16 +814,11 @@ namespace cv { namespace gpu { namespace mathfunc
             const T* ptr = (const T*)src.ptr(y);
             for (unsigned int x = x0; x < x_end; x += blockDim.x)
             {
-                T val = ptr[x];
-                if (val <= mymin)
-                {
-                    mymin = val;
-                    myminloc = y * src.cols + x;
-                }
-                if (val >= mymax)
+                if (mask(y, x))
                 {
-                    mymax = val;
-                    mymaxloc = y * src.cols + x;
+                    T val = ptr[x];
+                    if (val <= mymin) { mymin = val; myminloc = y * src.cols + x; }
+                    if (val >= mymax) { mymax = val; mymaxloc = y * src.cols + x; }
                 }
             }
         }

@@ -886,6 +881,44 @@ namespace cv { namespace gpu { namespace mathfunc
     }
 
+    template <typename T>
+    void min_max_loc_mask_caller(const DevMem2D src, const PtrStep mask, double* minval, double* maxval, 
+                                 int minloc[2], int maxloc[2], PtrStep valbuf, PtrStep locbuf)
+    {
+        dim3 threads, grid;
+        estimate_thread_cfg(threads, grid);
+        estimate_kernel_consts(src.cols, src.rows, threads, grid);
+
+        T* minval_buf = (T*)valbuf.ptr(0);
+        T* maxval_buf = (T*)valbuf.ptr(1);
+        unsigned int* minloc_buf = (unsigned int*)locbuf.ptr(0);
+        unsigned int* maxloc_buf = (unsigned int*)locbuf.ptr(1);
+
+        min_max_loc_kernel<256, T, Mask8U><<<grid, threads>>>(src, Mask8U(mask), minval_buf, maxval_buf, minloc_buf, maxloc_buf);
+        cudaSafeCall(cudaThreadSynchronize());
+
+        T minval_, maxval_;
+        cudaSafeCall(cudaMemcpy(&minval_, minval_buf, sizeof(T), cudaMemcpyDeviceToHost));
+        cudaSafeCall(cudaMemcpy(&maxval_, maxval_buf, sizeof(T), cudaMemcpyDeviceToHost));
+        *minval = minval_;
+        *maxval = maxval_;
+
+        unsigned int minloc_, maxloc_;
+        cudaSafeCall(cudaMemcpy(&minloc_, minloc_buf, sizeof(int), cudaMemcpyDeviceToHost));
+        cudaSafeCall(cudaMemcpy(&maxloc_, maxloc_buf, sizeof(int), cudaMemcpyDeviceToHost));
+        minloc[1] = minloc_ / src.cols; minloc[0] = minloc_ - minloc[1] * src.cols;
+        maxloc[1] = maxloc_ / src.cols; maxloc[0] = maxloc_ - maxloc[1] * src.cols;
+    }
+
+    template void min_max_loc_mask_caller<unsigned char>(const DevMem2D, const PtrStep, double*, double*, int[2], int[2], PtrStep, PtrStep);
+    template void min_max_loc_mask_caller<char>(const DevMem2D, const PtrStep, double*, double*, int[2], int[2], PtrStep, PtrStep);
+    template void min_max_loc_mask_caller<unsigned short>(const DevMem2D, const PtrStep, double*, double*, int[2], int[2], PtrStep, PtrStep);
+    template void min_max_loc_mask_caller<short>(const DevMem2D, const PtrStep, double*, double*, int[2], int[2], PtrStep, PtrStep);
+    template void min_max_loc_mask_caller<int>(const DevMem2D, const PtrStep, double*, double*, int[2], int[2], PtrStep, PtrStep);
+    template void min_max_loc_mask_caller<float>(const DevMem2D, const PtrStep, double*, double*, int[2], int[2], PtrStep, PtrStep);
+    template void min_max_loc_mask_caller<double>(const DevMem2D, const PtrStep, double*, double*, int[2], int[2], PtrStep, PtrStep);
+
     template <typename T>
     void min_max_loc_caller(const DevMem2D src, double* minval, double* maxval, 
                             int minloc[2], int maxloc[2], PtrStep valbuf, PtrStep locbuf)

@@ -899,7 +932,7 @@ namespace cv { namespace gpu { namespace mathfunc
         unsigned int* minloc_buf = (unsigned int*)locbuf.ptr(0);
         unsigned int* maxloc_buf = (unsigned int*)locbuf.ptr(1);
 
-        min_max_loc_kernel<256, T><<<grid, threads>>>(src, minval_buf, maxval_buf, minloc_buf, maxloc_buf);
+        min_max_loc_kernel<256, T, MaskTrue><<<grid, threads>>>(src, MaskTrue(), minval_buf, maxval_buf, minloc_buf, maxloc_buf);
         cudaSafeCall(cudaThreadSynchronize());
 
         T minval_, maxval_;

@@ -956,9 +989,47 @@ namespace cv { namespace gpu { namespace mathfunc
     }
 
+    template <typename T>
+    void min_max_loc_mask_multipass_caller(const DevMem2D src, const PtrStep mask, double* minval, double* maxval, 
+                                           int minloc[2], int maxloc[2], PtrStep valbuf, PtrStep locbuf)
+    {
+        dim3 threads, grid;
+        estimate_thread_cfg(threads, grid);
+        estimate_kernel_consts(src.cols, src.rows, threads, grid);
+
+        T* minval_buf = (T*)valbuf.ptr(0);
+        T* maxval_buf = (T*)valbuf.ptr(1);
+        unsigned int* minloc_buf = (unsigned int*)locbuf.ptr(0);
+        unsigned int* maxloc_buf = (unsigned int*)locbuf.ptr(1);
+
+        min_max_loc_kernel<256, T, Mask8U><<<grid, threads>>>(src, Mask8U(mask), minval_buf, maxval_buf, minloc_buf, maxloc_buf);
+        min_max_loc_pass2_kernel<256, T><<<1, 256>>>(minval_buf, maxval_buf, minloc_buf, maxloc_buf, grid.x * grid.y);
+        cudaSafeCall(cudaThreadSynchronize());
+
+        T minval_, maxval_;
+        cudaSafeCall(cudaMemcpy(&minval_, minval_buf, sizeof(T), cudaMemcpyDeviceToHost));
+        cudaSafeCall(cudaMemcpy(&maxval_, maxval_buf, sizeof(T), cudaMemcpyDeviceToHost));
+        *minval = minval_;
+        *maxval = maxval_;
+
+        unsigned int minloc_, maxloc_;
+        cudaSafeCall(cudaMemcpy(&minloc_, minloc_buf, sizeof(int), cudaMemcpyDeviceToHost));
+        cudaSafeCall(cudaMemcpy(&maxloc_, maxloc_buf, sizeof(int), cudaMemcpyDeviceToHost));
+        minloc[1] = minloc_ / src.cols; minloc[0] = minloc_ - minloc[1] * src.cols;
+        maxloc[1] = maxloc_ / src.cols; maxloc[0] = maxloc_ - maxloc[1] * src.cols;
+    }
+
+    template void min_max_loc_mask_multipass_caller<unsigned char>(const DevMem2D, const PtrStep, double*, double*, int[2], int[2], PtrStep, PtrStep);
+    template void min_max_loc_mask_multipass_caller<char>(const DevMem2D, const PtrStep, double*, double*, int[2], int[2], PtrStep, PtrStep);
+    template void min_max_loc_mask_multipass_caller<unsigned short>(const DevMem2D, const PtrStep, double*, double*, int[2], int[2], PtrStep, PtrStep);
+    template void min_max_loc_mask_multipass_caller<short>(const DevMem2D, const PtrStep, double*, double*, int[2], int[2], PtrStep, PtrStep);
+    template void min_max_loc_mask_multipass_caller<int>(const DevMem2D, const PtrStep, double*, double*, int[2], int[2], PtrStep, PtrStep);
+    template void min_max_loc_mask_multipass_caller<float>(const DevMem2D, const PtrStep, double*, double*, int[2], int[2], PtrStep, PtrStep);
+
     template <typename T>
     void min_max_loc_multipass_caller(const DevMem2D src, double* minval, double* maxval, 
                                       int minloc[2], int maxloc[2], PtrStep valbuf, PtrStep locbuf)
     {
         dim3 threads, grid;
         estimate_thread_cfg(threads, grid);

@@ -969,7 +1040,7 @@ namespace cv { namespace gpu { namespace mathfunc
         unsigned int* minloc_buf = (unsigned int*)locbuf.ptr(0);
         unsigned int* maxloc_buf = (unsigned int*)locbuf.ptr(1);
 
-        min_max_loc_kernel<256, T><<<grid, threads>>>(src, minval_buf, maxval_buf, minloc_buf, maxloc_buf);
+        min_max_loc_kernel<256, T, MaskTrue><<<grid, threads>>>(src, MaskTrue(), minval_buf, maxval_buf, minloc_buf, maxloc_buf);
         min_max_loc_pass2_kernel<256, T><<<1, 256>>>(minval_buf, maxval_buf, minloc_buf, maxloc_buf, grid.x * grid.y);
         cudaSafeCall(cudaThreadSynchronize());
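The kernel takes the mask as an extra template parameter: Mask8U reads one mask byte per pixel, while MaskTrue always returns true, so the unmasked path keeps its old behaviour once the functor is inlined and pays no per-pixel mask read. Below is a host-side C++ analogue of that pattern, written purely as an illustration; the function and struct names echo the diff but the code is not the CUDA implementation.

    #include <cstdio>
    #include <limits>
    #include <vector>

    // Mask that accepts every pixel; with inlining the test disappears entirely.
    struct MaskTrue { bool operator()(int, int) const { return true; } };

    // Mask backed by a row-major CV_8U-style buffer: non-zero means "use this pixel".
    struct Mask8U
    {
        Mask8U(const unsigned char* data, int step) : data(data), step(step) {}
        bool operator()(int y, int x) const { return data[y * step + x] != 0; }
        const unsigned char* data;
        int step;
    };

    // Reduction templated on the mask type, mirroring min_max_loc_kernel's inner loop.
    // Location convention matches the callers: loc[0] = column (x), loc[1] = row (y).
    template <typename T, typename Mask>
    void min_max_loc(const T* src, int rows, int cols, Mask mask,
                     T& minval, T& maxval, int minloc[2], int maxloc[2])
    {
        minval = std::numeric_limits<T>::max();
        maxval = std::numeric_limits<T>::lowest();
        for (int y = 0; y < rows; ++y)
            for (int x = 0; x < cols; ++x)
                if (mask(y, x))
                {
                    T val = src[y * cols + x];
                    if (val <= minval) { minval = val; minloc[0] = x; minloc[1] = y; }
                    if (val >= maxval) { maxval = val; maxloc[0] = x; maxloc[1] = y; }
                }
    }

    int main()
    {
        const int rows = 2, cols = 3;
        std::vector<float> src = { 5, 1, 9, 7, 3, 2 };
        std::vector<unsigned char> mask = { 1, 0, 1, 1, 1, 0 };

        float mn, mx; int mnloc[2], mxloc[2];
        min_max_loc(src.data(), rows, cols, Mask8U(mask.data(), cols), mn, mx, mnloc, mxloc);
        std::printf("masked:   min %g at (%d,%d), max %g at (%d,%d)\n",
                    mn, mnloc[0], mnloc[1], mx, mxloc[0], mxloc[1]);

        min_max_loc(src.data(), rows, cols, MaskTrue(), mn, mx, mnloc, mxloc);
        std::printf("unmasked: min %g at (%d,%d), max %g at (%d,%d)\n",
                    mn, mnloc[0], mnloc[1], mx, mxloc[0], mxloc[1]);
        return 0;
    }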
tests/gpu/src/arithm.cpp

@@ -684,7 +684,7 @@ struct CV_GpuMinMaxTest: public CvTest
         if (cv::gpu::hasNativeDoubleSupport(cv::gpu::getDevice())) depth_end = CV_64F; else depth_end = CV_32F;
         for (int depth = CV_8U; depth <= depth_end; ++depth)
         {
-            for (int i = 0; i < 1; ++i)
+            for (int i = 0; i < 3; ++i)
             {
                 int rows = 1 + rand() % 1000;
                 int cols = 1 + rand() % 1000;

@@ -829,11 +829,14 @@ struct CV_GpuMinMaxLocTest: public CvTest
             rng.fill(row, RNG::UNIFORM, Scalar(0), Scalar(256));
         }
 
+        cv::Mat mask(src.size(), CV_8U);
+        rng.fill(mask, RNG::UNIFORM, Scalar(0), Scalar(2));
+
         double minVal, maxVal;
         cv::Point minLoc, maxLoc;
 
         if (depth != CV_8S)
-            cv::minMaxLoc(src, &minVal, &maxVal, &minLoc, &maxLoc);
+            cv::minMaxLoc(src, &minVal, &maxVal, &minLoc, &maxLoc, mask);
         else
         {
             // OpenCV's minMaxLoc doesn't support CV_8S type

@@ -843,14 +846,17 @@ struct CV_GpuMinMaxLocTest: public CvTest
                 for (int j = 0; j < src.cols; ++j)
                 {
                     char val = src.at<char>(i, j);
-                    if (val < minVal) { minVal = val; minLoc = cv::Point(j, i); }
-                    if (val > maxVal) { maxVal = val; maxLoc = cv::Point(j, i); }
+                    if (mask.at<unsigned char>(i, j))
+                    {
+                        if (val < minVal) { minVal = val; minLoc = cv::Point(j, i); }
+                        if (val > maxVal) { maxVal = val; maxLoc = cv::Point(j, i); }
+                    }
                 }
             }
         }
 
         double minVal_, maxVal_;
         cv::Point minLoc_, maxLoc_;
-        cv::gpu::minMaxLoc(cv::gpu::GpuMat(src), &minVal_, &maxVal_, &minLoc_, &maxLoc_, valbuf, locbuf);
+        cv::gpu::minMaxLoc(cv::gpu::GpuMat(src), &minVal_, &maxVal_, &minLoc_, &maxLoc_, cv::gpu::GpuMat(mask), valbuf, locbuf);
         CHECK(minVal == minVal_, CvTS::FAIL_INVALID_OUTPUT);
         CHECK(maxVal == maxVal_, CvTS::FAIL_INVALID_OUTPUT);
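The updated test builds a random 0/1 mask, computes a masked reference on the CPU (by hand for CV_8S, since cv::minMaxLoc does not accept that type), and compares it with the GPU result. Below is a condensed cross-check in the same spirit, assuming an OpenCV 2.x build with the gpu module and a CUDA device; only the extrema are compared because the reported locations can legitimately differ when an extremum is not unique.

    #include <cassert>
    #include <opencv2/core/core.hpp>
    #include <opencv2/gpu/gpu.hpp>

    int main()
    {
        cv::RNG rng;
        cv::Mat src(300, 400, CV_32F), mask(300, 400, CV_8U);
        rng.fill(src,  cv::RNG::UNIFORM, cv::Scalar(0), cv::Scalar(256));
        rng.fill(mask, cv::RNG::UNIFORM, cv::Scalar(0), cv::Scalar(2));   // 0/1 mask, as in the test

        double minVal, maxVal, minVal_, maxVal_;
        cv::Point minLoc, maxLoc, minLoc_, maxLoc_;

        // CPU reference: cv::minMaxLoc accepts a mask directly for non-CV_8S types.
        cv::minMaxLoc(src, &minVal, &maxVal, &minLoc, &maxLoc, mask);

        // GPU result through the new masked overload.
        cv::gpu::minMaxLoc(cv::gpu::GpuMat(src), &minVal_, &maxVal_, &minLoc_, &maxLoc_,
                           cv::gpu::GpuMat(mask));

        // Compare the extrema only, mirroring the CHECKs in the test above.
        assert(minVal == minVal_ && maxVal == maxVal_);
        return 0;
    }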