submodule / opencv / Commits / 863d61e9

Commit 863d61e9 authored Nov 08, 2010 by Vladislav Vinogradov
fix gpu module compilation under linux

parent 4cdcf371
Showing 4 changed files with 425 additions and 252 deletions (+425 / -252):

    modules/gpu/src/arithm.cpp              +200   -89
    modules/gpu/src/graphcuts.cpp             +9    -3
    modules/gpu/src/imgproc_gpu.cpp         +157  -131
    modules/gpu/src/matrix_operations.cpp    +59   -29
modules/gpu/src/arithm.cpp
...
...
@@ -84,162 +84,230 @@ void cv::gpu::polarToCart(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool,
#else /* !defined (HAVE_CUDA) */

#define NPP_VERSION (10 * NPP_VERSION_MAJOR + NPP_VERSION_MINOR)

#if (defined(_WIN32) || defined(_WIN64)) && (NPP_VERSION >= 32)
#   define NPP_HAVE_COMPLEX_TYPE
#endif
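The NPP_VERSION macro packs the NPP major and minor version digits into one comparable integer, which is what the guards throughout this commit test. A minimal sketch of what the gate evaluates to, assuming NPP 3.2 headers:

    // Assuming NPP 3.2: NPP_VERSION_MAJOR == 3, NPP_VERSION_MINOR == 2,
    // so NPP_VERSION expands to 10 * 3 + 2 == 32.
    #if NPP_VERSION >= 32
    // NPP 3.2-only entry points (e.g. 32-bit integer arithmetic) may be used here
    #endif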
////////////////////////////////////////////////////////////////////////
// add subtract multiply divide

namespace
{
    typedef NppStatus (*npp_arithm_8u_t)(const Npp8u* pSrc1, int nSrc1Step, const Npp8u* pSrc2, int nSrc2Step, Npp8u* pDst, int nDstStep,
                                         NppiSize oSizeROI, int nScaleFactor);
    typedef NppStatus (*npp_arithm_32s_t)(const Npp32s* pSrc1, int nSrc1Step, const Npp32s* pSrc2, int nSrc2Step, Npp32s* pDst, int nDstStep,
                                          NppiSize oSizeROI);
    typedef NppStatus (*npp_arithm_32f_t)(const Npp32f* pSrc1, int nSrc1Step, const Npp32f* pSrc2, int nSrc2Step, Npp32f* pDst, int nDstStep,
                                          NppiSize oSizeROI);
    void nppArithmCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst,
                         npp_arithm_8u_t npp_func_8uc1, npp_arithm_8u_t npp_func_8uc4,
                         npp_arithm_32s_t npp_func_32sc1, npp_arithm_32f_t npp_func_32fc1)
    {
        CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type());

#if NPP_VERSION >= 32
        CV_Assert(src1.type() == CV_8UC1 || src1.type() == CV_8UC4 || src1.type() == CV_32SC1 || src1.type() == CV_32FC1);
#else
        CV_Assert(src1.type() == CV_8UC1 || src1.type() == CV_8UC4 || src1.type() == CV_32FC1);
#endif

        dst.create( src1.size(), src1.type() );

        NppiSize sz;
        sz.width  = src1.cols;
        sz.height = src1.rows;

        switch (src1.type())
        {
        case CV_8UC1:
            nppSafeCall( npp_func_8uc1(src1.ptr<Npp8u>(), src1.step, src2.ptr<Npp8u>(), src2.step,
                dst.ptr<Npp8u>(), dst.step, sz, 0) );
            break;
        case CV_8UC4:
            nppSafeCall( npp_func_8uc4(src1.ptr<Npp8u>(), src1.step, src2.ptr<Npp8u>(), src2.step,
                dst.ptr<Npp8u>(), dst.step, sz, 0) );
            break;
#if NPP_VERSION >= 32
        case CV_32SC1:
            nppSafeCall( npp_func_32sc1(src1.ptr<Npp32s>(), src1.step, src2.ptr<Npp32s>(), src2.step,
                dst.ptr<Npp32s>(), dst.step, sz) );
            break;
#endif
        case CV_32FC1:
            nppSafeCall( npp_func_32fc1(src1.ptr<Npp32f>(), src1.step, src2.ptr<Npp32f>(), src2.step,
                dst.ptr<Npp32f>(), dst.step, sz) );
            break;
        default:
            CV_Assert(!"Unsupported source type");
        }
    }
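The "Sfs" suffix on the 8-bit NPP entry points denotes a fixed-point scale factor: conceptually the raw result is right-shifted by nScaleFactor before saturating to 8 bits, which is why the caller above passes 0 to keep results unscaled. A rough model of the semantics (a sketch, not NPP's implementation):

    // Sketch of 8-bit "Sfs" addition with saturation:
    inline Npp8u add8uSfsModel(Npp8u a, Npp8u b, int nScaleFactor)
    {
        int r = (int(a) + int(b)) >> nScaleFactor; // nScaleFactor == 0: no scaling
        return (Npp8u)(r < 0 ? 0 : (r > 255 ? 255 : r));
    }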
    template<int SCN> struct NppArithmScalarFunc;
    template<> struct NppArithmScalarFunc<1>
    {
        typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, Npp32f nValue, Npp32f* pDst,
            int nDstStep, NppiSize oSizeROI);
    };
#ifdef NPP_HAVE_COMPLEX_TYPE
    template<> struct NppArithmScalarFunc<2>
    {
        typedef NppStatus (*func_ptr)(const Npp32fc* pSrc, int nSrcStep, Npp32fc nValue, Npp32fc* pDst,
            int nDstStep, NppiSize oSizeROI);
    };
#endif

    template<int SCN, typename NppArithmScalarFunc<SCN>::func_ptr func> struct NppArithmScalar;
    template<typename NppArithmScalarFunc<1>::func_ptr func> struct NppArithmScalar<1, func>
    {
        static void calc(const GpuMat& src, const Scalar& sc, GpuMat& dst)
        {
            dst.create(src.size(), src.type());

            NppiSize sz;
            sz.width  = src.cols;
            sz.height = src.rows;

            nppSafeCall( func(src.ptr<Npp32f>(), src.step, (Npp32f)sc[0], dst.ptr<Npp32f>(), dst.step, sz) );
        }
    };
#ifdef NPP_HAVE_COMPLEX_TYPE
    template<typename NppArithmScalarFunc<2>::func_ptr func> struct NppArithmScalar<2, func>
    {
        static void calc(const GpuMat& src, const Scalar& sc, GpuMat& dst)
        {
            dst.create(src.size(), src.type());

            NppiSize sz;
            sz.width  = src.cols;
            sz.height = src.rows;

            Npp32fc nValue;
            nValue.re = (Npp32f)sc[0];
            nValue.im = (Npp32f)sc[1];

            nppSafeCall( func(src.ptr<Npp32fc>(), src.step, nValue, dst.ptr<Npp32fc>(), dst.step, sz) );
        }
    };
#endif
}
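Each NppArithmScalar specialization takes the concrete NPP entry point as a non-type template parameter, so calc compiles down to a direct call with no runtime table lookup; the wrappers below simply pick a specialization:

    // Sketch: the function pointer is fixed at compile time, e.g.
    // NppArithmScalar<1, nppiAddC_32f_C1R>::calc(src, sc, dst);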
void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
{
#if NPP_VERSION >= 32
    nppArithmCaller(src1, src2, dst, nppiAdd_8u_C1RSfs, nppiAdd_8u_C4RSfs, nppiAdd_32s_C1R, nppiAdd_32f_C1R);
#else
    nppArithmCaller(src1, src2, dst, nppiAdd_8u_C1RSfs, nppiAdd_8u_C4RSfs, 0, nppiAdd_32f_C1R);
#endif
}

void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
{
#if NPP_VERSION >= 32
    nppArithmCaller(src2, src1, dst, nppiSub_8u_C1RSfs, nppiSub_8u_C4RSfs, nppiSub_32s_C1R, nppiSub_32f_C1R);
#else
    nppArithmCaller(src2, src1, dst, nppiSub_8u_C1RSfs, nppiSub_8u_C4RSfs, 0, nppiSub_32f_C1R);
#endif
}

void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
{
#if NPP_VERSION >= 32
    nppArithmCaller(src1, src2, dst, nppiMul_8u_C1RSfs, nppiMul_8u_C4RSfs, nppiMul_32s_C1R, nppiMul_32f_C1R);
#else
    nppArithmCaller(src1, src2, dst, nppiMul_8u_C1RSfs, nppiMul_8u_C4RSfs, 0, nppiMul_32f_C1R);
#endif
}

void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
{
#if NPP_VERSION >= 32
    nppArithmCaller(src2, src1, dst, nppiDiv_8u_C1RSfs, nppiDiv_8u_C4RSfs, nppiDiv_32s_C1R, nppiDiv_32f_C1R);
#else
    nppArithmCaller(src2, src1, dst, nppiDiv_8u_C1RSfs, nppiDiv_8u_C4RSfs, 0, nppiDiv_32f_C1R);
#endif
}
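A minimal host-side usage sketch of these element-wise wrappers, assuming a CUDA device and this era's cv::gpu API (constructing a GpuMat from a Mat uploads it):

    cv::Mat a = cv::Mat::ones(480, 640, CV_32FC1);
    cv::Mat b = cv::Mat::ones(480, 640, CV_32FC1);
    cv::gpu::GpuMat ga(a), gb(b), gc;   // upload to the device
    cv::gpu::add(ga, gb, gc);           // dispatches to nppiAdd_32f_C1R
    cv::Mat c;
    gc.download(c);                     // every element of c is 2.0f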
void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst)
{
#ifdef NPP_HAVE_COMPLEX_TYPE
    typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst);
    static const caller_t callers[] = {0, NppArithmScalar<1, nppiAddC_32f_C1R>::calc, NppArithmScalar<2, nppiAddC_32fc_C1R>::calc};

    CV_Assert(src.type() == CV_32FC1 || src.type() == CV_32FC2);

    callers[src.channels()](src, sc, dst);
#else
# if NPP_VERSION >= 32
    CV_Assert(src.type() == CV_32FC1);
    NppArithmScalar<1, nppiAddC_32f_C1R>::calc(src, sc, dst);
# else
    CV_Assert(!"This function doesn't supported");
# endif
#endif
}

void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst)
{
#ifdef NPP_HAVE_COMPLEX_TYPE
    typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst);
    static const caller_t callers[] = {0, NppArithmScalar<1, nppiSubC_32f_C1R>::calc, NppArithmScalar<2, nppiSubC_32fc_C1R>::calc};

    CV_Assert(src.type() == CV_32FC1 || src.type() == CV_32FC2);

    callers[src.channels()](src, sc, dst);
#else
# if NPP_VERSION >= 32
    CV_Assert(src.type() == CV_32FC1);
    NppArithmScalar<1, nppiSubC_32f_C1R>::calc(src, sc, dst);
# else
    CV_Assert(!"This function doesn't supported");
# endif
#endif
}

void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst)
{
#ifdef NPP_HAVE_COMPLEX_TYPE
    typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst);
    static const caller_t callers[] = {0, NppArithmScalar<1, nppiMulC_32f_C1R>::calc, NppArithmScalar<2, nppiMulC_32fc_C1R>::calc};

    CV_Assert(src.type() == CV_32FC1 || src.type() == CV_32FC2);

    callers[src.channels()](src, sc, dst);
#else
# if NPP_VERSION >= 32
    CV_Assert(src.type() == CV_32FC1);
    NppArithmScalar<1, nppiMulC_32f_C1R>::calc(src, sc, dst);
# else
    CV_Assert(!"This function doesn't supported");
# endif
#endif
}

void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst)
{
#ifdef NPP_HAVE_COMPLEX_TYPE
    typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst);
    static const caller_t callers[] = {0, NppArithmScalar<1, nppiDivC_32f_C1R>::calc, NppArithmScalar<2, nppiDivC_32fc_C1R>::calc};

    CV_Assert(src.type() == CV_32FC1 || src.type() == CV_32FC2);

    callers[src.channels()](src, sc, dst);
#else
# if NPP_VERSION >= 32
    CV_Assert(src.type() == CV_32FC1);
    NppArithmScalar<1, nppiDivC_32f_C1R>::calc(src, sc, dst);
# else
    CV_Assert(!"This function doesn't supported");
# endif
#endif
}
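The matrix-plus-scalar overloads above accept only CV_32FC1 (plus CV_32FC2 where NPP_HAVE_COMPLEX_TYPE is defined), so a typical call looks like this sketch:

    cv::gpu::GpuMat src(cv::Mat::zeros(4, 4, CV_32FC1)), dst;
    cv::gpu::add(src, cv::Scalar(3.5), dst);   // per-pixel + 3.5 via nppiAddC_32f_C1R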
////////////////////////////////////////////////////////////////////////
...
...
@@ -263,9 +331,13 @@ void cv::gpu::transpose(const GpuMat& src, GpuMat& dst)
void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
{
    CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type());

#if NPP_VERSION >= 32
    CV_Assert(src1.type() == CV_8UC1 || src1.type() == CV_8UC4 || src1.type() == CV_32SC1 || src1.type() == CV_32FC1);
#else
    CV_Assert(src1.type() == CV_8UC1 || src1.type() == CV_8UC4 || src1.type() == CV_32FC1);
#endif

    dst.create( src1.size(), src1.type() );
...
...
@@ -276,20 +348,22 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
    switch (src1.type())
    {
    case CV_8UC1:
        nppSafeCall( nppiAbsDiff_8u_C1R(src1.ptr<Npp8u>(), src1.step, src2.ptr<Npp8u>(), src2.step,
            dst.ptr<Npp8u>(), dst.step, sz) );
        break;
    case CV_8UC4:
        nppSafeCall( nppiAbsDiff_8u_C4R(src1.ptr<Npp8u>(), src1.step, src2.ptr<Npp8u>(), src2.step,
            dst.ptr<Npp8u>(), dst.step, sz) );
        break;
#if NPP_VERSION >= 32
    case CV_32SC1:
        nppSafeCall( nppiAbsDiff_32s_C1R(src1.ptr<Npp32s>(), src1.step, src2.ptr<Npp32s>(), src2.step,
            dst.ptr<Npp32s>(), dst.step, sz) );
        break;
#endif
    case CV_32FC1:
        nppSafeCall( nppiAbsDiff_32f_C1R(src1.ptr<Npp32f>(), src1.step, src2.ptr<Npp32f>(), src2.step,
...
...
@@ -302,7 +376,8 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
void cv::gpu::absdiff(const GpuMat& src, const Scalar& s, GpuMat& dst)
{
#if NPP_VERSION >= 32
    CV_Assert(src.type() == CV_32FC1);

    dst.create( src.size(), src.type() );
...
...
@@ -311,6 +386,9 @@ void cv::gpu::absdiff(const GpuMat& src, const Scalar& s, GpuMat& dst)
    sz.height = src.rows;

    nppSafeCall( nppiAbsDiffC_32f_C1R(src.ptr<Npp32f>(), src.step, dst.ptr<Npp32f>(), dst.step, sz, (Npp32f)s[0]) );
#else
    CV_Assert(!"This function doesn't supported");
#endif
}
////////////////////////////////////////////////////////////////////////
...
...
@@ -322,7 +400,7 @@ namespace cv { namespace gpu { namespace mathfunc
    void compare_ne_32f(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst);
}}}

void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int cmpop)
{
    CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type());
...
...
@@ -340,8 +418,8 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c
    {
        if (cmpop != CMP_NE)
        {
            nppSafeCall( nppiCompare_8u_C4R(src1.ptr<Npp8u>(), src1.step, src2.ptr<Npp8u>(), src2.step,
                dst.ptr<Npp8u>(), dst.step, sz, nppCmpOp[cmpop]) );
        }
        else
...
...
@@ -367,7 +445,7 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c
////////////////////////////////////////////////////////////////////////
// meanStdDev
void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev)
{
    CV_Assert(src.type() == CV_8UC1);
...
...
@@ -381,7 +459,7 @@ void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev)
////////////////////////////////////////////////////////////////////////
// norm
double cv::gpu::norm(const GpuMat& src1, int normType)
{
    return norm(src1, GpuMat(src1.size(), src1.type(), Scalar::all(0.0)), normType);
}
...
...
@@ -393,7 +471,7 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType)
    CV_Assert(src1.type() == CV_8UC1);
    CV_Assert(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2);

    typedef NppStatus (*npp_norm_diff_func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2,
        NppiSize oSizeROI, Npp64f* pRetVal);

    static const npp_norm_diff_func_t npp_norm_diff_func[] = {nppiNormDiff_Inf_8u_C1R, nppiNormDiff_L1_8u_C1R, nppiNormDiff_L2_8u_C1R};
...
...
@@ -405,8 +483,8 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType)
    int funcIdx = normType >> 1;
    double retVal;

    nppSafeCall( npp_norm_diff_func[funcIdx](src1.ptr<Npp8u>(), src1.step, src2.ptr<Npp8u>(), src2.step,
        sz, &retVal) );

    return retVal;
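The funcIdx = normType >> 1 trick works because OpenCV defines NORM_INF == 1, NORM_L1 == 2 and NORM_L2 == 4, which shift down to consecutive table indices:

    // NORM_INF (1) >> 1 == 0  ->  nppiNormDiff_Inf_8u_C1R
    // NORM_L1  (2) >> 1 == 1  ->  nppiNormDiff_L1_8u_C1R
    // NORM_L2  (4) >> 1 == 2  ->  nppiNormDiff_L2_8u_C1R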
...
...
@@ -427,14 +505,14 @@ void cv::gpu::flip(const GpuMat& src, GpuMat& dst, int flipCode)
    if (src.type() == CV_8UC1)
    {
        nppSafeCall( nppiMirror_8u_C1R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz,
            (flipCode == 0 ? NPP_HORIZONTAL_AXIS : (flipCode > 0 ? NPP_VERTICAL_AXIS : NPP_BOTH_AXIS))) );
    }
    else
    {
        nppSafeCall( nppiMirror_8u_C4R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz,
            (flipCode == 0 ? NPP_HORIZONTAL_AXIS : (flipCode > 0 ? NPP_VERTICAL_AXIS : NPP_BOTH_AXIS))) );
    }
}
...
...
@@ -444,33 +522,40 @@ void cv::gpu::flip(const GpuMat& src, GpuMat& dst, int flipCode)
Scalar cv::gpu::sum(const GpuMat& src)
{
    CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);

    NppiSize sz;
    sz.width  = src.cols;
    sz.height = src.rows;

    Scalar res;

#if NPP_VERSION >= 32
    CV_Assert(!"disabled until fix crash");

    int bufsz;

    if (src.type() == CV_8UC1)
    {
        nppiReductionGetBufferHostSize_8u_C1R(sz, &bufsz);
        GpuMat buf(1, bufsz, CV_32S);

        nppSafeCall( nppiSum_8u_C1R(src.ptr<Npp8u>(), src.step, sz, buf.ptr<Npp32s>(), res.val) );
    }
    else
    {
        nppiReductionGetBufferHostSize_8u_C4R(sz, &bufsz);
        GpuMat buf(1, bufsz, CV_32S);

        nppSafeCall( nppiSum_8u_C4R(src.ptr<Npp8u>(), src.step, sz, buf.ptr<Npp32s>(), res.val) );
    }
#else
    if (src.type() == CV_8UC1)
        nppSafeCall( nppiSum_8u_C1R(src.ptr<Npp8u>(), src.step, sz, res.val) );
    else
        nppSafeCall( nppiSum_8u_C4R(src.ptr<Npp8u>(), src.step, sz, res.val) );
#endif

    return res;
}
////////////////////////////////////////////////////////////////////////
...
...
@@ -501,22 +586,30 @@ namespace
        sz.width  = src.cols;
        sz.height = src.rows;

        Npp8u* cuMem;

#if NPP_VERSION >= 32
        cuMem = nppsMalloc_8u(8);
#else
        cudaSafeCall( cudaMalloc((void**)&cuMem, 8 * sizeof(Npp8u)) );
#endif

        nppSafeCall( nppiMinMax_8u_C4R(src.ptr<Npp8u>(), src.step, sz, cuMem, cuMem + 4) );

        if (minVal)
            cudaMemcpy(minVal, cuMem, 4 * sizeof(Npp8u), cudaMemcpyDeviceToHost);
        if (maxVal)
            cudaMemcpy(maxVal, cuMem + 4, 4 * sizeof(Npp8u), cudaMemcpyDeviceToHost);

#if NPP_VERSION >= 32
        nppsFree(cuMem);
#else
        cudaSafeCall( cudaFree(cuMem) );
#endif
    }
}

void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal)
{
    typedef void (*minMax_t)(const GpuMat& src, double* minVal, double* maxVal);
    static const minMax_t minMax_callers[] = {0, minMax_c1, 0, 0, minMax_c4};
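The rewritten helper above replaces the two separate nppsMalloc_8u(4) allocations (cuMin/cuMax) with a single 8-byte device buffer, falling back to plain cudaMalloc under the pre-3.2 guard. The layout it assumes:

    // cuMem[0..3]  per-channel minima  (written by nppiMinMax_8u_C4R)
    // cuMem[4..7]  per-channel maxima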
...
...
@@ -559,13 +652,13 @@ void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst)
    NppiSize sz;
    sz.height = src.rows;
    sz.width = src.cols;

    Mat nppLut;
    lut.convertTo(nppLut, CV_32S);

    if (src.type() == CV_8UC1)
    {
        nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz,
            nppLut.ptr<Npp32s>(), lvls.pLevels, 256) );
    }
    else
...
...
@@ -578,10 +671,10 @@ void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst)
        {
            cv::split(nppLut, nppLut3);

            pValues3[0] = nppLut3[0].ptr<Npp32s>();
            pValues3[1] = nppLut3[1].ptr<Npp32s>();
            pValues3[2] = nppLut3[2].ptr<Npp32s>();
        }

        nppSafeCall( nppiLUT_Linear_8u_C3R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz,
            pValues3, lvls.pLevels3, lvls.nValues3) );
    }
}
...
...
@@ -591,6 +684,7 @@ void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst)
void cv::gpu::exp(const GpuMat& src, GpuMat& dst)
{
#if NPP_VERSION >= 32
    CV_Assert(src.type() == CV_32FC1);

    dst.create(src.size(), src.type());
...
...
@@ -600,6 +694,9 @@ void cv::gpu::exp(const GpuMat& src, GpuMat& dst)
    sz.height = src.rows;

    nppSafeCall( nppiExp_32f_C1R(src.ptr<Npp32f>(), src.step, dst.ptr<Npp32f>(), dst.step, sz) );
#else
    CV_Assert(!"This function doesn't supported");
#endif
}
////////////////////////////////////////////////////////////////////////
...
...
@@ -607,6 +704,7 @@ void cv::gpu::exp(const GpuMat& src, GpuMat& dst)
void cv::gpu::log(const GpuMat& src, GpuMat& dst)
{
#if NPP_VERSION >= 32
    CV_Assert(src.type() == CV_32FC1);

    dst.create(src.size(), src.type());
...
...
@@ -616,11 +714,15 @@ void cv::gpu::log(const GpuMat& src, GpuMat& dst)
    sz.height = src.rows;

    nppSafeCall( nppiLn_32f_C1R(src.ptr<Npp32f>(), src.step, dst.ptr<Npp32f>(), dst.step, sz) );
#else
    CV_Assert(!"This function doesn't supported");
#endif
}

////////////////////////////////////////////////////////////////////////
// NPP magnitude

#ifdef NPP_HAVE_COMPLEX_TYPE
namespace
{
    typedef NppStatus (*nppMagnitude_t)(const Npp32fc* pSrc, int nSrcStep, Npp32f* pDst, int nDstStep, NppiSize oSizeROI);
...
...
@@ -638,21 +740,30 @@ namespace
        nppSafeCall( func(src.ptr<Npp32fc>(), src.step, dst.ptr<Npp32f>(), dst.step, sz) );
    }
}
#endif

void cv::gpu::magnitude(const GpuMat& src, GpuMat& dst)
{
#ifdef NPP_HAVE_COMPLEX_TYPE
    ::npp_magnitude(src, dst, nppiMagnitude_32fc32f_C1R);
#else
    CV_Assert(!"This function doesn't supported");
#endif
}

void cv::gpu::magnitudeSqr(const GpuMat& src, GpuMat& dst)
{
#ifdef NPP_HAVE_COMPLEX_TYPE
    ::npp_magnitude(src, dst, nppiMagnitudeSqr_32fc32f_C1R);
#else
    CV_Assert(!"This function doesn't supported");
#endif
}

////////////////////////////////////////////////////////////////////////
// Polar <-> Cart

namespace cv { namespace gpu { namespace mathfunc
{
    void cartToPolar_gpu(const DevMem2Df& x, const DevMem2Df& y, const DevMem2Df& mag, bool magSqr, const DevMem2Df& angle, bool angleInDegrees, cudaStream_t stream);
    void polarToCart_gpu(const DevMem2Df& mag, const DevMem2Df& angle, const DevMem2Df& x, const DevMem2Df& y, bool angleInDegrees, cudaStream_t stream);
...
...
@@ -721,7 +832,7 @@ void cv::gpu::phase(const GpuMat& x, const GpuMat& y, GpuMat& angle, bool angleI
}

void cv::gpu::phase(const GpuMat& x, const GpuMat& y, GpuMat& angle, bool angleInDegrees, const Stream& stream)
{
    ::cartToPolar_caller(x, y, 0, false, &angle, angleInDegrees, StreamAccessor::getStream(stream));
}
...
...
modules/gpu/src/graphcuts.cpp
...
...
@@ -48,15 +48,18 @@ void cv::gpu::graphcut(GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, Gpu
#else /* !defined (HAVE_CUDA) */

#define NPP_VERSION (10 * NPP_VERSION_MAJOR + NPP_VERSION_MINOR)

void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& bottom, GpuMat& labels, GpuMat& buf)
{
#if NPP_VERSION >= 32
    CV_Assert(leftTransp.type() == CV_32S && rightTransp.type() == CV_32S);
    CV_Assert(terminals.type() == CV_32S && bottom.type() == CV_32S && top.type() == CV_32S);
    CV_Assert(terminals.size() == leftTransp.size());
    CV_Assert(terminals.size() == rightTransp.size());
    CV_Assert(terminals.size() == top.size() && terminals.size() == bottom.size());
    CV_Assert(top.step == bottom.step && top.step == terminals.step && rightTransp.step == leftTransp.step);

    labels.create(terminals.size(), CV_8U);

    NppiSize sznpp;
...
...
@@ -69,8 +72,11 @@ void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTrans
    if ((size_t)bufsz > buf.cols * buf.rows * buf.elemSize())
        buf.create(1, bufsz, CV_8U);

    nppSafeCall( nppiGraphcut_32s8u(terminals.ptr<Npp32s>(), leftTransp.ptr<Npp32s>(), rightTransp.ptr<Npp32s>(), top.ptr<Npp32s>(), bottom.ptr<Npp32s>(),
        terminals.step, leftTransp.step, sznpp, labels.ptr<Npp8u>(), labels.step, buf.ptr<Npp8u>()) );
#else
    CV_Assert(!"This function doesn't supported");
#endif
}
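The six cost matrices describe a 4-connected grid, and the asserts above pin down the layout NPP expects. A minimal call sketch under those constraints:

    // All inputs CV_32S, identical size; top/bottom/terminals share one step,
    // leftTransp/rightTransp share another (they are stored transposed).
    cv::gpu::GpuMat terminals, leftTransp, rightTransp, top, bottom, labels, buf;
    // ... fill the cost matrices on the device ...
    cv::gpu::graphcut(terminals, leftTransp, rightTransp, top, bottom, labels, buf);
    // labels comes back as CV_8U, one segmentation label per pixel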
...
...
modules/gpu/src/imgproc_gpu.cpp
...
...
@@ -71,7 +71,9 @@ void cv::gpu::histRange(const GpuMat&, GpuMat*, const GpuMat*) { throw_nogpu();
#else /* !defined (HAVE_CUDA) */

#define NPP_VERSION (10 * NPP_VERSION_MAJOR + NPP_VERSION_MINOR)

namespace cv { namespace gpu { namespace imgproc
{
    void remap_gpu_1c(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst);
    void remap_gpu_3c(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst);
...
...
@@ -83,7 +85,7 @@ namespace cv { namespace gpu { namespace imgproc
    void drawColorDisp_gpu(const DevMem2D_<short>& src, const DevMem2D& dst, int ndisp, const cudaStream_t& stream);

    void reprojectImageTo3D_gpu(const DevMem2D& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream);
    void reprojectImageTo3D_gpu(const DevMem2D_<short>& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream);
}}}
////////////////////////////////////////////////////////////////////////
...
...
@@ -101,9 +103,9 @@ void cv::gpu::remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const Gp
        out = dst;

    out.create(xmap.size(), src.type());

    callers[src.channels() - 1](src, xmap, ymap, out);

    dst = out;
}
...
...
@@ -111,7 +113,7 @@ void cv::gpu::remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const Gp
// meanShiftFiltering_GPU
void cv::gpu::meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr, TermCriteria criteria)
{
    if( src.empty() )
        CV_Error( CV_StsBadArg, "The input image is empty" );
...
...
@@ -119,25 +121,25 @@ void cv::gpu::meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr,
        CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );

    dst.create( src.size(), CV_8UC4 );

    if( !(criteria.type & TermCriteria::MAX_ITER) )
        criteria.maxCount = 5;

    int maxIter = std::min(std::max(criteria.maxCount, 1), 100);

    float eps;
    if( !(criteria.type & TermCriteria::EPS) )
        eps = 1.f;
    eps = (float)std::max(criteria.epsilon, 0.0);

    imgproc::meanShiftFiltering_gpu(src, dst, sp, sr, maxIter, eps);
}

////////////////////////////////////////////////////////////////////////
// meanShiftProc_GPU

void cv::gpu::meanShiftProc(const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int sp, int sr, TermCriteria criteria)
{
    if( src.empty() )
        CV_Error( CV_StsBadArg, "The input image is empty" );
...
...
@@ -146,18 +148,18 @@ void cv::gpu::meanShiftProc(const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int
    dstr.create( src.size(), CV_8UC4 );
    dstsp.create( src.size(), CV_16SC2 );

    if( !(criteria.type & TermCriteria::MAX_ITER) )
        criteria.maxCount = 5;

    int maxIter = std::min(std::max(criteria.maxCount, 1), 100);

    float eps;
    if( !(criteria.type & TermCriteria::EPS) )
        eps = 1.f;
    eps = (float)std::max(criteria.epsilon, 0.0);

    imgproc::meanShiftProc_gpu(src, dstr, dstsp, sp, sr, maxIter, eps);
}
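Both mean-shift wrappers normalize their TermCriteria identically: a missing MAX_ITER falls back to 5 iterations, the count is clamped to [1, 100], and epsilon is clamped to be non-negative. For example:

    // criteria.maxCount = 200  ->  maxIter = std::min(std::max(200, 1), 100) == 100
    // criteria.epsilon  = -0.5 ->  eps = (float)std::max(-0.5, 0.0) == 0.f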
////////////////////////////////////////////////////////////////////////
...
...
@@ -167,7 +169,7 @@ namespace
{
    template <typename T>
    void drawColorDisp_caller(const GpuMat& src, GpuMat& dst, int ndisp, const cudaStream_t& stream)
    {
        GpuMat out;
        if (dst.data != src.data)
            out = dst;
...
...
@@ -186,14 +188,14 @@ namespace
void cv::gpu::drawColorDisp(const GpuMat& src, GpuMat& dst, int ndisp)
{
    CV_Assert(src.type() == CV_8U || src.type() == CV_16S);

    drawColorDisp_callers[src.type()](src, dst, ndisp, 0);
}

void cv::gpu::drawColorDisp(const GpuMat& src, GpuMat& dst, int ndisp, const Stream& stream)
{
    CV_Assert(src.type() == CV_8U || src.type() == CV_16S);

    drawColorDisp_callers[src.type()](src, dst, ndisp, StreamAccessor::getStream(stream));
}
...
...
@@ -204,35 +206,35 @@ namespace
{
    template <typename T>
    void reprojectImageTo3D_caller(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, const cudaStream_t& stream)
    {
        xyzw.create(disp.rows, disp.cols, CV_32FC4);
        imgproc::reprojectImageTo3D_gpu((DevMem2D_<T>)disp, xyzw, Q.ptr<float>(), stream);
    }

    typedef void (*reprojectImageTo3D_caller_t)(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, const cudaStream_t& stream);
    const reprojectImageTo3D_caller_t reprojectImageTo3D_callers[] = {reprojectImageTo3D_caller<unsigned char>, 0, 0, reprojectImageTo3D_caller<short>, 0, 0, 0, 0};
}

void cv::gpu::reprojectImageTo3D(const GpuMat& disp, GpuMat& xyzw, const Mat& Q)
{
    CV_Assert((disp.type() == CV_8U || disp.type() == CV_16S) && Q.type() == CV_32F && Q.rows == 4 && Q.cols == 4);

    reprojectImageTo3D_callers[disp.type()](disp, xyzw, Q, 0);
}

void cv::gpu::reprojectImageTo3D(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, const Stream& stream)
{
    CV_Assert((disp.type() == CV_8U || disp.type() == CV_16S) && Q.type() == CV_32F && Q.rows == 4 && Q.cols == 4);

    reprojectImageTo3D_callers[disp.type()](disp, xyzw, Q, StreamAccessor::getStream(stream));
}

////////////////////////////////////////////////////////////////////////
// threshold

double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh)
{
    CV_Assert(src.type() == CV_32FC1);

    dst.create( src.size(), src.type() );
...
...
@@ -241,7 +243,7 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh)
    sz.width  = src.cols;
    sz.height = src.rows;

    nppSafeCall( nppiThreshold_32f_C1R(src.ptr<Npp32f>(), src.step,
        dst.ptr<Npp32f>(), dst.step, sz, static_cast<Npp32f>(thresh), NPP_CMP_GREATER) );

    return thresh;
...
...
@@ -298,7 +300,7 @@ void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, doub
////////////////////////////////////////////////////////////////////////
// copyMakeBorder

void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, const Scalar& value)
{
    CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4 || src.type() == CV_32SC1);
...
...
@@ -308,32 +310,32 @@ void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom
    srcsz.width  = src.cols;
    srcsz.height = src.rows;

    NppiSize dstsz;
    dstsz.width  = dst.cols;
    dstsz.height = dst.rows;

    switch (src.type())
    {
    case CV_8UC1:
        {
            Npp8u nVal = static_cast<Npp8u>(value[0]);
            nppSafeCall( nppiCopyConstBorder_8u_C1R(src.ptr<Npp8u>(), src.step, srcsz,
                dst.ptr<Npp8u>(), dst.step, dstsz, top, left, nVal) );
            break;
        }
    case CV_8UC4:
        {
            Npp8u nVal[] = {static_cast<Npp8u>(value[0]), static_cast<Npp8u>(value[1]), static_cast<Npp8u>(value[2]), static_cast<Npp8u>(value[3])};
            nppSafeCall( nppiCopyConstBorder_8u_C4R(src.ptr<Npp8u>(), src.step, srcsz,
                dst.ptr<Npp8u>(), dst.step, dstsz, top, left, nVal) );
            break;
        }
    case CV_32SC1:
        {
            Npp32s nVal = static_cast<Npp32s>(value[0]);
            nppSafeCall( nppiCopyConstBorder_32s_C1R(src.ptr<Npp32s>(), src.step, srcsz,
                dst.ptr<Npp32s>(), dst.step, dstsz, top, left, nVal) );
            break;
        }
    default:
        CV_Assert(!"Unsupported source type");
    }
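A minimal usage sketch for the constant-border routine above (the elided part of the function creates dst at the padded size before this switch runs):

    cv::gpu::GpuMat src(cv::Mat::ones(32, 32, CV_8UC1)), dst;
    // pad two pixels on each side with the constant value 7
    cv::gpu::copyMakeBorder(src, dst, 2, 2, 2, 2, cv::Scalar(7));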
...
...
@@ -343,26 +345,26 @@ void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom
// warp

namespace
{
    typedef NppStatus (*npp_warp_8u_t)(const Npp8u* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, Npp8u* pDst,
                                       int dstStep, NppiRect dstRoi, const double coeffs[][3],
                                       int interpolation);
    typedef NppStatus (*npp_warp_16u_t)(const Npp16u* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, Npp16u* pDst,
                                        int dstStep, NppiRect dstRoi, const double coeffs[][3],
                                        int interpolation);
    typedef NppStatus (*npp_warp_32s_t)(const Npp32s* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, Npp32s* pDst,
                                        int dstStep, NppiRect dstRoi, const double coeffs[][3],
                                        int interpolation);
    typedef NppStatus (*npp_warp_32f_t)(const Npp32f* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, Npp32f* pDst,
                                        int dstStep, NppiRect dstRoi, const double coeffs[][3],
                                        int interpolation);

    void nppWarpCaller(const GpuMat& src, GpuMat& dst, double coeffs[][3], const Size& dsize, int flags,
                       npp_warp_8u_t npp_warp_8u[][2], npp_warp_16u_t npp_warp_16u[][2],
                       npp_warp_32s_t npp_warp_32s[][2], npp_warp_32f_t npp_warp_32f[][2])
    {
        static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC};

        int interpolation = flags & INTER_MAX;

        CV_Assert((src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32S || src.depth() == CV_32F) && src.channels() != 2);
...
...
@@ -387,19 +389,19 @@ namespace
        switch (src.depth())
        {
        case CV_8U:
            nppSafeCall( npp_warp_8u[src.channels()][warpInd](src.ptr<Npp8u>(), srcsz, src.step, srcroi,
                dst.ptr<Npp8u>(), dst.step, dstroi, coeffs, npp_inter[interpolation]) );
            break;
        case CV_16U:
            nppSafeCall( npp_warp_16u[src.channels()][warpInd](src.ptr<Npp16u>(), srcsz, src.step, srcroi,
                dst.ptr<Npp16u>(), dst.step, dstroi, coeffs, npp_inter[interpolation]) );
            break;
        case CV_32S:
            nppSafeCall( npp_warp_32s[src.channels()][warpInd](src.ptr<Npp32s>(), srcsz, src.step, srcroi,
                dst.ptr<Npp32s>(), dst.step, dstroi, coeffs, npp_inter[interpolation]) );
            break;
        case CV_32F:
            nppSafeCall( npp_warp_32f[src.channels()][warpInd](src.ptr<Npp32f>(), srcsz, src.step, srcroi,
                dst.ptr<Npp32f>(), dst.step, dstroi, coeffs, npp_inter[interpolation]) );
            break;
        default:
...
...
@@ -408,38 +410,38 @@ namespace
        }
    }

void cv::gpu::warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags)
{
    static npp_warp_8u_t npp_warpAffine_8u[][2] =
    {
        {0, 0},
        {nppiWarpAffine_8u_C1R, nppiWarpAffineBack_8u_C1R},
        {0, 0},
        {nppiWarpAffine_8u_C3R, nppiWarpAffineBack_8u_C3R},
        {nppiWarpAffine_8u_C4R, nppiWarpAffineBack_8u_C4R}
    };
    static npp_warp_16u_t npp_warpAffine_16u[][2] =
    {
        {0, 0},
        {nppiWarpAffine_16u_C1R, nppiWarpAffineBack_16u_C1R},
        {0, 0},
        {nppiWarpAffine_16u_C3R, nppiWarpAffineBack_16u_C3R},
        {nppiWarpAffine_16u_C4R, nppiWarpAffineBack_16u_C4R}
    };
    static npp_warp_32s_t npp_warpAffine_32s[][2] =
    {
        {0, 0},
        {nppiWarpAffine_32s_C1R, nppiWarpAffineBack_32s_C1R},
        {0, 0},
        {nppiWarpAffine_32s_C3R, nppiWarpAffineBack_32s_C3R},
        {nppiWarpAffine_32s_C4R, nppiWarpAffineBack_32s_C4R}
    };
    static npp_warp_32f_t npp_warpAffine_32f[][2] =
    {
        {0, 0},
        {nppiWarpAffine_32f_C1R, nppiWarpAffineBack_32f_C1R},
        {0, 0},
        {nppiWarpAffine_32f_C3R, nppiWarpAffineBack_32f_C3R},
        {nppiWarpAffine_32f_C4R, nppiWarpAffineBack_32f_C4R}
    };
...
...
@@ -454,36 +456,36 @@ void cv::gpu::warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsiz
void cv::gpu::warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags)
{
    static npp_warp_8u_t npp_warpPerspective_8u[][2] =
    {
        {0, 0},
        {nppiWarpPerspective_8u_C1R, nppiWarpPerspectiveBack_8u_C1R},
        {0, 0},
        {nppiWarpPerspective_8u_C3R, nppiWarpPerspectiveBack_8u_C3R},
        {nppiWarpPerspective_8u_C4R, nppiWarpPerspectiveBack_8u_C4R}
    };
    static npp_warp_16u_t npp_warpPerspective_16u[][2] =
    {
        {0, 0},
        {nppiWarpPerspective_16u_C1R, nppiWarpPerspectiveBack_16u_C1R},
        {0, 0},
        {nppiWarpPerspective_16u_C3R, nppiWarpPerspectiveBack_16u_C3R},
        {nppiWarpPerspective_16u_C4R, nppiWarpPerspectiveBack_16u_C4R}
    };
    static npp_warp_32s_t npp_warpPerspective_32s[][2] =
    {
        {0, 0},
        {nppiWarpPerspective_32s_C1R, nppiWarpPerspectiveBack_32s_C1R},
        {0, 0},
        {nppiWarpPerspective_32s_C3R, nppiWarpPerspectiveBack_32s_C3R},
        {nppiWarpPerspective_32s_C4R, nppiWarpPerspectiveBack_32s_C4R}
    };
    static npp_warp_32f_t npp_warpPerspective_32f[][2] =
    {
        {0, 0},
        {nppiWarpPerspective_32f_C1R, nppiWarpPerspectiveBack_32f_C1R},
        {0, 0},
        {nppiWarpPerspective_32f_C3R, nppiWarpPerspectiveBack_32f_C3R},
        {nppiWarpPerspective_32f_C4R, nppiWarpPerspectiveBack_32f_C4R}
    };
...
...
@@ -502,7 +504,7 @@ void cv::gpu::warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size
void cv::gpu::rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift, double yShift, int interpolation)
{
    static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC};

    CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);
    CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);
...
...
@@ -522,12 +524,12 @@ void cv::gpu::rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, d
    if (src.type() == CV_8UC1)
    {
        nppSafeCall( nppiRotate_8u_C1R(src.ptr<Npp8u>(), srcsz, src.step, srcroi,
            dst.ptr<Npp8u>(), dst.step, dstroi, angle, xShift, yShift, npp_inter[interpolation]) );
    }
    else
    {
        nppSafeCall( nppiRotate_8u_C4R(src.ptr<Npp8u>(), srcsz, src.step, srcroi,
            dst.ptr<Npp8u>(), dst.step, dstroi, angle, xShift, yShift, npp_inter[interpolation]) );
    }
}
...
...
@@ -538,7 +540,7 @@ void cv::gpu::rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, d
void cv::gpu::integral(GpuMat& src, GpuMat& sum, GpuMat& sqsum)
{
    CV_Assert(src.type() == CV_8UC1);

    int w = src.cols + 1, h = src.rows + 1;

    sum.create(h, w, CV_32S);
...
...
@@ -548,7 +550,7 @@ void cv::gpu::integral(GpuMat& src, GpuMat& sum, GpuMat& sqsum)
    sz.width  = src.cols;
    sz.height = src.rows;

    nppSafeCall( nppiSqrIntegral_8u32s32f_C1R(src.ptr<Npp8u>(), src.step, sum.ptr<Npp32s>(),
        sum.step, sqsum.ptr<Npp32f>(), sqsum.step, sz, 0, 0.0f, h) );
}
...
...
@@ -569,7 +571,7 @@ void cv::gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, cons
    nppRect.y = rect.y;

    nppSafeCall( nppiRectStdDev_32s32f_C1R(src.ptr<Npp32s>(), src.step, sqr.ptr<Npp32f>(), sqr.step,
        dst.ptr<Npp32f>(), dst.step, sz, nppRect) );
}
////////////////////////////////////////////////////////////////////////
...
...
@@ -577,6 +579,7 @@ void cv::gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, cons
void cv::gpu::Canny(const GpuMat& image, GpuMat& edges, double threshold1, double threshold2, int apertureSize)
{
#if NPP_VERSION >= 32
    CV_Assert(!"disabled until fix crash");

    CV_Assert(image.type() == CV_8UC1);
...
...
@@ -598,8 +601,11 @@ void cv::gpu::Canny(const GpuMat& image, GpuMat& edges, double threshold1, doubl
    nppSafeCall( nppiCannyGetBufferSize(sz, &bufsz) );
    GpuMat buf(1, bufsz, CV_8UC1);

    nppSafeCall( nppiCanny_32f8u_C1R(srcDx.ptr<Npp32f>(), srcDx.step, srcDy.ptr<Npp32f>(), srcDy.step,
        edges.ptr<Npp8u>(), edges.step, sz, (Npp32f)threshold1, (Npp32f)threshold2, buf.ptr<Npp8u>()) );
#else
    CV_Assert(!"This function doesn't supported");
#endif
}
////////////////////////////////////////////////////////////////////////
...
...
@@ -612,7 +618,7 @@ namespace
    template<> struct NPPTypeTraits<CV_16U> { typedef Npp16u npp_type; };
    template<> struct NPPTypeTraits<CV_16S> { typedef Npp16s npp_type; };
    template<> struct NPPTypeTraits<CV_32F> { typedef Npp32f npp_type; };

    typedef NppStatus (*get_buf_size_c1_t)(NppiSize oSizeROI, int nLevels, int* hpBufferSize);
    typedef NppStatus (*get_buf_size_c4_t)(NppiSize oSizeROI, int nLevels[], int* hpBufferSize);
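NPPTypeTraits maps an OpenCV depth constant to the matching NPP scalar type at compile time, which lets one histogram template serve every supported depth:

    // e.g. inside a template<int SDEPTH> body:
    // typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
    // src_t is Npp8u for CV_8U, Npp16u for CV_16U, Npp32f for CV_32F, ...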
...
...
@@ -620,20 +626,20 @@ namespace
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;

        typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist,
            int nLevels, Npp32s nLowerLevel, Npp32s nUpperLevel, Npp8u* pBuffer);
    };
    template<int SDEPTH> struct NppHistogramEvenFuncC4
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;

        typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI,
            Npp32s* pHist[4], int nLevels[4], Npp32s nLowerLevel[4], Npp32s nUpperLevel[4], Npp8u* pBuffer);
    };

    template<int SDEPTH, typename NppHistogramEvenFuncC1<SDEPTH>::func_ptr func, get_buf_size_c1_t get_buf_size>
    struct NppHistogramEvenC1
    {
        typedef typename NppHistogramEvenFuncC1<SDEPTH>::src_t src_t;

        static void hist(const GpuMat& src, GpuMat& hist, int histSize, int lowerLevel, int upperLevel)
...
...
@@ -650,13 +656,13 @@ namespace
            get_buf_size(sz, levels, &buf_size);
            buffer.create(1, buf_size, CV_8U);
            nppSafeCall( func(src.ptr<src_t>(), src.step, sz, hist.ptr<Npp32s>(), levels,
                lowerLevel, upperLevel, buffer.ptr<Npp8u>()) );
        }
    };
    template<int SDEPTH, typename NppHistogramEvenFuncC4<SDEPTH>::func_ptr func, get_buf_size_c4_t get_buf_size>
    struct NppHistogramEvenC4
    {
        typedef typename NppHistogramEvenFuncC4<SDEPTH>::src_t src_t;

        static void hist(const GpuMat& src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4])
...
...
@@ -688,7 +694,7 @@ namespace
        typedef Npp32s level_t;
        enum {LEVEL_TYPE_CODE=CV_32SC1};

        typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist,
            const Npp32s* pLevels, int nLevels, Npp8u* pBuffer);
    };
    template<> struct NppHistogramRangeFuncC1<CV_32F>
...
...
@@ -697,7 +703,7 @@ namespace
        typedef Npp32f level_t;
        enum {LEVEL_TYPE_CODE=CV_32FC1};

        typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist,
            const Npp32f* pLevels, int nLevels, Npp8u* pBuffer);
    };
    template<int SDEPTH> struct NppHistogramRangeFuncC4
...
...
@@ -706,7 +712,7 @@ namespace
        typedef Npp32s level_t;
        enum {LEVEL_TYPE_CODE=CV_32SC1};

        typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist[4],
            const Npp32s* pLevels[4], int nLevels[4], Npp8u* pBuffer);
    };
    template<> struct NppHistogramRangeFuncC4<CV_32F>
...
...
@@ -715,19 +721,19 @@ namespace
        typedef Npp32f level_t;
        enum {LEVEL_TYPE_CODE=CV_32FC1};

        typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist[4],
            const Npp32f* pLevels[4], int nLevels[4], Npp8u* pBuffer);
    };

    template<int SDEPTH, typename NppHistogramRangeFuncC1<SDEPTH>::func_ptr func, get_buf_size_c1_t get_buf_size>
    struct NppHistogramRangeC1
    {
        typedef typename NppHistogramRangeFuncC1<SDEPTH>::src_t src_t;
        typedef typename NppHistogramRangeFuncC1<SDEPTH>::level_t level_t;
        enum {LEVEL_TYPE_CODE=NppHistogramRangeFuncC1<SDEPTH>::LEVEL_TYPE_CODE};

        static void hist(const GpuMat& src, GpuMat& hist, const GpuMat& levels)
        {
            CV_Assert(levels.type() == LEVEL_TYPE_CODE && levels.rows == 1);

            hist.create(1, levels.cols - 1, CV_32S);
...
...
@@ -743,10 +749,10 @@ namespace
            buffer.create(1, buf_size, CV_8U);
            nppSafeCall( func(src.ptr<src_t>(), src.step, sz, hist.ptr<Npp32s>(), levels.ptr<level_t>(), levels.cols, buffer.ptr<Npp8u>()) );
        }
    };
    template<int SDEPTH, typename NppHistogramRangeFuncC4<SDEPTH>::func_ptr func, get_buf_size_c4_t get_buf_size>
    struct NppHistogramRangeC4
    {
        typedef typename NppHistogramRangeFuncC4<SDEPTH>::src_t src_t;
        typedef typename NppHistogramRangeFuncC1<SDEPTH>::level_t level_t;
        enum {LEVEL_TYPE_CODE=NppHistogramRangeFuncC1<SDEPTH>::LEVEL_TYPE_CODE};
...
...
@@ -778,22 +784,27 @@ namespace
            buffer.create(1, buf_size, CV_8U);
            nppSafeCall( func(src.ptr<src_t>(), src.step, sz, pHist, pLevels, nLevels, buffer.ptr<Npp8u>()) );
        }
    };
}

void cv::gpu::evenLevels(GpuMat& levels, int nLevels, int lowerLevel, int upperLevel)
{
#if NPP_VERSION >= 32
    Mat host_levels(1, nLevels, CV_32SC1);
    nppSafeCall( nppiEvenLevelsHost_32s(host_levels.ptr<Npp32s>(), nLevels, lowerLevel, upperLevel) );
    levels.upload(host_levels);
#else
    CV_Assert(!"This function doesn't supported");
#endif
}

void cv::gpu::histEven(const GpuMat& src, GpuMat& hist, int histSize, int lowerLevel, int upperLevel)
{
#if NPP_VERSION >= 32
    CV_Assert(src.type() == CV_8UC1 || src.type() == CV_16UC1 || src.type() == CV_16SC1);

    typedef void (*hist_t)(const GpuMat& src, GpuMat& hist, int levels, int lowerLevel, int upperLevel);
    static const hist_t hist_callers[] =
    {
        NppHistogramEvenC1<CV_8U, nppiHistogramEven_8u_C1R, nppiHistogramEvenGetBufferSize_8u_C1R>::hist,
        0,
...
...
@@ -802,14 +813,18 @@ void cv::gpu::histEven(const GpuMat& src, GpuMat& hist, int histSize, int lowerL
    };

    hist_callers[src.depth()](src, hist, histSize, lowerLevel, upperLevel);
#else
    CV_Assert(!"This function doesn't supported");
#endif
}

void cv::gpu::histEven(const GpuMat& src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4])
{
#if NPP_VERSION >= 32
    CV_Assert(src.type() == CV_8UC4 || src.type() == CV_16UC4 || src.type() == CV_16SC4);

    typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], int levels[4], int lowerLevel[4], int upperLevel[4]);
    static const hist_t hist_callers[] =
    {
        NppHistogramEvenC4<CV_8U, nppiHistogramEven_8u_C4R, nppiHistogramEvenGetBufferSize_8u_C4R>::hist,
        0,
...
...
@@ -818,14 +833,18 @@ void cv::gpu::histEven(const GpuMat& src, GpuMat hist[4], int histSize[4], int l
    };

    hist_callers[src.depth()](src, hist, histSize, lowerLevel, upperLevel);
#else
    CV_Assert(!"This function doesn't supported");
#endif
}

void cv::gpu::histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels)
{
#if NPP_VERSION >= 32
    CV_Assert(src.type() == CV_8UC1 || src.type() == CV_16UC1 || src.type() == CV_16SC1 || src.type() == CV_32FC1);

    typedef void (*hist_t)(const GpuMat& src, GpuMat& hist, const GpuMat& levels);
    static const hist_t hist_callers[] =
    {
        NppHistogramRangeC1<CV_8U, nppiHistogramRange_8u_C1R, nppiHistogramRangeGetBufferSize_8u_C1R>::hist,
        0,
...
...
@@ -836,14 +855,18 @@ void cv::gpu::histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels)
    };

    hist_callers[src.depth()](src, hist, levels);
#else
    CV_Assert(!"This function doesn't supported");
#endif
}

void cv::gpu::histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4])
{
#if NPP_VERSION >= 32
    CV_Assert(src.type() == CV_8UC4 || src.type() == CV_16UC4 || src.type() == CV_16SC4 || src.type() == CV_32FC4);

    typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4]);
    static const hist_t hist_callers[] =
    {
        NppHistogramRangeC4<CV_8U, nppiHistogramRange_8u_C4R, nppiHistogramRangeGetBufferSize_8u_C4R>::hist,
        0,
...
...
@@ -854,6 +877,9 @@ void cv::gpu::histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4
    };

    hist_callers[src.depth()](src, hist, levels);
#else
    CV_Assert(!"This function doesn't supported");
#endif
}

#endif /* !defined (HAVE_CUDA) */
modules/gpu/src/matrix_operations.cpp
...
...
@@ -77,12 +77,14 @@ namespace cv
#else /* !defined (HAVE_CUDA) */

#define NPP_VERSION (10 * NPP_VERSION_MAJOR + NPP_VERSION_MINOR)

namespace cv
{
    namespace gpu
    {
        namespace matrix_operations
        {
            void copy_to_with_mask(const DevMem2D& src, DevMem2D dst, int depth, const DevMem2D& mask, int channels, const cudaStream_t& stream = 0);

            void set_to_without_mask(DevMem2D dst, int depth, const double* scalar, int channels, const cudaStream_t& stream = 0);
...
...
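A note on the guard that recurs below: NPP_VERSION packs the major and minor versions into one integer, so NPP 3.2 gives 10 * 3 + 2 = 32, and each #if NPP_VERSION >= 32 block therefore compiles only against NPP 3.2 or newer, falling back to the CUDA-kernel callers (or a CV_Assert) otherwise.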
@@ -162,9 +164,9 @@ namespace
        typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, dst_t* pDst, int nDstStep, NppiSize oSizeROI, NppRoundMode eRoundMode);
    };

    template<int SDEPTH, int DDEPTH, typename NppConvertFunc<SDEPTH, DDEPTH>::func_ptr func> struct NppCvt
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
        typedef typename NPPTypeTraits<DDEPTH>::npp_type dst_t;
...
@@ -177,7 +179,7 @@ namespace
        }
    };

    template<int DDEPTH, typename NppConvertFunc<CV_32F, DDEPTH>::func_ptr func> struct NppCvt<CV_32F, DDEPTH, func>
    {
        typedef typename NPPTypeTraits<DDEPTH>::npp_type dst_t;

        static void cvt(const GpuMat& src, GpuMat& dst)
...
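The NppCvt and NppSet wrappers in this file all rely on the same trick: the concrete NPP function is bound as a non-type template parameter, so every instantiation exposes one uniform signature and can sit in a plain function-pointer dispatch table. A self-contained sketch of that pattern (all names here are hypothetical, not from the commit):

#include <cstdio>

typedef int (*unary_fn)(int);

int twice(int x)  { return 2 * x; }
int negate(int x) { return -x; }

// The function to call is baked in at compile time; every instantiation
// of Caller<...>::call has the same int(int) signature.
template <unary_fn func> struct Caller
{
    static int call(int x) { return func(x); }
};

int main()
{
    typedef int (*caller_t)(int);
    static const caller_t callers[] = { Caller<twice>::call, Caller<negate>::call };
    std::printf("%d %d\n", callers[0](21), callers[1](21));  // prints "42 -21"
    return 0;
}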
@@ -203,7 +205,7 @@ void cv::gpu::GpuMat::convertTo( GpuMat& dst, int rtype, double alpha, double be
        rtype = type();
    else
        rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), channels());

    int scn = channels();
    int sdepth = depth(), ddepth = CV_MAT_DEPTH(rtype);
    if (sdepth == ddepth && noScale)
...
@@ -224,7 +226,7 @@ void cv::gpu::GpuMat::convertTo( GpuMat& dst, int rtype, double alpha, double be
    else
    {
        typedef void (*convert_caller_t)(const GpuMat& src, GpuMat& dst);
        static const convert_caller_t convert_callers[8][8][4] =
        {
            {
                {0,0,0,0},
...
@@ -232,7 +234,11 @@ void cv::gpu::GpuMat::convertTo( GpuMat& dst, int rtype, double alpha, double be
                {NppCvt<CV_8U, CV_16U, nppiConvert_8u16u_C1R>::cvt, convertToKernelCaller, convertToKernelCaller, NppCvt<CV_8U, CV_16U, nppiConvert_8u16u_C4R>::cvt},
                {NppCvt<CV_8U, CV_16S, nppiConvert_8u16s_C1R>::cvt, convertToKernelCaller, convertToKernelCaller, NppCvt<CV_8U, CV_16S, nppiConvert_8u16s_C4R>::cvt},
                {convertToKernelCaller, convertToKernelCaller, convertToKernelCaller, convertToKernelCaller},
#if NPP_VERSION >= 32
                {NppCvt<CV_8U, CV_32F, nppiConvert_8u32f_C1R>::cvt, convertToKernelCaller, convertToKernelCaller, convertToKernelCaller},
#else
                {convertToKernelCaller, convertToKernelCaller, convertToKernelCaller, convertToKernelCaller},
#endif
                {convertToKernelCaller, convertToKernelCaller, convertToKernelCaller, convertToKernelCaller},
                {0,0,0,0}
            },
...
@@ -251,8 +257,8 @@ void cv::gpu::GpuMat::convertTo( GpuMat& dst, int rtype, double alpha, double be
                {convertToKernelCaller, convertToKernelCaller, convertToKernelCaller, convertToKernelCaller},
                {0,0,0,0},
                {convertToKernelCaller, convertToKernelCaller, convertToKernelCaller, convertToKernelCaller},
                {NppCvt<CV_16U, CV_32S, nppiConvert_16u32s_C1R>::cvt, convertToKernelCaller, convertToKernelCaller, convertToKernelCaller},
                {NppCvt<CV_16U, CV_32F, nppiConvert_16u32f_C1R>::cvt, convertToKernelCaller, convertToKernelCaller, convertToKernelCaller},
                {convertToKernelCaller, convertToKernelCaller, convertToKernelCaller, convertToKernelCaller},
                {0,0,0,0}
            },
...
@@ -261,8 +267,8 @@ void cv::gpu::GpuMat::convertTo( GpuMat& dst, int rtype, double alpha, double be
                {convertToKernelCaller, convertToKernelCaller, convertToKernelCaller, convertToKernelCaller},
                {convertToKernelCaller, convertToKernelCaller, convertToKernelCaller, convertToKernelCaller},
                {0,0,0,0},
                {NppCvt<CV_16S, CV_32S, nppiConvert_16s32s_C1R>::cvt, convertToKernelCaller, convertToKernelCaller, convertToKernelCaller},
                {NppCvt<CV_16S, CV_32F, nppiConvert_16s32f_C1R>::cvt, convertToKernelCaller, convertToKernelCaller, convertToKernelCaller},
                {convertToKernelCaller, convertToKernelCaller, convertToKernelCaller, convertToKernelCaller},
                {0,0,0,0}
            },
...
@@ -277,10 +283,14 @@ void cv::gpu::GpuMat::convertTo( GpuMat& dst, int rtype, double alpha, double be
                {0,0,0,0}
            },
            {
#if NPP_VERSION >= 32
                {NppCvt<CV_32F, CV_8U, nppiConvert_32f8u_C1R>::cvt, convertToKernelCaller, convertToKernelCaller, convertToKernelCaller},
#else
                {convertToKernelCaller, convertToKernelCaller, convertToKernelCaller, convertToKernelCaller},
#endif
                {convertToKernelCaller, convertToKernelCaller, convertToKernelCaller, convertToKernelCaller},
                {NppCvt<CV_32F, CV_16U, nppiConvert_32f16u_C1R>::cvt, convertToKernelCaller, convertToKernelCaller, convertToKernelCaller},
                {NppCvt<CV_32F, CV_16S, nppiConvert_32f16s_C1R>::cvt, convertToKernelCaller, convertToKernelCaller, convertToKernelCaller},
                {convertToKernelCaller, convertToKernelCaller, convertToKernelCaller, convertToKernelCaller},
                {0,0,0,0},
                {convertToKernelCaller, convertToKernelCaller, convertToKernelCaller, convertToKernelCaller},
...
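For context, a sketch of what the table above serves: an unscaled depth conversion, which (if I read the dispatch correctly) lands on an NppCvt entry when NPP >= 3.2 and on convertToKernelCaller otherwise. Sizes are illustrative:

// Hypothetical unscaled conversion: CV_8UC1 -> CV_32FC1 on the device.
cv::gpu::GpuMat src8u(480, 640, CV_8UC1), dst32f;
src8u.setTo(cv::Scalar(100));
src8u.convertTo(dst32f, CV_32F);  // no alpha/beta, so the convert_callers table is used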
@@ -325,9 +335,9 @@ namespace
        typedef NppStatus (*func_ptr)(src_t val, src_t* pSrc, int nSrcStep, NppiSize oSizeROI);
    };

    template<int SDEPTH, int SCN, typename NppSetFunc<SDEPTH, SCN>::func_ptr func> struct NppSet
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;

        static void set(GpuMat& src, const Scalar& s)
...
@@ -340,7 +350,7 @@ namespace
        }
    };

    template<int SDEPTH, typename NppSetFunc<SDEPTH, 1>::func_ptr func> struct NppSet<SDEPTH, 1, func>
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;

        static void set(GpuMat& src, const Scalar& s)
...
@@ -357,7 +367,7 @@ namespace
    {
        matrix_operations::set_to_without_mask(src, src.depth(), s.val, src.channels());
    }

    template<int SDEPTH, int SCN> struct NppSetMaskFunc
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
...
@@ -370,9 +380,9 @@ namespace
        typedef NppStatus (*func_ptr)(src_t val, src_t* pSrc, int nSrcStep, NppiSize oSizeROI, const Npp8u* pMask, int nMaskStep);
    };

    template<int SDEPTH, int SCN, typename NppSetMaskFunc<SDEPTH, SCN>::func_ptr func> struct NppSetMask
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;

        static void set(GpuMat& src, const Scalar& s, const GpuMat& mask)
...
@@ -385,7 +395,7 @@ namespace
        }
    };

    template<int SDEPTH, typename NppSetMaskFunc<SDEPTH, 1>::func_ptr func> struct NppSetMask<SDEPTH, 1, func>
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;

        static void set(GpuMat& src, const Scalar& s, const GpuMat& mask)
...
@@ -397,7 +407,7 @@ namespace
            nppSafeCall( func(nppS[0], src.ptr<src_t>(), src.step, sz, mask.ptr<Npp8u>(), mask.step) );
        }
    };

    void kernelSetMask(GpuMat& src, const Scalar& s, const GpuMat& mask)
    {
        matrix_operations::set_to_with_mask(src, src.depth(), s.val, mask, src.channels());
...
@@ -409,7 +419,7 @@ GpuMat& GpuMat::setTo(const Scalar& s, const GpuMat& mask)
    CV_Assert(mask.type() == CV_8UC1);
    CV_DbgAssert(!this->empty());

    NppiSize sz;
    sz.width  = cols;
    sz.height = rows;
...
@@ -421,17 +431,34 @@ GpuMat& GpuMat::setTo(const Scalar& s, const GpuMat& mask)
        {
            {NppSet<CV_8U, 1, nppiSet_8u_C1R>::set, kernelSet, kernelSet, NppSet<CV_8U, 4, nppiSet_8u_C4R>::set},
            {kernelSet, kernelSet, kernelSet, kernelSet},
#if NPP_VERSION >= 32
            {NppSet<CV_16U, 1, nppiSet_16u_C1R>::set, kernelSet, kernelSet, NppSet<CV_16U, 4, nppiSet_16u_C4R>::set},
#else
            {kernelSet, kernelSet, kernelSet, kernelSet},
#endif
#if NPP_VERSION >= 32
            {NppSet<CV_16S, 1, nppiSet_16s_C1R>::set, kernelSet, kernelSet, NppSet<CV_16S, 4, nppiSet_16s_C4R>::set},
#else
            {kernelSet, kernelSet, kernelSet, kernelSet},
#endif
#if NPP_VERSION >= 32
            {NppSet<CV_32S, 1, nppiSet_32s_C1R>::set, kernelSet, kernelSet, NppSet<CV_32S, 4, nppiSet_32s_C4R>::set},
#else
            {NppSet<CV_32S, 1, nppiSet_32s_C1R>::set, kernelSet, kernelSet, kernelSet},
#endif
#if NPP_VERSION >= 32
            {NppSet<CV_32F, 1, nppiSet_32f_C1R>::set, kernelSet, kernelSet, NppSet<CV_32F, 4, nppiSet_32f_C4R>::set},
#else
            {NppSet<CV_32F, 1, nppiSet_32f_C1R>::set, kernelSet, kernelSet, kernelSet},
#endif
            {kernelSet, kernelSet, kernelSet, kernelSet},
            {0,0,0,0}
        };

        set_callers[depth()][channels() - 1](*this, s);
    }
    else
    {
#if NPP_VERSION >= 32
        typedef void (*set_caller_t)(GpuMat& src, const Scalar& s, const GpuMat& mask);
        static const set_caller_t set_callers[8][4] =
        {
...
@@ -445,6 +472,9 @@ GpuMat& GpuMat::setTo(const Scalar& s, const GpuMat& mask)
            {0,0,0,0}
        };

        set_callers[depth()][channels() - 1](*this, s, mask);
#else
        kernelSetMask(*this, s, mask);
#endif
    }

    return *this;
...
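A short usage sketch for both branches of setTo; the mask must be CV_8UC1 per the CV_Assert above, and all sizes here are illustrative:

// Hypothetical unmasked and masked fills.
cv::gpu::GpuMat img(480, 640, CV_32FC1);
img.setTo(cv::Scalar(0.0f));                             // unmasked: NppSet table (or kernelSet fallback)
cv::gpu::GpuMat mask(480, 640, CV_8UC1, cv::Scalar(0));
// ... mark a region of interest in mask ...
img.setTo(cv::Scalar(1.0f), mask);                       // masked: NppSetMask on NPP >= 3.2, else kernelSetMask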
@@ -550,7 +580,7 @@ bool cv::gpu::CudaMem::can_device_map_to_host()
}

void cv::gpu::CudaMem::create(int _rows, int _cols, int _type, int _alloc_type)
{
    if (_alloc_type == ALLOC_ZEROCOPY && !can_device_map_to_host())
        cv::gpu::error("ZeroCopy is not supported by current device", __FILE__, __LINE__);
...
@@ -561,7 +591,7 @@ void cv::gpu::CudaMem::create(int _rows, int _cols, int _type, int _alloc_type)
        release();

    CV_DbgAssert(_rows >= 0 && _cols >= 0);

    if (_rows > 0 && _cols > 0)
    {
        flags = Mat::MAGIC_VAL + Mat::CONTINUOUS_FLAG + _type;
        rows = _rows;
        cols = _cols;
...
@@ -575,7 +605,7 @@ void cv::gpu::CudaMem::create(int _rows, int _cols, int _type, int _alloc_type)
        //datastart = data = (uchar*)fastMalloc(datasize + sizeof(*refcount));
        alloc_type = _alloc_type;

        void* ptr;

        switch (alloc_type)
        {
        case ALLOC_PAGE_LOCKED:
            cudaSafeCall( cudaHostAlloc(&ptr, datasize, cudaHostAllocDefault) );
            break;
...
@@ -603,7 +633,7 @@ GpuMat cv::gpu::CudaMem::createGpuMatHeader () const
    }
    else
        cv::gpu::error("Zero-copy is not supported or memory was allocated without zero-copy flag", __FILE__, __LINE__);

    return res;
}
...
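Finally, a sketch of the zero-copy path this file guards: the same page-locked buffer is viewed from both host and device without an explicit upload. Names and sizes are illustrative, and the ALLOC_ZEROCOPY request only succeeds when can_device_map_to_host() returns true:

// Hypothetical zero-copy buffer shared between host and device.
cv::gpu::CudaMem buf(480, 640, CV_8UC1, cv::gpu::CudaMem::ALLOC_ZEROCOPY);
cv::Mat host_view = buf.createMatHeader();           // host-side view, no copy
cv::gpu::GpuMat dev_view = buf.createGpuMatHeader(); // device-side view of the same memory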