Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
811ba318
Commit
811ba318
authored
Jan 24, 2011
by
Vladislav Vinogradov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added synchronization after NPP calls
parent
8abdb372
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
88 additions
and
0 deletions
+88
-0
arithm.cpp
modules/gpu/src/arithm.cpp
+12
-0
element_operations.cpp
modules/gpu/src/element_operations.cpp
+16
-0
filtering.cpp
modules/gpu/src/filtering.cpp
+16
-0
graphcuts.cpp
modules/gpu/src/graphcuts.cpp
+2
-0
imgproc_gpu.cpp
modules/gpu/src/imgproc_gpu.cpp
+26
-0
matrix_operations.cpp
modules/gpu/src/matrix_operations.cpp
+12
-0
matrix_reductions.cpp
modules/gpu/src/matrix_reductions.cpp
+4
-0
No files found.
modules/gpu/src/arithm.cpp
View file @
811ba318
...
@@ -103,6 +103,8 @@ void cv::gpu::transpose(const GpuMat& src, GpuMat& dst)
...
@@ -103,6 +103,8 @@ void cv::gpu::transpose(const GpuMat& src, GpuMat& dst)
nppSafeCall
(
nppiStTranspose_64u_C1R
(
const_cast
<
NppSt64u
*>
(
src
.
ptr
<
NppSt64u
>
()),
src
.
step
,
nppSafeCall
(
nppiStTranspose_64u_C1R
(
const_cast
<
NppSt64u
*>
(
src
.
ptr
<
NppSt64u
>
()),
src
.
step
,
dst
.
ptr
<
NppSt64u
>
(),
dst
.
step
,
sz
)
);
dst
.
ptr
<
NppSt64u
>
(),
dst
.
step
,
sz
)
);
}
}
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////
...
@@ -130,6 +132,8 @@ void cv::gpu::flip(const GpuMat& src, GpuMat& dst, int flipCode)
...
@@ -130,6 +132,8 @@ void cv::gpu::flip(const GpuMat& src, GpuMat& dst, int flipCode)
dst
.
ptr
<
Npp8u
>
(),
dst
.
step
,
sz
,
dst
.
ptr
<
Npp8u
>
(),
dst
.
step
,
sz
,
(
flipCode
==
0
?
NPP_HORIZONTAL_AXIS
:
(
flipCode
>
0
?
NPP_VERTICAL_AXIS
:
NPP_BOTH_AXIS
)))
);
(
flipCode
==
0
?
NPP_HORIZONTAL_AXIS
:
(
flipCode
>
0
?
NPP_VERTICAL_AXIS
:
NPP_BOTH_AXIS
)))
);
}
}
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////
...
@@ -187,6 +191,8 @@ void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst)
...
@@ -187,6 +191,8 @@ void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst)
}
}
nppSafeCall
(
nppiLUT_Linear_8u_C3R
(
src
.
ptr
<
Npp8u
>
(),
src
.
step
,
dst
.
ptr
<
Npp8u
>
(),
dst
.
step
,
sz
,
pValues3
,
lvls
.
pLevels3
,
lvls
.
nValues3
)
);
nppSafeCall
(
nppiLUT_Linear_8u_C3R
(
src
.
ptr
<
Npp8u
>
(),
src
.
step
,
dst
.
ptr
<
Npp8u
>
(),
dst
.
step
,
sz
,
pValues3
,
lvls
.
pLevels3
,
lvls
.
nValues3
)
);
}
}
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////
...
@@ -203,6 +209,8 @@ void cv::gpu::exp(const GpuMat& src, GpuMat& dst)
...
@@ -203,6 +209,8 @@ void cv::gpu::exp(const GpuMat& src, GpuMat& dst)
sz
.
height
=
src
.
rows
;
sz
.
height
=
src
.
rows
;
nppSafeCall
(
nppiExp_32f_C1R
(
src
.
ptr
<
Npp32f
>
(),
src
.
step
,
dst
.
ptr
<
Npp32f
>
(),
dst
.
step
,
sz
)
);
nppSafeCall
(
nppiExp_32f_C1R
(
src
.
ptr
<
Npp32f
>
(),
src
.
step
,
dst
.
ptr
<
Npp32f
>
(),
dst
.
step
,
sz
)
);
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////
...
@@ -219,6 +227,8 @@ void cv::gpu::log(const GpuMat& src, GpuMat& dst)
...
@@ -219,6 +227,8 @@ void cv::gpu::log(const GpuMat& src, GpuMat& dst)
sz
.
height
=
src
.
rows
;
sz
.
height
=
src
.
rows
;
nppSafeCall
(
nppiLn_32f_C1R
(
src
.
ptr
<
Npp32f
>
(),
src
.
step
,
dst
.
ptr
<
Npp32f
>
(),
dst
.
step
,
sz
)
);
nppSafeCall
(
nppiLn_32f_C1R
(
src
.
ptr
<
Npp32f
>
(),
src
.
step
,
dst
.
ptr
<
Npp32f
>
(),
dst
.
step
,
sz
)
);
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////
...
@@ -239,6 +249,8 @@ namespace
...
@@ -239,6 +249,8 @@ namespace
sz
.
height
=
src
.
rows
;
sz
.
height
=
src
.
rows
;
nppSafeCall
(
func
(
src
.
ptr
<
Npp32fc
>
(),
src
.
step
,
dst
.
ptr
<
Npp32f
>
(),
dst
.
step
,
sz
)
);
nppSafeCall
(
func
(
src
.
ptr
<
Npp32fc
>
(),
src
.
step
,
dst
.
ptr
<
Npp32f
>
(),
dst
.
step
,
sz
)
);
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
}
}
...
...
modules/gpu/src/element_operations.cpp
View file @
811ba318
...
@@ -117,6 +117,8 @@ namespace
...
@@ -117,6 +117,8 @@ namespace
default:
default:
CV_Assert
(
!
"Unsupported source type"
);
CV_Assert
(
!
"Unsupported source type"
);
}
}
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
template
<
int
SCN
>
struct
NppArithmScalarFunc
;
template
<
int
SCN
>
struct
NppArithmScalarFunc
;
...
@@ -142,6 +144,8 @@ namespace
...
@@ -142,6 +144,8 @@ namespace
sz
.
height
=
src
.
rows
;
sz
.
height
=
src
.
rows
;
nppSafeCall
(
func
(
src
.
ptr
<
Npp32f
>
(),
src
.
step
,
(
Npp32f
)
sc
[
0
],
dst
.
ptr
<
Npp32f
>
(),
dst
.
step
,
sz
)
);
nppSafeCall
(
func
(
src
.
ptr
<
Npp32f
>
(),
src
.
step
,
(
Npp32f
)
sc
[
0
],
dst
.
ptr
<
Npp32f
>
(),
dst
.
step
,
sz
)
);
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
};
};
template
<
typename
NppArithmScalarFunc
<
2
>::
func_ptr
func
>
struct
NppArithmScalar
<
2
,
func
>
template
<
typename
NppArithmScalarFunc
<
2
>::
func_ptr
func
>
struct
NppArithmScalar
<
2
,
func
>
...
@@ -159,6 +163,8 @@ namespace
...
@@ -159,6 +163,8 @@ namespace
nValue
.
im
=
(
Npp32f
)
sc
[
1
];
nValue
.
im
=
(
Npp32f
)
sc
[
1
];
nppSafeCall
(
func
(
src
.
ptr
<
Npp32fc
>
(),
src
.
step
,
nValue
,
dst
.
ptr
<
Npp32fc
>
(),
dst
.
step
,
sz
)
);
nppSafeCall
(
func
(
src
.
ptr
<
Npp32fc
>
(),
src
.
step
,
nValue
,
dst
.
ptr
<
Npp32fc
>
(),
dst
.
step
,
sz
)
);
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
};
};
}
}
...
@@ -256,6 +262,8 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
...
@@ -256,6 +262,8 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
default
:
default
:
CV_Assert
(
!
"Unsupported source type"
);
CV_Assert
(
!
"Unsupported source type"
);
}
}
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
void
cv
::
gpu
::
absdiff
(
const
GpuMat
&
src
,
const
Scalar
&
s
,
GpuMat
&
dst
)
void
cv
::
gpu
::
absdiff
(
const
GpuMat
&
src
,
const
Scalar
&
s
,
GpuMat
&
dst
)
...
@@ -269,6 +277,8 @@ void cv::gpu::absdiff(const GpuMat& src, const Scalar& s, GpuMat& dst)
...
@@ -269,6 +277,8 @@ void cv::gpu::absdiff(const GpuMat& src, const Scalar& s, GpuMat& dst)
sz
.
height
=
src
.
rows
;
sz
.
height
=
src
.
rows
;
nppSafeCall
(
nppiAbsDiffC_32f_C1R
(
src
.
ptr
<
Npp32f
>
(),
src
.
step
,
dst
.
ptr
<
Npp32f
>
(),
dst
.
step
,
sz
,
(
Npp32f
)
s
[
0
])
);
nppSafeCall
(
nppiAbsDiffC_32f_C1R
(
src
.
ptr
<
Npp32f
>
(),
src
.
step
,
dst
.
ptr
<
Npp32f
>
(),
dst
.
step
,
sz
,
(
Npp32f
)
s
[
0
])
);
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
...
@@ -302,6 +312,8 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c
...
@@ -302,6 +312,8 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c
nppSafeCall
(
nppiCompare_8u_C4R
(
src1
.
ptr
<
Npp8u
>
(),
src1
.
step
,
nppSafeCall
(
nppiCompare_8u_C4R
(
src1
.
ptr
<
Npp8u
>
(),
src1
.
step
,
src2
.
ptr
<
Npp8u
>
(),
src2
.
step
,
src2
.
ptr
<
Npp8u
>
(),
src2
.
step
,
dst
.
ptr
<
Npp8u
>
(),
dst
.
step
,
sz
,
nppCmpOp
[
cmpop
])
);
dst
.
ptr
<
Npp8u
>
(),
dst
.
step
,
sz
,
nppCmpOp
[
cmpop
])
);
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
else
else
{
{
...
@@ -315,6 +327,8 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c
...
@@ -315,6 +327,8 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c
nppSafeCall
(
nppiCompare_32f_C1R
(
src1
.
ptr
<
Npp32f
>
(),
src1
.
step
,
nppSafeCall
(
nppiCompare_32f_C1R
(
src1
.
ptr
<
Npp32f
>
(),
src1
.
step
,
src2
.
ptr
<
Npp32f
>
(),
src2
.
step
,
src2
.
ptr
<
Npp32f
>
(),
src2
.
step
,
dst
.
ptr
<
Npp8u
>
(),
dst
.
step
,
sz
,
nppCmpOp
[
cmpop
])
);
dst
.
ptr
<
Npp8u
>
(),
dst
.
step
,
sz
,
nppCmpOp
[
cmpop
])
);
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
else
else
{
{
...
@@ -751,6 +765,8 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double
...
@@ -751,6 +765,8 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double
nppSafeCall
(
nppiThreshold_32f_C1R
(
src
.
ptr
<
Npp32f
>
(),
src
.
step
,
nppSafeCall
(
nppiThreshold_32f_C1R
(
src
.
ptr
<
Npp32f
>
(),
src
.
step
,
dst
.
ptr
<
Npp32f
>
(),
dst
.
step
,
sz
,
static_cast
<
Npp32f
>
(
thresh
),
NPP_CMP_GREATER
)
);
dst
.
ptr
<
Npp32f
>
(),
dst
.
step
,
sz
,
static_cast
<
Npp32f
>
(
thresh
),
NPP_CMP_GREATER
)
);
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
else
else
{
{
...
...
modules/gpu/src/filtering.cpp
View file @
811ba318
...
@@ -236,6 +236,8 @@ namespace
...
@@ -236,6 +236,8 @@ namespace
sz
.
height
=
src
.
rows
;
sz
.
height
=
src
.
rows
;
nppSafeCall
(
nppiSumWindowRow_8u32f_C1R
(
src
.
ptr
<
Npp8u
>
(),
src
.
step
,
dst
.
ptr
<
Npp32f
>
(),
dst
.
step
,
sz
,
ksize
,
anchor
)
);
nppSafeCall
(
nppiSumWindowRow_8u32f_C1R
(
src
.
ptr
<
Npp8u
>
(),
src
.
step
,
dst
.
ptr
<
Npp32f
>
(),
dst
.
step
,
sz
,
ksize
,
anchor
)
);
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
};
};
}
}
...
@@ -263,6 +265,8 @@ namespace
...
@@ -263,6 +265,8 @@ namespace
sz
.
height
=
src
.
rows
;
sz
.
height
=
src
.
rows
;
nppSafeCall
(
nppiSumWindowColumn_8u32f_C1R
(
src
.
ptr
<
Npp8u
>
(),
src
.
step
,
dst
.
ptr
<
Npp32f
>
(),
dst
.
step
,
sz
,
ksize
,
anchor
)
);
nppSafeCall
(
nppiSumWindowColumn_8u32f_C1R
(
src
.
ptr
<
Npp8u
>
(),
src
.
step
,
dst
.
ptr
<
Npp32f
>
(),
dst
.
step
,
sz
,
ksize
,
anchor
)
);
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
};
};
}
}
...
@@ -302,6 +306,8 @@ namespace
...
@@ -302,6 +306,8 @@ namespace
oAnchor
.
y
=
anchor
.
y
;
oAnchor
.
y
=
anchor
.
y
;
nppSafeCall
(
func
(
src
.
ptr
<
Npp8u
>
(),
src
.
step
,
dst
.
ptr
<
Npp8u
>
(),
dst
.
step
,
sz
,
oKernelSize
,
oAnchor
)
);
nppSafeCall
(
func
(
src
.
ptr
<
Npp8u
>
(),
src
.
step
,
dst
.
ptr
<
Npp8u
>
(),
dst
.
step
,
sz
,
oKernelSize
,
oAnchor
)
);
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
nppFilterBox_t
func
;
nppFilterBox_t
func
;
...
@@ -363,6 +369,8 @@ namespace
...
@@ -363,6 +369,8 @@ namespace
oAnchor
.
y
=
anchor
.
y
;
oAnchor
.
y
=
anchor
.
y
;
nppSafeCall
(
func
(
src
.
ptr
<
Npp8u
>
(),
src
.
step
,
dst
.
ptr
<
Npp8u
>
(),
dst
.
step
,
sz
,
kernel
.
ptr
<
Npp8u
>
(),
oKernelSize
,
oAnchor
)
);
nppSafeCall
(
func
(
src
.
ptr
<
Npp8u
>
(),
src
.
step
,
dst
.
ptr
<
Npp8u
>
(),
dst
.
step
,
sz
,
kernel
.
ptr
<
Npp8u
>
(),
oKernelSize
,
oAnchor
)
);
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
GpuMat
kernel
;
GpuMat
kernel
;
...
@@ -537,6 +545,8 @@ namespace
...
@@ -537,6 +545,8 @@ namespace
nppSafeCall
(
func
(
src
.
ptr
<
Npp8u
>
(),
src
.
step
,
dst
.
ptr
<
Npp8u
>
(),
dst
.
step
,
sz
,
nppSafeCall
(
func
(
src
.
ptr
<
Npp8u
>
(),
src
.
step
,
dst
.
ptr
<
Npp8u
>
(),
dst
.
step
,
sz
,
kernel
.
ptr
<
Npp32s
>
(),
oKernelSize
,
oAnchor
,
nDivisor
)
);
kernel
.
ptr
<
Npp32s
>
(),
oKernelSize
,
oAnchor
,
nDivisor
)
);
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
GpuMat
kernel
;
GpuMat
kernel
;
...
@@ -611,6 +621,8 @@ namespace
...
@@ -611,6 +621,8 @@ namespace
sz
.
height
=
src
.
rows
;
sz
.
height
=
src
.
rows
;
nppSafeCall
(
func
(
src
.
ptr
<
Npp8u
>
(),
src
.
step
,
dst
.
ptr
<
Npp8u
>
(),
dst
.
step
,
sz
,
kernel
.
ptr
<
Npp32s
>
(),
ksize
,
anchor
,
nDivisor
)
);
nppSafeCall
(
func
(
src
.
ptr
<
Npp8u
>
(),
src
.
step
,
dst
.
ptr
<
Npp8u
>
(),
dst
.
step
,
sz
,
kernel
.
ptr
<
Npp32s
>
(),
ksize
,
anchor
,
nDivisor
)
);
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
GpuMat
kernel
;
GpuMat
kernel
;
...
@@ -715,6 +727,8 @@ namespace
...
@@ -715,6 +727,8 @@ namespace
sz
.
height
=
src
.
rows
;
sz
.
height
=
src
.
rows
;
nppSafeCall
(
func
(
src
.
ptr
<
Npp8u
>
(),
src
.
step
,
dst
.
ptr
<
Npp8u
>
(),
dst
.
step
,
sz
,
kernel
.
ptr
<
Npp32s
>
(),
ksize
,
anchor
,
nDivisor
)
);
nppSafeCall
(
func
(
src
.
ptr
<
Npp8u
>
(),
src
.
step
,
dst
.
ptr
<
Npp8u
>
(),
dst
.
step
,
sz
,
kernel
.
ptr
<
Npp32s
>
(),
ksize
,
anchor
,
nDivisor
)
);
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
GpuMat
kernel
;
GpuMat
kernel
;
...
@@ -964,6 +978,8 @@ namespace
...
@@ -964,6 +978,8 @@ namespace
oAnchor
.
y
=
anchor
.
y
;
oAnchor
.
y
=
anchor
.
y
;
nppSafeCall
(
func
(
src
.
ptr
<
Npp8u
>
(),
src
.
step
,
dst
.
ptr
<
Npp8u
>
(),
dst
.
step
,
sz
,
oKernelSize
,
oAnchor
)
);
nppSafeCall
(
func
(
src
.
ptr
<
Npp8u
>
(),
src
.
step
,
dst
.
ptr
<
Npp8u
>
(),
dst
.
step
,
sz
,
oKernelSize
,
oAnchor
)
);
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
nppFilterRank_t
func
;
nppFilterRank_t
func
;
...
...
modules/gpu/src/graphcuts.cpp
View file @
811ba318
...
@@ -71,6 +71,8 @@ void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTrans
...
@@ -71,6 +71,8 @@ void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTrans
nppSafeCall
(
nppiGraphcut_32s8u
(
terminals
.
ptr
<
Npp32s
>
(),
leftTransp
.
ptr
<
Npp32s
>
(),
rightTransp
.
ptr
<
Npp32s
>
(),
top
.
ptr
<
Npp32s
>
(),
bottom
.
ptr
<
Npp32s
>
(),
nppSafeCall
(
nppiGraphcut_32s8u
(
terminals
.
ptr
<
Npp32s
>
(),
leftTransp
.
ptr
<
Npp32s
>
(),
rightTransp
.
ptr
<
Npp32s
>
(),
top
.
ptr
<
Npp32s
>
(),
bottom
.
ptr
<
Npp32s
>
(),
terminals
.
step
,
leftTransp
.
step
,
sznpp
,
labels
.
ptr
<
Npp8u
>
(),
labels
.
step
,
buf
.
ptr
<
Npp8u
>
())
);
terminals
.
step
,
leftTransp
.
step
,
sznpp
,
labels
.
ptr
<
Npp8u
>
(),
labels
.
step
,
buf
.
ptr
<
Npp8u
>
())
);
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
...
...
modules/gpu/src/imgproc_gpu.cpp
View file @
811ba318
...
@@ -286,6 +286,8 @@ void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, doub
...
@@ -286,6 +286,8 @@ void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, doub
nppSafeCall
(
nppiResize_8u_C4R
(
src
.
ptr
<
Npp8u
>
(),
srcsz
,
src
.
step
,
srcrect
,
nppSafeCall
(
nppiResize_8u_C4R
(
src
.
ptr
<
Npp8u
>
(),
srcsz
,
src
.
step
,
srcrect
,
dst
.
ptr
<
Npp8u
>
(),
dst
.
step
,
dstsz
,
fx
,
fy
,
npp_inter
[
interpolation
])
);
dst
.
ptr
<
Npp8u
>
(),
dst
.
step
,
dstsz
,
fx
,
fy
,
npp_inter
[
interpolation
])
);
}
}
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////
...
@@ -338,6 +340,8 @@ void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom
...
@@ -338,6 +340,8 @@ void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom
default
:
default
:
CV_Assert
(
!
"Unsupported source type"
);
CV_Assert
(
!
"Unsupported source type"
);
}
}
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////
...
@@ -406,6 +410,8 @@ namespace
...
@@ -406,6 +410,8 @@ namespace
default:
default:
CV_Assert
(
!
"Unsupported source type"
);
CV_Assert
(
!
"Unsupported source type"
);
}
}
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
}
}
...
@@ -531,6 +537,8 @@ void cv::gpu::rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, d
...
@@ -531,6 +537,8 @@ void cv::gpu::rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, d
nppSafeCall
(
nppiRotate_8u_C4R
(
src
.
ptr
<
Npp8u
>
(),
srcsz
,
src
.
step
,
srcroi
,
nppSafeCall
(
nppiRotate_8u_C4R
(
src
.
ptr
<
Npp8u
>
(),
srcsz
,
src
.
step
,
srcroi
,
dst
.
ptr
<
Npp8u
>
(),
dst
.
step
,
dstroi
,
angle
,
xShift
,
yShift
,
npp_inter
[
interpolation
])
);
dst
.
ptr
<
Npp8u
>
(),
dst
.
step
,
dstroi
,
angle
,
xShift
,
yShift
,
npp_inter
[
interpolation
])
);
}
}
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////
...
@@ -554,6 +562,8 @@ void cv::gpu::integral(const GpuMat& src, GpuMat& sum)
...
@@ -554,6 +562,8 @@ void cv::gpu::integral(const GpuMat& src, GpuMat& sum)
nppSafeCall
(
nppiStIntegral_8u32u_C1R
(
const_cast
<
NppSt8u
*>
(
src
.
ptr
<
NppSt8u
>
()),
src
.
step
,
nppSafeCall
(
nppiStIntegral_8u32u_C1R
(
const_cast
<
NppSt8u
*>
(
src
.
ptr
<
NppSt8u
>
()),
src
.
step
,
sum
.
ptr
<
NppSt32u
>
(),
sum
.
step
,
roiSize
,
buffer
.
ptr
<
NppSt8u
>
(),
bufSize
)
);
sum
.
ptr
<
NppSt32u
>
(),
sum
.
step
,
roiSize
,
buffer
.
ptr
<
NppSt8u
>
(),
bufSize
)
);
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
void
cv
::
gpu
::
integral
(
const
GpuMat
&
src
,
GpuMat
&
sum
,
GpuMat
&
sqsum
)
void
cv
::
gpu
::
integral
(
const
GpuMat
&
src
,
GpuMat
&
sum
,
GpuMat
&
sqsum
)
...
@@ -571,6 +581,8 @@ void cv::gpu::integral(const GpuMat& src, GpuMat& sum, GpuMat& sqsum)
...
@@ -571,6 +581,8 @@ void cv::gpu::integral(const GpuMat& src, GpuMat& sum, GpuMat& sqsum)
nppSafeCall
(
nppiSqrIntegral_8u32s32f_C1R
(
const_cast
<
Npp8u
*>
(
src
.
ptr
<
Npp8u
>
()),
src
.
step
,
sum
.
ptr
<
Npp32s
>
(),
nppSafeCall
(
nppiSqrIntegral_8u32s32f_C1R
(
const_cast
<
Npp8u
*>
(
src
.
ptr
<
Npp8u
>
()),
src
.
step
,
sum
.
ptr
<
Npp32s
>
(),
sum
.
step
,
sqsum
.
ptr
<
Npp32f
>
(),
sqsum
.
step
,
sz
,
0
,
0.0
f
,
h
)
);
sum
.
step
,
sqsum
.
ptr
<
Npp32f
>
(),
sqsum
.
step
,
sz
,
0
,
0.0
f
,
h
)
);
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
...
@@ -593,6 +605,8 @@ void cv::gpu::sqrIntegral(const GpuMat& src, GpuMat& sqsum)
...
@@ -593,6 +605,8 @@ void cv::gpu::sqrIntegral(const GpuMat& src, GpuMat& sqsum)
const_cast
<
NppSt8u
*>
(
src
.
ptr
<
NppSt8u
>
(
0
)),
src
.
step
,
const_cast
<
NppSt8u
*>
(
src
.
ptr
<
NppSt8u
>
(
0
)),
src
.
step
,
sqsum
.
ptr
<
NppSt64u
>
(
0
),
sqsum
.
step
,
roiSize
,
sqsum
.
ptr
<
NppSt64u
>
(
0
),
sqsum
.
step
,
roiSize
,
buf
.
ptr
<
NppSt8u
>
(
0
),
bufSize
));
buf
.
ptr
<
NppSt8u
>
(
0
),
bufSize
));
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
...
@@ -629,6 +643,8 @@ void cv::gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, cons
...
@@ -629,6 +643,8 @@ void cv::gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, cons
nppSafeCall
(
nppiRectStdDev_32s32f_C1R
(
src
.
ptr
<
Npp32s
>
(),
src
.
step
,
sqr
.
ptr
<
Npp32f
>
(),
sqr
.
step
,
nppSafeCall
(
nppiRectStdDev_32s32f_C1R
(
src
.
ptr
<
Npp32s
>
(),
src
.
step
,
sqr
.
ptr
<
Npp32f
>
(),
sqr
.
step
,
dst
.
ptr
<
Npp32f
>
(),
dst
.
step
,
sz
,
nppRect
)
);
dst
.
ptr
<
Npp32f
>
(),
dst
.
step
,
sz
,
nppRect
)
);
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////
...
@@ -659,6 +675,8 @@ void cv::gpu::Canny(const GpuMat& image, GpuMat& edges, double threshold1, doubl
...
@@ -659,6 +675,8 @@ void cv::gpu::Canny(const GpuMat& image, GpuMat& edges, double threshold1, doubl
nppSafeCall
(
nppiCanny_32f8u_C1R
(
srcDx
.
ptr
<
Npp32f
>
(),
srcDx
.
step
,
srcDy
.
ptr
<
Npp32f
>
(),
srcDy
.
step
,
nppSafeCall
(
nppiCanny_32f8u_C1R
(
srcDx
.
ptr
<
Npp32f
>
(),
srcDx
.
step
,
srcDy
.
ptr
<
Npp32f
>
(),
srcDy
.
step
,
edges
.
ptr
<
Npp8u
>
(),
edges
.
step
,
sz
,
(
Npp32f
)
threshold1
,
(
Npp32f
)
threshold2
,
buf
.
ptr
<
Npp8u
>
())
);
edges
.
ptr
<
Npp8u
>
(),
edges
.
step
,
sz
,
(
Npp32f
)
threshold1
,
(
Npp32f
)
threshold2
,
buf
.
ptr
<
Npp8u
>
())
);
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////
...
@@ -711,6 +729,8 @@ namespace
...
@@ -711,6 +729,8 @@ namespace
buffer
.
create
(
1
,
buf_size
,
CV_8U
);
buffer
.
create
(
1
,
buf_size
,
CV_8U
);
nppSafeCall
(
func
(
src
.
ptr
<
src_t
>
(),
src
.
step
,
sz
,
hist
.
ptr
<
Npp32s
>
(),
levels
,
nppSafeCall
(
func
(
src
.
ptr
<
src_t
>
(),
src
.
step
,
sz
,
hist
.
ptr
<
Npp32s
>
(),
levels
,
lowerLevel
,
upperLevel
,
buffer
.
ptr
<
Npp8u
>
())
);
lowerLevel
,
upperLevel
,
buffer
.
ptr
<
Npp8u
>
())
);
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
};
};
template
<
int
SDEPTH
,
typename
NppHistogramEvenFuncC4
<
SDEPTH
>::
func_ptr
func
,
get_buf_size_c4_t
get_buf_size
>
template
<
int
SDEPTH
,
typename
NppHistogramEvenFuncC4
<
SDEPTH
>::
func_ptr
func
,
get_buf_size_c4_t
get_buf_size
>
...
@@ -738,6 +758,8 @@ namespace
...
@@ -738,6 +758,8 @@ namespace
get_buf_size
(
sz
,
levels
,
&
buf_size
);
get_buf_size
(
sz
,
levels
,
&
buf_size
);
buffer
.
create
(
1
,
buf_size
,
CV_8U
);
buffer
.
create
(
1
,
buf_size
,
CV_8U
);
nppSafeCall
(
func
(
src
.
ptr
<
src_t
>
(),
src
.
step
,
sz
,
pHist
,
levels
,
lowerLevel
,
upperLevel
,
buffer
.
ptr
<
Npp8u
>
())
);
nppSafeCall
(
func
(
src
.
ptr
<
src_t
>
(),
src
.
step
,
sz
,
pHist
,
levels
,
lowerLevel
,
upperLevel
,
buffer
.
ptr
<
Npp8u
>
())
);
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
};
};
...
@@ -801,6 +823,8 @@ namespace
...
@@ -801,6 +823,8 @@ namespace
get_buf_size
(
sz
,
levels
.
cols
,
&
buf_size
);
get_buf_size
(
sz
,
levels
.
cols
,
&
buf_size
);
buffer
.
create
(
1
,
buf_size
,
CV_8U
);
buffer
.
create
(
1
,
buf_size
,
CV_8U
);
nppSafeCall
(
func
(
src
.
ptr
<
src_t
>
(),
src
.
step
,
sz
,
hist
.
ptr
<
Npp32s
>
(),
levels
.
ptr
<
level_t
>
(),
levels
.
cols
,
buffer
.
ptr
<
Npp8u
>
())
);
nppSafeCall
(
func
(
src
.
ptr
<
src_t
>
(),
src
.
step
,
sz
,
hist
.
ptr
<
Npp32s
>
(),
levels
.
ptr
<
level_t
>
(),
levels
.
cols
,
buffer
.
ptr
<
Npp8u
>
())
);
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
};
};
template
<
int
SDEPTH
,
typename
NppHistogramRangeFuncC4
<
SDEPTH
>::
func_ptr
func
,
get_buf_size_c4_t
get_buf_size
>
template
<
int
SDEPTH
,
typename
NppHistogramRangeFuncC4
<
SDEPTH
>::
func_ptr
func
,
get_buf_size_c4_t
get_buf_size
>
...
@@ -836,6 +860,8 @@ namespace
...
@@ -836,6 +860,8 @@ namespace
get_buf_size
(
sz
,
nLevels
,
&
buf_size
);
get_buf_size
(
sz
,
nLevels
,
&
buf_size
);
buffer
.
create
(
1
,
buf_size
,
CV_8U
);
buffer
.
create
(
1
,
buf_size
,
CV_8U
);
nppSafeCall
(
func
(
src
.
ptr
<
src_t
>
(),
src
.
step
,
sz
,
pHist
,
pLevels
,
nLevels
,
buffer
.
ptr
<
Npp8u
>
())
);
nppSafeCall
(
func
(
src
.
ptr
<
src_t
>
(),
src
.
step
,
sz
,
pHist
,
pLevels
,
nLevels
,
buffer
.
ptr
<
Npp8u
>
())
);
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
};
};
}
}
...
...
modules/gpu/src/matrix_operations.cpp
View file @
811ba318
...
@@ -176,6 +176,8 @@ namespace
...
@@ -176,6 +176,8 @@ namespace
sz
.
width
=
src
.
cols
;
sz
.
width
=
src
.
cols
;
sz
.
height
=
src
.
rows
;
sz
.
height
=
src
.
rows
;
nppSafeCall
(
func
(
src
.
ptr
<
src_t
>
(),
src
.
step
,
dst
.
ptr
<
dst_t
>
(),
dst
.
step
,
sz
)
);
nppSafeCall
(
func
(
src
.
ptr
<
src_t
>
(),
src
.
step
,
dst
.
ptr
<
dst_t
>
(),
dst
.
step
,
sz
)
);
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
};
};
template
<
int
DDEPTH
,
typename
NppConvertFunc
<
CV_32F
,
DDEPTH
>::
func_ptr
func
>
struct
NppCvt
<
CV_32F
,
DDEPTH
,
func
>
template
<
int
DDEPTH
,
typename
NppConvertFunc
<
CV_32F
,
DDEPTH
>::
func_ptr
func
>
struct
NppCvt
<
CV_32F
,
DDEPTH
,
func
>
...
@@ -188,6 +190,8 @@ namespace
...
@@ -188,6 +190,8 @@ namespace
sz
.
width
=
src
.
cols
;
sz
.
width
=
src
.
cols
;
sz
.
height
=
src
.
rows
;
sz
.
height
=
src
.
rows
;
nppSafeCall
(
func
(
src
.
ptr
<
Npp32f
>
(),
src
.
step
,
dst
.
ptr
<
dst_t
>
(),
dst
.
step
,
sz
,
NPP_RND_NEAR
)
);
nppSafeCall
(
func
(
src
.
ptr
<
Npp32f
>
(),
src
.
step
,
dst
.
ptr
<
dst_t
>
(),
dst
.
step
,
sz
,
NPP_RND_NEAR
)
);
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
};
};
...
@@ -339,6 +343,8 @@ namespace
...
@@ -339,6 +343,8 @@ namespace
sz
.
height
=
src
.
rows
;
sz
.
height
=
src
.
rows
;
Scalar_
<
src_t
>
nppS
=
s
;
Scalar_
<
src_t
>
nppS
=
s
;
nppSafeCall
(
func
(
nppS
.
val
,
src
.
ptr
<
src_t
>
(),
src
.
step
,
sz
)
);
nppSafeCall
(
func
(
nppS
.
val
,
src
.
ptr
<
src_t
>
(),
src
.
step
,
sz
)
);
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
};
};
template
<
int
SDEPTH
,
typename
NppSetFunc
<
SDEPTH
,
1
>::
func_ptr
func
>
struct
NppSet
<
SDEPTH
,
1
,
func
>
template
<
int
SDEPTH
,
typename
NppSetFunc
<
SDEPTH
,
1
>::
func_ptr
func
>
struct
NppSet
<
SDEPTH
,
1
,
func
>
...
@@ -352,6 +358,8 @@ namespace
...
@@ -352,6 +358,8 @@ namespace
sz
.
height
=
src
.
rows
;
sz
.
height
=
src
.
rows
;
Scalar_
<
src_t
>
nppS
=
s
;
Scalar_
<
src_t
>
nppS
=
s
;
nppSafeCall
(
func
(
nppS
[
0
],
src
.
ptr
<
src_t
>
(),
src
.
step
,
sz
)
);
nppSafeCall
(
func
(
nppS
[
0
],
src
.
ptr
<
src_t
>
(),
src
.
step
,
sz
)
);
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
};
};
...
@@ -384,6 +392,8 @@ namespace
...
@@ -384,6 +392,8 @@ namespace
sz
.
height
=
src
.
rows
;
sz
.
height
=
src
.
rows
;
Scalar_
<
src_t
>
nppS
=
s
;
Scalar_
<
src_t
>
nppS
=
s
;
nppSafeCall
(
func
(
nppS
.
val
,
src
.
ptr
<
src_t
>
(),
src
.
step
,
sz
,
mask
.
ptr
<
Npp8u
>
(),
mask
.
step
)
);
nppSafeCall
(
func
(
nppS
.
val
,
src
.
ptr
<
src_t
>
(),
src
.
step
,
sz
,
mask
.
ptr
<
Npp8u
>
(),
mask
.
step
)
);
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
};
};
template
<
int
SDEPTH
,
typename
NppSetMaskFunc
<
SDEPTH
,
1
>::
func_ptr
func
>
struct
NppSetMask
<
SDEPTH
,
1
,
func
>
template
<
int
SDEPTH
,
typename
NppSetMaskFunc
<
SDEPTH
,
1
>::
func_ptr
func
>
struct
NppSetMask
<
SDEPTH
,
1
,
func
>
...
@@ -397,6 +407,8 @@ namespace
...
@@ -397,6 +407,8 @@ namespace
sz
.
height
=
src
.
rows
;
sz
.
height
=
src
.
rows
;
Scalar_
<
src_t
>
nppS
=
s
;
Scalar_
<
src_t
>
nppS
=
s
;
nppSafeCall
(
func
(
nppS
[
0
],
src
.
ptr
<
src_t
>
(),
src
.
step
,
sz
,
mask
.
ptr
<
Npp8u
>
(),
mask
.
step
)
);
nppSafeCall
(
func
(
nppS
[
0
],
src
.
ptr
<
src_t
>
(),
src
.
step
,
sz
,
mask
.
ptr
<
Npp8u
>
(),
mask
.
step
)
);
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
};
};
...
...
modules/gpu/src/matrix_reductions.cpp
View file @
811ba318
...
@@ -76,6 +76,8 @@ void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev)
...
@@ -76,6 +76,8 @@ void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev)
sz
.
height
=
src
.
rows
;
sz
.
height
=
src
.
rows
;
nppSafeCall
(
nppiMean_StdDev_8u_C1R
(
src
.
ptr
<
Npp8u
>
(),
src
.
step
,
sz
,
mean
.
val
,
stddev
.
val
)
);
nppSafeCall
(
nppiMean_StdDev_8u_C1R
(
src
.
ptr
<
Npp8u
>
(),
src
.
step
,
sz
,
mean
.
val
,
stddev
.
val
)
);
cudaSafeCall
(
cudaThreadSynchronize
()
);
}
}
...
@@ -110,6 +112,8 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType)
...
@@ -110,6 +112,8 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType)
src2
.
ptr
<
Npp8u
>
(),
src2
.
step
,
src2
.
ptr
<
Npp8u
>
(),
src2
.
step
,
sz
,
&
retVal
)
);
sz
,
&
retVal
)
);
cudaSafeCall
(
cudaThreadSynchronize
()
);
return
retVal
;
return
retVal
;
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment