Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
0cbf9eb2
Commit
0cbf9eb2
authored
Nov 09, 2012
by
marina.kolpakova
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add support for CUDA streams
parent
40600fa5
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
143 additions
and
47 deletions
+143
-47
gpu.hpp
modules/gpu/include/opencv2/gpu/gpu.hpp
+1
-1
perf_softcascade.cpp
modules/gpu/perf/perf_softcascade.cpp
+42
-2
integral_image.cu
modules/gpu/src/cuda/integral_image.cu
+0
-1
isf-sc.cu
modules/gpu/src/cuda/isf-sc.cu
+18
-11
icf.hpp
modules/gpu/src/icf.hpp
+1
-1
softcascade.cpp
modules/gpu/src/softcascade.cpp
+41
-31
test_softcascade.cpp
modules/gpu/test/test_softcascade.cpp
+40
-0
No files found.
modules/gpu/include/opencv2/gpu/gpu.hpp
View file @
0cbf9eb2
...
...
@@ -1577,7 +1577,7 @@ public:
virtual
void
detect
(
InputArray
image
,
InputArray
rois
,
OutputArray
objects
,
Stream
&
stream
=
Stream
::
Null
())
const
;
virtual
void
detect
(
InputArray
image
,
InputArray
rois
,
OutputArray
objects
,
const
int
level
,
Stream
&
stream
=
Stream
::
Null
())
const
;
void
genRoi
(
InputArray
roi
,
OutputArray
mask
)
const
;
void
genRoi
(
InputArray
roi
,
OutputArray
mask
,
Stream
&
stream
=
Stream
::
Null
()
)
const
;
private
:
...
...
modules/gpu/perf/perf_softcascade.cpp
View file @
0cbf9eb2
...
...
@@ -284,4 +284,44 @@ RUN_GPU(SCascadeTest, detectOnIntegral)
SANITY_CHECK
(
sortDetections
(
objectBoxes
));
}
NO_CPU
(
SCascadeTest
,
detectOnIntegral
)
\ No newline at end of file
NO_CPU
(
SCascadeTest
,
detectOnIntegral
)
GPU_PERF_TEST_P
(
SCascadeTest
,
detectStream
,
testing
::
Combine
(
testing
::
Values
(
std
::
string
(
"cv/cascadeandhog/sc_cvpr_2012_to_opencv.xml"
)),
testing
::
Values
(
std
::
string
(
"cv/cascadeandhog/bahnhof/image_00000000_0.png"
))))
{
}
RUN_GPU
(
SCascadeTest
,
detectStream
)
{
cv
::
Mat
cpu
=
readImage
(
GET_PARAM
(
1
));
ASSERT_FALSE
(
cpu
.
empty
());
cv
::
gpu
::
GpuMat
colored
(
cpu
);
cv
::
gpu
::
SCascade
cascade
;
cv
::
FileStorage
fs
(
perf
::
TestBase
::
getDataPath
(
GET_PARAM
(
0
)),
cv
::
FileStorage
::
READ
);
ASSERT_TRUE
(
fs
.
isOpened
());
ASSERT_TRUE
(
cascade
.
load
(
fs
.
getFirstTopLevelNode
()));
cv
::
gpu
::
GpuMat
objectBoxes
(
1
,
10000
*
sizeof
(
cv
::
gpu
::
SCascade
::
Detection
),
CV_8UC1
),
rois
(
colored
.
size
(),
CV_8UC1
),
trois
;
rois
.
setTo
(
1
);
cv
::
gpu
::
Stream
s
;
cascade
.
genRoi
(
rois
,
trois
,
s
);
cascade
.
detect
(
colored
,
trois
,
objectBoxes
,
s
);
TEST_CYCLE
()
{
cascade
.
detect
(
colored
,
trois
,
objectBoxes
,
s
);
}
cudaDeviceSynchronize
();
SANITY_CHECK
(
sortDetections
(
objectBoxes
));
}
NO_CPU
(
SCascadeTest
,
detectStream
)
\ No newline at end of file
modules/gpu/src/cuda/integral_image.cu
View file @
0cbf9eb2
...
...
@@ -444,7 +444,6 @@ namespace cv { namespace gpu { namespace device
}
// used for frame preprocessing before Soft Cascade evaluation: no synchronization needed
// ToDo: partial dy
void shfl_integral_gpu_buffered(PtrStepSzb img, PtrStepSz<uint4> buffer, PtrStepSz<unsigned int> integral,
int blockStep, cudaStream_t stream)
{
...
...
modules/gpu/src/cuda/isf-sc.cu
View file @
0cbf9eb2
...
...
@@ -71,7 +71,7 @@ namespace icf {
}
void fillBins(cv::gpu::PtrStepSzb hogluv, const cv::gpu::PtrStepSzf& nangle,
const int fw, const int fh, const int bins)
const int fw, const int fh, const int bins
, cudaStream_t stream
)
{
const uchar* mag = (const uchar*)hogluv.ptr(fh * bins);
uchar* hog = (uchar*)hogluv.ptr();
...
...
@@ -80,9 +80,12 @@ namespace icf {
dim3 block(32, 8);
dim3 grid(fw / 32, fh / 8);
magToHist<<<grid, block>>>(mag, angle, nangle.step / sizeof(float), hog, hogluv.step, fh);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
magToHist<<<grid, block, 0, stream>>>(mag, angle, nangle.step / sizeof(float), hog, hogluv.step, fh);
if (!stream)
{
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
}
}
texture<int, cudaTextureType2D, cudaReadModeElementType> thogluv;
...
...
@@ -305,7 +308,7 @@ namespace icf {
template<>
void CascadeInvoker<CascadePolicy>::operator()(const PtrStepSzb& roi, const PtrStepSzi& hogluv,
PtrStepSz<uchar4> objects, PtrStepSzi counter, const int downscales, const int scale) const
PtrStepSz<uchar4> objects, PtrStepSzi counter, const int downscales, const int scale
, const cudaStream_t& stream
) const
{
int fw = 160;
int fh = 120;
...
...
@@ -325,22 +328,25 @@ namespace icf {
if (scale == -1)
{
test_kernel_warp<false><<<grid, block>>>(levels, octaves, stages, nodes, leaves, det, max_det, ctr, 0);
test_kernel_warp<false><<<grid, block
, 0, stream
>>>(levels, octaves, stages, nodes, leaves, det, max_det, ctr, 0);
cudaSafeCall( cudaGetLastError());
grid = dim3(fw, fh / 8, 47 - downscales);
test_kernel_warp<true><<<grid, block>>>(levels, octaves, stages, nodes, leaves, det, max_det, ctr, downscales);
test_kernel_warp<true><<<grid, block
, 0, stream
>>>(levels, octaves, stages, nodes, leaves, det, max_det, ctr, downscales);
}
else
{
if (scale >= downscales)
test_kernel_warp<true><<<grid, block>>>(levels, octaves, stages, nodes, leaves, det, max_det, ctr, scale);
test_kernel_warp<true><<<grid, block
, 0, stream
>>>(levels, octaves, stages, nodes, leaves, det, max_det, ctr, scale);
else
test_kernel_warp<false><<<grid, block>>>(levels, octaves, stages, nodes, leaves, det, max_det, ctr, scale);
test_kernel_warp<false><<<grid, block
, 0, stream
>>>(levels, octaves, stages, nodes, leaves, det, max_det, ctr, scale);
}
cudaSafeCall( cudaGetLastError());
cudaSafeCall( cudaDeviceSynchronize());
if (!stream)
{
cudaSafeCall( cudaGetLastError());
cudaSafeCall( cudaDeviceSynchronize());
}
}
}
}}}
\ No newline at end of file
modules/gpu/src/icf.hpp
View file @
0cbf9eb2
...
...
@@ -139,7 +139,7 @@ struct CascadeInvoker
const
float
*
leaves
;
void
operator
()(
const
PtrStepSzb
&
roi
,
const
PtrStepSzi
&
hogluv
,
PtrStepSz
<
uchar4
>
objects
,
PtrStepSzi
counter
,
const
int
downscales
,
const
int
csale
=
-
1
)
const
;
PtrStepSzi
counter
,
const
int
downscales
,
const
int
csale
=
-
1
,
const
cudaStream_t
&
stream
=
0
)
const
;
};
}
...
...
modules/gpu/src/softcascade.cpp
View file @
0cbf9eb2
...
...
@@ -54,7 +54,7 @@ bool cv::gpu::SCascade::load(const FileNode&) { throw_nogpu(); return false;}
void
cv
::
gpu
::
SCascade
::
detect
(
InputArray
,
InputArray
,
OutputArray
,
Stream
&
)
const
{
throw_nogpu
();
}
void
cv
::
gpu
::
SCascade
::
detect
(
InputArray
,
InputArray
,
OutputArray
,
const
int
,
Stream
&
)
const
{
throw_nogpu
();
}
void
cv
::
gpu
::
SCascade
::
genRoi
(
InputArray
,
OutputArray
)
const
{
throw_nogpu
();
}
void
cv
::
gpu
::
SCascade
::
genRoi
(
InputArray
,
OutputArray
,
Stream
&
)
const
{
throw_nogpu
();
}
void
cv
::
gpu
::
SCascade
::
read
(
const
FileNode
&
fn
)
{
Algorithm
::
read
(
fn
);
}
...
...
@@ -76,7 +76,7 @@ namespace cv { namespace gpu { namespace device {
namespace
icf
{
void
fillBins
(
cv
::
gpu
::
PtrStepSzb
hogluv
,
const
cv
::
gpu
::
PtrStepSzf
&
nangle
,
const
int
fw
,
const
int
fh
,
const
int
bins
);
const
int
fw
,
const
int
fh
,
const
int
bins
,
cudaStream_t
stream
);
}
namespace
imgproc
{
...
...
@@ -341,27 +341,30 @@ struct cv::gpu::SCascade::Fields
}
void
detect
(
int
scale
,
const
cv
::
gpu
::
GpuMat
&
roi
,
const
cv
::
gpu
::
GpuMat
&
count
,
cv
::
gpu
::
GpuMat
&
objects
,
c
udaStream_t
stream
)
const
void
detect
(
int
scale
,
const
cv
::
gpu
::
GpuMat
&
roi
,
const
cv
::
gpu
::
GpuMat
&
count
,
cv
::
gpu
::
GpuMat
&
objects
,
c
onst
cudaStream_t
&
stream
)
const
{
cudaMemset
(
count
.
data
,
0
,
sizeof
(
Detection
));
cudaSafeCall
(
cudaGetLastError
());
invoker
(
roi
,
hogluv
,
objects
,
count
,
downscales
,
scale
);
invoker
(
roi
,
hogluv
,
objects
,
count
,
downscales
,
scale
,
stream
);
}
void
preprocess
(
const
cv
::
gpu
::
GpuMat
&
colored
)
void
preprocess
(
const
cv
::
gpu
::
GpuMat
&
colored
,
Stream
&
s
)
{
cudaMemset
(
plane
.
data
,
0
,
plane
.
step
*
plane
.
rows
);
if
(
s
)
s
.
enqueueMemSet
(
plane
,
0
);
else
cudaMemset
(
plane
.
data
,
0
,
plane
.
step
*
plane
.
rows
);
static
const
int
fw
=
Fields
::
FRAME_WIDTH
;
static
const
int
fh
=
Fields
::
FRAME_HEIGHT
;
GpuMat
gray
(
plane
,
cv
::
Rect
(
0
,
fh
*
Fields
::
HOG_LUV_BINS
,
fw
,
fh
));
cv
::
gpu
::
cvtColor
(
colored
,
gray
,
CV_BGR2GRAY
);
createHogBins
(
gray
);
cv
::
gpu
::
cvtColor
(
colored
,
gray
,
CV_BGR2GRAY
,
s
);
createHogBins
(
gray
,
s
);
createLuvBins
(
colored
);
createLuvBins
(
colored
,
s
);
integrate
();
integrate
(
s
);
}
private
:
...
...
@@ -386,7 +389,7 @@ private:
return
res
;
}
void
createHogBins
(
const
cv
::
gpu
::
GpuMat
&
gray
)
void
createHogBins
(
const
cv
::
gpu
::
GpuMat
&
gray
,
Stream
&
s
)
{
static
const
int
fw
=
Fields
::
FRAME_WIDTH
;
static
const
int
fh
=
Fields
::
FRAME_HEIGHT
;
...
...
@@ -394,35 +397,38 @@ private:
GpuMat
dfdx
(
fplane
,
cv
::
Rect
(
0
,
0
,
fw
,
fh
));
GpuMat
dfdy
(
fplane
,
cv
::
Rect
(
0
,
fh
,
fw
,
fh
));
cv
::
gpu
::
Sobel
(
gray
,
dfdx
,
CV_32F
,
1
,
0
);
cv
::
gpu
::
Sobel
(
gray
,
dfdy
,
CV_32F
,
0
,
1
);
cv
::
gpu
::
Sobel
(
gray
,
dfdx
,
CV_32F
,
1
,
0
,
sobelBuf
,
3
,
1
,
BORDER_DEFAULT
,
-
1
,
s
);
cv
::
gpu
::
Sobel
(
gray
,
dfdy
,
CV_32F
,
0
,
1
,
sobelBuf
,
3
,
1
,
BORDER_DEFAULT
,
-
1
,
s
);
GpuMat
mag
(
fplane
,
cv
::
Rect
(
0
,
2
*
fh
,
fw
,
fh
));
GpuMat
ang
(
fplane
,
cv
::
Rect
(
0
,
3
*
fh
,
fw
,
fh
));
cv
::
gpu
::
cartToPolar
(
dfdx
,
dfdy
,
mag
,
ang
,
true
);
cv
::
gpu
::
cartToPolar
(
dfdx
,
dfdy
,
mag
,
ang
,
true
,
s
);
// normolize magnitude to uchar interval and angles to 6 bins
GpuMat
nmag
(
fplane
,
cv
::
Rect
(
0
,
4
*
fh
,
fw
,
fh
));
GpuMat
nang
(
fplane
,
cv
::
Rect
(
0
,
5
*
fh
,
fw
,
fh
));
cv
::
gpu
::
multiply
(
mag
,
cv
::
Scalar
::
all
(
1.
f
/
(
8
*::
log
(
2
))),
nmag
);
cv
::
gpu
::
multiply
(
ang
,
cv
::
Scalar
::
all
(
1.
f
/
60.
f
),
nang
);
cv
::
gpu
::
multiply
(
mag
,
cv
::
Scalar
::
all
(
1.
f
/
(
8
*::
log
(
2
))),
nmag
,
1
,
-
1
,
s
);
cv
::
gpu
::
multiply
(
ang
,
cv
::
Scalar
::
all
(
1.
f
/
60.
f
),
nang
,
1
,
-
1
,
s
);
//create uchar magnitude
GpuMat
cmag
(
plane
,
cv
::
Rect
(
0
,
fh
*
Fields
::
HOG_BINS
,
fw
,
fh
));
nmag
.
convertTo
(
cmag
,
CV_8UC1
);
if
(
s
)
s
.
enqueueConvert
(
nmag
,
cmag
,
CV_8UC1
);
else
nmag
.
convertTo
(
cmag
,
CV_8UC1
);
device
::
icf
::
fillBins
(
plane
,
nang
,
fw
,
fh
,
Fields
::
HOG_BINS
);
cudaStream_t
stream
=
StreamAccessor
::
getStream
(
s
);
device
::
icf
::
fillBins
(
plane
,
nang
,
fw
,
fh
,
Fields
::
HOG_BINS
,
stream
);
}
void
createLuvBins
(
const
cv
::
gpu
::
GpuMat
&
colored
)
void
createLuvBins
(
const
cv
::
gpu
::
GpuMat
&
colored
,
Stream
&
s
)
{
static
const
int
fw
=
Fields
::
FRAME_WIDTH
;
static
const
int
fh
=
Fields
::
FRAME_HEIGHT
;
cv
::
gpu
::
cvtColor
(
colored
,
luv
,
CV_BGR2Luv
);
cv
::
gpu
::
cvtColor
(
colored
,
luv
,
CV_BGR2Luv
,
s
);
std
::
vector
<
GpuMat
>
splited
;
for
(
int
i
=
0
;
i
<
Fields
::
LUV_BINS
;
++
i
)
...
...
@@ -430,17 +436,18 @@ private:
splited
.
push_back
(
GpuMat
(
plane
,
cv
::
Rect
(
0
,
fh
*
(
7
+
i
),
fw
,
fh
)));
}
cv
::
gpu
::
split
(
luv
,
splited
);
cv
::
gpu
::
split
(
luv
,
splited
,
s
);
}
void
integrate
()
void
integrate
(
Stream
&
s
)
{
int
fw
=
Fields
::
FRAME_WIDTH
;
int
fh
=
Fields
::
FRAME_HEIGHT
;
GpuMat
channels
(
plane
,
cv
::
Rect
(
0
,
0
,
fw
,
fh
*
Fields
::
HOG_LUV_BINS
));
cv
::
gpu
::
resize
(
channels
,
shrunk
,
cv
::
Size
(),
0.25
,
0.25
,
CV_INTER_AREA
);
device
::
imgproc
::
shfl_integral_gpu_buffered
(
shrunk
,
integralBuffer
,
hogluv
,
12
,
0
);
cv
::
gpu
::
resize
(
channels
,
shrunk
,
cv
::
Size
(),
0.25
,
0.25
,
CV_INTER_AREA
,
s
);
cudaStream_t
stream
=
StreamAccessor
::
getStream
(
s
);
device
::
imgproc
::
shfl_integral_gpu_buffered
(
shrunk
,
integralBuffer
,
hogluv
,
12
,
stream
);
}
public
:
...
...
@@ -482,6 +489,8 @@ public:
GpuMat
leaves
;
GpuMat
levels
;
GpuMat
sobelBuf
;
device
::
icf
::
CascadeInvoker
<
device
::
icf
::
CascadePolicy
>
invoker
;
enum
{
BOOST
=
0
};
...
...
@@ -516,6 +525,8 @@ void cv::gpu::SCascade::detect(InputArray image, InputArray _rois, OutputArray _
// only color images are supperted
CV_Assert
(
colored
.
type
()
==
CV_8UC3
||
colored
.
type
()
==
CV_32SC1
);
GpuMat
rois
=
_rois
.
getGpuMat
(),
objects
=
_objects
.
getGpuMat
();
// we guess user knows about shrincage
// CV_Assert((rois.size().width == getRoiSize().height) && (rois.type() == CV_8UC1));
...
...
@@ -525,14 +536,13 @@ void cv::gpu::SCascade::detect(InputArray image, InputArray _rois, OutputArray _
{
// only this window size allowed
CV_Assert
(
colored
.
cols
==
Fields
::
FRAME_WIDTH
&&
colored
.
rows
==
Fields
::
FRAME_HEIGHT
);
flds
.
preprocess
(
colored
);
flds
.
preprocess
(
colored
,
s
);
}
else
{
colored
.
copyTo
(
flds
.
hogluv
);
}
GpuMat
rois
=
_rois
.
getGpuMat
(),
objects
=
_objects
.
getGpuMat
();
GpuMat
tmp
=
GpuMat
(
objects
,
cv
::
Rect
(
0
,
0
,
sizeof
(
Detection
),
1
));
objects
=
GpuMat
(
objects
,
cv
::
Rect
(
sizeof
(
Detection
),
0
,
objects
.
cols
-
sizeof
(
Detection
),
1
));
...
...
@@ -556,7 +566,7 @@ void cv::gpu::SCascade::detect(InputArray image, InputArray _rois, OutputArray _
{
// only this window size allowed
CV_Assert
(
colored
.
cols
==
Fields
::
FRAME_WIDTH
&&
colored
.
rows
==
Fields
::
FRAME_HEIGHT
);
flds
.
preprocess
(
colored
);
flds
.
preprocess
(
colored
,
s
);
}
else
{
...
...
@@ -572,15 +582,15 @@ void cv::gpu::SCascade::detect(InputArray image, InputArray _rois, OutputArray _
flds
.
detect
(
level
,
rois
,
tmp
,
objects
,
stream
);
}
void
cv
::
gpu
::
SCascade
::
genRoi
(
InputArray
_roi
,
OutputArray
_mask
)
const
void
cv
::
gpu
::
SCascade
::
genRoi
(
InputArray
_roi
,
OutputArray
_mask
,
Stream
&
stream
)
const
{
const
GpuMat
roi
=
_roi
.
getGpuMat
();
_mask
.
create
(
roi
.
cols
/
4
,
roi
.
rows
/
4
,
roi
.
type
()
);
GpuMat
mask
=
_mask
.
getGpuMat
();
cv
::
gpu
::
GpuMat
tmp
;
cv
::
gpu
::
resize
(
roi
,
tmp
,
cv
::
Size
(),
0.25
,
0.25
,
CV_INTER_AREA
);
cv
::
gpu
::
transpose
(
tmp
,
mask
);
cv
::
gpu
::
resize
(
roi
,
tmp
,
cv
::
Size
(),
0.25
,
0.25
,
CV_INTER_AREA
,
stream
);
cv
::
gpu
::
transpose
(
tmp
,
mask
,
stream
);
}
void
cv
::
gpu
::
SCascade
::
read
(
const
FileNode
&
fn
)
...
...
modules/gpu/test/test_softcascade.cpp
View file @
0cbf9eb2
...
...
@@ -330,4 +330,43 @@ GPU_TEST_P(SCascadeTestAll, detectOnIntegral,
ASSERT_EQ
(
a
,
1024
);
}
GPU_TEST_P
(
SCascadeTestAll
,
detectStream
,
ALL_DEVICES
)
{
cv
::
gpu
::
setDevice
(
GetParam
().
deviceID
());
std
::
string
xml
=
cvtest
::
TS
::
ptr
()
->
get_data_path
()
+
"../cv/cascadeandhog/sc_cvpr_2012_to_opencv.xml"
;
cv
::
gpu
::
SCascade
cascade
;
cv
::
FileStorage
fs
(
xml
,
cv
::
FileStorage
::
READ
);
ASSERT_TRUE
(
fs
.
isOpened
());
ASSERT_TRUE
(
cascade
.
load
(
fs
.
getFirstTopLevelNode
()));
cv
::
Mat
coloredCpu
=
cv
::
imread
(
cvtest
::
TS
::
ptr
()
->
get_data_path
()
+
"../cv/cascadeandhog/bahnhof/image_00000000_0.png"
);
ASSERT_FALSE
(
coloredCpu
.
empty
());
GpuMat
colored
(
coloredCpu
),
objectBoxes
(
1
,
100000
,
CV_8UC1
),
rois
(
colored
.
size
(),
CV_8UC1
);
rois
.
setTo
(
0
);
GpuMat
sub
(
rois
,
cv
::
Rect
(
rois
.
cols
/
4
,
rois
.
rows
/
4
,
rois
.
cols
/
2
,
rois
.
rows
/
2
));
sub
.
setTo
(
cv
::
Scalar
::
all
(
1
));
cv
::
gpu
::
Stream
s
;
cv
::
gpu
::
GpuMat
trois
;
cascade
.
genRoi
(
rois
,
trois
,
s
);
cascade
.
detect
(
colored
,
trois
,
objectBoxes
,
s
);
cudaDeviceSynchronize
();
typedef
cv
::
gpu
::
SCascade
::
Detection
Detection
;
cv
::
Mat
detections
(
objectBoxes
);
int
a
=
*
(
detections
.
ptr
<
int
>
(
0
));
ASSERT_EQ
(
a
,
2460
);
}
#endif
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment