Commit e63ab8de, authored Jul 17, 2012 by Marina Kolpakova

LBP: multiscale approach; refactored atomics usage

parent 5dc7752d

Showing 5 changed files with 175 additions and 123 deletions
- modules/gpu/include/opencv2/gpu/gpu.hpp (+1, -0)
- modules/gpu/src/cascadeclassifier.cpp (+103, -31)
- modules/gpu/src/cuda/lbp.cu (+0, -0)
- modules/gpu/src/opencv2/gpu/device/emulation.hpp (+60, -38)
- modules/gpu/src/opencv2/gpu/device/lbp.hpp (+11, -54)
modules/gpu/include/opencv2/gpu/gpu.hpp

@@ -1464,6 +1464,7 @@ private:
     GpuMat resuzeBuffer;

     GpuMat candidates;
+    static const int integralFactor = 4;
 };

 ////////////////////////////////// SURF //////////////////////////////////////////
modules/gpu/src/cascadeclassifier.cpp

@@ -67,7 +67,7 @@ cv::gpu::CascadeClassifier_GPU_LBP::~CascadeClassifier_GPU_LBP()
 bool cv::gpu::CascadeClassifier_GPU_LBP::empty() const { throw_nogpu(); return true; }
 bool cv::gpu::CascadeClassifier_GPU_LBP::load(const string&) { throw_nogpu(); return true; }
 Size cv::gpu::CascadeClassifier_GPU_LBP::getClassifierSize() const { throw_nogpu(); return Size(); }
 void cv::gpu::CascadeClassifier_GPU_LBP::allocateBuffers(cv::Size /*frame*/) { throw_nogpu(); }
 int cv::gpu::CascadeClassifier_GPU_LBP::detectMultiScale(const cv::gpu::GpuMat& /*image*/, cv::gpu::GpuMat& /*objectsBuf*/, double /*scaleFactor*/, int /*minNeighbors*/, cv::Size /*maxObjectSize*/) { throw_nogpu(); return 0; }
@@ -86,7 +86,7 @@ void cv::gpu::CascadeClassifier_GPU_LBP::allocateBuffers(cv::Size frame)
 {
     resuzeBuffer.create(frame, CV_8UC1);

-    integral.create(frame.height + 1, frame.width + 1, CV_32SC1);
+    integral.create(frame.height + 1, integralFactor * (frame.width + 1), CV_32SC1);

     NcvSize32u roiSize;
     roiSize.width = frame.width;
     roiSize.height = frame.height;
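The widened buffer is what makes the single-launch multiscale pass below possible: each pyramid level's (w+1)-column integral image is packed side by side into integralFactor * (frame.width + 1) columns, and the prev/acc counters in detectMultiScale track where each level starts. A minimal standalone sketch of that packing arithmetic (not from the commit; frame and window sizes are illustrative):

#include <cstdio>
#include <cmath>

int main()
{
    const int integralFactor = 4;            // mirrors the constant added to gpu.hpp
    const int frameW = 640, frameH = 480;    // illustrative frame
    const int windowW = 24;                  // illustrative detection window
    const double scaleFactor = 1.2;

    double scale = 1.0;
    int prev = 0;                            // column where the current level's integral starts
    int acc = frameW + 1;                    // columns consumed after placing the first level

    // Pack scaled integrals until the widened buffer (integralFactor * (frameW + 1)
    // columns) is exhausted or the scaled frame can no longer hold the window.
    while (acc <= integralFactor * (frameW + 1))
    {
        int sw = (int)std::lround(frameW / scale);
        int sh = (int)std::lround(frameH / scale);
        if (sw - windowW + 1 <= 0) break;    // work area vanished

        std::printf("scale %5.2f: %4dx%-4d integral at column %d\n",
                    scale, sw + 1, sh + 1, prev);

        scale *= scaleFactor;                // next pyramid level
        prev = acc;
        acc += (int)std::lround(frameW / scale) + 1;
    }
    return 0;
}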
@@ -284,14 +284,83 @@ namespace cv { namespace gpu { namespace device
                 DevMem2D_<int4> objects, unsigned int* classified);

+            void classifyPyramid(int frameW, int frameH, int windowW, int windowH, float initalScale, float factor, int total,
+                const DevMem2Db& mstages, const int nstages, const DevMem2Di& mnodes, const DevMem2Df& mleaves, const DevMem2Di& msubsets,
+                const DevMem2Db& mfeatures, const int subsetSize, DevMem2D_<int4> objects, unsigned int* classified, DevMem2Di integral);

             void connectedConmonents(DevMem2D_<int4> candidates, int ncandidates, DevMem2D_<int4> objects, int groupThreshold, float grouping_eps, unsigned int* nclasses);

             void bindIntegral(DevMem2Di integral);
             void unbindIntegral();
         }
 }}}

-int cv::gpu::CascadeClassifier_GPU_LBP::detectMultiScale(const GpuMat& image, GpuMat& objects, double scaleFactor, int groupThreshold, cv::Size maxObjectSize /*, Size minSize=Size()*/)
+cv::Size operator -(const cv::Size& a, const cv::Size& b)
+{
+    return cv::Size(a.width - b.width, a.height - b.height);
+}
+
+cv::Size operator +(const cv::Size& a, const int& i)
+{
+    return cv::Size(a.width + i, a.height + i);
+}
+
+cv::Size operator *(const cv::Size& a, const float& f)
+{
+    return cv::Size(cvRound(a.width * f), cvRound(a.height * f));
+}
+
+cv::Size operator /(const cv::Size& a, const float& f)
+{
+    return cv::Size(cvRound(a.width / f), cvRound(a.height / f));
+}
+
+bool operator <=(const cv::Size& a, const cv::Size& b)
+{
+    return a.width <= b.width && a.height <= b.height;
+}
+
+struct PyrLavel
+{
+    PyrLavel(int _order, float _scale, cv::Size frame, cv::Size window) : order(_order)
+    {
+        scale = pow(_scale, order);
+        sFrame = frame / scale;
+        workArea = sFrame - window + 1;
+        sWindow = window * scale;
+    }
+
+    bool isFeasible(cv::Size maxObj)
+    {
+        return workArea.width > 0 && workArea.height > 0 && sWindow <= maxObj;
+    }
+
+    PyrLavel next(float factor, cv::Size frame, cv::Size window)
+    {
+        return PyrLavel(order + 1, factor, frame, window);
+    }
+
+    int order;
+    float scale;
+    cv::Size sFrame;
+    cv::Size workArea;
+    cv::Size sWindow;
+};
+
+int cv::gpu::CascadeClassifier_GPU_LBP::detectMultiScale(const GpuMat& image, GpuMat& objects, double scaleFactor, int groupThreshold, cv::Size maxObjectSize)
 {
     CV_Assert(!empty() && scaleFactor > 1 && image.depth() == CV_8U);
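PyrLavel (spelling as in the commit) is a small value type: each level derives its scale as pow(_scale, order), so next() only needs the base factor. A standalone sketch of the walk it produces, with a minimal Size standing in for cv::Size and std::lround for cvRound (frame and window sizes are illustrative, not from the commit):

#include <cstdio>
#include <cmath>

struct Size { int width, height; };

struct PyrLavel   // spelling kept from the commit
{
    PyrLavel(int _order, float _scale, Size frame, Size window) : order(_order)
    {
        scale    = std::pow(_scale, (float)order);
        sFrame   = { (int)std::lround(frame.width / scale), (int)std::lround(frame.height / scale) };
        workArea = { sFrame.width - window.width + 1, sFrame.height - window.height + 1 };
        sWindow  = { (int)std::lround(window.width * scale), (int)std::lround(window.height * scale) };
    }

    bool isFeasible(Size maxObj) const
    {
        return workArea.width > 0 && workArea.height > 0
            && sWindow.width <= maxObj.width && sWindow.height <= maxObj.height;
    }

    PyrLavel next(float factor, Size frame, Size window)
    {
        return PyrLavel(order + 1, factor, frame, window);
    }

    int order;
    float scale;
    Size sFrame, workArea, sWindow;
};

int main()
{
    Size frame = {640, 480}, window = {24, 24}, maxObj = frame;
    // Walk levels until the work area vanishes or the scaled window outgrows maxObj.
    for (PyrLavel level(0, 1.2f, frame, window); level.isFeasible(maxObj);
         level = level.next(1.2f, frame, window))
        std::printf("order %d scale %.2f frame %dx%d work %dx%d\n", level.order,
                    level.scale, level.sFrame.width, level.sFrame.height,
                    level.workArea.width, level.workArea.height);
    return 0;
}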
@@ -306,6 +375,7 @@ int cv::gpu::CascadeClassifier_GPU_LBP::detectMultiScale(const GpuMat& image, Gp
     // used for debug
     // candidates.setTo(cv::Scalar::all(0));
     // objects.setTo(cv::Scalar::all(0));
+
     if (maxObjectSize == cv::Size())
         maxObjectSize = image.size();
@@ -315,52 +385,54 @@ int cv::gpu::CascadeClassifier_GPU_LBP::detectMultiScale(const GpuMat& image, Gp
     GpuMat dclassified(1, 1, CV_32S);
     cudaSafeCall( cudaMemcpy(dclassified.ptr(), &classified, sizeof(int), cudaMemcpyHostToDevice) );

     // cv::gpu::device::lbp::bindIntegral(integral);

-    Size scaledImageSize(image.cols, image.rows);
-    Size processingRectSize( scaledImageSize.width - NxM.width + 1, scaledImageSize.height - NxM.height + 1 );
-    Size windowSize(NxM.width, NxM.height);
-    float factor = 1;
+    PyrLavel level(0, 1.0f, image.size(), NxM);

-    for (;;)
+    while (level.isFeasible(maxObjectSize))
     {
-        if (processingRectSize.width <= 0 || processingRectSize.height <= 0 )
-            break;
-
-        if( windowSize.width > maxObjectSize.width || windowSize.height > maxObjectSize.height )
-            break;
-
-        // if( windowSize.width < minObjectSize.width || windowSize.height < minObjectSize.height )
-        //     continue;
-
-        GpuMat scaledImg = resuzeBuffer(cv::Rect(0, 0, scaledImageSize.width, scaledImageSize.height));
-        GpuMat scaledIntegral = integral(cv::Rect(0, 0, scaledImageSize.width + 1, scaledImageSize.height + 1));
-        GpuMat currBuff = integralBuffer;
-
-        gpu::resize(image, scaledImg, scaledImageSize, 0, 0, CV_INTER_LINEAR);
-        gpu::integralBuffered(scaledImg, scaledIntegral, currBuff);
-
-        int step = factor <= 2.f ? 2 : 1;
-
-        device::lbp::classifyStumpFixed(integral, integral.step1(), stage_mat, stage_mat.cols / sizeof(Stage), nodes_mat, leaves_mat, subsets_mat, features_mat,
-            processingRectSize.width, processingRectSize.height, windowSize.width, windowSize.height, factor, step, subsetSize, candidates, dclassified.ptr<unsigned int>());
-
-        factor *= scaleFactor;
-        windowSize = cv::Size(cvRound(NxM.width * factor), cvRound(NxM.height * factor));
-        scaledImageSize = cv::Size(cvRound(image.cols / factor), cvRound(image.rows / factor));
-        processingRectSize = cv::Size(scaledImageSize.width - NxM.width + 1, scaledImageSize.height - NxM.height + 1 );
+        int acc = level.sFrame.width + 1;
+        float iniScale = level.scale;
+        cv::Size area = level.workArea;
+        float step = (float)(1 + (level.scale <= 2.f));
+
+        int total = 0, prev = 0;
+
+        while (acc <= integralFactor * (image.cols + 1) && level.isFeasible(maxObjectSize))
+        {
+            // create suitable matrix headers
+            GpuMat src  = resuzeBuffer(cv::Rect(0, 0, level.sFrame.width, level.sFrame.height));
+            GpuMat sint = integral(cv::Rect(prev, 0, level.sFrame.width + 1, level.sFrame.height + 1));
+            GpuMat buff = integralBuffer;
+
+            // generate integral for scale
+            gpu::resize(image, src, level.sFrame, 0, 0, CV_INTER_LINEAR);
+            gpu::integralBuffered(src, sint, buff);
+
+            total += cvCeil(area.width / step) * cvCeil(area.height / step);
+            // std::cout << "Total for scale: " << total << " this step contribution " << cvCeil(area.width / step) * cvCeil(area.height / step) << " previous width shift " << prev << " acc " << acc << " scales: " << cvCeil(area.width / step) << std::endl;
+
+            // increment pyr level
+            level = level.next(scaleFactor, image.size(), NxM);
+            area = level.workArea;
+
+            step = (float)(1 + (level.scale <= 2.f));
+            prev = acc;
+            acc += level.sFrame.width + 1;
+        }
+
+        device::lbp::classifyPyramid(image.cols, image.rows, NxM.width, NxM.height, iniScale, scaleFactor, total,
+            stage_mat, stage_mat.cols / sizeof(Stage), nodes_mat, leaves_mat, subsets_mat, features_mat, subsetSize,
+            candidates, dclassified.ptr<unsigned int>(), integral);
     }

     // cv::gpu::device::lbp::unbindIntegral();

     if (groupThreshold <= 0 || objects.empty())
         return 0;

     cudaSafeCall( cudaMemcpy(&classified, dclassified.ptr(), sizeof(int), cudaMemcpyDeviceToHost) );
     device::lbp::connectedConmonents(candidates, classified, objects, groupThreshold, grouping_eps, dclassified.ptr<unsigned int>());
     // candidates.copyTo(objects);

     cudaSafeCall( cudaMemcpy(&classified, dclassified.ptr(), sizeof(int), cudaMemcpyDeviceToHost) );
     cudaSafeCall( cudaDeviceSynchronize() );
     // std::cout << classified << " !!!!!!!!!!" << std::endl;

     return classified;
 }
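For context, a hypothetical caller of the public entry point above, sketched against the 2.4-era gpu module; the cascade file, image path, and parameter values are illustrative, and the classifier is assumed to be default-constructible:

#include <cstdio>
#include <opencv2/opencv.hpp>
#include <opencv2/gpu/gpu.hpp>

int main()
{
    cv::gpu::CascadeClassifier_GPU_LBP cascade;           // assumed default-constructible
    if (!cascade.load("lbpcascade_frontalface.xml"))      // illustrative cascade file
        return 1;

    cv::Mat frame = cv::imread("frame.png", 0);           // 8-bit grayscale, per the CV_Assert
    cv::gpu::GpuMat d_frame(frame), d_objects;

    // scaleFactor must be > 1; cv::Size() lets maxObjectSize default to the image size.
    int ndetections = cascade.detectMultiScale(d_frame, d_objects, 1.2, 4, cv::Size());

    cv::Mat objects;
    d_objects.download(objects);                          // grouped rectangles, one int4 per detection
    std::printf("detections: %d\n", ndetections);
    return 0;
}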
modules/gpu/src/cuda/lbp.cu

This diff is collapsed and not shown here.
modules/gpu/src/opencv2/gpu/device/emulation.hpp

@@ -44,18 +44,19 @@
 #define OPENCV_GPU_EMULATION_HPP_

 #include "warp_reduce.hpp"
 #include <stdio.h>

 namespace cv { namespace gpu { namespace device
 {
     struct Emulation
     {
         template<int CTA_SIZE>
         static __forceinline__ __device__ int Ballot(int predicate)
         {
-#if (__CUDA_ARCH__ >= 200)
+#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
             return __ballot(predicate);
 #else
-            __shared__ volatile int cta_buffer[CTA_SIZE]
+            __shared__ volatile int cta_buffer[CTA_SIZE];
             int tid = threadIdx.x;
             cta_buffer[tid] = predicate ? (1 << (tid & 31)) : 0;
@@ -63,41 +64,62 @@ namespace cv { namespace gpu { namespace device
 #endif
         }

-        struct smem
-        {
-            enum { TAG_MASK = (1U << ( (sizeof(unsigned int) << 3) - 5U)) - 1U };
-
-            template<typename T>
-            static __device__ __forceinline__ T atomicInc(T* address, T val)
-            {
-#if (__CUDA_ARCH__ < 120)
-#else
-#endif
-            }
-
-            template<typename T>
-            static __device__ __forceinline__ void atomicAdd(T* address, T val)
-            {
-#if (__CUDA_ARCH__ < 120)
-#else
-#endif
-            }
-
-            template<typename T>
-            __device__ __forceinline__ T __atomicMin(T* address, T val)
-            {
-#if (__CUDA_ARCH__ < 120)
-#else
-#endif
-            }
-        };
+        struct smem
+        {
+            enum { TAG_MASK = (1U << ( (sizeof(unsigned int) << 3) - 5U)) - 1U };
+
+            template<typename T>
+            static __device__ __forceinline__ T atomicInc(T* address, T val)
+            {
+#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
+                T count;
+                unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U);
+                do
+                {
+                    count = *address & TAG_MASK;
+                    count = tag | (count + 1);
+                    *address = count;
+                } while (*address != count);
+
+                return (count & TAG_MASK) - 1;
+#else
+                return ::atomicInc(address, val);
+#endif
+            }
+
+            template<typename T>
+            static __device__ __forceinline__ void atomicAdd(T* address, T val)
+            {
+#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
+                T count;
+                unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U);
+                do
+                {
+                    count = *address & TAG_MASK;
+                    count = tag | (count + val);
+                    *address = count;
+                } while (*address != count);
+#else
+                ::atomicAdd(address, val);
+#endif
+            }
+
+            template<typename T>
+            static __device__ __forceinline__ T atomicMin(T* address, T val)
+            {
+#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
+                T count = min(*address, val);
+                do
+                {
+                    *address = count;
+                } while (*address > count);
+
+                return count;
+#else
+                return ::atomicMin(address, val);
+#endif
+            }
+        };
     };
 }}} // namespace cv { namespace gpu { namespace device
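The tag-and-retry loop above is worth unpacking: pre-compute-1.2 devices have no shared-memory atomics, so the top 5 bits of the word carry the writer's thread id, and each thread retries until it reads back its own stamped value. A standalone CUDA sketch of the same idea (not from the commit; one warp-sized block so every thread id fits in the 5-bit tag):

#include <cstdio>
#include <cuda_runtime.h>

#define TAG_MASK ((1U << ((sizeof(unsigned int) << 3) - 5U)) - 1U)

__global__ void countThreads(unsigned int* out)
{
    __shared__ unsigned int counter;              // low 27 bits: count, high 5 bits: tag
    volatile unsigned int* addr = &counter;       // defeat register caching of the readback
    if (threadIdx.x == 0) counter = 0;
    __syncthreads();

    unsigned int tag = threadIdx.x << ((sizeof(unsigned int) << 3) - 5U);
    unsigned int count;
    do
    {
        count = *addr & TAG_MASK;                 // current count, old tag stripped
        count = tag | (count + 1);                // propose count+1 stamped with our id
        *addr = count;                            // racy shared-memory write
    } while (*addr != count);                     // lost the race? try again

    __syncthreads();
    if (threadIdx.x == 0) out[0] = counter & TAG_MASK;
}

int main()
{
    unsigned int *d_out, h_out = 0;
    cudaMalloc(&d_out, sizeof(unsigned int));
    countThreads<<<1, 32>>>(d_out);               // one warp: tags stay within 5 bits
    cudaMemcpy(&h_out, d_out, sizeof(unsigned int), cudaMemcpyDeviceToHost);
    std::printf("threads counted: %u\n", h_out);  // expect 32
    cudaFree(d_out);
    return 0;
}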
modules/gpu/src/opencv2/gpu/device/lbp.hpp

@@ -44,52 +44,11 @@
 #define __OPENCV_GPU_DEVICE_LBP_HPP_

 #include "internal_shared.hpp"
+#include <opencv2/gpu/device/emulation.hpp>

 namespace cv { namespace gpu { namespace device
 {
-namespace lbp{
-
-#define TAG_MASK ( (1U << ( (sizeof(unsigned int) << 3) - 5U)) - 1U )
-
-    template<typename T>
-    __device__ __forceinline__ T __atomicInc(T* address, T val)
-    {
-        T count;
-        unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U);
-        do
-        {
-            count = *address & TAG_MASK;
-            count = tag | (count + 1);
-            *address = count;
-        } while (*address != count);
-
-        return (count & TAG_MASK) - 1;
-    }
-
-    template<typename T>
-    __device__ __forceinline__ void __atomicAdd(T* address, T val)
-    {
-        T count;
-        unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U);
-        do
-        {
-            count = *address & TAG_MASK;
-            count = tag | (count + val);
-            *address = count;
-        } while (*address != count);
-    }
-
-    template<typename T>
-    __device__ __forceinline__ T __atomicMin(T* address, T val)
-    {
-        T count = min(*address, val);
-        do
-        {
-            *address = count;
-        } while (*address > count);
-
-        return count;
-    }
+namespace lbp {

     struct Stage
     {

@@ -127,27 +86,25 @@ namespace lbp{
             unsigned tid = threadIdx.x;
             labels[tid] = tid;
             __syncthreads();

             for (unsigned int id = 0; id < n; id++)
             {
                 if (tid != id && predicate(vec[tid], vec[id]))
                 {
                     int p = labels[tid];
                     int q = labels[id];

-                    if (p != q)
-                    {
-                        int m = min(p, q);
-#if (__CUDA_ARCH__ < 120)
-                        __atomicMin(labels + id, m);
-#else
-                        atomicMin(labels + id, m);
-#endif
-                    }
+                    if (p < q)
+                    {
+                        Emulation::smem::atomicMin(labels + id, p);
+                    }
+                    else if (p > q)
+                    {
+                        Emulation::smem::atomicMin(labels + tid, q);
+                    }
                 }
             }
             __syncthreads();
         }
     }
 } // lbp
 } } } // namespaces
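A note on the refactor in the hunk above: the old partition loop dispatched on __CUDA_ARCH__ at every call site, choosing between the local __atomicMin emulation and the hardware atomicMin. After this commit both branches simply call Emulation::smem::atomicMin and the architecture check lives once inside the wrapper; splitting the p < q and p > q cases also lets each side update only the label it can actually lower.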