Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
aa92be34
Commit
aa92be34
authored
Nov 12, 2012
by
marina.kolpakova
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
GK107 Policy
parent
580d8173
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
76 additions
and
138 deletions
+76
-138
isf-sc.cu
modules/gpu/src/cuda/isf-sc.cu
+38
-101
icf.hpp
modules/gpu/src/icf.hpp
+18
-8
softcascade.cpp
modules/gpu/src/softcascade.cpp
+15
-24
test_softcascade.cpp
modules/gpu/test/test_softcascade.cpp
+5
-5
No files found.
modules/gpu/src/cuda/isf-sc.cu
View file @
aa92be34
...
...
@@ -161,12 +161,11 @@ namespace icf {
}
texture<float2, cudaTextureType2D, cudaReadModeElementType> troi;
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
template<bool isUp>
__global__ void test_kernel_warp(const Level* levels, const Octave* octaves, const float* stages,
const Node* nodes, const float* leaves, Detection* objects, const uint ndetections, uint* ctr,
const int downscales)
{
template<typename Policy>
template<bool isUp>
__device void CascadeInvoker<Policy>::detect(Detection* objects, const uint ndetections, uint* ctr, const int downscales) const
{
const int y = blockIdx.y * blockDim.y + threadIdx.y;
const int x = blockIdx.x;
...
...
@@ -174,7 +173,8 @@ namespace icf {
__shared__ Level level;
// check POI
__shared__ volatile char roiCache[8];
__shared__ volatile char roiCache[Policy::STA_Y];
if (!threadIdx.y && !threadIdx.x)
((float2*)roiCache)[threadIdx.x] = tex2D(troi, blockIdx.y, x);
...
...
@@ -187,24 +187,20 @@ namespace icf {
if(x >= level.workRect.x || y >= level.workRect.y) return;
Octave octave = octaves[level.octave];
int st = octave.index * octave.stages;
const int stEnd = st + 1024;
int st = level.octave * level.step;
const int stEnd = st + level.step;
float confidence = 0.f;
for(; st < stEnd; st += 32)
for(; st < stEnd; st += Policy::WARP)
{
const int nId = (st + threadIdx.x) * 3;
dprintf("\n\n%d: stage: %d %d\n",threadIdx.x, st, nId);
Node node = nodes[nId];
float threshold = rescale<isUp>(level, node);
int sum = get<isUp>(x, y + (node.threshold >> 28) * 120, node.rect);
int next = 1 + (int)(sum >= threshold);
dprintf("%d: go: %d (%d >= %f)\n\n" ,threadIdx.x, next, sum, threshold);
node = nodes[nId + next];
threshold = rescale<isUp>(level, node);
...
...
@@ -213,21 +209,16 @@ namespace icf {
const int lShift = (next - 1) * 2 + (int)(sum >= threshold);
float impact = leaves[(st + threadIdx.x) * 4 + lShift];
dprintf("%d: decided: %d (%d >= %f) %d %f\n\n" ,threadIdx.x, next, sum, threshold, lShift, impact);
dprintf("%d: extracted stage: %f\n",threadIdx.x, stages[(st + threadIdx.x)]);
dprintf("%d: computed score: %f\n",threadIdx.x, impact);
#pragma unroll
// scan on shuffl functions
for (int i = 1; i < 32
; i *= 2)
for (int i = 1; i < Policy::WARP
; i *= 2)
{
const float n = __shfl_up(impact, i, 32
);
const float n = __shfl_up(impact, i, Policy::WARP
);
if (threadIdx.x >= i)
impact += n;
}
dprintf("%d: impact scaned %f\n" ,threadIdx.x, impact);
confidence += impact;
if(__any((confidence <= stages[(st + threadIdx.x)]))) st += 2048;
}
...
...
@@ -235,86 +226,25 @@ namespace icf {
if(!threadIdx.x && st == stEnd && ((confidence - FLT_EPSILON) >= 0))
{
int idx = atomicInc(ctr, ndetections);
// store detection
objects[idx] = Detection(__float2int_rn(x * octave.shrinkage),
__float2int_rn(y * octave.shrinkage), level.objSize.x, level.objSize.y, confidence);
}
}
#else
template<bool isUp>
__global__ void test_kernel_warp(const Level* levels, const Octave* octaves, const float* stages,
const Node* nodes, const float* leaves, Detection* objects, const uint ndetections, uint* ctr,
const int downscales)
{
const int y = blockIdx.y * blockDim.y + threadIdx.y;
const int x = blockIdx.x * blockDim.x + threadIdx.x;
Level level = levels[blockIdx.z];
// if (blockIdx.z != 31) return;
if(x >= level.workRect.x || y >= level.workRect.y) return;
// int roi = tex2D(troi, x, y);
// printf("%d\n", roi);
// if (!roi) return;
Octave octave = octaves[level.octave];
int st = octave.index * octave.stages;
const int stEnd = st + 1000;//octave.stages;
float confidence = 0.f;
for(; st < stEnd; ++st)
{
dprintf("\n\nstage: %d\n", st);
const int nId = st * 3;
Node node = nodes[nId];
dprintf("Node: [%d %d %d %d] %d %d\n", node.rect.x, node.rect.y, node.rect.z, node.rect.w,
node.threshold >> 28, node.threshold & 0x0FFFFFFFU);
float threshold = rescale<isUp>(level, node);
int sum = get<isUp>(x, y + (node.threshold >> 28) * 121, node.rect);
dprintf("Node: [%d %d %d %d] %f\n", node.rect.x, node.rect.y, node.rect.z,
node.rect.w, threshold);
int next = 1 + (int)(sum >= threshold);
dprintf("go: %d (%d >= %f)\n\n" ,next, sum, threshold);
node = nodes[nId + next];
threshold = rescale<isUp>(level, node);
sum = get<isUp>(x, y + (node.threshold >> 28) * 121, node.rect);
const int lShift = (next - 1) * 2 + (int)(sum >= threshold);
float impact = leaves[st * 4 + lShift];
confidence += impact;
if (confidence <= stages[st]) st = stEnd + 10;
dprintf("decided: %d (%d >= %f) %d %f\n\n" ,next, sum, threshold, lShift, impact);
dprintf("extracted stage: %f\n", stages[st]);
dprintf("computed score: %f\n\n", confidence);
objects[idx] = Detection(__float2int_rn(x * Policy::SHRINKAGE),
__float2int_rn(y * Policy::SHRINKAGE), level.objSize.x, level.objSize.y, confidence);
}
}
if(st == stEnd)
{
int idx = atomicInc(ctr, ndetections);
// store detection
objects[idx] = Detection(__float2int_rn(x * octave.shrinkage),
__float2int_rn(y * octave.shrinkage), level.objSize.x, level.objSize.y, confidence);
}
}
#endif
template<typename Policy, bool isUp>
__global__ void soft_cascade(const CascadeInvoker<Policy> invoker, Detection* objects, const uint n, uint* ctr, const int downs)
{
invoker.template detect<isUp>(objects, n, ctr, downs);
}
template<
>
void CascadeInvoker<Cascade
Policy>::operator()(const PtrStepSzb& roi, const PtrStepSzi& hogluv,
template<typename Policy
>
void CascadeInvoker<
Policy>::operator()(const PtrStepSzb& roi, const PtrStepSzi& hogluv,
PtrStepSz<uchar4> objects, PtrStepSzi counter, const int downscales, const int scale, const cudaStream_t& stream) const
{
{
int fw = 160;
int fh = 120;
dim3 block(32, 8);
dim3 grid(fw, fh / 8, (scale == -1) ? downscales : 1);
dim3 grid(fw, fh / Policy::STA_Y, (scale == -1) ? downscales : 1);
uint* ctr = (uint*)(counter.ptr(0));
Detection* det = (Detection*)objects.ptr();
...
...
@@ -326,20 +256,22 @@ namespace icf {
cudaChannelFormatDesc desc_roi = cudaCreateChannelDesc<float2>();
cudaSafeCall( cudaBindTexture2D(0, troi, roi.data, desc_roi, roi.cols / 8, roi.rows, roi.step));
const CascadeInvoker<Policy> inv = *this;
if (scale == -1)
{
test_kernel_warp<false><<<grid, block, 0, stream>>>(levels, octaves, stages, nodes, leaves
, det, max_det, ctr, 0);
soft_cascade<Policy, false><<<grid, Policy::block(), 0, stream>>>(inv
, det, max_det, ctr, 0);
cudaSafeCall( cudaGetLastError());
grid = dim3(fw, fh / 8, 47
- downscales);
test_kernel_warp<true><<<grid, block, 0, stream>>>(levels, octaves, stages, nodes, leaves
, det, max_det, ctr, downscales);
grid = dim3(fw, fh / Policy::STA_Y, scales
- downscales);
soft_cascade<Policy, true><<<grid, Policy::block(), 0, stream>>>(inv
, det, max_det, ctr, downscales);
}
else
{
if (scale >= downscales)
test_kernel_warp<true><<<grid, block, 0, stream>>>(levels, octaves, stages, nodes, leaves
, det, max_det, ctr, scale);
soft_cascade<Policy, true><<<grid, Policy::block(), 0, stream>>>(inv
, det, max_det, ctr, scale);
else
test_kernel_warp<false><<<grid, block, 0, stream>>>(levels, octaves, stages, nodes, leaves
, det, max_det, ctr, scale);
soft_cascade<Policy, false><<<grid, Policy::block(), 0, stream>>>(inv
, det, max_det, ctr, scale);
}
if (!stream)
...
...
@@ -347,6 +279,10 @@ namespace icf {
cudaSafeCall( cudaGetLastError());
cudaSafeCall( cudaDeviceSynchronize());
}
}
}
template void CascadeInvoker<GK107PolicyX4>::operator()(const PtrStepSzb& roi, const PtrStepSzi& hogluv,
PtrStepSz<uchar4> objects, PtrStepSzi counter, const int downscales, const int scale, const cudaStream_t& stream) const;
}
}}}
\ No newline at end of file
modules/gpu/src/icf.hpp
View file @
aa92be34
...
...
@@ -72,9 +72,9 @@ struct __align__(16) Octave
struct
__align__
(
8
)
Level
//is actually 24 bytes
{
int
octave
;
int
step
;
float
relScale
;
float
shrScale
;
// used for marking detection
float
scaling
[
2
];
// calculated according to Dollal paper
// for 640x480 we can not get overflow
...
...
@@ -115,31 +115,41 @@ struct __align__(16) Detection
:
x
(
_x
),
y
(
_y
),
w
(
_w
),
h
(
_h
),
confidence
(
c
),
kind
(
0
)
{};
};
struct
CascadePolicy
struct
GK107PolicyX4
{
enum
{
STA_X
=
32
,
STA_Y
=
8
};
enum
{
WARP
=
32
,
STA_X
=
WARP
,
STA_Y
=
8
,
SHRINKAGE
=
4
};
static
const
dim3
block
()
{
return
dim3
(
GK107PolicyX4
::
STA_X
,
GK107PolicyX4
::
STA_Y
);
}
};
template
<
typename
Policy
>
struct
CascadeInvoker
{
CascadeInvoker
()
:
levels
(
0
),
octaves
(
0
),
stages
(
0
),
nodes
(
0
),
leaves
(
0
)
{}
CascadeInvoker
()
:
levels
(
0
),
stages
(
0
),
nodes
(
0
),
leaves
(
0
),
scales
(
0
)
{}
CascadeInvoker
(
const
PtrStepSzb
&
_levels
,
const
PtrStepSzb
&
_octaves
,
const
PtrStepSzf
&
_stages
,
const
PtrStepSzb
&
_nodes
,
const
PtrStepSzf
&
_leaves
)
:
levels
((
const
Level
*
)
_levels
.
ptr
()),
octaves
((
const
Octave
*
)
_octaves
.
ptr
()),
stages
((
const
float
*
)
_stages
.
ptr
()),
nodes
((
const
Node
*
)
_nodes
.
ptr
()),
leaves
((
const
float
*
)
_leaves
.
ptr
())
:
levels
((
const
Level
*
)
_levels
.
ptr
()),
stages
((
const
float
*
)
_stages
.
ptr
()),
nodes
((
const
Node
*
)
_nodes
.
ptr
()),
leaves
((
const
float
*
)
_leaves
.
ptr
()),
scales
(
_levels
.
cols
/
sizeof
(
Level
))
{}
const
Level
*
levels
;
const
Octave
*
octaves
;
const
float
*
stages
;
const
Node
*
nodes
;
const
float
*
leaves
;
int
scales
;
void
operator
()(
const
PtrStepSzb
&
roi
,
const
PtrStepSzi
&
hogluv
,
PtrStepSz
<
uchar4
>
objects
,
PtrStepSzi
counter
,
const
int
downscales
,
const
int
csale
=
-
1
,
const
cudaStream_t
&
stream
=
0
)
const
;
template
<
bool
isUp
>
__device
void
detect
(
Detection
*
objects
,
const
uint
ndetections
,
uint
*
ctr
,
const
int
downscales
)
const
;
};
}
...
...
modules/gpu/src/softcascade.cpp
View file @
aa92be34
...
...
@@ -63,7 +63,7 @@ void cv::gpu::SCascade::read(const FileNode& fn) { Algorithm::read(fn); }
#include <icf.hpp>
cv
::
gpu
::
device
::
icf
::
Level
::
Level
(
int
idx
,
const
Octave
&
oct
,
const
float
scale
,
const
int
w
,
const
int
h
)
:
octave
(
idx
),
relScale
(
scale
/
oct
.
scale
),
shrScale
(
relScale
/
(
float
)
oct
.
shrinkag
e
)
:
octave
(
idx
),
step
(
oct
.
stages
),
relScale
(
scale
/
oct
.
scal
e
)
{
workRect
.
x
=
round
(
w
/
(
float
)
oct
.
shrinkage
);
workRect
.
y
=
round
(
h
/
(
float
)
oct
.
shrinkage
);
...
...
@@ -100,7 +100,7 @@ namespace imgproc {
struct
cv
::
gpu
::
SCascade
::
Fields
{
static
Fields
*
parseCascade
(
const
FileNode
&
root
,
const
float
mins
,
const
float
maxs
)
static
Fields
*
parseCascade
(
const
FileNode
&
root
,
const
float
mins
,
const
float
maxs
,
const
int
totals
)
{
static
const
char
*
const
SC_STAGE_TYPE
=
"stageType"
;
static
const
char
*
const
SC_BOOST
=
"BOOST"
;
...
...
@@ -120,10 +120,7 @@ struct cv::gpu::SCascade::Fields
static
const
char
*
const
SC_ORIG_H
=
"height"
;
int
origWidth
=
(
int
)
root
[
SC_ORIG_W
];
CV_Assert
(
origWidth
==
ORIG_OBJECT_WIDTH
);
int
origHeight
=
(
int
)
root
[
SC_ORIG_H
];
CV_Assert
(
origHeight
==
ORIG_OBJECT_HEIGHT
);
static
const
char
*
const
SC_OCTAVES
=
"octaves"
;
static
const
char
*
const
SC_STAGES
=
"stages"
;
...
...
@@ -142,7 +139,6 @@ struct cv::gpu::SCascade::Fields
static
const
char
*
const
SC_F_CHANNEL
=
"channel"
;
static
const
char
*
const
SC_F_RECT
=
"rect"
;
FileNode
fn
=
root
[
SC_OCTAVES
];
if
(
fn
.
empty
())
return
false
;
...
...
@@ -167,8 +163,8 @@ struct cv::gpu::SCascade::Fields
ushort
nstages
=
saturate_cast
<
ushort
>
((
int
)
fns
[
SC_OCT_STAGES
]);
ushort2
size
;
size
.
x
=
cvRound
(
ORIG_OBJECT_WIDTH
*
scale
);
size
.
y
=
cvRound
(
ORIG_OBJECT_HEIGHT
*
scale
);
size
.
x
=
cvRound
(
origWidth
*
scale
);
size
.
y
=
cvRound
(
origHeight
*
scale
);
shrinkage
=
saturate_cast
<
ushort
>
((
int
)
fns
[
SC_OCT_SHRINKAGE
]);
Octave
octave
(
octIndex
,
nstages
,
shrinkage
,
size
,
scale
);
...
...
@@ -245,11 +241,11 @@ struct cv::gpu::SCascade::Fields
CV_Assert
(
!
hleaves
.
empty
());
std
::
vector
<
Level
>
vlevels
;
float
logFactor
=
(
::
log
(
maxs
)
-
::
log
(
mins
))
/
(
TOTAL_SCALES
-
1
);
float
logFactor
=
(
::
log
(
maxs
)
-
::
log
(
mins
))
/
(
totals
-
1
);
float
scale
=
mins
;
int
downscales
=
0
;
for
(
int
sc
=
0
;
sc
<
TOTAL_SCALES
;
++
sc
)
for
(
int
sc
=
0
;
sc
<
totals
;
++
sc
)
{
int
width
=
::
std
::
max
(
0.0
f
,
FRAME_WIDTH
-
(
origWidth
*
scale
));
int
height
=
::
std
::
max
(
0.0
f
,
FRAME_HEIGHT
-
(
origHeight
*
scale
));
...
...
@@ -302,7 +298,7 @@ struct cv::gpu::SCascade::Fields
leaves
.
upload
(
hleaves
);
levels
.
upload
(
hlevels
);
invoker
=
device
::
icf
::
CascadeInvoker
<
device
::
icf
::
CascadePolicy
>
(
levels
,
octaves
,
stages
,
nodes
,
leaves
);
invoker
=
device
::
icf
::
CascadeInvoker
<
device
::
icf
::
GK107PolicyX4
>
(
levels
,
octaves
,
stages
,
nodes
,
leaves
);
}
...
...
@@ -456,16 +452,13 @@ public:
GpuMat
sobelBuf
;
device
::
icf
::
CascadeInvoker
<
device
::
icf
::
CascadePolicy
>
invoker
;
device
::
icf
::
CascadeInvoker
<
device
::
icf
::
GK107PolicyX4
>
invoker
;
enum
{
BOOST
=
0
};
enum
{
FRAME_WIDTH
=
640
,
FRAME_HEIGHT
=
480
,
TOTAL_SCALES
=
55
,
ORIG_OBJECT_WIDTH
=
64
,
ORIG_OBJECT_HEIGHT
=
128
,
HOG_BINS
=
6
,
LUV_BINS
=
3
,
HOG_LUV_BINS
=
10
...
...
@@ -480,21 +473,19 @@ cv::gpu::SCascade::~SCascade() { delete fields; }
bool
cv
::
gpu
::
SCascade
::
load
(
const
FileNode
&
fn
)
{
if
(
fields
)
delete
fields
;
fields
=
Fields
::
parseCascade
(
fn
,
minScale
,
maxScale
);
fields
=
Fields
::
parseCascade
(
fn
,
minScale
,
maxScale
,
scales
);
return
fields
!=
0
;
}
void
cv
::
gpu
::
SCascade
::
detect
(
InputArray
image
,
InputArray
_rois
,
OutputArray
_objects
,
Stream
&
s
)
const
{
CV_Assert
(
fields
);
const
GpuMat
colored
=
image
.
getGpuMat
();
// only color images are supperted
CV_Assert
(
colored
.
type
()
==
CV_8UC3
||
colored
.
type
()
==
CV_32SC1
);
GpuMat
rois
=
_rois
.
getGpuMat
(),
objects
=
_objects
.
getGpuMat
();
// we guess user knows about shrincage
// CV_Assert((rois.size().width == getRoiSize().height) && (rois.type() == CV_8UC1));
Fields
&
flds
=
*
fields
;
if
(
colored
.
type
()
==
CV_8UC3
)
...
...
@@ -518,15 +509,13 @@ void cv::gpu::SCascade::detect(InputArray image, InputArray _rois, OutputArray _
void
cv
::
gpu
::
SCascade
::
detect
(
InputArray
image
,
InputArray
_rois
,
OutputArray
_objects
,
const
int
level
,
Stream
&
s
)
const
{
CV_Assert
(
fields
);
const
GpuMat
colored
=
image
.
getGpuMat
();
// only color images are supperted
CV_Assert
(
colored
.
type
()
==
CV_8UC3
||
colored
.
type
()
==
CV_32SC1
);
// we guess user knows about shrincage
// CV_Assert((rois.size().width == getRoiSize().height) && (rois.type() == CV_8UC1));
Fields
&
flds
=
*
fields
;
if
(
colored
.
type
()
==
CV_8UC3
)
{
// only this window size allowed
...
...
@@ -549,6 +538,8 @@ void cv::gpu::SCascade::detect(InputArray image, InputArray _rois, OutputArray _
void
cv
::
gpu
::
SCascade
::
genRoi
(
InputArray
_roi
,
OutputArray
_mask
,
Stream
&
stream
)
const
{
CV_Assert
(
fields
);
const
GpuMat
roi
=
_roi
.
getGpuMat
();
_mask
.
create
(
roi
.
cols
/
4
,
roi
.
rows
/
4
,
roi
.
type
()
);
GpuMat
mask
=
_mask
.
getGpuMat
();
...
...
modules/gpu/test/test_softcascade.cpp
View file @
aa92be34
...
...
@@ -172,7 +172,7 @@ GPU_TEST_P(SCascadeTestRoi, detect,
sub
.
setTo
(
1
);
cv
::
rectangle
(
result
,
r
,
cv
::
Scalar
(
0
,
0
,
255
,
255
),
1
);
}
objectBoxes
.
setTo
(
0
);
cascade
.
genRoi
(
rois
,
trois
);
cascade
.
detect
(
colored
,
trois
,
objectBoxes
);
...
...
@@ -222,7 +222,7 @@ GPU_TEST_P(SCascadeTestLevel, detect,
cv
::
gpu
::
GpuMat
trois
;
cascade
.
genRoi
(
rois
,
trois
);
objectBoxes
.
setTo
(
0
);
int
level
=
GET_PARAM
(
3
);
cascade
.
detect
(
colored
,
trois
,
objectBoxes
,
level
);
...
...
@@ -281,7 +281,7 @@ GPU_TEST_P(SCascadeTestAll, detect,
cv
::
gpu
::
GpuMat
trois
;
cascade
.
genRoi
(
rois
,
trois
);
objectBoxes
.
setTo
(
0
);
cascade
.
detect
(
colored
,
trois
,
objectBoxes
);
typedef
cv
::
gpu
::
SCascade
::
Detection
Detection
;
...
...
@@ -321,7 +321,7 @@ GPU_TEST_P(SCascadeTestAll, detectOnIntegral,
cv
::
gpu
::
GpuMat
trois
;
cascade
.
genRoi
(
rois
,
trois
);
objectBoxes
.
setTo
(
0
);
cascade
.
detect
(
hogluv
,
trois
,
objectBoxes
);
typedef
cv
::
gpu
::
SCascade
::
Detection
Detection
;
...
...
@@ -357,7 +357,7 @@ GPU_TEST_P(SCascadeTestAll, detectStream,
cv
::
gpu
::
GpuMat
trois
;
cascade
.
genRoi
(
rois
,
trois
,
s
);
objectBoxes
.
setTo
(
0
);
cascade
.
detect
(
colored
,
trois
,
objectBoxes
,
s
);
cudaDeviceSynchronize
();
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment