Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
dca27b46
Commit
dca27b46
authored
Sep 25, 2012
by
marina.kolpakova
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix cast bug; add logging
parent
e606a0d6
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
103 additions
and
23 deletions
+103
-23
isf-sc.cu
modules/gpu/src/cuda/isf-sc.cu
+72
-16
icf.hpp
modules/gpu/src/icf.hpp
+2
-2
softcascade.cpp
modules/gpu/src/softcascade.cpp
+29
-5
No files found.
modules/gpu/src/cuda/isf-sc.cu
View file @
dca27b46
...
@@ -45,6 +45,8 @@
...
@@ -45,6 +45,8 @@
#include <stdio.h>
#include <stdio.h>
#include <float.h>
#include <float.h>
//#define LOG_CUDA_CASCADE
namespace cv { namespace gpu { namespace device {
namespace cv { namespace gpu { namespace device {
namespace icf {
namespace icf {
...
@@ -85,7 +87,7 @@ namespace icf {
...
@@ -85,7 +87,7 @@ namespace icf {
}
}
}
}
__global__ void detect(const cv::gpu::icf::Cascade cascade, const
uchar
* __restrict__ hogluv, const int pitch,
__global__ void detect(const cv::gpu::icf::Cascade cascade, const
int
* __restrict__ hogluv, const int pitch,
PtrStepSz<uchar4> objects)
PtrStepSz<uchar4> objects)
{
{
cascade.detectAt(hogluv, pitch, objects);
cascade.detectAt(hogluv, pitch, objects);
...
@@ -96,6 +98,11 @@ __global__ void detect(const cv::gpu::icf::Cascade cascade, const uchar* __restr
...
@@ -96,6 +98,11 @@ __global__ void detect(const cv::gpu::icf::Cascade cascade, const uchar* __restr
float __device icf::Cascade::rescale(const icf::Level& level, uchar4& scaledRect,
float __device icf::Cascade::rescale(const icf::Level& level, uchar4& scaledRect,
const int channel, const float threshold) const
const int channel, const float threshold) const
{
{
#if defined LOG_CUDA_CASCADE
printf("feature %d box %d %d %d %d\n", channel, scaledRect.x, scaledRect.y, scaledRect.z, scaledRect.w);
printf("rescale: %f [%f %f]\n",level.relScale, level.scaling[0], level.scaling[1]);
#endif
float relScale = level.relScale;
float relScale = level.relScale;
float farea = (scaledRect.z - scaledRect.x) * (scaledRect.w - scaledRect.y);
float farea = (scaledRect.z - scaledRect.x) * (scaledRect.w - scaledRect.y);
...
@@ -107,6 +114,7 @@ float __device icf::Cascade::rescale(const icf::Level& level, uchar4& scaledRect
...
@@ -107,6 +114,7 @@ float __device icf::Cascade::rescale(const icf::Level& level, uchar4& scaledRect
float sarea = (scaledRect.z - scaledRect.x) * (scaledRect.w - scaledRect.y);
float sarea = (scaledRect.z - scaledRect.x) * (scaledRect.w - scaledRect.y);
float approx = 1.f;
float approx = 1.f;
if (fabs(farea - 0.f) > FLT_EPSILON && fabs(farea - 0.f) > FLT_EPSILON)
if (fabs(farea - 0.f) > FLT_EPSILON && fabs(farea - 0.f) > FLT_EPSILON)
{
{
...
@@ -114,40 +122,72 @@ float __device icf::Cascade::rescale(const icf::Level& level, uchar4& scaledRect
...
@@ -114,40 +122,72 @@ float __device icf::Cascade::rescale(const icf::Level& level, uchar4& scaledRect
approx = expected_new_area / sarea;
approx = expected_new_area / sarea;
}
}
#if defined LOG_CUDA_CASCADE
printf("new rect: %d box %d %d %d %d rel areas %f %f\n", channel,
scaledRect.x, scaledRect.y, scaledRect.z, scaledRect.w, farea * relScale * relScale, sarea);
#endif
// compensation areas rounding
// compensation areas rounding
float rootThreshold = threshold / approx;
float rootThreshold = threshold / approx;
// printf(" approx %f\n", rootThreshold);
rootThreshold *= level.scaling[(int)(channel > 6)];
rootThreshold *= level.scaling[(int)(channel > 6)];
#if defined LOG_CUDA_CASCADE
printf("approximation %f %f -> %f %f\n", approx, threshold, rootThreshold, level.scaling[(int)(channel > 6)]);
#endif
return rootThreshold;
return rootThreshold;
}
}
typedef unsigned char uchar;
typedef unsigned char uchar;
float __device get(const
uchar
* __restrict__ hogluv, const int pitch,
float __device get(const
int
* __restrict__ hogluv, const int pitch,
const int x, const int y, int channel, uchar4 area)
const int x, const int y, int channel, uchar4 area)
{
{
const uchar* curr = hogluv + ((channel * 121) + y) * pitch;
#if defined LOG_CUDA_CASCADE
printf("feature box %d %d %d %d ", area.x, area.y, area.z, area.w);
printf("get for channel %d\n", channel);
printf("extract feature for: [%d %d] [%d %d] [%d %d] [%d %d]\n",
x + area.x, y + area.y, x + area.z, y + area.y, x + area.z,y + area.w,
x + area.x, y + area.w);
printf("at point %d %d with offset %d\n", x, y, 0);
#endif
const int* curr = hogluv + ((channel * 121) + y) * pitch;
int a = curr[area.y * pitch + x + area.x];
int a = curr[area.y * pitch + x + area.x];
int b = curr[area.y * pitch + x + area.z];
int b = curr[area.y * pitch + x + area.z];
int c = curr[area.w * pitch + x + area.z];
int c = curr[area.w * pitch + x + area.z];
int d = curr[area.w * pitch + x + area.x];
int d = curr[area.w * pitch + x + area.x];
#if defined LOG_CUDA_CASCADE
printf(" retruved integral values: %d %d %d %d\n", a, b, c, d);
#endif
return (a - b + c - d);
return (a - b + c - d);
}
}
void __device icf::Cascade::detectAt(const
uchar
* __restrict__ hogluv, const int pitch,
void __device icf::Cascade::detectAt(const
int
* __restrict__ hogluv, const int pitch,
PtrStepSz<uchar4>& objects) const
PtrStepSz<uchar4>& objects) const
{
{
const icf::Level* lls = (const icf::Level*)levels.ptr();
const icf::Level* lls = (const icf::Level*)levels.ptr();
Level level = lls[0];
const int y = blockIdx.y * blockDim.y + threadIdx.y;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int x = blockIdx.x * blockDim.x + threadIdx.x;
// if (x > 0 || y > 0) return;
Level level = lls[0];
if (x >= level.workRect.x || y >= level.workRect.y) return;
if (x >= level.workRect.x || y >= level.workRect.y) return;
#if defined LOG_CUDA_CASCADE
printf("level: %d (%f %f) [%f %f] (%d %d) (%d %d)\n", level.octave, level.relScale, level.shrScale,
level.scaling[0], level.scaling[1], level.workRect.x, level.workRect.y, level.objSize.x, level.objSize.y);
#endif
const Octave octave = ((const Octave*)octaves.ptr())[level.octave];
const Octave octave = ((const Octave*)octaves.ptr())[level.octave];
// printf("Octave: %d %d %d (%d %d) %f\n", octave.index, octave.stages,
// octave.shrinkage, octave.size.x, octave.size.y, octave.scale);
const int stBegin = octave.index * octave.stages, stEnd = stBegin + octave.stages;
const int stBegin = octave.index * octave.stages, stEnd = stBegin + octave.stages;
float detectionScore = 0.f;
float detectionScore = 0.f;
...
@@ -156,11 +196,17 @@ void __device icf::Cascade::detectAt(const uchar* __restrict__ hogluv, const int
...
@@ -156,11 +196,17 @@ void __device icf::Cascade::detectAt(const uchar* __restrict__ hogluv, const int
for(; st < stEnd; ++st)
for(; st < stEnd; ++st)
{
{
const float stage = stages(0, st);
const float stage = stages(0, st);
#if defined LOG_CUDA_CASCADE
printf("Stage: %f\n", stage);
#endif
{
{
const int nId = st * 3;
const int nId = st * 3;
// work with root node
// work with root node
const Node node = ((const Node*)nodes.ptr())[nId];
const Node node = ((const Node*)nodes.ptr())[nId];
#if defined LOG_CUDA_CASCADE
printf("Node: %d %f\n", node.feature, node.threshold);
#endif
const Feature feature = ((const Feature*)features.ptr())[node.feature];
const Feature feature = ((const Feature*)features.ptr())[node.feature];
uchar4 scaledRect = feature.rect;
uchar4 scaledRect = feature.rect;
...
@@ -168,31 +214,46 @@ void __device icf::Cascade::detectAt(const uchar* __restrict__ hogluv, const int
...
@@ -168,31 +214,46 @@ void __device icf::Cascade::detectAt(const uchar* __restrict__ hogluv, const int
float sum = get(hogluv,pitch, x, y, feature.channel, scaledRect);
float sum = get(hogluv,pitch, x, y, feature.channel, scaledRect);
#if defined LOG_CUDA_CASCADE
printf("root feature %d %f\n",feature.channel, sum);
#endif
int next = 1 + (int)(sum >= threshold);
int next = 1 + (int)(sum >= threshold);
#if defined LOG_CUDA_CASCADE
printf("go: %d (%f >= %f)\n\n" ,next, sum, threshold);
#endif
// leaves
// leaves
const Node leaf = ((const Node*)nodes.ptr())[nId + next];
const Node leaf = ((const Node*)nodes.ptr())[nId + next];
const Feature fLeaf = ((const Feature*)features.ptr())[leaf.feature];
const Feature fLeaf = ((const Feature*)features.ptr())[leaf.feature];
scaledRect = fLeaf.rect;
scaledRect = fLeaf.rect;
threshold = rescale(level, scaledRect, f
eature.channel, node
.threshold);
threshold = rescale(level, scaledRect, f
Leaf.channel, leaf
.threshold);
sum = get(hogluv, pitch, x, y, fLeaf.channel, scaledRect);
sum = get(hogluv, pitch, x, y, fLeaf.channel, scaledRect);
const int lShift = (next - 1) * 2 + (int)(sum >= threshold);
const int lShift = (next - 1) * 2 + (int)(sum >= threshold);
float impact = leaves(0, (st * 4) + lShift);
float impact = leaves(0, (st * 4) + lShift);
detectionScore += impact;
detectionScore += impact;
#if defined LOG_CUDA_CASCADE
printf("decided: %d (%f >= %f) %d %f\n\n" ,next, sum, threshold, lShift, impact);
printf("extracted stage:\n");
printf("ct %f\n", stage);
printf("computed score %f\n\n", detectionScore);
printf("\n\n");
#endif
}
}
if (detectionScore <= stage) break;
if (detectionScore <= stage) break;
}
}
// if (!threadIdx.x && !threadIdx.y)// printf("%f %d\n", detectionScore, st);
#if defined LOG_CUDA_CASCADE
// printf("x %d y %d: %d\n", x, y, st);
// printf("x %d y %d: %d\n", x, y, st - stBegin);
#endif
if (st == stEnd)
if (st == stEnd)
{
{
// printf(" got %d\n", st);
uchar4 a;
uchar4 a;
a.x = level.workRect.x;
a.x = level.workRect.x;
a.y = level.workRect.y;
a.y = level.workRect.y;
...
@@ -200,18 +261,14 @@ void __device icf::Cascade::detectAt(const uchar* __restrict__ hogluv, const int
...
@@ -200,18 +261,14 @@ void __device icf::Cascade::detectAt(const uchar* __restrict__ hogluv, const int
}
}
}
}
void icf::Cascade::detect(const cv::gpu::PtrStepSzb& hogluv, PtrStepSz<uchar4> objects,
void icf::Cascade::detect(const cv::gpu::PtrStepSzi& hogluv, PtrStepSz<uchar4> objects, cudaStream_t stream) const
cudaStream_t stream) const
{
{
// detection kernel
dim3 block(32, 8, 1);
dim3 block(32, 8, 1);
// dim3 grid(32 * ChannelStorage::FRAME_WIDTH / 32, ChannelStorage::FRAME_HEIGHT / 8, 1);
dim3 grid(ChannelStorage::FRAME_WIDTH / 32, ChannelStorage::FRAME_HEIGHT / 8, 1);
dim3 grid(ChannelStorage::FRAME_WIDTH / 32, ChannelStorage::FRAME_HEIGHT / 8, 1);
device::detect<<<grid, block, 0, stream>>>(*this, hogluv, hogluv.step / sizeof(
ushor
t), objects);
device::detect<<<grid, block, 0, stream>>>(*this, hogluv, hogluv.step / sizeof(
in
t), objects);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
if (!stream)
if (!stream)
cudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
}
}}
}}
\ No newline at end of file
modules/gpu/src/icf.hpp
View file @
dca27b46
...
@@ -102,8 +102,8 @@ struct Cascade
...
@@ -102,8 +102,8 @@ struct Cascade
const
cv
::
gpu
::
PtrStepSzf
&
lvs
,
const
cv
::
gpu
::
PtrStepSzb
&
fts
,
const
cv
::
gpu
::
PtrStepSzb
&
lls
)
const
cv
::
gpu
::
PtrStepSzf
&
lvs
,
const
cv
::
gpu
::
PtrStepSzb
&
fts
,
const
cv
::
gpu
::
PtrStepSzb
&
lls
)
:
octaves
(
octs
),
stages
(
sts
),
nodes
(
nds
),
leaves
(
lvs
),
features
(
fts
),
levels
(
lls
)
{}
:
octaves
(
octs
),
stages
(
sts
),
nodes
(
nds
),
leaves
(
lvs
),
features
(
fts
),
levels
(
lls
)
{}
void
detect
(
const
cv
::
gpu
::
PtrStepSz
b
&
hogluv
,
cv
::
gpu
::
PtrStepSz
<
uchar4
>
objects
,
cudaStream_t
stream
)
const
;
void
detect
(
const
cv
::
gpu
::
PtrStepSz
i
&
hogluv
,
cv
::
gpu
::
PtrStepSz
<
uchar4
>
objects
,
cudaStream_t
stream
)
const
;
void
__device
detectAt
(
const
uchar
*
__restrict__
hogluv
,
const
int
pitch
,
PtrStepSz
<
uchar4
>&
objects
)
const
;
void
__device
detectAt
(
const
int
*
__restrict__
hogluv
,
const
int
pitch
,
PtrStepSz
<
uchar4
>&
objects
)
const
;
float
__device
rescale
(
const
icf
::
Level
&
level
,
uchar4
&
scaledRect
,
float
__device
rescale
(
const
icf
::
Level
&
level
,
uchar4
&
scaledRect
,
const
int
channel
,
const
float
threshold
)
const
;
const
int
channel
,
const
float
threshold
)
const
;
...
...
modules/gpu/src/softcascade.cpp
View file @
dca27b46
...
@@ -381,6 +381,9 @@ inline void cv::gpu::SoftCascade::Filds::calcLevels(const std::vector<icf::Octav
...
@@ -381,6 +381,9 @@ inline void cv::gpu::SoftCascade::Filds::calcLevels(const std::vector<icf::Octav
if
(
::
fabs
(
scale
-
maxScale
)
<
FLT_EPSILON
)
break
;
if
(
::
fabs
(
scale
-
maxScale
)
<
FLT_EPSILON
)
break
;
scale
=
::
std
::
min
(
maxScale
,
::
expf
(
::
log
(
scale
)
+
logFactor
));
scale
=
::
std
::
min
(
maxScale
,
::
expf
(
::
log
(
scale
)
+
logFactor
));
// printf("level: %d (%f %f) [%f %f] (%d %d) (%d %d)\n", level.octave, level.relScale, level.shrScale,
// level.scaling[0], level.scaling[1], level.workRect.x, level.workRect.y, level.objSize.x, level.objSize.y);
// std::cout << "level " << sc
// std::cout << "level " << sc
// << " octeve "
// << " octeve "
// << vlevels[sc].octave
// << vlevels[sc].octave
...
@@ -421,6 +424,15 @@ bool cv::gpu::SoftCascade::load( const string& filename, const float minScale, c
...
@@ -421,6 +424,15 @@ bool cv::gpu::SoftCascade::load( const string& filename, const float minScale, c
return
true
;
return
true
;
}
}
namespace
{
char
*
itoa
(
long
i
,
char
*
s
,
int
/*dummy_radix*/
)
{
sprintf
(
s
,
"%ld"
,
i
);
return
s
;
}
}
#define USE_REFERENCE_VALUES
void
cv
::
gpu
::
SoftCascade
::
detectMultiScale
(
const
GpuMat
&
colored
,
const
GpuMat
&
/*rois*/
,
void
cv
::
gpu
::
SoftCascade
::
detectMultiScale
(
const
GpuMat
&
colored
,
const
GpuMat
&
/*rois*/
,
GpuMat
&
objects
,
const
int
/*rejectfactor*/
,
Stream
s
)
GpuMat
&
objects
,
const
int
/*rejectfactor*/
,
Stream
s
)
{
{
...
@@ -431,14 +443,26 @@ void cv::gpu::SoftCascade::detectMultiScale(const GpuMat& colored, const GpuMat&
...
@@ -431,14 +443,26 @@ void cv::gpu::SoftCascade::detectMultiScale(const GpuMat& colored, const GpuMat&
CV_Assert
(
colored
.
cols
==
640
&&
colored
.
rows
==
480
);
CV_Assert
(
colored
.
cols
==
640
&&
colored
.
rows
==
480
);
Filds
&
flds
=
*
filds
;
Filds
&
flds
=
*
filds
;
#if defined USE_REFERENCE_VALUES
cudaMemset
(
flds
.
hogluv
.
data
,
0
,
flds
.
hogluv
.
step
*
flds
.
hogluv
.
rows
);
cv
::
FileStorage
imgs
(
"/home/kellan/testInts.xml"
,
cv
::
FileStorage
::
READ
);
char
buff
[
33
];
for
(
int
i
=
0
;
i
<
Filds
::
HOG_LUV_BINS
;
++
i
)
{
cv
::
Mat
channel
;
imgs
[
std
::
string
(
"channel"
)
+
itoa
(
i
,
buff
,
10
)]
>>
channel
;
GpuMat
gchannel
(
flds
.
hogluv
,
cv
::
Rect
(
0
,
121
*
i
,
161
,
121
));
gchannel
.
upload
(
channel
);
}
#else
GpuMat
&
dmem
=
flds
.
dmem
;
GpuMat
&
dmem
=
flds
.
dmem
;
cudaMemset
(
dmem
.
data
,
0
,
dmem
.
step
*
dmem
.
rows
);
cudaMemset
(
dmem
.
data
,
0
,
dmem
.
step
*
dmem
.
rows
);
GpuMat
&
shrunk
=
flds
.
shrunk
;
GpuMat
&
shrunk
=
flds
.
shrunk
;
int
w
=
shrunk
.
cols
;
int
w
=
shrunk
.
cols
;
int
h
=
colored
.
rows
/
flds
.
storage
.
shrinkage
;
int
h
=
colored
.
rows
/
flds
.
storage
.
shrinkage
;
cudaStream_t
stream
=
StreamAccessor
::
getStream
(
s
);
std
::
vector
<
GpuMat
>
splited
;
std
::
vector
<
GpuMat
>
splited
;
for
(
int
i
=
0
;
i
<
3
;
++
i
)
for
(
int
i
=
0
;
i
<
3
;
++
i
)
{
{
...
@@ -468,9 +492,6 @@ void cv::gpu::SoftCascade::detectMultiScale(const GpuMat& colored, const GpuMat&
...
@@ -468,9 +492,6 @@ void cv::gpu::SoftCascade::detectMultiScale(const GpuMat& colored, const GpuMat&
GpuMat
plane
(
dmem
,
cv
::
Rect
(
0
,
0
,
colored
.
cols
,
colored
.
rows
*
Filds
::
HOG_LUV_BINS
));
GpuMat
plane
(
dmem
,
cv
::
Rect
(
0
,
0
,
colored
.
cols
,
colored
.
rows
*
Filds
::
HOG_LUV_BINS
));
cv
::
gpu
::
resize
(
plane
,
flds
.
shrunk
,
cv
::
Size
(),
0.25
,
0.25
,
CV_INTER_AREA
);
cv
::
gpu
::
resize
(
plane
,
flds
.
shrunk
,
cv
::
Size
(),
0.25
,
0.25
,
CV_INTER_AREA
);
// cv::Mat cpu(plane);
// cv::imshow("channels", cpu);
// cv::waitKey(0);
// fer debug purpose
// fer debug purpose
// cudaMemset(flds.hogluv.data, 0, flds.hogluv.step * flds.hogluv.rows);
// cudaMemset(flds.hogluv.data, 0, flds.hogluv.step * flds.hogluv.rows);
...
@@ -482,6 +503,9 @@ void cv::gpu::SoftCascade::detectMultiScale(const GpuMat& colored, const GpuMat&
...
@@ -482,6 +503,9 @@ void cv::gpu::SoftCascade::detectMultiScale(const GpuMat& colored, const GpuMat&
cv
::
gpu
::
integralBuffered
(
channel
,
sum
,
flds
.
integralBuffer
);
cv
::
gpu
::
integralBuffered
(
channel
,
sum
,
flds
.
integralBuffer
);
}
}
#endif
cudaStream_t
stream
=
StreamAccessor
::
getStream
(
s
);
// detection
// detection
flds
.
detect
(
objects
,
stream
);
flds
.
detect
(
objects
,
stream
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment