Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
df392cc8
Commit
df392cc8
authored
Nov 05, 2012
by
marina.kolpakova
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
use fast integral for soft cascade
parent
ac5cd482
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
102 additions
and
17 deletions
+102
-17
integral_image.cu
modules/gpu/src/cuda/integral_image.cu
+83
-0
isf-sc.cu
modules/gpu/src/cuda/isf-sc.cu
+2
-2
softcascade.cpp
modules/gpu/src/softcascade.cpp
+16
-15
test_softcascade.cpp
modules/gpu/test/test_softcascade.cpp
+1
-0
No files found.
modules/gpu/src/cuda/integral_image.cu
View file @
df392cc8
...
...
@@ -383,6 +383,89 @@ namespace cv { namespace gpu { namespace device
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
// Vertical pass of the buffered integral: accumulates `buffer` down each column and stores the
// running sums into `integral` shifted by one row and one column, i.e.
//     integral(y + 1, x + 1) = buffer(0, x) + buffer(1, x) + ... + buffer(y, x)
//
// Launch layout: blockDim must be (32, 8). The shared tile and the 8-lane scan groups below are
// hard-wired to it. Each block covers 32 adjacent columns and walks the rows 8 at a time.
// Row 0 and column 0 of `integral` are not written by this kernel.
// Requires SM30+ (warp shuffle); for older targets the kernel body is compiled out.
//
// Every thread of the block executes every barrier and every shuffle. Out-of-range threads only
// skip the global load and store. This matters because the scan remaps work: thread (x, y) scans
// column x / 8 + 4 * y of the tile, which is usually not the column it loaded itself.
__global__ void shfl_integral_vertical(PtrStepSz<unsigned int> buffer, PtrStepSz<unsigned int> integral)
{
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 300)
    // [column within the block][row within the current 8-row step]; the inner dimension is padded to 9.
    __shared__ unsigned int sums[32][9];

    const int tidx = blockIdx.x * blockDim.x + threadIdx.x;

    // The destination is shifted by (+1, +1), so only rows - 1 by cols - 1 source elements fit
    // inside `integral`. Anything beyond that would be stored outside of it.
    const int srcRows = integral.rows - 1;
    const bool colInRange = tidx + 1 < integral.cols;

    // Tile element this thread scans: column k, row j. Lanes 8a .. 8a + 7 of a warp share column k.
    const int j = threadIdx.x % 8;
    const int k = threadIdx.x / 8 + threadIdx.y * 4;

    // Sum of all rows of this thread's own column handled in previous steps.
    unsigned int stepSum = 0;

    // The loop bound is identical for all threads of the block, so the barriers inside are safe
    // even when the number of rows is not a multiple of blockDim.y.
    for (int yBase = 0; yBase < srcRows; yBase += blockDim.y)
    {
        const int y = yBase + threadIdx.y;
        const bool inRange = colInRange && (y < srcRows);

        // Out-of-range elements contribute zero, which keeps the column totals correct.
        unsigned int sum = inRange ? buffer.ptr(y)[tidx] : 0U;

        // place into SMEM
        sums[threadIdx.x][threadIdx.y] = sum;
        __syncthreads();

        // shfl scan reduce the SMEM, reformatting so the column sums are computed in a warp:
        // inclusive scan over the 8 rows of column k inside an 8-lane group. The `j >= i` guard
        // discards values shuffled in from the neighbouring group. Offsets 1, 2, 4 are enough.
        int partial_sum = sums[k][j];

        for (int i = 1; i < 8; i *= 2)
        {
#if defined(CUDART_VERSION) && (CUDART_VERSION >= 9000)
            // All 32 lanes are active here, so the full mask is correct.
            const int n = __shfl_up_sync(0xFFFFFFFFU, partial_sum, i, 32);
#else
            const int n = __shfl_up(partial_sum, i, 32);
#endif

            if (j >= i)
                partial_sum += n;
        }

        sums[k][j] = partial_sum;
        __syncthreads();

        // then read out properly: rows above within this step, plus all previous steps
        if (threadIdx.y > 0)
            sum += sums[threadIdx.x][threadIdx.y - 1];

        sum += stepSum;
        stepSum += sums[threadIdx.x][blockDim.y - 1];

        // The tile is overwritten at the top of the next step; wait until everyone has read it.
        __syncthreads();

        if (inRange)
            integral.ptr(y + 1)[tidx + 1] = sum;
    }
#endif
}
// Buffered integral used for frame preprocessing before Soft Cascade evaluation.
// Both kernels are only enqueued on `stream`; no synchronization is performed here.
// Launches shfl_integral_horizontal (img -> buffer) and then shfl_integral_vertical (buffer -> integral).
// ToDo: partial dy
void shfl_integral_gpu_buffered(PtrStepSzb img, PtrStepSz<uint4> buffer, PtrStepSz<unsigned int> integral,
                                int blockStep, cudaStream_t stream)
{
    // First launch: one block per image row, blockStep threads per block.
    cudaSafeCall( cudaFuncSetCacheConfig(shfl_integral_horizontal, cudaFuncCachePreferL1) );

    const int rowBlocks = img.rows;
    const int rowThreads = blockStep;

    shfl_integral_horizontal<<<rowBlocks, rowThreads, 0, stream>>>((PtrStepSz<uint4>) img, buffer);
    cudaSafeCall( cudaGetLastError() );

    // Second launch: 32x8 thread blocks, enough of them along x to cover integral.cols.
    const dim3 colThreads(32, 8);
    const dim3 colBlocks(divUp(integral.cols, colThreads.x), 1);

    shfl_integral_vertical<<<colBlocks, colThreads, 0, stream>>>((PtrStepSz<uint>)buffer, integral);
    cudaSafeCall( cudaGetLastError() );
}
}
}}}
...
...
modules/gpu/src/cuda/isf-sc.cu
View file @
df392cc8
...
...
@@ -198,14 +198,14 @@ namespace icf {
Node node = nodes[nId];
float threshold = rescale<isUp>(level, node);
int sum = get<isUp>(x, y + (node.threshold >> 28) * 12
1
, node.rect);
int sum = get<isUp>(x, y + (node.threshold >> 28) * 12
0
, node.rect);
int next = 1 + (int)(sum >= threshold);
dprintf("%d: go: %d (%d >= %f)\n\n" ,threadIdx.x, next, sum, threshold);
node = nodes[nId + next];
threshold = rescale<isUp>(level, node);
sum = get<isUp>(x, y + (node.threshold >> 28) * 12
1
, node.rect);
sum = get<isUp>(x, y + (node.threshold >> 28) * 12
0
, node.rect);
const int lShift = (next - 1) * 2 + (int)(sum >= threshold);
float impact = leaves[(st + threadIdx.x) * 4 + lShift];
...
...
modules/gpu/src/softcascade.cpp
View file @
df392cc8
...
...
@@ -76,14 +76,20 @@ cv::gpu::device::icf::Level::Level(int idx, const Octave& oct, const float scale
}
namespace
cv
{
namespace
gpu
{
namespace
device
{
namespace
icf
{
void
fillBins
(
cv
::
gpu
::
PtrStepSzb
hogluv
,
const
cv
::
gpu
::
PtrStepSzf
&
nangle
,
const
int
fw
,
const
int
fh
,
const
int
bins
);
}
namespace
imgproc
{
void
shfl_integral_gpu
(
PtrStepSzb
img
,
PtrStepSz
<
unsigned
int
>
integral
,
cudaStream_t
stream
);
namespace
imgproc
{
void
shfl_integral_gpu_buffered
(
PtrStepSzb
,
PtrStepSz
<
uint4
>
,
PtrStepSz
<
unsigned
int
>
,
int
,
cudaStream_t
);
template
<
typename
T
>
void
resize_gpu
(
PtrStepSzb
src
,
PtrStepSzb
srcWhole
,
int
xoff
,
int
yoff
,
float
fx
,
float
fy
,
PtrStepSzb
dst
,
int
interpolation
,
cudaStream_t
stream
);
}
}}}
struct
cv
::
gpu
::
SoftCascade
::
Filds
...
...
@@ -319,9 +325,13 @@ struct cv::gpu::SoftCascade::Filds
plane
.
create
(
FRAME_HEIGHT
*
(
HOG_LUV_BINS
+
1
),
FRAME_WIDTH
,
CV_8UC1
);
fplane
.
create
(
FRAME_HEIGHT
*
6
,
FRAME_WIDTH
,
CV_32FC1
);
luv
.
create
(
FRAME_HEIGHT
,
FRAME_WIDTH
,
CV_8UC3
);
shrunk
.
create
(
FRAME_HEIGHT
/
shr
*
HOG_LUV_BINS
,
FRAME_WIDTH
/
shr
,
CV_8UC1
);
integralBuffer
.
create
(
1
,
(
shrunk
.
rows
+
1
)
*
HOG_LUV_BINS
*
(
shrunk
.
cols
+
1
),
CV_32SC1
);
hogluv
.
create
((
FRAME_HEIGHT
/
shr
+
1
)
*
HOG_LUV_BINS
,
FRAME_WIDTH
/
shr
+
64
,
CV_32SC1
);
integralBuffer
.
create
(
shrunk
.
rows
,
shrunk
.
cols
,
CV_32SC1
);
hogluv
.
create
((
FRAME_HEIGHT
/
shr
)
*
HOG_LUV_BINS
+
1
,
FRAME_WIDTH
/
shr
+
1
,
CV_32SC1
);
hogluv
.
setTo
(
cv
::
Scalar
::
all
(
0
));
detCounter
.
create
(
1
,
1
,
CV_32SC1
);
octaves
.
upload
(
hoctaves
);
...
...
@@ -432,16 +442,7 @@ private:
GpuMat
channels
(
plane
,
cv
::
Rect
(
0
,
0
,
fw
,
fh
*
Filds
::
HOG_LUV_BINS
));
cv
::
gpu
::
resize
(
channels
,
shrunk
,
cv
::
Size
(),
0.25
,
0.25
,
CV_INTER_AREA
);
fw
/=
shrinkage
;
fh
/=
shrinkage
;
for
(
int
i
=
0
;
i
<
Filds
::
HOG_LUV_BINS
;
++
i
)
{
GpuMat
channel
(
shrunk
,
cv
::
Rect
(
0
,
fh
*
i
,
fw
,
fh
));
GpuMat
sum
(
hogluv
,
cv
::
Rect
(
0
,
(
fh
+
1
)
*
i
,
fw
+
1
,
fh
+
1
));
cv
::
gpu
::
integralBuffered
(
channel
,
sum
,
integralBuffer
);
}
device
::
imgproc
::
shfl_integral_gpu_buffered
(
shrunk
,
integralBuffer
,
hogluv
,
12
,
0
);
}
public
:
...
...
modules/gpu/test/test_softcascade.cpp
View file @
df392cc8
...
...
@@ -271,6 +271,7 @@ GPU_TEST_P(SoftCascadeTestAll, detect,
ASSERT_EQ
(
detections
.
cols
/
sizeof
(
Detection
)
,
3670U
);
}
//ToDo: fix me
GPU_TEST_P
(
SoftCascadeTestAll
,
detectOnIntegral
,
ALL_DEVICES
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment