Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
30bce16a
Commit
30bce16a
authored
Oct 08, 2012
by
marina.kolpakova
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
optimize roi loads
only one thread load roi for all block
parent
fdef0adf
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
63 additions
and
66 deletions
+63
-66
perf_objdetect.cpp
modules/gpu/perf/perf_objdetect.cpp
+25
-17
isf-sc.cu
modules/gpu/src/cuda/isf-sc.cu
+20
-42
softcascade.cpp
modules/gpu/src/softcascade.cpp
+1
-1
test_softcascade.cpp
modules/gpu/test/test_softcascade.cpp
+17
-6
No files found.
modules/gpu/perf/perf_objdetect.cpp
View file @
30bce16a
...
...
@@ -176,33 +176,35 @@ PERF_TEST_P(SoftCascadeTest, detect,
{
if
(
runOnGpu
)
{
cv
::
Mat
cpu
=
readImage
(
G
etParam
().
second
);
cv
::
Mat
cpu
=
readImage
(
G
ET_PARAM
(
1
)
);
ASSERT_FALSE
(
cpu
.
empty
());
cv
::
gpu
::
GpuMat
colored
(
cpu
);
cv
::
gpu
::
SoftCascade
cascade
;
ASSERT_TRUE
(
cascade
.
load
(
perf
::
TestBase
::
getDataPath
(
GetParam
().
first
)));
cv
::
gpu
::
GpuMat
objectBoxes
(
1
,
16384
,
CV_8UC1
),
rois
(
cascade
.
getRoiSize
(),
CV_8UC1
);
ASSERT_TRUE
(
cascade
.
load
(
perf
::
TestBase
::
getDataPath
(
GET_PARAM
(
0
))));
rois
.
setTo
(
0
)
;
cv
::
gpu
::
GpuMat
sub
(
rois
,
cv
::
Rect
(
rois
.
cols
/
4
,
rois
.
rows
/
4
,
rois
.
cols
/
2
,
rois
.
rows
/
2
)
);
sub
.
setTo
(
cv
::
Scalar
::
all
(
1
)
);
cascade
.
detectMultiScale
(
colored
,
rois
,
objectBoxes
);
cv
::
gpu
::
GpuMat
objectBoxes
(
1
,
16384
,
CV_8UC1
),
rois
(
cascade
.
getRoiSize
(),
CV_8UC1
),
trois
;
rois
.
setTo
(
1
);
cv
::
gpu
::
transpose
(
rois
,
trois
);
cascade
.
detectMultiScale
(
colored
,
t
rois
,
objectBoxes
);
TEST_CYCLE
()
{
cascade
.
detectMultiScale
(
colored
,
rois
,
objectBoxes
);
cascade
.
detectMultiScale
(
colored
,
t
rois
,
objectBoxes
);
}
}
else
}
else
{
cv
::
Mat
colored
=
readImage
(
G
etParam
().
second
);
cv
::
Mat
colored
=
readImage
(
G
ET_PARAM
(
1
)
);
ASSERT_FALSE
(
colored
.
empty
());
cv
::
SoftCascade
cascade
;
ASSERT_TRUE
(
cascade
.
load
(
getDataPath
(
G
etParam
().
first
)));
ASSERT_TRUE
(
cascade
.
load
(
getDataPath
(
G
ET_PARAM
(
0
)
)));
std
::
vector
<
cv
::
Rect
>
rois
,
objectBoxes
;
std
::
vector
<
cv
::
Rect
>
rois
;
typedef
cv
::
SoftCascade
::
Detection
Detection
;
std
::
vector
<
Detection
>
objectBoxes
;
cascade
.
detectMultiScale
(
colored
,
rois
,
objectBoxes
);
TEST_CYCLE
()
...
...
@@ -262,13 +264,16 @@ PERF_TEST_P(SoftCascadeTestRoi, detectInRoi,
sub
.
setTo
(
1
);
}
cv
::
gpu
::
GpuMat
trois
;
cv
::
gpu
::
transpose
(
rois
,
trois
);
cv
::
gpu
::
GpuMat
curr
=
objectBoxes
;
cascade
.
detectMultiScale
(
colored
,
rois
,
curr
);
cascade
.
detectMultiScale
(
colored
,
t
rois
,
curr
);
TEST_CYCLE
()
{
curr
=
objectBoxes
;
cascade
.
detectMultiScale
(
colored
,
rois
,
curr
);
cascade
.
detectMultiScale
(
colored
,
t
rois
,
curr
);
}
}
else
...
...
@@ -301,7 +306,10 @@ PERF_TEST_P(SoftCascadeTestRoi, detectEachRoi,
sub
.
setTo
(
1
);
cv
::
gpu
::
GpuMat
curr
=
objectBoxes
;
cascade
.
detectMultiScale
(
colored
,
rois
,
curr
);
cv
::
gpu
::
GpuMat
trois
;
cv
::
gpu
::
transpose
(
rois
,
trois
);
cascade
.
detectMultiScale
(
colored
,
trois
,
curr
);
TEST_CYCLE
()
{
...
...
@@ -372,7 +380,7 @@ PERF_TEST_P(ImageAndCascade, ObjDetect_LBPClassifier,
cv
::
Mat
img
=
readImage
(
GetParam
().
first
,
cv
::
IMREAD_GRAYSCALE
);
ASSERT_FALSE
(
img
.
empty
());
if
(
PERF_RUN_GPU
()
)
if
(
runOnGpu
)
{
cv
::
gpu
::
CascadeClassifier_GPU
d_cascade
;
ASSERT_TRUE
(
d_cascade
.
load
(
perf
::
TestBase
::
getDataPath
(
GetParam
().
second
)));
...
...
modules/gpu/src/cuda/isf-sc.cu
View file @
30bce16a
...
...
@@ -86,7 +86,6 @@ namespace icf {
}
texture<int, cudaTextureType2D, cudaReadModeElementType> thogluv;
texture<char, cudaTextureType2D, cudaReadModeElementType> troi;
template<bool isUp>
__device__ __forceinline__ float rescale(const Level& level, Node& node)
...
...
@@ -130,11 +129,6 @@ namespace icf {
float relScale = level.relScale;
float farea = scaledRect.z * scaledRect.w;
dprintf("%d: feature %d box %d %d %d %d\n",threadIdx.x, (node.threshold >> 28), scaledRect.x, scaledRect.y,
scaledRect.z, scaledRect.w);
dprintf("%d: rescale: %f [%f %f] selected %f\n",threadIdx.x, level.relScale, level.scaling[0], level.scaling[1],
level.scaling[(node.threshold >> 28) > 6]);
// rescale
scaledRect.x = __float2int_rn(relScale * scaledRect.x);
scaledRect.y = __float2int_rn(relScale * scaledRect.y);
...
...
@@ -146,15 +140,7 @@ namespace icf {
const float expected_new_area = farea * relScale * relScale;
float approx = __fdividef(sarea, expected_new_area);
dprintf("%d: new rect: %d box %d %d %d %d rel areas %f %f\n",threadIdx.x, (node.threshold >> 28),
scaledRect.x, scaledRect.y, scaledRect.z, scaledRect.w, farea * relScale * relScale, sarea);
float rootThreshold = (node.threshold & 0x0FFFFFFFU) * approx;
rootThreshold *= level.scaling[(node.threshold >> 28) > 6];
dprintf("%d: approximation %f %d -> %f %f\n",threadIdx.x, approx, (node.threshold & 0x0FFFFFFFU), rootThreshold,
level.scaling[(node.threshold >> 28) > 6]);
float rootThreshold = (node.threshold & 0x0FFFFFFFU) * approx * level.scaling[(node.threshold >> 28) > 6];
return rootThreshold;
}
...
...
@@ -162,33 +148,17 @@ namespace icf {
template<bool isUp>
__device__ __forceinline__ int get(int x, int y, uchar4 area)
{
dprintf("%d: feature box %d %d %d %d\n",threadIdx.x, area.x, area.y, area.z, area.w);
dprintf("%d: extract feature for: [%d %d] [%d %d] [%d %d] [%d %d]\n",threadIdx.x,
x + area.x, y + area.y, x + area.z, y + area.y, x + area.z,y + area.w,
x + area.x, y + area.w);
dprintf("%d: at point %d %d with offset %d\n", x, y, 0);
int a = tex2D(thogluv, x + area.x, y + area.y);
int b = tex2D(thogluv, x + area.z, y + area.y);
int c = tex2D(thogluv, x + area.z, y + area.w);
int d = tex2D(thogluv, x + area.x, y + area.w);
dprintf("%d retruved integral values: %d %d %d %d\n",threadIdx.x, a, b, c, d);
return (a - b + c - d);
}
template<>
__device__ __forceinline__ int get<true>(int x, int y, uchar4 area)
{
dprintf("%d: feature box %d %d %d %d\n",threadIdx.x, area.x, area.y, area.z, area.w);
dprintf("%d: extract feature for: [%d %d] [%d %d] [%d %d] [%d %d]\n",threadIdx.x,
x + area.x, y + area.y, x + area.z, y + area.y, x + area.z,y + area.w,
x + area.x, y + area.w);
dprintf("%d: at point %d %d with offset %d\n", x, y, 0);
x += area.x;
y += area.y;
int a = tex2D(thogluv, x, y);
...
...
@@ -196,11 +166,10 @@ namespace icf {
int c = tex2D(thogluv, x + area.z, y + area.w);
int d = tex2D(thogluv, x, y + area.w);
dprintf("%d retruved integral values: %d %d %d %d\n",threadIdx.x, a, b, c, d);
return (a - b + c - d);
}
texture<float2, cudaTextureType2D, cudaReadModeElementType> troi;
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
template<bool isUp>
__global__ void test_kernel_warp(const Level* levels, const Octave* octaves, const float* stages,
...
...
@@ -210,12 +179,21 @@ namespace icf {
const int y = blockIdx.y * blockDim.y + threadIdx.y;
const int x = blockIdx.x;
__shared__ volatile char roiCache[8];
if (!threadIdx.y && !threadIdx.x)
{
((float2*)roiCache)[threadIdx.x] = tex2D(troi, blockIdx.y, x);
}
__syncthreads();
if (!roiCache[threadIdx.y]) return;
Level level = levels[downscales + blockIdx.z];
if(x >= level.workRect.x || y >= level.workRect.y) return;
if (!tex2D(troi, x, y)) return;
Octave octave = octaves[level.octave];
int st = octave.index * octave.stages;
const int stEnd = st + 1024;
...
...
@@ -282,9 +260,9 @@ namespace icf {
// if (blockIdx.z != 31) return;
if(x >= level.workRect.x || y >= level.workRect.y) return;
int roi = tex2D(troi, x, y);
printf("%d\n", roi);
if (!roi) return;
//
int roi = tex2D(troi, x, y);
//
printf("%d\n", roi);
//
if (!roi) return;
Octave octave = octaves[level.octave];
...
...
@@ -357,8 +335,8 @@ namespace icf {
cudaChannelFormatDesc desc = cudaCreateChannelDesc<int>();
cudaSafeCall( cudaBindTexture2D(0, thogluv, hogluv.data, desc, hogluv.cols, hogluv.rows, hogluv.step));
cudaChannelFormatDesc desc_roi = cudaCreateChannelDesc<
char
>();
cudaSafeCall( cudaBindTexture2D(0, troi, roi.data, desc_roi, roi.cols, roi.rows, roi.step));
cudaChannelFormatDesc desc_roi = cudaCreateChannelDesc<
float2
>();
cudaSafeCall( cudaBindTexture2D(0, troi, roi.data, desc_roi, roi.cols
/ 8
, roi.rows, roi.step));
test_kernel_warp<false><<<grid, block>>>(l, oct, st, nd, lf, det, max_det, ctr, 0);
cudaSafeCall( cudaGetLastError());
...
...
@@ -391,8 +369,8 @@ namespace icf {
cudaChannelFormatDesc desc = cudaCreateChannelDesc<int>();
cudaSafeCall( cudaBindTexture2D(0, thogluv, hogluv.data, desc, hogluv.cols, hogluv.rows, hogluv.step));
cudaChannelFormatDesc desc_roi = cudaCreateChannelDesc<
char
>();
cudaSafeCall( cudaBindTexture2D(0, troi, roi.data, desc_roi, roi.cols, roi.rows, roi.step));
cudaChannelFormatDesc desc_roi = cudaCreateChannelDesc<
float2
>();
cudaSafeCall( cudaBindTexture2D(0, troi, roi.data, desc_roi, roi.cols
/ 8
, roi.rows, roi.step));
if (scale >= downscales)
test_kernel_warp<true><<<grid, block>>>(l, oct, st, nd, lf, det, max_det, ctr, scale);
...
...
modules/gpu/src/softcascade.cpp
View file @
30bce16a
...
...
@@ -481,7 +481,7 @@ void cv::gpu::SoftCascade::detectMultiScale(const GpuMat& colored, const GpuMat&
CV_Assert
(
colored
.
type
()
==
CV_8UC3
);
// we guess user knows about shrincage
CV_Assert
((
rois
.
size
()
==
getRoiSize
()
)
&&
(
rois
.
type
()
==
CV_8UC1
));
CV_Assert
((
rois
.
size
()
.
width
==
getRoiSize
().
height
)
&&
(
rois
.
type
()
==
CV_8UC1
));
// only this window size allowed
CV_Assert
(
colored
.
cols
==
Filds
::
FRAME_WIDTH
&&
colored
.
rows
==
Filds
::
FRAME_HEIGHT
);
...
...
modules/gpu/test/test_softcascade.cpp
View file @
30bce16a
...
...
@@ -47,7 +47,7 @@
using
cv
::
gpu
::
GpuMat
;
// show detection results on input image with cv::imshow
//
#define SHOW_DETECTIONS
#define SHOW_DETECTIONS
#if defined SHOW_DETECTIONS
# define SHOW(res) \
...
...
@@ -154,26 +154,30 @@ GPU_TEST_P(SoftCascadeTest, detectInROI,
cv
::
gpu
::
SoftCascade
cascade
;
ASSERT_TRUE
(
cascade
.
load
(
cvtest
::
TS
::
ptr
()
->
get_data_path
()
+
GET_PARAM
(
0
)));
GpuMat
colored
(
coloredCpu
),
objectBoxes
(
1
,
16384
,
CV_8UC1
),
rois
(
cascade
.
getRoiSize
(),
CV_8UC1
);
GpuMat
colored
(
coloredCpu
),
objectBoxes
(
1
,
16384
,
CV_8UC1
),
rois
(
cascade
.
getRoiSize
(),
CV_8UC1
)
,
trois
;
rois
.
setTo
(
0
);
int
nroi
=
GET_PARAM
(
2
);
cv
::
Mat
result
(
coloredCpu
);
cv
::
RNG
rng
;
for
(
int
i
=
0
;
i
<
nroi
;
++
i
)
{
cv
::
Rect
r
=
getFromTable
(
rng
(
10
));
GpuMat
sub
(
rois
,
r
);
sub
.
setTo
(
1
);
r
.
x
*=
4
;
r
.
y
*=
4
;
r
.
width
*=
4
;
r
.
height
*=
4
;
cv
::
rectangle
(
result
,
r
,
cv
::
Scalar
(
0
,
0
,
255
,
255
),
1
);
}
cascade
.
detectMultiScale
(
colored
,
rois
,
objectBoxes
);
cv
::
gpu
::
transpose
(
rois
,
trois
);
cascade
.
detectMultiScale
(
colored
,
trois
,
objectBoxes
);
///
cv
::
Mat
dt
(
objectBoxes
);
typedef
cv
::
gpu
::
SoftCascade
::
Detection
detection_t
;
detection_t
*
dts
=
(
detection_t
*
)
dt
.
data
;
cv
::
Mat
result
(
coloredCpu
);
printTotal
(
std
::
cout
,
dt
.
cols
);
for
(
int
i
=
0
;
i
<
(
int
)(
dt
.
cols
/
sizeof
(
detection_t
));
++
i
)
...
...
@@ -204,8 +208,11 @@ GPU_TEST_P(SoftCascadeTest, detectInLevel,
GpuMat
colored
(
coloredCpu
),
objectBoxes
(
1
,
100
*
sizeof
(
detection_t
),
CV_8UC1
),
rois
(
cascade
.
getRoiSize
(),
CV_8UC1
);
rois
.
setTo
(
1
);
cv
::
gpu
::
GpuMat
trois
;
cv
::
gpu
::
transpose
(
rois
,
trois
);
int
level
=
GET_PARAM
(
2
);
cascade
.
detectMultiScale
(
colored
,
rois
,
objectBoxes
,
1
,
level
);
cascade
.
detectMultiScale
(
colored
,
t
rois
,
objectBoxes
,
1
,
level
);
cv
::
Mat
dt
(
objectBoxes
);
...
...
@@ -246,6 +253,9 @@ TEST(SoftCascadeTest, detect)
GpuMat
sub
(
rois
,
cv
::
Rect
(
rois
.
cols
/
4
,
rois
.
rows
/
4
,
rois
.
cols
/
2
,
rois
.
rows
/
2
));
sub
.
setTo
(
cv
::
Scalar
::
all
(
1
));
cascade
.
detectMultiScale
(
colored
,
rois
,
objectBoxes
);
cv
::
gpu
::
GpuMat
trois
;
cv
::
gpu
::
transpose
(
rois
,
trois
);
cascade
.
detectMultiScale
(
colored
,
trois
,
objectBoxes
);
}
#endif
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment