Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
0e1005ca
Commit
0e1005ca
authored
Nov 28, 2012
by
marina.kolpakova
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
optimize hog bin computing
parent
ee291a15
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
179 additions
and
41 deletions
+179
-41
gpu.hpp
modules/gpu/include/opencv2/gpu/gpu.hpp
+22
-3
icf-sc.cu
modules/gpu/src/cuda/icf-sc.cu
+70
-20
softcascade.cpp
modules/gpu/src/softcascade.cpp
+87
-18
No files found.
modules/gpu/include/opencv2/gpu/gpu.hpp
View file @
0e1005ca
...
...
@@ -1534,6 +1534,28 @@ class CV_EXPORTS SCascade : public Algorithm
{
public
:
enum
{
GENERIC
=
1
,
SEPARABLE
=
2
};
class
CV_EXPORTS
Preprocessor
{
public
:
// Appends specified number of HOG first-order features integrals into given vector.
// Param frame is an input 3-channel bgr image.
// Param channels is a GPU matrix of integrals.
// Param stream is stream is a high-level CUDA stream abstraction used for asynchronous execution.
virtual
void
apply
(
InputArray
frame
,
OutputArray
channels
,
Stream
&
stream
=
Stream
::
Null
())
=
0
;
// Creates a specific preprocessor implementation.
// Param shrinkage is a resizing factor. Resize is applied before the computing integral sum
// Param bins is a number of HOG-like channels.
// Param method is a channel computing method.
static
cv
::
Ptr
<
Preprocessor
>
create
(
const
int
shrinkage
,
const
int
bins
,
const
int
method
=
GENERIC
);
protected
:
Preprocessor
();
};
// Representation of detectors result.
struct
CV_EXPORTS
Detection
{
...
...
@@ -1576,9 +1598,6 @@ public:
// Param stream is stream is a high-level CUDA stream abstraction used for asynchronous execution
virtual
void
detect
(
InputArray
image
,
InputArray
rois
,
OutputArray
objects
,
Stream
&
stream
=
Stream
::
Null
())
const
;
// Preprocesing only
virtual
void
preprocess
(
InputArray
image
,
OutputArray
channels
,
Stream
&
stream
=
Stream
::
Null
())
const
;
// Convert ROI matrix into the suitable for detect method.
// Param roi is an input matrix of the same size as the image.
// There non zero value mean that detector should be executed in this point.
...
...
modules/gpu/src/cuda/icf-sc.cu
View file @
0e1005ca
...
...
@@ -99,6 +99,7 @@ namespace icf {
cudaSafeCall(cudaDeviceSynchronize());
}
template<bool isDefaultNum>
__device__ __forceinline__ int fast_angle_bin(const float& dx, const float& dy)
{
const float angle_quantum = M_PI / 6.f;
...
...
@@ -110,36 +111,82 @@ namespace icf {
return static_cast<int>(angle * angle_scaling) % 6;
}
texture<uchar, cudaTextureType2D, cudaReadModeElementType> tgray;
__global__ void magnitude_d(PtrStepSzb mag)
template<>
__device__ __forceinline__ int fast_angle_bin<true>(const float& dy, const float& dx)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
int index = 0;
float max_dot = fabs(dx);
{
const float dot_product = fabs(dx * 0.8660254037844386f + dy * 0.5f);
const float dx_a = tex2D(tgray, x + 1, y),
dx_b = tex2D(tgray, x - 1, y),
dx = dx_a - dx_b,
if(dot_product > max_dot)
{
max_dot = dot_product;
index = 1;
}
}
{
const float dot_product = fabs(dy * 0.8660254037844386f + dx * 0.5f);
dy_a = tex2D(tgray, x, y + 1),
dy_b = tex2D(tgray, x, y - 1),
dy = dy_a - dy_b;
if(dot_product > max_dot)
{
max_dot = dot_product;
index = 2;
}
}
{
int i = 3;
float2 bin_vector_i;
bin_vector_i.x = ::cos(i * (M_PI / 6.f));
bin_vector_i.y = ::sin(i * (M_PI / 6.f));
const float dot_product = fabs(dx * bin_vector_i.x + dy * bin_vector_i.y);
if(dot_product > max_dot)
{
max_dot = dot_product;
index = i;
}
}
{
const float dot_product = fabs(dx * (-0.4999999999999998f) + dy * 0.8660254037844387f);
if(dot_product > max_dot)
{
max_dot = dot_product;
index = 4;
}
}
{
const float dot_product = fabs(dx * (-0.8660254037844387f) + dy * 0.49999999999999994f);
if(dot_product > max_dot)
{
max_dot = dot_product;
index = 5;
}
}
return index;
}
const float magnitude_scaling = 1.0f/ sqrtf(2)
;
texture<uchar, cudaTextureType2D, cudaReadModeElementType> tgray
;
const float magnitude = sqrtf((dx * dx) + (dy * dy)) * magnitude_scaling;
const uchar magnitude_u8 = static_cast<uchar>(magnitude);
template<bool isDefaultNum>
__global__ void gray2hog(PtrStepSzb mag)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
mag( 480 * 6 + y, x) = magnitude_u8;
const float dx = tex2D(tgray, x + 1, y + 0) - tex2D(tgray, x - 1, y - 0);
const float dy = tex2D(tgray, x + 0, y + 1) - tex2D(tgray, x - 0, y - 1);
int angle_channel_index;
const float magnitude = sqrtf((dx * dx) + (dy * dy)) * (1.0f / sqrtf(2));
const uchar cmag = static_cast<uchar>(magnitude);
angle_channel_index = fast_angle_bin(dy, dx)
;
mag( 480 *
angle_channel_index + y, x) = magnitude_u8
;
mag( 480 * 6 + y, x) = cmag
;
mag( 480 *
fast_angle_bin<isDefaultNum>(dy, dx) + y, x) = cmag
;
}
void
magnitude(const PtrStepSzb& gray, PtrStepSzb mag
)
void
gray2hog(const PtrStepSzb& gray, PtrStepSzb mag, const int bins
)
{
dim3 block(32, 8);
dim3 grid(gray.cols / 32, gray.rows / 8);
...
...
@@ -147,7 +194,10 @@ namespace icf {
cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar>();
cudaSafeCall( cudaBindTexture2D(0, tgray, gray.data, desc, gray.cols, gray.rows, gray.step) );
magnitude_d<<<grid, block>>>(mag);
if (bins == 6)
gray2hog<true><<<grid, block>>>(mag);
else
gray2hog<false><<<grid, block>>>(mag);
cudaSafeCall(cudaDeviceSynchronize());
}
...
...
modules/gpu/src/softcascade.cpp
View file @
0e1005ca
...
...
@@ -57,6 +57,11 @@ void cv::gpu::SCascade::genRoi(InputArray, OutputArray, Stream&) const { throw_n
void
cv
::
gpu
::
SCascade
::
read
(
const
FileNode
&
fn
)
{
Algorithm
::
read
(
fn
);
}
cv
::
gpu
::
SCascade
::
Preprocessor
::
Preprocessor
()
{
throw_nogpu
();
}
void
cv
::
gpu
::
SCascade
::
Preprocessor
::
create
(
const
int
,
const
int
,
const
int
)
{
throw_nogpu
();
}
#else
#include <icf.hpp>
...
...
@@ -90,7 +95,7 @@ namespace icf {
PtrStepSzb
suppressed
,
cudaStream_t
stream
);
void
bgr2Luv
(
const
PtrStepSzb
&
bgr
,
PtrStepSzb
luv
);
void
magnitude
(
const
PtrStepSzb
&
gray
,
PtrStepSzb
mag
);
void
gray2hog
(
const
PtrStepSzb
&
gray
,
PtrStepSzb
mag
,
const
int
bins
);
}
namespace
imgproc
{
...
...
@@ -609,34 +614,97 @@ void cv::gpu::SCascade::read(const FileNode& fn)
Algorithm
::
read
(
fn
);
}
// namespace {
// void bgr2Luv(const cv::gpu::GpuMat& input, cv::gpu::GpuMat& luv /*integral*/)
// {
// cv::gpu::GpuMat bgr;
// cv::gpu::GaussianBlur(input, bgr, cv::Size(3, 3), -1);
// cv::gpu::GpuMat gray, /*luv,*/ shrunk, buffer;
// luv.create(bgr.rows * 10, bgr.cols, CV_8UC1);
// luv.setTo(0);
// cv::gpu::cvtColor(bgr, gray, CV_BGR2GRAY);
// cv::gpu::device::icf::magnitude(gray, luv(cv::Rect(0, 0, bgr.cols, bgr.rows * 7)));
// cv::gpu::GpuMat __luv(luv, cv::Rect(0, bgr.rows * 7, bgr.cols, bgr.rows * 3));
// cv::gpu::device::icf::bgr2Luv(bgr, __luv);
// // cv::gpu::resize(luv, shrunk, cv::Size(), 0.25f, 0.25f, CV_INTER_AREA);
// // cv::gpu::integralBuffered(shrunk, integral, buffer);
// }
// }
namespace
{
void
bgr2Luv
(
const
cv
::
gpu
::
GpuMat
&
input
,
cv
::
gpu
::
GpuMat
&
integral
)
using
cv
::
InputArray
;
using
cv
::
OutputArray
;
using
cv
::
gpu
::
Stream
;
using
cv
::
gpu
::
GpuMat
;
struct
GenricPreprocessor
:
public
cv
::
gpu
::
SCascade
::
Preprocessor
{
cv
::
gpu
::
GpuMat
bgr
;
cv
::
gpu
::
GaussianBlur
(
input
,
bgr
,
cv
::
Size
(
3
,
3
),
-
1
);
GenricPreprocessor
(
const
int
s
,
const
int
b
)
:
cv
::
gpu
::
SCascade
::
Preprocessor
(),
shrinkage
(
s
),
bins
(
b
)
{}
cv
::
gpu
::
GpuMat
gray
,
luv
,
shrunk
,
buffer
;
luv
.
create
(
bgr
.
rows
*
10
,
bgr
.
cols
,
CV_8UC1
);
luv
.
setTo
(
0
);
virtual
void
apply
(
InputArray
/*frame*/
,
OutputArray
/*channels*/
,
Stream
&
/*s*/
=
Stream
::
Null
())
{
cv
::
gpu
::
cvtColor
(
bgr
,
gray
,
CV_BGR2GRAY
);
cv
::
gpu
::
device
::
icf
::
magnitude
(
gray
,
luv
(
cv
::
Rect
(
0
,
0
,
bgr
.
cols
,
bgr
.
rows
*
7
)));
}
cv
::
gpu
::
GpuMat
__luv
(
luv
,
cv
::
Rect
(
0
,
bgr
.
rows
*
7
,
bgr
.
cols
,
bgr
.
rows
*
3
));
cv
::
gpu
::
device
::
icf
::
bgr2Luv
(
bgr
,
__luv
);
private
:
const
int
shrinkage
;
const
int
bins
;
};
cv
::
gpu
::
resize
(
luv
,
shrunk
,
cv
::
Size
(),
0.25
f
,
0.25
f
,
CV_INTER_AREA
);
cv
::
gpu
::
integralBuffered
(
shrunk
,
integral
,
buffer
);
}
inline
void
setZero
(
cv
::
gpu
::
GpuMat
&
m
,
Stream
&
s
)
{
if
(
s
)
s
.
enqueueMemSet
(
m
,
0
);
else
m
.
setTo
(
0
);
}
void
cv
::
gpu
::
SCascade
::
preprocess
(
InputArray
_bgr
,
OutputArray
_channels
,
Stream
&
stream
)
const
struct
SeparablePreprocessor
:
public
cv
::
gpu
::
SCascade
::
Preprocessor
{
CV_Assert
(
fields
);
(
void
)
stream
;
const
GpuMat
bgr
=
_bgr
.
getGpuMat
(),
channels
=
_channels
.
getGpuMat
();
SeparablePreprocessor
(
const
int
s
,
const
int
b
)
:
cv
::
gpu
::
SCascade
::
Preprocessor
(),
shrinkage
(
s
),
bins
(
b
)
{}
virtual
void
apply
(
InputArray
_frame
,
OutputArray
_channels
,
Stream
&
s
=
Stream
::
Null
())
{
const
GpuMat
frame
=
_frame
.
getGpuMat
();
cv
::
gpu
::
GaussianBlur
(
frame
,
bgr
,
cv
::
Size
(
3
,
3
),
-
1.0
);
_channels
.
create
(
frame
.
rows
*
(
4
+
bins
),
frame
.
cols
,
CV_8UC1
);
GpuMat
channels
=
_channels
.
getGpuMat
();
setZero
(
channels
,
s
);
cv
::
gpu
::
cvtColor
(
bgr
,
gray
,
CV_BGR2GRAY
);
cv
::
gpu
::
device
::
icf
::
gray2hog
(
gray
,
channels
(
cv
::
Rect
(
0
,
0
,
bgr
.
cols
,
bgr
.
rows
*
(
bins
+
1
))),
bins
);
cv
::
gpu
::
GpuMat
luv
(
channels
,
cv
::
Rect
(
0
,
bgr
.
rows
*
(
bins
+
1
),
bgr
.
cols
,
bgr
.
rows
*
3
));
cv
::
gpu
::
device
::
icf
::
bgr2Luv
(
bgr
,
luv
);
}
private
:
const
int
shrinkage
;
const
int
bins
;
GpuMat
bgr
;
GpuMat
gray
;
};
}
cv
::
gpu
::
SCascade
::
Preprocessor
::
Preprocessor
(){}
cv
::
Ptr
<
cv
::
gpu
::
SCascade
::
Preprocessor
>
cv
::
gpu
::
SCascade
::
Preprocessor
::
create
(
const
int
s
,
const
int
b
,
const
int
m
)
{
CV_Assert
(
m
==
SEPARABLE
||
m
==
GENERIC
);
if
(
m
==
GENERIC
)
return
cv
::
Ptr
<
cv
::
gpu
::
SCascade
::
Preprocessor
>
(
new
GenricPreprocessor
(
s
,
b
));
return
cv
::
Ptr
<
cv
::
gpu
::
SCascade
::
Preprocessor
>
(
new
SeparablePreprocessor
(
s
,
b
));
}
#endif
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment