Commit 8257dc3c authored by Vladislav Vinogradov

refactor CUDA HOG algorithm:

use abstract interface with hidden implementation
parent 0af7597d
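For orientation, here is a minimal usage sketch of the refactored interface, put together from the calls that appear in the updated perf test and sample further down. The include path and the image file name are assumptions, not part of this commit.

```cpp
#include <vector>
#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/cudaobjdetect.hpp>   // header path assumed; adjust to your build

int main()
{
    // Load an 8-bit grayscale image (CV_8UC1 is a supported input type) and upload it.
    cv::Mat img = cv::imread("pedestrians.png", cv::IMREAD_GRAYSCALE); // placeholder path
    cv::cuda::GpuMat d_img(img);

    // The detector is now obtained through a factory instead of a constructor.
    cv::Ptr<cv::cuda::HOG> hog = cv::cuda::HOG::create();
    hog->setSVMDetector(hog->getDefaultPeopleDetector());

    // Former detectMultiScale() arguments become plain properties.
    hog->setNumLevels(64);
    hog->setScaleFactor(1.05);
    hog->setGroupThreshold(2);

    std::vector<cv::Rect> found;
    hog->detectMultiScale(d_img, found);
    return 0;
}
```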
@@ -65,19 +65,8 @@ namespace cv { namespace cuda {
// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector
//

-struct CV_EXPORTS HOGConfidence
-{
-    double scale;
-    std::vector<Point> locations;
-    std::vector<double> confidences;
-    std::vector<double> part_scores[4];
-};

/** @brief The class implements Histogram of Oriented Gradients (@cite Dalal2005) object detector.

-Interfaces of all methods are kept similar to the CPU HOG descriptor and detector analogues as much
-as possible.

@note
    - An example applying the HOG descriptor for people detection can be found at
        opencv_source_code/samples/cpp/peopledetect.cpp
@@ -86,11 +75,14 @@ as possible.
    - (Python) An example applying the HOG descriptor for people detection can be found at
        opencv_source_code/samples/python2/peopledetect.py
*/
-struct CV_EXPORTS HOGDescriptor
+class CV_EXPORTS HOG : public cv::Algorithm
{
-    enum { DEFAULT_WIN_SIGMA = -1 };
-    enum { DEFAULT_NLEVELS = 64 };
-    enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL };
+public:
+    enum
+    {
+        DESCR_FORMAT_ROW_BY_ROW,
+        DESCR_FORMAT_COL_BY_COL
+    };

    /** @brief Creates the HOG descriptor and detector.
@@ -99,132 +91,105 @@ struct CV_EXPORTS HOGDescriptor
    @param block_stride Block stride. It must be a multiple of cell size.
    @param cell_size Cell size. Only (8, 8) is supported for now.
    @param nbins Number of bins. Only 9 bins per cell are supported for now.
-    @param win_sigma Gaussian smoothing window parameter.
-    @param threshold_L2hys L2-Hys normalization method shrinkage.
-    @param gamma_correction Flag to specify whether the gamma correction preprocessing is required or
-    not.
-    @param nlevels Maximum number of detection window increases.
    */
-    HOGDescriptor(Size win_size=Size(64, 128), Size block_size=Size(16, 16),
-                  Size block_stride=Size(8, 8), Size cell_size=Size(8, 8),
-                  int nbins=9, double win_sigma=DEFAULT_WIN_SIGMA,
-                  double threshold_L2hys=0.2, bool gamma_correction=true,
-                  int nlevels=DEFAULT_NLEVELS);
+    static Ptr<HOG> create(Size win_size = Size(64, 128),
+                           Size block_size = Size(16, 16),
+                           Size block_stride = Size(8, 8),
+                           Size cell_size = Size(8, 8),
+                           int nbins = 9);

+    //! Gaussian smoothing window parameter.
+    virtual void setWinSigma(double win_sigma) = 0;
+    virtual double getWinSigma() const = 0;

+    //! L2-Hys normalization method shrinkage.
+    virtual void setL2HysThreshold(double threshold_L2hys) = 0;
+    virtual double getL2HysThreshold() const = 0;

+    //! Flag to specify whether the gamma correction preprocessing is required or not.
+    virtual void setGammaCorrection(bool gamma_correction) = 0;
+    virtual bool getGammaCorrection() const = 0;

+    //! Maximum number of detection window increases.
+    virtual void setNumLevels(int nlevels) = 0;
+    virtual int getNumLevels() const = 0;

+    //! Threshold for the distance between features and SVM classifying plane.
+    //! Usually it is 0 and should be specified in the detector coefficients (as the last free
+    //! coefficient). But if the free coefficient is omitted (which is allowed), you can specify it
+    //! manually here.
+    virtual void setHitThreshold(double hit_threshold) = 0;
+    virtual double getHitThreshold() const = 0;

+    //! Window stride. It must be a multiple of block stride.
+    virtual void setWinStride(Size win_stride) = 0;
+    virtual Size getWinStride() const = 0;

+    //! Coefficient of the detection window increase.
+    virtual void setScaleFactor(double scale0) = 0;
+    virtual double getScaleFactor() const = 0;

+    //! Coefficient to regulate the similarity threshold. When detected, some
+    //! objects can be covered by many rectangles. 0 means not to perform grouping.
+    //! See groupRectangles.
+    virtual void setGroupThreshold(int group_threshold) = 0;
+    virtual int getGroupThreshold() const = 0;

+    //! Descriptor storage format:
+    //! - **DESCR_FORMAT_ROW_BY_ROW** - Row-major order.
+    //! - **DESCR_FORMAT_COL_BY_COL** - Column-major order.
+    virtual void setDescriptorFormat(int descr_format) = 0;
+    virtual int getDescriptorFormat() const = 0;

    /** @brief Returns the number of coefficients required for the classification.
    */
-    size_t getDescriptorSize() const;
+    virtual size_t getDescriptorSize() const = 0;

    /** @brief Returns the block histogram size.
    */
-    size_t getBlockHistogramSize() const;
+    virtual size_t getBlockHistogramSize() const = 0;

    /** @brief Sets coefficients for the linear SVM classifier.
    */
-    void setSVMDetector(const std::vector<float>& detector);
+    virtual void setSVMDetector(InputArray detector) = 0;

-    /** @brief Returns coefficients of the classifier trained for people detection (for default window size).
+    /** @brief Returns coefficients of the classifier trained for people detection.
    */
-    static std::vector<float> getDefaultPeopleDetector();
+    virtual Mat getDefaultPeopleDetector() const = 0;

-    /** @brief Returns coefficients of the classifier trained for people detection (for 48x96 windows).
-    */
-    static std::vector<float> getPeopleDetector48x96();
-    /** @brief Returns coefficients of the classifier trained for people detection (for 64x128 windows).
-    */
-    static std::vector<float> getPeopleDetector64x128();

    /** @brief Performs object detection without a multi-scale window.

    @param img Source image. CV_8UC1 and CV_8UC4 types are supported for now.
    @param found_locations Left-top corner points of detected objects boundaries.
-    @param hit_threshold Threshold for the distance between features and SVM classifying plane.
-    Usually it is 0 and should be specified in the detector coefficients (as the last free
-    coefficient). But if the free coefficient is omitted (which is allowed), you can specify it
-    manually here.
-    @param win_stride Window stride. It must be a multiple of block stride.
-    @param padding Mock parameter to keep the CPU interface compatibility. It must be (0,0).
+    @param confidences Optional output array for confidences.
    */
-    void detect(const GpuMat& img, std::vector<Point>& found_locations,
-                double hit_threshold=0, Size win_stride=Size(),
-                Size padding=Size());
+    virtual void detect(InputArray img,
+                        std::vector<Point>& found_locations,
+                        std::vector<double>* confidences = NULL) = 0;

    /** @brief Performs object detection with a multi-scale window.

    @param img Source image. See cuda::HOGDescriptor::detect for type limitations.
    @param found_locations Detected objects boundaries.
+    @param confidences Optional output array for confidences.
    @param hit_threshold Threshold for the distance between features and SVM classifying plane. See
    cuda::HOGDescriptor::detect for details.
    @param win_stride Window stride. It must be a multiple of block stride.
    @param padding Mock parameter to keep the CPU interface compatibility. It must be (0,0).
-    @param scale0 Coefficient of the detection window increase.
-    @param group_threshold Coefficient to regulate the similarity threshold. When detected, some
-    objects can be covered by many rectangles. 0 means not to perform grouping. See groupRectangles .
    */
-    void detectMultiScale(const GpuMat& img, std::vector<Rect>& found_locations,
-                          double hit_threshold=0, Size win_stride=Size(),
-                          Size padding=Size(), double scale0=1.05,
-                          int group_threshold=2);
+    virtual void detectMultiScale(InputArray img,
+                                  std::vector<Rect>& found_locations,
+                                  std::vector<double>* confidences = NULL) = 0;

-    void computeConfidence(const GpuMat& img, std::vector<Point>& hits, double hit_threshold,
-                           Size win_stride, Size padding, std::vector<Point>& locations, std::vector<double>& confidences);
-    void computeConfidenceMultiScale(const GpuMat& img, std::vector<Rect>& found_locations,
-                                     double hit_threshold, Size win_stride, Size padding,
-                                     std::vector<HOGConfidence> &conf_out, int group_threshold);

    /** @brief Returns block descriptors computed for the whole image.

    @param img Source image. See cuda::HOGDescriptor::detect for type limitations.
-    @param win_stride Window stride. It must be a multiple of block stride.
    @param descriptors 2D array of descriptors.
-    @param descr_format Descriptor storage format:
-    - **DESCR_FORMAT_ROW_BY_ROW** - Row-major order.
-    - **DESCR_FORMAT_COL_BY_COL** - Column-major order.
-    The function is mainly used to learn the classifier.
+    @param stream CUDA stream.
    */
-    void getDescriptors(const GpuMat& img, Size win_stride,
-                        GpuMat& descriptors,
-                        int descr_format=DESCR_FORMAT_COL_BY_COL);
+    virtual void compute(InputArray img,
+                         OutputArray descriptors,
+                         Stream& stream = Stream::Null()) = 0;

-    Size win_size;
-    Size block_size;
-    Size block_stride;
-    Size cell_size;
-    int nbins;
-    double win_sigma;
-    double threshold_L2hys;
-    bool gamma_correction;
-    int nlevels;
-protected:
-    void computeBlockHistograms(const GpuMat& img);
-    void computeGradient(const GpuMat& img, GpuMat& grad, GpuMat& qangle);
-    double getWinSigma() const;
-    bool checkDetectorSize() const;
-    static int numPartsWithin(int size, int part_size, int stride);
-    static Size numPartsWithin(Size size, Size part_size, Size stride);
-    // Coefficients of the separating plane
-    float free_coef;
-    GpuMat detector;
-    // Results of the last classification step
-    GpuMat labels, labels_buf;
-    Mat labels_host;
-    // Results of the last histogram evaluation step
-    GpuMat block_hists, block_hists_buf;
-    // Gradients computation results
-    GpuMat grad, qangle, grad_buf, qangle_buf;
-    // returns subbuffer with required size, reallocates buffer if necessary.
-    static GpuMat getBuffer(const Size& sz, int type, GpuMat& buf);
-    static GpuMat getBuffer(int rows, int cols, int type, GpuMat& buf);
-    std::vector<GpuMat> image_scales;
};

//
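For callers migrating from the removed constructor, the extra arguments map onto the new accessors. A rough sketch with the old defaults spelled out (DEFAULT_WIN_SIGMA = -1, DEFAULT_NLEVELS = 64); the helper name and include path are hypothetical:

```cpp
#include <opencv2/cudaobjdetect.hpp>  // header path assumed

// Sketch: the removed HOGDescriptor(...) constructor arguments expressed
// through the new factory plus setters (values shown are the old defaults).
static cv::Ptr<cv::cuda::HOG> createLikeOldDefaults()
{
    cv::Ptr<cv::cuda::HOG> hog =
        cv::cuda::HOG::create(cv::Size(64, 128), cv::Size(16, 16),
                              cv::Size(8, 8), cv::Size(8, 8), 9);
    hog->setWinSigma(-1.0);        // old DEFAULT_WIN_SIGMA: sigma derived from block size
    hog->setL2HysThreshold(0.2);   // old threshold_L2hys default
    hog->setGammaCorrection(true); // old gamma_correction default
    hog->setNumLevels(64);         // old DEFAULT_NLEVELS
    return hog;
}
```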
@@ -71,10 +71,10 @@ PERF_TEST_P(Image, ObjDetect_HOG,
        const cv::cuda::GpuMat d_img(img);
        std::vector<cv::Rect> gpu_found_locations;

-       cv::cuda::HOGDescriptor d_hog;
-       d_hog.setSVMDetector(cv::cuda::HOGDescriptor::getDefaultPeopleDetector());
+       cv::Ptr<cv::cuda::HOG> d_hog = cv::cuda::HOG::create();
+       d_hog->setSVMDetector(d_hog->getDefaultPeopleDetector());

-       TEST_CYCLE() d_hog.detectMultiScale(d_img, gpu_found_locations);
+       TEST_CYCLE() d_hog->detectMultiScale(d_img, gpu_found_locations);

        SANITY_CHECK(gpu_found_locations);
    }
@@ -82,8 +82,10 @@ PERF_TEST_P(Image, ObjDetect_HOG,
    {
        std::vector<cv::Rect> cpu_found_locations;

+       cv::Ptr<cv::cuda::HOG> d_hog = cv::cuda::HOG::create();

        cv::HOGDescriptor hog;
-       hog.setSVMDetector(cv::cuda::HOGDescriptor::getDefaultPeopleDetector());
+       hog.setSVMDetector(d_hog->getDefaultPeopleDetector());

        TEST_CYCLE() hog.detectMultiScale(img, cpu_found_locations);
@@ -42,23 +42,12 @@
#include "precomp.hpp"

+using namespace cv;
+using namespace cv::cuda;

#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)

-cv::cuda::HOGDescriptor::HOGDescriptor(Size, Size, Size, Size, int, double, double, bool, int) { throw_no_cuda(); }
-size_t cv::cuda::HOGDescriptor::getDescriptorSize() const { throw_no_cuda(); return 0; }
-size_t cv::cuda::HOGDescriptor::getBlockHistogramSize() const { throw_no_cuda(); return 0; }
-double cv::cuda::HOGDescriptor::getWinSigma() const { throw_no_cuda(); return 0; }
-bool cv::cuda::HOGDescriptor::checkDetectorSize() const { throw_no_cuda(); return false; }
-void cv::cuda::HOGDescriptor::setSVMDetector(const std::vector<float>&) { throw_no_cuda(); }
-void cv::cuda::HOGDescriptor::detect(const GpuMat&, std::vector<Point>&, double, Size, Size) { throw_no_cuda(); }
-void cv::cuda::HOGDescriptor::detectMultiScale(const GpuMat&, std::vector<Rect>&, double, Size, Size, double, int) { throw_no_cuda(); }
-void cv::cuda::HOGDescriptor::computeBlockHistograms(const GpuMat&) { throw_no_cuda(); }
-void cv::cuda::HOGDescriptor::getDescriptors(const GpuMat&, Size, GpuMat&, int) { throw_no_cuda(); }
-std::vector<float> cv::cuda::HOGDescriptor::getDefaultPeopleDetector() { throw_no_cuda(); return std::vector<float>(); }
-std::vector<float> cv::cuda::HOGDescriptor::getPeopleDetector48x96() { throw_no_cuda(); return std::vector<float>(); }
-std::vector<float> cv::cuda::HOGDescriptor::getPeopleDetector64x128() { throw_no_cuda(); return std::vector<float>(); }
-void cv::cuda::HOGDescriptor::computeConfidence(const GpuMat&, std::vector<Point>&, double, Size, Size, std::vector<Point>&, std::vector<double>&) { throw_no_cuda(); }
-void cv::cuda::HOGDescriptor::computeConfidenceMultiScale(const GpuMat&, std::vector<Rect>&, double, Size, Size, std::vector<HOGConfidence>&, int) { throw_no_cuda(); }
+Ptr<cuda::HOG> cv::cuda::HOG::create(Size, Size, Size, Size, int) { throw_no_cuda(); return Ptr<cuda::HOG>(); }

#else
@@ -102,244 +91,323 @@ namespace cv { namespace cuda { namespace device
    }
}}}

-using namespace ::cv::cuda::device;
+using namespace cv::cuda::device;
-cv::cuda::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size block_stride_, Size cell_size_,
-                                       int nbins_, double win_sigma_, double threshold_L2hys_, bool gamma_correction_, int nlevels_)
-    : win_size(win_size_),
-      block_size(block_size_),
-      block_stride(block_stride_),
-      cell_size(cell_size_),
-      nbins(nbins_),
-      win_sigma(win_sigma_),
-      threshold_L2hys(threshold_L2hys_),
-      gamma_correction(gamma_correction_),
-      nlevels(nlevels_)
-{
+namespace
+{
+    class HOG_Impl : public cv::cuda::HOG
{
public:
HOG_Impl(Size win_size,
Size block_size,
Size block_stride,
Size cell_size,
int nbins);
virtual void setWinSigma(double win_sigma) { win_sigma_ = win_sigma; }
virtual double getWinSigma() const;
virtual void setL2HysThreshold(double threshold_L2hys) { threshold_L2hys_ = threshold_L2hys; }
virtual double getL2HysThreshold() const { return threshold_L2hys_; }
virtual void setGammaCorrection(bool gamma_correction) { gamma_correction_ = gamma_correction; }
virtual bool getGammaCorrection() const { return gamma_correction_; }
virtual void setNumLevels(int nlevels) { nlevels_ = nlevels; }
virtual int getNumLevels() const { return nlevels_; }
virtual void setHitThreshold(double hit_threshold) { hit_threshold_ = hit_threshold; }
virtual double getHitThreshold() const { return hit_threshold_; }
virtual void setWinStride(Size win_stride) { win_stride_ = win_stride; }
virtual Size getWinStride() const { return win_stride_; }
virtual void setScaleFactor(double scale0) { scale0_ = scale0; }
virtual double getScaleFactor() const { return scale0_; }
virtual void setGroupThreshold(int group_threshold) { group_threshold_ = group_threshold; }
virtual int getGroupThreshold() const { return group_threshold_; }
virtual void setDescriptorFormat(int descr_format) { descr_format_ = descr_format; }
virtual int getDescriptorFormat() const { return descr_format_; }
virtual size_t getDescriptorSize() const;
virtual size_t getBlockHistogramSize() const;
virtual void setSVMDetector(InputArray detector);
virtual Mat getDefaultPeopleDetector() const;
virtual void detect(InputArray img,
std::vector<Point>& found_locations,
std::vector<double>* confidences);
virtual void detectMultiScale(InputArray img,
std::vector<Rect>& found_locations,
std::vector<double>* confidences);
virtual void compute(InputArray img,
OutputArray descriptors,
Stream& stream);
private:
Size win_size_;
Size block_size_;
Size block_stride_;
Size cell_size_;
int nbins_;
double win_sigma_;
double threshold_L2hys_;
bool gamma_correction_;
int nlevels_;
double hit_threshold_;
Size win_stride_;
double scale0_;
int group_threshold_;
int descr_format_;
private:
int getTotalHistSize(Size img_size) const;
void computeBlockHistograms(const GpuMat& img, GpuMat& block_hists);
void computeGradient(const GpuMat& img, GpuMat& grad, GpuMat& qangle);
// Coefficients of the separating plane
float free_coef_;
GpuMat detector_;
};
HOG_Impl::HOG_Impl(Size win_size,
Size block_size,
Size block_stride,
Size cell_size,
int nbins) :
win_size_(win_size),
block_size_(block_size),
block_stride_(block_stride),
cell_size_(cell_size),
nbins_(nbins),
win_sigma_(-1.0),
threshold_L2hys_(0.2),
gamma_correction_(true),
nlevels_(64),
hit_threshold_(0.0),
win_stride_(block_stride),
scale0_(1.05),
group_threshold_(2),
descr_format_(DESCR_FORMAT_COL_BY_COL)
{
CV_Assert((win_size.width - block_size.width ) % block_stride.width == 0 && CV_Assert((win_size.width - block_size.width ) % block_stride.width == 0 &&
(win_size.height - block_size.height) % block_stride.height == 0); (win_size.height - block_size.height) % block_stride.height == 0);
CV_Assert(block_size.width % cell_size.width == 0 && block_size.height % cell_size.height == 0); CV_Assert(block_size.width % cell_size.width == 0 &&
block_size.height % cell_size.height == 0);
CV_Assert(block_stride == cell_size); CV_Assert(block_stride == cell_size);
CV_Assert(cell_size == Size(8, 8)); CV_Assert(cell_size == Size(8, 8));
Size cells_per_block = Size(block_size.width / cell_size.width, block_size.height / cell_size.height); Size cells_per_block(block_size.width / cell_size.width, block_size.height / cell_size.height);
CV_Assert(cells_per_block == Size(2, 2)); CV_Assert(cells_per_block == Size(2, 2));
} }
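These assertions mean that only the 16x16 block / 8x8 cell / 8x8 block-stride geometry is accepted, so in practice just the window size (and nbins = 9) varies. A hedged sketch of the two window sizes the bundled people detectors expect; the function name and include path are placeholders:

```cpp
#include <opencv2/cudaobjdetect.hpp>  // header path assumed

// Sketch: both configurations satisfy the constructor checks above
// (block 16x16, cell 8x8, block stride == cell size, 2x2 cells per block).
static void createSupportedConfigs()
{
    cv::Ptr<cv::cuda::HOG> hog64x128 = cv::cuda::HOG::create(cv::Size(64, 128));
    cv::Ptr<cv::cuda::HOG> hog48x96  = cv::cuda::HOG::create(cv::Size(48, 96),
                                                             cv::Size(16, 16),
                                                             cv::Size(8, 8),
                                                             cv::Size(8, 8), 9);
    (void)hog64x128; (void)hog48x96;
}
```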
size_t cv::cuda::HOGDescriptor::getDescriptorSize() const
{
return numPartsWithin(win_size, block_size, block_stride).area() * getBlockHistogramSize();
}
size_t cv::cuda::HOGDescriptor::getBlockHistogramSize() const
{
Size cells_per_block = Size(block_size.width / cell_size.width, block_size.height / cell_size.height);
return (size_t)(nbins * cells_per_block.area());
}
double cv::cuda::HOGDescriptor::getWinSigma() const
{
return win_sigma >= 0 ? win_sigma : (block_size.width + block_size.height) / 8.0;
}
bool cv::cuda::HOGDescriptor::checkDetectorSize() const
{
size_t detector_size = detector.rows * detector.cols;
size_t descriptor_size = getDescriptorSize();
return detector_size == 0 || detector_size == descriptor_size || detector_size == descriptor_size + 1;
}
void cv::cuda::HOGDescriptor::setSVMDetector(const std::vector<float>& _detector)
{
std::vector<float> detector_reordered(_detector.size());
size_t block_hist_size = getBlockHistogramSize();
cv::Size blocks_per_img = numPartsWithin(win_size, block_size, block_stride);
for (int i = 0; i < blocks_per_img.height; ++i) static int numPartsWithin(int size, int part_size, int stride)
for (int j = 0; j < blocks_per_img.width; ++j)
{ {
const float* src = &_detector[0] + (j * blocks_per_img.height + i) * block_hist_size; return (size - part_size + stride) / stride;
float* dst = &detector_reordered[0] + (i * blocks_per_img.width + j) * block_hist_size;
for (size_t k = 0; k < block_hist_size; ++k)
dst[k] = src[k];
} }
this->detector.upload(Mat(detector_reordered).reshape(1, 1)); static Size numPartsWithin(Size size, Size part_size, Size stride)
{
size_t descriptor_size = getDescriptorSize(); return Size(numPartsWithin(size.width, part_size.width, stride.width),
free_coef = _detector.size() > descriptor_size ? _detector[descriptor_size] : 0; numPartsWithin(size.height, part_size.height, stride.height));
}
CV_Assert(checkDetectorSize());
}
cv::cuda::GpuMat cv::cuda::HOGDescriptor::getBuffer(const Size& sz, int type, GpuMat& buf)
{
if (buf.empty() || buf.type() != type)
buf.create(sz, type);
else
if (buf.cols < sz.width || buf.rows < sz.height)
buf.create(std::max(buf.rows, sz.height), std::max(buf.cols, sz.width), type);
return buf(Rect(Point(0,0), sz));
}
cv::cuda::GpuMat cv::cuda::HOGDescriptor::getBuffer(int rows, int cols, int type, GpuMat& buf)
{
return getBuffer(Size(cols, rows), type, buf);
}
void cv::cuda::HOGDescriptor::computeGradient(const GpuMat& img, GpuMat& _grad, GpuMat& _qangle)
{
CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4);
// grad.create(img.size(), CV_32FC2);
_grad = getBuffer(img.size(), CV_32FC2, grad_buf);
// qangle.create(img.size(), CV_8UC2);
_qangle = getBuffer(img.size(), CV_8UC2, qangle_buf);
float angleScale = (float)(nbins / CV_PI); size_t HOG_Impl::getDescriptorSize() const
switch (img.type())
{ {
case CV_8UC1: return numPartsWithin(win_size_, block_size_, block_stride_).area() * getBlockHistogramSize();
hog::compute_gradients_8UC1(nbins, img.rows, img.cols, img, angleScale, _grad, _qangle, gamma_correction);
break;
case CV_8UC4:
hog::compute_gradients_8UC4(nbins, img.rows, img.cols, img, angleScale, _grad, _qangle, gamma_correction);
break;
} }
}
size_t HOG_Impl::getBlockHistogramSize() const
{
Size cells_per_block(block_size_.width / cell_size_.width, block_size_.height / cell_size_.height);
return nbins_ * cells_per_block.area();
}
void cv::cuda::HOGDescriptor::computeBlockHistograms(const GpuMat& img) double HOG_Impl::getWinSigma() const
{ {
cv::Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride); return win_sigma_ >= 0 ? win_sigma_ : (block_size_.width + block_size_.height) / 8.0;
hog::set_up_constants(nbins, block_stride.width, block_stride.height, blocks_per_win.width, blocks_per_win.height); }
computeGradient(img, grad, qangle); void HOG_Impl::setSVMDetector(InputArray _detector)
{
const int descriptor_size = static_cast<int>(getDescriptorSize());
size_t block_hist_size = getBlockHistogramSize(); const Mat detector = _detector.getMat();
Size blocks_per_img = numPartsWithin(img.size(), block_size, block_stride);
// block_hists.create(1, block_hist_size * blocks_per_img.area(), CV_32F); CV_Assert( detector.type() == CV_32FC1 );
block_hists = getBuffer(1, static_cast<int>(block_hist_size * blocks_per_img.area()), CV_32F, block_hists_buf); CV_Assert( detector.rows == 1 );
CV_Assert( detector.cols == descriptor_size || detector.cols == descriptor_size + 1 );
hog::compute_hists(nbins, block_stride.width, block_stride.height, img.rows, img.cols, std::vector<float> detector_reordered(detector.ptr<float>(), detector.ptr<float>() + detector.cols);
grad, qangle, (float)getWinSigma(), block_hists.ptr<float>());
hog::normalize_hists(nbins, block_stride.width, block_stride.height, img.rows, img.cols, size_t block_hist_size = getBlockHistogramSize();
block_hists.ptr<float>(), (float)threshold_L2hys); Size blocks_per_win = numPartsWithin(win_size_, block_size_, block_stride_);
}
for (int i = 0; i < blocks_per_win.height; ++i)
{
for (int j = 0; j < blocks_per_win.width; ++j)
{
const float* src = detector.ptr<float>() + (j * blocks_per_win.height + i) * block_hist_size;
float* dst = &detector_reordered[0] + (i * blocks_per_win.width + j) * block_hist_size;
for (size_t k = 0; k < block_hist_size; ++k)
dst[k] = src[k];
}
}
void cv::cuda::HOGDescriptor::getDescriptors(const GpuMat& img, Size win_stride, GpuMat& descriptors, int descr_format) detector_.upload(Mat(detector_reordered).reshape(1, 1));
{ free_coef_ = detector.cols > descriptor_size ? detector.at<float>(0, descriptor_size) : 0;
CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0); }
computeBlockHistograms(img); static Mat getPeopleDetector64x128();
static Mat getPeopleDetector48x96();
const size_t block_hist_size = getBlockHistogramSize(); Mat HOG_Impl::getDefaultPeopleDetector() const
Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride); {
Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride); CV_Assert( win_size_ == Size(64, 128) || win_size_ == Size(48, 96) );
descriptors.create(wins_per_img.area(), static_cast<int>(blocks_per_win.area() * block_hist_size), CV_32F); if (win_size_ == Size(64, 128))
return getPeopleDetector64x128();
else
return getPeopleDetector48x96();
}
switch (descr_format) void HOG_Impl::detect(InputArray _img, std::vector<Point>& hits, std::vector<double>* confidences)
{ {
case DESCR_FORMAT_ROW_BY_ROW: const GpuMat img = _img.getGpuMat();
hog::extract_descrs_by_rows(win_size.height, win_size.width, block_stride.height, block_stride.width,
win_stride.height, win_stride.width, img.rows, img.cols, block_hists.ptr<float>(), descriptors);
break;
case DESCR_FORMAT_COL_BY_COL:
hog::extract_descrs_by_cols(win_size.height, win_size.width, block_stride.height, block_stride.width,
win_stride.height, win_stride.width, img.rows, img.cols, block_hists.ptr<float>(), descriptors);
break;
default:
CV_Error(cv::Error::StsBadArg, "Unknown descriptor format");
}
}
void cv::cuda::HOGDescriptor::computeConfidence(const GpuMat& img, std::vector<Point>& hits, double hit_threshold, CV_Assert( img.type() == CV_8UC1 || img.type() == CV_8UC4 );
Size win_stride, Size padding, std::vector<Point>& locations, std::vector<double>& confidences) CV_Assert( win_stride_.width % block_stride_.width == 0 && win_stride_.height % block_stride_.height == 0 );
{
CV_Assert(padding == Size(0, 0));
hits.clear(); hits.clear();
if (detector.empty()) if (detector_.empty())
return; return;
computeBlockHistograms(img); BufferPool pool(Stream::Null());
if (win_stride == Size()) GpuMat block_hists = pool.getBuffer(1, getTotalHistSize(img.size()), CV_32FC1);
win_stride = block_stride; computeBlockHistograms(img, block_hists);
else
CV_Assert(win_stride.width % block_stride.width == 0 &&
win_stride.height % block_stride.height == 0);
Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride); Size wins_per_img = numPartsWithin(img.size(), win_size_, win_stride_);
labels.create(1, wins_per_img.area(), CV_32F);
hog::compute_confidence_hists(win_size.height, win_size.width, block_stride.height, block_stride.width, if (confidences == NULL)
win_stride.height, win_stride.width, img.rows, img.cols, block_hists.ptr<float>(), {
detector.ptr<float>(), (float)free_coef, (float)hit_threshold, labels.ptr<float>()); GpuMat labels = pool.getBuffer(1, wins_per_img.area(), CV_8UC1);
hog::classify_hists(win_size_.height, win_size_.width,
block_stride_.height, block_stride_.width,
win_stride_.height, win_stride_.width,
img.rows, img.cols,
block_hists.ptr<float>(),
detector_.ptr<float>(),
(float)free_coef_,
(float)hit_threshold_,
labels.ptr());
Mat labels_host;
labels.download(labels_host);
unsigned char* vec = labels_host.ptr();
for (int i = 0; i < wins_per_img.area(); i++)
{
int y = i / wins_per_img.width;
int x = i - wins_per_img.width * y;
if (vec[i])
hits.push_back(Point(x * win_stride_.width, y * win_stride_.height));
}
}
else
{
GpuMat labels = pool.getBuffer(1, wins_per_img.area(), CV_32FC1);
hog::compute_confidence_hists(win_size_.height, win_size_.width,
block_stride_.height, block_stride_.width,
win_stride_.height, win_stride_.width,
img.rows, img.cols,
block_hists.ptr<float>(),
detector_.ptr<float>(),
(float)free_coef_,
(float)hit_threshold_,
labels.ptr<float>());
Mat labels_host;
labels.download(labels_host); labels.download(labels_host);
float* vec = labels_host.ptr<float>(); float* vec = labels_host.ptr<float>();
// does not support roi for now.. confidences->clear();
locations.clear();
confidences.clear();
for (int i = 0; i < wins_per_img.area(); i++) for (int i = 0; i < wins_per_img.area(); i++)
{ {
int y = i / wins_per_img.width; int y = i / wins_per_img.width;
int x = i - wins_per_img.width * y; int x = i - wins_per_img.width * y;
if (vec[i] >= hit_threshold)
hits.push_back(Point(x * win_stride.width, y * win_stride.height));
Point pt(win_stride.width * x, win_stride.height * y); if (vec[i] >= hit_threshold_)
locations.push_back(pt); {
confidences.push_back((double)vec[i]); hits.push_back(Point(x * win_stride_.width, y * win_stride_.height));
confidences->push_back((double)vec[i]);
}
}
}
} }
}
void cv::cuda::HOGDescriptor::computeConfidenceMultiScale(const GpuMat& img, std::vector<Rect>& found_locations, void HOG_Impl::detectMultiScale(InputArray _img,
double hit_threshold, Size win_stride, Size padding, std::vector<Rect>& found_locations,
std::vector<HOGConfidence> &conf_out, int group_threshold) std::vector<double>* confidences)
{ {
const GpuMat img = _img.getGpuMat();
CV_Assert( img.type() == CV_8UC1 || img.type() == CV_8UC4 );
CV_Assert( confidences == NULL || group_threshold_ == 0 );
std::vector<double> level_scale; std::vector<double> level_scale;
double scale = 1.; double scale = 1.0;
int levels = 0; int levels = 0;
for (levels = 0; levels < nlevels_; levels++)
for (levels = 0; levels < (int)conf_out.size(); levels++)
{ {
scale = conf_out[levels].scale;
level_scale.push_back(scale); level_scale.push_back(scale);
if (cvRound(img.cols/scale) < win_size.width || cvRound(img.rows/scale) < win_size.height)
if (cvRound(img.cols / scale) < win_size_.width ||
cvRound(img.rows / scale) < win_size_.height ||
scale0_ <= 1)
{
break; break;
} }
scale *= scale0_;
}
levels = std::max(levels, 1); levels = std::max(levels, 1);
level_scale.resize(levels); level_scale.resize(levels);
std::vector<Rect> all_candidates; std::vector<Point> level_hits;
std::vector<Point> locations; std::vector<double> level_confidences;
BufferPool pool(Stream::Null());
found_locations.clear();
for (size_t i = 0; i < level_scale.size(); i++) for (size_t i = 0; i < level_scale.size(); i++)
{ {
scale = level_scale[i]; scale = level_scale[i];
Size sz(cvRound(img.cols / scale), cvRound(img.rows / scale)); Size sz(cvRound(img.cols / scale), cvRound(img.rows / scale));
GpuMat smaller_img;
GpuMat smaller_img;
if (sz == img.size()) if (sz == img.size())
{
smaller_img = img; smaller_img = img;
}
else else
{ {
smaller_img.create(sz, img.type()); smaller_img = pool.getBuffer(sz, img.type());
switch (img.type()) switch (img.type())
{ {
case CV_8UC1: hog::resize_8UC1(img, smaller_img); break; case CV_8UC1: hog::resize_8UC1(img, smaller_img); break;
@@ -347,127 +415,137 @@ void cv::cuda::HOGDescriptor::computeConfidenceMultiScale(const GpuMat& img, std
} }
} }
computeConfidence(smaller_img, locations, hit_threshold, win_stride, padding, conf_out[i].locations, conf_out[i].confidences); detect(smaller_img, level_hits,
confidences ? &level_confidences : NULL);
Size scaled_win_size(cvRound(win_size_.width * scale),
cvRound(win_size_.height * scale));
Size scaled_win_size(cvRound(win_size.width * scale), cvRound(win_size.height * scale)); for (size_t j = 0; j < level_hits.size(); j++)
for (size_t j = 0; j < locations.size(); j++) {
all_candidates.push_back(Rect(Point2d(locations[j]) * scale, scaled_win_size)); found_locations.push_back(Rect(Point2d(level_hits[j]) * scale, scaled_win_size));
if (confidences)
confidences->push_back(level_confidences[j]);
}
} }
found_locations.assign(all_candidates.begin(), all_candidates.end()); if (group_threshold_ > 0)
groupRectangles(found_locations, group_threshold, 0.2/*magic number copied from CPU version*/); {
} groupRectangles(found_locations, group_threshold_, 0.2/*magic number copied from CPU version*/);
}
}
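Note the assertion near the top of detectMultiScale: per-window confidences can only be requested while grouping is disabled. A hedged sketch of a caller that wants scores; the function name and include path are placeholders, and d_img is assumed to be a CV_8UC1 or CV_8UC4 GpuMat:

```cpp
#include <vector>
#include <opencv2/cudaobjdetect.hpp>  // header path assumed

// Sketch: retrieving per-window confidences from the new interface.
static void detectWithScores(const cv::cuda::GpuMat& d_img,   // CV_8UC1 or CV_8UC4
                             std::vector<cv::Rect>& rects,
                             std::vector<double>& scores)
{
    cv::Ptr<cv::cuda::HOG> hog = cv::cuda::HOG::create();
    hog->setSVMDetector(hog->getDefaultPeopleDetector());
    hog->setGroupThreshold(0);                 // required when asking for confidences
    hog->detectMultiScale(d_img, rects, &scores);
    // rects[i] and scores[i] refer to the same detection window.
}
```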
void HOG_Impl::compute(InputArray _img,
OutputArray _descriptors,
Stream& stream)
{
const GpuMat img = _img.getGpuMat();
void cv::cuda::HOGDescriptor::detect(const GpuMat& img, std::vector<Point>& hits, double hit_threshold, Size win_stride, Size padding) CV_Assert( img.type() == CV_8UC1 || img.type() == CV_8UC4 );
{ CV_Assert( win_stride_.width % block_stride_.width == 0 && win_stride_.height % block_stride_.height == 0 );
CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4); CV_Assert( !stream );
CV_Assert(padding == Size(0, 0));
hits.clear(); BufferPool pool(stream);
if (detector.empty())
return;
computeBlockHistograms(img); GpuMat block_hists = pool.getBuffer(1, getTotalHistSize(img.size()), CV_32FC1);
computeBlockHistograms(img, block_hists);
if (win_stride == Size()) const size_t block_hist_size = getBlockHistogramSize();
win_stride = block_stride; Size blocks_per_win = numPartsWithin(win_size_, block_size_, block_stride_);
else Size wins_per_img = numPartsWithin(img.size(), win_size_, win_stride_);
CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0);
Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride); _descriptors.create(wins_per_img.area(), static_cast<int>(blocks_per_win.area() * block_hist_size), CV_32FC1);
// labels.create(1, wins_per_img.area(), CV_8U); GpuMat descriptors = _descriptors.getGpuMat();
labels = getBuffer(1, wins_per_img.area(), CV_8U, labels_buf);
hog::classify_hists(win_size.height, win_size.width, block_stride.height, block_stride.width, switch (descr_format_)
win_stride.height, win_stride.width, img.rows, img.cols, block_hists.ptr<float>(), {
detector.ptr<float>(), (float)free_coef, (float)hit_threshold, labels.ptr()); case DESCR_FORMAT_ROW_BY_ROW:
hog::extract_descrs_by_rows(win_size_.height, win_size_.width,
block_stride_.height, block_stride_.width,
win_stride_.height, win_stride_.width,
img.rows, img.cols,
block_hists.ptr<float>(),
descriptors);
break;
case DESCR_FORMAT_COL_BY_COL:
hog::extract_descrs_by_cols(win_size_.height, win_size_.width,
block_stride_.height, block_stride_.width,
win_stride_.height, win_stride_.width,
img.rows, img.cols,
block_hists.ptr<float>(),
descriptors);
break;
default:
CV_Error(cv::Error::StsBadArg, "Unknown descriptor format");
}
}
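The removed getDescriptors(img, win_stride, descriptors, format) call maps onto compute() driven by the win-stride and descriptor-format properties, as the updated test further down also shows. A brief sketch under the same assumptions as above (placeholder function name, assumed include path, d_img a supported GpuMat):

```cpp
#include <opencv2/cudaobjdetect.hpp>  // header path assumed

// Sketch: extracting per-window block descriptors with the new interface.
static void computeDescriptors(const cv::cuda::GpuMat& d_img,   // CV_8UC1 or CV_8UC4
                               cv::cuda::GpuMat& descriptors)   // one row per detection window
{
    cv::Ptr<cv::cuda::HOG> hog = cv::cuda::HOG::create();
    hog->setWinStride(cv::Size(8, 8));  // must be a multiple of the block stride
    hog->setDescriptorFormat(cv::cuda::HOG::DESCR_FORMAT_ROW_BY_ROW);
    hog->compute(d_img, descriptors);
}
```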
labels.download(labels_host); int HOG_Impl::getTotalHistSize(Size img_size) const
unsigned char* vec = labels_host.ptr();
for (int i = 0; i < wins_per_img.area(); i++)
{ {
int y = i / wins_per_img.width; size_t block_hist_size = getBlockHistogramSize();
int x = i - wins_per_img.width * y; Size blocks_per_img = numPartsWithin(img_size, block_size_, block_stride_);
if (vec[i]) return static_cast<int>(block_hist_size * blocks_per_img.area());
hits.push_back(Point(x * win_stride.width, y * win_stride.height));
} }
}
void HOG_Impl::computeBlockHistograms(const GpuMat& img, GpuMat& block_hists)
{
cv::Size blocks_per_win = numPartsWithin(win_size_, block_size_, block_stride_);
hog::set_up_constants(nbins_, block_stride_.width, block_stride_.height, blocks_per_win.width, blocks_per_win.height);
BufferPool pool(Stream::Null());
void cv::cuda::HOGDescriptor::detectMultiScale(const GpuMat& img, std::vector<Rect>& found_locations, double hit_threshold, GpuMat grad = pool.getBuffer(img.size(), CV_32FC2);
Size win_stride, Size padding, double scale0, int group_threshold) GpuMat qangle = pool.getBuffer(img.size(), CV_8UC2);
{ computeGradient(img, grad, qangle);
CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4); block_hists.create(1, getTotalHistSize(img.size()), CV_32FC1);
std::vector<double> level_scale; hog::compute_hists(nbins_,
double scale = 1.; block_stride_.width, block_stride_.height,
int levels = 0; img.rows, img.cols,
grad, qangle,
(float)getWinSigma(),
block_hists.ptr<float>());
for (levels = 0; levels < nlevels; levels++) hog::normalize_hists(nbins_,
{ block_stride_.width, block_stride_.height,
level_scale.push_back(scale); img.rows, img.cols,
if (cvRound(img.cols/scale) < win_size.width || block_hists.ptr<float>(),
cvRound(img.rows/scale) < win_size.height || scale0 <= 1) (float)threshold_L2hys_);
break;
scale *= scale0;
} }
levels = std::max(levels, 1);
level_scale.resize(levels);
image_scales.resize(levels);
std::vector<Rect> all_candidates;
std::vector<Point> locations;
for (size_t i = 0; i < level_scale.size(); i++) void HOG_Impl::computeGradient(const GpuMat& img, GpuMat& grad, GpuMat& qangle)
{ {
scale = level_scale[i]; grad.create(img.size(), CV_32FC2);
Size sz(cvRound(img.cols / scale), cvRound(img.rows / scale)); qangle.create(img.size(), CV_8UC2);
GpuMat smaller_img;
if (sz == img.size()) float angleScale = (float)(nbins_ / CV_PI);
smaller_img = img;
else
{
image_scales[i].create(sz, img.type());
switch (img.type()) switch (img.type())
{ {
case CV_8UC1: hog::resize_8UC1(img, image_scales[i]); break; case CV_8UC1:
case CV_8UC4: hog::resize_8UC4(img, image_scales[i]); break; hog::compute_gradients_8UC1(nbins_, img.rows, img.cols, img, angleScale, grad, qangle, gamma_correction_);
} break;
smaller_img = image_scales[i]; case CV_8UC4:
hog::compute_gradients_8UC4(nbins_, img.rows, img.cols, img, angleScale, grad, qangle, gamma_correction_);
break;
} }
detect(smaller_img, locations, hit_threshold, win_stride, padding);
Size scaled_win_size(cvRound(win_size.width * scale), cvRound(win_size.height * scale));
for (size_t j = 0; j < locations.size(); j++)
all_candidates.push_back(Rect(Point2d(locations[j]) * scale, scaled_win_size));
} }
found_locations.assign(all_candidates.begin(), all_candidates.end());
groupRectangles(found_locations, group_threshold, 0.2/*magic number copied from CPU version*/);
} }
int cv::cuda::HOGDescriptor::numPartsWithin(int size, int part_size, int stride) Ptr<cuda::HOG> cv::cuda::HOG::create(Size win_size,
Size block_size,
Size block_stride,
Size cell_size,
int nbins)
{ {
return (size - part_size + stride) / stride; return makePtr<HOG_Impl>(win_size, block_size, block_stride, cell_size, nbins);
}
cv::Size cv::cuda::HOGDescriptor::numPartsWithin(cv::Size size, cv::Size part_size, cv::Size stride)
{
return Size(numPartsWithin(size.width, part_size.width, stride.width), numPartsWithin(size.height, part_size.height, stride.height));
} }
std::vector<float> cv::cuda::HOGDescriptor::getDefaultPeopleDetector() namespace
{ {
return getPeopleDetector64x128(); static Mat getPeopleDetector48x96()
} {
static float detector[] = {
std::vector<float> cv::cuda::HOGDescriptor::getPeopleDetector48x96()
{
static const float detector[] = {
0.294350f, -0.098796f, -0.129522f, 0.078753f, 0.387527f, 0.261529f, 0.294350f, -0.098796f, -0.129522f, 0.078753f, 0.387527f, 0.261529f,
0.145939f, 0.061520f, 0.328699f, 0.227148f, -0.066467f, -0.086723f, 0.145939f, 0.061520f, 0.328699f, 0.227148f, -0.066467f, -0.086723f,
0.047559f, 0.106714f, 0.037897f, 0.111461f, -0.024406f, 0.304769f, 0.047559f, 0.106714f, 0.037897f, 0.111461f, -0.024406f, 0.304769f,
@@ -799,15 +877,13 @@ std::vector<float> cv::cuda::HOGDescriptor::getPeopleDetector48x96()
-0.119002f, 0.026722f, 0.034853f, -0.060934f, -0.025054f, -0.093026f, -0.119002f, 0.026722f, 0.034853f, -0.060934f, -0.025054f, -0.093026f,
-0.035372f, -0.233209f, -0.049869f, -0.039151f, -0.022279f, -0.065380f, -0.035372f, -0.233209f, -0.049869f, -0.039151f, -0.022279f, -0.065380f,
-9.063785f }; -9.063785f };
return std::vector<float>(detector, detector + sizeof(detector)/sizeof(detector[0]));
}
return Mat(1, static_cast<int>(sizeof(detector)/sizeof(detector[0])), CV_32FC1, detector);
}
std::vector<float> cv::cuda::HOGDescriptor::getPeopleDetector64x128() Mat getPeopleDetector64x128()
{ {
static const float detector[] = { static float detector[] = {
0.05359386f, -0.14721455f, -0.05532170f, 0.05077307f, 0.05359386f, -0.14721455f, -0.05532170f, 0.05077307f,
0.11547081f, -0.04268804f, 0.04635834f, -0.05468199f, 0.08232084f, 0.11547081f, -0.04268804f, 0.04635834f, -0.05468199f, 0.08232084f,
0.10424068f, -0.02294518f, 0.01108519f, 0.01378693f, 0.11193510f, 0.10424068f, -0.02294518f, 0.01108519f, 0.01378693f, 0.11193510f,
@@ -1613,7 +1689,9 @@ std::vector<float> cv::cuda::HOGDescriptor::getPeopleDetector64x128()
-0.01612278f, -1.46097376e-003f, 0.14013411f, -8.96181818e-003f, -0.01612278f, -1.46097376e-003f, 0.14013411f, -8.96181818e-003f,
-0.03250246f, 3.38630192e-003f, 2.64779478e-003f, 0.03359732f, -0.03250246f, 3.38630192e-003f, 2.64779478e-003f, 0.03359732f,
-0.02411991f, -0.04229729f, 0.10666174f, -6.66579151f }; -0.02411991f, -0.04229729f, 0.10666174f, -6.66579151f };
return std::vector<float>(detector, detector + sizeof(detector)/sizeof(detector[0]));
return Mat(1, static_cast<int>(sizeof(detector)/sizeof(detector[0])), CV_32FC1, detector);
}
} }
#endif #endif
@@ -48,9 +48,10 @@ using namespace cvtest;
//#define DUMP //#define DUMP
-struct HOG : testing::TestWithParam<cv::cuda::DeviceInfo>, cv::cuda::HOGDescriptor
+struct HOG : testing::TestWithParam<cv::cuda::DeviceInfo>
{
    cv::cuda::DeviceInfo devInfo;
+    cv::Ptr<cv::cuda::HOG> hog;
#ifdef DUMP #ifdef DUMP
std::ofstream f; std::ofstream f;
@@ -69,23 +70,13 @@ struct HOG : testing::TestWithParam<cv::cuda::DeviceInfo>, cv::cuda::HOGDescript
devInfo = GetParam(); devInfo = GetParam();
cv::cuda::setDevice(devInfo.deviceID()); cv::cuda::setDevice(devInfo.deviceID());
hog = cv::cuda::HOG::create();
} }
#ifdef DUMP #ifdef DUMP
void dump(const cv::Mat& blockHists, const std::vector<cv::Point>& locations) void dump(const std::vector<cv::Point>& locations)
{
f.write((char*)&blockHists.rows, sizeof(blockHists.rows));
f.write((char*)&blockHists.cols, sizeof(blockHists.cols));
for (int i = 0; i < blockHists.rows; ++i)
{ {
for (int j = 0; j < blockHists.cols; ++j)
{
float val = blockHists.at<float>(i, j);
f.write((char*)&val, sizeof(val));
}
}
int nlocations = locations.size(); int nlocations = locations.size();
f.write((char*)&nlocations, sizeof(nlocations)); f.write((char*)&nlocations, sizeof(nlocations));
@@ -93,21 +84,18 @@ struct HOG : testing::TestWithParam<cv::cuda::DeviceInfo>, cv::cuda::HOGDescript
f.write((char*)&locations[i], sizeof(locations[i])); f.write((char*)&locations[i], sizeof(locations[i]));
} }
#else #else
void compare(const cv::Mat& blockHists, const std::vector<cv::Point>& locations) void compare(const std::vector<cv::Point>& locations)
{ {
// skip block_hists check
int rows, cols; int rows, cols;
f.read((char*)&rows, sizeof(rows)); f.read((char*)&rows, sizeof(rows));
f.read((char*)&cols, sizeof(cols)); f.read((char*)&cols, sizeof(cols));
ASSERT_EQ(rows, blockHists.rows); for (int i = 0; i < rows; ++i)
ASSERT_EQ(cols, blockHists.cols);
for (int i = 0; i < blockHists.rows; ++i)
{ {
for (int j = 0; j < blockHists.cols; ++j) for (int j = 0; j < cols; ++j)
{ {
float val; float val;
f.read((char*)&val, sizeof(val)); f.read((char*)&val, sizeof(val));
ASSERT_NEAR(val, blockHists.at<float>(i, j), 1e-3);
} }
} }
@@ -126,54 +114,41 @@ struct HOG : testing::TestWithParam<cv::cuda::DeviceInfo>, cv::cuda::HOGDescript
void testDetect(const cv::Mat& img) void testDetect(const cv::Mat& img)
{ {
-        gamma_correction = false;
-        setSVMDetector(cv::cuda::HOGDescriptor::getDefaultPeopleDetector());
+        hog->setGammaCorrection(false);
+        hog->setSVMDetector(hog->getDefaultPeopleDetector());
std::vector<cv::Point> locations; std::vector<cv::Point> locations;
// Test detect // Test detect
detect(loadMat(img), locations, 0); hog->detect(loadMat(img), locations);
#ifdef DUMP #ifdef DUMP
dump(cv::Mat(block_hists), locations); dump(locations);
#else #else
compare(cv::Mat(block_hists), locations); compare(locations);
#endif #endif
// Test detect on smaller image // Test detect on smaller image
cv::Mat img2; cv::Mat img2;
cv::resize(img, img2, cv::Size(img.cols / 2, img.rows / 2)); cv::resize(img, img2, cv::Size(img.cols / 2, img.rows / 2));
detect(loadMat(img2), locations, 0); hog->detect(loadMat(img2), locations);
#ifdef DUMP #ifdef DUMP
dump(cv::Mat(block_hists), locations); dump(locations);
#else #else
compare(cv::Mat(block_hists), locations); compare(locations);
#endif #endif
// Test detect on greater image // Test detect on greater image
cv::resize(img, img2, cv::Size(img.cols * 2, img.rows * 2)); cv::resize(img, img2, cv::Size(img.cols * 2, img.rows * 2));
detect(loadMat(img2), locations, 0); hog->detect(loadMat(img2), locations);
#ifdef DUMP #ifdef DUMP
dump(cv::Mat(block_hists), locations); dump(locations);
#else #else
compare(cv::Mat(block_hists), locations); compare(locations);
#endif #endif
} }
// Does not compare border value, as interpolation leads to delta
void compare_inner_parts(cv::Mat d1, cv::Mat d2)
{
for (int i = 1; i < blocks_per_win_y - 1; ++i)
for (int j = 1; j < blocks_per_win_x - 1; ++j)
for (int k = 0; k < block_hist_size; ++k)
{
float a = d1.at<float>(0, (i * blocks_per_win_x + j) * block_hist_size);
float b = d2.at<float>(0, (i * blocks_per_win_x + j) * block_hist_size);
ASSERT_FLOAT_EQ(a, b);
}
}
}; };
// disabled while resize is not fixed
@@ -182,13 +157,8 @@ CUDA_TEST_P(HOG, DISABLED_Detect)
cv::Mat img_rgb = readImage("hog/road.png"); cv::Mat img_rgb = readImage("hog/road.png");
ASSERT_FALSE(img_rgb.empty()); ASSERT_FALSE(img_rgb.empty());
#ifdef DUMP
f.open((std::string(cvtest::TS::ptr()->get_data_path()) + "hog/expected_output.bin").c_str(), std::ios_base::binary); f.open((std::string(cvtest::TS::ptr()->get_data_path()) + "hog/expected_output.bin").c_str(), std::ios_base::binary);
ASSERT_TRUE(f.is_open()); ASSERT_TRUE(f.is_open());
#else
f.open((std::string(cvtest::TS::ptr()->get_data_path()) + "hog/expected_output.bin").c_str(), std::ios_base::binary);
ASSERT_TRUE(f.is_open());
#endif
// Test on color image // Test on color image
cv::Mat img; cv::Mat img;
@@ -198,8 +168,6 @@ CUDA_TEST_P(HOG, DISABLED_Detect)
// Test on gray image // Test on gray image
cv::cvtColor(img_rgb, img, cv::COLOR_BGR2GRAY); cv::cvtColor(img_rgb, img, cv::COLOR_BGR2GRAY);
testDetect(img); testDetect(img);
f.close();
} }
CUDA_TEST_P(HOG, GetDescriptors) CUDA_TEST_P(HOG, GetDescriptors)
@@ -216,8 +184,14 @@ CUDA_TEST_P(HOG, GetDescriptors)
// Convert train images into feature vectors (train table) // Convert train images into feature vectors (train table)
cv::cuda::GpuMat descriptors, descriptors_by_cols; cv::cuda::GpuMat descriptors, descriptors_by_cols;
-    getDescriptors(d_img, win_size, descriptors, DESCR_FORMAT_ROW_BY_ROW);
-    getDescriptors(d_img, win_size, descriptors_by_cols, DESCR_FORMAT_COL_BY_COL);
+    hog->setWinStride(Size(64, 128));
+    hog->setDescriptorFormat(cv::cuda::HOG::DESCR_FORMAT_ROW_BY_ROW);
+    hog->compute(d_img, descriptors);
+    hog->setDescriptorFormat(cv::cuda::HOG::DESCR_FORMAT_COL_BY_COL);
+    hog->compute(d_img, descriptors_by_cols);
// Check size of the result train table // Check size of the result train table
wins_per_img_x = 3; wins_per_img_x = 3;
@@ -242,48 +216,6 @@ CUDA_TEST_P(HOG, GetDescriptors)
ASSERT_EQ(l[(y * blocks_per_win_x + x) * block_hist_size + k], ASSERT_EQ(l[(y * blocks_per_win_x + x) * block_hist_size + k],
r[(x * blocks_per_win_y + y) * block_hist_size + k]); r[(x * blocks_per_win_y + y) * block_hist_size + k]);
} }
/* Now we want to extract the same feature vectors, but from single images. NOTE: results will
be defferent, due to border values interpolation. Using of many small images is slower, however we
wont't call getDescriptors and will use computeBlockHistograms instead of. computeBlockHistograms
works good, it can be checked in the gpu_hog sample */
img_rgb = readImage("hog/positive1.png");
ASSERT_TRUE(!img_rgb.empty());
cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
computeBlockHistograms(cv::cuda::GpuMat(img));
// Everything is fine with interpolation for left top subimage
ASSERT_EQ(0.0, cv::norm((cv::Mat)block_hists, (cv::Mat)descriptors.rowRange(0, 1)));
img_rgb = readImage("hog/positive2.png");
ASSERT_TRUE(!img_rgb.empty());
cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
computeBlockHistograms(cv::cuda::GpuMat(img));
compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(1, 2)));
img_rgb = readImage("hog/negative1.png");
ASSERT_TRUE(!img_rgb.empty());
cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
computeBlockHistograms(cv::cuda::GpuMat(img));
compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(2, 3)));
img_rgb = readImage("hog/negative2.png");
ASSERT_TRUE(!img_rgb.empty());
cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
computeBlockHistograms(cv::cuda::GpuMat(img));
compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(3, 4)));
img_rgb = readImage("hog/positive3.png");
ASSERT_TRUE(!img_rgb.empty());
cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
computeBlockHistograms(cv::cuda::GpuMat(img));
compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(4, 5)));
img_rgb = readImage("hog/negative3.png");
ASSERT_TRUE(!img_rgb.empty());
cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
computeBlockHistograms(cv::cuda::GpuMat(img));
compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(5, 6)));
} }
INSTANTIATE_TEST_CASE_P(CUDA_ObjDetect, HOG, ALL_DEVICES); INSTANTIATE_TEST_CASE_P(CUDA_ObjDetect, HOG, ALL_DEVICES);
@@ -310,12 +242,12 @@ CUDA_TEST_P(CalTech, HOG)
cv::cuda::GpuMat d_img(img); cv::cuda::GpuMat d_img(img);
cv::Mat markedImage(img.clone()); cv::Mat markedImage(img.clone());
-    cv::cuda::HOGDescriptor d_hog;
-    d_hog.setSVMDetector(cv::cuda::HOGDescriptor::getDefaultPeopleDetector());
-    d_hog.nlevels = d_hog.nlevels + 32;
+    cv::Ptr<cv::cuda::HOG> d_hog = cv::cuda::HOG::create();
+    d_hog->setSVMDetector(d_hog->getDefaultPeopleDetector());
+    d_hog->setNumLevels(d_hog->getNumLevels() + 32);

    std::vector<cv::Rect> found_locations;
-    d_hog.detectMultiScale(d_img, found_locations);
+    d_hog->detectMultiScale(d_img, found_locations);
#if defined (LOG_CASCADE_STATISTIC) #if defined (LOG_CASCADE_STATISTIC)
for (int i = 0; i < (int)found_locations.size(); i++) for (int i = 0; i < (int)found_locations.size(); i++)
@@ -326,7 +258,8 @@ CUDA_TEST_P(CalTech, HOG)
cv::rectangle(markedImage, r , CV_RGB(255, 0, 0)); cv::rectangle(markedImage, r , CV_RGB(255, 0, 0));
} }
-    cv::imshow("Res", markedImage); cv::waitKey();
+    cv::imshow("Res", markedImage);
+    cv::waitKey();
#endif #endif
} }
@@ -244,19 +244,13 @@ void App::run()
    Size win_size(args.win_width, args.win_width * 2); //(64, 128) or (48, 96)
    Size win_stride(args.win_stride_width, args.win_stride_height);

+   cv::Ptr<cv::cuda::HOG> gpu_hog = cv::cuda::HOG::create(win_size);
+   cv::HOGDescriptor cpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9);

    // Create HOG descriptors and detectors here
-   vector<float> detector;
-   if (win_size == Size(64, 128))
-       detector = cv::cuda::HOGDescriptor::getPeopleDetector64x128();
-   else
-       detector = cv::cuda::HOGDescriptor::getPeopleDetector48x96();
-   cv::cuda::HOGDescriptor gpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9,
-                                   cv::cuda::HOGDescriptor::DEFAULT_WIN_SIGMA, 0.2, gamma_corr,
-                                   cv::cuda::HOGDescriptor::DEFAULT_NLEVELS);
-   cv::HOGDescriptor cpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, 1, -1,
-                             HOGDescriptor::L2Hys, 0.2, gamma_corr, cv::HOGDescriptor::DEFAULT_NLEVELS);
-   gpu_hog.setSVMDetector(detector);
+   Mat detector = gpu_hog->getDefaultPeopleDetector();
+   gpu_hog->setSVMDetector(detector);
    cpu_hog.setSVMDetector(detector);
while (running) while (running)
@@ -307,9 +301,6 @@ void App::run()
            else img = img_aux;
            img_to_show = img;

-           gpu_hog.nlevels = nlevels;
-           cpu_hog.nlevels = nlevels;
vector<Rect> found; vector<Rect> found;
// Perform HOG classification // Perform HOG classification
@@ -317,11 +308,19 @@ void App::run()
            if (use_gpu)
            {
                gpu_img.upload(img);
-               gpu_hog.detectMultiScale(gpu_img, found, hit_threshold, win_stride,
-                                        Size(0, 0), scale, gr_threshold);
+               gpu_hog->setNumLevels(nlevels);
+               gpu_hog->setHitThreshold(hit_threshold);
+               gpu_hog->setWinStride(win_stride);
+               gpu_hog->setScaleFactor(scale);
+               gpu_hog->setGroupThreshold(gr_threshold);
+               gpu_hog->detectMultiScale(gpu_img, found);
            }
-           else cpu_hog.detectMultiScale(img, found, hit_threshold, win_stride,
-                                         Size(0, 0), scale, gr_threshold);
+           else
+           {
+               cpu_hog.nlevels = nlevels;
+               cpu_hog.detectMultiScale(img, found, hit_threshold, win_stride,
+                                        Size(0, 0), scale, gr_threshold);
+           }
            hogWorkEnd();

            // Draw positive classified windows