Commit a9f2f522 authored by Marina Kolpakova

Refactored the LBP classifier; added a parameter for the maximum size of detected objects.

parent e6f7e4d8
@@ -1435,7 +1435,8 @@ public:
     bool load(const std::string& filename);
     void release();
-    int detectMultiScale(const GpuMat& image, GpuMat& scaledImageBuffer, GpuMat& objectsBuf, double scaleFactor = 1.1, int minNeighbors = 4/*, Size minSize = Size()*/);
+    int detectMultiScale(const GpuMat& image, GpuMat& scaledImageBuffer, GpuMat& objectsBuf, double scaleFactor = 1.1, int minNeighbors = 4,
+                         cv::Size maxObjectSize = cv::Size()/*, Size minSize = Size()*/);
     void preallocateIntegralBuffer(cv::Size desired);
     bool findLargestObject;
...
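A minimal caller's sketch of the new signature (the cascade file, input image, and the 256x256 cap are illustrative, not part of the commit): maxObjectSize bounds the largest search window, and the cv::Size() default preserves the previous behavior of searching up to the full image size.

#include <cstdio>
#include <opencv2/gpu/gpu.hpp>
#include <opencv2/highgui/highgui.hpp>

int main()
{
    cv::gpu::CascadeClassifier_GPU_LBP cascade;
    if (!cascade.load("lbpcascade_frontalface.xml")) // hypothetical cascade file
        return -1;

    cv::gpu::GpuMat image, scaledImageBuffer, objects;
    image.upload(cv::imread("frame.png", 0)); // hypothetical 8-bit grayscale input

    // Ignore candidate windows larger than 256x256 pixels.
    int n = cascade.detectMultiScale(image, scaledImageBuffer, objects,
                                     1.1 /*scaleFactor*/, 4 /*minNeighbors*/,
                                     cv::Size(256, 256));
    std::printf("%d candidate rectangles\n", n);
    return 0;
}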
@@ -48,20 +48,6 @@
 using namespace cv;
 using namespace cv::gpu;
 using namespace std;
-struct Stage
-{
-    int first;
-    int ntrees;
-    float threshold;
-};
-
-struct DTreeNode
-{
-    int featureIdx;
-    int left;
-    int right;
-};
-
 #if !defined (HAVE_CUDA)
 // ============ old fashioned haar cascade ==============================================//
 cv::gpu::CascadeClassifier_GPU::CascadeClassifier_GPU() { throw_nogpu(); }
@@ -128,6 +114,13 @@ bool cv::gpu::CascadeClassifier_GPU_LBP::load(const string& classifierAsXml)
 #define GPU_CC_FEATURES "features"
 #define GPU_CC_RECT "rect"

+struct Stage
+{
+    int first;
+    int ntrees;
+    float threshold;
+};
+
 // currently only stump based boost classifiers are supported
 bool CascadeClassifier_GPU_LBP::read(const FileNode &root)
 {
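For context on how this relocated Stage struct is consumed: the parsed stages are packed into a single row of raw bytes, so the stage count is later recovered as stage_mat.cols / sizeof(Stage) (see the classifyStump call below). A small host-side sketch of that packing convention, with an assumed stage count:

#include <cstdio>

struct Stage
{
    int first;       // index of the stage's first tree
    int ntrees;      // number of stumps in the stage
    float threshold; // stage rejection threshold
};

int main()
{
    const int nstages = 20; // assumed cascade depth, for illustration only
    const size_t rowBytes = nstages * sizeof(Stage); // one CV_8U row, as in stage_mat
    std::printf("%zu bytes -> %zu stages\n", rowBytes, rowBytes / sizeof(Stage));
    return 0;
}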
@@ -279,12 +272,26 @@ namespace cv { namespace gpu { namespace device
 {
     namespace lbp
     {
-        void cascadeClassify(const DevMem2Db stages, const DevMem2Di trees, const DevMem2Db nodes, const DevMem2Df leaves, const DevMem2Di subsets, const DevMem2Db features,
-            const DevMem2Di integral, int workWidth, int workHeight, int clWidth, int clHeight, float scale, int step, int subsetSize, DevMem2D_<int4> objects, int minNeighbors = 4, cudaStream_t stream = 0);
+        void classifyStump(const DevMem2Db mstages,
+                           const int nstages,
+                           const DevMem2Di mnodes,
+                           const DevMem2Df mleaves,
+                           const DevMem2Di msubsets,
+                           const DevMem2Db mfeatures,
+                           const DevMem2Di integral,
+                           const int workWidth,
+                           const int workHeight,
+                           const int clWidth,
+                           const int clHeight,
+                           float scale,
+                           int step,
+                           int subsetSize,
+                           DevMem2D_<int4> objects);
     }
 }}}

-int cv::gpu::CascadeClassifier_GPU_LBP::detectMultiScale(const GpuMat& image, GpuMat& scaledImageBuffer, GpuMat& objects, double scaleFactor, int minNeighbors /*, Size minSize=Size()*/)
+int cv::gpu::CascadeClassifier_GPU_LBP::detectMultiScale(const GpuMat& image, GpuMat& scaledImageBuffer, GpuMat& objects,
+                                                         double scaleFactor, int minNeighbors, cv::Size maxObjectSize /*, Size minSize=Size()*/)
 {
     CV_Assert( scaleFactor > 1 && image.depth() == CV_8U );
     CV_Assert(!empty());
@@ -299,28 +306,35 @@ int cv::gpu::CascadeClassifier_GPU_LBP::detectMultiScale(const GpuMat& image, Gp
     // temp solution
     objects.create(image.rows, image.cols, CV_32SC4);

-    scaledImageBuffer.create(image.size(), image.type());
+    if (maxObjectSize == cv::Size())
+        maxObjectSize = image.size();
+
+    scaledImageBuffer.create(image.rows + 1, image.cols + 1, CV_8U);

-    // TODO: specify max objects size
     for( double factor = 1; ; factor *= scaleFactor )
     {
         cv::Size windowSize(cvRound(NxM.width * factor), cvRound(NxM.height * factor));
         cv::Size scaledImageSize(cvRound( image.cols / factor ), cvRound( image.rows / factor ));
         cv::Size processingRectSize( scaledImageSize.width - NxM.width + 1, scaledImageSize.height - NxM.height + 1 );

+        // nothing to do
         if (processingRectSize.width <= 0 || processingRectSize.height <= 0 )
             break;

-        // TODO: min max object sizes cheching
-        cv::gpu::resize(image, scaledImageBuffer, scaledImageSize, 0, 0, INTER_NEAREST);
-        //prepare image for evaluation
+        if( windowSize.width > maxObjectSize.width || windowSize.height > maxObjectSize.height )
+            break;
+        // if( windowSize.width < minObjectSize.width || windowSize.height < minObjectSize.height )
+        //     continue;
+
+        cv::gpu::resize(image, scaledImageBuffer, scaledImageSize, 0, 0, CV_INTER_LINEAR);
         integral.create(cv::Size(scaledImageSize.width + 1, scaledImageSize.height + 1), CV_32SC1);
         cv::gpu::integral(scaledImageBuffer, integral);

         int step = (factor <= 2.) + 1;

-        cv::gpu::device::lbp::cascadeClassify(stage_mat, trees_mat, nodes_mat, leaves_mat, subsets_mat, features_mat,
-            integral, processingRectSize.width, processingRectSize.height, windowSize.width, windowSize.height, scaleFactor, step, subsetSize, objects, minNeighbors);
+        cv::gpu::device::lbp::classifyStump(stage_mat, stage_mat.cols / sizeof(Stage), nodes_mat, leaves_mat, subsets_mat, features_mat,
+            integral, processingRectSize.width, processingRectSize.height, windowSize.width, windowSize.height, scaleFactor, step, subsetSize, objects);
     }
     // TODO: reject levels
...
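To make the new termination condition concrete, here is a standalone sketch of the pyramid loop's two early-outs (image, window, and maximum sizes are illustrative, not from the commit):

#include <cmath>
#include <cstdio>

int main()
{
    const double scaleFactor = 1.1;
    const int imgW = 640, imgH = 480; // input image (illustrative)
    const int nxmW = 24, nxmH = 24;   // trained window size NxM (illustrative)
    const int maxW = 128, maxH = 128; // maxObjectSize

    for (double factor = 1; ; factor *= scaleFactor)
    {
        const int windowW = (int)std::lround(nxmW * factor);
        const int windowH = (int)std::lround(nxmH * factor);
        const int procW = (int)std::lround(imgW / factor) - nxmW + 1;
        const int procH = (int)std::lround(imgH / factor) - nxmH + 1;

        if (procW <= 0 || procH <= 0)
            break; // scaled image is smaller than the window: nothing to do
        if (windowW > maxW || windowH > maxH)
            break; // the new early-out: window already exceeds maxObjectSize

        std::printf("factor %.2f: window %dx%d, grid %dx%d\n",
                    factor, windowW, windowH, procW, procH);
    }
    return 0;
}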
@@ -46,54 +46,69 @@ namespace cv { namespace gpu { namespace device
 {
     namespace lbp
     {
-        __global__ void lbp_classify(const DevMem2D_< ::cv::gpu::device::Stage> stages, const DevMem2Di trees, const DevMem2D_< ::cv::gpu::device::ClNode> nodes,
-            const DevMem2Df leaves, const DevMem2Di subsets,
-            const DevMem2D_<uchar4> features, const DevMem2Di integral, float step, int subsetSize, DevMem2D_<int4> objects, float scale, int clWidth, int clHeight)
+        __global__ void lbp_classify_stump(Stage* stages, int nstages, ClNode* nodes, const float* leaves, const int* subsets, const uchar4* features,
+            const DevMem2Di integral, int workWidth, int workHeight, int clWidth, int clHeight, float scale, int step, int subsetSize, DevMem2D_<int4> objects)
         {
-            unsigned int x = threadIdx.x * step;
-            unsigned int y = blockIdx.x * step;
-            int nodeOfs = 0, leafOfs = 0;
-            ::cv::gpu::device::Feature evaluator;
-            for (int s = 0; s < stages.cols; s++ )
+            int y = threadIdx.x * scale;
+            int x = blockIdx.x * scale;
+
+            int i = 0;
+            int current_node = 0;
+            int current_leave = 0;
+
+            LBP evaluator;
+            for (int s = 0; s < nstages; s++ )
             {
-                ::cv::gpu::device::Stage stage = stages(0, s);
-                int sum = 0;
-                for (int w = 0; w < stage.ntrees; w++)
+                float sum = 0;
+                Stage stage = stages[s];
+                for (int t = 0; t < stage.ntrees; t++)
                 {
-                    ::cv::gpu::device::ClNode node = nodes(0, nodeOfs);
-                    uchar4 feature = features(0, node.featureIdx);
-                    uchar c = evaluator(y, x, feature, integral);
-                    const int subsetIdx = (nodeOfs * subsetSize);
-                    int idx = subsetIdx + ((c >> 5) & ( 1 << (c & 31)) ? leafOfs : leafOfs + 1);
-                    sum += leaves(0, subsets(0, idx) );
-                    nodeOfs++;
-                    leafOfs += 2;
+                    ClNode node = nodes[current_node];
+                    uchar4 feature = features[node.featureIdx];
+                    int c = evaluator(y, x, feature, integral);
+                    const int* subsetIdx = subsets + (current_node * subsetSize);
+                    int idx = (subsetIdx[c >> 5] & ( 1 << (c & 31))) ? current_leave : current_leave + 1;
+                    sum += leaves[idx];
+                    current_node += 1;
+                    current_leave += 2;
                 }
+                i = s;
                 if (sum < stage.threshold)
                     return;
             }
             int4 rect;
             rect.x = roundf(x * scale);
             rect.y = roundf(y * scale);
-            rect.z = roundf(clWidth * scale);
-            rect.w = roundf(clHeight * scale);
-            objects(blockIdx.x, threadIdx.x) = rect;
+            rect.z = roundf(clWidth);
+            rect.w = roundf(clHeight);
+            if(i >= 19)
+                printf( "GPU detected [%d, %d] - [%d, %d]\n", rect.x, rect.y, rect.z, rect.w);
         }

-        void cascadeClassify(const DevMem2Db bstages, const DevMem2Di trees, const DevMem2Db bnodes, const DevMem2Df leaves, const DevMem2Di subsets, const DevMem2Db bfeatures,
-            const DevMem2Di integral, int workWidth, int workHeight, int clWidth, int clHeight, float scale, int step, int subsetSize, DevMem2D_<int4> objects, int minNeighbors, cudaStream_t stream)
+        void classifyStump(const DevMem2Db mstages, const int nstages, const DevMem2Di mnodes, const DevMem2Df mleaves, const DevMem2Di msubsets, const DevMem2Db mfeatures,
+            const DevMem2Di integral, const int workWidth, const int workHeight, const int clWidth, const int clHeight, float scale, int step, int subsetSize,
+            DevMem2D_<int4> objects)
         {
-            printf("CascadeClassify");
             int blocks = ceilf(workHeight / (float)step);
             int threads = ceilf(workWidth / (float)step);
+            printf("blocks %d, threads %d\n", blocks, threads);

-            DevMem2D_< ::cv::gpu::device::Stage> stages = DevMem2D_< ::cv::gpu::device::Stage>(bstages);
-            DevMem2D_<uchar4> features = (DevMem2D_<uchar4>)bfeatures;
-            DevMem2D_< ::cv::gpu::device::ClNode> nodes = DevMem2D_< ::cv::gpu::device::ClNode>(bnodes);
-            lbp_classify<<<blocks, threads>>>(stages, trees, nodes, leaves, subsets, features, integral, step, subsetSize, objects, scale, clWidth, clHeight);
+            Stage* stages = (Stage*)(mstages.ptr());
+            ClNode* nodes = (ClNode*)(mnodes.ptr());
+            const float* leaves = mleaves.ptr();
+            const int* subsets = msubsets.ptr();
+            const uchar4* features = (uchar4*)(mfeatures.ptr());
+
+            lbp_classify_stump<<<blocks, threads>>>(stages, nstages, nodes, leaves, subsets, features, integral,
+                workWidth, workHeight, clWidth, clHeight, scale, step, subsetSize, objects);
         }
     }
 }}}
\ No newline at end of file
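One detail worth unpacking from lbp_classify_stump: each stump stores its categorical split over the 256 possible LBP codes as eight 32-bit words, and subsetIdx[c >> 5] & (1 << (c & 31)) tests the bit for code c to pick the left or right leaf. A host-side sketch of that lookup (values are illustrative):

#include <cstdio>

// Pick a leaf value for LBP code c using a 256-bit subset mask
// stored as eight ints (subsetSize == 8), as the kernel does.
float stumpLeaf(int c, const int* subset, const float* leaves)
{
    const int idx = (subset[c >> 5] & (1 << (c & 31))) ? 0 : 1;
    return leaves[idx];
}

int main()
{
    int subset[8] = { 0 };
    subset[2] |= 1 << 5;                      // mark code c = 2*32 + 5 = 69
    const float leaves[2] = { 0.75f, -0.5f }; // left / right leaf values

    std::printf("c=69 -> %+.2f, c=70 -> %+.2f\n",
                stumpLeaf(69, subset, leaves), stumpLeaf(70, subset, leaves));
    return 0;
}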
@@ -44,62 +44,58 @@
 #define __OPENCV_GPU_DEVICE_LBP_HPP_

 #include "internal_shared.hpp"
-// #include "opencv2/gpu/device/border_interpolate.hpp"
-// #include "opencv2/gpu/device/vec_traits.hpp"
-// #include "opencv2/gpu/device/vec_math.hpp"
-// #include "opencv2/gpu/device/saturate_cast.hpp"
-// #include "opencv2/gpu/device/filters.hpp"

-// #define CALC_SUM_(p0, p1, p2, p3, offset) \
-//     ((p0)[offset] - (p1)[offset] - (p2)[offset] + (p3)[offset])

-// __device__ __forceinline__ int sum(p0, p1, p2, p3, offset)
-// {
-// }

 namespace cv { namespace gpu { namespace device {
+namespace lbp{

     struct Stage
     {
         int first;
         int ntrees;
         float threshold;
-        __device__ __forceinline__ Stage(int f = 0, int n = 0, float t = 0.f) : first(f), ntrees(n), threshold(t) {}
-        __device__ __forceinline__ Stage(const Stage& other) : first(other.first), ntrees(other.ntrees), threshold(other.threshold) {}
     };

     struct ClNode
     {
-        int featureIdx;
         int left;
         int right;
-        __device__ __forceinline__ ClNode(int f = 0, int l = 0, int r = 0) : featureIdx(f), left(l), right(r) {}
-        __device__ __forceinline__ ClNode(const ClNode& other) : featureIdx(other.featureIdx), left(other.left), right(other.right) {}
+        int featureIdx;
     };

-    struct Feature
+    struct LBP
     {
-        __device__ __forceinline__ Feature(const Feature& other) {(void)other;}
-        __device__ __forceinline__ Feature() {}
+        __device__ __forceinline__ LBP(const LBP& other) {(void)other;}
+        __device__ __forceinline__ LBP() {}

         //feature as uchar x, y - left top, z,w - right bottom
-        __device__ __forceinline__ uchar operator() (unsigned int y, unsigned int x, uchar4 feature, const DevMem2Di integral) const
+        __device__ __forceinline__ int operator() (unsigned int y, unsigned int x, uchar4 feature, const DevMem2Di integral) const
         {
             int x_off = 2 * feature.z;
             int y_off = 2 * feature.w;
-            // printf("feature: %d %d %d %d\n", (int)feature.x, (int)feature.y, (int)feature.z, (int)feature.w);
+            feature.z += feature.x;
+            feature.w += feature.y;

             // load feature key points
             int anchors[16];
+            /*
+            P0-----P1-----P2-----P3
+            |      |      |      |
+            P4-----P5-----P6-----P7
+            |      |      |      |
+            P8-----P9-----P10----P11
+            |      |      |      |
+            P12----P13----P14----P15
+            */
             anchors[0] = integral(y + feature.y, x + feature.x);
             anchors[1] = integral(y + feature.y, x + feature.z);
-            anchors[2] = integral(y + feature.y, x + x_off + feature.x);
-            anchors[3] = integral(y + feature.y, x + x_off + feature.z);
+            anchors[2] = integral(y + feature.y, x + feature.x + x_off);
+            anchors[3] = integral(y + feature.y, x + feature.z + x_off);
             anchors[4] = integral(y + feature.w, x + feature.x);
             anchors[5] = integral(y + feature.w, x + feature.z);
-            anchors[6] = integral(y + feature.w, x + x_off + feature.x);
-            anchors[7] = integral(y + feature.w, x + x_off + feature.z);
+            anchors[6] = integral(y + feature.w, x + feature.x + x_off);
+            anchors[7] = integral(y + feature.w, x + feature.z + x_off);
             anchors[8] = integral(y + y_off + feature.y, x + feature.x);
             anchors[9] = integral(y + y_off + feature.y, x + feature.z);
...
@@ -114,7 +110,7 @@ namespace cv { namespace gpu { namespace device {
             // calculate feature
             int sum = anchors[5] - anchors[6] - anchors[9] + anchors[10];

-            uchar response = (( (anchors[ 0] - anchors[ 1] - anchors[ 4] + anchors[ 5]) >= sum )? 128 : 0)
+            int response = (( (anchors[ 0] - anchors[ 1] - anchors[ 4] + anchors[ 5]) >= sum )? 128 : 0)
                 |(( (anchors[ 1] - anchors[ 2] - anchors[ 5] + anchors[ 6]) >= sum )? 64 : 0)
                 |(( (anchors[ 2] - anchors[ 3] - anchors[ 6] + anchors[ 7]) >= sum )? 32 : 0)
                 |(( (anchors[ 6] - anchors[ 7] - anchors[10] + anchors[11]) >= sum )? 16 : 0)
...
@@ -122,11 +118,12 @@ namespace cv { namespace gpu { namespace device {
                 |(( (anchors[ 9] - anchors[10] - anchors[13] + anchors[14]) >= sum )? 4 : 0)
                 |(( (anchors[ 8] - anchors[ 9] - anchors[12] + anchors[13]) >= sum )? 2 : 0)
                 |(( (anchors[ 4] - anchors[ 5] - anchors[ 8] + anchors[ 9]) >= sum )? 1 : 0);

             return response;
         }
     };
+} // lbp
 } } }// namespaces

 #endif
\ No newline at end of file
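As a reference for the operator() above, a plain CPU sketch of the same computation: the 4x4 anchor grid defines nine equal cells, and each of the eight outer cell sums is compared with the center cell sum to assemble the 8-bit response, clockwise from the top-left cell, matching the bit weights 128..1. The grid geometry and the test image are assumptions of the sketch.

#include <cstdio>

// Sum over the cell [y, y+h) x [x, x+w) using an integral image ii
// with the given row stride.
static int cellSum(const int* ii, int stride, int y, int x, int h, int w)
{
    return ii[y * stride + x] - ii[y * stride + x + w]
         - ii[(y + h) * stride + x] + ii[(y + h) * stride + x + w];
}

// 8-bit LBP response of a 3x3 cell grid with cell size cw x ch, anchored
// at (y, x); bits run clockwise from the top-left cell.
static int lbpResponse(const int* ii, int stride, int y, int x, int cw, int ch)
{
    const int center = cellSum(ii, stride, y + ch, x + cw, ch, cw);
    static const int dy[] = { 0, 0, 0, 1, 2, 2, 2, 1 };
    static const int dx[] = { 0, 1, 2, 2, 2, 1, 0, 0 };
    int response = 0;
    for (int k = 0; k < 8; ++k)
        response |= (cellSum(ii, stride, y + dy[k] * ch, x + dx[k] * cw, ch, cw) >= center) << (7 - k);
    return response;
}

int main()
{
    const int W = 13, H = 13; // integral image of a 12x12 all-ones patch
    int ii[H * W];
    for (int y = 0; y < H; ++y)
        for (int x = 0; x < W; ++x)
            ii[y * W + x] = y * x; // integral of a constant-1 image

    // Every cell sum is equal, so each ">= center" test passes: 0xff.
    std::printf("response = 0x%02x\n", lbpResponse(ii, W, 0, 0, 4, 4));
    return 0;
}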