Commit a743eca0 authored by Marina Kolpakova

LBP features: GPU representation

parent 55567582
...@@ -1460,6 +1460,7 @@ private: ...@@ -1460,6 +1460,7 @@ private:
GpuMat nodes_mat; GpuMat nodes_mat;
GpuMat leaves_mat; GpuMat leaves_mat;
GpuMat subsets_mat; GpuMat subsets_mat;
GpuMat features_mat;
// current integral image // current integral image
GpuMat integral; GpuMat integral;
......
...@@ -99,10 +99,9 @@ cv::gpu::CascadeClassifier_GPU_LBP::~CascadeClassifier_GPU_LBP() ...@@ -99,10 +99,9 @@ cv::gpu::CascadeClassifier_GPU_LBP::~CascadeClassifier_GPU_LBP()
// Pre-allocates the integral-image buffer for frames of the requested size.
// The buffer gets one more row and one more column than `desired` and holds
// 32-bit signed integers (CV_32SC1).
void cv::gpu::CascadeClassifier_GPU_LBP::preallocateIntegralBuffer(cv::Size desired)
{
    // GpuMat::create takes (rows, cols, type): height goes first. Passing
    // width first would allocate a transposed buffer that does not match the
    // cv::Size-based allocation done in detectMultiScale.
    integral.create(desired.height + 1, desired.width + 1, CV_32SC1);
}
bool cv::gpu::CascadeClassifier_GPU_LBP::empty() const bool cv::gpu::CascadeClassifier_GPU_LBP::empty() const
{ {
return stage_mat.empty(); return stage_mat.empty();
...@@ -132,6 +131,8 @@ bool cv::gpu::CascadeClassifier_GPU_LBP::load(const string& classifierAsXml) ...@@ -132,6 +131,8 @@ bool cv::gpu::CascadeClassifier_GPU_LBP::load(const string& classifierAsXml)
#define GPU_CC_WEAK_CLASSIFIERS "weakClassifiers" #define GPU_CC_WEAK_CLASSIFIERS "weakClassifiers"
#define GPU_CC_INTERNAL_NODES "internalNodes" #define GPU_CC_INTERNAL_NODES "internalNodes"
#define GPU_CC_LEAF_VALUES "leafValues" #define GPU_CC_LEAF_VALUES "leafValues"
#define GPU_CC_FEATURES "features"
#define GPU_CC_RECT "rect"
bool CascadeClassifier_GPU_LBP::read(const FileNode &root) bool CascadeClassifier_GPU_LBP::read(const FileNode &root)
{ {
...@@ -225,6 +226,22 @@ bool CascadeClassifier_GPU_LBP::read(const FileNode &root) ...@@ -225,6 +226,22 @@ bool CascadeClassifier_GPU_LBP::read(const FileNode &root)
cl_leaves.push_back((float)*iIt); cl_leaves.push_back((float)*iIt);
} }
} }
// Load the feature rectangles: four values per feature, each saturated to
// the uchar range and appended to one flat byte array.
fn = root[GPU_CC_FEATURES];
if( fn.empty() )
    return false;

std::vector<char> features;
features.reserve(fn.size() * 4);
FileNodeIterator f_it = fn.begin(), f_end = fn.end();
for (; f_it != f_end; ++f_it)
{
    // Look the rectangle up on the *current* feature. Indexing `fn` (the
    // whole features node) by name ignored the iterator entirely.
    FileNode rect = (*f_it)[GPU_CC_RECT];

    // Four values are consumed below; reject malformed entries instead of
    // advancing the iterator past the end of the node.
    if (rect.size() < 4)
        return false;

    FileNodeIterator r_it = rect.begin();
    for (int k = 0; k < 4; ++k, ++r_it)
        features.push_back(saturate_cast<uchar>((int)*r_it));
}
// copy data structures on gpu // copy data structures on gpu
stage_mat = cv::gpu::GpuMat(1, (int)stages.size() * sizeof(Stage), CV_8UC1); stage_mat = cv::gpu::GpuMat(1, (int)stages.size() * sizeof(Stage), CV_8UC1);
stage_mat.upload(cv::Mat(1, stages.size() * sizeof(Stage), CV_8UC1, &(stages[0]) )); stage_mat.upload(cv::Mat(1, stages.size() * sizeof(Stage), CV_8UC1, &(stages[0]) ));
...@@ -241,6 +258,9 @@ bool CascadeClassifier_GPU_LBP::read(const FileNode &root) ...@@ -241,6 +258,9 @@ bool CascadeClassifier_GPU_LBP::read(const FileNode &root)
// Upload the subsets table into its own buffer. The original uploaded it
// into stage_mat, clobbering the stages and leaving subsets_mat unfilled.
// cv::Mat(std::vector) is a single-column matrix; reshape it to a single row
// so the uploaded layout matches the 1xN buffers allocated here.
subsets_mat = cv::gpu::GpuMat(1, (int)subsets.size(), CV_32SC1);
subsets_mat.upload(cv::Mat(subsets).reshape(1, 1));

// Flat byte array of feature rectangles, four bytes per feature.
features_mat = cv::gpu::GpuMat(1, (int)features.size(), CV_8UC1);
features_mat.upload(cv::Mat(features).reshape(1, 1));
return true; return true;
} }
...@@ -270,22 +290,25 @@ namespace cv { namespace gpu { namespace device ...@@ -270,22 +290,25 @@ namespace cv { namespace gpu { namespace device
{ {
namespace lbp namespace lbp
{ {
void cascadeClassify(const DevMem2Db stages, const DevMem2Di trees, const DevMem2Db nodes, const DevMem2Df leaves, const DevMem2Di subsets, void cascadeClassify(const DevMem2Db stages, const DevMem2Di trees, const DevMem2Db nodes, const DevMem2Df leaves, const DevMem2Di subsets, const DevMem2Db features,
const DevMem2Db integral, int workWidth, int workHeight, int step, int subsetSize, DevMem2D_<int4> objects, int minNeighbors = 4, cudaStream_t stream = 0); const DevMem2Di integral, int workWidth, int workHeight, int clWidth, int clHeight, float scale, int step, int subsetSize, DevMem2D_<int4> objects, int minNeighbors = 4, cudaStream_t stream = 0);
} }
}}} }}}
int cv::gpu::CascadeClassifier_GPU_LBP::detectMultiScale(const GpuMat& image, GpuMat& scaledImageBuffer, GpuMat& objects, double scaleFactor, int minNeighbors /*, Size minSize=Size()*/) int cv::gpu::CascadeClassifier_GPU_LBP::detectMultiScale(const GpuMat& image, GpuMat& scaledImageBuffer, GpuMat& objects, double scaleFactor, int minNeighbors /*, Size minSize=Size()*/)
{ {
CV_Assert( scaleFactor > 1 && image.depth() == CV_8U ); CV_Assert( scaleFactor > 1 && image.depth() == CV_8U );
CV_Assert(empty()); CV_Assert(!empty());
const int defaultObjSearchNum = 100; const int defaultObjSearchNum = 100;
if( !objects.empty() && objects.depth() == CV_32S) // if( !objects.empty() && objects.depth() == CV_32S)
objects.reshape(4, 1); // objects.reshape(4, 1);
else // else
objects.create(1 , defaultObjSearchNum, CV_32SC4); // objects.create(1 , defaultObjSearchNum, CV_32SC4);
// temp solution
objects.create(image.rows, image.cols, CV_32SC4);
scaledImageBuffer.create(image.size(), image.type()); scaledImageBuffer.create(image.size(), image.type());
...@@ -302,14 +325,13 @@ int cv::gpu::CascadeClassifier_GPU_LBP::detectMultiScale(const GpuMat& image, Gp ...@@ -302,14 +325,13 @@ int cv::gpu::CascadeClassifier_GPU_LBP::detectMultiScale(const GpuMat& image, Gp
// TODO: min max object sizes checking // TODO: min max object sizes checking
cv::gpu::resize(image, scaledImageBuffer, scaledImageSize, 0, 0, INTER_NEAREST); cv::gpu::resize(image, scaledImageBuffer, scaledImageSize, 0, 0, INTER_NEAREST);
//prepare image for evaluation //prepare image for evaluation
integral.create(cv::Size(scaledImageSize.width + 1, scaledImageSize.height + 1), CV_32FC1); integral.create(cv::Size(scaledImageSize.width + 1, scaledImageSize.height + 1), CV_32SC1);
cv::gpu::integral(scaledImageBuffer, integral); cv::gpu::integral(scaledImageBuffer, integral);
int step = (factor <= 2.) + 1; int step = (factor <= 2.) + 1;
int stripCount = 1, stripSize = processingRectSize.height;
cv::gpu::device::lbp::cascadeClassify(stage_mat, trees_mat, nodes_mat, leaves_mat, subsets_mat, cv::gpu::device::lbp::cascadeClassify(stage_mat, trees_mat, nodes_mat, leaves_mat, subsets_mat, features_mat,
integral, processingRectSize.width, processingRectSize.height, step, subsetSize, objects, minNeighbors); integral, processingRectSize.width, processingRectSize.height, windowSize.width, windowSize.height, scaleFactor, step, subsetSize, objects, minNeighbors);
} }
// TODO: reject levels // TODO: reject levels
......
...@@ -46,13 +46,14 @@ namespace cv { namespace gpu { namespace device ...@@ -46,13 +46,14 @@ namespace cv { namespace gpu { namespace device
{ {
namespace lbp namespace lbp
{ {
__global__ void lbp_classify(const DevMem2D_< ::cv::gpu::device::Stage> stages, const DevMem2Di trees, const DevMem2Db nodes, const DevMem2Df leaves, const DevMem2Di subsets, __global__ void lbp_classify(const DevMem2D_< ::cv::gpu::device::Stage> stages, const DevMem2Di trees, const DevMem2D_< ::cv::gpu::device::ClNode> nodes,
const DevMem2Db integral, float step, int subsetSize, DevMem2D_<int4> objects) const DevMem2Df leaves, const DevMem2Di subsets,
const DevMem2D_<uchar4> features, const DevMem2Di integral, float step, int subsetSize, DevMem2D_<int4> objects, float scale, int clWidth, int clHeight)
{ {
unsigned int x = threadIdx.x; unsigned int x = threadIdx.x * step;
unsigned int y = blockIdx.x; unsigned int y = blockIdx.x * step;
int nodeOfs = 0, leafOfs = 0; int nodeOfs = 0, leafOfs = 0;
::cv::gpu::device::Feature feature; ::cv::gpu::device::Feature evaluator;
for (int s = 0; s < stages.cols; s++ ) for (int s = 0; s < stages.cols; s++ )
{ {
...@@ -61,7 +62,9 @@ namespace cv { namespace gpu { namespace device ...@@ -61,7 +62,9 @@ namespace cv { namespace gpu { namespace device
for (int w = 0; w < stage.ntrees; w++) for (int w = 0; w < stage.ntrees; w++)
{ {
::cv::gpu::device::ClNode node = nodes(0, nodeOfs); ::cv::gpu::device::ClNode node = nodes(0, nodeOfs);
char c = feature();// TODO: inmplement it uchar4 feature = features(0, node.featureIdx);
uchar c = evaluator(y, x, feature, integral);
const int subsetIdx = (nodeOfs * subsetSize); const int subsetIdx = (nodeOfs * subsetSize);
int idx = subsetIdx + ((c >> 5) & ( 1 << (c & 31)) ? leafOfs : leafOfs + 1); int idx = subsetIdx + ((c >> 5) & ( 1 << (c & 31)) ? leafOfs : leafOfs + 1);
sum += leaves(0, subsets(0, idx) ); sum += leaves(0, subsets(0, idx) );
...@@ -70,21 +73,27 @@ namespace cv { namespace gpu { namespace device ...@@ -70,21 +73,27 @@ namespace cv { namespace gpu { namespace device
} }
if (sum < stage.threshold) if (sum < stage.threshold)
return; // nothing matched return;
return;//mathed
} }
int4 rect;
rect.x = roundf(x * scale);
rect.y = roundf(y * scale);
rect.z = roundf(clWidth * scale);
rect.w = roundf(clHeight * scale);
objects(blockIdx.x, threadIdx.x) = rect;
} }
void cascadeClassify(const DevMem2Db bstages, const DevMem2Di trees, const DevMem2Db nodes, const DevMem2Df leaves, const DevMem2Di subsets, void cascadeClassify(const DevMem2Db bstages, const DevMem2Di trees, const DevMem2Db bnodes, const DevMem2Df leaves, const DevMem2Di subsets, const DevMem2Db bfeatures,
const DevMem2Db integral, int workWidth, int workHeight, int step, int subsetSize, DevMem2D_<int4> objects, int minNeighbors, cudaStream_t stream) const DevMem2Di integral, int workWidth, int workHeight, int clWidth, int clHeight, float scale, int step, int subsetSize, DevMem2D_<int4> objects, int minNeighbors, cudaStream_t stream)
{ {
printf("CascadeClassify"); printf("CascadeClassify");
int blocks = ceilf(workHeight / (float)step); int blocks = ceilf(workHeight / (float)step);
int threads = ceilf(workWidth / (float)step); int threads = ceilf(workWidth / (float)step);
DevMem2D_< ::cv::gpu::device::Stage> stages = DevMem2D_< ::cv::gpu::device::Stage>(bstages); DevMem2D_< ::cv::gpu::device::Stage> stages = DevMem2D_< ::cv::gpu::device::Stage>(bstages);
DevMem2D_<uchar4> features = (DevMem2D_<uchar4>)bfeatures;
DevMem2D_< ::cv::gpu::device::ClNode> nodes = DevMem2D_< ::cv::gpu::device::ClNode>(bnodes);
lbp_classify<<<blocks, threads>>>(stages, trees, nodes, leaves, subsets, integral, step, subsetSize, objects); lbp_classify<<<blocks, threads>>>(stages, trees, nodes, leaves, subsets, features, integral, step, subsetSize, objects, scale, clWidth, clHeight);
} }
} }
}}} }}}
\ No newline at end of file
...@@ -82,9 +82,48 @@ namespace cv { namespace gpu { namespace device { ...@@ -82,9 +82,48 @@ namespace cv { namespace gpu { namespace device {
{ {
__device__ __forceinline__ Feature(const Feature& other) {(void)other;} __device__ __forceinline__ Feature(const Feature& other) {(void)other;}
__device__ __forceinline__ Feature() {} __device__ __forceinline__ Feature() {}
// Evaluates one LBP feature for the window whose top-left corner is (x, y)
// and returns its 8-bit code.
//
// `feature` is read as: x, y - top-left corner of the 3x3 cell grid relative
// to the window; z, w - width and height of a single cell.
// NOTE(review): this layout is assumed from the four values loaded per
// feature rectangle - confirm against the cascade file format.
//
// `integral` is the integral image; it is sampled on a regular 4x4 grid of
// anchor points that delimit the nine cells.
__device__ __forceinline__ uchar operator() (unsigned int y, unsigned int x, uchar4 feature, const DevMem2Di integral) const
{
    const int cellW = feature.z;
    const int cellH = feature.w;
    const int left  = x + feature.x;
    const int top   = y + feature.y;

    // Load the 16 anchor points row by row. Every anchor is offset from the
    // feature's own corner; the original dropped feature.x / feature.y on
    // the odd columns / rows, which made the grid irregular.
    int anchors[16];
    for (int r = 0; r < 4; ++r)
        for (int c = 0; c < 4; ++c)
            anchors[4 * r + c] = integral(top + r * cellH, left + c * cellW);

    // Sum of the central cell: the reference value for the comparisons.
    const int sum = anchors[5] - anchors[6] - anchors[9] + anchors[10];

    // Compare the eight surrounding cells against the centre, clockwise
    // from the top-left cell, most significant bit first.
    uchar response = (( (anchors[ 0] - anchors[ 1] - anchors[ 4] + anchors[ 5]) >= sum )? 128 : 0)
                    |(( (anchors[ 1] - anchors[ 2] - anchors[ 5] + anchors[ 6]) >= sum )? 64 : 0)
                    |(( (anchors[ 2] - anchors[ 3] - anchors[ 6] + anchors[ 7]) >= sum )? 32 : 0)
                    |(( (anchors[ 6] - anchors[ 7] - anchors[10] + anchors[11]) >= sum )? 16 : 0)
                    |(( (anchors[10] - anchors[11] - anchors[14] + anchors[15]) >= sum )? 8 : 0)
                    |(( (anchors[ 9] - anchors[10] - anchors[13] + anchors[14]) >= sum )? 4 : 0)
                    |(( (anchors[ 8] - anchors[ 9] - anchors[12] + anchors[13]) >= sum )? 2 : 0)
                    |(( (anchors[ 4] - anchors[ 5] - anchors[ 8] + anchors[ 9]) >= sum )? 1 : 0);

    return response;
}
}; };
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment