Commit 8108bd30 authored by marina.kolpakova

optimize memory usage

parent b83d4add
This diff is collapsed.
...@@ -40,11 +40,13 @@ ...@@ -40,11 +40,13 @@
// //
//M //M
#include <opencv2/gpu/device/common.hpp>
#ifndef __OPENCV_ICF_HPP__ #ifndef __OPENCV_ICF_HPP__
#define __OPENCV_ICF_HPP__ #define __OPENCV_ICF_HPP__
#include <opencv2/gpu/device/common.hpp>
#include <stdio.h>
// #if defined __CUDACC__ // #if defined __CUDACC__
// # define __device __device__ __forceinline__ // # define __device __device__ __forceinline__
// #else // #else
...@@ -92,20 +94,27 @@ struct __align__(8) Level //is actually 24 bytes ...@@ -92,20 +94,27 @@ struct __align__(8) Level //is actually 24 bytes
struct __align__(8) Node struct __align__(8) Node
{ {
// int feature;
uchar4 rect; uchar4 rect;
float threshold; // ushort channel;
uint threshold;
Node(const uchar4 c, const int t) : rect(c), threshold(t) {} enum { THRESHOLD_MASK = 0x0FFFFFFF };
Node(const uchar4 r, const uint ch, const uint t) : rect(r), threshold(t + (ch << 28))
{
// printf("%d\n", t);
// printf("[%d %d %d %d] %d, %d\n",rect.x, rect.y, rect.z, rect.w, (int)(threshold >> 28),
// (int)(0x0FFFFFFF & threshold));
}
}; };
struct __align__(8) Feature // struct __align__(8) Feature
{ // {
int channel; // int channel;
uchar4 rect; // uchar4 rect;
Feature(const int c, const uchar4 r) : channel(c), rect(r) {} // Feature(const int c, const uchar4 r) : channel(c), rect(r) {}
}; // };
} }
}}} }}}
// struct Cascade // struct Cascade
......
...@@ -60,19 +60,10 @@ namespace icf { ...@@ -60,19 +60,10 @@ namespace icf {
void fillBins(cv::gpu::PtrStepSzb hogluv, const cv::gpu::PtrStepSzf& nangle, void fillBins(cv::gpu::PtrStepSzb hogluv, const cv::gpu::PtrStepSzf& nangle,
const int fw, const int fh, const int bins); const int fw, const int fh, const int bins);
void detect(const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages, void detect(const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages,
const PtrStepSzb& nodes, const PtrStepSzb& features, const PtrStepSzb& nodes, const PtrStepSzf& leaves, const PtrStepSzi& hogluv, PtrStepSz<uchar4> objects);
PtrStepSz<uchar4> objects);
} }
}}} }}}
// namespace {
// char *itoa(long i, char* s, int /*dummy_radix*/)
// {
// sprintf(s, "%ld", i);
// return s;
// }
// }
struct cv::gpu::SoftCascade::Filds struct cv::gpu::SoftCascade::Filds
{ {
...@@ -97,7 +88,6 @@ struct cv::gpu::SoftCascade::Filds ...@@ -97,7 +88,6 @@ struct cv::gpu::SoftCascade::Filds
GpuMat stages; GpuMat stages;
GpuMat nodes; GpuMat nodes;
GpuMat leaves; GpuMat leaves;
GpuMat features;
GpuMat levels; GpuMat levels;
// preallocated buffer 640x480x10 for hogluv + 640x480 for gray // preallocated buffer 640x480x10 for hogluv + 640x480 for gray
...@@ -137,7 +127,7 @@ struct cv::gpu::SoftCascade::Filds ...@@ -137,7 +127,7 @@ struct cv::gpu::SoftCascade::Filds
bool fill(const FileNode &root, const float mins, const float maxs); bool fill(const FileNode &root, const float mins, const float maxs);
void detect(cv::gpu::GpuMat objects, cudaStream_t stream) const void detect(cv::gpu::GpuMat objects, cudaStream_t stream) const
{ {
device::icf::detect(levels, octaves, stages, nodes, features, objects); device::icf::detect(levels, octaves, stages, nodes, leaves, hogluv, objects);
} }
private: private:
...@@ -216,10 +206,9 @@ inline bool cv::gpu::SoftCascade::Filds::fill(const FileNode &root, const float ...@@ -216,10 +206,9 @@ inline bool cv::gpu::SoftCascade::Filds::fill(const FileNode &root, const float
if (fn.empty()) return false; if (fn.empty()) return false;
std::vector<Octave> voctaves; std::vector<Octave> voctaves;
std::vector<float> vstages; std::vector<float> vstages;
std::vector<Node> vnodes; std::vector<Node> vnodes;
std::vector<float> vleaves; std::vector<float> vleaves;
std::vector<Feature> vfeatures;
scales.clear(); scales.clear();
FileNodeIterator it = fn.begin(), it_end = fn.end(); FileNodeIterator it = fn.begin(), it_end = fn.end();
...@@ -245,6 +234,8 @@ inline bool cv::gpu::SoftCascade::Filds::fill(const FileNode &root, const float ...@@ -245,6 +234,8 @@ inline bool cv::gpu::SoftCascade::Filds::fill(const FileNode &root, const float
FileNode ffs = fns[SC_FEATURES]; FileNode ffs = fns[SC_FEATURES];
if (ffs.empty()) return false; if (ffs.empty()) return false;
FileNodeIterator ftrs = ffs.begin();
fns = fns[SC_STAGES]; fns = fns[SC_STAGES];
if (fn.empty()) return false; if (fn.empty()) return false;
...@@ -263,10 +254,21 @@ inline bool cv::gpu::SoftCascade::Filds::fill(const FileNode &root, const float ...@@ -263,10 +254,21 @@ inline bool cv::gpu::SoftCascade::Filds::fill(const FileNode &root, const float
FileNodeIterator inIt = fns.begin(), inIt_end = fns.end(); FileNodeIterator inIt = fns.begin(), inIt_end = fns.end();
for (; inIt != inIt_end;) for (; inIt != inIt_end;)
{ {
int feature = (int)(*(inIt +=2)++) + feature_offset; // int feature = (int)(*(inIt +=2)) + feature_offset;
float th = (float)(*(inIt++)); inIt +=3;
// extract feature, Todo:check it
uint th = saturate_cast<uint>((float)(*(inIt++)));
cv::FileNode ftn = (*ftrs)[SC_F_RECT];
cv::FileNodeIterator r_it = ftn.begin();
uchar4 rect; uchar4 rect;
vnodes.push_back(Node(rect, th)); rect.x = saturate_cast<uchar>((int)*(r_it++));
rect.y = saturate_cast<uchar>((int)*(r_it++));
rect.z = saturate_cast<uchar>((int)*(r_it++));
rect.w = saturate_cast<uchar>((int)*(r_it++));
uint channel = saturate_cast<uint>((int)(*ftrs)[SC_F_CHANNEL]);
vnodes.push_back(Node(rect, channel, th));
++ftrs;
} }
fns = (*ftr)[SC_LEAF]; fns = (*ftr)[SC_LEAF];
...@@ -276,19 +278,6 @@ inline bool cv::gpu::SoftCascade::Filds::fill(const FileNode &root, const float ...@@ -276,19 +278,6 @@ inline bool cv::gpu::SoftCascade::Filds::fill(const FileNode &root, const float
} }
} }
st = ffs.begin(), st_end = ffs.end();
for (; st != st_end; ++st )
{
cv::FileNode rn = (*st)[SC_F_RECT];
cv::FileNodeIterator r_it = rn.begin();
uchar4 rect;
rect.x = saturate_cast<uchar>((int)*(r_it++));
rect.y = saturate_cast<uchar>((int)*(r_it++));
rect.z = saturate_cast<uchar>((int)*(r_it++));
rect.w = saturate_cast<uchar>((int)*(r_it++));
vfeatures.push_back(Feature((int)(*st)[SC_F_CHANNEL], rect));
}
feature_offset += octave.stages * 3; feature_offset += octave.stages * 3;
++octIndex; ++octIndex;
} }
...@@ -306,9 +295,6 @@ inline bool cv::gpu::SoftCascade::Filds::fill(const FileNode &root, const float ...@@ -306,9 +295,6 @@ inline bool cv::gpu::SoftCascade::Filds::fill(const FileNode &root, const float
leaves.upload(cv::Mat(vleaves).reshape(1,1)); leaves.upload(cv::Mat(vleaves).reshape(1,1));
CV_Assert(!leaves.empty()); CV_Assert(!leaves.empty());
features.upload(cv::Mat(1, vfeatures.size() * sizeof(Feature), CV_8UC1, (uchar*)&(vfeatures[0]) ));
CV_Assert(!features.empty());
// compute levels // compute levels
calcLevels(voctaves, FRAME_WIDTH, FRAME_HEIGHT, TOTAL_SCALES); calcLevels(voctaves, FRAME_WIDTH, FRAME_HEIGHT, TOTAL_SCALES);
CV_Assert(!levels.empty()); CV_Assert(!levels.empty());
...@@ -425,7 +411,14 @@ bool cv::gpu::SoftCascade::load( const string& filename, const float minScale, c ...@@ -425,7 +411,14 @@ bool cv::gpu::SoftCascade::load( const string& filename, const float minScale, c
return true; return true;
} }
// #define USE_REFERENCE_VALUES #define USE_REFERENCE_VALUES
namespace {
// File-local helper: formats `i` as decimal text into the caller-provided
// buffer `s` and returns that same buffer so the call can be used inline
// in an expression.
// The third argument is accepted but ignored; output is always base 10.
// No bounds checking is done here: the caller must pass a buffer large
// enough for the digits, an optional sign and the terminating NUL.
char *itoa(long i, char* s, int /*dummy_radix*/)
{
    char* const out = s;
    sprintf(out, "%ld", i);
    return out;
}
}
void cv::gpu::SoftCascade::detectMultiScale(const GpuMat& colored, const GpuMat& /*rois*/, void cv::gpu::SoftCascade::detectMultiScale(const GpuMat& colored, const GpuMat& /*rois*/,
GpuMat& objects, const int /*rejectfactor*/, Stream s) GpuMat& objects, const int /*rejectfactor*/, Stream s)
{ {
...@@ -438,17 +431,20 @@ void cv::gpu::SoftCascade::detectMultiScale(const GpuMat& colored, const GpuMat& ...@@ -438,17 +431,20 @@ void cv::gpu::SoftCascade::detectMultiScale(const GpuMat& colored, const GpuMat&
Filds& flds = *filds; Filds& flds = *filds;
#if defined USE_REFERENCE_VALUES #if defined USE_REFERENCE_VALUES
// cudaMemset(flds.hogluv.data, 0, flds.hogluv.step * flds.hogluv.rows); cudaMemset(flds.hogluv.data, 0, flds.hogluv.step * flds.hogluv.rows);
// cv::FileStorage imgs("/home/kellan/testInts.xml", cv::FileStorage::READ);
// char buff[33]; cv::FileStorage imgs("/home/kellan/testInts.xml", cv::FileStorage::READ);
char buff[33];
// for(int i = 0; i < Filds::HOG_LUV_BINS; ++i)
// { for(int i = 0; i < Filds::HOG_LUV_BINS; ++i)
// cv::Mat channel; {
// imgs[std::string("channel") + itoa(i, buff, 10)] >> channel; cv::Mat channel;
// GpuMat gchannel(flds.hogluv, cv::Rect(0, 121 * i, 161, 121)); imgs[std::string("channel") + itoa(i, buff, 10)] >> channel;
// gchannel.upload(channel);
// } // std::cout << "channel " << i << std::endl << channel << std::endl;
GpuMat gchannel(flds.hogluv, cv::Rect(0, 121 * i, 161, 121));
gchannel.upload(channel);
}
#else #else
GpuMat& plane = flds.plane; GpuMat& plane = flds.plane;
GpuMat& shrunk = flds.shrunk; GpuMat& shrunk = flds.shrunk;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment