Commit a360a19c authored by Maria Dimashova's avatar Maria Dimashova

added TBB optimization to DOT

parent 3d50df37
...@@ -652,6 +652,8 @@ public: ...@@ -652,6 +652,8 @@ public:
cv::Mat quantizedImage; cv::Mat quantizedImage;
float texturelessRatio; float texturelessRatio;
int area;
std::vector<int> objectClassIDs; std::vector<int> objectClassIDs;
std::vector<TrainData> trainData; std::vector<TrainData> trainData;
}; };
...@@ -680,8 +682,8 @@ public: ...@@ -680,8 +682,8 @@ public:
protected: protected:
void detectQuantized( const Mat& queryQuantizedImage, float minRatio, void detectQuantized( const Mat& queryQuantizedImage, float minRatio,
vector<vector<Rect> >& rects, vector<vector<Rect> >& rects,
vector<vector<float> >& ratios, vector<vector<float> >* ratios,
vector<vector<int> >& dotTemplateIndices ) const; vector<vector<int> >* dotTemplateIndices ) const;
TrainParams trainParams; TrainParams trainParams;
......
...@@ -70,7 +70,7 @@ static void readDirContent( const string& descrFilename, vector<string>& names ) ...@@ -70,7 +70,7 @@ static void readDirContent( const string& descrFilename, vector<string>& names )
file.close(); file.close();
} }
static void computeGradients( const Mat& image, Mat& magnitudes, Mat& angles ) inline void computeGradients( const Mat& image, Mat& magnitudes, Mat& angles )
{ {
Mat dx, dy; Mat dx, dy;
cv::Sobel( image, dx, CV_32F, 1, 0, 3 ); cv::Sobel( image, dx, CV_32F, 1, 0, 3 );
...@@ -171,49 +171,47 @@ inline int countNonZeroBits( uchar val ) ...@@ -171,49 +171,47 @@ inline int countNonZeroBits( uchar val )
return (v & 0x0f) + ((v >> 4) & 0x0f); return (v & 0x0f) + ((v >> 4) & 0x0f);
} }
const uchar texturelessValue = 1 << DOTDetector::TrainParams::BIN_COUNT;
inline void countNonZeroAndTexturelessBits( const Mat& mat, int& nonZeroBitsCount, int& texturelessBitsCount ) inline void countNonZeroAndTexturelessBits( const Mat& mat, int& nonZeroBitsCount, int& texturelessBitsCount )
{ {
CV_Assert( mat.type() == CV_8UC1 ); CV_DbgAssert( mat.type() == CV_8UC1 );
nonZeroBitsCount = 0; nonZeroBitsCount = 0;
texturelessBitsCount = 0; texturelessBitsCount = 0;
const uchar texturelessValue = 1 << DOTDetector::TrainParams::BIN_COUNT; int step = mat.step1();
for( int y = 0; y < mat.rows; y++ ) for( int y = 0; y < mat.rows; y++ )
{ {
const uchar* rowPtr = mat.data + y*step;
for( int x = 0; x < mat.cols; x++ ) for( int x = 0; x < mat.cols; x++ )
{ {
int curCount = countNonZeroBits( mat.at<uchar>(y,x) ); int curCount = countNonZeroBits( rowPtr[x] );
if( curCount ) if( curCount )
{ {
nonZeroBitsCount += curCount; nonZeroBitsCount += curCount;
if( mat.at<uchar>(y,x) == texturelessValue ) if( rowPtr[x] == texturelessValue )
texturelessBitsCount++; texturelessBitsCount++;
} }
} }
} }
} }
static void quantizeToTrain( const Mat& _magnitudesExt, const Mat& _anglesExt, const Mat& maskExt, struct TrainImageQuantizer
Mat& quantizedImage, const DOTDetector::TrainParams& params )
{ {
CV_DbgAssert( params.winSize.height % params.regionSize == 0 ); TrainImageQuantizer( const DOTDetector::TrainParams& _params, const Mat& _magnitudesExt, const Mat& _anglesExt, Mat& _quantizedImage ) :
CV_DbgAssert( params.winSize.width % params.regionSize == 0 ); params(_params), verticalRegionCount(_params.winSize.height/_params.regionSize), horizontalRegionCount(_params.winSize.width/_params.regionSize),
CV_DbgAssert( params.regionSize % 2 == 1 ); regionSize_2(params.regionSize/2), magnitudesExt(_magnitudesExt), anglesExt(_anglesExt), quantizedImage(&_quantizedImage)
{
const int regionSize_2 = params.regionSize / 2; quantizedImage->create(verticalRegionCount, horizontalRegionCount, CV_8UC1 );
quantizedImage->setTo( Scalar::all(0) );
Mat magnitudesExt, anglesExt; }
copyTrainData( _magnitudesExt, _anglesExt, maskExt, magnitudesExt, anglesExt );
const int verticalRegionCount = params.winSize.height / params.regionSize;
const int horizontalRegionCount = params.winSize.width / params.regionSize;
quantizedImage = Mat( verticalRegionCount, horizontalRegionCount, CV_8UC1, Scalar::all(0) );
Rect curRect( regionSize_2, regionSize_2, params.regionSize, params.regionSize ); void operator()( const cv::BlockedRange& range) const
{
Rect curRect( regionSize_2, regionSize_2 + params.regionSize*range.begin(), params.regionSize, params.regionSize );
for( int vRegIdx = 0; vRegIdx < verticalRegionCount; vRegIdx++ ) for( int vRegIdx = range.begin(); vRegIdx < range.end(); vRegIdx++)
{ {
for( int hRegIdx = 0; hRegIdx < horizontalRegionCount; hRegIdx++ ) for( int hRegIdx = 0; hRegIdx < horizontalRegionCount; hRegIdx++ )
{ {
...@@ -258,29 +256,61 @@ static void quantizeToTrain( const Mat& _magnitudesExt, const Mat& _anglesExt, c ...@@ -258,29 +256,61 @@ static void quantizeToTrain( const Mat& _magnitudesExt, const Mat& _anglesExt, c
if( !( curRectBits == (1 << DOTDetector::TrainParams::BIN_COUNT) && cv::countNonZero(magnitudesExt(curRect) == -1) ) ) if( !( curRectBits == (1 << DOTDetector::TrainParams::BIN_COUNT) && cv::countNonZero(magnitudesExt(curRect) == -1) ) )
{ {
if( countNonZeroBits( curRectBits ) <= params.maxNonzeroBits ) if( countNonZeroBits( curRectBits ) <= params.maxNonzeroBits )
quantizedImage.at<uchar>(vRegIdx, hRegIdx) = curRectBits; quantizedImage->at<uchar>(vRegIdx, hRegIdx) = curRectBits;
} }
curRect.x += params.regionSize; curRect.x += params.regionSize;
} }
curRect.x = regionSize_2; curRect.x = regionSize_2;
curRect.y += params.regionSize; curRect.y += params.regionSize;
} }
} }
static void quantizeToDetect( const Mat& _magnitudes, const Mat& angles, const DOTDetector::TrainParams& params;
Mat& quantizedImage, int regionSize, const DOTDetector::TrainParams& params ) const int verticalRegionCount;
const int horizontalRegionCount;
const int regionSize_2;
const Mat& magnitudesExt;
const Mat& anglesExt;
// Result matrix
Mat* quantizedImage;
};
static void quantizeToTrain( const Mat& _magnitudesExt, const Mat& _anglesExt, const Mat& maskExt,
Mat& quantizedImage, const DOTDetector::TrainParams& params )
{ {
Mat magnitudes; _magnitudes.copyTo( magnitudes ); CV_DbgAssert( params.winSize.height % params.regionSize == 0 );
CV_DbgAssert( params.winSize.width % params.regionSize == 0 );
CV_DbgAssert( params.regionSize % 2 == 1 );
const int verticalRegionCount = magnitudes.rows / regionSize; Mat magnitudesExt, anglesExt;
const int horizontalRegionCount = magnitudes.cols / regionSize; copyTrainData( _magnitudesExt, _anglesExt, maskExt, magnitudesExt, anglesExt );
const int verticalRegionCount = params.winSize.height / params.regionSize;
TrainImageQuantizer quantizer( params, magnitudesExt, anglesExt, quantizedImage );
quantizedImage = Mat( verticalRegionCount, horizontalRegionCount, CV_8UC1, Scalar::all(0) ); parallel_for( cv::BlockedRange(0, verticalRegionCount), quantizer );
}
struct DetectImageQuantizer
{
DetectImageQuantizer( const DOTDetector::TrainParams& _params, int _regionSize, const Mat& _magnitudes, const Mat& _angles, Mat& _quantizedImage ) :
params(_params), verticalRegionCount(_magnitudes.rows/_regionSize), horizontalRegionCount(_magnitudes.cols/_regionSize),
regionSize(_regionSize), regionSize_2(_regionSize/2), magnitudes(_magnitudes), angles(_angles), quantizedImage(&_quantizedImage)
{
quantizedImage->create(verticalRegionCount, horizontalRegionCount, CV_8UC1 );
quantizedImage->setTo( Scalar::all(0) );
}
void operator()( const cv::BlockedRange& range) const
{
Rect curRect( 0, regionSize*range.begin(), regionSize, regionSize );
Rect curRect(0, 0, regionSize, regionSize);
const int maxStrongestCount = 1; const int maxStrongestCount = 1;
for( int vRegIdx = 0; vRegIdx < verticalRegionCount; vRegIdx++ ) for( int vRegIdx = range.begin(); vRegIdx < range.end(); vRegIdx++)
{ {
for( int hRegIdx = 0; hRegIdx < horizontalRegionCount; hRegIdx++ ) for( int hRegIdx = 0; hRegIdx < horizontalRegionCount; hRegIdx++ )
{ {
...@@ -311,23 +341,47 @@ static void quantizeToDetect( const Mat& _magnitudes, const Mat& angles, ...@@ -311,23 +341,47 @@ static void quantizeToDetect( const Mat& _magnitudes, const Mat& angles,
if( strongestCount == 0 && maxMagnitude > 0 ) if( strongestCount == 0 && maxMagnitude > 0 )
curRectBits |= 1 << DOTDetector::TrainParams::BIN_COUNT; curRectBits |= 1 << DOTDetector::TrainParams::BIN_COUNT;
quantizedImage.at<uchar>(vRegIdx, hRegIdx) = curRectBits; quantizedImage->at<uchar>(vRegIdx, hRegIdx) = curRectBits;
curRect.x += regionSize; curRect.x += regionSize;
} }
curRect.x = 0; curRect.x = 0;
curRect.y += regionSize; curRect.y += regionSize;
} }
}
const DOTDetector::TrainParams& params;
const int verticalRegionCount;
const int horizontalRegionCount;
const int regionSize;
const int regionSize_2;
const Mat& magnitudes;
const Mat& angles;
// Result matrix
Mat* quantizedImage;
};
static void quantizeToDetect( const Mat& _magnitudes, const Mat& angles,
Mat& quantizedImage, int regionSize, const DOTDetector::TrainParams& params )
{
Mat magnitudes; _magnitudes.copyTo( magnitudes );
const int verticalRegionCount = magnitudes.rows / regionSize;
DetectImageQuantizer quantizer( params, regionSize, magnitudes, angles, quantizedImage );
parallel_for( cv::BlockedRange(0, verticalRegionCount), quantizer );
} }
inline void andQuantizedImages( const Mat& queryQuantizedImage, const Mat& trainQuantizedImage, float& ratio, float& texturelessRatio ) inline void andQuantizedImages( const Mat& queryQuantizedImage, const DOTDetector::DOTTemplate& trainTemplate, float& ratio, float& texturelessRatio )
{ {
int nonZeroCount = 0, texturelessCount = 0; int nonZeroCount = 0, texturelessCount = 0;
countNonZeroAndTexturelessBits( trainQuantizedImage & queryQuantizedImage, nonZeroCount, texturelessCount ); countNonZeroAndTexturelessBits( trainTemplate.quantizedImage & queryQuantizedImage, nonZeroCount, texturelessCount );
CV_Assert( nonZeroCount > 0 ); CV_Assert( nonZeroCount > 0 );
int area = cv::countNonZero( trainQuantizedImage );
ratio = (float)nonZeroCount / area; ratio = (float)nonZeroCount / trainTemplate.area;
texturelessRatio = (float)texturelessCount / nonZeroCount; texturelessRatio = (float)texturelessCount / nonZeroCount;
} }
...@@ -493,10 +547,10 @@ DOTDetector::DOTTemplate::TrainData::TrainData( const Mat& _maskedImage, const c ...@@ -493,10 +547,10 @@ DOTDetector::DOTTemplate::TrainData::TrainData( const Mat& _maskedImage, const c
{ {
} }
DOTDetector::DOTTemplate::DOTTemplate() : texturelessRatio(-1.f) {} DOTDetector::DOTTemplate::DOTTemplate() : texturelessRatio(-1.f), area(0) {}
DOTDetector::DOTTemplate::DOTTemplate( const cv::Mat& _quantizedImage, int _objectClassID, const cv::Mat& _maskedImage, const cv::Mat& _strongestGradientsMask ) : DOTDetector::DOTTemplate::DOTTemplate( const cv::Mat& _quantizedImage, int _objectClassID, const cv::Mat& _maskedImage, const cv::Mat& _strongestGradientsMask ) :
quantizedImage(_quantizedImage), texturelessRatio(computeTexturelessRatio(_quantizedImage)) quantizedImage(_quantizedImage), texturelessRatio(computeTexturelessRatio(_quantizedImage)), area(cv::countNonZero(_quantizedImage))
{ {
addObjectClassID( _objectClassID, _maskedImage, _strongestGradientsMask ); addObjectClassID( _objectClassID, _maskedImage, _strongestGradientsMask );
} }
...@@ -751,7 +805,7 @@ void DOTDetector::train( const string& _baseDirName, const TrainParams& _trainPa ...@@ -751,7 +805,7 @@ void DOTDetector::train( const string& _baseDirName, const TrainParams& _trainPa
vector<vector<float> > ratios; vector<vector<float> > ratios;
vector<vector<int> > dotTemplateIndices; vector<vector<int> > dotTemplateIndices;
detectQuantized( queryQuantizedImage, trainParams.minRatio, rects, ratios, dotTemplateIndices ); detectQuantized( queryQuantizedImage, trainParams.minRatio, rects, &ratios, &dotTemplateIndices );
Mat trainMaskedImage, trainStrongestGradientMask; Mat trainMaskedImage, trainStrongestGradientMask;
if( isAddImageAndGradientMask ) if( isAddImageAndGradientMask )
...@@ -791,24 +845,28 @@ void DOTDetector::train( const string& _baseDirName, const TrainParams& _trainPa ...@@ -791,24 +845,28 @@ void DOTDetector::train( const string& _baseDirName, const TrainParams& _trainPa
} }
} }
void DOTDetector::detectQuantized( const Mat& queryQuantizedImage, float minRatio, #ifdef HAVE_TBB
vector<vector<Rect> >& rects, typedef tbb::concurrent_vector<float> ConcurrentFloatVector;
vector<vector<float> >& ratios, typedef tbb::concurrent_vector<int> ConcurrentIntVector;
vector<vector<int> >& dotTemplateIndices ) const #else
{ typedef std::vector<float> ConcurrentFloatVector;
if( dotTemplates.empty() ) typedef std::vector<int> ConcurrentIntVector;
return; #endif
const int regionsPerRow = dotTemplates[0].quantizedImage.rows;
const int regionsPerCol = dotTemplates[0].quantizedImage.cols;
int objectClassCount = objectClassNames.size();
rects.resize( objectClassCount ); struct TemplateComparator
ratios.resize( objectClassCount ); {
dotTemplateIndices.resize( objectClassCount ); TemplateComparator( const Mat& _queryQuantizedImage, const vector<DOTDetector::DOTTemplate>& _dotTemplates,
float _minRatio,
for( size_t tIdx = 0; tIdx < dotTemplates.size(); tIdx++ ) vector<ConcurrentRectVector>& _concurrRects,
vector<ConcurrentFloatVector>* _concurrRatiosPtr, vector<ConcurrentIntVector>* _concurrTemplateIndicesPtr )
: regionsPerRow(_dotTemplates[0].quantizedImage.rows), regionsPerCol(_dotTemplates[0].quantizedImage.cols), minRatio(_minRatio),
queryQuantizedImage(_queryQuantizedImage), dotTemplates(_dotTemplates), concurrRectsPtr(&_concurrRects),
concurrRatiosPtr(_concurrRatiosPtr), concurrTemplateIndicesPtr(_concurrTemplateIndicesPtr)
{};
void operator()( const cv::BlockedRange& range ) const
{
for( int tIdx = range.begin(); tIdx < range.end(); tIdx++ )
{ {
Rect r( 0, 0, regionsPerCol, regionsPerRow ); Rect r( 0, 0, regionsPerCol, regionsPerRow );
for( r.y = 0; r.y <= queryQuantizedImage.rows-r.height; r.y++ ) for( r.y = 0; r.y <= queryQuantizedImage.rows-r.height; r.y++ )
...@@ -816,18 +874,88 @@ void DOTDetector::detectQuantized( const Mat& queryQuantizedImage, float minRati ...@@ -816,18 +874,88 @@ void DOTDetector::detectQuantized( const Mat& queryQuantizedImage, float minRati
for( r.x = 0; r.x <= queryQuantizedImage.cols-r.width; r.x++ ) for( r.x = 0; r.x <= queryQuantizedImage.cols-r.width; r.x++ )
{ {
float ratio, texturelessRatio; float ratio, texturelessRatio;
andQuantizedImages( queryQuantizedImage(r), dotTemplates[tIdx].quantizedImage, ratio, texturelessRatio ); andQuantizedImages( queryQuantizedImage(r), dotTemplates[tIdx], ratio, texturelessRatio );
if( ratio > minRatio && texturelessRatio < dotTemplates[tIdx].texturelessRatio ) if( ratio > minRatio && texturelessRatio < dotTemplates[tIdx].texturelessRatio )
{ {
for( size_t cIdx = 0; cIdx < dotTemplates[tIdx].objectClassIDs.size(); cIdx++ ) for( size_t cIdx = 0; cIdx < dotTemplates[tIdx].objectClassIDs.size(); cIdx++ )
{ {
int objectClassID = dotTemplates[tIdx].objectClassIDs[cIdx]; int objectClassID = dotTemplates[tIdx].objectClassIDs[cIdx];
rects[objectClassID].push_back( r ); (*concurrRectsPtr)[objectClassID].push_back( r );
ratios[objectClassID].push_back( ratio ); if( concurrRatiosPtr )
dotTemplateIndices[objectClassID].push_back( tIdx ); (*concurrRatiosPtr)[objectClassID].push_back( ratio );
if( concurrTemplateIndicesPtr )
(*concurrTemplateIndicesPtr)[objectClassID].push_back( tIdx );
}
}
}
}
}
}
const int regionsPerRow;
const int regionsPerCol;
const float minRatio;
const Mat& queryQuantizedImage;
const vector<DOTDetector::DOTTemplate>& dotTemplates;
vector<ConcurrentRectVector>* concurrRectsPtr;
vector<ConcurrentFloatVector>* concurrRatiosPtr;
vector<ConcurrentIntVector>* concurrTemplateIndicesPtr;
};
void DOTDetector::detectQuantized( const Mat& queryQuantizedImage, float minRatio,
vector<vector<Rect> >& rects,
vector<vector<float> >* ratios,
vector<vector<int> >* dotTemplateIndices ) const
{
if( dotTemplates.empty() )
return;
int objectClassCount = objectClassNames.size();
vector<ConcurrentRectVector> concurrRects( objectClassCount );
vector<ConcurrentFloatVector> concurrRatios;
vector<ConcurrentIntVector> concurrTemplateIndices;
vector<ConcurrentFloatVector>* concurrRatiosPtr = 0;
vector<ConcurrentIntVector>* concurrTemplateIndicesPtr = 0;
if( ratios )
{
concurrRatios.resize( objectClassCount );
concurrRatiosPtr = &concurrRatios;
} }
if( dotTemplateIndices )
{
concurrTemplateIndices.resize( objectClassCount );
concurrTemplateIndicesPtr = &concurrTemplateIndices;
} }
TemplateComparator templatesComparator( queryQuantizedImage, dotTemplates, minRatio, concurrRects, concurrRatiosPtr, concurrTemplateIndicesPtr );
parallel_for( cv::BlockedRange(0, dotTemplates.size()), templatesComparator );
// copy to the output vectors
rects.resize( objectClassCount );
if( ratios )
ratios->resize( objectClassCount );
if( dotTemplateIndices )
dotTemplateIndices->resize( objectClassCount );
for( int i = 0; i < objectClassCount; i++ )
{
rects[i].clear();
rects[i].insert( rects[i].end(), concurrRects[i].begin(), concurrRects[i].end() );
if( ratios )
{
(*ratios)[i].clear();
(*ratios)[i].insert( (*ratios)[i].end(), (*concurrRatiosPtr)[i].begin(), (*concurrRatiosPtr)[i].end() );
} }
if( dotTemplateIndices )
{
(*dotTemplateIndices)[i].clear();
(*dotTemplateIndices)[i].insert( (*dotTemplateIndices)[i].end(), (*concurrTemplateIndicesPtr)[i].begin(), (*concurrTemplateIndicesPtr)[i].end() );
} }
} }
} }
...@@ -852,16 +980,20 @@ void DOTDetector::detectMultiScale( const Mat& image, vector<vector<Rect> >& rec ...@@ -852,16 +980,20 @@ void DOTDetector::detectMultiScale( const Mat& image, vector<vector<Rect> >& rec
dotTemplateIndices->resize( objectClassCount ); dotTemplateIndices->resize( objectClassCount );
} }
vector<vector<Rect> > curRects;
vector<vector<float> > curRatios;
vector<vector<int> > curDotTemlateIndices;
Mat magnitudes, angles; Mat magnitudes, angles;
computeGradients( image, magnitudes, angles ); computeGradients( image, magnitudes, angles );
for( int regionSize = detectParams.minRegionSize; regionSize <= detectParams.maxRegionSize; regionSize += detectParams.regionSizeStep ) for( int regionSize = detectParams.minRegionSize; regionSize <= detectParams.maxRegionSize; regionSize += detectParams.regionSizeStep )
{ {
Mat quantizedImage; Mat quantizedImage;
vector<vector<Rect> > curRects;
vector<vector<float> > curRatios;
vector<vector<int> > curDotTemlateIndices;
quantizeToDetect( magnitudes, angles, quantizedImage, regionSize, trainParams ); quantizeToDetect( magnitudes, angles, quantizedImage, regionSize, trainParams );
detectQuantized( quantizedImage, detectParams.minRatio, curRects, curRatios, curDotTemlateIndices );
detectQuantized( quantizedImage, detectParams.minRatio, curRects,
ratios ? &curRatios : 0, dotTemplateIndices ? &curDotTemlateIndices : 0 );
for( int ci = 0; ci < objectClassCount; ci++ ) for( int ci = 0; ci < objectClassCount; ci++ )
{ {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment