/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2015, Itseez Inc, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Itseez Inc or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "dpm_cascade.hpp"
#include "dpm_nms.hpp"

#include <limits>
#include <fstream>
#include <iostream>
#include <stdio.h>

using namespace std;

namespace cv
{
namespace dpm
{

void DPMCascade::loadCascadeModel(const string &modelPath)
{
    // load cascade model from xml
    bool is_success = model.deserialize(modelPath);

    if (!is_success)
    {
        string errorMessage =
            format("Unable to parse the model: %s", modelPath.c_str());
        CV_Error(CV_StsBadArg, errorMessage);
    }

    model.initModel();
}

void DPMCascade::initDPMCascade()
{
    // compute the size of temporary storage needed by cascade
    int nlevels = (int) pyramid.size();
    int numPartFilters = model.getNumPartFilters();
    int numDefParams = model.getNumDefParams();
    featDimsProd.resize(nlevels);
    tempStorageSize = 0;

    for (int i = 0; i < nlevels; i++)
    {
        int w = pyramid[i].cols/feature.dimHOG;
        int h = pyramid[i].rows;
        featDimsProd[i] = w*h;
        tempStorageSize += w*h;
    }

    tempStorageSize *= numPartFilters;

    convValues.resize(tempStorageSize);
    pcaConvValues.resize(tempStorageSize);
    dtValues.resize(tempStorageSize);
    pcaDtValues.resize(tempStorageSize);

    fill(convValues.begin(), convValues.end(),
            -numeric_limits<double>::infinity());
    fill(pcaConvValues.begin(), pcaConvValues.end(),
            -numeric_limits<double>::infinity());
    fill(dtValues.begin(), dtValues.end(),
            -numeric_limits<double>::infinity());
    fill(pcaDtValues.begin(), pcaDtValues.end(),
            -numeric_limits<double>::infinity());

    // each pyramid (convolution and distance transform) is stored
    // in a 1D array. Since pyramid levels have different sizes,
    // we build an array of offset values in order to index by
    // level. The last offset is the total length of the pyramid
    // storage array.
    convLevelOffset.resize(nlevels + 1);
    dtLevelOffset.resize(nlevels + 1);
    convLevelOffset[0] = 0;
    dtLevelOffset[0] = 0;

    for (int i = 1; i < nlevels + 1; i++)
    {
        convLevelOffset[i] = convLevelOffset[i-1] + numPartFilters*featDimsProd[i-1];
        dtLevelOffset[i] = dtLevelOffset[i-1] + numDefParams*featDimsProd[i-1];
    }

    // cache of precomputed deformation costs
    defCostCacheX.resize(numDefParams);
    defCostCacheY.resize(numDefParams);

    for (int i = 0; i < numDefParams; i++)
    {
        vector< double > def = model.defs[i];
        CV_Assert((int) def.size() >= 4);

        defCostCacheX[i].resize(2*halfWindowSize + 1);
        defCostCacheY[i].resize(2*halfWindowSize + 1);

        for (int j = 0; j < 2*halfWindowSize + 1; j++)
        {
            int delta = j - halfWindowSize;
            int deltaSquare = delta*delta;
            defCostCacheX[i][j] = -def[0]*deltaSquare - def[1]*delta;
            defCostCacheY[i][j] = -def[2]*deltaSquare - def[3]*delta;
        }
    }

    dtArgmaxX.resize(dtLevelOffset[nlevels]);
    pcaDtArgmaxX.resize(dtLevelOffset[nlevels]);
    dtArgmaxY.resize(dtLevelOffset[nlevels]);
    pcaDtArgmaxY.resize(dtLevelOffset[nlevels]);
}
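// Indexing sketch (illustrative numbers, not taken from any real model):
// with nlevels = 3, numPartFilters = 8 and per-level feature areas
// featDimsProd = {100, 64, 36}, the loop above yields
//     convLevelOffset = {0, 800, 1312, 1600}
// so the convolution score of part filter p at cell (x, y) of level l
// lives at
//     convValues[convLevelOffset[l] + p*featDimsProd[l] + y*featWidth + x]
// and the last offset (1600) is the total storage length for that array.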
vector< vector<double> > DPMCascade::detect(Mat &image)
{
    if (image.channels() == 1)
        cvtColor(image, image, COLOR_GRAY2BGR);

    if (image.depth() != CV_64F)
        image.convertTo(image, CV_64FC3);

    // compute features
    computeFeatures(image);

    // pre-allocate storage
    initDPMCascade();

    // cascade process
    vector< vector<double> > detections;
    process(detections);

    // non-maximum suppression
    NonMaximumSuppression nms;
    nms.process(detections, 0.5);

    return detections;
}
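/* Usage sketch (illustrative; the model path and image file below are
 * assumptions, not part of this module):
 *
 *     DPMCascade cascade;
 *     cascade.loadCascadeModel("person_cascade.xml");
 *     Mat image = imread("input.jpg");
 *     vector< vector<double> > dets = cascade.detect(image);
 *     // each detection holds {x1, y1, x2, y2} for the root filter,
 *     // {x1, y1, x2, y2} for each part filter, then the component
 *     // number and the final score
 */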
void DPMCascade::computeFeatures(const Mat &im)
{
    // initialize feature pyramid
    PyramidParameter params;
    params.padx = model.maxSizeX;
    params.pady = model.maxSizeY;
    params.interval = model.interval;
    params.binSize = model.sBin;

    feature = Feature(params);

    // compute pyramid
    feature.computeFeaturePyramid(im, pyramid);

    // compute projected pyramid
    feature.projectFeaturePyramid(model.pcaCoeff, pyramid, pcaPyramid);
}

void DPMCascade::computeLocationScores(vector< vector< double > > &locationScores)
{
    vector< vector < double > > locationWeight = model.locationWeight;
    CV_Assert((int)locationWeight.size() == model.numComponents);

    Mat locationFeature;
    int nlevels = (int) pyramid.size();
    feature.computeLocationFeatures(nlevels, locationFeature);

    locationScores.resize(model.numComponents);

    for (int comp = 0; comp < model.numComponents; comp++)
    {
        locationScores[comp].resize(locationFeature.cols);

        for (int level = 0; level < locationFeature.cols; level++)
        {
            double val = 0;

            for (int k = 0; k < locationFeature.rows; k++)
                val += locationWeight[comp][k]*
                    locationFeature.at<double>(k, level);

            locationScores[comp][level] = val;
        }
    }
}

void DPMCascade::computeRootPCAScores(vector< vector< Mat > > &rootScores)
{
    PyramidParameter params = feature.getPyramidParameters();
    rootScores.resize(model.numComponents);
    int nlevels = (int) pyramid.size();
    int interval = params.interval;

    for (int comp = 0; comp < model.numComponents; comp++)
    {
        rootScores[comp].resize(nlevels);
#ifdef HAVE_TBB
        // parallel computing
        ParalComputeRootPCAScores paralTask(pcaPyramid,
                model.rootPCAFilters[comp],
                model.pcaDim,
                rootScores[comp]);
        parallel_for_(Range(interval, nlevels), paralTask);
#else
#ifdef _OPENMP
#pragma omp parallel for
#endif
        for (int level = interval; level < nlevels; level++)
        {
            Mat feat = pcaPyramid[level];
            Mat filter = model.rootPCAFilters[comp];

            // compute size of output
            int height = feat.rows - filter.rows + 1;
            int width = (feat.cols - filter.cols) / model.pcaDim + 1;

            if (height < 1 || width < 1)
                CV_Error(CV_StsBadArg,
                        "Invalid input, filter size should be smaller than feature size.");

            Mat result = Mat::zeros(Size(width, height), CV_64F);
            convolutionEngine.convolve(feat, filter, model.pcaDim, result);

            rootScores[comp][level] = result;
        }
#endif
    }
}

#ifdef HAVE_TBB
ParalComputeRootPCAScores::ParalComputeRootPCAScores(
        const vector< Mat > &pcaPyrad,
        const Mat &f,
        int dim,
        vector< Mat > &sc):
    pcaPyramid(pcaPyrad),
    filter(f),
    pcaDim(dim),
    scores(sc)
{
}

void ParalComputeRootPCAScores::operator() (const Range &range) const
{
    for (int level = range.start; level != range.end; level++)
    {
        Mat feat = pcaPyramid[level];

        // compute size of output
        int height = feat.rows - filter.rows + 1;
        int width = (feat.cols - filter.cols) / pcaDim + 1;

        Mat result = Mat::zeros(Size(width, height), CV_64F);

        // convolution engine
        ConvolutionEngine convEngine;
        convEngine.convolve(feat, filter, pcaDim, result);

        scores[level] = result;
    }
}
#endif
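// Output-size sketch (illustrative numbers): HOG cells are stored
// interleaved along the columns, so feat.cols = cellsAcross*pcaDim.
// For pcaDim = 5, a 40 x 300 feature matrix (60 cells wide) and a
// 5 x 30 filter (6 cells wide), the valid convolution above produces
//     height = 40 - 5 + 1 = 36
//     width  = (300 - 30)/5 + 1 = 55
// i.e. one score per placement of the filter fully inside the feature map.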
void DPMCascade::process(vector< vector<double> > &dets)
{
    PyramidParameter params = feature.getPyramidParameters();
    int interval = params.interval;
    int padx = params.padx;
    int pady = params.pady;
    vector<double> scales = params.scales;
    int nlevels = (int)pyramid.size() - interval;
    CV_Assert(nlevels > 0);

    // keep track of the PCA scores for each PCA filter
    vector< vector< double > > pcaScore(model.numComponents);
    for (int comp = 0; comp < model.numComponents; comp++)
        pcaScore[comp].resize(model.numParts[comp]+1);

    // compute location scores
    vector< vector< double > > locationScores;
    computeLocationScores(locationScores);

    // compute root PCA scores
    vector< vector< Mat > > rootPCAScores;
    computeRootPCAScores(rootPCAScores);

    // process each model component and pyramid level
    for (int comp = 0; comp < model.numComponents; comp++)
    {
        for (int plevel = 0; plevel < nlevels; plevel++)
        {
            // root filter pyramid level
            int rlevel = plevel + interval;
            double bias = model.bias[comp] + locationScores[comp][rlevel];

            // get the scores of the first PCA filter
            Mat rtscore = rootPCAScores[comp][rlevel];

            // process each location in the current pyramid level
            for (int rx = (int)ceil(padx/2.0);
                    rx < rtscore.cols - (int)ceil(padx/2.0); rx++)
            {
                for (int ry = (int)ceil(pady/2.0);
                        ry < rtscore.rows - (int)ceil(pady/2.0); ry++)
                {
                    // get stage 0 score
                    double score = rtscore.at<double>(ry, rx) + bias;

                    // record PCA score
                    pcaScore[comp][0] = score - bias;

                    // run cascade stages 1 through 2*numparts + 1
                    int stage = 1;
                    int numstages = 2*model.numParts[comp] + 2;
                    for (; stage < numstages; stage++)
                    {
                        double t = model.prunThreshold[comp][2*stage-1];

                        // check for hypothesis pruning
                        if (score < t)
                            break;

                        // isPCA == true when placing PCA filters,
                        // isPCA == false when placing non-PCA filters
                        bool isPCA = (stage < model.numParts[comp] + 1);

                        // get the part index: the root has index -1,
                        // non-root parts are indexed 0:numParts-1
                        int part = model.partOrder[comp][stage] - 1;

                        if (part == -1)
                        {
                            // calculate the root non-PCA score
                            // and replace the PCA score
                            double rscore = 0.0;
                            if (isPCA)
                            {
                                rscore = convolutionEngine.convolve(
                                        pcaPyramid[rlevel],
                                        model.rootPCAFilters[comp],
                                        model.pcaDim, rx, ry);
                            }
                            else
                            {
                                rscore = convolutionEngine.convolve(
                                        pyramid[rlevel],
                                        model.rootFilters[comp],
                                        model.numFeatures, rx, ry);
                            }
                            score += rscore - pcaScore[comp][0];
                        }
                        else
                        {
                            // place a non-root filter
                            int pId = model.pFind[comp][part];
                            int px = 2*rx + (int)model.anchors[pId][0];
                            int py = 2*ry + (int)model.anchors[pId][1];

                            // look up the filter and deformation model
                            double defThreshold =
                                model.prunThreshold[comp][2*stage] - score;
                            double ps = computePartScore(plevel, pId,
                                    px, py, isPCA, defThreshold);

                            if (isPCA)
                            {
                                // record PCA filter score
                                pcaScore[comp][part+1] = ps;
                                // update the hypothesis score
                                score += ps;
                            }
                            else
                            {
                                // update the hypothesis score by replacing
                                // the PCA score
                                score += ps - pcaScore[comp][part+1];
                            } // isPCA == false
                        } // part != -1
                    } // stages

                    // check if the hypothesis passed all stages with a
                    // final score over the global threshold
                    if (stage == numstages && score >= model.scoreThresh)
                    {
                        vector<double> coords;

                        // compute and record image coordinates of the
                        // detection window; each feature cell spans
                        // model.sBin/scales[rlevel] image pixels
                        double scale = model.sBin/scales[rlevel];
                        double x1 = (rx-padx)*scale;
                        double y1 = (ry-pady)*scale;
                        double x2 = x1 + model.rootFilterDims[comp].width*scale - 1;
                        double y2 = y1 + model.rootFilterDims[comp].height*scale - 1;
                        coords.push_back(x1);
                        coords.push_back(y1);
                        coords.push_back(x2);
                        coords.push_back(y2);

                        // compute and record image coordinates
                        // of the part filters
                        scale = model.sBin/scales[plevel];
                        int featWidth = pyramid[plevel].cols/feature.dimHOG;
                        for (int p = 0; p < model.numParts[comp]; p++)
                        {
                            int pId = model.pFind[comp][p];
                            int probx = 2*rx + (int)model.anchors[pId][0];
                            int proby = 2*ry + (int)model.anchors[pId][1];
                            int offset = dtLevelOffset[plevel] +
                                pId*featDimsProd[plevel] +
                                (proby - pady)*featWidth + probx - padx;
                            int px = dtArgmaxX[offset] + padx;
                            int py = dtArgmaxY[offset] + pady;
                            x1 = (px - 2*padx)*scale;
                            y1 = (py - 2*pady)*scale;
                            x2 = x1 + model.partFilterDims[p].width*scale - 1;
                            y2 = y1 + model.partFilterDims[p].height*scale - 1;
                            coords.push_back(x1);
                            coords.push_back(y1);
                            coords.push_back(x2);
                            coords.push_back(y2);
                        }

                        // record component number and score
                        coords.push_back(comp + 1);
                        coords.push_back(score);
                        dets.push_back(coords);
                    }
                } // ry
            } // rx
        } // for each pyramid level
    } // for each component
}
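// Part placement sketch (illustrative): computePartScore below evaluates
// a deformation-penalized maximum around the anchor (px, py),
//     score(px, py) = max over (x, y) of  conv(x, y)
//                     - def[0]*(px-x)^2 - def[1]*(px-x)
//                     - def[2]*(py-y)^2 - def[3]*(py-y)
// using the per-displacement penalties cached in defCostCacheX/Y.
// E.g. with the hypothetical def = {0.05, 0, 0.05, 0}, a displacement
// of (2, 1) costs 0.05*4 + 0.05*1 = 0.25, so distant placements need
// proportionally higher convolution scores to win.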
double DPMCascade::computePartScore(int plevel, int pId, int px, int py,
        bool isPCA, double defThreshold)
{
    // remove virtual padding
    PyramidParameter params = feature.getPyramidParameters();
    px -= params.padx;
    py -= params.pady;

    // check if already computed
    int levelOffset = dtLevelOffset[plevel];
    int locationOffset = pId*featDimsProd[plevel] +
        py*pyramid[plevel].cols/feature.dimHOG + px;
    int dtBaseOffset = levelOffset + locationOffset;

    double val;
    if (isPCA)
        val = pcaDtValues[dtBaseOffset];
    else
        val = dtValues[dtBaseOffset];

    if (val > -numeric_limits<double>::infinity())
        return val;

    // Nope, define the bounds of the convolution and
    // distance transform region
    int xstart = px - halfWindowSize;
    xstart = (xstart < 0 ? 0 : xstart);
    int xend = px + halfWindowSize;
    int ystart = py - halfWindowSize;
    ystart = (ystart < 0 ? 0 : ystart);
    int yend = py + halfWindowSize;

    int featWidth = pyramid[plevel].cols/feature.dimHOG;
    int featHeight = pyramid[plevel].rows;
    int filterWidth = model.partFilters[pId].cols/feature.dimHOG;
    int filterHeight = model.partFilters[pId].rows;

    xend = (filterWidth + xend > featWidth) ?
        featWidth - filterWidth : xend;
    yend = (filterHeight + yend > featHeight) ?
        featHeight - filterHeight : yend;

    // do convolution and distance transform in region
    // [xstart, xend, ystart, yend]
    levelOffset = convLevelOffset[plevel];
    locationOffset = pId*featDimsProd[plevel];
    int convBaseOffset = levelOffset + locationOffset;

    for (int y = ystart; y <= yend; y++)
    {
        int loc = convBaseOffset + y*featWidth + xstart - 1;
        for (int x = xstart; x <= xend; x++)
        {
            loc++;

            // skip if already computed
            if (isPCA)
            {
                if (pcaConvValues[loc] > -numeric_limits<double>::infinity())
                    continue;
            }
            else if (convValues[loc] > -numeric_limits<double>::infinity())
                continue;

            // check for deformation pruning
            double defCost = defCostCacheX[pId][px - x + halfWindowSize]
                + defCostCacheY[pId][py - y + halfWindowSize];
            if (defCost < defThreshold)
                continue;

            if (isPCA)
            {
                pcaConvValues[loc] =
                    convolutionEngine.convolve(pcaPyramid[plevel],
                            model.partPCAFilters[pId], model.pcaDim, x, y);
            }
            else
            {
                convValues[loc] =
                    convolutionEngine.convolve(pyramid[plevel],
                            model.partFilters[pId], model.numFeatures, x, y);
            }
        } // x
    } // y

    // do distance transform over the region.
    // the region is small enough that brute force DT
    // is the fastest method
    double max = -numeric_limits<double>::infinity();
    int xargmax = 0;
    int yargmax = 0;

    for (int y = ystart; y <= yend; y++)
    {
        int loc = convBaseOffset + y*featWidth + xstart - 1;
        for (int x = xstart; x <= xend; x++)
        {
            loc++;

            double v;
            if (isPCA)
                v = pcaConvValues[loc];
            else
                v = convValues[loc];

            v += defCostCacheX[pId][px - x + halfWindowSize]
                + defCostCacheY[pId][py - y + halfWindowSize];

            if (v > max)
            {
                max = v;
                xargmax = x;
                yargmax = y;
            } // if v
        } // for x
    } // for y

    // record max and argmax for DT
    if (isPCA)
    {
        pcaDtArgmaxX[dtBaseOffset] = xargmax;
        pcaDtArgmaxY[dtBaseOffset] = yargmax;
        pcaDtValues[dtBaseOffset] = max;
    }
    else
    {
        dtArgmaxX[dtBaseOffset] = xargmax;
        dtArgmaxY[dtBaseOffset] = yargmax;
        dtValues[dtBaseOffset] = max;
    }

    return max;
}

} // namespace dpm
} // namespace cv
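/* Brute-force distance transform sketch (standalone and illustrative; the
 * names below are hypothetical, not part of this module). For a small
 * W x H window of precomputed convolution scores it simply scans every
 * displacement, which is what computePartScore does above, except that
 * computePartScore also reads and caches the convolution values lazily
 * in the shared 1D pyramid arrays:
 *
 *     double dt(const std::vector<double> &conv, int W, int H,
 *               double a, double b, double c, double d,
 *               int px, int py, int &bx, int &by)
 *     {
 *         double best = -std::numeric_limits<double>::infinity();
 *         for (int y = 0; y < H; y++)
 *             for (int x = 0; x < W; x++)
 *             {
 *                 int dx = px - x, dy = py - y;
 *                 double v = conv[y*W + x]
 *                     - a*dx*dx - b*dx - c*dy*dy - d*dy;
 *                 if (v > best) { best = v; bx = x; by = y; }
 *             }
 *         return best;
 *     }
 */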