/*M/////////////////////////////////////////////////////////////////////////////////////// // // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. // // By downloading, copying, installing or using the software you agree to this license. // If you do not agree to this license, do not download, install, // copy or use the software. // // // License Agreement // For Open Source Computer Vision Library // // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. // Copyright (C) 2009, Willow Garage Inc., all rights reserved. // Third party copyrights are property of their respective owners. // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // // * Redistribution's of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. // // * Redistribution's in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation // and/or other materials provided with the distribution. // // * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // // This software is provided by the copyright holders and contributors "as is" and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. // In no event shall the Intel Corporation or contributors be liable for any direct, // indirect, incidental, special, exemplary, or consequential damages // (including, but not limited to, procurement of substitute goods or services; // loss of use, data, or profits; or business interruption) however caused // and on any theory of liability, whether in contract, strict liability, // or tort (including negligence or otherwise) arising in any way out of // the use of this software, even if advised of the possibility of such damage. // //M*/ #include "precomp.hpp" namespace cv { namespace linemod { // struct Feature /** * \brief Get the label [0,8) of the single bit set in quantized. */ static inline int getLabel(int quantized) { switch (quantized) { case 1: return 0; case 2: return 1; case 4: return 2; case 8: return 3; case 16: return 4; case 32: return 5; case 64: return 6; case 128: return 7; default: CV_Error(Error::StsBadArg, "Invalid value of quantized parameter"); return -1; //avoid warning } } void Feature::read(const FileNode& fn) { FileNodeIterator fni = fn.begin(); fni >> x >> y >> label; } void Feature::write(FileStorage& fs) const { fs << "[:" << x << y << label << "]"; } // struct Template /** * \brief Crop a set of overlapping templates from different modalities. * * \param[in,out] templates Set of templates representing the same object view. * * \return The bounding box of all the templates in original image coordinates. 
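 *
 * Illustrative note: a feature stored at (x, y) on pyramid_level L corresponds to
 * (x << L, y << L) in original image coordinates, e.g. (10, 7) at level 2 maps to
 * (40, 28); the second pass below shifts every feature so that the (evened) minimum
 * x/y over all templates becomes the new origin at each template's own level.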
*/ static Rect cropTemplates(std::vector<Template>& templates) { int min_x = std::numeric_limits<int>::max(); int min_y = std::numeric_limits<int>::max(); int max_x = std::numeric_limits<int>::min(); int max_y = std::numeric_limits<int>::min(); // First pass: find min/max feature x,y over all pyramid levels and modalities for (int i = 0; i < (int)templates.size(); ++i) { Template& templ = templates[i]; for (int j = 0; j < (int)templ.features.size(); ++j) { int x = templ.features[j].x << templ.pyramid_level; int y = templ.features[j].y << templ.pyramid_level; min_x = std::min(min_x, x); min_y = std::min(min_y, y); max_x = std::max(max_x, x); max_y = std::max(max_y, y); } } /// @todo Why require even min_x, min_y? if (min_x % 2 == 1) --min_x; if (min_y % 2 == 1) --min_y; // Second pass: set width/height and shift all feature positions for (int i = 0; i < (int)templates.size(); ++i) { Template& templ = templates[i]; templ.width = (max_x - min_x) >> templ.pyramid_level; templ.height = (max_y - min_y) >> templ.pyramid_level; int offset_x = min_x >> templ.pyramid_level; int offset_y = min_y >> templ.pyramid_level; for (int j = 0; j < (int)templ.features.size(); ++j) { templ.features[j].x -= offset_x; templ.features[j].y -= offset_y; } } return Rect(min_x, min_y, max_x - min_x, max_y - min_y); } void Template::read(const FileNode& fn) { width = fn["width"]; height = fn["height"]; pyramid_level = fn["pyramid_level"]; FileNode features_fn = fn["features"]; features.resize(features_fn.size()); FileNodeIterator it = features_fn.begin(), it_end = features_fn.end(); for (int i = 0; it != it_end; ++it, ++i) { features[i].read(*it); } } void Template::write(FileStorage& fs) const { fs << "width" << width; fs << "height" << height; fs << "pyramid_level" << pyramid_level; fs << "features" << "["; for (int i = 0; i < (int)features.size(); ++i) { features[i].write(fs); } fs << "]"; // features } /****************************************************************************************\ * Modality interfaces * \****************************************************************************************/ void QuantizedPyramid::selectScatteredFeatures(const std::vector<Candidate>& candidates, std::vector<Feature>& features, size_t num_features, float distance) { features.clear(); float distance_sq = distance * distance; int i = 0; while (features.size() < num_features) { Candidate c = candidates[i]; // Add if sufficient distance away from any previously chosen feature bool keep = true; for (int j = 0; (j < (int)features.size()) && keep; ++j) { Feature f = features[j]; keep = (c.f.x - f.x)*(c.f.x - f.x) + (c.f.y - f.y)*(c.f.y - f.y) >= distance_sq; } if (keep) features.push_back(c.f); if (++i == (int)candidates.size()) { // Start back at beginning, and relax required distance i = 0; distance -= 1.0f; distance_sq = distance * distance; } } } Ptr<Modality> Modality::create(const String& modality_type) { if (modality_type == "ColorGradient") return makePtr<ColorGradient>(); else if (modality_type == "DepthNormal") return makePtr<DepthNormal>(); else return Ptr<Modality>(); } Ptr<Modality> Modality::create(const FileNode& fn) { String type = fn["type"]; Ptr<Modality> modality = create(type); modality->read(fn); return modality; } void colormap(const Mat& quantized, Mat& dst) { std::vector<Vec3b> lut(8); lut[0] = Vec3b( 0, 0, 255); lut[1] = Vec3b( 0, 170, 255); lut[2] = Vec3b( 0, 255, 170); lut[3] = Vec3b( 0, 255, 0); lut[4] = Vec3b(170, 255, 0); lut[5] = Vec3b(255, 170, 0); lut[6] = Vec3b(255, 0, 0); lut[7] = Vec3b(255, 
0, 170); dst = Mat::zeros(quantized.size(), CV_8UC3); for (int r = 0; r < dst.rows; ++r) { const uchar* quant_r = quantized.ptr(r); Vec3b* dst_r = dst.ptr<Vec3b>(r); for (int c = 0; c < dst.cols; ++c) { uchar q = quant_r[c]; if (q) dst_r[c] = lut[getLabel(q)]; } } } /****************************************************************************************\ * Color gradient modality * \****************************************************************************************/ // Forward declaration void hysteresisGradient(Mat& magnitude, Mat& angle, Mat& ap_tmp, float threshold); /** * \brief Compute quantized orientation image from color image. * * Implements section 2.2 "Computing the Gradient Orientations." * * \param[in] src The source 8-bit, 3-channel image. * \param[out] magnitude Destination floating-point array of squared magnitudes. * \param[out] angle Destination 8-bit array of orientations. Each bit * represents one bin of the orientation space. * \param threshold Magnitude threshold. Keep only gradients whose norms are * larger than this. */ static void quantizedOrientations(const Mat& src, Mat& magnitude, Mat& angle, float threshold) { magnitude.create(src.size(), CV_32F); // Allocate temporary buffers Size size = src.size(); Mat sobel_3dx; // per-channel horizontal derivative Mat sobel_3dy; // per-channel vertical derivative Mat sobel_dx(size, CV_32F); // maximum horizontal derivative Mat sobel_dy(size, CV_32F); // maximum vertical derivative Mat sobel_ag; // final gradient orientation (unquantized) Mat smoothed; // Compute horizontal and vertical image derivatives on all color channels separately static const int KERNEL_SIZE = 7; // For some reason cvSmooth/cv::GaussianBlur, cvSobel/cv::Sobel have different defaults for border handling... 
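  // Note (illustrative): `magnitude` is filled with squared gradient magnitudes
  // (dx*dx + dy*dy of the strongest color channel), which is why hysteresisGradient()
  // below is invoked with threshold * threshold rather than the raw threshold.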
GaussianBlur(src, smoothed, Size(KERNEL_SIZE, KERNEL_SIZE), 0, 0, BORDER_REPLICATE); Sobel(smoothed, sobel_3dx, CV_16S, 1, 0, 3, 1.0, 0.0, BORDER_REPLICATE); Sobel(smoothed, sobel_3dy, CV_16S, 0, 1, 3, 1.0, 0.0, BORDER_REPLICATE); short * ptrx = (short *)sobel_3dx.data; short * ptry = (short *)sobel_3dy.data; float * ptr0x = (float *)sobel_dx.data; float * ptr0y = (float *)sobel_dy.data; float * ptrmg = (float *)magnitude.data; const int length1 = static_cast<const int>(sobel_3dx.step1()); const int length2 = static_cast<const int>(sobel_3dy.step1()); const int length3 = static_cast<const int>(sobel_dx.step1()); const int length4 = static_cast<const int>(sobel_dy.step1()); const int length5 = static_cast<const int>(magnitude.step1()); const int length0 = sobel_3dy.cols * 3; for (int r = 0; r < sobel_3dy.rows; ++r) { int ind = 0; for (int i = 0; i < length0; i += 3) { // Use the gradient orientation of the channel whose magnitude is largest int mag1 = ptrx[i+0] * ptrx[i + 0] + ptry[i + 0] * ptry[i + 0]; int mag2 = ptrx[i+1] * ptrx[i + 1] + ptry[i + 1] * ptry[i + 1]; int mag3 = ptrx[i+2] * ptrx[i + 2] + ptry[i + 2] * ptry[i + 2]; if (mag1 >= mag2 && mag1 >= mag3) { ptr0x[ind] = ptrx[i]; ptr0y[ind] = ptry[i]; ptrmg[ind] = (float)mag1; } else if (mag2 >= mag1 && mag2 >= mag3) { ptr0x[ind] = ptrx[i + 1]; ptr0y[ind] = ptry[i + 1]; ptrmg[ind] = (float)mag2; } else { ptr0x[ind] = ptrx[i + 2]; ptr0y[ind] = ptry[i + 2]; ptrmg[ind] = (float)mag3; } ++ind; } ptrx += length1; ptry += length2; ptr0x += length3; ptr0y += length4; ptrmg += length5; } // Calculate the final gradient orientations phase(sobel_dx, sobel_dy, sobel_ag, true); hysteresisGradient(magnitude, angle, sobel_ag, threshold * threshold); } void hysteresisGradient(Mat& magnitude, Mat& quantized_angle, Mat& angle, float threshold) { // Quantize 360 degree range of orientations into 16 buckets // Note that [0, 11.25), [348.75, 360) both get mapped in the end to label 0, // for stability of horizontal and vertical features. Mat_<unsigned char> quantized_unfiltered; angle.convertTo(quantized_unfiltered, CV_8U, 16.0 / 360.0); // Zero out top and bottom rows /// @todo is this necessary, or even correct? memset(quantized_unfiltered.ptr(), 0, quantized_unfiltered.cols); memset(quantized_unfiltered.ptr(quantized_unfiltered.rows - 1), 0, quantized_unfiltered.cols); // Zero out first and last columns for (int r = 0; r < quantized_unfiltered.rows; ++r) { quantized_unfiltered(r, 0) = 0; quantized_unfiltered(r, quantized_unfiltered.cols - 1) = 0; } // Mask 16 buckets into 8 quantized orientations for (int r = 1; r < angle.rows - 1; ++r) { uchar* quant_r = quantized_unfiltered.ptr<uchar>(r); for (int c = 1; c < angle.cols - 1; ++c) { quant_r[c] &= 7; } } // Filter the raw quantized image. Only accept pixels where the magnitude is above some // threshold, and there is local agreement on the quantization. 
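  // Worked example (illustrative): a gradient at 100 degrees falls in 16-bucket bin 4
  // (one bin per 22.5 degrees) and its reversal at 280 degrees in bin 12; after the
  // &= 7 masking above both carry label 4, so opposite directions vote together.
  // Below, a strong pixel is assigned the most frequent label of its 3x3 neighborhood,
  // but only if that label has at least NEIGHBOR_THRESHOLD (5) of the 9 votes; the
  // output byte is then the single bit 1 << label (e.g. 1 << 4 = 16).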
quantized_angle = Mat::zeros(angle.size(), CV_8U); for (int r = 1; r < angle.rows - 1; ++r) { float* mag_r = magnitude.ptr<float>(r); for (int c = 1; c < angle.cols - 1; ++c) { if (mag_r[c] > threshold) { // Compute histogram of quantized bins in 3x3 patch around pixel int histogram[8] = {0, 0, 0, 0, 0, 0, 0, 0}; uchar* patch3x3_row = &quantized_unfiltered(r-1, c-1); histogram[patch3x3_row[0]]++; histogram[patch3x3_row[1]]++; histogram[patch3x3_row[2]]++; patch3x3_row += quantized_unfiltered.step1(); histogram[patch3x3_row[0]]++; histogram[patch3x3_row[1]]++; histogram[patch3x3_row[2]]++; patch3x3_row += quantized_unfiltered.step1(); histogram[patch3x3_row[0]]++; histogram[patch3x3_row[1]]++; histogram[patch3x3_row[2]]++; // Find bin with the most votes from the patch int max_votes = 0; int index = -1; for (int i = 0; i < 8; ++i) { if (max_votes < histogram[i]) { index = i; max_votes = histogram[i]; } } // Only accept the quantization if majority of pixels in the patch agree static const int NEIGHBOR_THRESHOLD = 5; if (max_votes >= NEIGHBOR_THRESHOLD) quantized_angle.at<uchar>(r, c) = uchar(1 << index); } } } } class ColorGradientPyramid : public QuantizedPyramid { public: ColorGradientPyramid(const Mat& src, const Mat& mask, float weak_threshold, size_t num_features, float strong_threshold); virtual void quantize(Mat& dst) const; virtual bool extractTemplate(Template& templ) const; virtual void pyrDown(); protected: /// Recalculate angle and magnitude images void update(); Mat src; Mat mask; int pyramid_level; Mat angle; Mat magnitude; float weak_threshold; size_t num_features; float strong_threshold; }; ColorGradientPyramid::ColorGradientPyramid(const Mat& _src, const Mat& _mask, float _weak_threshold, size_t _num_features, float _strong_threshold) : src(_src), mask(_mask), pyramid_level(0), weak_threshold(_weak_threshold), num_features(_num_features), strong_threshold(_strong_threshold) { update(); } void ColorGradientPyramid::update() { quantizedOrientations(src, magnitude, angle, weak_threshold); } void ColorGradientPyramid::pyrDown() { // Some parameters need to be adjusted num_features /= 2; /// @todo Why not 4? ++pyramid_level; // Downsample the current inputs Size size(src.cols / 2, src.rows / 2); Mat next_src; cv::pyrDown(src, next_src, size); src = next_src; if (!mask.empty()) { Mat next_mask; resize(mask, next_mask, size, 0.0, 0.0, INTER_NEAREST); mask = next_mask; } update(); } void ColorGradientPyramid::quantize(Mat& dst) const { dst = Mat::zeros(angle.size(), CV_8U); angle.copyTo(dst, mask); } bool ColorGradientPyramid::extractTemplate(Template& templ) const { // Want features on the border to distinguish from background Mat local_mask; if (!mask.empty()) { erode(mask, local_mask, Mat(), Point(-1,-1), 1, BORDER_REPLICATE); subtract(mask, local_mask, local_mask); } // Create sorted list of all pixels with magnitude greater than a threshold std::vector<Candidate> candidates; bool no_mask = local_mask.empty(); float threshold_sq = strong_threshold*strong_threshold; for (int r = 0; r < magnitude.rows; ++r) { const uchar* angle_r = angle.ptr<uchar>(r); const float* magnitude_r = magnitude.ptr<float>(r); const uchar* mask_r = no_mask ? 
NULL : local_mask.ptr<uchar>(r); for (int c = 0; c < magnitude.cols; ++c) { if (no_mask || mask_r[c]) { uchar quantized = angle_r[c]; if (quantized > 0) { float score = magnitude_r[c]; if (score > threshold_sq) { candidates.push_back(Candidate(c, r, getLabel(quantized), score)); } } } } } // We require a certain number of features if (candidates.size() < num_features) return false; // NOTE: Stable sort to agree with old code, which used std::list::sort() std::stable_sort(candidates.begin(), candidates.end()); // Use heuristic based on surplus of candidates in narrow outline for initial distance threshold float distance = static_cast<float>(candidates.size() / num_features + 1); selectScatteredFeatures(candidates, templ.features, num_features, distance); // Size determined externally, needs to match templates for other modalities templ.width = -1; templ.height = -1; templ.pyramid_level = pyramid_level; return true; } ColorGradient::ColorGradient() : weak_threshold(10.0f), num_features(63), strong_threshold(55.0f) { } ColorGradient::ColorGradient(float _weak_threshold, size_t _num_features, float _strong_threshold) : weak_threshold(_weak_threshold), num_features(_num_features), strong_threshold(_strong_threshold) { } static const char CG_NAME[] = "ColorGradient"; String ColorGradient::name() const { return CG_NAME; } Ptr<QuantizedPyramid> ColorGradient::processImpl(const Mat& src, const Mat& mask) const { return makePtr<ColorGradientPyramid>(src, mask, weak_threshold, num_features, strong_threshold); } void ColorGradient::read(const FileNode& fn) { String type = fn["type"]; CV_Assert(type == CG_NAME); weak_threshold = fn["weak_threshold"]; num_features = int(fn["num_features"]); strong_threshold = fn["strong_threshold"]; } void ColorGradient::write(FileStorage& fs) const { fs << "type" << CG_NAME; fs << "weak_threshold" << weak_threshold; fs << "num_features" << int(num_features); fs << "strong_threshold" << strong_threshold; } /****************************************************************************************\ * Depth normal modality * \****************************************************************************************/ // Contains GRANULARITY and NORMAL_LUT #include "normal_lut.i" static void accumBilateral(long delta, long i, long j, long * A, long * b, int threshold) { long f = std::abs(delta) < threshold ? 1 : 0; const long fi = f * i; const long fj = f * j; A[0] += fi * i; A[1] += fi * j; A[3] += fj * j; b[0] += fi * delta; b[1] += fj * delta; } /** * \brief Compute quantized normal image from depth image. * * Implements section 2.6 "Extension to Dense Depth Sensors." * * \param[in] src The source 16-bit depth image (in mm). * \param[out] dst The destination 8-bit image. Each bit represents one bin of * the view cone. * \param distance_threshold Ignore pixels beyond this distance. * \param difference_threshold When computing normals, ignore contributions of pixels whose * depth difference with the central pixel is above this threshold. * * \todo Should also need camera model, or at least focal lengths? Replace distance_threshold with mask? 
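 *
 * Illustrative note: each bit of a dst pixel selects one bin of the view cone
 * (via NORMAL_LUT); dst is left 0 for pixels beyond distance_threshold or where
 * no stable normal could be estimated (e.g. depth-sensor shadows).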
*/ static void quantizedNormals(const Mat& src, Mat& dst, int distance_threshold, int difference_threshold) { dst = Mat::zeros(src.size(), CV_8U); const unsigned short * lp_depth = src.ptr<ushort>(); unsigned char * lp_normals = dst.ptr<uchar>(); const int l_W = src.cols; const int l_H = src.rows; const int l_r = 5; // used to be 7 const int l_offset0 = -l_r - l_r * l_W; const int l_offset1 = 0 - l_r * l_W; const int l_offset2 = +l_r - l_r * l_W; const int l_offset3 = -l_r; const int l_offset4 = +l_r; const int l_offset5 = -l_r + l_r * l_W; const int l_offset6 = 0 + l_r * l_W; const int l_offset7 = +l_r + l_r * l_W; const int l_offsetx = GRANULARITY / 2; const int l_offsety = GRANULARITY / 2; for (int l_y = l_r; l_y < l_H - l_r - 1; ++l_y) { const unsigned short * lp_line = lp_depth + (l_y * l_W + l_r); unsigned char * lp_norm = lp_normals + (l_y * l_W + l_r); for (int l_x = l_r; l_x < l_W - l_r - 1; ++l_x) { long l_d = lp_line[0]; if (l_d < distance_threshold) { // accum long l_A[4]; l_A[0] = l_A[1] = l_A[2] = l_A[3] = 0; long l_b[2]; l_b[0] = l_b[1] = 0; accumBilateral(lp_line[l_offset0] - l_d, -l_r, -l_r, l_A, l_b, difference_threshold); accumBilateral(lp_line[l_offset1] - l_d, 0, -l_r, l_A, l_b, difference_threshold); accumBilateral(lp_line[l_offset2] - l_d, +l_r, -l_r, l_A, l_b, difference_threshold); accumBilateral(lp_line[l_offset3] - l_d, -l_r, 0, l_A, l_b, difference_threshold); accumBilateral(lp_line[l_offset4] - l_d, +l_r, 0, l_A, l_b, difference_threshold); accumBilateral(lp_line[l_offset5] - l_d, -l_r, +l_r, l_A, l_b, difference_threshold); accumBilateral(lp_line[l_offset6] - l_d, 0, +l_r, l_A, l_b, difference_threshold); accumBilateral(lp_line[l_offset7] - l_d, +l_r, +l_r, l_A, l_b, difference_threshold); // solve long l_det = l_A[0] * l_A[3] - l_A[1] * l_A[1]; long l_ddx = l_A[3] * l_b[0] - l_A[1] * l_b[1]; long l_ddy = -l_A[1] * l_b[0] + l_A[0] * l_b[1]; /// @todo Magic number 1150 is focal length? This is something like /// f in SXGA mode, but in VGA is more like 530. 
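                // Note (illustrative): l_ddx and l_ddy are the Cramer's-rule numerators of
                // the 2x2 system A * [dz/dx, dz/dy]^T = b, i.e. det(A) * dz/dx and
                // det(A) * dz/dy, so the vector assembled below, (1150 * l_ddx, 1150 * l_ddy,
                // -det(A) * depth), serves as an unnormalized surface normal.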
float l_nx = static_cast<float>(1150 * l_ddx); float l_ny = static_cast<float>(1150 * l_ddy); float l_nz = static_cast<float>(-l_det * l_d); float l_sqrt = sqrtf(l_nx * l_nx + l_ny * l_ny + l_nz * l_nz); if (l_sqrt > 0) { float l_norminv = 1.0f / (l_sqrt); l_nx *= l_norminv; l_ny *= l_norminv; l_nz *= l_norminv; //*lp_norm = fabs(l_nz)*255; int l_val1 = static_cast<int>(l_nx * l_offsetx + l_offsetx); int l_val2 = static_cast<int>(l_ny * l_offsety + l_offsety); int l_val3 = static_cast<int>(l_nz * GRANULARITY + GRANULARITY); *lp_norm = NORMAL_LUT[l_val3][l_val2][l_val1]; } else { *lp_norm = 0; // Discard shadows from depth sensor } } else { *lp_norm = 0; //out of depth } ++lp_line; ++lp_norm; } } medianBlur(dst, dst, 5); } class DepthNormalPyramid : public QuantizedPyramid { public: DepthNormalPyramid(const Mat& src, const Mat& mask, int distance_threshold, int difference_threshold, size_t num_features, int extract_threshold); virtual void quantize(Mat& dst) const; virtual bool extractTemplate(Template& templ) const; virtual void pyrDown(); protected: Mat mask; int pyramid_level; Mat normal; size_t num_features; int extract_threshold; }; DepthNormalPyramid::DepthNormalPyramid(const Mat& src, const Mat& _mask, int distance_threshold, int difference_threshold, size_t _num_features, int _extract_threshold) : mask(_mask), pyramid_level(0), num_features(_num_features), extract_threshold(_extract_threshold) { quantizedNormals(src, normal, distance_threshold, difference_threshold); } void DepthNormalPyramid::pyrDown() { // Some parameters need to be adjusted num_features /= 2; /// @todo Why not 4? extract_threshold /= 2; ++pyramid_level; // In this case, NN-downsample the quantized image Mat next_normal; Size size(normal.cols / 2, normal.rows / 2); resize(normal, next_normal, size, 0.0, 0.0, INTER_NEAREST); normal = next_normal; if (!mask.empty()) { Mat next_mask; resize(mask, next_mask, size, 0.0, 0.0, INTER_NEAREST); mask = next_mask; } } void DepthNormalPyramid::quantize(Mat& dst) const { dst = Mat::zeros(normal.size(), CV_8U); normal.copyTo(dst, mask); } bool DepthNormalPyramid::extractTemplate(Template& templ) const { // Features right on the object border are unreliable Mat local_mask; if (!mask.empty()) { erode(mask, local_mask, Mat(), Point(-1,-1), 2, BORDER_REPLICATE); } // Compute distance transform for each individual quantized orientation Mat temp = Mat::zeros(normal.size(), CV_8U); Mat distances[8]; for (int i = 0; i < 8; ++i) { temp.setTo(1 << i, local_mask); bitwise_and(temp, normal, temp); // temp is now non-zero at pixels in the mask with quantized orientation i distanceTransform(temp, distances[i], DIST_C, 3); } // Count how many features taken for each label int label_counts[8] = {0, 0, 0, 0, 0, 0, 0, 0}; // Create sorted list of candidate features std::vector<Candidate> candidates; bool no_mask = local_mask.empty(); for (int r = 0; r < normal.rows; ++r) { const uchar* normal_r = normal.ptr<uchar>(r); const uchar* mask_r = no_mask ? NULL : local_mask.ptr<uchar>(r); for (int c = 0; c < normal.cols; ++c) { if (no_mask || mask_r[c]) { uchar quantized = normal_r[c]; if (quantized != 0 && quantized != 255) // background and shadow { int label = getLabel(quantized); // Accept if distance to a pixel belonging to a different label is greater than // some threshold. IOW, ideal feature is in the center of a large homogeneous // region. 
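          // Note (illustrative): distances[label](r, c) is the DIST_C (chessboard) distance
          // to the nearest pixel not carrying this label, so e.g. a pixel whose closest
          // differently-labeled neighbor is 3 rows and 5 columns away scores max(3, 5) = 5,
          // which passes the default extract_threshold of 2.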
float score = distances[label].at<float>(r, c); if (score >= extract_threshold) { candidates.push_back( Candidate(c, r, label, score) ); ++label_counts[label]; } } } } } // We require a certain number of features if (candidates.size() < num_features) return false; // Prefer large distances, but also want to collect features over all 8 labels. // So penalize labels with lots of candidates. for (size_t i = 0; i < candidates.size(); ++i) { Candidate& c = candidates[i]; c.score /= (float)label_counts[c.f.label]; } std::stable_sort(candidates.begin(), candidates.end()); // Use heuristic based on object area for initial distance threshold float area = no_mask ? (float)normal.total() : (float)countNonZero(local_mask); float distance = sqrtf(area) / sqrtf((float)num_features) + 1.5f; selectScatteredFeatures(candidates, templ.features, num_features, distance); // Size determined externally, needs to match templates for other modalities templ.width = -1; templ.height = -1; templ.pyramid_level = pyramid_level; return true; } DepthNormal::DepthNormal() : distance_threshold(2000), difference_threshold(50), num_features(63), extract_threshold(2) { } DepthNormal::DepthNormal(int _distance_threshold, int _difference_threshold, size_t _num_features, int _extract_threshold) : distance_threshold(_distance_threshold), difference_threshold(_difference_threshold), num_features(_num_features), extract_threshold(_extract_threshold) { } static const char DN_NAME[] = "DepthNormal"; String DepthNormal::name() const { return DN_NAME; } Ptr<QuantizedPyramid> DepthNormal::processImpl(const Mat& src, const Mat& mask) const { return makePtr<DepthNormalPyramid>(src, mask, distance_threshold, difference_threshold, num_features, extract_threshold); } void DepthNormal::read(const FileNode& fn) { String type = fn["type"]; CV_Assert(type == DN_NAME); distance_threshold = fn["distance_threshold"]; difference_threshold = fn["difference_threshold"]; num_features = int(fn["num_features"]); extract_threshold = fn["extract_threshold"]; } void DepthNormal::write(FileStorage& fs) const { fs << "type" << DN_NAME; fs << "distance_threshold" << distance_threshold; fs << "difference_threshold" << difference_threshold; fs << "num_features" << int(num_features); fs << "extract_threshold" << extract_threshold; } /****************************************************************************************\ * Response maps * \****************************************************************************************/ static void orUnaligned8u(const uchar * src, const int src_stride, uchar * dst, const int dst_stride, const int width, const int height) { #if CV_SSE2 volatile bool haveSSE2 = checkHardwareSupport(CPU_SSE2); #if CV_SSE3 volatile bool haveSSE3 = checkHardwareSupport(CPU_SSE3); #endif bool src_aligned = reinterpret_cast<unsigned long long>(src) % 16 == 0; #endif for (int r = 0; r < height; ++r) { int c = 0; #if CV_SSE2 // Use aligned loads if possible if (haveSSE2 && src_aligned) { for ( ; c < width - 15; c += 16) { const __m128i* src_ptr = reinterpret_cast<const __m128i*>(src + c); __m128i* dst_ptr = reinterpret_cast<__m128i*>(dst + c); *dst_ptr = _mm_or_si128(*dst_ptr, *src_ptr); } } #if CV_SSE3 // Use LDDQU for fast unaligned load else if (haveSSE3) { for ( ; c < width - 15; c += 16) { __m128i val = _mm_lddqu_si128(reinterpret_cast<const __m128i*>(src + c)); __m128i* dst_ptr = reinterpret_cast<__m128i*>(dst + c); *dst_ptr = _mm_or_si128(*dst_ptr, val); } } #endif // Fall back to MOVDQU else if (haveSSE2) { for ( ; c < width - 15; c += 
16) { __m128i val = _mm_loadu_si128(reinterpret_cast<const __m128i*>(src + c)); __m128i* dst_ptr = reinterpret_cast<__m128i*>(dst + c); *dst_ptr = _mm_or_si128(*dst_ptr, val); } } #endif for ( ; c < width; ++c) dst[c] |= src[c]; // Advance to next row src += src_stride; dst += dst_stride; } } /** * \brief Spread binary labels in a quantized image. * * Implements section 2.3 "Spreading the Orientations." * * \param[in] src The source 8-bit quantized image. * \param[out] dst Destination 8-bit spread image. * \param T Sampling step. Spread labels T/2 pixels in each direction. */ static void spread(const Mat& src, Mat& dst, int T) { // Allocate and zero-initialize spread (OR'ed) image dst = Mat::zeros(src.size(), CV_8U); // Fill in spread gradient image (section 2.3) for (int r = 0; r < T; ++r) { int height = src.rows - r; for (int c = 0; c < T; ++c) { orUnaligned8u(&src.at<unsigned char>(r, c), static_cast<const int>(src.step1()), dst.ptr(), static_cast<const int>(dst.step1()), src.cols - c, height); } } } // Auto-generated by create_similarity_lut.py CV_DECL_ALIGNED(16) static const unsigned char SIMILARITY_LUT[256] = {0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4, 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 4, 4, 3, 3, 4, 4, 2, 3, 4, 4, 3, 3, 4, 4, 0, 1, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 0, 2, 1, 2, 0, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 0, 3, 2, 3, 1, 3, 2, 3, 0, 3, 2, 3, 1, 3, 2, 3, 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4, 0, 1, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 0, 3, 4, 4, 3, 3, 4, 4, 2, 3, 4, 4, 3, 3, 4, 4, 0, 2, 1, 2, 0, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 0, 2, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 0, 3, 2, 3, 1, 3, 2, 3, 0, 3, 2, 3, 1, 3, 2, 3, 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4}; /** * \brief Precompute response maps for a spread quantized image. * * Implements section 2.4 "Precomputing Response Maps." * * \param[in] src The source 8-bit spread quantized image. * \param[out] response_maps Vector of 8 response maps, one for each bit label. 
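 *
 * Illustrative note: response_maps[i](r, c) holds the best match between orientation i
 * and the set of orientations OR'ed into the spread pixel: 4 for an exact match,
 * falling off to 0 with angular distance (see SIMILARITY_LUT). E.g. a spread value of
 * 0x06 (bits 1 and 2 set) scores max(3, 2) = 3 against label 0.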
*/ static void computeResponseMaps(const Mat& src, std::vector<Mat>& response_maps) { CV_Assert((src.rows * src.cols) % 16 == 0); // Allocate response maps response_maps.resize(8); for (int i = 0; i < 8; ++i) response_maps[i].create(src.size(), CV_8U); Mat lsb4(src.size(), CV_8U); Mat msb4(src.size(), CV_8U); for (int r = 0; r < src.rows; ++r) { const uchar* src_r = src.ptr(r); uchar* lsb4_r = lsb4.ptr(r); uchar* msb4_r = msb4.ptr(r); for (int c = 0; c < src.cols; ++c) { // Least significant 4 bits of spread image pixel lsb4_r[c] = src_r[c] & 15; // Most significant 4 bits, right-shifted to be in [0, 16) msb4_r[c] = (src_r[c] & 240) >> 4; } } #if CV_SSSE3 volatile bool haveSSSE3 = checkHardwareSupport(CV_CPU_SSSE3); if (haveSSSE3) { const __m128i* lut = reinterpret_cast<const __m128i*>(SIMILARITY_LUT); for (int ori = 0; ori < 8; ++ori) { __m128i* map_data = response_maps[ori].ptr<__m128i>(); __m128i* lsb4_data = lsb4.ptr<__m128i>(); __m128i* msb4_data = msb4.ptr<__m128i>(); // Precompute the 2D response map S_i (section 2.4) for (int i = 0; i < (src.rows * src.cols) / 16; ++i) { // Using SSE shuffle for table lookup on 4 orientations at a time // The most/least significant 4 bits are used as the LUT index __m128i res1 = _mm_shuffle_epi8(lut[2*ori + 0], lsb4_data[i]); __m128i res2 = _mm_shuffle_epi8(lut[2*ori + 1], msb4_data[i]); // Combine the results into a single similarity score map_data[i] = _mm_max_epu8(res1, res2); } } } else #endif { // For each of the 8 quantized orientations... for (int ori = 0; ori < 8; ++ori) { uchar* map_data = response_maps[ori].ptr<uchar>(); uchar* lsb4_data = lsb4.ptr<uchar>(); uchar* msb4_data = msb4.ptr<uchar>(); const uchar* lut_low = SIMILARITY_LUT + 32*ori; const uchar* lut_hi = lut_low + 16; for (int i = 0; i < src.rows * src.cols; ++i) { map_data[i] = std::max(lut_low[ lsb4_data[i] ], lut_hi[ msb4_data[i] ]); } } } } /** * \brief Convert a response map to fast linearized ordering. * * Implements section 2.5 "Linearizing the Memory for Parallelization." * * \param[in] response_map The 2D response map, an 8-bit image. * \param[out] linearized The response map in linearized order. It has T*T rows, * each of which is a linear memory of length (W/T)*(H/T). * \param T Sampling step. 
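 *
 * Illustrative note: with T = 8 and a 640x480 response map, linearized has 64 rows of
 * length 80*60 = 4800; pixel (r, c) = (13, 21) is copied to row (13 % 8) * 8 + (21 % 8) = 45
 * at offset (13 / 8) * 80 + (21 / 8) = 82.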
*/ static void linearize(const Mat& response_map, Mat& linearized, int T) { CV_Assert(response_map.rows % T == 0); CV_Assert(response_map.cols % T == 0); // linearized has T^2 rows, where each row is a linear memory int mem_width = response_map.cols / T; int mem_height = response_map.rows / T; linearized.create(T*T, mem_width * mem_height, CV_8U); // Outer two for loops iterate over top-left T^2 starting pixels int index = 0; for (int r_start = 0; r_start < T; ++r_start) { for (int c_start = 0; c_start < T; ++c_start) { uchar* memory = linearized.ptr(index); ++index; // Inner two loops copy every T-th pixel into the linear memory for (int r = r_start; r < response_map.rows; r += T) { const uchar* response_data = response_map.ptr(r); for (int c = c_start; c < response_map.cols; c += T) *memory++ = response_data[c]; } } } } /****************************************************************************************\ * Linearized similarities * \****************************************************************************************/ static const unsigned char* accessLinearMemory(const std::vector<Mat>& linear_memories, const Feature& f, int T, int W) { // Retrieve the TxT grid of linear memories associated with the feature label const Mat& memory_grid = linear_memories[f.label]; CV_DbgAssert(memory_grid.rows == T*T); CV_DbgAssert(f.x >= 0); CV_DbgAssert(f.y >= 0); // The LM we want is at (x%T, y%T) in the TxT grid (stored as the rows of memory_grid) int grid_x = f.x % T; int grid_y = f.y % T; int grid_index = grid_y * T + grid_x; CV_DbgAssert(grid_index >= 0); CV_DbgAssert(grid_index < memory_grid.rows); const unsigned char* memory = memory_grid.ptr(grid_index); // Within the LM, the feature is at (x/T, y/T). W is the "width" of the LM, the // input image width decimated by T. int lm_x = f.x / T; int lm_y = f.y / T; int lm_index = lm_y * W + lm_x; CV_DbgAssert(lm_index >= 0); CV_DbgAssert(lm_index < memory_grid.cols); return memory + lm_index; } /** * \brief Compute similarity measure for a given template at each sampled image location. * * Uses linear memories to compute the similarity measure as described in Fig. 7. * * \param[in] linear_memories Vector of 8 linear memories, one for each label. * \param[in] templ Template to match against. * \param[out] dst Destination 8-bit similarity image of size (W/T, H/T). * \param size Size (W, H) of the original input image. * \param T Sampling step. */ static void similarity(const std::vector<Mat>& linear_memories, const Template& templ, Mat& dst, Size size, int T) { // 63 features or less is a special case because the max similarity per-feature is 4. // 255/4 = 63, so up to that many we can add up similarities in 8 bits without worrying // about overflow. Therefore here we use _mm_add_epi8 as the workhorse, whereas a more // general function would use _mm_add_epi16. CV_Assert(templ.features.size() <= 63); /// @todo Handle more than 255/MAX_RESPONSE features!! // Decimate input image size by factor of T int W = size.width / T; int H = size.height / T; // Feature dimensions, decimated by factor T and rounded up int wf = (templ.width - 1) / T + 1; int hf = (templ.height - 1) / T + 1; // Span is the range over which we can shift the template around the input image int span_x = W - wf; int span_y = H - hf; // Compute number of contiguous (in memory) pixels to check when sliding feature over // image. This allows template to wrap around left/right border incorrectly, so any // wrapped template matches must be filtered out! 
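  // Worked example (illustrative): for a 640x480 image with T = 8 and a 20x12 template,
  // W = 80, H = 60, wf = 3, hf = 2, so span_x = 77, span_y = 58 and
  // template_positions = 58 * 80 + 77 + 1 = 4718 contiguous bytes are accumulated per feature.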
int template_positions = span_y * W + span_x + 1; // why add 1? //int template_positions = (span_y - 1) * W + span_x; // More correct? /// @todo In old code, dst is buffer of size m_U. Could make it something like /// (span_x)x(span_y) instead? dst = Mat::zeros(H, W, CV_8U); uchar* dst_ptr = dst.ptr<uchar>(); #if CV_SSE2 volatile bool haveSSE2 = checkHardwareSupport(CV_CPU_SSE2); #if CV_SSE3 volatile bool haveSSE3 = checkHardwareSupport(CV_CPU_SSE3); #endif #endif // Compute the similarity measure for this template by accumulating the contribution of // each feature for (int i = 0; i < (int)templ.features.size(); ++i) { // Add the linear memory at the appropriate offset computed from the location of // the feature in the template Feature f = templ.features[i]; // Discard feature if out of bounds /// @todo Shouldn't actually see x or y < 0 here? if (f.x < 0 || f.x >= size.width || f.y < 0 || f.y >= size.height) continue; const uchar* lm_ptr = accessLinearMemory(linear_memories, f, T, W); // Now we do an aligned/unaligned add of dst_ptr and lm_ptr with template_positions elements int j = 0; // Process responses 16 at a time if vectorization possible #if CV_SSE2 #if CV_SSE3 if (haveSSE3) { // LDDQU may be more efficient than MOVDQU for unaligned load of next 16 responses for ( ; j < template_positions - 15; j += 16) { __m128i responses = _mm_lddqu_si128(reinterpret_cast<const __m128i*>(lm_ptr + j)); __m128i* dst_ptr_sse = reinterpret_cast<__m128i*>(dst_ptr + j); *dst_ptr_sse = _mm_add_epi8(*dst_ptr_sse, responses); } } else #endif if (haveSSE2) { // Fall back to MOVDQU for ( ; j < template_positions - 15; j += 16) { __m128i responses = _mm_loadu_si128(reinterpret_cast<const __m128i*>(lm_ptr + j)); __m128i* dst_ptr_sse = reinterpret_cast<__m128i*>(dst_ptr + j); *dst_ptr_sse = _mm_add_epi8(*dst_ptr_sse, responses); } } #endif for ( ; j < template_positions; ++j) dst_ptr[j] = uchar(dst_ptr[j] + lm_ptr[j]); } } /** * \brief Compute similarity measure for a given template in a local region. * * \param[in] linear_memories Vector of 8 linear memories, one for each label. * \param[in] templ Template to match against. * \param[out] dst Destination 8-bit similarity image, 16x16. * \param size Size (W, H) of the original input image. * \param T Sampling step. * \param center Center of the local region. */ static void similarityLocal(const std::vector<Mat>& linear_memories, const Template& templ, Mat& dst, Size size, int T, Point center) { // Similar to whole-image similarity() above. This version takes a position 'center' // and computes the energy in the 16x16 patch centered on it. CV_Assert(templ.features.size() <= 63); // Compute the similarity map in a 16x16 patch around center int W = size.width / T; dst = Mat::zeros(16, 16, CV_8U); // Offset each feature point by the requested center. Further adjust to (-8,-8) from the // center to get the top-left corner of the 16x16 patch. // NOTE: We make the offsets multiples of T to agree with results of the original code. 
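  // Worked example (illustrative): with T = 8 and center = (200, 140),
  // offset_x = (200 / 8 - 8) * 8 = 136 and offset_y = (140 / 8 - 8) * 8 = 72, so every
  // template feature is shifted by (136, 72) and the 16x16 dst covers the T-sampled
  // positions around the requested center.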
int offset_x = (center.x / T - 8) * T; int offset_y = (center.y / T - 8) * T; #if CV_SSE2 volatile bool haveSSE2 = checkHardwareSupport(CV_CPU_SSE2); #if CV_SSE3 volatile bool haveSSE3 = checkHardwareSupport(CV_CPU_SSE3); #endif __m128i* dst_ptr_sse = dst.ptr<__m128i>(); #endif for (int i = 0; i < (int)templ.features.size(); ++i) { Feature f = templ.features[i]; f.x += offset_x; f.y += offset_y; // Discard feature if out of bounds, possibly due to applying the offset if (f.x < 0 || f.y < 0 || f.x >= size.width || f.y >= size.height) continue; const uchar* lm_ptr = accessLinearMemory(linear_memories, f, T, W); // Process whole row at a time if vectorization possible #if CV_SSE2 #if CV_SSE3 if (haveSSE3) { // LDDQU may be more efficient than MOVDQU for unaligned load of 16 responses from current row for (int row = 0; row < 16; ++row) { __m128i aligned = _mm_lddqu_si128(reinterpret_cast<const __m128i*>(lm_ptr)); dst_ptr_sse[row] = _mm_add_epi8(dst_ptr_sse[row], aligned); lm_ptr += W; // Step to next row } } else #endif if (haveSSE2) { // Fall back to MOVDQU for (int row = 0; row < 16; ++row) { __m128i aligned = _mm_loadu_si128(reinterpret_cast<const __m128i*>(lm_ptr)); dst_ptr_sse[row] = _mm_add_epi8(dst_ptr_sse[row], aligned); lm_ptr += W; // Step to next row } } else #endif { uchar* dst_ptr = dst.ptr<uchar>(); for (int row = 0; row < 16; ++row) { for (int col = 0; col < 16; ++col) dst_ptr[col] = uchar(dst_ptr[col] + lm_ptr[col]); dst_ptr += 16; lm_ptr += W; } } } } static void addUnaligned8u16u(const uchar * src1, const uchar * src2, ushort * res, int length) { const uchar * end = src1 + length; while (src1 != end) { *res = *src1 + *src2; ++src1; ++src2; ++res; } } /** * \brief Accumulate one or more 8-bit similarity images. * * \param[in] similarities Source 8-bit similarity images. * \param[out] dst Destination 16-bit similarity image. 
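 *
 * Illustrative note: a single 8-bit similarity image can already reach 4 * 63 = 252,
 * so the sum over two or more modalities is widened to 16 bits to avoid overflow.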
*/ static void addSimilarities(const std::vector<Mat>& similarities, Mat& dst) { if (similarities.size() == 1) { similarities[0].convertTo(dst, CV_16U); } else { // NOTE: add() seems to be rather slow in the 8U + 8U -> 16U case dst.create(similarities[0].size(), CV_16U); addUnaligned8u16u(similarities[0].ptr(), similarities[1].ptr(), dst.ptr<ushort>(), static_cast<int>(dst.total())); /// @todo Optimize 16u + 8u -> 16u when more than 2 modalities for (size_t i = 2; i < similarities.size(); ++i) add(dst, similarities[i], dst, noArray(), CV_16U); } } /****************************************************************************************\ * High-level Detector API * \****************************************************************************************/ Detector::Detector() { } Detector::Detector(const std::vector< Ptr<Modality> >& _modalities, const std::vector<int>& T_pyramid) : modalities(_modalities), pyramid_levels(static_cast<int>(T_pyramid.size())), T_at_level(T_pyramid) { } void Detector::match(const std::vector<Mat>& sources, float threshold, std::vector<Match>& matches, const std::vector<String>& class_ids, OutputArrayOfArrays quantized_images, const std::vector<Mat>& masks) const { matches.clear(); if (quantized_images.needed()) quantized_images.create(1, static_cast<int>(pyramid_levels * modalities.size()), CV_8U); CV_Assert(sources.size() == modalities.size()); // Initialize each modality with our sources std::vector< Ptr<QuantizedPyramid> > quantizers; for (int i = 0; i < (int)modalities.size(); ++i){ Mat mask, source; source = sources[i]; if(!masks.empty()){ CV_Assert(masks.size() == modalities.size()); mask = masks[i]; } CV_Assert(mask.empty() || mask.size() == source.size()); quantizers.push_back(modalities[i]->process(source, mask)); } // pyramid level -> modality -> quantization LinearMemoryPyramid lm_pyramid(pyramid_levels, std::vector<LinearMemories>(modalities.size(), LinearMemories(8))); // For each pyramid level, precompute linear memories for each modality std::vector<Size> sizes; for (int l = 0; l < pyramid_levels; ++l) { int T = T_at_level[l]; std::vector<LinearMemories>& lm_level = lm_pyramid[l]; if (l > 0) { for (int i = 0; i < (int)quantizers.size(); ++i) quantizers[i]->pyrDown(); } Mat quantized, spread_quantized; std::vector<Mat> response_maps; for (int i = 0; i < (int)quantizers.size(); ++i) { quantizers[i]->quantize(quantized); spread(quantized, spread_quantized, T); computeResponseMaps(spread_quantized, response_maps); LinearMemories& memories = lm_level[i]; for (int j = 0; j < 8; ++j) linearize(response_maps[j], memories[j], T); if (quantized_images.needed()) //use copyTo here to side step reference semantics. 
quantized.copyTo(quantized_images.getMatRef(static_cast<int>(l*quantizers.size() + i))); } sizes.push_back(quantized.size()); } if (class_ids.empty()) { // Match all templates TemplatesMap::const_iterator it = class_templates.begin(), itend = class_templates.end(); for ( ; it != itend; ++it) matchClass(lm_pyramid, sizes, threshold, matches, it->first, it->second); } else { // Match only templates for the requested class IDs for (int i = 0; i < (int)class_ids.size(); ++i) { TemplatesMap::const_iterator it = class_templates.find(class_ids[i]); if (it != class_templates.end()) matchClass(lm_pyramid, sizes, threshold, matches, it->first, it->second); } } // Sort matches by similarity, and prune any duplicates introduced by pyramid refinement std::sort(matches.begin(), matches.end()); std::vector<Match>::iterator new_end = std::unique(matches.begin(), matches.end()); matches.erase(new_end, matches.end()); } // Used to filter out weak matches struct MatchPredicate { MatchPredicate(float _threshold) : threshold(_threshold) {} bool operator() (const Match& m) { return m.similarity < threshold; } float threshold; }; void Detector::matchClass(const LinearMemoryPyramid& lm_pyramid, const std::vector<Size>& sizes, float threshold, std::vector<Match>& matches, const String& class_id, const std::vector<TemplatePyramid>& template_pyramids) const { // For each template... for (size_t template_id = 0; template_id < template_pyramids.size(); ++template_id) { const TemplatePyramid& tp = template_pyramids[template_id]; // First match over the whole image at the lowest pyramid level /// @todo Factor this out into separate function const std::vector<LinearMemories>& lowest_lm = lm_pyramid.back(); // Compute similarity maps for each modality at lowest pyramid level std::vector<Mat> similarities(modalities.size()); int lowest_start = static_cast<int>(tp.size() - modalities.size()); int lowest_T = T_at_level.back(); int num_features = 0; for (int i = 0; i < (int)modalities.size(); ++i) { const Template& templ = tp[lowest_start + i]; num_features += static_cast<int>(templ.features.size()); similarity(lowest_lm[i], templ, similarities[i], sizes.back(), lowest_T); } // Combine into overall similarity /// @todo Support weighting the modalities Mat total_similarity; addSimilarities(similarities, total_similarity); // Convert user-friendly percentage to raw similarity threshold. The percentage // threshold scales from half the max response (what you would expect from applying // the template to a completely random image) to the max response. // NOTE: This assumes max per-feature response is 4, so we scale between [2*nf, 4*nf]. 
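    // Worked example (illustrative): with two modalities of 63 features each
    // (num_features = 126) and threshold = 80%, raw_threshold = int(252 + 0.80 * 252 + 0.5) = 454
    // out of a maximum possible raw score of 4 * 126 = 504.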
int raw_threshold = static_cast<int>(2*num_features + (threshold / 100.f) * (2*num_features) + 0.5f); // Find initial matches std::vector<Match> candidates; for (int r = 0; r < total_similarity.rows; ++r) { ushort* row = total_similarity.ptr<ushort>(r); for (int c = 0; c < total_similarity.cols; ++c) { int raw_score = row[c]; if (raw_score > raw_threshold) { int offset = lowest_T / 2 + (lowest_T % 2 - 1); int x = c * lowest_T + offset; int y = r * lowest_T + offset; float score = (raw_score * 100.f) / (4 * num_features) + 0.5f; candidates.push_back(Match(x, y, score, class_id, static_cast<int>(template_id))); } } } // Locally refine each match by marching up the pyramid for (int l = pyramid_levels - 2; l >= 0; --l) { const std::vector<LinearMemories>& lms = lm_pyramid[l]; int T = T_at_level[l]; int start = static_cast<int>(l * modalities.size()); Size size = sizes[l]; int border = 8 * T; int offset = T / 2 + (T % 2 - 1); int max_x = size.width - tp[start].width - border; int max_y = size.height - tp[start].height - border; std::vector<Mat> similarities2(modalities.size()); Mat total_similarity2; for (int m = 0; m < (int)candidates.size(); ++m) { Match& match2 = candidates[m]; int x = match2.x * 2 + 1; /// @todo Support other pyramid distance int y = match2.y * 2 + 1; // Require 8 (reduced) row/cols to the up/left x = std::max(x, border); y = std::max(y, border); // Require 8 (reduced) row/cols to the down/right, plus the template size x = std::min(x, max_x); y = std::min(y, max_y); // Compute local similarity maps for each modality int numFeatures = 0; for (int i = 0; i < (int)modalities.size(); ++i) { const Template& templ = tp[start + i]; numFeatures += static_cast<int>(templ.features.size()); similarityLocal(lms[i], templ, similarities2[i], size, T, Point(x, y)); } addSimilarities(similarities2, total_similarity2); // Find best local adjustment int best_score = 0; int best_r = -1, best_c = -1; for (int r = 0; r < total_similarity2.rows; ++r) { ushort* row = total_similarity2.ptr<ushort>(r); for (int c = 0; c < total_similarity2.cols; ++c) { int score = row[c]; if (score > best_score) { best_score = score; best_r = r; best_c = c; } } } // Update current match match2.x = (x / T - 8 + best_c) * T + offset; match2.y = (y / T - 8 + best_r) * T + offset; match2.similarity = (best_score * 100.f) / (4 * numFeatures); } // Filter out any matches that drop below the similarity threshold std::vector<Match>::iterator new_end = std::remove_if(candidates.begin(), candidates.end(), MatchPredicate(threshold)); candidates.erase(new_end, candidates.end()); } matches.insert(matches.end(), candidates.begin(), candidates.end()); } } int Detector::addTemplate(const std::vector<Mat>& sources, const String& class_id, const Mat& object_mask, Rect* bounding_box) { int num_modalities = static_cast<int>(modalities.size()); std::vector<TemplatePyramid>& template_pyramids = class_templates[class_id]; int template_id = static_cast<int>(template_pyramids.size()); TemplatePyramid tp; tp.resize(num_modalities * pyramid_levels); // For each modality...
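  // Note (illustrative): tp is indexed as tp[l * num_modalities + i], i.e. grouped by
  // pyramid level first; with 2 modalities and 2 levels the order is
  // {L0/mod0, L0/mod1, L1/mod0, L1/mod1}.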
for (int i = 0; i < num_modalities; ++i) { // Extract a template at each pyramid level Ptr<QuantizedPyramid> qp = modalities[i]->process(sources[i], object_mask); for (int l = 0; l < pyramid_levels; ++l) { /// @todo Could do mask subsampling here instead of in pyrDown() if (l > 0) qp->pyrDown(); bool success = qp->extractTemplate(tp[l*num_modalities + i]); if (!success) return -1; } } Rect bb = cropTemplates(tp); if (bounding_box) *bounding_box = bb; /// @todo Can probably avoid a copy of tp here with swap template_pyramids.push_back(tp); return template_id; } int Detector::addSyntheticTemplate(const std::vector<Template>& templates, const String& class_id) { std::vector<TemplatePyramid>& template_pyramids = class_templates[class_id]; int template_id = static_cast<int>(template_pyramids.size()); template_pyramids.push_back(templates); return template_id; } const std::vector<Template>& Detector::getTemplates(const String& class_id, int template_id) const { TemplatesMap::const_iterator i = class_templates.find(class_id); CV_Assert(i != class_templates.end()); CV_Assert(i->second.size() > size_t(template_id)); return i->second[template_id]; } int Detector::numTemplates() const { int ret = 0; TemplatesMap::const_iterator i = class_templates.begin(), iend = class_templates.end(); for ( ; i != iend; ++i) ret += static_cast<int>(i->second.size()); return ret; } int Detector::numTemplates(const String& class_id) const { TemplatesMap::const_iterator i = class_templates.find(class_id); if (i == class_templates.end()) return 0; return static_cast<int>(i->second.size()); } std::vector<String> Detector::classIds() const { std::vector<String> ids; TemplatesMap::const_iterator i = class_templates.begin(), iend = class_templates.end(); for ( ; i != iend; ++i) { ids.push_back(i->first); } return ids; } void Detector::read(const FileNode& fn) { class_templates.clear(); pyramid_levels = fn["pyramid_levels"]; fn["T"] >> T_at_level; modalities.clear(); FileNode modalities_fn = fn["modalities"]; FileNodeIterator it = modalities_fn.begin(), it_end = modalities_fn.end(); for ( ; it != it_end; ++it) { modalities.push_back(Modality::create(*it)); } } void Detector::write(FileStorage& fs) const { fs << "pyramid_levels" << pyramid_levels; fs << "T" << T_at_level; fs << "modalities" << "["; for (int i = 0; i < (int)modalities.size(); ++i) { fs << "{"; modalities[i]->write(fs); fs << "}"; } fs << "]"; // modalities } String Detector::readClass(const FileNode& fn, const String &class_id_override) { // Verify compatible with Detector settings FileNode mod_fn = fn["modalities"]; CV_Assert(mod_fn.size() == modalities.size()); FileNodeIterator mod_it = mod_fn.begin(), mod_it_end = mod_fn.end(); int i = 0; for ( ; mod_it != mod_it_end; ++mod_it, ++i) CV_Assert(modalities[i]->name() == (String)(*mod_it)); CV_Assert((int)fn["pyramid_levels"] == pyramid_levels); // Detector should not already have this class String class_id; if (class_id_override.empty()) { String class_id_tmp = fn["class_id"]; CV_Assert(class_templates.find(class_id_tmp) == class_templates.end()); class_id = class_id_tmp; } else { class_id = class_id_override; } TemplatesMap::value_type v(class_id, std::vector<TemplatePyramid>()); std::vector<TemplatePyramid>& tps = v.second; int expected_id = 0; FileNode tps_fn = fn["template_pyramids"]; tps.resize(tps_fn.size()); FileNodeIterator tps_it = tps_fn.begin(), tps_it_end = tps_fn.end(); for ( ; tps_it != tps_it_end; ++tps_it, ++expected_id) { int template_id = (*tps_it)["template_id"]; CV_Assert(template_id == 
expected_id); FileNode templates_fn = (*tps_it)["templates"]; tps[template_id].resize(templates_fn.size()); FileNodeIterator templ_it = templates_fn.begin(), templ_it_end = templates_fn.end(); int idx = 0; for ( ; templ_it != templ_it_end; ++templ_it) { tps[template_id][idx++].read(*templ_it); } } class_templates.insert(v); return class_id; } void Detector::writeClass(const String& class_id, FileStorage& fs) const { TemplatesMap::const_iterator it = class_templates.find(class_id); CV_Assert(it != class_templates.end()); const std::vector<TemplatePyramid>& tps = it->second; fs << "class_id" << it->first; fs << "modalities" << "[:"; for (size_t i = 0; i < modalities.size(); ++i) fs << modalities[i]->name(); fs << "]"; // modalities fs << "pyramid_levels" << pyramid_levels; fs << "template_pyramids" << "["; for (size_t i = 0; i < tps.size(); ++i) { const TemplatePyramid& tp = tps[i]; fs << "{"; fs << "template_id" << int(i); //TODO is this cast correct? won't be good if rolls over... fs << "templates" << "["; for (size_t j = 0; j < tp.size(); ++j) { fs << "{"; tp[j].write(fs); fs << "}"; // current template } fs << "]"; // templates fs << "}"; // current pyramid } fs << "]"; // pyramids } void Detector::readClasses(const std::vector<String>& class_ids, const String& format) { for (size_t i = 0; i < class_ids.size(); ++i) { const String& class_id = class_ids[i]; String filename = cv::format(format.c_str(), class_id.c_str()); FileStorage fs(filename, FileStorage::READ); readClass(fs.root()); } } void Detector::writeClasses(const String& format) const { TemplatesMap::const_iterator it = class_templates.begin(), it_end = class_templates.end(); for ( ; it != it_end; ++it) { const String& class_id = it->first; String filename = cv::format(format.c_str(), class_id.c_str()); FileStorage fs(filename, FileStorage::WRITE); writeClass(class_id, fs); } } static const int T_DEFAULTS[] = {5, 8}; Ptr<Detector> getDefaultLINE() { std::vector< Ptr<Modality> > modalities; modalities.push_back(makePtr<ColorGradient>()); return makePtr<Detector>(modalities, std::vector<int>(T_DEFAULTS, T_DEFAULTS + 2)); } Ptr<Detector> getDefaultLINEMOD() { std::vector< Ptr<Modality> > modalities; modalities.push_back(makePtr<ColorGradient>()); modalities.push_back(makePtr<DepthNormal>()); return makePtr<Detector>(modalities, std::vector<int>(T_DEFAULTS, T_DEFAULTS + 2)); } } // namespace linemod } // namespace cv