Commit a1784b73 authored by Vadim Pisarevsky

converted old haar cascades to the new format; added the conversion function; added OpenCL optimization to CascadeClassifier; optimized the data structures and CPU code for the stump case.
parents 2431c72d ec3f22ce
......@@ -426,6 +426,61 @@ public:
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); set(i, a11); return *this;
}
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11, typename _Tp12>
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11,
const _Tp12& a12)
{
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); i = set(i, a11);
set(i, a12); return *this;
}
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11, typename _Tp12,
typename _Tp13>
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11,
const _Tp12& a12, const _Tp13& a13)
{
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); i = set(i, a11);
i = set(i, a12); set(i, a13); return *this;
}
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11, typename _Tp12,
typename _Tp13, typename _Tp14>
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11,
const _Tp12& a12, const _Tp13& a13, const _Tp14& a14)
{
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); i = set(i, a11);
i = set(i, a12); i = set(i, a13); set(i, a14); return *this;
}
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11, typename _Tp12,
typename _Tp13, typename _Tp14, typename _Tp15>
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11,
const _Tp12& a12, const _Tp13& a13, const _Tp14& a14, const _Tp15& a15)
{
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); i = set(i, a11);
i = set(i, a12); i = set(i, a13); i = set(i, a14); set(i, a15); return *this;
}
bool run(int dims, size_t globalsize[],
size_t localsize[], bool sync, const Queue& q=Queue());
......
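The extra args() overloads above just extend the existing set()-chaining pattern to kernels with up to 16 arguments, presumably to accommodate the longer argument lists of the new detection kernels. A minimal usage sketch, assuming a hypothetical kernel name and program source that are not part of this commit:

#include <opencv2/core.hpp>
#include <opencv2/core/ocl.hpp>

// hedged sketch: pass buffers and scalars to an OpenCL kernel through the T-API wrappers
static void runHypotheticalKernel(const cv::UMat& src, cv::UMat& dst)
{
    static const char* kernelSrc = "/* ... OpenCL C source ... */"; // placeholder
    cv::String errmsg;
    cv::ocl::ProgramSource source(kernelSrc);
    cv::ocl::Kernel k("my_kernel", source, "", &errmsg);  // hypothetical kernel name
    if( k.empty() )
        return;
    k.args(cv::ocl::KernelArg::ReadOnlyNoSize(src),
           cv::ocl::KernelArg::WriteOnly(dst),
           12.5f);                                         // plain scalars are accepted too
    size_t globalsize[] = { (size_t)dst.cols, (size_t)dst.rows };
    k.run(2, globalsize, 0, false);                        // run(dims, globalsize, localsize, sync)
}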
......@@ -1075,6 +1075,11 @@ CV_EXPORTS_W void boxFilter( InputArray src, OutputArray dst, int ddepth,
Size ksize, Point anchor = Point(-1,-1),
bool normalize = true,
int borderType = BORDER_DEFAULT );
CV_EXPORTS_W void sqrBoxFilter( InputArray _src, OutputArray _dst, int ddepth,
Size ksize, Point anchor = Point(-1, -1),
bool normalize = true,
int borderType = BORDER_DEFAULT );
//! a synonym for normalized box filter
CV_EXPORTS_W void blur( InputArray src, OutputArray dst,
......
......@@ -895,6 +895,114 @@ void cv::blur( InputArray src, OutputArray dst,
boxFilter( src, dst, -1, ksize, anchor, true, borderType );
}
/****************************************************************************************\
Squared Box Filter
\****************************************************************************************/
namespace cv
{
template<typename T, typename ST> struct SqrRowSum : public BaseRowFilter
{
SqrRowSum( int _ksize, int _anchor )
{
ksize = _ksize;
anchor = _anchor;
}
void operator()(const uchar* src, uchar* dst, int width, int cn)
{
const T* S = (const T*)src;
ST* D = (ST*)dst;
int i = 0, k, ksz_cn = ksize*cn;
width = (width - 1)*cn;
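// for each channel, seed the sum of squares over the first ksize samples, then slide the
// window one pixel at a time: add the entering squared value, subtract the leaving one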
for( k = 0; k < cn; k++, S++, D++ )
{
ST s = 0;
for( i = 0; i < ksz_cn; i += cn )
{
ST val = (ST)S[i];
s += val*val;
}
D[0] = s;
for( i = 0; i < width; i += cn )
{
ST val0 = (ST)S[i], val1 = (ST)S[i + ksz_cn];
s += val1*val1 - val0*val0;
D[i+cn] = s;
}
}
}
};
static Ptr<BaseRowFilter> getSqrRowSumFilter(int srcType, int sumType, int ksize, int anchor)
{
int sdepth = CV_MAT_DEPTH(srcType), ddepth = CV_MAT_DEPTH(sumType);
CV_Assert( CV_MAT_CN(sumType) == CV_MAT_CN(srcType) );
if( anchor < 0 )
anchor = ksize/2;
if( sdepth == CV_8U && ddepth == CV_32S )
return makePtr<SqrRowSum<uchar, int> >(ksize, anchor);
if( sdepth == CV_8U && ddepth == CV_64F )
return makePtr<SqrRowSum<uchar, double> >(ksize, anchor);
if( sdepth == CV_16U && ddepth == CV_64F )
return makePtr<SqrRowSum<ushort, double> >(ksize, anchor);
if( sdepth == CV_16S && ddepth == CV_64F )
return makePtr<SqrRowSum<short, double> >(ksize, anchor);
if( sdepth == CV_32F && ddepth == CV_64F )
return makePtr<SqrRowSum<float, double> >(ksize, anchor);
if( sdepth == CV_64F && ddepth == CV_64F )
return makePtr<SqrRowSum<double, double> >(ksize, anchor);
CV_Error_( CV_StsNotImplemented,
("Unsupported combination of source format (=%d), and buffer format (=%d)",
srcType, sumType));
return Ptr<BaseRowFilter>();
}
}
void cv::sqrBoxFilter( InputArray _src, OutputArray _dst, int ddepth,
Size ksize, Point anchor,
bool normalize, int borderType )
{
Mat src = _src.getMat();
int sdepth = src.depth(), cn = src.channels();
if( ddepth < 0 )
ddepth = sdepth < CV_32F ? CV_32F : CV_64F;
_dst.create( src.size(), CV_MAKETYPE(ddepth, cn) );
Mat dst = _dst.getMat();
if( borderType != BORDER_CONSTANT && normalize )
{
if( src.rows == 1 )
ksize.height = 1;
if( src.cols == 1 )
ksize.width = 1;
}
int sumType = CV_64F;
if( sdepth == CV_8U )
sumType = CV_32S;
sumType = CV_MAKETYPE( sumType, cn );
int srcType = CV_MAKETYPE(sdepth, cn);
int dstType = CV_MAKETYPE(ddepth, cn);
Ptr<BaseRowFilter> rowFilter = getSqrRowSumFilter(srcType, sumType, ksize.width, anchor.x );
Ptr<BaseColumnFilter> columnFilter = getColumnSumFilter(sumType,
dstType, ksize.height, anchor.y,
normalize ? 1./(ksize.width*ksize.height) : 1);
Ptr<FilterEngine> f = makePtr<FilterEngine>(Ptr<BaseFilter>(), rowFilter, columnFilter,
srcType, dstType, sumType, borderType );
f->apply( src, dst );
}
/****************************************************************************************\
Gaussian Blur
\****************************************************************************************/
......
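sqrBoxFilter computes a (normalized) windowed sum of squared pixel values, so it pairs naturally with boxFilter for local statistics. A minimal sketch, assuming a single-channel input image; the window size is arbitrary:

#include <opencv2/imgproc.hpp>

// hedged sketch: per-pixel local variance = E[x^2] - (E[x])^2 over a 5x5 window
static cv::Mat localVariance(const cv::Mat& img)
{
    cv::Mat mean, sqmean;
    cv::boxFilter(img, mean, CV_32F, cv::Size(5, 5));      // E[x]
    cv::sqrBoxFilter(img, sqmean, CV_32F, cv::Size(5, 5)); // E[x^2], added by this patch
    return sqmean - mean.mul(mean);
}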
......@@ -111,12 +111,15 @@ public:
};
CV_EXPORTS void groupRectangles(std::vector<Rect>& rectList, int groupThreshold, double eps = 0.2);
CV_EXPORTS_W void groupRectangles(CV_IN_OUT std::vector<Rect>& rectList, CV_OUT std::vector<int>& weights, int groupThreshold, double eps = 0.2);
CV_EXPORTS void groupRectangles(std::vector<Rect>& rectList, int groupThreshold, double eps, std::vector<int>* weights, std::vector<double>* levelWeights );
CV_EXPORTS_W void groupRectangles(CV_IN_OUT std::vector<Rect>& rectList, CV_OUT std::vector<int>& weights,
int groupThreshold, double eps = 0.2);
CV_EXPORTS void groupRectangles(std::vector<Rect>& rectList, int groupThreshold,
double eps, std::vector<int>* weights, std::vector<double>* levelWeights );
CV_EXPORTS void groupRectangles(std::vector<Rect>& rectList, std::vector<int>& rejectLevels,
std::vector<double>& levelWeights, int groupThreshold, double eps = 0.2);
CV_EXPORTS void groupRectangles_meanshift(std::vector<Rect>& rectList, std::vector<double>& foundWeights, std::vector<double>& foundScales,
double detectThreshold = 0.0, Size winDetSize = Size(64, 128));
CV_EXPORTS void groupRectangles_meanshift(std::vector<Rect>& rectList, std::vector<double>& foundWeights,
std::vector<double>& foundScales,
double detectThreshold = 0.0, Size winDetSize = Size(64, 128));
class CV_EXPORTS FeatureEvaluator
{
......@@ -132,7 +135,7 @@ public:
virtual Ptr<FeatureEvaluator> clone() const;
virtual int getFeatureType() const;
virtual bool setImage(const Mat& img, Size origWinSize);
virtual bool setImage(InputArray img, Size origWinSize, Size sumSize);
virtual bool setWindow(Point p);
virtual double calcOrd(int featureIdx) const;
......@@ -232,6 +235,8 @@ public:
CV_WRAP int getFeatureType() const;
void* getOldCascade();
CV_WRAP static bool convert(const String& oldcascade, const String& newcascade);
void setMaskGenerator(const Ptr<BaseCascadeClassifier::MaskGenerator>& maskGenerator);
Ptr<BaseCascadeClassifier::MaskGenerator> getMaskGenerator();
......
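The groupRectangles declarations above are only re-wrapped in this hunk; for context, the weighted overload is the one typically used to merge raw sliding-window detections. A small sketch (the threshold and eps values are just illustrative):

#include <opencv2/objdetect.hpp>
#include <vector>

// hedged sketch: merge overlapping candidate windows, keeping the support count of each group
static void groupDetections(std::vector<cv::Rect>& candidates)
{
    std::vector<int> weights;  // number of raw rectangles merged into each group
    cv::groupRectangles(candidates, weights, 3 /* groupThreshold */, 0.2 /* eps */);
}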
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, Itseez Inc, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
/* Haar features calculation */
#include "precomp.hpp"
#include <stdio.h>
namespace cv
{
/* field names */
#define ICV_HAAR_SIZE_NAME "size"
#define ICV_HAAR_STAGES_NAME "stages"
#define ICV_HAAR_TREES_NAME "trees"
#define ICV_HAAR_FEATURE_NAME "feature"
#define ICV_HAAR_RECTS_NAME "rects"
#define ICV_HAAR_TILTED_NAME "tilted"
#define ICV_HAAR_THRESHOLD_NAME "threshold"
#define ICV_HAAR_LEFT_NODE_NAME "left_node"
#define ICV_HAAR_LEFT_VAL_NAME "left_val"
#define ICV_HAAR_RIGHT_NODE_NAME "right_node"
#define ICV_HAAR_RIGHT_VAL_NAME "right_val"
#define ICV_HAAR_STAGE_THRESHOLD_NAME "stage_threshold"
#define ICV_HAAR_PARENT_NAME "parent"
#define ICV_HAAR_NEXT_NAME "next"
namespace haar_cvt
{
struct HaarFeature
{
enum { RECT_NUM = 3 };
HaarFeature()
{
tilted = false;
for( int i = 0; i < RECT_NUM; i++ )
{
rect[i].r = Rect(0,0,0,0);
rect[i].weight = 0.f;
}
}
bool tilted;
struct
{
Rect r;
float weight;
} rect[RECT_NUM];
};
struct HaarClassifierNode
{
HaarClassifierNode()
{
f = left = right = 0;
threshold = 0.f;
}
int f, left, right;
float threshold;
};
struct HaarClassifier
{
std::vector<HaarClassifierNode> nodes;
std::vector<float> leaves;
};
struct HaarStageClassifier
{
double threshold;
std::vector<HaarClassifier> weaks;
};
static bool convert(const String& oldcascade, const String& newcascade)
{
FileStorage oldfs(oldcascade, FileStorage::READ);
if( !oldfs.isOpened() )
return false;
FileNode oldroot = oldfs.getFirstTopLevelNode();
FileNode sznode = oldroot[ICV_HAAR_SIZE_NAME];
if( sznode.empty() )
return false;
int maxdepth = 0;
Size cascadesize;
cascadesize.width = (int)sznode[0];
cascadesize.height = (int)sznode[1];
std::vector<HaarFeature> features;
size_t i, j, k, n;
FileNode stages_seq = oldroot[ICV_HAAR_STAGES_NAME];
size_t nstages = stages_seq.size();
std::vector<HaarStageClassifier> stages(nstages);
for( i = 0; i < nstages; i++ )
{
FileNode stagenode = stages_seq[i];
HaarStageClassifier& stage = stages[i];
stage.threshold = (double)stagenode[ICV_HAAR_STAGE_THRESHOLD_NAME];
FileNode weaks_seq = stagenode[ICV_HAAR_TREES_NAME];
size_t nweaks = weaks_seq.size();
stage.weaks.resize(nweaks);
for( j = 0; j < nweaks; j++ )
{
HaarClassifier& weak = stage.weaks[j];
FileNode weaknode = weaks_seq[j];
size_t nnodes = weaknode.size();
for( n = 0; n < nnodes; n++ )
{
FileNode nnode = weaknode[n];
FileNode fnode = nnode[ICV_HAAR_FEATURE_NAME];
HaarFeature f;
HaarClassifierNode node;
node.f = (int)features.size();
f.tilted = (int)fnode[ICV_HAAR_TILTED_NAME] != 0;
FileNode rects_seq = fnode[ICV_HAAR_RECTS_NAME];
size_t nrects = rects_seq.size();
for( k = 0; k < nrects; k++ )
{
FileNode rnode = rects_seq[k];
f.rect[k].r.x = (int)rnode[0];
f.rect[k].r.y = (int)rnode[1];
f.rect[k].r.width = (int)rnode[2];
f.rect[k].r.height = (int)rnode[3];
f.rect[k].weight = (float)rnode[4];
}
features.push_back(f);
node.threshold = nnode[ICV_HAAR_THRESHOLD_NAME];
FileNode leftValNode = nnode[ICV_HAAR_LEFT_VAL_NAME];
if( !leftValNode.empty() )
{
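// a leaf child is stored as a non-positive value: the negated index into weak.leaves
// (the else branch keeps a positive index that points to another internal node)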
node.left = -(int)weak.leaves.size();
weak.leaves.push_back((float)leftValNode);
}
else
{
node.left = (int)nnode[ICV_HAAR_LEFT_NODE_NAME];
}
FileNode rightValNode = nnode[ICV_HAAR_RIGHT_VAL_NAME];
if( !rightValNode.empty() )
{
node.right = -(int)weak.leaves.size();
weak.leaves.push_back((float)rightValNode);
}
else
{
node.right = (int)nnode[ICV_HAAR_RIGHT_NODE_NAME];
}
weak.nodes.push_back(node);
}
}
}
FileStorage newfs(newcascade, FileStorage::WRITE);
if( !newfs.isOpened() )
return false;
size_t maxWeakCount = 0, nfeatures = features.size();
for( i = 0; i < nstages; i++ )
maxWeakCount = std::max(maxWeakCount, stages[i].weaks.size());
newfs << "cascade" << "{:opencv-cascade-classifier"
<< "stageType" << "BOOST"
<< "featureType" << "HAAR"
<< "height" << cascadesize.width
<< "width" << cascadesize.height
<< "stageParams" << "{"
<< "maxWeakCount" << (int)maxWeakCount
<< "}"
<< "featureParams" << "{"
<< "maxCatCount" << 0
<< "}"
<< "stageNum" << (int)nstages
<< "stages" << "[";
for( i = 0; i < nstages; i++ )
{
size_t nweaks = stages[i].weaks.size();
newfs << "{" << "maxWeakCount" << (int)nweaks
<< "stageThreshold" << stages[i].threshold
<< "weakClassifiers" << "[";
for( j = 0; j < nweaks; j++ )
{
const HaarClassifier& c = stages[i].weaks[j];
newfs << "{" << "internalNodes" << "[";
size_t nnodes = c.nodes.size(), nleaves = c.leaves.size();
for( k = 0; k < nnodes; k++ )
newfs << c.nodes[k].left << c.nodes[k].right
<< c.nodes[k].f << c.nodes[k].threshold;
newfs << "]" << "leafValues" << "[";
for( k = 0; k < nleaves; k++ )
newfs << c.leaves[k];
newfs << "]" << "}";
}
newfs << "]" << "}";
}
newfs << "]"
<< "features" << "[";
for( i = 0; i < nfeatures; i++ )
{
const HaarFeature& f = features[i];
newfs << "{" << "rects" << "[";
for( j = 0; j < (size_t)HaarFeature::RECT_NUM; j++ )
{
if( j >= 2 && fabs(f.rect[j].weight) < FLT_EPSILON )
break;
newfs << "[" << f.rect[j].r.x << f.rect[j].r.y <<
f.rect[j].r.width << f.rect[j].r.height << f.rect[j].weight << "]";
}
newfs << "]";
if( f.tilted )
newfs << "tilted" << 1;
newfs << "}";
}
newfs << "]" << "}";
return true;
}
}
bool CascadeClassifier::convert(const String& oldcascade, const String& newcascade)
{
bool ok = haar_cvt::convert(oldcascade, newcascade);
if( !ok && newcascade.size() > 0 )
remove(newcascade.c_str());
return ok;
}
}
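A minimal way to exercise the new conversion entry point from user code; the file names below are placeholders, not files shipped with this commit:

#include <opencv2/objdetect.hpp>
#include <iostream>

int main()
{
    // convert an old-format haar cascade into the new xml layout, then load it as usual
    if( !cv::CascadeClassifier::convert("old_cascade.xml", "new_cascade.xml") )
    {
        std::cerr << "conversion failed" << std::endl;
        return 1;
    }
    cv::CascadeClassifier cascade("new_cascade.xml");
    std::cout << "converted cascade loaded: " << (cascade.empty() ? "no" : "yes") << std::endl;
    return 0;
}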
///////////////////////////// OpenCL kernels for face detection //////////////////////////////
////////////////////////////// see the opencv/doc/license.txt ///////////////////////////////
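// OptFeature::ofs[k] holds precomputed integral-image offsets of the k-th rectangle's four
// corners, so each rectangle sum below is psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]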
typedef struct __attribute__((aligned(4))) OptFeature
{
int4 ofs[3] __attribute__((aligned (4)));
float4 weight __attribute__((aligned (4)));
}
OptFeature;
typedef struct __attribute__((aligned(4))) Stump
{
int featureIdx __attribute__((aligned (4)));
float threshold __attribute__((aligned (4))); // for ordered features only
float left __attribute__((aligned (4)));
float right __attribute__((aligned (4)));
}
Stump;
typedef struct __attribute__((aligned (4))) Stage
{
int first __attribute__((aligned (4)));
int ntrees __attribute__((aligned (4)));
float threshold __attribute__((aligned (4)));
}
Stage;
__kernel void runHaarClassifierStump(
__global const int* sum,
int sumstep, int sumoffset,
__global const int* sqsum,
int sqsumstep, int sqsumoffset,
__global const OptFeature* optfeatures,
int nstages,
__global const Stage* stages,
__global const Stump* stumps,
volatile __global int* facepos,
int2 imgsize, int xyscale, float factor,
int4 normrect, int2 windowsize, int maxFaces)
{
int ix = get_global_id(0)*xyscale;
int iy = get_global_id(1)*xyscale;
sumstep /= sizeof(int);
sqsumstep /= sizeof(int);
if( ix < imgsize.x && iy < imgsize.y )
{
int ntrees;
int stageIdx, i;
float s = 0.f;
__global const Stump* stump = stumps;
__global const OptFeature* f;
__global const int* psum = sum + mad24(iy, sumstep, ix);
__global const int* pnsum = psum + mad24(normrect.y, sumstep, normrect.x);
int normarea = normrect.z * normrect.w;
float invarea = 1.f/normarea;
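// per-window brightness normalization: sval is the mean over the normalization rectangle,
// and nf (below) is a standard-deviation-derived factor used to scale the stump thresholds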
float sval = (pnsum[0] - pnsum[normrect.z] - pnsum[mul24(normrect.w, sumstep)] +
pnsum[mad24(normrect.w, sumstep, normrect.z)])*invarea;
float sqval = (sqsum[mad24(iy + normrect.y, sqsumstep, ix + normrect.x)])*invarea;
float nf = (float)normarea * sqrt(max(sqval - sval * sval, 0.f));
float4 weight, vsval;
int4 ofs, ofs0, ofs1, ofs2;
nf = nf > 0 ? nf : 1.f;
for( stageIdx = 0; stageIdx < nstages; stageIdx++ )
{
ntrees = stages[stageIdx].ntrees;
s = 0.f;
for( i = 0; i < ntrees; i++, stump++ )
{
f = optfeatures + stump->featureIdx;
weight = f->weight;
ofs = f->ofs[0];
sval = (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.x;
ofs = f->ofs[1];
sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.y;
if( weight.z > 0 )
{
ofs = f->ofs[2];
sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.z;
}
s += (sval < stump->threshold*nf) ? stump->left : stump->right;
}
if( s < stages[stageIdx].threshold )
break;
}
if( stageIdx == nstages )
{
int nfaces = atomic_inc(facepos);
if( nfaces < maxFaces )
{
volatile __global int* face = facepos + 1 + nfaces*4;
face[0] = convert_int_rte(ix*factor);
face[1] = convert_int_rte(iy*factor);
face[2] = convert_int_rte(windowsize.x*factor);
face[3] = convert_int_rte(windowsize.y*factor);
}
}
}
}
#if 0
__kernel void runLBPClassifierStump(
__global const int* sum,
int sumstep, int sumoffset,
__global const int* sqsum,
int sqsumstep, int sqsumoffset,
__global const OptFeature* optfeatures,
int nstages,
__global const Stage* stages,
__global const Stump* stumps,
__global const int* bitsets,
int bitsetSize,
volatile __global int* facepos,
int2 imgsize, int xyscale, float factor,
int4 normrect, int2 windowsize, int maxFaces)
{
int ix = get_global_id(0)*xyscale*VECTOR_SIZE;
int iy = get_global_id(1)*xyscale;
sumstep /= sizeof(int);
sqsumstep /= sizeof(int);
if( ix < imgsize.x && iy < imgsize.y )
{
int ntrees;
int stageIdx, i;
float s = 0.f;
__global const Stump* stump = stumps;
__global const int* bitset = bitsets;
__global const OptFeature* f;
__global const int* psum = sum + mad24(iy, sumstep, ix);
__global const int* pnsum = psum + mad24(normrect.y, sumstep, normrect.x);
int normarea = normrect.z * normrect.w;
float invarea = 1.f/normarea;
float sval = (pnsum[0] - pnsum[normrect.z] - pnsum[mul24(normrect.w, sumstep)] +
pnsum[mad24(normrect.w, sumstep, normrect.z)])*invarea;
float sqval = (sqsum[mad24(iy + normrect.y, sqsumstep, ix + normrect.x)])*invarea;
float nf = (float)normarea * sqrt(max(sqval - sval * sval, 0.f));
float4 weight;
int4 ofs;
nf = nf > 0 ? nf : 1.f;
for( stageIdx = 0; stageIdx < nstages; stageIdx++ )
{
ntrees = stages[stageIdx].ntrees;
s = 0.f;
for( i = 0; i < ntrees; i++, stump++, bitset += bitsetSize )
{
f = optfeatures + stump->featureIdx;
weight = f->weight;
// TODO: compute the LBP feature value into 'val' (this kernel is still disabled via #if 0)
s += (bitset[val >> 5] & (1 << (val & 31))) ? stump->left : stump->right;
}
if( s < stages[stageIdx].threshold )
break;
}
if( stageIdx == nstages )
{
int nfaces = atomic_inc(facepos);
if( nfaces < maxFaces )
{
volatile __global int* face = facepos + 1 + nfaces*4;
face[0] = convert_int_rte(ix*factor);
face[1] = convert_int_rte(iy*factor);
face[2] = convert_int_rte(windowsize.x*factor);
face[3] = convert_int_rte(windowsize.y*factor);
}
}
}
}
#endif
......@@ -98,6 +98,8 @@ int main( int argc, const char** argv )
return -1;
}
cout << "old cascade: " << (cascade.isOldFormatCascade() ? "TRUE" : "FALSE") << endl;
if( inputName.empty() || (isdigit(inputName.c_str()[0]) && inputName.c_str()[1] == '\0') )
{
int c = inputName.empty() ? 0 : inputName.c_str()[0] - '0';
......@@ -199,13 +201,12 @@ void detectAndDraw( UMat& img, Mat& canvas, CascadeClassifier& cascade,
t = (double)getTickCount();
cvtColor( img, gray, COLOR_BGR2GRAY );
resize( gray, smallImg, Size(), scale0, scale0, INTER_LINEAR );
cvtColor(smallImg, canvas, COLOR_GRAY2BGR);
equalizeHist( smallImg, smallImg );
resize( img, smallImg, Size(), scale0, scale0, INTER_LINEAR );
cvtColor( smallImg, gray, COLOR_BGR2GRAY );
equalizeHist( gray, gray );
cascade.detectMultiScale( smallImg, faces,
1.1, 2, 0
cascade.detectMultiScale( gray, faces,
1.1, 3, 0
//|CASCADE_FIND_BIGGEST_OBJECT
//|CASCADE_DO_ROUGH_SEARCH
|CASCADE_SCALE_IMAGE
......@@ -213,8 +214,8 @@ void detectAndDraw( UMat& img, Mat& canvas, CascadeClassifier& cascade,
Size(30, 30) );
if( tryflip )
{
flip(smallImg, smallImg, 1);
cascade.detectMultiScale( smallImg, faces2,
flip(gray, gray, 1);
cascade.detectMultiScale( gray, faces2,
1.1, 2, 0
//|CASCADE_FIND_BIGGEST_OBJECT
//|CASCADE_DO_ROUGH_SEARCH
......@@ -227,7 +228,7 @@ void detectAndDraw( UMat& img, Mat& canvas, CascadeClassifier& cascade,
}
}
t = (double)getTickCount() - t;
cvtColor(smallImg, canvas, COLOR_GRAY2BGR);
smallImg.copyTo(canvas);
double fps = getTickFrequency()/t;
......@@ -255,7 +256,7 @@ void detectAndDraw( UMat& img, Mat& canvas, CascadeClassifier& cascade,
color, 3, 8, 0);
if( nestedCascade.empty() )
continue;
UMat smallImgROI = smallImg(*r);
UMat smallImgROI = gray(*r);
nestedCascade.detectMultiScale( smallImgROI, nestedObjects,
1.1, 2, 0
//|CASCADE_FIND_BIGGEST_OBJECT
......