cascadeclassifier.cpp 30.8 KB
Newer Older
marina.kolpakova's avatar
marina.kolpakova committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
25
//     and/or other materials provided with the distribution.
marina.kolpakova's avatar
marina.kolpakova committed
26 27 28 29 30
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
31
// any express or implied warranties, including, but not limited to, the implied
marina.kolpakova's avatar
marina.kolpakova committed
32 33 34 35 36 37 38 39 40 41 42 43
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"
44
#include "opencv2/objdetect/objdetect_c.h"
marina.kolpakova's avatar
marina.kolpakova committed
45 46

using namespace cv;
47
using namespace cv::cuda;
48

49
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
50

51 52
// Stub implementations used when OpenCV is built without CUDA support:
// any attempt to create a GPU cascade classifier reports the missing-CUDA error.
Ptr<cuda::CascadeClassifier> cv::cuda::CascadeClassifier::create(const String&)
{
    throw_no_cuda();
    return Ptr<cuda::CascadeClassifier>();
}

Ptr<cuda::CascadeClassifier> cv::cuda::CascadeClassifier::create(const FileStorage&)
{
    throw_no_cuda();
    return Ptr<cuda::CascadeClassifier>();
}
53

54
#else
55

56 57 58 59 60
//
// CascadeClassifierBase
//

namespace
61
{
62 63 64 65
    class CascadeClassifierBase : public cuda::CascadeClassifier
    {
    public:
        CascadeClassifierBase();
66

67 68
        virtual void setMaxObjectSize(Size maxObjectSize) { maxObjectSize_ = maxObjectSize; }
        virtual Size getMaxObjectSize() const { return maxObjectSize_; }
69

70 71
        virtual void setMinObjectSize(Size minSize) { minObjectSize_ = minSize; }
        virtual Size getMinObjectSize() const { return minObjectSize_; }
72

73 74
        virtual void setScaleFactor(double scaleFactor) { scaleFactor_ = scaleFactor; }
        virtual double getScaleFactor() const { return scaleFactor_; }
75

76 77
        virtual void setMinNeighbors(int minNeighbors) { minNeighbors_ = minNeighbors; }
        virtual int getMinNeighbors() const { return minNeighbors_; }
78

79 80
        virtual void setFindLargestObject(bool findLargestObject) { findLargestObject_ = findLargestObject; }
        virtual bool getFindLargestObject() { return findLargestObject_; }
81

82 83 84 85 86 87 88 89 90 91 92
        virtual void setMaxNumObjects(int maxNumObjects) { maxNumObjects_ = maxNumObjects; }
        virtual int getMaxNumObjects() const { return maxNumObjects_; }

    protected:
        Size maxObjectSize_;
        Size minObjectSize_;
        double scaleFactor_;
        int minNeighbors_;
        bool findLargestObject_;
        int maxNumObjects_;
    };
93

94 95 96 97 98 99 100
    CascadeClassifierBase::CascadeClassifierBase() :
        maxObjectSize_(),
        minObjectSize_(),
        scaleFactor_(1.2),
        minNeighbors_(4),
        findLargestObject_(false),
        maxNumObjects_(100)
101 102
    {
    }
103
}
104

105 106 107 108 109
//
// HaarCascade
//

#ifdef HAVE_OPENCV_CUDALEGACY
110

111
namespace
112
{
113
    class HaarCascade_Impl : public CascadeClassifierBase
114
    {
115 116
    public:
        explicit HaarCascade_Impl(const String& filename);
117

118
        virtual Size getClassifierSize() const;
119

120 121 122
        virtual void detectMultiScale(InputArray image,
                                      OutputArray objects,
                                      Stream& stream);
marina.kolpakova's avatar
marina.kolpakova committed
123

124 125
        virtual void convert(OutputArray gpu_objects,
                             std::vector<Rect>& objects);
marina.kolpakova's avatar
marina.kolpakova committed
126

127 128 129 130
    private:
        NCVStatus load(const String& classifierFile);
        NCVStatus calculateMemReqsAndAllocate(const Size& frameSize);
        NCVStatus process(const GpuMat& src, GpuMat& objects, cv::Size ncvMinSize, /*out*/ unsigned int& numDetections);
marina.kolpakova's avatar
marina.kolpakova committed
131

132
        Size lastAllocatedFrameSize;
marina.kolpakova's avatar
marina.kolpakova committed
133

134 135
        Ptr<NCVMemStackAllocator> gpuAllocator;
        Ptr<NCVMemStackAllocator> cpuAllocator;
marina.kolpakova's avatar
marina.kolpakova committed
136

137 138
        cudaDeviceProp devProp;
        NCVStatus ncvStat;
marina.kolpakova's avatar
marina.kolpakova committed
139

140 141
        Ptr<NCVMemNativeAllocator> gpuCascadeAllocator;
        Ptr<NCVMemNativeAllocator> cpuCascadeAllocator;
marina.kolpakova's avatar
marina.kolpakova committed
142

143 144 145
        Ptr<NCVVectorAlloc<HaarStage64> >           h_haarStages;
        Ptr<NCVVectorAlloc<HaarClassifierNode128> > h_haarNodes;
        Ptr<NCVVectorAlloc<HaarFeature64> >         h_haarFeatures;
marina.kolpakova's avatar
marina.kolpakova committed
146

147
        HaarClassifierCascadeDescriptor haar;
148

149 150 151 152
        Ptr<NCVVectorAlloc<HaarStage64> >           d_haarStages;
        Ptr<NCVVectorAlloc<HaarClassifierNode128> > d_haarNodes;
        Ptr<NCVVectorAlloc<HaarFeature64> >         d_haarFeatures;
    };
marina.kolpakova's avatar
marina.kolpakova committed
153

154 155 156 157
    // Routes NCV diagnostic messages into OpenCV's error mechanism.
    static void NCVDebugOutputHandler(const String &msg)
    {
        CV_Error(Error::GpuApiCallError, msg.c_str());
    }
marina.kolpakova's avatar
marina.kolpakova committed
158

159 160 161 162 163
    HaarCascade_Impl::HaarCascade_Impl(const String& filename) :
        lastAllocatedFrameSize(-1, -1)  // invalid size forces allocation on the first frame
    {
        ncvSetDebugOutputHandler(NCVDebugOutputHandler);
        ncvSafeCall( load(filename) );
    }

166
    Size HaarCascade_Impl::getClassifierSize() const
    {
        // Training window size of the loaded cascade.
        return Size(haar.ClassifierSize.width, haar.ClassifierSize.height);
    }
marina.kolpakova's avatar
marina.kolpakova committed
170

171 172 173 174 175
    void HaarCascade_Impl::detectMultiScale(InputArray _image,
                                            OutputArray _objects,
                                            Stream& stream)
    {
        const GpuMat image = _image.getGpuMat();

        CV_Assert( image.depth() == CV_8U);
        CV_Assert( scaleFactor_ > 1 );
        CV_Assert( !stream );  // the NCV path only supports the default stream

        // Detection windows may not be smaller than the training window.
        // NOTE(review): minObjectSize_ is honoured only when BOTH dimensions
        // exceed the classifier size; a component-wise max may be intended — confirm.
        Size ncvMinSize = getClassifierSize();
        if (ncvMinSize.width < minObjectSize_.width && ncvMinSize.height < minObjectSize_.height)
        {
            ncvMinSize.width = minObjectSize_.width;
            ncvMinSize.height = minObjectSize_.height;
        }

        BufferPool pool(stream);
        GpuMat objectsBuf = pool.getBuffer(1, maxNumObjects_, traits::Type<Rect>::value);

        unsigned int numDetections;
        ncvSafeCall( process(image, objectsBuf, ncvMinSize, numDetections) );

        // Expose only the detections actually produced.
        if (numDetections > 0)
            objectsBuf.colRange(0, numDetections).copyTo(_objects);
        else
            _objects.release();
    }

204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222
    void HaarCascade_Impl::convert(OutputArray _gpu_objects, std::vector<Rect>& objects)
    {
        // No detections at all: report an empty vector.
        if (_gpu_objects.empty())
        {
            objects.clear();
            return;
        }

        // Bring the detections to host memory if they live on the GPU.
        Mat gpu_objects;
        if (_gpu_objects.kind() == _InputArray::CUDA_GPU_MAT)
            _gpu_objects.getGpuMat().download(gpu_objects);
        else
            gpu_objects = _gpu_objects.getMat();

        // The detection buffer is a single row of Rect entries.
        CV_Assert( gpu_objects.rows == 1 );
        CV_Assert( gpu_objects.type() == traits::Type<Rect>::value );

        const Rect* first = gpu_objects.ptr<Rect>();
        objects.assign(first, first + gpu_objects.cols);
    }
marina.kolpakova's avatar
marina.kolpakova committed
228

229
    NCVStatus HaarCascade_Impl::load(const String& classifierFile)
    {
        int devId = cv::cuda::getDevice();
        ncvAssertCUDAReturn(cudaGetDeviceProperties(&devProp, devId), NCV_CUDA_ERROR);

        // Load the classifier from file (assuming its size is about 1 mb) using a simple allocator
        gpuCascadeAllocator = makePtr<NCVMemNativeAllocator>(NCVMemoryTypeDevice, static_cast<int>(devProp.textureAlignment));
        cpuCascadeAllocator = makePtr<NCVMemNativeAllocator>(NCVMemoryTypeHostPinned, static_cast<int>(devProp.textureAlignment));

        ncvAssertPrintReturn(gpuCascadeAllocator->isInitialized(), "Error creating cascade GPU allocator", NCV_CUDA_ERROR);
        ncvAssertPrintReturn(cpuCascadeAllocator->isInitialized(), "Error creating cascade CPU allocator", NCV_CUDA_ERROR);

        // Query cascade dimensions so exactly the right amount of memory is allocated.
        Ncv32u haarNumStages, haarNumNodes, haarNumFeatures;
        ncvStat = ncvHaarGetClassifierSize(classifierFile, haarNumStages, haarNumNodes, haarNumFeatures);
        ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error reading classifier size (check the file)", NCV_FILE_ERROR);

        // Host-side buffers for the cascade representation.
        h_haarStages.reset  (new NCVVectorAlloc<HaarStage64>(*cpuCascadeAllocator, haarNumStages));
        h_haarNodes.reset   (new NCVVectorAlloc<HaarClassifierNode128>(*cpuCascadeAllocator, haarNumNodes));
        h_haarFeatures.reset(new NCVVectorAlloc<HaarFeature64>(*cpuCascadeAllocator, haarNumFeatures));

        ncvAssertPrintReturn(h_haarStages->isMemAllocated(), "Error in cascade CPU allocator", NCV_CUDA_ERROR);
        ncvAssertPrintReturn(h_haarNodes->isMemAllocated(), "Error in cascade CPU allocator", NCV_CUDA_ERROR);
        ncvAssertPrintReturn(h_haarFeatures->isMemAllocated(), "Error in cascade CPU allocator", NCV_CUDA_ERROR);

        ncvStat = ncvHaarLoadFromFile_host(classifierFile, haar, *h_haarStages, *h_haarNodes, *h_haarFeatures);
        ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error loading classifier", NCV_FILE_ERROR);

        // Device-side buffers mirroring the host data.
        d_haarStages.reset  (new NCVVectorAlloc<HaarStage64>(*gpuCascadeAllocator, haarNumStages));
        d_haarNodes.reset   (new NCVVectorAlloc<HaarClassifierNode128>(*gpuCascadeAllocator, haarNumNodes));
        d_haarFeatures.reset(new NCVVectorAlloc<HaarFeature64>(*gpuCascadeAllocator, haarNumFeatures));

        ncvAssertPrintReturn(d_haarStages->isMemAllocated(), "Error in cascade GPU allocator", NCV_CUDA_ERROR);
        ncvAssertPrintReturn(d_haarNodes->isMemAllocated(), "Error in cascade GPU allocator", NCV_CUDA_ERROR);
        ncvAssertPrintReturn(d_haarFeatures->isMemAllocated(), "Error in cascade GPU allocator", NCV_CUDA_ERROR);

        // Upload host -> device (synchronous copies on the default stream).
        ncvStat = h_haarStages->copySolid(*d_haarStages, 0);
        ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", NCV_CUDA_ERROR);
        ncvStat = h_haarNodes->copySolid(*d_haarNodes, 0);
        ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", NCV_CUDA_ERROR);
        ncvStat = h_haarFeatures->copySolid(*d_haarFeatures, 0);
        ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", NCV_CUDA_ERROR);

        return NCV_SUCCESS;
    }

274
    NCVStatus HaarCascade_Impl::calculateMemReqsAndAllocate(const Size& frameSize)
    {
        // Allocators are reused as long as the frame size stays the same.
        if (lastAllocatedFrameSize == frameSize)
            return NCV_SUCCESS;

        // Calculate memory requirements and create real allocators
        NCVMemStackAllocator gpuCounter(static_cast<int>(devProp.textureAlignment));
        NCVMemStackAllocator cpuCounter(static_cast<int>(devProp.textureAlignment));

        ncvAssertPrintReturn(gpuCounter.isInitialized(), "Error creating GPU memory counter", NCV_CUDA_ERROR);
        ncvAssertPrintReturn(cpuCounter.isInitialized(), "Error creating CPU memory counter", NCV_CUDA_ERROR);

        NCVMatrixAlloc<Ncv8u> d_src(gpuCounter, frameSize.width, frameSize.height);
        NCVMatrixAlloc<Ncv8u> h_src(cpuCounter, frameSize.width, frameSize.height);

        ncvAssertReturn(d_src.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
        ncvAssertReturn(h_src.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);

        NCVVectorAlloc<NcvRect32u> d_rects(gpuCounter, 100);
        ncvAssertReturn(d_rects.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);

        // Dry-run the detector against the counting allocators to measure peak usage.
        NcvSize32u roi;
        roi.width = d_src.width();
        roi.height = d_src.height();
        Ncv32u numDetections;
        ncvStat = ncvDetectObjectsMultiScale_device(d_src, roi, d_rects, numDetections, haar, *h_haarStages,
            *d_haarStages, *d_haarNodes, *d_haarFeatures, haar.ClassifierSize, 4, 1.2f, 1, 0, gpuCounter, cpuCounter, devProp, 0);

        ncvAssertReturnNcvStat(ncvStat);
        ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);

        // Real allocators sized to the measured peaks.
        gpuAllocator = makePtr<NCVMemStackAllocator>(NCVMemoryTypeDevice, gpuCounter.maxSize(), static_cast<int>(devProp.textureAlignment));
        cpuAllocator = makePtr<NCVMemStackAllocator>(NCVMemoryTypeHostPinned, cpuCounter.maxSize(), static_cast<int>(devProp.textureAlignment));

        ncvAssertPrintReturn(gpuAllocator->isInitialized(), "Error creating GPU memory allocator", NCV_CUDA_ERROR);
        ncvAssertPrintReturn(cpuAllocator->isInitialized(), "Error creating CPU memory allocator", NCV_CUDA_ERROR);

        lastAllocatedFrameSize = frameSize;
        return NCV_SUCCESS;
    }

317 318 319
    NCVStatus HaarCascade_Impl::process(const GpuMat& src, GpuMat& objects, cv::Size ncvMinSize, /*out*/ unsigned int& numDetections)
    {
        // Ensure working memory matches the current frame size. The status was
        // previously ignored, letting a failed allocation fall through to a
        // crash inside the detector; propagate it instead.
        ncvStat = calculateMemReqsAndAllocate(src.size());
        ncvAssertReturnNcvStat(ncvStat);

        // Wrap the input GpuMat as an NCV matrix without copying.
        NCVMemPtr src_beg;
        src_beg.ptr = (void*)src.ptr<Ncv8u>();
        src_beg.memtype = NCVMemoryTypeDevice;

        NCVMemSegment src_seg;
        src_seg.begin = src_beg;
        src_seg.size  = src.step * src.rows;

        NCVMatrixReuse<Ncv8u> d_src(src_seg, static_cast<int>(devProp.textureAlignment), src.cols, src.rows, static_cast<int>(src.step), true);
        ncvAssertReturn(d_src.isMemReused(), NCV_ALLOCATOR_BAD_REUSE);

        CV_Assert(objects.rows == 1);

        // Wrap the output buffer the same way (one row of NcvRect32u).
        NCVMemPtr objects_beg;
        objects_beg.ptr = (void*)objects.ptr<NcvRect32u>();
        objects_beg.memtype = NCVMemoryTypeDevice;

        NCVMemSegment objects_seg;
        objects_seg.begin = objects_beg;
        objects_seg.size = objects.step * objects.rows;
        NCVVectorReuse<NcvRect32u> d_rects(objects_seg, objects.cols);
        ncvAssertReturn(d_rects.isMemReused(), NCV_ALLOCATOR_BAD_REUSE);

        NcvSize32u roi;
        roi.width = d_src.width();
        roi.height = d_src.height();

        NcvSize32u winMinSize(ncvMinSize.width, ncvMinSize.height);

        Ncv32u flags = 0;
        flags |= findLargestObject_ ? NCVPipeObjDet_FindLargestObject : 0;

        // Run the full NCV multi-scale pipeline on the default stream.
        ncvStat = ncvDetectObjectsMultiScale_device(
            d_src, roi, d_rects, numDetections, haar, *h_haarStages,
            *d_haarStages, *d_haarNodes, *d_haarFeatures,
            winMinSize,
            minNeighbors_,
            scaleFactor_, 1,
            flags,
            *gpuAllocator, *cpuAllocator, devProp, 0);
        ncvAssertReturnNcvStat(ncvStat);
        ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);

        return NCV_SUCCESS;
    }
366
}
367

368
#endif
369

370 371 372
//
// LbpCascade
//
373

374
namespace cv { namespace cuda { namespace device
375 376 377 378 379 380 381 382 383 384
{
    namespace lbp
    {
        void classifyPyramid(int frameW,
                             int frameH,
                             int windowW,
                             int windowH,
                             float initalScale,
                             float factor,
                             int total,
385
                             const PtrStepSzb& mstages,
386
                             const int nstages,
387 388 389 390
                             const PtrStepSzi& mnodes,
                             const PtrStepSzf& mleaves,
                             const PtrStepSzi& msubsets,
                             const PtrStepSzb& mfeatures,
391
                             const int subsetSize,
392
                             PtrStepSz<int4> objects,
393
                             unsigned int* classified,
394
                             PtrStepSzi integral);
395

396 397 398 399 400 401
        void connectedConmonents(PtrStepSz<int4> candidates,
                                 int ncandidates,
                                 PtrStepSz<int4> objects,
                                 int groupThreshold,
                                 float grouping_eps,
                                 unsigned int* nclasses);
402 403 404
    }
}}}

405
namespace
406
{
407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432
    cv::Size operator -(const cv::Size& a, const cv::Size& b)
    {
        return cv::Size(a.width - b.width, a.height - b.height);
    }

    cv::Size operator +(const cv::Size& a, const int& i)
    {
        return cv::Size(a.width + i, a.height + i);
    }

    cv::Size operator *(const cv::Size& a, const float& f)
    {
        return cv::Size(cvRound(a.width * f), cvRound(a.height * f));
    }

    cv::Size operator /(const cv::Size& a, const float& f)
    {
        return cv::Size(cvRound(a.width / f), cvRound(a.height / f));
    }

    bool operator <=(const cv::Size& a, const cv::Size& b)
    {
        return a.width <= b.width && a.height <= b.width;
    }

    struct PyrLavel
433
    {
434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515
        PyrLavel(int _order, float _scale, cv::Size frame, cv::Size window, cv::Size minObjectSize)
        {
            do
            {
                order = _order;
                scale = pow(_scale, order);
                sFrame = frame / scale;
                workArea = sFrame - window + 1;
                sWindow = window * scale;
                _order++;
            } while (sWindow <= minObjectSize);
        }

        bool isFeasible(cv::Size maxObj)
        {
            return workArea.width > 0 && workArea.height > 0 && sWindow <= maxObj;
        }

        PyrLavel next(float factor, cv::Size frame, cv::Size window, cv::Size minObjectSize)
        {
            return PyrLavel(order + 1, factor, frame, window, minObjectSize);
        }

        int order;
        float scale;
        cv::Size sFrame;
        cv::Size workArea;
        cv::Size sWindow;
    };

    // LBP cascade running on this module's own CUDA kernels (device::lbp).
    class LbpCascade_Impl : public CascadeClassifierBase
    {
    public:
        explicit LbpCascade_Impl(const FileStorage& file);

        virtual Size getClassifierSize() const { return NxM; }

        virtual void detectMultiScale(InputArray image,
                                      OutputArray objects,
                                      Stream& stream);

        virtual void convert(OutputArray gpu_objects,
                             std::vector<Rect>& objects);

    private:
        // Parses the FileStorage tree and uploads the cascade data to the GPU.
        bool load(const FileNode &root);
        // Lazily (re)allocates the pyramid/integral working buffers.
        void allocateBuffers(cv::Size frame);

    private:
        struct Stage
        {
            int    first;     // index of the stage's first weak tree
            int    ntrees;    // number of weak trees in the stage
            float  threshold; // stage pass threshold
        };

        enum stage { BOOST = 0 };
        enum feature { LBP = 1, HAAR = 2 };

        static const stage stageType = BOOST;
        static const feature featureType = LBP;

        cv::Size NxM;    // training window size
        bool isStumps;   // only depth-1 (stump) trees are supported
        int ncategories;
        int subsetSize;
        int nodeStep;

        // gpu representation of classifier
        GpuMat stage_mat;
        GpuMat trees_mat;
        GpuMat nodes_mat;
        GpuMat leaves_mat;
        GpuMat subsets_mat;
        GpuMat features_mat;

        GpuMat integral;
        GpuMat integralBuffer;
        GpuMat resuzeBuffer;  // sic: resize scratch buffer (name kept — used elsewhere)

        GpuMat candidates;
        static const int integralFactor = 4;
    };

518 519 520 521
    LbpCascade_Impl::LbpCascade_Impl(const FileStorage& file)
    {
        // load() returns false on a malformed cascade; previously the result
        // was ignored and a broken classifier could be used silently.
        if (!load(file.getFirstTopLevelNode()))
            CV_Error(Error::StsParseError, "Invalid LBP cascade file");
    }
522

523 524 525
    void LbpCascade_Impl::detectMultiScale(InputArray _image,
                                           OutputArray _objects,
                                           Stream& stream)
    {
        const GpuMat image = _image.getGpuMat();

        CV_Assert( image.depth() == CV_8U);
        CV_Assert( scaleFactor_ > 1 );
        CV_Assert( !stream );  // only the default stream is supported

        const float grouping_eps = 0.2f;

        BufferPool pool(stream);
        GpuMat objects = pool.getBuffer(1, maxNumObjects_, traits::Type<Rect>::value);

        // used for debug
        // candidates.setTo(cv::Scalar::all(0));
        // objects.setTo(cv::Scalar::all(0));

        if (maxObjectSize_ == cv::Size())
            maxObjectSize_ = image.size();

        allocateBuffers(image.size());

        // Device-side counter of raw candidate windows.
        unsigned int classified = 0;
        GpuMat dclassified(1, 1, CV_32S);
        cudaSafeCall( cudaMemcpy(dclassified.ptr(), &classified, sizeof(int), cudaMemcpyHostToDevice) );

        PyrLavel level(0, scaleFactor_, image.size(), NxM, minObjectSize_);

        while (level.isFeasible(maxObjectSize_))
        {
            int acc = level.sFrame.width + 1;
            float iniScale = level.scale;

            cv::Size area = level.workArea;
            int step = 1 + (level.scale <= 2.f);

            int total = 0, prev  = 0;

            // Pack as many pyramid levels as fit side by side into the wide
            // integral buffer, then classify them in one kernel launch.
            while (acc <= integralFactor * (image.cols + 1) && level.isFeasible(maxObjectSize_))
            {
                // create sutable matrix headers
                GpuMat src  = resuzeBuffer(cv::Rect(0, 0, level.sFrame.width, level.sFrame.height));
                GpuMat sint = integral(cv::Rect(prev, 0, level.sFrame.width + 1, level.sFrame.height + 1));

                // generate integral for scale
                cuda::resize(image, src, level.sFrame, 0, 0, cv::INTER_LINEAR);
                cuda::integral(src, sint);

                // calculate job
                int totalWidth = level.workArea.width / step;
                total += totalWidth * (level.workArea.height / step);

                // go to next pyramide level
                level = level.next(scaleFactor_, image.size(), NxM, minObjectSize_);
                area = level.workArea;

                step = (1 + (level.scale <= 2.f));
                prev = acc;
                acc += level.sFrame.width + 1;
            }

            device::lbp::classifyPyramid(image.cols, image.rows, NxM.width - 1, NxM.height - 1, iniScale, scaleFactor_, total, stage_mat, stage_mat.cols / sizeof(Stage), nodes_mat,
                leaves_mat, subsets_mat, features_mat, subsetSize, candidates, dclassified.ptr<unsigned int>(), integral);
        }

        if (minNeighbors_ <= 0  || objects.empty())
            return;

        // Group the raw candidates into final detections on the GPU.
        cudaSafeCall( cudaMemcpy(&classified, dclassified.ptr(), sizeof(int), cudaMemcpyDeviceToHost) );
        device::lbp::connectedConmonents(candidates, classified, objects, minNeighbors_, grouping_eps, dclassified.ptr<unsigned int>());

        cudaSafeCall( cudaMemcpy(&classified, dclassified.ptr(), sizeof(int), cudaMemcpyDeviceToHost) );
        cudaSafeCall( cudaDeviceSynchronize() );

        // Expose only the grouped detections.
        if (classified > 0)
            objects.colRange(0, classified).copyTo(_objects);
        else
            _objects.release();
    }

609
    void LbpCascade_Impl::convert(OutputArray _gpu_objects, std::vector<Rect>& objects)
    {
        // No detections at all: report an empty vector.
        if (_gpu_objects.empty())
        {
            objects.clear();
            return;
        }

        // Bring the detections to host memory if they live on the GPU.
        Mat gpu_objects;
        if (_gpu_objects.kind() == _InputArray::CUDA_GPU_MAT)
            _gpu_objects.getGpuMat().download(gpu_objects);
        else
            gpu_objects = _gpu_objects.getMat();

        // The detection buffer is a single row of Rect entries.
        CV_Assert( gpu_objects.rows == 1 );
        CV_Assert( gpu_objects.type() == traits::Type<Rect>::value );

        const Rect* first = gpu_objects.ptr<Rect>();
        objects.assign(first, first + gpu_objects.cols);
    }

634
    bool LbpCascade_Impl::load(const FileNode &root)
635
    {
636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662
        const char *CUDA_CC_STAGE_TYPE       = "stageType";
        const char *CUDA_CC_FEATURE_TYPE     = "featureType";
        const char *CUDA_CC_BOOST            = "BOOST";
        const char *CUDA_CC_LBP              = "LBP";
        const char *CUDA_CC_MAX_CAT_COUNT    = "maxCatCount";
        const char *CUDA_CC_HEIGHT           = "height";
        const char *CUDA_CC_WIDTH            = "width";
        const char *CUDA_CC_STAGE_PARAMS     = "stageParams";
        const char *CUDA_CC_MAX_DEPTH        = "maxDepth";
        const char *CUDA_CC_FEATURE_PARAMS   = "featureParams";
        const char *CUDA_CC_STAGES           = "stages";
        const char *CUDA_CC_STAGE_THRESHOLD  = "stageThreshold";
        const float CUDA_THRESHOLD_EPS       = 1e-5f;
        const char *CUDA_CC_WEAK_CLASSIFIERS = "weakClassifiers";
        const char *CUDA_CC_INTERNAL_NODES   = "internalNodes";
        const char *CUDA_CC_LEAF_VALUES      = "leafValues";
        const char *CUDA_CC_FEATURES         = "features";
        const char *CUDA_CC_RECT             = "rect";

        String stageTypeStr = (String)root[CUDA_CC_STAGE_TYPE];
        CV_Assert(stageTypeStr == CUDA_CC_BOOST);

        String featureTypeStr = (String)root[CUDA_CC_FEATURE_TYPE];
        CV_Assert(featureTypeStr == CUDA_CC_LBP);

        NxM.width =  (int)root[CUDA_CC_WIDTH];
        NxM.height = (int)root[CUDA_CC_HEIGHT];
663 664
        CV_Assert( NxM.height > 0 && NxM.width > 0 );

665
        isStumps = ((int)(root[CUDA_CC_STAGE_PARAMS][CUDA_CC_MAX_DEPTH]) == 1) ? true : false;
666 667
        CV_Assert(isStumps);

668
        FileNode fn = root[CUDA_CC_FEATURE_PARAMS];
669 670 671
        if (fn.empty())
            return false;

672
        ncategories = fn[CUDA_CC_MAX_CAT_COUNT];
673 674 675 676

        subsetSize = (ncategories + 31) / 32;
        nodeStep = 3 + ( ncategories > 0 ? subsetSize : 1 );

677
        fn = root[CUDA_CC_STAGES];
678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693
        if (fn.empty())
            return false;

        std::vector<Stage> stages;
        stages.reserve(fn.size());

        std::vector<int> cl_trees;
        std::vector<int> cl_nodes;
        std::vector<float> cl_leaves;
        std::vector<int> subsets;

        FileNodeIterator it = fn.begin(), it_end = fn.end();
        for (size_t si = 0; it != it_end; si++, ++it )
        {
            FileNode fns = *it;
            Stage st;
694
            st.threshold = (float)fns[CUDA_CC_STAGE_THRESHOLD] - CUDA_THRESHOLD_EPS;
695

696
            fns = fns[CUDA_CC_WEAK_CLASSIFIERS];
697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712
            if (fns.empty())
                return false;

            st.ntrees = (int)fns.size();
            st.first = (int)cl_trees.size();

            stages.push_back(st);// (int, int, float)

            cl_trees.reserve(stages[si].first + stages[si].ntrees);

            // weak trees
            FileNodeIterator it1 = fns.begin(), it1_end = fns.end();
            for ( ; it1 != it1_end; ++it1 )
            {
                FileNode fnw = *it1;

713 714
                FileNode internalNodes = fnw[CUDA_CC_INTERNAL_NODES];
                FileNode leafValues = fnw[CUDA_CC_LEAF_VALUES];
715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747
                if ( internalNodes.empty() || leafValues.empty() )
                    return false;

                int nodeCount = (int)internalNodes.size()/nodeStep;
                cl_trees.push_back(nodeCount);

                cl_nodes.reserve((cl_nodes.size() + nodeCount) * 3);
                cl_leaves.reserve(cl_leaves.size() + leafValues.size());

                if( subsetSize > 0 )
                    subsets.reserve(subsets.size() + nodeCount * subsetSize);

                // nodes
                FileNodeIterator iIt = internalNodes.begin(), iEnd = internalNodes.end();

                for( ; iIt != iEnd; )
                {
                    cl_nodes.push_back((int)*(iIt++));
                    cl_nodes.push_back((int)*(iIt++));
                    cl_nodes.push_back((int)*(iIt++));

                    if( subsetSize > 0 )
                        for( int j = 0; j < subsetSize; j++, ++iIt )
                            subsets.push_back((int)*iIt);
                }

                // leaves
                iIt = leafValues.begin(), iEnd = leafValues.end();
                for( ; iIt != iEnd; ++iIt )
                    cl_leaves.push_back((float)*iIt);
            }
        }

748
        fn = root[CUDA_CC_FEATURES];
749 750 751 752 753 754 755
        if( fn.empty() )
            return false;
        std::vector<uchar> features;
        features.reserve(fn.size() * 4);
        FileNodeIterator f_it = fn.begin(), f_end = fn.end();
        for (; f_it != f_end; ++f_it)
        {
756
            FileNode rect = (*f_it)[CUDA_CC_RECT];
757 758 759 760 761 762 763 764
            FileNodeIterator r_it = rect.begin();
            features.push_back(saturate_cast<uchar>((int)*(r_it++)));
            features.push_back(saturate_cast<uchar>((int)*(r_it++)));
            features.push_back(saturate_cast<uchar>((int)*(r_it++)));
            features.push_back(saturate_cast<uchar>((int)*(r_it++)));
        }

        // copy data structures on gpu
765
        stage_mat.upload(cv::Mat(1, (int) (stages.size() * sizeof(Stage)), CV_8UC1, (uchar*)&(stages[0]) ));
766 767 768 769 770 771 772 773 774
        trees_mat.upload(cv::Mat(cl_trees).reshape(1,1));
        nodes_mat.upload(cv::Mat(cl_nodes).reshape(1,1));
        leaves_mat.upload(cv::Mat(cl_leaves).reshape(1,1));
        subsets_mat.upload(cv::Mat(subsets).reshape(1,1));
        features_mat.upload(cv::Mat(features).reshape(4,1));

        return true;
    }

775 776 777 778
    void LbpCascade_Impl::allocateBuffers(cv::Size frame)
    {
        if (frame == cv::Size())
            return;

        // Grow-only reallocation: buffers are kept while they are big enough.
        if (resuzeBuffer.empty() || frame.width > resuzeBuffer.cols || frame.height > resuzeBuffer.rows)
        {
            resuzeBuffer.create(frame, CV_8UC1);

            // Room for integralFactor integral images packed side by side.
            integral.create(frame.height + 1, integralFactor * (frame.width + 1), CV_32SC1);

        #ifdef HAVE_OPENCV_CUDALEGACY
            NcvSize32u roiSize;
            roiSize.width = frame.width;
            roiSize.height = frame.height;

            cudaDeviceProp prop;
            cudaSafeCall( cudaGetDeviceProperties(&prop, cv::cuda::getDevice()) );

            Ncv32u bufSize;
            ncvSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) );
            integralBuffer.create(1, bufSize, CV_8UC1);
        #endif

            candidates.create(1 , frame.width >> 1, CV_32SC4);
        }
    }
marina.kolpakova's avatar
marina.kolpakova committed
802

803
}
marina.kolpakova's avatar
marina.kolpakova committed
804

805 806 807
//
// create
//
// Factory for cv::cuda::CascadeClassifier, dispatching on the file type:
//   * "*.nvbin"                       -> legacy Haar cascade (needs cudalegacy)
//   * unreadable by FileStorage       -> legacy Haar cascade (needs cudalegacy)
//   * XML/YAML with featureType "LBP" -> LbpCascade_Impl
//   * any other readable FileStorage  -> legacy Haar cascade (needs cudalegacy)
// Throws Error::StsUnsupportedFormat when a Haar path is taken but the
// cudalegacy module was not built.
Ptr<cuda::CascadeClassifier> cv::cuda::CascadeClassifier::create(const String& filename)
{
    // Extract the lower-cased file extension (text after the last dot).
    String fext = filename.substr(filename.find_last_of(".") + 1);
    fext = fext.toLowerCase();

    // nvbin files are serialized Haar cascades handled by the legacy module.
    if (fext == "nvbin")
    {
    #ifndef HAVE_OPENCV_CUDALEGACY
        CV_Error(Error::StsUnsupportedFormat, "OpenCV CUDA objdetect was built without HaarCascade");
        return Ptr<cuda::CascadeClassifier>();
    #else
        return makePtr<HaarCascade_Impl>(filename);
    #endif
    }

    FileStorage fs(filename, FileStorage::READ);

    // Not a readable XML/YAML storage: fall back to the legacy Haar loader,
    // which understands the old binary cascade formats.
    if (!fs.isOpened())
    {
    #ifndef HAVE_OPENCV_CUDALEGACY
        CV_Error(Error::StsUnsupportedFormat, "OpenCV CUDA objdetect was built without HaarCascade");
        return Ptr<cuda::CascadeClassifier>();
    #else
        return makePtr<HaarCascade_Impl>(filename);
    #endif
    }

    // Readable storage: choose the implementation by the declared featureType.
    const char *CUDA_CC_LBP = "LBP";
    String featureTypeStr = (String)fs.getFirstTopLevelNode()["featureType"];
    if (featureTypeStr == CUDA_CC_LBP)
    {
        return makePtr<LbpCascade_Impl>(fs);
    }
    else
    {
    #ifndef HAVE_OPENCV_CUDALEGACY
        CV_Error(Error::StsUnsupportedFormat, "OpenCV CUDA objdetect was built without HaarCascade");
        return Ptr<cuda::CascadeClassifier>();
    #else
        return makePtr<HaarCascade_Impl>(filename);
    #endif
    }

    // Unreachable: every branch above either returns or throws via CV_Error;
    // kept for compilers that cannot prove the function always exits.
    CV_Error(Error::StsUnsupportedFormat, "Unsupported format for CUDA CascadeClassifier");
    return Ptr<cuda::CascadeClassifier>();
}

// Creates a CUDA cascade classifier from an already opened FileStorage.
// Only the LBP implementation is reachable through this overload.
Ptr<cuda::CascadeClassifier> cv::cuda::CascadeClassifier::create(const FileStorage& file)
{
    return makePtr<LbpCascade_Impl>(file);
}

#endif