templmatch.cpp 39 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                        Intel License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of Intel Corporation may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"
Alexander Alekhin's avatar
Alexander Alekhin committed
43
#include "opencl_kernels_imgproc.hpp"
44

Ilya Lavrenov's avatar
Ilya Lavrenov committed
45 46
////////////////////////////////////////////////// matchTemplate //////////////////////////////////////////////////////////

47 48 49
namespace cv
{

Ilya Lavrenov's avatar
Ilya Lavrenov committed
50
#ifdef HAVE_OPENCL
51

52 53 54
/////////////////////////////////////////////////// CCORR //////////////////////////////////////////////////////////////

// Two named integer constants with the values 0 and 1.
// NOTE(review): no use of SUM_1 / SUM_2 is visible in this part of the file;
// presumably they select between sum variants - confirm before relying on it.
enum
{
    SUM_1 = 0, SUM_2 = 1
};

Elena Gvozdeva's avatar
Elena Gvozdeva committed
59 60
// Launches the "extractFirstChannel" OpenCL kernel with _image as input and
// _result as output.
//   _image   input array; its depth is passed to the kernel as T1
//   _result  output array; it is not (re)allocated here, so it has to be
//            created by the caller beforehand
//   cn       channel count forwarded to the kernel as the "cn" build option
// Returns false when the kernel could not be created, otherwise the status
// reported by Kernel::run().
static bool extractFirstChannel_32F(InputArray _image, OutputArray _result, int cn)
{
    int depth = _image.depth();

    // On Intel GPUs every work-item handles 4 rows, on other devices one row.
    ocl::Device dev = ocl::Device::getDefault();
    int pxPerWIy = (dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU)) ? 4 : 1;

    ocl::Kernel k("extractFirstChannel", ocl::imgproc::match_template_oclsrc,
                  format("-D FIRST_CHANNEL -D T1=%s -D cn=%d -D PIX_PER_WI_Y=%d",
                         ocl::typeToStr(depth), cn, pxPerWIy));
    if (k.empty())
        return false;

    UMat image  = _image.getUMat();
    UMat result = _result.getUMat();

    // One work-item per result column; rows are divided by pxPerWIy (rounded up).
    size_t globalsize[2] = {(size_t)result.cols, ((size_t)result.rows+pxPerWIy-1)/pxPerWIy};
    return k.args(ocl::KernelArg::ReadOnlyNoSize(image), ocl::KernelArg::WriteOnly(result)).run( 2, globalsize, NULL, false);
}

Ilya Lavrenov's avatar
Ilya Lavrenov committed
79
// Reduces _src to a single float with the "calcSum" OpenCL kernel.
//   _src    input array (the callers in this file pass the template)
//   result  recreated as a 1x1 CV_32FC1 UMat that receives the kernel output
// The kernel is launched as a single work-group whose size is the default
// device's maximum work-group size.
// Returns false when the kernel could not be created, otherwise the status
// reported by Kernel::run().
// NOTE(review): callers store the output as "templ_sqsum", so calcSum
// presumably accumulates squared values - confirm against the .cl source.
static bool sumTemplate(InputArray _src, UMat & result)
{
    int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
    int wdepth = CV_32F, wtype = CV_MAKE_TYPE(wdepth, cn);
    size_t wgs = ocl::Device::getDefault().maxWorkGroupSize();

    // Smallest power of two that is >= wgs, then halved
    // (e.g. wgs = 256 -> 128, wgs = 200 -> 128).
    int wgs2_aligned = 1;
    while (wgs2_aligned < (int)wgs)
        wgs2_aligned <<= 1;
    wgs2_aligned >>= 1;

    char cvt[40];
    ocl::Kernel k("calcSum", ocl::imgproc::match_template_oclsrc,
                  format("-D CALC_SUM -D T=%s -D T1=%s -D WT=%s -D cn=%d -D convertToWT=%s -D WGS=%d -D WGS2_ALIGNED=%d",
                         ocl::typeToStr(type), ocl::typeToStr(depth), ocl::typeToStr(wtype), cn,
                         ocl::convertTypeStr(depth, wdepth, cn, cvt),
                         (int)wgs, wgs2_aligned));
    if (k.empty())
        return false;

    UMat src = _src.getUMat();
    result.create(1, 1, CV_32FC1);

    ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
            resarg = ocl::KernelArg::PtrWriteOnly(result);

    k.args(srcarg, src.cols, (int)src.total(), resarg);

    // Global size == local size: exactly one work-group is started.
    size_t globalsize = wgs;
    return k.run(1, &globalsize, &wgs, false);
}
110

111
// Returns true when both dimensions of `size` are below 18.
// The callers in this file pass the template size and use the answer to pick
// the naive OpenCL kernels instead of the DFT-based convolution.
static bool useNaive(Size size)
{
    const int dft_size = 18;
    return size.height < dft_size && size.width < dft_size;
}

struct ConvolveBuf
118 119 120 121 122
{
    Size result_size;
    Size block_size;
    Size user_block_size;
    Size dft_size;
Elena Gvozdeva's avatar
Elena Gvozdeva committed
123

124 125
    UMat image_spect, templ_spect, result_spect;
    UMat image_block, templ_block, result_data;
Elena Gvozdeva's avatar
Elena Gvozdeva committed
126

127 128
    void create(Size image_size, Size templ_size);
};
Elena Gvozdeva's avatar
Elena Gvozdeva committed
129 130 131 132 133 134

// Computes result/block/DFT sizes for correlating an image of image_size with
// a template of templ_size and allocates the scratch buffers.
//   image_size  size of the image to be searched
//   templ_size  size of the template
// Raises CV_StsOutOfRange when getOptimalDFTSize() cannot provide a DFT size
// for either dimension.
void ConvolveBuf::create(Size image_size, Size templ_size)
{
    result_size = Size(image_size.width - templ_size.width + 1,
                       image_size.height - templ_size.height + 1);

    const double blockScale = 4.5;
    const int minBlockSize = 256;

    // Initial estimate of the result tile size.
    // NOTE(review): the width is seeded from result_size while the height is
    // seeded from templ_size; after the clamp below the seeded width equals
    // result_size.width for any positive result width. Confirm this asymmetry
    // is intentional.
    block_size.width = cvRound(result_size.width*blockScale);
    block_size.width = std::max( block_size.width, minBlockSize - templ_size.width + 1 );
    block_size.width = std::min( block_size.width, result_size.width );
    block_size.height = cvRound(templ_size.height*blockScale);
    block_size.height = std::max( block_size.height, minBlockSize - templ_size.height + 1 );
    block_size.height = std::min( block_size.height, result_size.height );

    // Validate the raw getOptimalDFTSize() results before clamping the width:
    // clamping first would turn a failure value into 2 and hide the error.
    const int optimal_width = getOptimalDFTSize(block_size.width + templ_size.width - 1);
    dft_size.height = getOptimalDFTSize(block_size.height + templ_size.height - 1);
    if( optimal_width <= 0 || dft_size.height <= 0 )
        CV_Error( CV_StsOutOfRange, "the input arrays are too big" );
    dft_size.width = std::max(optimal_width, 2);

    image_block.create(dft_size, CV_32F);
    templ_block.create(dft_size, CV_32F);
    result_data.create(dft_size, CV_32F);

    image_spect.create(dft_size.height, dft_size.width / 2 + 1, CV_32FC2);
    templ_spect.create(dft_size.height, dft_size.width / 2 + 1, CV_32FC2);
    result_spect.create(dft_size.height, dft_size.width / 2 + 1, CV_32FC2);

    // Use maximum result matrix block size for the estimated DFT block size
    block_size.width = std::min(dft_size.width - templ_size.width + 1, result_size.width);
    block_size.height = std::min(dft_size.height - templ_size.height + 1, result_size.height);
}

// Block-wise DFT-based correlation of _image with _templ.
//   _image   input, must have type CV_32F (asserted)
//   _templ   template, must have type CV_32F (asserted)
//   _result  created here as CV_32F with size image - template + 1
// Always returns true; invalid input types raise through CV_Assert.
static bool convolve_dft(InputArray _image, InputArray _templ, OutputArray _result)
{
    ConvolveBuf buf;
    CV_Assert(_image.type() == CV_32F);
    CV_Assert(_templ.type() == CV_32F);

    buf.create(_image.size(), _templ.size());
    _result.create(buf.result_size, CV_32F);

    UMat image  = _image.getUMat();
    UMat templ  = _templ.getUMat();

    UMat result = _result.getUMat();

    Size& block_size = buf.block_size;
    Size& dft_size = buf.dft_size;

    UMat& image_block = buf.image_block;
    UMat& templ_block = buf.templ_block;
    UMat& result_data = buf.result_data;

    UMat& image_spect = buf.image_spect;
    UMat& templ_spect = buf.templ_spect;
    UMat& result_spect = buf.result_spect;

    // Pad the template to the DFT block size (bottom/right) and transform it once.
    UMat templ_roi = templ;
    copyMakeBorder(templ_roi, templ_block, 0, templ_block.rows - templ_roi.rows, 0,
                   templ_block.cols - templ_roi.cols, BORDER_ISOLATED);

    // Only the first templ.rows rows of templ_block are passed as non-zero rows.
    dft(templ_block, templ_spect, 0, templ.rows);

    // Process all blocks of the result matrix
    for (int y = 0; y < result.rows; y += block_size.height)
    {
        for (int x = 0; x < result.cols; x += block_size.width)
        {
            // Image window feeding this tile, clipped at the image border.
            Size image_roi_size(std::min(x + dft_size.width, image.cols) - x,
                                std::min(y + dft_size.height, image.rows) - y);
            Rect roi0(x, y, image_roi_size.width, image_roi_size.height);

            UMat image_roi(image, roi0);

            copyMakeBorder(image_roi, image_block, 0, image_block.rows - image_roi.rows,
                           0, image_block.cols - image_roi.cols, BORDER_ISOLATED);

            dft(image_block, image_spect, 0);

            // Last argument (conjB = true) conjugates the template spectrum.
            mulSpectrums(image_spect, templ_spect, result_spect, 0, true);

            dft(result_spect, result_data, cv::DFT_INVERSE | cv::DFT_REAL_OUTPUT | cv::DFT_SCALE);

            // Copy the top-left part of the block into the matching result tile.
            Size result_roi_size(std::min(x + block_size.width, result.cols) - x,
                                 std::min(y + block_size.height, result.rows) - y);

            Rect roi1(x, y, result_roi_size.width, result_roi_size.height);
            Rect roi2(0, 0, result_roi_size.width, result_roi_size.height);

            UMat result_roi(result, roi1);
            UMat result_block(result_data, roi2);

            result_block.copyTo(result_roi);
        }
    }
    return true;
}

// Correlates _image with _templ via convolve_dft() and stores a single-channel
// CV_32F map of size image - template + 1 in _result.
// Single-channel input goes straight to convolve_dft(). Multi-channel input is
// reshaped to one channel, correlated into a temporary buffer, and then
// reduced to _result by extractFirstChannel_32F().
// Returns false as soon as one of the two helpers reports failure.
static bool convolve_32F(InputArray _image, InputArray _templ, OutputArray _result)
{
    const int out_rows = _image.rows() - _templ.rows() + 1;
    const int out_cols = _image.cols() - _templ.cols() + 1;
    _result.create(out_rows, out_cols, CV_32F);

    const int channels = _image.channels();
    if (channels == 1)
        return convolve_dft(_image, _templ, _result);

    UMat src = _image.getUMat();
    UMat tpl = _templ.getUMat();

    // Correlation of the channel-interleaved data viewed as one wide channel.
    UMat interleaved(src.rows - tpl.rows + 1, (src.cols - tpl.cols + 1) * src.channels(), CV_32F);
    if (!convolve_dft(src.reshape(1), tpl.reshape(1), interleaved))
        return false;

    UMat result = _result.getUMat();
    return extractFirstChannel_32F(interleaved, _result, channels);
}

254
// Runs the "matchTemplate_Naive_CCORR" OpenCL kernel on image and template.
//   _image   image to search
//   _templ   template
//   _result  created here as CV_32FC1 with size image - template + 1
// Returns false when the kernel could not be created, otherwise the status
// reported by Kernel::run().
static bool matchTemplateNaive_CCORR(InputArray _image, InputArray _templ, OutputArray _result)
{
    int type = _image.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
    int wdepth = CV_32F, wtype = CV_MAKE_TYPE(wdepth, cn);

    // Single-channel input on an Intel GPU: 4 pixels per work-item along x.
    ocl::Device dev = ocl::Device::getDefault();
    int pxPerWIx = (cn==1 && dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU)) ? 4 : 1;
    int rated_cn = cn;
    int wtype1 = wtype;

    if (pxPerWIx!=1)
    {
        // T and WT become pxPerWIx-wide vector types for the kernel build.
        rated_cn = pxPerWIx;
        type = CV_MAKE_TYPE(depth, rated_cn);
        wtype1 = CV_MAKE_TYPE(wdepth, rated_cn);
    }

    char cvt[40];
    char cvt1[40];
    const char* convertToWT1 = ocl::convertTypeStr(depth, wdepth, cn, cvt);
    const char* convertToWT = ocl::convertTypeStr(depth, wdepth, rated_cn, cvt1);

    ocl::Kernel k("matchTemplate_Naive_CCORR", ocl::imgproc::match_template_oclsrc,
                  format("-D CCORR -D T=%s -D T1=%s -D WT=%s -D WT1=%s -D convertToWT=%s -D convertToWT1=%s -D cn=%d -D PIX_PER_WI_X=%d",
                         ocl::typeToStr(type), ocl::typeToStr(depth), ocl::typeToStr(wtype1), ocl::typeToStr(wtype),
                         convertToWT, convertToWT1, cn, pxPerWIx));
    if (k.empty())
        return false;

    UMat image = _image.getUMat(), templ = _templ.getUMat();
    _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32FC1);
    UMat result = _result.getUMat();

    k.args(ocl::KernelArg::ReadOnlyNoSize(image), ocl::KernelArg::ReadOnly(templ),
           ocl::KernelArg::WriteOnly(result));

    // Columns are divided by pxPerWIx (rounded up); one work-item per row.
    size_t globalsize[2] = { ((size_t)result.cols+pxPerWIx-1)/pxPerWIx, (size_t)result.rows};
    return k.run(2, globalsize, NULL, false);
}
292

Elena Gvozdeva's avatar
Elena Gvozdeva committed
293 294

// CCORR entry point of the OpenCL path.
// Templates for which useNaive() is true go to the naive kernel; everything
// else goes through convolve_32F(). CV_8U input is converted to CV_32F first;
// other depths are forwarded unchanged.
// NOTE(review): convolve_dft() asserts CV_32F input, so depths other than
// CV_8U / CV_32F would hit that assertion - confirm callers never pass them.
// Returns the status of the helper that was chosen.
static bool matchTemplate_CCORR(InputArray _image, InputArray _templ, OutputArray _result)
{
    if (useNaive(_templ.size()))
        return( matchTemplateNaive_CCORR(_image, _templ, _result));
    else
    {
        if(_image.depth() == CV_8U)
        {
            UMat imagef, templf;
            UMat image = _image.getUMat();
            UMat templ = _templ.getUMat();
            image.convertTo(imagef, CV_32F);
            templ.convertTo(templf, CV_32F);
            return(convolve_32F(imagef, templf, _result));
        }
        else
        {
            return(convolve_32F(_image, _templ, _result));
        }
    }
}
Elena Gvozdeva's avatar
Elena Gvozdeva committed
315

Ilya Lavrenov's avatar
Ilya Lavrenov committed
316 317 318
// CCORR_NORMED on the OpenCL path.
// First fills _result with the plain CCORR map via matchTemplate(), then runs
// the "matchTemplate_CCORR_NORMED" kernel on it in place, feeding the kernel
// the squared-sum integral image and the value produced by sumTemplate().
// Returns false when the kernel or sumTemplate() fails, otherwise the status
// reported by Kernel::run().
static bool matchTemplate_CCORR_NORMED(InputArray _image, InputArray _templ, OutputArray _result)
{
    matchTemplate(_image, _templ, _result, CV_TM_CCORR);

    int type = _image.type(), cn = CV_MAT_CN(type);

    ocl::Kernel k("matchTemplate_CCORR_NORMED", ocl::imgproc::match_template_oclsrc,
                  format("-D CCORR_NORMED -D T=%s -D cn=%d", ocl::typeToStr(type), cn));
    if (k.empty())
        return false;

    UMat image = _image.getUMat(), templ = _templ.getUMat();
    _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32FC1);
    UMat result = _result.getUMat();

    // Only image_sqsums is passed to the kernel; image_sums is required by the
    // integral() overload that also produces the squared sums.
    UMat image_sums, image_sqsums;
    integral(image.reshape(1), image_sums, image_sqsums, CV_32F, CV_32F);

    UMat templ_sqsum;
    if (!sumTemplate(templ, templ_sqsum))
        return false;

    // result is ReadWrite: it already holds the CCORR values computed above.
    k.args(ocl::KernelArg::ReadOnlyNoSize(image_sqsums), ocl::KernelArg::ReadWrite(result),
           templ.rows, templ.cols, ocl::KernelArg::PtrReadOnly(templ_sqsum));

    size_t globalsize[2] = { (size_t)result.cols, (size_t)result.rows };
    return k.run(2, globalsize, NULL, false);
}
344

Ilya Lavrenov's avatar
Ilya Lavrenov committed
345
////////////////////////////////////// SQDIFF //////////////////////////////////////////////////////////////
346

347
// Runs the "matchTemplate_Naive_SQDIFF" OpenCL kernel on image and template.
//   _image   image to search
//   _templ   template
//   _result  created here as CV_32F with size image - template + 1
// Returns false when the kernel could not be created, otherwise the status
// reported by Kernel::run().
static bool matchTemplateNaive_SQDIFF(InputArray _image, InputArray _templ, OutputArray _result)
{
    int type = _image.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
    int wdepth = CV_32F, wtype = CV_MAKE_TYPE(wdepth, cn);

    char cvt[40];
    ocl::Kernel k("matchTemplate_Naive_SQDIFF", ocl::imgproc::match_template_oclsrc,
                  format("-D SQDIFF -D T=%s -D T1=%s -D WT=%s -D convertToWT=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(depth),
                         ocl::typeToStr(wtype), ocl::convertTypeStr(depth, wdepth, cn, cvt), cn));
    if (k.empty())
        return false;

    UMat image = _image.getUMat(), templ = _templ.getUMat();
    _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
    UMat result = _result.getUMat();

    k.args(ocl::KernelArg::ReadOnlyNoSize(image), ocl::KernelArg::ReadOnly(templ),
           ocl::KernelArg::WriteOnly(result));

    // One work-item per result element.
    size_t globalsize[2] = { (size_t)result.cols, (size_t)result.rows };
    return k.run(2, globalsize, NULL, false);
}
369

Elena Gvozdeva's avatar
Elena Gvozdeva committed
370 371
// SQDIFF entry point of the OpenCL path.
// Templates for which useNaive() is true go to the naive kernel. Otherwise
// _result is first filled with the CCORR map via matchTemplate() and then
// updated in place by the "matchTemplate_Prepared_SQDIFF" kernel, which also
// receives the squared-sum integral image and the value from sumTemplate().
// Returns false when a kernel or sumTemplate() fails, otherwise the status of
// the helper / kernel that ran.
static bool matchTemplate_SQDIFF(InputArray _image, InputArray _templ, OutputArray _result)
{
    if (useNaive(_templ.size()))
        return( matchTemplateNaive_SQDIFF(_image, _templ, _result));
    else
    {
        matchTemplate(_image, _templ, _result, CV_TM_CCORR);

        int type = _image.type(), cn = CV_MAT_CN(type);

        ocl::Kernel k("matchTemplate_Prepared_SQDIFF", ocl::imgproc::match_template_oclsrc,
                      format("-D SQDIFF_PREPARED -D T=%s -D cn=%d", ocl::typeToStr(type),  cn));
        if (k.empty())
            return false;

        UMat image = _image.getUMat(), templ = _templ.getUMat();
        _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
        UMat result = _result.getUMat();

        // Only image_sqsums is passed to the kernel.
        UMat image_sums, image_sqsums;
        integral(image.reshape(1), image_sums, image_sqsums, CV_32F, CV_32F);

        UMat templ_sqsum;
        if (!sumTemplate(_templ, templ_sqsum))
            return false;

        // result is ReadWrite: it already holds the CCORR values computed above.
        k.args(ocl::KernelArg::ReadOnlyNoSize(image_sqsums), ocl::KernelArg::ReadWrite(result),
               templ.rows, templ.cols, ocl::KernelArg::PtrReadOnly(templ_sqsum));

        size_t globalsize[2] = { (size_t)result.cols, (size_t)result.rows };

        return k.run(2, globalsize, NULL, false);
    }
}

405
// SQDIFF_NORMED on the OpenCL path.
// Fills _result with the CCORR map via matchTemplate(), then runs the
// "matchTemplate_SQDIFF_NORMED" kernel on it in place, feeding the kernel the
// squared-sum integral image and the value produced by sumTemplate().
// Returns false when the kernel or sumTemplate() fails, otherwise the status
// reported by Kernel::run().
static bool matchTemplate_SQDIFF_NORMED(InputArray _image, InputArray _templ, OutputArray _result)
{
    matchTemplate(_image, _templ, _result, CV_TM_CCORR);

    int type = _image.type(), cn = CV_MAT_CN(type);

    ocl::Kernel k("matchTemplate_SQDIFF_NORMED", ocl::imgproc::match_template_oclsrc,
                  format("-D SQDIFF_NORMED -D T=%s -D cn=%d", ocl::typeToStr(type),  cn));
    if (k.empty())
        return false;

    UMat image = _image.getUMat(), templ = _templ.getUMat();
    _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
    UMat result = _result.getUMat();

    // Only image_sqsums is passed to the kernel.
    UMat image_sums, image_sqsums;
    integral(image.reshape(1), image_sums, image_sqsums, CV_32F, CV_32F);

    UMat templ_sqsum;
    if (!sumTemplate(_templ, templ_sqsum))
        return false;

    // result is ReadWrite: it already holds the CCORR values computed above.
    k.args(ocl::KernelArg::ReadOnlyNoSize(image_sqsums), ocl::KernelArg::ReadWrite(result),
           templ.rows, templ.cols, ocl::KernelArg::PtrReadOnly(templ_sqsum));

    size_t globalsize[2] = { (size_t)result.cols, (size_t)result.rows };

    return k.run(2, globalsize, NULL, false);
}
434

Ilya Lavrenov's avatar
Ilya Lavrenov committed
435
///////////////////////////////////// CCOEFF /////////////////////////////////////////////////////////////////
436

Ilya Lavrenov's avatar
Ilya Lavrenov committed
437 438 439
// CCOEFF on the OpenCL path.
// Fills _result with the CCORR map via matchTemplate(), then runs the
// "matchTemplate_Prepared_CCOEFF" kernel on it in place, feeding the kernel
// the integral image of _image and the per-channel mean of the template.
// _result is not re-created here; the buffer produced by matchTemplate() is
// reused.
// Returns false when the kernel could not be created, otherwise the status
// reported by Kernel::run().
static bool matchTemplate_CCOEFF(InputArray _image, InputArray _templ, OutputArray _result)
{
    matchTemplate(_image, _templ, _result, CV_TM_CCORR);

    UMat image_sums;
    integral(_image, image_sums, CV_32F);

    // Kernel types are taken from the integral image, not from the input image.
    int type = image_sums.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);

    ocl::Kernel k("matchTemplate_Prepared_CCOEFF", ocl::imgproc::match_template_oclsrc,
                  format("-D CCOEFF -D T=%s -D T1=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(depth), cn));
    if (k.empty())
        return false;

    UMat templ  = _templ.getUMat();
    UMat result = _result.getUMat();

    // Despite the name, templ_sum carries the template mean (result of mean()).
    if (cn==1)
    {
        Scalar templMean = mean(templ);
        float templ_sum = (float)templMean[0];

        k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, templ_sum);
    }
    else
    {
        Vec4f templ_sum = (Vec4f)mean(templ);

        k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, templ_sum);
    }

    size_t globalsize[2] = { (size_t)result.cols, (size_t)result.rows };
    return k.run(2, globalsize, NULL, false);
}
471

Ilya Lavrenov's avatar
Ilya Lavrenov committed
472 473
// CCOEFF_NORMED on the OpenCL path.
// Fills _result with the CCORR map via matchTemplate(), then runs the
// "matchTemplate_CCOEFF_NORMED" kernel on it in place. The kernel receives the
// sum and squared-sum integral images of _image plus template statistics
// computed on the host.
// If the template's summed squared deviation from its mean is below
// DBL_EPSILON (a constant template), the result is set to 1 everywhere and
// true is returned without running the kernel.
// Returns false when the kernel could not be created, otherwise true or the
// status reported by Kernel::run().
static bool matchTemplate_CCOEFF_NORMED(InputArray _image, InputArray _templ, OutputArray _result)
{
    matchTemplate(_image, _templ, _result, CV_TM_CCORR);

    UMat temp, image_sums, image_sqsums;
    integral(_image, image_sums, image_sqsums, CV_32F, CV_32F);

    // Kernel types are taken from the integral image, not from the input image.
    int type = image_sums.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);

    ocl::Kernel k("matchTemplate_CCOEFF_NORMED", ocl::imgproc::match_template_oclsrc,
        format("-D CCOEFF_NORMED -D T=%s -D T1=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(depth), cn));
    if (k.empty())
        return false;

    UMat templ = _templ.getUMat();
    Size size = _image.size(), tsize = templ.size();
    _result.create(size.height - templ.rows + 1, size.width - templ.cols + 1, CV_32F);
    UMat result = _result.getUMat();

    // Reciprocal of the number of template pixels.
    float scale = 1.f / tsize.area();

    if (cn == 1)
    {
        float templ_sum = (float)sum(templ)[0];

        multiply(templ, templ, temp, 1, CV_32F);
        float templ_sqsum = (float)sum(temp)[0];

        // Sum of squares minus (sum^2 / N): summed squared deviation from the mean.
        templ_sqsum -= scale * templ_sum * templ_sum;
        // From here on templ_sum holds the template mean.
        templ_sum   *= scale;

        if (templ_sqsum < DBL_EPSILON)
        {
            result = Scalar::all(1);
            return true;
        }

        k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadOnlyNoSize(image_sqsums),
               ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, scale, templ_sum, templ_sqsum);
    }
    else
    {
        Vec4f templ_sum = Vec4f::all(0), templ_sqsum = Vec4f::all(0);
        templ_sum = sum(templ);

        multiply(templ, templ, temp, 1, CV_32F);
        templ_sqsum = sum(temp);

        // Same quantity as in the single-channel branch, accumulated over channels.
        float templ_sqsum_sum = 0;
        for (int i = 0; i < cn; i ++)
            templ_sqsum_sum += templ_sqsum[i] - scale * templ_sum[i] * templ_sum[i];

        // From here on templ_sum holds the per-channel template mean.
        templ_sum *= scale;

        if (templ_sqsum_sum < DBL_EPSILON)
        {
            result = Scalar::all(1);
            return true;
        }

        k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadOnlyNoSize(image_sqsums),
               ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, scale,
               templ_sum, templ_sqsum_sum);
    }

    size_t globalsize[2] = { (size_t)result.cols, (size_t)result.rows };
    return k.run(2, globalsize, NULL, false);
}

540 541
///////////////////////////////////////////////////////////////////////////////////////////////////////////

Ilya Lavrenov's avatar
Ilya Lavrenov committed
542 543
// Dispatches to the OpenCL implementation selected by `method`.
//   _img     image to search; images with more than 4 channels are rejected
//   _templ   template
//   _result  output map, filled by the selected implementation
//   method   index into the table below (order: SQDIFF, SQDIFF_NORMED, CCORR,
//            CCORR_NORMED, CCOEFF, CCOEFF_NORMED)
// Returns false when the request cannot be handled here (too many channels or
// a method outside the table), otherwise the selected implementation's status.
static bool ocl_matchTemplate( InputArray _img, InputArray _templ, OutputArray _result, int method)
{
    int cn = _img.channels();

    if (cn > 4)
        return false;

    typedef bool (*Caller)(InputArray _img, InputArray _templ, OutputArray _result);

    static const Caller callers[] =
    {
        matchTemplate_SQDIFF, matchTemplate_SQDIFF_NORMED, matchTemplate_CCORR,
        matchTemplate_CCORR_NORMED, matchTemplate_CCOEFF, matchTemplate_CCOEFF_NORMED
    };

    // Guard the table lookup: an out-of-range method must not index past the array.
    const int ncallers = (int)(sizeof(callers) / sizeof(callers[0]));
    if (method < 0 || method >= ncallers)
        return false;

    const Caller caller = callers[method];

    return caller(_img, _templ, _result);
}
560

Ilya Lavrenov's avatar
Ilya Lavrenov committed
561
#endif
562

563
#if defined HAVE_IPP
564 565 566

// Common function-pointer signature for the IPP template-matching primitives
// used below (ippiCrossCorrNorm_* / ippiSqrDistanceNorm_*), so the 8u and 32f
// variants can be selected at run time and called through one pointer.
typedef IppStatus (CV_STDCALL * ippimatchTemplate)(const void*, int, IppiSize, const void*, int, IppiSize, Ipp32f* , int , IppEnum , Ipp8u*);

Elena Gvozdeva's avatar
Elena Gvozdeva committed
567
static bool ipp_crossCorr(const Mat& src, const Mat& tpl, Mat& dst)
568
{
569 570
    CV_INSTRUMENT_REGION_IPP()

571 572 573 574 575 576 577 578 579 580
    IppStatus status;

    IppiSize srcRoiSize = {src.cols,src.rows};
    IppiSize tplRoiSize = {tpl.cols,tpl.rows};

    Ipp8u *pBuffer;
    int bufSize=0;

    int depth = src.depth();

581
    ippimatchTemplate ippiCrossCorrNorm =
582 583 584
            depth==CV_8U ? (ippimatchTemplate)ippiCrossCorrNorm_8u32f_C1R:
            depth==CV_32F? (ippimatchTemplate)ippiCrossCorrNorm_32f_C1R: 0;

585
    if (ippiCrossCorrNorm==0)
586 587
        return false;

Elena Gvozdeva's avatar
Elena Gvozdeva committed
588 589 590 591 592 593 594 595
    IppEnum funCfg = (IppEnum)(ippAlgAuto | ippiNormNone | ippiROIValid);

    status = ippiCrossCorrNormGetBufferSize(srcRoiSize, tplRoiSize, funCfg, &bufSize);
    if ( status < 0 )
        return false;

    pBuffer = ippsMalloc_8u( bufSize );

596
    status = CV_INSTRUMENT_FUN_IPP(ippiCrossCorrNorm, src.ptr(), (int)src.step, srcRoiSize, tpl.ptr(), (int)tpl.step, tplRoiSize, dst.ptr<Ipp32f>(), (int)dst.step, funCfg, pBuffer);
Elena Gvozdeva's avatar
Elena Gvozdeva committed
597 598 599 600 601 602 603

    ippsFree( pBuffer );
    return status >= 0;
}

static bool ipp_sqrDistance(const Mat& src, const Mat& tpl, Mat& dst)
{
604 605
    CV_INSTRUMENT_REGION_IPP()

Elena Gvozdeva's avatar
Elena Gvozdeva committed
606 607 608 609 610 611 612 613 614 615
    IppStatus status;

    IppiSize srcRoiSize = {src.cols,src.rows};
    IppiSize tplRoiSize = {tpl.cols,tpl.rows};

    Ipp8u *pBuffer;
    int bufSize=0;

    int depth = src.depth();

616
    ippimatchTemplate ippiSqrDistanceNorm =
Elena Gvozdeva's avatar
Elena Gvozdeva committed
617 618 619
            depth==CV_8U ? (ippimatchTemplate)ippiSqrDistanceNorm_8u32f_C1R:
            depth==CV_32F? (ippimatchTemplate)ippiSqrDistanceNorm_32f_C1R: 0;

620
    if (ippiSqrDistanceNorm==0)
Elena Gvozdeva's avatar
Elena Gvozdeva committed
621 622 623
        return false;

    IppEnum funCfg = (IppEnum)(ippAlgAuto | ippiNormNone | ippiROIValid);
624

Elena Gvozdeva's avatar
Elena Gvozdeva committed
625
    status = ippiSqrDistanceNormGetBufferSize(srcRoiSize, tplRoiSize, funCfg, &bufSize);
626 627 628 629 630
    if ( status < 0 )
        return false;

    pBuffer = ippsMalloc_8u( bufSize );

631
    status = CV_INSTRUMENT_FUN_IPP(ippiSqrDistanceNorm, src.ptr(), (int)src.step, srcRoiSize, tpl.ptr(), (int)tpl.step, tplRoiSize, dst.ptr<Ipp32f>(), (int)dst.step, funCfg, pBuffer);
632 633

    ippsFree( pBuffer );
Elena Gvozdeva's avatar
Elena Gvozdeva committed
634
    return status >= 0;
635 636 637 638
}

#endif

639 640
#include "opencv2/core/hal/hal.hpp"

Vadim Pisarevsky's avatar
Vadim Pisarevsky committed
641
void crossCorr( const Mat& img, const Mat& _templ, Mat& corr,
642 643 644 645 646 647 648
                Size corrsize, int ctype,
                Point anchor, double delta, int borderType )
{
    const double blockScale = 4.5;
    const int minBlockSize = 256;
    std::vector<uchar> buf;

Vadim Pisarevsky's avatar
Vadim Pisarevsky committed
649
    Mat templ = _templ;
650 651 652
    int depth = img.depth(), cn = img.channels();
    int tdepth = templ.depth(), tcn = templ.channels();
    int cdepth = CV_MAT_DEPTH(ctype), ccn = CV_MAT_CN(ctype);
653

654
    CV_Assert( img.dims <= 2 && templ.dims <= 2 && corr.dims <= 2 );
655

Vadim Pisarevsky's avatar
Vadim Pisarevsky committed
656 657 658 659 660
    if( depth != tdepth && tdepth != std::max(CV_32F, depth) )
    {
        _templ.convertTo(templ, std::max(CV_32F, depth));
        tdepth = templ.depth();
    }
661

Vadim Pisarevsky's avatar
Vadim Pisarevsky committed
662
    CV_Assert( depth == tdepth || tdepth == CV_32F);
663 664
    CV_Assert( corrsize.height <= img.rows + templ.rows - 1 &&
               corrsize.width <= img.cols + templ.cols - 1 );
665

666
    CV_Assert( ccn == 1 || delta == 0 );
667

668 669
    corr.create(corrsize, ctype);

670
    int maxDepth = depth > CV_8S ? CV_64F : std::max(std::max(CV_32F, tdepth), cdepth);
671
    Size blocksize, dftsize;
672

673 674 675 676 677 678 679 680 681
    blocksize.width = cvRound(templ.cols*blockScale);
    blocksize.width = std::max( blocksize.width, minBlockSize - templ.cols + 1 );
    blocksize.width = std::min( blocksize.width, corr.cols );
    blocksize.height = cvRound(templ.rows*blockScale);
    blocksize.height = std::max( blocksize.height, minBlockSize - templ.rows + 1 );
    blocksize.height = std::min( blocksize.height, corr.rows );

    dftsize.width = std::max(getOptimalDFTSize(blocksize.width + templ.cols - 1), 2);
    dftsize.height = getOptimalDFTSize(blocksize.height + templ.rows - 1);
682 683 684 685
    if( dftsize.width <= 0 || dftsize.height <= 0 )
        CV_Error( CV_StsOutOfRange, "the input arrays are too big" );

    // recompute block size
686 687 688 689
    blocksize.width = dftsize.width - templ.cols + 1;
    blocksize.width = MIN( blocksize.width, corr.cols );
    blocksize.height = dftsize.height - templ.rows + 1;
    blocksize.height = MIN( blocksize.height, corr.rows );
690

691 692
    Mat dftTempl( dftsize.height*tcn, dftsize.width, maxDepth );
    Mat dftImg( dftsize, maxDepth );
693

694 695 696
    int i, k, bufSize = 0;
    if( tcn > 1 && tdepth != maxDepth )
        bufSize = templ.cols*templ.rows*CV_ELEM_SIZE(tdepth);
697

698 699 700
    if( cn > 1 && depth != maxDepth )
        bufSize = std::max( bufSize, (blocksize.width + templ.cols - 1)*
            (blocksize.height + templ.rows - 1)*CV_ELEM_SIZE(depth));
701

702 703
    if( (ccn > 1 || cn > 1) && cdepth != maxDepth )
        bufSize = std::max( bufSize, blocksize.width*blocksize.height*CV_ELEM_SIZE(cdepth));
704

705
    buf.resize(bufSize);
706

707
    Ptr<hal::DFT2D> c = hal::DFT2D::create(dftsize.width, dftsize.height, dftTempl.depth(), 1, 1, CV_HAL_DFT_IS_INPLACE, templ.rows);
708

709
    // compute DFT of each template plane
710
    for( k = 0; k < tcn; k++ )
711 712
    {
        int yofs = k*dftsize.height;
713 714 715
        Mat src = templ;
        Mat dst(dftTempl, Rect(0, yofs, dftsize.width, dftsize.height));
        Mat dst1(dftTempl, Rect(0, yofs, templ.cols, templ.rows));
716

717
        if( tcn > 1 )
718
        {
719 720 721
            src = tdepth == maxDepth ? dst1 : Mat(templ.size(), tdepth, &buf[0]);
            int pairs[] = {k, 0};
            mixChannels(&templ, 1, &src, 1, pairs, 1);
722 723
        }

724 725
        if( dst1.data != src.data )
            src.convertTo(dst1, dst1.depth());
726

727
        if( dst.cols > templ.cols )
728
        {
729 730
            Mat part(dst, Range(0, templ.rows), Range(templ.cols, dst.cols));
            part = Scalar::all(0);
731
        }
732
        c->apply(dst.data, (int)dst.step, dst.data, (int)dst.step);
733 734
    }

735 736 737
    int tileCountX = (corr.cols + blocksize.width - 1)/blocksize.width;
    int tileCountY = (corr.rows + blocksize.height - 1)/blocksize.height;
    int tileCount = tileCountX * tileCountY;
738

739 740 741
    Size wholeSize = img.size();
    Point roiofs(0,0);
    Mat img0 = img;
742

743 744 745 746 747 748
    if( !(borderType & BORDER_ISOLATED) )
    {
        img.locateROI(wholeSize, roiofs);
        img0.adjustROI(roiofs.y, wholeSize.height-img.rows-roiofs.y,
                       roiofs.x, wholeSize.width-img.cols-roiofs.x);
    }
749
    borderType |= BORDER_ISOLATED;
750

751
    Ptr<hal::DFT2D> cF, cR;
752 753
    int f = CV_HAL_DFT_IS_INPLACE;
    int f_inv = f | CV_HAL_DFT_INVERSE | CV_HAL_DFT_SCALE;
754 755
    cF = hal::DFT2D::create(dftsize.width, dftsize.height, maxDepth, 1, 1, f, blocksize.height + templ.rows - 1);
    cR = hal::DFT2D::create(dftsize.width, dftsize.height, maxDepth, 1, 1, f_inv, blocksize.height);
756

757
    // calculate correlation by blocks
758
    for( i = 0; i < tileCount; i++ )
759
    {
760 761
        int x = (i%tileCountX)*blocksize.width;
        int y = (i/tileCountX)*blocksize.height;
762

763 764 765 766 767 768 769 770 771 772 773
        Size bsz(std::min(blocksize.width, corr.cols - x),
                 std::min(blocksize.height, corr.rows - y));
        Size dsz(bsz.width + templ.cols - 1, bsz.height + templ.rows - 1);
        int x0 = x - anchor.x + roiofs.x, y0 = y - anchor.y + roiofs.y;
        int x1 = std::max(0, x0), y1 = std::max(0, y0);
        int x2 = std::min(img0.cols, x0 + dsz.width);
        int y2 = std::min(img0.rows, y0 + dsz.height);
        Mat src0(img0, Range(y1, y2), Range(x1, x2));
        Mat dst(dftImg, Rect(0, 0, dsz.width, dsz.height));
        Mat dst1(dftImg, Rect(x1-x0, y1-y0, x2-x1, y2-y1));
        Mat cdst(corr, Rect(x, y, bsz.width, bsz.height));
774

775
        for( k = 0; k < cn; k++ )
776
        {
777
            Mat src = src0;
778
            dftImg = Scalar::all(0);
779

780 781
            if( cn > 1 )
            {
782 783 784
                src = depth == maxDepth ? dst1 : Mat(y2-y1, x2-x1, depth, &buf[0]);
                int pairs[] = {k, 0};
                mixChannels(&src0, 1, &src, 1, pairs, 1);
785 786
            }

787 788
            if( dst1.data != src.data )
                src.convertTo(dst1, dst1.depth());
789

790 791 792
            if( x2 - x1 < dsz.width || y2 - y1 < dsz.height )
                copyMakeBorder(dst1, dst, y1-y0, dst.rows-dst1.rows-(y1-y0),
                               x1-x0, dst.cols-dst1.cols-(x1-x0), borderType);
793

794
            if (bsz.height == blocksize.height)
795
                cF->apply(dftImg.data, (int)dftImg.step, dftImg.data, (int)dftImg.step);
796 797 798
            else
                dft( dftImg, dftImg, 0, dsz.height );

799 800 801
            Mat dftTempl1(dftTempl, Rect(0, tcn > 1 ? k*dftsize.height : 0,
                                         dftsize.width, dftsize.height));
            mulSpectrums(dftImg, dftTempl1, dftImg, 0, true);
802

803
            if (bsz.height == blocksize.height)
804
                cR->apply(dftImg.data, (int)dftImg.step, dftImg.data, (int)dftImg.step);
805 806
            else
                dft( dftImg, dftImg, DFT_INVERSE + DFT_SCALE, bsz.height );
807

808
            src = dftImg(Rect(0, 0, bsz.width, bsz.height));
809

810
            if( ccn > 1 )
811
            {
812
                if( cdepth != maxDepth )
813
                {
814 815 816
                    Mat plane(bsz, cdepth, &buf[0]);
                    src.convertTo(plane, cdepth, 1, delta);
                    src = plane;
817
                }
818
                int pairs[] = {0, k};
819
                mixChannels(&src, 1, &cdst, 1, pairs, 1);
820 821 822
            }
            else
            {
823 824
                if( k == 0 )
                    src.convertTo(cdst, cdepth, 1, delta);
825 826
                else
                {
827
                    if( maxDepth != cdepth )
828
                    {
829 830 831
                        Mat plane(bsz, cdepth, &buf[0]);
                        src.convertTo(plane, cdepth);
                        src = plane;
832
                    }
833
                    add(src, cdst, cdst);
834 835 836 837 838
                }
            }
        }
    }
}
Yan Wang's avatar
Yan Wang committed
839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917

// Masked template matching. Only CV_TM_SQDIFF and CV_TM_CCORR_NORMED are
// implemented; every other method raises Error::StsNotImplemented.
//
// `_img` and `_templ` must have the same type (CV_8U or CV_32F depth) and
// `_mask` must have the same size as `_templ`. 8-bit image/template data is
// scaled to float by 1/255. An 8-bit mask is turned into a 0/1 float mask
// (non-zero -> 1); a floating-point mask is used as given.
// `_result` is created as CV_32F with size (W - w + 1) x (H - h + 1).
static void matchTemplateMask( InputArray _img, InputArray _templ, OutputArray _result, int method, InputArray _mask )
{
    int type = _img.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
    CV_Assert( CV_TM_SQDIFF <= method && method <= CV_TM_CCOEFF_NORMED );
    CV_Assert( (depth == CV_8U || depth == CV_32F) && type == _templ.type() && _img.dims() <= 2 );

    Mat img = _img.getMat(), templ = _templ.getMat(), mask = _mask.getMat();

    // State the size preconditions up front: a mismatching mask cannot be
    // multiplied with the template, and an image smaller than the template
    // would yield an empty or negative result size.
    CV_Assert( mask.size() == templ.size() );
    CV_Assert( img.rows >= templ.rows && img.cols >= templ.cols );

    const int ttype = templ.type(), tdepth = CV_MAT_DEPTH(ttype), tcn = CV_MAT_CN(ttype);
    // The mask parameters have to come from the mask itself, not from the image.
    const int mtype = mask.type(), mdepth = CV_MAT_DEPTH(mtype), mcn = CV_MAT_CN(mtype);

    if (depth == CV_8U)
        img.convertTo(img, CV_MAKETYPE(CV_32F, cn), 1.0 / 255);

    if (tdepth == CV_8U)
        templ.convertTo(templ, CV_MAKETYPE(CV_32F, tcn), 1.0 / 255);

    if (mdepth == CV_8U)
    {
        // Binarize into a temporary: comparing in place would write into the
        // caller's mask buffer, because the output has the same size and type.
        Mat binaryMask;
        compare(mask, Scalar::all(0), binaryMask, CMP_NE);
        binaryMask.convertTo(mask, CV_MAKETYPE(CV_32F, mcn), 1.0 / 255);
    }

    Size corrSize(img.cols - templ.cols + 1, img.rows - templ.rows + 1);
    _result.create(corrSize, CV_32F);
    Mat result = _result.getMat();

    Mat img2 = img.mul(img);
    Mat mask2 = mask.mul(mask);
    Mat mask_templ = templ.mul(mask);
    Scalar templMean, templSdv;

    // Sum of squares of the masked template over all channels:
    // area * (variance + mean^2), accumulated per channel.
    double templSum2 = 0;
    meanStdDev( mask_templ, templMean, templSdv );

    templSum2 = templSdv[0]*templSdv[0] + templSdv[1]*templSdv[1] + templSdv[2]*templSdv[2] + templSdv[3]*templSdv[3];
    templSum2 += templMean[0]*templMean[0] + templMean[1]*templMean[1] + templMean[2]*templMean[2] + templMean[3]*templMean[3];
    templSum2 *= ((double)templ.rows * templ.cols);

    if (method == CV_TM_SQDIFF)
    {
        Mat mask2_templ = templ.mul(mask2);

        // result = corr(img^2, mask) - 2*corr(img, mask^2 * templ) + sum((mask*templ)^2)
        Mat corr(corrSize, CV_32F);
        crossCorr( img, mask2_templ, corr, corr.size(), corr.type(), Point(0,0), 0, 0 );
        crossCorr( img2, mask, result, result.size(), result.type(), Point(0,0), 0, 0 );

        result -= corr * 2;
        result += templSum2;
    }
    else if (method == CV_TM_CCORR_NORMED)
    {
        // A (numerically) all-zero masked template cannot be normalized.
        if (templSum2 < DBL_EPSILON)
        {
            result = Scalar::all(1);
            return;
        }

        // result = corr(img, mask*templ) / sqrt(corr(img^2, mask^2) * sum((mask*templ)^2))
        Mat corr(corrSize, CV_32F);
        crossCorr( img2, mask2, corr, corr.size(), corr.type(), Point(0,0), 0, 0 );
        crossCorr( img, mask_templ, result, result.size(), result.type(), Point(0,0), 0, 0 );

        sqrt(corr, corr);
        result = result.mul(1/corr);
        result /= std::sqrt(templSum2);
    }
    else
        CV_Error(Error::StsNotImplemented, "");
}
918
}
919 920


921
namespace cv
922
{
923 924 925
// Post-processes `result`, which on entry holds the raw cross-correlation of
// `img` and `templ`, into the response of the requested `method`, in place.
//
// CV_TM_CCORR needs no post-processing. For the other methods, window sums
// (and, except for CV_TM_CCOEFF, window sums of squares) are taken from
// integral images of `img`, `cn` interleaved channels at a time.
static void common_matchTemplate( Mat& img, Mat& templ, Mat& result, int method, int cn )
{
    if( method == CV_TM_CCORR )
        return;

    // Which numerator is built: plain correlation, mean-subtracted
    // correlation, or squared difference (everything that is neither).
    const bool isCorr = method == CV_TM_CCORR || method == CV_TM_CCORR_NORMED;
    const bool isCoeff = method == CV_TM_CCOEFF || method == CV_TM_CCOEFF_NORMED;
    const bool isSqDiff = !isCorr && !isCoeff;
    const bool isNormed = method == CV_TM_CCORR_NORMED ||
                          method == CV_TM_SQDIFF_NORMED ||
                          method == CV_TM_CCOEFF_NORMED;

    const double invArea = 1./((double)templ.rows * templ.cols);

    Mat sum, sqsum;
    Scalar templMean, templSdv;
    double templNorm = 0, templSum2 = 0;

    // Corners of the template-sized window in the squared integral image;
    // they stay null for CV_TM_CCOEFF, which never reads them.
    double* q0 = 0;
    double* q1 = 0;
    double* q2 = 0;
    double* q3 = 0;

    if( method == CV_TM_CCOEFF )
    {
        integral(img, sum, CV_64F);
        templMean = mean(templ);
    }
    else
    {
        integral(img, sum, sqsum, CV_64F);
        meanStdDev( templ, templMean, templSdv );

        templNorm = templSdv[0]*templSdv[0] + templSdv[1]*templSdv[1] + templSdv[2]*templSdv[2] + templSdv[3]*templSdv[3];

        // A constant template has no defined normalized coefficient.
        if( templNorm < DBL_EPSILON && method == CV_TM_CCOEFF_NORMED )
        {
            result = Scalar::all(1);
            return;
        }

        templSum2 = templNorm + templMean[0]*templMean[0] + templMean[1]*templMean[1] + templMean[2]*templMean[2] + templMean[3]*templMean[3];

        if( !isCoeff )
        {
            templMean = Scalar::all(0);
            templNorm = templSum2;
        }

        templSum2 /= invArea;
        templNorm = std::sqrt(templNorm);
        templNorm /= std::sqrt(invArea); // care of accuracy here

        q0 = reinterpret_cast<double*>(sqsum.data);
        q1 = q0 + templ.cols*cn;
        q2 = reinterpret_cast<double*>(sqsum.data + templ.rows*sqsum.step);
        q3 = q2 + templ.cols*cn;
    }

    // Same four corners in the plain integral image.
    double* const p0 = reinterpret_cast<double*>(sum.data);
    double* const p1 = p0 + templ.cols*cn;
    double* const p2 = reinterpret_cast<double*>(sum.data + templ.rows*sum.step);
    double* const p3 = p2 + templ.cols*cn;

    // Row strides of the integral images, in doubles.
    const int sumstep = sum.data ? (int)(sum.step / sizeof(double)) : 0;
    const int sqstep = sqsum.data ? (int)(sqsum.step / sizeof(double)) : 0;

    for( int row = 0; row < result.rows; row++ )
    {
        float* const out = result.ptr<float>(row);
        int sumIdx = row * sumstep;
        int sqIdx = row * sqstep;

        for( int col = 0; col < result.cols; col++, sumIdx += cn, sqIdx += cn )
        {
            double num = out[col];
            double wndMean2 = 0, wndSum2 = 0;

            if( isCoeff )
            {
                for( int ch = 0; ch < cn; ch++ )
                {
                    const double wndSum = p0[sumIdx+ch] - p1[sumIdx+ch] - p2[sumIdx+ch] + p3[sumIdx+ch];
                    wndMean2 += wndSum*wndSum;
                    num -= wndSum*templMean[ch];
                }

                wndMean2 *= invArea;
            }

            if( isNormed || isSqDiff )
            {
                for( int ch = 0; ch < cn; ch++ )
                {
                    const double wndSqSum = q0[sqIdx+ch] - q1[sqIdx+ch] - q2[sqIdx+ch] + q3[sqIdx+ch];
                    wndSum2 += wndSqSum;
                }

                if( isSqDiff )
                {
                    num = wndSum2 - 2*num + templSum2;
                    num = MAX(num, 0.);
                }
            }

            if( isNormed )
            {
                const double denom = std::sqrt(MAX(wndSum2 - wndMean2,0))*templNorm;
                if( fabs(num) < denom )
                    num /= denom;
                else if( fabs(num) < denom*1.125 )
                    num = num > 0 ? 1 : -1;   // slightly out of range: clamp to +/-1
                else
                    num = method != CV_TM_SQDIFF_NORMED ? 0 : 1;
            }

            out[col] = (float)num;
        }
    }
}
1040
}
1041

1042

1043 1044 1045 1046 1047
#if defined HAVE_IPP
namespace cv
{
// Tries to run template matching through IPP. Returns true when `result` has
// been fully computed, false when the caller has to use the generic path.
//
// The IPP path is taken only for single-channel data and only when the
// template is smaller than half of the image in both dimensions.
static bool ipp_matchTemplate( Mat& img, Mat& templ, Mat& result, int method, int cn )
{
    CV_INSTRUMENT_REGION_IPP()

    const bool templMuchSmaller = templ.rows < img.rows/2 && templ.cols < img.cols/2;
    if( cn != 1 || !templMuchSmaller )
        return false;

    // Squared difference has a dedicated IPP primitive.
    if( method == CV_TM_SQDIFF )
        return ipp_sqrDistance(img, templ, result);

    // Every other method starts from the raw cross-correlation and is then
    // finished by the common post-processing step.
    if( !ipp_crossCorr(img, templ, result) )
        return false;

    common_matchTemplate(img, templ, result, method, cn);
    return true;
}
}
#endif

////////////////////////////////////////////////////////////////////////////////////////////////////////

// Public entry point of template matching.
//
// A non-empty `_mask` routes the call to the masked implementation. Otherwise
// the input depth must be CV_8U or CV_32F and both inputs must share a type.
// If the image is smaller than the template in some dimension it must be no
// larger in either one, and the two arrays then swap roles. `_result` is
// created as CV_32F with size (W - w + 1) x (H - h + 1).
void cv::matchTemplate( InputArray _img, InputArray _templ, OutputArray _result, int method, InputArray _mask )
{
    CV_INSTRUMENT_REGION()

    if (!_mask.empty())
    {
        cv::matchTemplateMask(_img, _templ, _result, method, _mask);
        return;
    }

    int type = _img.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
    CV_Assert( CV_TM_SQDIFF <= method && method <= CV_TM_CCOEFF_NORMED );
    CV_Assert( (depth == CV_8U || depth == CV_32F) && type == _templ.type() && _img.dims() <= 2 );

    const Size imgSize = _img.size();
    const Size templSize = _templ.size();
    const bool needswap = imgSize.height < templSize.height || imgSize.width < templSize.width;
    if (needswap)
    {
        CV_Assert(_img.size().height <= _templ.size().height && _img.size().width <= _templ.size().width);
    }

    CV_OCL_RUN(_img.dims() <= 2 && _result.isUMat(),
               (!needswap ? ocl_matchTemplate(_img, _templ, _result, method) : ocl_matchTemplate(_templ, _img, _result, method)))

    // From here on `img` is always the larger of the two arrays.
    Mat img = needswap ? _templ.getMat() : _img.getMat();
    Mat templ = needswap ? _img.getMat() : _templ.getMat();

    const Size corrSize(img.cols - templ.cols + 1, img.rows - templ.rows + 1);
    _result.create(corrSize, CV_32F);
    Mat result = _result.getMat();

#ifdef HAVE_TEGRA_OPTIMIZATION
    if (tegra::useTegra() && tegra::matchTemplate(img, templ, result, method))
        return;
#endif

    CV_IPP_RUN(true, ipp_matchTemplate(img, templ, result, method, cn))

    // Generic path: raw cross-correlation followed by the method-specific
    // post-processing.
    crossCorr( img, templ, result, result.size(), result.type(), Point(0,0), 0, 0);

    common_matchTemplate(img, templ, result, method, cn);
}

1119 1120
// C API wrapper around cv::matchTemplate. `_result` must already be a CV_32F
// array of size (|W - w| + 1) x (|H - h| + 1); it is filled in place.
CV_IMPL void
cvMatchTemplate( const CvArr* _img, const CvArr* _templ, CvArr* _result, int method )
{
    cv::Mat img = cv::cvarrToMat(_img);
    cv::Mat templ = cv::cvarrToMat(_templ);
    cv::Mat result = cv::cvarrToMat(_result);

    CV_Assert( result.size() == cv::Size(std::abs(img.cols - templ.cols) + 1,
                                         std::abs(img.rows - templ.rows) + 1) &&
              result.type() == CV_32F );

    cv::matchTemplate(img, templ, result, method);
}

/* End of file. */