/*M/////////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
//    Peng Xiao, pengxiao@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include <iomanip>
#include "precomp.hpp"
yao's avatar
yao committed
47
#include "mcwutil.hpp"
yao's avatar
yao committed
48 49 50 51

using namespace cv;
using namespace cv::ocl;

52
namespace cv
yao's avatar
yao committed
53
{
54 55 56 57
    namespace ocl
    {
        ///////////////////////////OpenCL kernel strings///////////////////////////
        extern const char *nonfree_surf;
yao's avatar
yao committed
58 59 60

        const char* noImage2dOption = "-D DISABLE_IMAGE2D";

Andrey Kamaev's avatar
Andrey Kamaev committed
61 62
        static void openCLExecuteKernelSURF(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
            size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth)
yao's avatar
yao committed
63 64 65 66 67 68 69 70 71 72
        {
            if(support_image2d())
            {
                openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth);
            }
            else
            {
                openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, noImage2dOption);
            }
        }
73 74
    }
}
yao's avatar
yao committed
75

76 77

// Integer division of total by grain, rounded up.
// For non-negative total and positive grain this is the number of grain-sized
// chunks needed to cover total elements.
static inline int divUp(int total, int grain)
{
    return (total + grain - 1) / grain;
}
// Returns the wavelet size for the given layer of the given octave:
// (HAAR_SIZE0 + HAAR_SIZE_INC * layer) shifted left by octave.
static inline int calcSize(int octave, int layer)
{
    /* Wavelet size at first layer of first octave. */
    const int HAAR_SIZE0 = 9;

    /* Wavelet size increment between layers. This should be an even number,
    such that the wavelet sizes in an octave are either all even or all odd.
    This ensures that when looking for the neighbors of a sample, the layers
    above and below are aligned correctly. */
    const int HAAR_SIZE_INC = 6;

    return (HAAR_SIZE0 + HAAR_SIZE_INC * layer) << octave;
}
yao's avatar
yao committed
95

yao's avatar
yao committed
96

97 98 99 100
class SURF_OCL_Invoker
{
public:
    // facilities
101
    void bindImgTex(const oclMat &img, cl_mem &texture);
yao's avatar
yao committed
102

103 104
    //void loadGlobalConstants(int maxCandidates, int maxFeatures, int img_rows, int img_cols, int nOctaveLayers, float hessianThreshold);
    //void loadOctaveConstants(int octave, int layer_rows, int layer_cols);
yao's avatar
yao committed
105

yao's avatar
yao committed
106
    // kernel callers declarations
107
    void icvCalcLayerDetAndTrace_gpu(oclMat &det, oclMat &trace, int octave, int nOctaveLayers, int layer_rows);
yao's avatar
yao committed
108

109 110
    void icvFindMaximaInLayer_gpu(const oclMat &det, const oclMat &trace, oclMat &maxPosBuffer, oclMat &maxCounter, int counterOffset,
                                  int octave, bool use_mask, int nLayers, int layer_rows, int layer_cols);
yao's avatar
yao committed
111

yao's avatar
yao committed
112
    void icvInterpolateKeypoint_gpu(const oclMat &det, const oclMat &maxPosBuffer, int maxCounter,
113
                                    oclMat &keypoints, oclMat &counters, int octave, int layer_rows, int maxFeatures);
yao's avatar
yao committed
114

115
    void icvCalcOrientation_gpu(const oclMat &keypoints, int nFeatures);
yao's avatar
yao committed
116

yao's avatar
yao committed
117 118
    void icvSetUpright_gpu(const oclMat &keypoints, int nFeatures);

119
    void compute_descriptors_gpu(const oclMat &descriptors, const oclMat &keypoints, int nFeatures);
yao's avatar
yao committed
120
    // end of kernel callers declarations
yao's avatar
yao committed
121

122 123
    SURF_OCL_Invoker(SURF_OCL &surf, const oclMat &img, const oclMat &mask) :
        surf_(surf),
124
        img_cols(img.cols), img_rows(img.rows),
yao's avatar
yao committed
125 126
        use_mask(!mask.empty()), counters(oclMat()),
        imgTex(NULL), sumTex(NULL), maskSumTex(NULL), _img(img)
127 128 129 130
    {
        CV_Assert(!img.empty() && img.type() == CV_8UC1);
        CV_Assert(mask.empty() || (mask.size() == img.size() && mask.type() == CV_8UC1));
        CV_Assert(surf_.nOctaves > 0 && surf_.nOctaveLayers > 0);
yao's avatar
yao committed
131

132 133 134
        const int min_size = calcSize(surf_.nOctaves - 1, 0);
        CV_Assert(img_rows - min_size >= 0);
        CV_Assert(img_cols - min_size >= 0);
yao's avatar
yao committed
135

136 137 138 139 140
        const int layer_rows = img_rows >> (surf_.nOctaves - 1);
        const int layer_cols = img_cols >> (surf_.nOctaves - 1);
        const int min_margin = ((calcSize((surf_.nOctaves - 1), 2) >> 1) >> (surf_.nOctaves - 1)) + 1;
        CV_Assert(layer_rows - 2 * min_margin > 0);
        CV_Assert(layer_cols - 2 * min_margin > 0);
yao's avatar
yao committed
141

142 143
        maxFeatures   = std::min(static_cast<int>(img.size().area() * surf.keypointsRatio), 65535);
        maxCandidates = std::min(static_cast<int>(1.5 * maxFeatures), 65535);
yao's avatar
yao committed
144

145
        CV_Assert(maxFeatures > 0);
yao's avatar
yao committed
146

147 148
        counters.create(1, surf_.nOctaves + 1, CV_32SC1);
        counters.setTo(Scalar::all(0));
yao's avatar
yao committed
149

yao's avatar
yao committed
150 151 152
        integral(img, surf_.sum);
        if(support_image2d())
        {
153
        bindImgTex(img, imgTex);
yao's avatar
yao committed
154 155
            bindImgTex(surf_.sum, sumTex);
        }
yao's avatar
yao committed
156

157
        maskSumTex = 0;
yao's avatar
yao committed
158

159 160 161 162 163
        if (use_mask)
        {
            throw std::exception();
            //!FIXME
            // temp fix for missing min overload
164 165 166 167 168
            //oclMat temp(mask.size(), mask.type());
            //temp.setTo(Scalar::all(1.0));
            ////cv::ocl::min(mask, temp, surf_.mask1);           ///////// disable this
            //integral(surf_.mask1, surf_.maskSum);
            //bindImgTex(surf_.maskSum, maskSumTex);
yao's avatar
yao committed
169
        }
170
    }
yao's avatar
yao committed
171

172
    void detectKeypoints(oclMat &keypoints)
173 174
    {
        // create image pyramid buffers
yao's avatar
yao committed
175
        // different layers have same sized buffers, but they are sampled from Gaussian kernel.
176 177
        ensureSizeIsEnough(img_rows * (surf_.nOctaveLayers + 2), img_cols, CV_32FC1, surf_.det);
        ensureSizeIsEnough(img_rows * (surf_.nOctaveLayers + 2), img_cols, CV_32FC1, surf_.trace);
yao's avatar
yao committed
178

179 180 181
        ensureSizeIsEnough(1, maxCandidates, CV_32SC4, surf_.maxPosBuffer);
        ensureSizeIsEnough(SURF_OCL::ROWS_COUNT, maxFeatures, CV_32FC1, keypoints);
        keypoints.setTo(Scalar::all(0));
yao's avatar
yao committed
182

183 184 185 186
        for (int octave = 0; octave < surf_.nOctaves; ++octave)
        {
            const int layer_rows = img_rows >> octave;
            const int layer_cols = img_cols >> octave;
yao's avatar
yao committed
187

188
            //loadOctaveConstants(octave, layer_rows, layer_cols);
yao's avatar
yao committed
189

190
            icvCalcLayerDetAndTrace_gpu(surf_.det, surf_.trace, octave, surf_.nOctaveLayers, layer_rows);
yao's avatar
yao committed
191

192
            icvFindMaximaInLayer_gpu(surf_.det, surf_.trace, surf_.maxPosBuffer, counters, 1 + octave,
193
                                     octave, use_mask, surf_.nOctaveLayers, layer_rows, layer_cols);
yao's avatar
yao committed
194

yao's avatar
yao committed
195 196
            int maxCounter = ((Mat)counters).at<int>(1 + octave);
            maxCounter = std::min(maxCounter, static_cast<int>(maxCandidates));
yao's avatar
yao committed
197

198 199 200
            if (maxCounter > 0)
            {
                icvInterpolateKeypoint_gpu(surf_.det, surf_.maxPosBuffer, maxCounter,
201
                                           keypoints, counters, octave, layer_rows, maxFeatures);
yao's avatar
yao committed
202
            }
203
        }
yao's avatar
yao committed
204 205
        int featureCounter = Mat(counters).at<int>(0);
        featureCounter = std::min(featureCounter, static_cast<int>(maxFeatures));
yao's avatar
yao committed
206

207
        keypoints.cols = featureCounter;
yao's avatar
yao committed
208

209
        if (surf_.upright)
yao's avatar
yao committed
210 211 212 213
        {
            //keypoints.row(SURF_OCL::ANGLE_ROW).setTo(Scalar::all(90.0));
            setUpright(keypoints);
        }
214
        else
yao's avatar
yao committed
215
        {
216
            findOrientation(keypoints);
yao's avatar
yao committed
217 218 219 220 221 222 223 224 225 226
        }
    }

    void setUpright(oclMat &keypoints)
    {
        const int nFeatures = keypoints.cols;
        if(nFeatures > 0)
        {
            icvSetUpright_gpu(keypoints, keypoints.cols);
        }
227
    }
yao's avatar
yao committed
228

229
    void findOrientation(oclMat &keypoints)
230 231 232
    {
        const int nFeatures = keypoints.cols;
        if (nFeatures > 0)
yao's avatar
yao committed
233
        {
234
            icvCalcOrientation_gpu(keypoints, nFeatures);
yao's avatar
yao committed
235
        }
236
    }
yao's avatar
yao committed
237

238
    void computeDescriptors(const oclMat &keypoints, oclMat &descriptors, int descriptorSize)
239 240 241
    {
        const int nFeatures = keypoints.cols;
        if (nFeatures > 0)
yao's avatar
yao committed
242
        {
243 244
            ensureSizeIsEnough(nFeatures, descriptorSize, CV_32F, descriptors);
            compute_descriptors_gpu(descriptors, keypoints, nFeatures);
yao's avatar
yao committed
245
        }
246
    }
yao's avatar
yao committed
247

248 249 250 251 252 253 254 255 256
    ~SURF_OCL_Invoker()
    {
        if(imgTex)
            openCLFree(imgTex);
        if(sumTex)
            openCLFree(sumTex);
        if(maskSumTex)
            openCLFree(maskSumTex);
    }
yao's avatar
yao committed
257

258
private:
259
    SURF_OCL &surf_;
yao's avatar
yao committed
260

261
    int img_cols, img_rows;
yao's avatar
yao committed
262

263
    bool use_mask;
yao's avatar
yao committed
264

265 266
    int maxCandidates;
    int maxFeatures;
yao's avatar
yao committed
267

268
    oclMat counters;
yao's avatar
yao committed
269

270 271 272 273
    // texture buffers
    cl_mem imgTex;
    cl_mem sumTex;
    cl_mem maskSumTex;
yao's avatar
yao committed
274

yao's avatar
yao committed
275
    const oclMat _img; // make a copy for non-image2d_t supported platform
276

277 278
    SURF_OCL_Invoker &operator= (const SURF_OCL_Invoker &right)
    {
279 280 281 282
        (*this) = right;
        return *this;
    } // remove warning C4512
};
yao's avatar
yao committed
283 284 285 286 287 288 289 290 291 292 293 294 295

// Default construction: 4 octaves with 2 layers each, Hessian threshold 100,
// extended descriptors, oriented (non-upright) features, and a keypoint
// budget of 1% of the image area.
cv::ocl::SURF_OCL::SURF_OCL()
{
    // scale-space layout
    nOctaves       = 4;
    nOctaveLayers  = 2;

    // detection
    hessianThreshold = 100.0f;
    keypointsRatio   = 0.01f;

    // description
    extended = true;
    upright  = false;
}

// Constructs a detector from explicit parameters. Every argument is stored in
// the member of the same name; the threshold is narrowed to float with
// saturate_cast.
cv::ocl::SURF_OCL::SURF_OCL(double _threshold, int _nOctaves, int _nOctaveLayers, bool _extended, float _keypointsRatio, bool _upright)
{
    hessianThreshold = saturate_cast<float>(_threshold);
    extended = _extended;
    nOctaves = _nOctaves;
    nOctaveLayers = _nOctaveLayers;
    keypointsRatio = _keypointsRatio;
    upright = _upright;
}

// Number of floats per descriptor: 128 when `extended` is set, 64 otherwise.
int cv::ocl::SURF_OCL::descriptorSize() const
{
    if (extended)
    {
        return 128;
    }
    return 64;
}

309
void cv::ocl::SURF_OCL::uploadKeypoints(const std::vector<KeyPoint> &keypoints, oclMat &keypointsGPU)
yao's avatar
yao committed
310 311 312 313 314 315 316
{
    if (keypoints.empty())
        keypointsGPU.release();
    else
    {
        Mat keypointsCPU(SURF_OCL::ROWS_COUNT, static_cast<int>(keypoints.size()), CV_32FC1);

317 318 319 320 321 322 323
        float *kp_x = keypointsCPU.ptr<float>(SURF_OCL::X_ROW);
        float *kp_y = keypointsCPU.ptr<float>(SURF_OCL::Y_ROW);
        int *kp_laplacian = keypointsCPU.ptr<int>(SURF_OCL::LAPLACIAN_ROW);
        int *kp_octave = keypointsCPU.ptr<int>(SURF_OCL::OCTAVE_ROW);
        float *kp_size = keypointsCPU.ptr<float>(SURF_OCL::SIZE_ROW);
        float *kp_dir = keypointsCPU.ptr<float>(SURF_OCL::ANGLE_ROW);
        float *kp_hessian = keypointsCPU.ptr<float>(SURF_OCL::HESSIAN_ROW);
yao's avatar
yao committed
324 325 326

        for (size_t i = 0, size = keypoints.size(); i < size; ++i)
        {
327
            const KeyPoint &kp = keypoints[i];
yao's avatar
yao committed
328 329 330 331 332 333 334 335 336 337 338 339 340
            kp_x[i] = kp.pt.x;
            kp_y[i] = kp.pt.y;
            kp_octave[i] = kp.octave;
            kp_size[i] = kp.size;
            kp_dir[i] = kp.angle;
            kp_hessian[i] = kp.response;
            kp_laplacian[i] = 1;
        }

        keypointsGPU.upload(keypointsCPU);
    }
}

341
void cv::ocl::SURF_OCL::downloadKeypoints(const oclMat &keypointsGPU, std::vector<KeyPoint> &keypoints)
yao's avatar
yao committed
342 343 344 345 346 347 348 349 350 351 352 353 354
{
    const int nFeatures = keypointsGPU.cols;

    if (nFeatures == 0)
        keypoints.clear();
    else
    {
        CV_Assert(keypointsGPU.type() == CV_32FC1 && keypointsGPU.rows == ROWS_COUNT);

        Mat keypointsCPU(keypointsGPU);

        keypoints.resize(nFeatures);

355 356 357 358 359 360 361
        float *kp_x = keypointsCPU.ptr<float>(SURF_OCL::X_ROW);
        float *kp_y = keypointsCPU.ptr<float>(SURF_OCL::Y_ROW);
        int *kp_laplacian = keypointsCPU.ptr<int>(SURF_OCL::LAPLACIAN_ROW);
        int *kp_octave = keypointsCPU.ptr<int>(SURF_OCL::OCTAVE_ROW);
        float *kp_size = keypointsCPU.ptr<float>(SURF_OCL::SIZE_ROW);
        float *kp_dir = keypointsCPU.ptr<float>(SURF_OCL::ANGLE_ROW);
        float *kp_hessian = keypointsCPU.ptr<float>(SURF_OCL::HESSIAN_ROW);
yao's avatar
yao committed
362 363 364

        for (int i = 0; i < nFeatures; ++i)
        {
365
            KeyPoint &kp = keypoints[i];
yao's avatar
yao committed
366 367 368 369 370 371 372 373 374 375 376
            kp.pt.x = kp_x[i];
            kp.pt.y = kp_y[i];
            kp.class_id = kp_laplacian[i];
            kp.octave = kp_octave[i];
            kp.size = kp_size[i];
            kp.angle = kp_dir[i];
            kp.response = kp_hessian[i];
        }
    }
}

377
void cv::ocl::SURF_OCL::downloadDescriptors(const oclMat &descriptorsGPU, std::vector<float> &descriptors)
yao's avatar
yao committed
378 379 380 381 382 383 384 385 386 387 388 389 390
{
    if (descriptorsGPU.empty())
        descriptors.clear();
    else
    {
        CV_Assert(descriptorsGPU.type() == CV_32F);

        descriptors.resize(descriptorsGPU.rows * descriptorsGPU.cols);
        Mat descriptorsCPU(descriptorsGPU.size(), CV_32F, &descriptors[0]);
        descriptorsGPU.download(descriptorsCPU);
    }
}

391
void cv::ocl::SURF_OCL::operator()(const oclMat &img, const oclMat &mask, oclMat &keypoints)
yao's avatar
yao committed
392 393 394 395 396 397 398 399 400
{
    if (!img.empty())
    {
        SURF_OCL_Invoker surf(*this, img, mask);

        surf.detectKeypoints(keypoints);
    }
}

401 402
void cv::ocl::SURF_OCL::operator()(const oclMat &img, const oclMat &mask, oclMat &keypoints, oclMat &descriptors,
                                   bool useProvidedKeypoints)
yao's avatar
yao committed
403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418
{
    if (!img.empty())
    {
        SURF_OCL_Invoker surf(*this, img, mask);

        if (!useProvidedKeypoints)
            surf.detectKeypoints(keypoints);
        else if (!upright)
        {
            surf.findOrientation(keypoints);
        }

        surf.computeDescriptors(keypoints, descriptors, descriptorSize());
    }
}

419
void cv::ocl::SURF_OCL::operator()(const oclMat &img, const oclMat &mask, std::vector<KeyPoint> &keypoints)
yao's avatar
yao committed
420 421 422 423 424 425 426 427
{
    oclMat keypointsGPU;

    (*this)(img, mask, keypointsGPU);

    downloadKeypoints(keypointsGPU, keypoints);
}

428
void cv::ocl::SURF_OCL::operator()(const oclMat &img, const oclMat &mask, std::vector<KeyPoint> &keypoints,
429
                                   oclMat &descriptors, bool useProvidedKeypoints)
yao's avatar
yao committed
430 431 432 433 434 435 436 437 438 439 440
{
    oclMat keypointsGPU;

    if (useProvidedKeypoints)
        uploadKeypoints(keypoints, keypointsGPU);

    (*this)(img, mask, keypointsGPU, descriptors, useProvidedKeypoints);

    downloadKeypoints(keypointsGPU, keypoints);
}

441 442
void cv::ocl::SURF_OCL::operator()(const oclMat &img, const oclMat &mask, std::vector<KeyPoint> &keypoints,
                                   std::vector<float> &descriptors, bool useProvidedKeypoints)
yao's avatar
yao committed
443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461
{
    oclMat descriptorsGPU;

    (*this)(img, mask, keypoints, descriptorsGPU, useProvidedKeypoints);

    downloadDescriptors(descriptorsGPU, descriptors);
}

// Releases every intermediate buffer held by this object.
void cv::ocl::SURF_OCL::releaseMemory()
{
    // detector response buffers
    det.release();
    trace.release();
    maxPosBuffer.release();

    // integral images and mask-related buffers
    sum.release();
    maskSum.release();
    mask1.release();

    intBuffer.release();
}

462 463

// bind source buffer to image oject.
464
void SURF_OCL_Invoker::bindImgTex(const oclMat &img, cl_mem &texture)
yao's avatar
yao committed
465
{
466
    if(texture)
niko's avatar
niko committed
467
    {
468
        openCLFree(texture);
niko's avatar
niko committed
469
    }
yao's avatar
yao committed
470
    texture = bindTexture(img);
yao's avatar
yao committed
471 472 473 474
}

////////////////////////////
// kernel caller definitions
475
// Launches the "icvCalcLayerDetAndTrace" kernel for one octave, writing into
// `det` and `trace`. The integral image is passed as a texture when one was
// bound, otherwise as the raw surf_.sum buffer.
void SURF_OCL_Invoker::icvCalcLayerDetAndTrace_gpu(oclMat &det, oclMat &trace, int octave, int nOctaveLayers, int c_layer_rows)
{
    // Number of sample positions per image dimension at this octave.
    const int min_size = calcSize(octave, 0);
    const int max_samples_i = 1 + ((img_rows - min_size) >> octave);
    const int max_samples_j = 1 + ((img_cols - min_size) >> octave);

    Context *clCxt = det.clCxt;
    std::string kernelName = "icvCalcLayerDetAndTrace";
    std::vector< std::pair<size_t, const void *> > args;

    if(sumTex)
    {
        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&sumTex));
    }
    else
    {
        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&surf_.sum.data)); // if image2d is not supported
    }
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&det.data));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&trace.data));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&det.step));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&trace.step));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_rows));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_cols));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&nOctaveLayers));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&octave));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&c_layer_rows));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&surf_.sum.step));

    // 16x16 work-groups; the y extent is repeated for each of the nOctaveLayers + 2 layers.
    size_t localThreads[3]  = {16, 16, 1};
    size_t globalThreads[3] =
    {
        divUp(max_samples_j, localThreads[0]) *localThreads[0],
        divUp(max_samples_i, localThreads[1]) *localThreads[1] *(nOctaveLayers + 2),
        1
    };
    openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
}

514 515
void SURF_OCL_Invoker::icvFindMaximaInLayer_gpu(const oclMat &det, const oclMat &trace, oclMat &maxPosBuffer, oclMat &maxCounter, int counterOffset,
        int octave, bool use_mask, int nLayers, int layer_rows, int layer_cols)
yao's avatar
yao committed
516 517 518 519
{
    const int min_margin = ((calcSize(octave, 2) >> 1) >> octave) + 1;

    Context *clCxt = det.clCxt;
520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537
    std::string kernelName = use_mask ? "icvFindMaximaInLayer_withmask" : "icvFindMaximaInLayer";
    std::vector< std::pair<size_t, const void *> > args;

    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&det.data));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&trace.data));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&maxPosBuffer.data));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&maxCounter.data));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&counterOffset));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&det.step));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&trace.step));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_rows));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_cols));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&nLayers));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&octave));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&layer_rows));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&layer_cols));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&maxCandidates));
    args.push_back( std::make_pair( sizeof(cl_float), (void *)&surf_.hessianThreshold));
yao's avatar
yao committed
538 539 540

    if(use_mask)
    {
yao's avatar
yao committed
541 542
        if(maskSumTex)
        {
Andrey Kamaev's avatar
Andrey Kamaev committed
543
            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&maskSumTex));
yao's avatar
yao committed
544 545 546
        }
        else
        {
Andrey Kamaev's avatar
Andrey Kamaev committed
547
            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&surf_.maskSum.data));
yao's avatar
yao committed
548
        }
Andrey Kamaev's avatar
Andrey Kamaev committed
549
        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&surf_.maskSum.step));
yao's avatar
yao committed
550 551
    }
    size_t localThreads[3]  = {16, 16, 1};
552 553 554 555
    size_t globalThreads[3] = {divUp(layer_cols - 2 * min_margin, localThreads[0] - 2) *localThreads[0],
                               divUp(layer_rows - 2 * min_margin, localThreads[1] - 2) *nLayers *localThreads[1],
                               1
                              };
yao's avatar
yao committed
556

yao's avatar
yao committed
557
    openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
yao's avatar
yao committed
558 559
}

yao's avatar
yao committed
560
void SURF_OCL_Invoker::icvInterpolateKeypoint_gpu(const oclMat &det, const oclMat &maxPosBuffer, int maxCounter,
561
        oclMat &keypoints, oclMat &counters, int octave, int layer_rows, int maxFeatures)
yao's avatar
yao committed
562 563
{
    Context *clCxt = det.clCxt;
564 565 566 567 568 569 570 571 572 573 574 575 576 577
    std::string kernelName = "icvInterpolateKeypoint";
    std::vector< std::pair<size_t, const void *> > args;

    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&det.data));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&maxPosBuffer.data));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&keypoints.data));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&counters.data));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&det.step));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&keypoints.step));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_rows));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_cols));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&octave));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&layer_rows));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&maxFeatures));
yao's avatar
yao committed
578 579

    size_t localThreads[3]  = {3, 3, 3};
580
    size_t globalThreads[3] = {maxCounter *localThreads[0], localThreads[1], 1};
yao's avatar
yao committed
581

yao's avatar
yao committed
582
    openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
yao's avatar
yao committed
583 584
}

585
void SURF_OCL_Invoker::icvCalcOrientation_gpu(const oclMat &keypoints, int nFeatures)
yao's avatar
yao committed
586
{
587
    Context *clCxt = counters.clCxt;
588
    std::string kernelName = "icvCalcOrientation";
yao's avatar
yao committed
589

590
    std::vector< std::pair<size_t, const void *> > args;
yao's avatar
yao committed
591

yao's avatar
yao committed
592 593
    if(sumTex)
    {
594
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&sumTex));
yao's avatar
yao committed
595 596 597
    }
    else
    {
Andrey Kamaev's avatar
Andrey Kamaev committed
598
        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&surf_.sum.data)); // if image2d is not supported
yao's avatar
yao committed
599
    }
600 601 602 603
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&keypoints.data));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&keypoints.step));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_rows));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_cols));
Andrey Kamaev's avatar
Andrey Kamaev committed
604
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&surf_.sum.step));
yao's avatar
yao committed
605 606

    size_t localThreads[3]  = {32, 4, 1};
607
    size_t globalThreads[3] = {nFeatures *localThreads[0], localThreads[1], 1};
yao's avatar
yao committed
608

yao's avatar
yao committed
609
    openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
yao's avatar
yao committed
610
}
yao's avatar
yao committed
611 612 613 614

void SURF_OCL_Invoker::icvSetUpright_gpu(const oclMat &keypoints, int nFeatures)
{
    Context *clCxt = counters.clCxt;
Andrey Kamaev's avatar
Andrey Kamaev committed
615
    std::string kernelName = "icvSetUpright";
yao's avatar
yao committed
616

Andrey Kamaev's avatar
Andrey Kamaev committed
617
    std::vector< std::pair<size_t, const void *> > args;
yao's avatar
yao committed
618

Andrey Kamaev's avatar
Andrey Kamaev committed
619 620 621
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&keypoints.data));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&keypoints.step));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&nFeatures));
yao's avatar
yao committed
622 623 624 625 626

    size_t localThreads[3]  = {256, 1, 1};
    size_t globalThreads[3] = {nFeatures, 1, 1};

    openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
yao's avatar
yao committed
627 628
}

yao's avatar
yao committed
629

630
void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const oclMat &keypoints, int nFeatures)
yao's avatar
yao committed
631 632 633
{
    // compute unnormalized descriptors, then normalize them - odd indexing since grid must be 2D
    Context *clCxt = descriptors.clCxt;
634 635
    std::string kernelName = "";
    std::vector< std::pair<size_t, const void *> > args;
yao's avatar
yao committed
636 637 638 639 640 641 642 643 644 645 646 647 648 649
    size_t localThreads[3]  = {1, 1, 1};
    size_t globalThreads[3] = {1, 1, 1};

    if(descriptors.cols == 64)
    {
        kernelName = "compute_descriptors64";

        localThreads[0] = 6;
        localThreads[1] = 6;

        globalThreads[0] = nFeatures * localThreads[0];
        globalThreads[1] = 16 * localThreads[1];

        args.clear();
yao's avatar
yao committed
650 651
        if(imgTex)
        {
Andrey Kamaev's avatar
Andrey Kamaev committed
652
            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&imgTex));
yao's avatar
yao committed
653 654 655
        }
        else
        {
Andrey Kamaev's avatar
Andrey Kamaev committed
656
            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&_img.data));
yao's avatar
yao committed
657
        }
658 659 660 661
        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&descriptors.data));
        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&keypoints.data));
        args.push_back( std::make_pair( sizeof(cl_int), (void *)&descriptors.step));
        args.push_back( std::make_pair( sizeof(cl_int), (void *)&keypoints.step));
Andrey Kamaev's avatar
Andrey Kamaev committed
662 663 664
        args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.rows));
        args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.cols));
        args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.step));
yao's avatar
yao committed
665 666

        openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
yao's avatar
yao committed
667 668 669 670 671 672 673 674 675 676

        kernelName = "normalize_descriptors64";

        localThreads[0] = 64;
        localThreads[1] = 1;

        globalThreads[0] = nFeatures * localThreads[0];
        globalThreads[1] = localThreads[1];

        args.clear();
677 678
        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&descriptors.data));
        args.push_back( std::make_pair( sizeof(cl_int), (void *)&descriptors.step));
yao's avatar
yao committed
679 680

        openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
yao's avatar
yao committed
681 682 683 684 685 686 687 688 689 690 691 692
    }
    else
    {
        kernelName = "compute_descriptors128";

        localThreads[0] = 6;
        localThreads[1] = 6;

        globalThreads[0] = nFeatures * localThreads[0];
        globalThreads[1] = 16 * localThreads[1];

        args.clear();
yao's avatar
yao committed
693 694
        if(imgTex)
        {
Andrey Kamaev's avatar
Andrey Kamaev committed
695
            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&imgTex));
yao's avatar
yao committed
696 697 698
        }
        else
        {
Andrey Kamaev's avatar
Andrey Kamaev committed
699
            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&_img.data));
yao's avatar
yao committed
700
        }
701 702 703 704
        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&descriptors.data));
        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&keypoints.data));
        args.push_back( std::make_pair( sizeof(cl_int), (void *)&descriptors.step));
        args.push_back( std::make_pair( sizeof(cl_int), (void *)&keypoints.step));
Andrey Kamaev's avatar
Andrey Kamaev committed
705 706 707
        args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.rows));
        args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.cols));
        args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.step));
Andrey Kamaev's avatar
Andrey Kamaev committed
708

yao's avatar
yao committed
709
        openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
yao's avatar
yao committed
710 711 712 713 714 715 716 717 718 719

        kernelName = "normalize_descriptors128";

        localThreads[0] = 128;
        localThreads[1] = 1;

        globalThreads[0] = nFeatures * localThreads[0];
        globalThreads[1] = localThreads[1];

        args.clear();
720 721
        args.push_back( std::make_pair( sizeof(cl_mem), (void *)&descriptors.data));
        args.push_back( std::make_pair( sizeof(cl_int), (void *)&descriptors.step));
Andrey Kamaev's avatar
Andrey Kamaev committed
722

yao's avatar
yao committed
723
        openCLExecuteKernelSURF(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
yao's avatar
yao committed
724 725 726
    }
}