/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
//    Peng Xiao, pengxiao@outlook.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
46
#include "opencl_kernels.hpp"
47 48 49 50

using namespace cv;
using namespace cv::ocl;

51 52
// Compact record describing one detected corner.
// operator() further down reads these records back from tmpCorners_, a buffer
// allocated with element type CV_32FC2 (8 bytes per element), so the field
// order and the total size of this struct must not change.
struct DefCorner
{
    float eig;  // eigenvalue (corner response) of the corner
    short x;    // x coordinate of the corner point
    short y;    // y coordinate of the corner point
};

// Compile-time guard (C++98 compatible): the array size becomes negative, and
// compilation fails, if padding or a field change makes the record anything
// other than 8 bytes.
typedef char DefCorner_must_be_8_bytes[(sizeof(DefCorner) == 8) ? 1 : -1];
58 59 60

// compare procedure for corner
//it is used for sort on the host side
61 62
struct DefCornerCompare :
        public std::binary_function<DefCorner, DefCorner, bool>
63
{
64
    bool operator()(const DefCorner a, const DefCorner b) const
65
    {
66
        return a.eig > b.eig;
67 68 69
    }
};

70
// find corners on matrix and put it into array
krodyush's avatar
krodyush committed
71
static void findCorners_caller(
72 73 74 75 76 77
    const oclMat&   eig_mat,        //input matrix worth eigenvalues
    oclMat&         eigMinMax,      //input with min and max values of eigenvalues
    const float     qualityLevel,
    const oclMat&   mask,
    oclMat&         corners,        //output array with detected corners
    oclMat&         counter)        //output value with number of detected corners, have to be 0 before call
78
{
79
    String  opt;
80 81 82 83 84 85 86
    std::vector<int> k;
    Context * cxt = Context::getContext();

    std::vector< std::pair<size_t, const void*> > args;

    const int mask_strip = mask.step / mask.elemSize1();

87
    args.push_back(std::make_pair( sizeof(cl_mem),   (void*)&(eig_mat.data)));
88

89
    int src_pitch = (int)eig_mat.step;
90
    args.push_back(std::make_pair( sizeof(cl_int),   (void*)&src_pitch ));
91 92 93
    args.push_back(std::make_pair( sizeof(cl_mem),   (void*)&mask.data ));
    args.push_back(std::make_pair( sizeof(cl_mem),   (void*)&corners.data ));
    args.push_back(std::make_pair( sizeof(cl_int),   (void*)&mask_strip));
94 95 96 97 98 99
    args.push_back(std::make_pair( sizeof(cl_mem),   (void*)&eigMinMax.data ));
    args.push_back(std::make_pair( sizeof(cl_float), (void*)&qualityLevel ));
    args.push_back(std::make_pair( sizeof(cl_int),   (void*)&eig_mat.rows ));
    args.push_back(std::make_pair( sizeof(cl_int),   (void*)&eig_mat.cols ));
    args.push_back(std::make_pair( sizeof(cl_int),   (void*)&corners.cols ));
    args.push_back(std::make_pair( sizeof(cl_mem),   (void*)&counter.data ));
100 101

    size_t globalThreads[3] = {eig_mat.cols, eig_mat.rows, 1};
102
    size_t localThreads[3]  = {16, 16, 1};
103 104
    if(!mask.empty())
        opt += " -D WITH_MASK=1";
105

106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
     openCLExecuteKernel(cxt, &imgproc_gftt, "findCorners", globalThreads, localThreads, args, -1, -1, opt.c_str());
}


static void minMaxEig_caller(const oclMat &src, oclMat &dst, oclMat & tozero)
{
    size_t groupnum = src.clCxt->getDeviceInfo().maxComputeUnits;
    CV_Assert(groupnum != 0);

    int dbsize = groupnum * 2 * src.elemSize();

    ensureSizeIsEnough(1, dbsize, CV_8UC1, dst);

    cl_mem dst_data = reinterpret_cast<cl_mem>(dst.data);

    int all_cols = src.step / src.elemSize();
    int pre_cols = (src.offset % src.step) / src.elemSize();
    int sec_cols = all_cols - (src.offset % src.step + src.cols * src.elemSize() - 1) / src.elemSize() - 1;
    int invalid_cols = pre_cols + sec_cols;
    int cols = all_cols - invalid_cols , elemnum = cols * src.rows;
    int offset = src.offset / src.elemSize();

128 129
    {
        // first parallel pass
130 131 132 133 134 135 136 137
        std::vector<std::pair<size_t , const void *> > args;
        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_data ));
        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&cols ));
        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&invalid_cols ));
        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&offset));
        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&elemnum));
        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&groupnum));
138 139 140 141 142 143
        size_t globalThreads[3] = {groupnum * 256, 1, 1};
        size_t localThreads[3] = {256, 1, 1};
        openCLExecuteKernel(src.clCxt, &arithm_minMax, "arithm_op_minMax", globalThreads, localThreads,
                            args, -1, -1, "-D T=float -D DEPTH_5");
    }

144 145
    {
        // run final "serial" kernel to find accumulate results from threads and reset corner counter
146 147 148 149
        std::vector<std::pair<size_t , const void *> > args;
        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_data ));
        args.push_back( std::make_pair( sizeof(cl_int) , (void *)&groupnum ));
        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&tozero.data ));
150 151 152 153 154
        size_t globalThreads[3] = {1, 1, 1};
        size_t localThreads[3] = {1, 1, 1};
        openCLExecuteKernel(src.clCxt, &imgproc_gftt, "arithm_op_minMax_final", globalThreads, localThreads,
                            args, -1, -1);
    }
155 156 157 158 159 160 161 162 163 164
}

// Detects good features to track on 'image' and uploads them to 'corners'.
//
// Steps:
//   1. compute the per-pixel corner response into eig_ (Harris or minimal
//      eigenvalue, selected by useHarrisDetector);
//   2. find min/max of the response and reset the corner counter;
//   3. let the device collect candidate corners into tmpCorners_;
//   4. read the candidates back, sort them by descending response on the host
//      and keep at most maxCorners of them (maxCorners == 0 means no limit),
//      optionally rejecting points closer than minDistance to an already
//      accepted one.
//
//   image   - input image
//   corners - output: 1 x N matrix of type CV_32FC2, released when no corner
//             is found
//   mask    - optional CV_8UC1 mask of the same size as the image; may be empty
void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image, oclMat& corners, const oclMat& mask)
{
    CV_Assert(qualityLevel > 0 && minDistance >= 0 && maxCorners >= 0);
    CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size()));

    ensureSizeIsEnough(image.size(), CV_32F, eig_);

    if (useHarrisDetector)
        cornerHarris_dxdy(image, eig_, Dx_, Dy_, blockSize, 3, harrisK);
    else
        cornerMinEigenVal_dxdy(image, eig_, Dx_, Dy_, blockSize, 3);

    ensureSizeIsEnough(1, 1, CV_32SC1, counter_);

    // find max eigenvalue and reset detected counters
    minMaxEig_caller(eig_, eig_minmax_, counter_);

    // allocate buffer for kernels: room for 5% of the pixels, but at least 1024 corners
    const int corner_array_size = std::max(1024, static_cast<int>(image.size().area() * 0.05));
    ensureSizeIsEnough(1, corner_array_size, CV_32FC2, tmpCorners_);

    // find points with high eigenvalue and put them into the temporary array
    findCorners_caller(eig_, eig_minmax_, static_cast<float>(qualityLevel), mask, tmpCorners_, counter_);

    // Read the number of detected corners. The read has to be blocking:
    // 'total' is examined right below, and with a non-blocking read the host
    // could look at it before the transfer has finished.
    int total = 0;
    openCLVerifyCall(clEnqueueReadBuffer(getClCommandQueue(counter_.clCxt), (cl_mem)counter_.data, CL_TRUE, 0, sizeof(int), &total, 0, NULL, NULL));

    // Never trust the counter beyond the capacity of the corner buffer;
    // otherwise the host array would contain records that were never written
    // by the device.
    total = std::min(total, tmpCorners_.cols);

    if (total <= 0)
    {
        // check for trivial case
        corners.release();
        return;
    }

    // blocking read of the detected corners only (not yet sorted)
    std::vector<DefCorner> tmp(total);
    openCLReadBuffer(tmpCorners_.clCxt, (cl_mem)tmpCorners_.data, &tmp[0], tmp.size() * sizeof(DefCorner));

    // sort detected corners on the cpu side: strongest response first
    std::sort(tmp.begin(), tmp.end(), DefCornerCompare());

    // estimate maximal size of the final output array
    const int total_max = maxCorners > 0 ? std::min(maxCorners, total) : total;
    const int D2 = (int)ceil(minDistance * minDistance);

    // host copy of the output
    std::vector<Point2f> tmp2;
    tmp2.reserve(total_max);

    if (minDistance < 1)
    {
        // no distance restriction: just copy, with conversion, the maximal allowed number of points
        for (int i = 0; i < total_max; ++i)
            tmp2.push_back(Point2f(tmp[i].x, tmp[i].y));
    }
    else
    {
        // Distance restriction: walk the corners from the strongest one and
        // accept a corner only if no accepted corner is closer than
        // minDistance. Accepted corners are bucketed into a grid of
        // cell_size x cell_size cells, so only the 3x3 block of cells around
        // the candidate has to be inspected.
        const int cell_size = cvRound(minDistance);
        const int grid_width = (image.cols + cell_size - 1) / cell_size;
        const int grid_height = (image.rows + cell_size - 1) / cell_size;

        std::vector< std::vector<Point2i> > grid(grid_width * grid_height);

        for (int i = 0; i < total; ++i)
        {
            const DefCorner& p = tmp[i];
            bool good = true;

            const int x_cell = static_cast<int>(p.x / cell_size);
            const int y_cell = static_cast<int>(p.y / cell_size);

            // neighbourhood of cells, clipped to the grid
            const int x1 = std::max(0, x_cell - 1);
            const int y1 = std::max(0, y_cell - 1);
            const int x2 = std::min(grid_width - 1, x_cell + 1);
            const int y2 = std::min(grid_height - 1, y_cell + 1);

            // every loop stops as soon as one too-close neighbour is found
            for (int yy = y1; good && yy <= y2; yy++)
            {
                for (int xx = x1; good && xx <= x2; xx++)
                {
                    const std::vector<Point2i>& m = grid[yy * grid_width + xx];
                    for (size_t j = 0; j < m.size(); j++)
                    {
                        const int dx = p.x - m[j].x;
                        const int dy = p.y - m[j].y;

                        if (dx * dx + dy * dy < D2)
                        {
                            good = false;
                            break;
                        }
                    }
                }
            }

            if (good)
            {
                grid[y_cell * grid_width + x_cell].push_back(Point2i(p.x, p.y));
                tmp2.push_back(Point2f(p.x, p.y));

                if (maxCorners > 0 && tmp2.size() == static_cast<size_t>(maxCorners))
                    break;
            }
        }
    }

    const int final_size = static_cast<int>(tmp2.size());
    if (final_size > 0)
        corners.upload(Mat(1, final_size, CV_32FC2, &tmp2[0]));
    else
        corners.release();
}
285

286
void cv::ocl::GoodFeaturesToTrackDetector_OCL::downloadPoints(const oclMat &points, std::vector<Point2f> &points_v)
287 288 289 290
{
    CV_DbgAssert(points.type() == CV_32FC2);
    points_v.resize(points.cols);
    openCLSafeCall(clEnqueueReadBuffer(
291
        *(cl_command_queue*)getClCommandQueuePtr(),
292 293 294 295 296 297 298
        reinterpret_cast<cl_mem>(points.data),
        CL_TRUE,
        0,
        points.cols * sizeof(Point2f),
        &points_v[0],
        0,
        NULL,
299 300
        NULL));
}