canny.cpp 16.9 KB
Newer Older
yao's avatar
yao committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
//    Peng Xiao, pengxiao@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"

using namespace cv;
using namespace cv::ocl;
using namespace std;

namespace cv
{
namespace ocl
{
// OpenCL kernel string used by the Canny launchers in this file.
// Only declared here; no definition appears in this part of the file.
extern const char *imgproc_canny;
} // namespace ocl
} // namespace cv

61
// Constructs a buffer set around caller-provided derivative images.
//
// dx_, dy_ : derivative images; both must be CV_32SC1 and the same size
//            (enforced by the assertion below).
// The counter handle starts out NULL and is allocated by create().
cv::ocl::CannyBuf::CannyBuf(const oclMat &dx_, const oclMat &dy_)
    : dx(dx_),
      dy(dy_),
      counter(NULL)
{
    CV_Assert(dx_.type() == CV_32SC1 && dy_.type() == CV_32SC1 && dx_.size() == dy_.size());

    // -1: derivatives are supplied by the caller, so create() sets up
    // neither the Sobel row-pass buffers nor the derivative filters.
    const int noAperture = -1;
    create(dx_.size(), noAperture);
}

68
void cv::ocl::CannyBuf::create(const Size &image_size, int apperture_size)
yao's avatar
yao committed
69
{
70 71
    ensureSizeIsEnough(image_size, CV_32SC1, dx);
    ensureSizeIsEnough(image_size, CV_32SC1, dy);
72 73 74

    if(apperture_size == 3)
    {
75 76
        ensureSizeIsEnough(image_size, CV_32SC1, dx_buf);
        ensureSizeIsEnough(image_size, CV_32SC1, dy_buf);
77 78
    }
    else if(apperture_size > 0)
yao's avatar
yao committed
79
    {
80
        Mat kx, ky;
yao's avatar
yao committed
81
        if (!filterDX)
82 83 84
        {
            filterDX = createDerivFilter_GPU(CV_8U, CV_32S, 1, 0, apperture_size, BORDER_REPLICATE);
        }
yao's avatar
yao committed
85
        if (!filterDY)
86 87 88
        {
            filterDY = createDerivFilter_GPU(CV_8U, CV_32S, 0, 1, apperture_size, BORDER_REPLICATE);
        }
yao's avatar
yao committed
89
    }
peng xiao's avatar
peng xiao committed
90
    ensureSizeIsEnough(2 * (image_size.height + 2), image_size.width + 2, CV_32FC1, edgeBuf);
yao's avatar
yao committed
91

92 93
    ensureSizeIsEnough(1, image_size.width * image_size.height, CV_16UC2, trackBuf1);
    ensureSizeIsEnough(1, image_size.width * image_size.height, CV_16UC2, trackBuf2);
94

95
    int counter_i [1] = { 0 };
96
    int err = 0;
97 98 99 100
    if(counter)
    {
        openCLFree(counter);
    }
peng xiao's avatar
peng xiao committed
101
    counter = clCreateBuffer( *((cl_context*)getoclContext()), CL_MEM_COPY_HOST_PTR, sizeof(int), counter_i, &err );
102
    openCLSafeCall(err);
yao's avatar
yao committed
103 104 105 106 107 108 109 110 111 112 113
}

void cv::ocl::CannyBuf::release()
{
    dx.release();
    dy.release();
    dx_buf.release();
    dy_buf.release();
    edgeBuf.release();
    trackBuf1.release();
    trackBuf2.release();
114
    openCLFree(counter);
yao's avatar
yao committed
115 116
}

117 118 119
namespace cv
{
    namespace ocl
120
    {
121 122 123
        namespace canny
        {
            void calcSobelRowPass_gpu(const oclMat &src, oclMat &dx_buf, oclMat &dy_buf, int rows, int cols);
yao's avatar
yao committed
124

125 126
            void calcMagnitude_gpu(const oclMat &dx_buf, const oclMat &dy_buf, oclMat &dx, oclMat &dy, oclMat &mag, int rows, int cols, bool L2Grad);
            void calcMagnitude_gpu(const oclMat &dx, const oclMat &dy, oclMat &mag, int rows, int cols, bool L2Grad);
yao's avatar
yao committed
127

128
            void calcMap_gpu(oclMat &dx, oclMat &dy, oclMat &mag, oclMat &map, int rows, int cols, float low_thresh, float high_thresh);
yao's avatar
yao committed
129

130
            void edgesHysteresisLocal_gpu(oclMat &map, oclMat &st1, void *counter, int rows, int cols);
yao's avatar
yao committed
131

132
            void edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, void *counter, int rows, int cols);
yao's avatar
yao committed
133

134 135
            void getEdges_gpu(oclMat &map, oclMat &dst, int rows, int cols);
        }
136
    }
137
}// cv::ocl
yao's avatar
yao committed
138 139 140

namespace
{
141
    void CannyCaller(CannyBuf &buf, oclMat &dst, float low_thresh, float high_thresh)
yao's avatar
yao committed
142 143
    {
        using namespace ::cv::ocl::canny;
peng xiao's avatar
peng xiao committed
144 145
        oclMat magBuf = buf.edgeBuf(Rect(0, 0, buf.edgeBuf.cols, buf.edgeBuf.rows / 2));
        oclMat mapBuf = buf.edgeBuf(Rect(0, buf.edgeBuf.rows / 2, buf.edgeBuf.cols, buf.edgeBuf.rows / 2));
yao's avatar
yao committed
146

peng xiao's avatar
peng xiao committed
147
        calcMap_gpu(buf.dx, buf.dy, magBuf, mapBuf, dst.rows, dst.cols, low_thresh, high_thresh);
yao's avatar
yao committed
148

peng xiao's avatar
peng xiao committed
149
        edgesHysteresisLocal_gpu(mapBuf, buf.trackBuf1, buf.counter, dst.rows, dst.cols);
yao's avatar
yao committed
150

peng xiao's avatar
peng xiao committed
151 152 153
        edgesHysteresisGlobal_gpu(mapBuf, buf.trackBuf1, buf.trackBuf2, buf.counter, dst.rows, dst.cols);

        getEdges_gpu(mapBuf, dst, dst.rows, dst.cols);
yao's avatar
yao committed
154 155 156
    }
}

157
void cv::ocl::Canny(const oclMat &src, oclMat &dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient)
yao's avatar
yao committed
158 159 160 161 162
{
    CannyBuf buf(src.size(), apperture_size);
    Canny(src, buf, dst, low_thresh, high_thresh, apperture_size, L2gradient);
}

163
void cv::ocl::Canny(const oclMat &src, CannyBuf &buf, oclMat &dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient)
yao's avatar
yao committed
164 165 166 167 168 169 170 171 172
{
    using namespace ::cv::ocl::canny;

    CV_Assert(src.type() == CV_8UC1);

    if( low_thresh > high_thresh )
        std::swap( low_thresh, high_thresh );

    dst.create(src.size(), CV_8U);
niko's avatar
niko committed
173
    dst.setTo(Scalar::all(0));
yao's avatar
yao committed
174 175

    buf.create(src.size(), apperture_size);
niko's avatar
niko committed
176
    buf.edgeBuf.setTo(Scalar::all(0));
yao's avatar
yao committed
177

peng xiao's avatar
peng xiao committed
178 179
    oclMat magBuf = buf.edgeBuf(Rect(0, 0, buf.edgeBuf.cols, buf.edgeBuf.rows / 2));

yao's avatar
yao committed
180 181 182 183
    if (apperture_size == 3)
    {
        calcSobelRowPass_gpu(src, buf.dx_buf, buf.dy_buf, src.rows, src.cols);

peng xiao's avatar
peng xiao committed
184
        calcMagnitude_gpu(buf.dx_buf, buf.dy_buf, buf.dx, buf.dy, magBuf, src.rows, src.cols, L2gradient);
yao's avatar
yao committed
185 186 187
    }
    else
    {
188 189
        buf.filterDX->apply(src, buf.dx);
        buf.filterDY->apply(src, buf.dy);
yao's avatar
yao committed
190

peng xiao's avatar
peng xiao committed
191
        calcMagnitude_gpu(buf.dx, buf.dy, magBuf, src.rows, src.cols, L2gradient);
yao's avatar
yao committed
192 193 194
    }
    CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));
}
195
void cv::ocl::Canny(const oclMat &dx, const oclMat &dy, oclMat &dst, double low_thresh, double high_thresh, bool L2gradient)
yao's avatar
yao committed
196 197 198 199 200
{
    CannyBuf buf(dx, dy);
    Canny(dx, dy, buf, dst, low_thresh, high_thresh, L2gradient);
}

201
void cv::ocl::Canny(const oclMat &dx, const oclMat &dy, CannyBuf &buf, oclMat &dst, double low_thresh, double high_thresh, bool L2gradient)
yao's avatar
yao committed
202 203 204 205 206 207 208 209 210
{
    using namespace ::cv::ocl::canny;

    CV_Assert(dx.type() == CV_32SC1 && dy.type() == CV_32SC1 && dx.size() == dy.size());

    if( low_thresh > high_thresh )
        std::swap( low_thresh, high_thresh);

    dst.create(dx.size(), CV_8U);
niko's avatar
niko committed
211
    dst.setTo(Scalar::all(0));
yao's avatar
yao committed
212

213 214
    buf.dx = dx;
    buf.dy = dy;
yao's avatar
yao committed
215
    buf.create(dx.size(), -1);
niko's avatar
niko committed
216
    buf.edgeBuf.setTo(Scalar::all(0));
peng xiao's avatar
peng xiao committed
217 218 219 220

    oclMat magBuf = buf.edgeBuf(Rect(0, 0, buf.edgeBuf.cols, buf.edgeBuf.rows / 2));

    calcMagnitude_gpu(buf.dx, buf.dy, magBuf, dx.rows, dx.cols, L2gradient);
yao's avatar
yao committed
221 222 223 224

    CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));
}

225
void canny::calcSobelRowPass_gpu(const oclMat &src, oclMat &dx_buf, oclMat &dy_buf, int rows, int cols)
yao's avatar
yao committed
226
{
227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244
    Context *clCxt = src.clCxt;
    string kernelName = "calcSobelRowPass";
    vector< pair<size_t, const void *> > args;

    args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&dx_buf.data));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&dy_buf.data));
    args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
    args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
    args.push_back( make_pair( sizeof(cl_int), (void *)&src.step));
    args.push_back( make_pair( sizeof(cl_int), (void *)&src.offset));
    args.push_back( make_pair( sizeof(cl_int), (void *)&dx_buf.step));
    args.push_back( make_pair( sizeof(cl_int), (void *)&dx_buf.offset));
    args.push_back( make_pair( sizeof(cl_int), (void *)&dy_buf.step));
    args.push_back( make_pair( sizeof(cl_int), (void *)&dy_buf.offset));

    size_t globalThreads[3] = {cols, rows, 1};
    size_t localThreads[3]  = {16, 16, 1};
peng xiao's avatar
peng xiao committed
245
    openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
yao's avatar
yao committed
246 247
}

248
void canny::calcMagnitude_gpu(const oclMat &dx_buf, const oclMat &dy_buf, oclMat &dx, oclMat &dy, oclMat &mag, int rows, int cols, bool L2Grad)
yao's avatar
yao committed
249
{
250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274
    Context *clCxt = dx_buf.clCxt;
    string kernelName = "calcMagnitude_buf";
    vector< pair<size_t, const void *> > args;

    args.push_back( make_pair( sizeof(cl_mem), (void *)&dx_buf.data));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&dy_buf.data));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&dx.data));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&dy.data));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&mag.data));
    args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
    args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
    args.push_back( make_pair( sizeof(cl_int), (void *)&dx_buf.step));
    args.push_back( make_pair( sizeof(cl_int), (void *)&dx_buf.offset));
    args.push_back( make_pair( sizeof(cl_int), (void *)&dy_buf.step));
    args.push_back( make_pair( sizeof(cl_int), (void *)&dy_buf.offset));
    args.push_back( make_pair( sizeof(cl_int), (void *)&dx.step));
    args.push_back( make_pair( sizeof(cl_int), (void *)&dx.offset));
    args.push_back( make_pair( sizeof(cl_int), (void *)&dy.step));
    args.push_back( make_pair( sizeof(cl_int), (void *)&dy.offset));
    args.push_back( make_pair( sizeof(cl_int), (void *)&mag.step));
    args.push_back( make_pair( sizeof(cl_int), (void *)&mag.offset));

    size_t globalThreads[3] = {cols, rows, 1};
    size_t localThreads[3]  = {16, 16, 1};

peng xiao's avatar
peng xiao committed
275 276
    const char * build_options = L2Grad ? "-D L2GRAD":"";
    openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
yao's avatar
yao committed
277
}
278
void canny::calcMagnitude_gpu(const oclMat &dx, const oclMat &dy, oclMat &mag, int rows, int cols, bool L2Grad)
yao's avatar
yao committed
279
{
280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298
    Context *clCxt = dx.clCxt;
    string kernelName = "calcMagnitude";
    vector< pair<size_t, const void *> > args;

    args.push_back( make_pair( sizeof(cl_mem), (void *)&dx.data));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&dy.data));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&mag.data));
    args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
    args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
    args.push_back( make_pair( sizeof(cl_int), (void *)&dx.step));
    args.push_back( make_pair( sizeof(cl_int), (void *)&dx.offset));
    args.push_back( make_pair( sizeof(cl_int), (void *)&dy.step));
    args.push_back( make_pair( sizeof(cl_int), (void *)&dy.offset));
    args.push_back( make_pair( sizeof(cl_int), (void *)&mag.step));
    args.push_back( make_pair( sizeof(cl_int), (void *)&mag.offset));

    size_t globalThreads[3] = {cols, rows, 1};
    size_t localThreads[3]  = {16, 16, 1};

peng xiao's avatar
peng xiao committed
299 300
    const char * build_options = L2Grad ? "-D L2GRAD":"";
    openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
yao's avatar
yao committed
301 302
}

303
// Launches the "calcMap" kernel from the imgproc_canny program.
//
// dx, dy, mag : buffers handed to the kernel first, in that order.
// map         : buffer handed to the kernel after mag.
// rows, cols  : extent passed to the kernel; also the global work size (cols x rows).
// low_thresh,
// high_thresh : thresholds passed to the kernel as cl_float.
//
// Kernel arguments are positional, in exactly the order pushed below.
void canny::calcMap_gpu(oclMat &dx, oclMat &dy, oclMat &mag, oclMat &map, int rows, int cols, float low_thresh, float high_thresh)
{
    Context *clCxt = dx.clCxt;

    vector< pair<size_t, const void *> > args;

    args.push_back( make_pair( sizeof(cl_mem), (void *)&dx.data));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&dy.data));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&mag.data));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data));
    args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
    args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
    args.push_back( make_pair( sizeof(cl_float), (void *)&low_thresh));
    args.push_back( make_pair( sizeof(cl_float), (void *)&high_thresh));
    args.push_back( make_pair( sizeof(cl_int), (void *)&dx.step));
    args.push_back( make_pair( sizeof(cl_int), (void *)&dx.offset));
    args.push_back( make_pair( sizeof(cl_int), (void *)&dy.step));
    args.push_back( make_pair( sizeof(cl_int), (void *)&dy.offset));
    args.push_back( make_pair( sizeof(cl_int), (void *)&mag.step));
    args.push_back( make_pair( sizeof(cl_int), (void *)&mag.offset));
    args.push_back( make_pair( sizeof(cl_int), (void *)&map.step));
    args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));

    // int -> size_t inside a braced initialiser is a narrowing conversion
    // (ill-formed since C++11), so convert explicitly.
    size_t globalThreads[3] = {static_cast<size_t>(cols), static_cast<size_t>(rows), 1};
    string kernelName = "calcMap";
    size_t localThreads[3]  = {16, 16, 1};

    openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
}

334
// Launches the "edgesHysteresisLocal" kernel from the imgproc_canny program.
//
// map        : edge map handed to the kernel first (its context is used for the launch).
// st1        : buffer handed to the kernel after map.
// counter    : OpenCL memory object handle, passed to the kernel as a cl_mem argument.
// rows, cols : extent passed to the kernel; also the global work size (cols x rows).
//
// Kernel arguments are positional, in exactly the order pushed below.
void canny::edgesHysteresisLocal_gpu(oclMat &map, oclMat &st1, void *counter, int rows, int cols)
{
    Context *clCxt = map.clCxt;
    string kernelName = "edgesHysteresisLocal";
    vector< pair<size_t, const void *> > args;

    args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&st1.data));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&counter));
    args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
    args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
    args.push_back( make_pair( sizeof(cl_int), (void *)&map.step));
    args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));

    // int -> size_t inside a braced initialiser is a narrowing conversion
    // (ill-formed since C++11), so convert explicitly.
    size_t globalThreads[3] = {static_cast<size_t>(cols), static_cast<size_t>(rows), 1};
    size_t localThreads[3]  = {16, 16, 1};

    openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
}

354
// Repeatedly launches the "edgesHysteresisGlobal" kernel until the device
// counter reads back as 0.
//
// map        : edge map handed to the kernel first (its context is used for the launch).
// st1, st2   : buffers handed to the kernel after map; their contents are swapped
//              after every pass, so the caller's objects may end up exchanged.
// counter    : OpenCL memory object holding one int. It is read before the loop,
//              reset to 0 before each launch and read again after each launch.
// rows, cols : extent passed to the kernel.
void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, void *counter, int rows, int cols)
{
    // Value left in the counter before this function runs. The read is blocking
    // (third argument is 1) and sized from the destination variable.
    unsigned int count = 0;
    openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)getoclCommandQueue(), (cl_mem)counter, 1, 0, sizeof(count), &count, 0, NULL, NULL));
    Context *clCxt = map.clCxt;
    string kernelName = "edgesHysteresisGlobal";
    vector< pair<size_t, const void *> > args;
    size_t localThreads[3]  = {128, 1, 1};

    const int zero = 0;
    while(count > 0)
    {
        // Reset the device counter before the pass.
        openCLSafeCall(clEnqueueWriteBuffer(*(cl_command_queue*)getoclCommandQueue(), (cl_mem)counter, 1, 0, sizeof(zero), &zero, 0, NULL, NULL));

        args.clear();
        // 128 work-items per counted entry, at most 65535 entries along x;
        // the remainder is spread along y. Casts avoid narrowing in the braces.
        size_t globalThreads[3] = {static_cast<size_t>(std::min(count, 65535u)) * 128,
                                   static_cast<size_t>(divUp(count, 65535)),
                                   1};
        args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data));
        args.push_back( make_pair( sizeof(cl_mem), (void *)&st1.data));
        args.push_back( make_pair( sizeof(cl_mem), (void *)&st2.data));
        args.push_back( make_pair( sizeof(cl_mem), (void *)&counter));
        args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
        args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
        args.push_back( make_pair( sizeof(cl_int), (void *)&count));
        args.push_back( make_pair( sizeof(cl_int), (void *)&map.step));
        args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));

        openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);

        // Read back the counter written during this pass; it drives the next iteration.
        openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)getoclCommandQueue(), (cl_mem)counter, 1, 0, sizeof(count), &count, 0, NULL, NULL));
        std::swap(st1, st2);
    }
}

386
// Launches the "getEdges" kernel from the imgproc_canny program.
//
// map        : edge map handed to the kernel first (its context is used for the launch).
// dst        : buffer handed to the kernel after map.
// rows, cols : extent passed to the kernel; also the global work size (cols x rows).
//
// Kernel arguments are positional, in exactly the order pushed below.
void canny::getEdges_gpu(oclMat &map, oclMat &dst, int rows, int cols)
{
    Context *clCxt = map.clCxt;
    string kernelName = "getEdges";
    vector< pair<size_t, const void *> > args;

    args.push_back( make_pair( sizeof(cl_mem), (void *)&map.data));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data));
    args.push_back( make_pair( sizeof(cl_int), (void *)&rows));
    args.push_back( make_pair( sizeof(cl_int), (void *)&cols));
    args.push_back( make_pair( sizeof(cl_int), (void *)&map.step));
    args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));
    args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step));
    args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset));

    // int -> size_t inside a braced initialiser is a narrowing conversion
    // (ill-formed since C++11), so convert explicitly.
    size_t globalThreads[3] = {static_cast<size_t>(cols), static_cast<size_t>(rows), 1};
    size_t localThreads[3]  = {16, 16, 1};

    openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
}