/*M/////////////////////////////////////////////////////////////////////////////////////// // // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. // // By downloading, copying, installing or using the software you agree to this license. // If you do not agree to this license, do not download, install, // copy or use the software. // // // License Agreement // For Open Source Computer Vision Library // // Copyright (C) 2013, OpenCV Foundation, all rights reserved. // Third party copyrights are property of their respective owners. // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // // * Redistribution's of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. // // * Redistribution's in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation // and/or other materials provided with the distribution. // // * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // // This software is provided by the copyright holders and contributors "as is" and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. // In no event shall the Intel Corporation or contributors be liable for any direct, // indirect, incidental, special, exemplary, or consequential damages // (including, but not limited to, procurement of substitute goods or services; // loss of use, data, or profits; or business interruption) however caused // and on any theory of liability, whether in contract, strict liability, // or tort (including negligence or otherwise) arising in any way out of // the use of this software, even if advised of the possibility of such damage. // // Authors: // * Peter Andreas Entschev, peter@entschev.com // //M*/ #include "precomp.hpp" #include "opencl_kernels.hpp" using namespace cv; using namespace cv::ocl; cv::ocl::FAST_OCL::FAST_OCL(int _threshold, bool _nonmaxSupression, double _keypointsRatio) : nonmaxSupression(_nonmaxSupression), threshold(_threshold), keypointsRatio(_keypointsRatio), count_(0) { } void cv::ocl::FAST_OCL::operator ()(const oclMat& image, const oclMat& mask, std::vector<KeyPoint>& keypoints) { if (image.empty()) return; (*this)(image, mask, d_keypoints_); downloadKeypoints(d_keypoints_, keypoints); } void cv::ocl::FAST_OCL::downloadKeypoints(const oclMat& d_keypoints, std::vector<KeyPoint>& keypoints) { if (d_keypoints.empty()) return; Mat h_keypoints(d_keypoints); convertKeypoints(h_keypoints, keypoints); } void cv::ocl::FAST_OCL::convertKeypoints(const Mat& h_keypoints, std::vector<KeyPoint>& keypoints) { if (h_keypoints.empty()) return; CV_Assert(h_keypoints.rows == ROWS_COUNT && h_keypoints.elemSize() == 4); int npoints = h_keypoints.cols; keypoints.resize(npoints); const float* loc_x = h_keypoints.ptr<float>(X_ROW); const float* loc_y = h_keypoints.ptr<float>(Y_ROW); const float* response_row = h_keypoints.ptr<float>(RESPONSE_ROW); for (int i = 0; i < npoints; ++i) { KeyPoint kp(loc_x[i], loc_y[i], static_cast<float>(FEATURE_SIZE), -1, response_row[i]); keypoints[i] = kp; } } void cv::ocl::FAST_OCL::operator ()(const oclMat& img, const oclMat& mask, oclMat& keypoints) { calcKeyPointsLocation(img, mask); keypoints.cols = getKeyPoints(keypoints); } int cv::ocl::FAST_OCL::calcKeyPointsLocation(const oclMat& img, const oclMat& mask) { CV_Assert(img.type() == CV_8UC1); CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == img.size())); int maxKeypoints = static_cast<int>(keypointsRatio * img.size().area()); ensureSizeIsEnough(ROWS_COUNT, maxKeypoints, CV_32SC1, kpLoc_); kpLoc_.setTo(Scalar::all(0)); if (nonmaxSupression) { ensureSizeIsEnough(img.size(), CV_32SC1, score_); score_.setTo(Scalar::all(0)); } count_ = calcKeypointsOCL(img, mask, maxKeypoints); count_ = std::min(count_, maxKeypoints); return count_; } int cv::ocl::FAST_OCL::calcKeypointsOCL(const oclMat& img, const oclMat& mask, int maxKeypoints) { size_t localThreads[3] = {16, 16, 1}; size_t globalThreads[3] = {divUp(img.cols - 6, localThreads[0]) * localThreads[0], divUp(img.rows - 6, localThreads[1]) * localThreads[1], 1}; Context *clCxt = Context::getContext(); String kernelName = (mask.empty()) ? "calcKeypoints" : "calcKeypointsWithMask"; std::vector< std::pair<size_t, const void *> > args; int counter = 0; int err = CL_SUCCESS; cl_mem counterCL = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(), CL_MEM_COPY_HOST_PTR, sizeof(int), &counter, &err); int kpLocStep = kpLoc_.step / kpLoc_.elemSize(); int scoreStep = score_.step / score_.elemSize(); int nms = (nonmaxSupression) ? 1 : 0; args.push_back( std::make_pair( sizeof(cl_mem), (void *)&img.data)); if (!mask.empty()) args.push_back( std::make_pair( sizeof(cl_mem), (void *)&mask.data)); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&kpLoc_.data)); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&score_.data)); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&counterCL)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&nms)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&maxKeypoints)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&threshold)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&img.step)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&img.rows)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&img.cols)); if (!mask.empty()) args.push_back( std::make_pair( sizeof(cl_int), (void *)&mask.step)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&kpLocStep)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&scoreStep)); openCLExecuteKernel(clCxt, &featdetect_fast, kernelName, globalThreads, localThreads, args, -1, -1); openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr(), counterCL, CL_TRUE, 0, sizeof(int), &counter, 0, NULL, NULL)); openCLSafeCall(clReleaseMemObject(counterCL)); return counter; } int cv::ocl::FAST_OCL::nonmaxSupressionOCL(oclMat& keypoints) { size_t localThreads[3] = {256, 1, 1}; size_t globalThreads[3] = {count_, 1, 1}; Context *clCxt = Context::getContext(); String kernelName = "nonmaxSupression"; std::vector< std::pair<size_t, const void *> > args; int counter = 0; int err = CL_SUCCESS; cl_mem counterCL = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(), CL_MEM_COPY_HOST_PTR, sizeof(int), &counter, &err); int kpLocStep = kpLoc_.step / kpLoc_.elemSize(); int sStep = score_.step / score_.elemSize(); int kStep = keypoints.step / keypoints.elemSize(); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&kpLoc_.data)); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&score_.data)); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&keypoints.data)); args.push_back( std::make_pair( sizeof(cl_mem), (void *)&counterCL)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&count_)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&kpLocStep)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&sStep)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&kStep)); openCLExecuteKernel(clCxt, &featdetect_fast, kernelName, globalThreads, localThreads, args, -1, -1); openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr(), counterCL, CL_TRUE, 0, sizeof(int), &counter, 0, NULL, NULL)); openCLSafeCall(clReleaseMemObject(counterCL)); return counter; } int cv::ocl::FAST_OCL::getKeyPoints(oclMat& keypoints) { if (count_ == 0) return 0; if (nonmaxSupression) { ensureSizeIsEnough(ROWS_COUNT, count_, CV_32FC1, keypoints); return nonmaxSupressionOCL(keypoints); } kpLoc_.convertTo(keypoints, CV_32FC1); Mat k = keypoints; return count_; } void cv::ocl::FAST_OCL::release() { kpLoc_.release(); score_.release(); d_keypoints_.release(); }