/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
//      Peng Xiao, pengxiao@outlook.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"
#include "opencl_kernels.hpp"

using namespace cv;
using namespace cv::ocl;

// Compact host-side view of one detected corner.
// The device buffer holding the corners is allocated as CV_32FC2 (8 bytes per
// element) and read back into an array of this struct, so the layout
// (one float followed by two shorts) must stay 8 bytes wide.
struct DefCorner
{
    float eig;  // eigenvalue of corner
    short x;    // x coordinate of corner point
    short y;    // y coordinate of corner point
};

// Ordering predicate for corners, used for the sort on the host side:
// a corner with a larger eigenvalue is ordered before one with a smaller eigenvalue.
struct DefCornerCompare
{
    bool operator()(const DefCorner& a, const DefCorner& b) const
    {
        return a.eig > b.eig;
    }
};

// Launches the "findCorners" kernel, which finds corners in the eigenvalue
// matrix and puts them into an array.
//
//   eig_mat      - input matrix with eigenvalues
//   eigMinMax    - input with min and max values of eigenvalues
//   qualityLevel - quality threshold forwarded to the kernel as a float
//   mask         - optional mask; when non-empty the kernel is built with WITH_MASK=1
//   corners      - output array with detected corners
//   counter      - output value with number of detected corners, has to be 0 before call
static void findCorners_caller(
    const oclMat& eig_mat,
    oclMat& eigMinMax,
    const float qualityLevel,
    const oclMat& mask,
    oclMat& corners,
    oclMat& counter)
{
    Context* cxt = Context::getContext();

    // The argument list stores pointers, so every scalar passed to the kernel
    // must live in a named variable that outlives the openCLExecuteKernel call.
    const int src_pitch  = static_cast<int>(eig_mat.step);
    const int mask_strip = static_cast<int>(mask.step / mask.elemSize1());

    std::vector< std::pair<size_t, const void*> > args;
    args.push_back(std::make_pair(sizeof(cl_mem),   (void*)&eig_mat.data));
    args.push_back(std::make_pair(sizeof(cl_int),   (void*)&src_pitch));
    args.push_back(std::make_pair(sizeof(cl_mem),   (void*)&mask.data));
    args.push_back(std::make_pair(sizeof(cl_mem),   (void*)&corners.data));
    args.push_back(std::make_pair(sizeof(cl_int),   (void*)&mask_strip));
    args.push_back(std::make_pair(sizeof(cl_mem),   (void*)&eigMinMax.data));
    args.push_back(std::make_pair(sizeof(cl_float), (void*)&qualityLevel));
    args.push_back(std::make_pair(sizeof(cl_int),   (void*)&eig_mat.rows));
    args.push_back(std::make_pair(sizeof(cl_int),   (void*)&eig_mat.cols));
    args.push_back(std::make_pair(sizeof(cl_int),   (void*)&corners.cols));
    args.push_back(std::make_pair(sizeof(cl_mem),   (void*)&counter.data));

    // One work-item per element of the eigenvalue matrix, in 16x16 work-groups.
    size_t globalThreads[3] = { static_cast<size_t>(eig_mat.cols), static_cast<size_t>(eig_mat.rows), 1 };
    size_t localThreads[3]  = { 16, 16, 1 };

    String opt;
    if (!mask.empty())
        opt += " -D WITH_MASK=1";

    openCLExecuteKernel(cxt, &imgproc_gftt, "findCorners", globalThreads, localThreads, args, -1, -1, opt.c_str());
}

// Runs the two-pass min/max reduction over `src`.
//
//   src    - input matrix (the kernel is built with T=float)
//   dst    - scratch/output buffer; resized here to hold 2 values per work-group
//   tozero - buffer handed to the final kernel, which resets it
//            (the caller passes the corner counter here)
static void minMaxEig_caller(const oclMat& src, oclMat& dst, oclMat& tozero)
{
    const size_t groupnum = src.clCxt->getDeviceInfo().maxComputeUnits;
    CV_Assert(groupnum != 0);

    // The kernels receive the group count as a cl_int, so pass a real int
    // instead of the first sizeof(cl_int) bytes of a size_t.
    const int groupnum_arg = static_cast<int>(groupnum);

    const int dbsize = static_cast<int>(groupnum * 2 * src.elemSize());
    ensureSizeIsEnough(1, dbsize, CV_8UC1, dst);

    cl_mem dst_data = reinterpret_cast<cl_mem>(dst.data);

    // Describe the ROI in units of elements:
    //   all_cols - full row stride
    //   pre_cols - elements of a row that lie before the ROI
    //   sec_cols - elements of a row that lie after the ROI
    const int all_cols = static_cast<int>(src.step / src.elemSize());
    const int pre_cols = static_cast<int>((src.offset % src.step) / src.elemSize());
    const int sec_cols = static_cast<int>(all_cols - (src.offset % src.step + src.cols * src.elemSize() - 1) / src.elemSize() - 1);
    const int invalid_cols = pre_cols + sec_cols;
    const int cols    = all_cols - invalid_cols;
    const int elemnum = cols * src.rows;
    const int offset  = static_cast<int>(src.offset / src.elemSize());

    {
        // first parallel pass
        std::vector< std::pair<size_t, const void*> > args;
        args.push_back(std::make_pair(sizeof(cl_mem), (void*)&src.data));
        args.push_back(std::make_pair(sizeof(cl_mem), (void*)&dst_data));
        args.push_back(std::make_pair(sizeof(cl_int), (void*)&cols));
        args.push_back(std::make_pair(sizeof(cl_int), (void*)&invalid_cols));
        args.push_back(std::make_pair(sizeof(cl_int), (void*)&offset));
        args.push_back(std::make_pair(sizeof(cl_int), (void*)&elemnum));
        args.push_back(std::make_pair(sizeof(cl_int), (void*)&groupnum_arg));

        size_t globalThreads[3] = { groupnum * 256, 1, 1 };
        size_t localThreads[3]  = { 256, 1, 1 };
        openCLExecuteKernel(src.clCxt, &arithm_minMax, "arithm_op_minMax", globalThreads, localThreads,
                            args, -1, -1, "-D T=float -D DEPTH_5");
    }

    {
        // run final "serial" kernel to accumulate results from threads and reset corner counter
        std::vector< std::pair<size_t, const void*> > args;
        args.push_back(std::make_pair(sizeof(cl_mem), (void*)&dst_data));
        args.push_back(std::make_pair(sizeof(cl_int), (void*)&groupnum_arg));
        args.push_back(std::make_pair(sizeof(cl_mem), (void*)&tozero.data));

        size_t globalThreads[3] = { 1, 1, 1 };
        size_t localThreads[3]  = { 1, 1, 1 };
        openCLExecuteKernel(src.clCxt, &imgproc_gftt, "arithm_op_minMax_final", globalThreads, localThreads,
                            args, -1, -1);
    }
}

// Detects corners in `image` and uploads them to `corners` as a 1 x N CV_32FC2
// row, strongest first. `corners` is released when nothing is found.
//
//   image   - input image
//   corners - output device matrix with the selected corner coordinates
//   mask    - optional CV_8UC1 mask of the same size as `image` (may be empty)
//
// Raises (via CV_Assert) when qualityLevel <= 0, minDistance < 0, maxCorners < 0,
// or the mask has the wrong type/size.
void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image, oclMat& corners, const oclMat& mask)
{
    CV_Assert(qualityLevel > 0 && minDistance >= 0 && maxCorners >= 0);
    CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size()));

    ensureSizeIsEnough(image.size(), CV_32F, eig_);

    if (useHarrisDetector)
        cornerHarris_dxdy(image, eig_, Dx_, Dy_, blockSize, 3, harrisK);
    else
        cornerMinEigenVal_dxdy(image, eig_, Dx_, Dy_, blockSize, 3);

    ensureSizeIsEnough(1, 1, CV_32SC1, counter_);

    // find max eigenvalue and reset detected counters
    minMaxEig_caller(eig_, eig_minmax_, counter_);

    // allocate buffer for kernels
    const int corner_array_size = std::max(1024, static_cast<int>(image.size().area() * 0.05));
    ensureSizeIsEnough(1, corner_array_size, CV_32FC2, tmpCorners_);

    // find points with high eigenvalue and put them into the temporary array
    findCorners_caller(eig_, eig_minmax_, static_cast<float>(qualityLevel), mask, tmpCorners_, counter_);

    // Read back the number of detected corners. The read is blocking because
    // the value is inspected immediately below; a non-blocking read would let
    // the host look at `total` before the transfer has completed.
    int total = 0;
    openCLVerifyCall(clEnqueueReadBuffer(getClCommandQueue(counter_.clCxt), (cl_mem)counter_.data, CL_TRUE, 0,
                                         sizeof(int), &total, 0, NULL, NULL));

    // Never trust the device counter beyond the capacity of the corner buffer:
    // entries past tmpCorners_.cols do not exist on the device.
    total = std::min(total, tmpCorners_.cols);

    if (total <= 0)
    {
        // check for trivial case
        corners.release();
        return;
    }

    // blocking read of the detected corners (not sorted yet)
    std::vector<DefCorner> tmp(total);
    openCLReadBuffer(tmpCorners_.clCxt, (cl_mem)tmpCorners_.data, &tmp[0], tmp.size() * sizeof(DefCorner));

    // sort detected corners on cpu side.
    std::sort(tmp.begin(), tmp.end(), DefCornerCompare());

    // estimate maximal size of final output array
    const int total_max = maxCorners > 0 ? std::min(maxCorners, total) : total;
    const int D2 = (int)ceil(minDistance * minDistance);

    // allocate output buffer
    std::vector<Point2f> tmp2;
    tmp2.reserve(total_max);

    if (minDistance < 1)
    {
        // no distance restriction: just copy (with conversion) the maximal allowed number of points
        for (int i = 0; i < total_max; ++i)
            tmp2.push_back(Point2f(tmp[i].x, tmp[i].y));
    }
    else
    {
        // distance restriction: accept corners in sorted order, rejecting any corner that is
        // closer than minDistance to an already accepted one. Accepted corners are bucketed
        // into a grid of cell_size x cell_size cells so only the 3x3 neighbourhood is scanned.
        const int cell_size   = cvRound(minDistance);
        const int grid_width  = (image.cols + cell_size - 1) / cell_size;
        const int grid_height = (image.rows + cell_size - 1) / cell_size;

        std::vector< std::vector<Point2i> > grid(grid_width * grid_height);

        for (int i = 0; i < total; ++i)
        {
            const DefCorner p = tmp[i];

            const int x_cell = static_cast<int>(p.x / cell_size);
            const int y_cell = static_cast<int>(p.y / cell_size);

            // neighbourhood of the cell, clipped to the grid boundary
            const int x1 = std::max(0, x_cell - 1);
            const int y1 = std::max(0, y_cell - 1);
            const int x2 = std::min(grid_width - 1, x_cell + 1);
            const int y2 = std::min(grid_height - 1, y_cell + 1);

            bool good = true;
            for (int yy = y1; good && yy <= y2; ++yy)
            {
                for (int xx = x1; good && xx <= x2; ++xx)
                {
                    const std::vector<Point2i>& m = grid[yy * grid_width + xx];
                    for (size_t j = 0; j < m.size(); ++j)
                    {
                        const int dx = p.x - m[j].x;
                        const int dy = p.y - m[j].y;
                        if (dx * dx + dy * dy < D2)
                        {
                            good = false;
                            break;
                        }
                    }
                }
            }

            if (good)
            {
                grid[y_cell * grid_width + x_cell].push_back(Point2i(p.x, p.y));
                tmp2.push_back(Point2f(p.x, p.y));

                if (maxCorners > 0 && tmp2.size() == static_cast<size_t>(maxCorners))
                    break;
            }
        }
    }

    const int final_size = static_cast<int>(tmp2.size());
    if (final_size > 0)
        corners.upload(Mat(1, final_size, CV_32FC2, &tmp2[0]));
    else
        corners.release();
}

// Copies the points stored in the device matrix `points` (expected to be a
// CV_32FC2 row) into `points_v`. An empty `points` yields an empty vector.
void cv::ocl::GoodFeaturesToTrackDetector_OCL::downloadPoints(const oclMat &points, std::vector<Point2f> &points_v)
{
    // operator() releases its output when no corner is found; there is nothing
    // to transfer in that case, and a zero-sized read is not a valid OpenCL call.
    if (points.empty())
    {
        points_v.clear();
        return;
    }

    CV_DbgAssert(points.type() == CV_32FC2);

    points_v.resize(points.cols);
    openCLSafeCall(clEnqueueReadBuffer(
        *(cl_command_queue*)getClCommandQueuePtr(),
        reinterpret_cast<cl_mem>(points.data),
        CL_TRUE,
        0,
        points.cols * sizeof(Point2f),
        &points_v[0],
        0,
        NULL,
        NULL));
}