Commit f03c7521 authored by Andrey Kamaev's avatar Andrey Kamaev Committed by OpenCV Buildbot

Merge pull request #783 from pengx17:master_matchTemplate_dft

parents 2cd67cc9 143f8f69
......@@ -109,17 +109,52 @@ Returns void
The function calculates the Laplacian of the source image by adding up the second x and y derivatives calculated using the Sobel operator.
ocl::ConvolveBuf
----------------
.. ocv:struct:: ocl::ConvolveBuf
Class providing a memory buffer for the :ocv:func:`ocl::convolve` function. It also allows adjusting some specific parameters. ::
struct CV_EXPORTS ConvolveBuf
{
Size result_size;
Size block_size;
Size user_block_size;
Size dft_size;
int spect_len;
oclMat image_spect, templ_spect, result_spect;
oclMat image_block, templ_block, result_data;
void create(Size image_size, Size templ_size);
static Size estimateBlockSize(Size result_size, Size templ_size);
};
You can use the `user_block_size` field to set a specific block size for the :ocv:func:`ocl::convolve` function. If you leave its default value `Size(0,0)`, automatic estimation of the block size is used (which is optimized for speed). By varying `user_block_size` you can reduce memory requirements at the cost of speed.
ocl::ConvolveBuf::create
------------------------
.. ocv:function:: ocl::ConvolveBuf::create(Size image_size, Size templ_size)
Constructs a buffer for the :ocv:func:`ocl::convolve` function with the respective arguments.
ocl::convolve
------------------
Returns void
.. ocv:function:: void ocl::convolve(const oclMat &image, const oclMat &temp1, oclMat &result)
.. ocv:function:: void ocl::convolve(const oclMat &image, const oclMat &temp1, oclMat &result, bool ccorr=false)
.. ocv:function:: void ocl::convolve(const oclMat &image, const oclMat &temp1, oclMat &result, bool ccorr, ConvolveBuf& buf)
:param image: The source image
:param image: The source image. Only ``CV_32FC1`` images are supported for now.
:param temp1: Convolution kernel, a single-channel floating point matrix.
:param temp1: Convolution kernel, a single-channel floating point matrix. The size is not greater than the ``image`` size. The type is the same as ``image``.
:param result: The destination image
:param ccorr: Flag to evaluate cross-correlation instead of convolution.
:param buf: Optional buffer to avoid extra memory allocations and to adjust some specific parameters. See :ocv:struct:`ocl::ConvolveBuf`.
Convolves an image with the kernel. Supports only CV_32FC1 data types and does not support ROI.
......
......@@ -540,9 +540,29 @@ namespace cv
CV_EXPORTS oclMatExpr operator * (const oclMat &src1, const oclMat &src2);
CV_EXPORTS oclMatExpr operator / (const oclMat &src1, const oclMat &src2);
//! computes convolution of two images
//! Scratch buffers and block-size settings used by the FFT-based ocl::convolve.
//! All fields except user_block_size are (re)computed by create().
struct CV_EXPORTS ConvolveBuf
{
//! size of the output: image_size - templ_size + 1 in each dimension
Size result_size;
//! size of the result tile produced per DFT; derived from user_block_size or
//! estimateBlockSize() and then adjusted by create()
Size block_size;
//! caller-requested tile size; if either dimension is 0 the size is estimated automatically
Size user_block_size;
//! size of the DFT applied to each tile (create() keeps it at least 512 in each dimension)
Size dft_size;
//! complex (CV_32FC2) spectrum buffers, dft_size.height rows by dft_size.width / 2 + 1 columns
oclMat image_spect, templ_spect, result_spect;
//! real (CV_32F) buffers of dft_size
oclMat image_block, templ_block, result_data;
//! computes all sizes and allocates all buffers for the given image and template sizes
void create(Size image_size, Size templ_size);
//! returns the automatically chosen tile size for a result of result_size
static Size estimateBlockSize(Size result_size, Size templ_size);
};
//! computes convolution of two images, may use discrete Fourier transform
//! supports only the CV_32FC1 type
CV_EXPORTS void convolve(const oclMat &image, const oclMat &temp1, oclMat &result);
CV_EXPORTS void convolve(const oclMat &image, const oclMat &temp1, oclMat &result, bool ccorr = false);
CV_EXPORTS void convolve(const oclMat &image, const oclMat &temp1, oclMat &result, bool ccorr, ConvolveBuf& buf);
//! Performs a per-element multiplication of two Fourier spectrums.
//! Only full (not packed) CV_32FC2 complex spectrums in the interleaved format are supported for now.
//! supports only the CV_32FC2 type
CV_EXPORTS void mulSpectrums(const oclMat &a, const oclMat &b, oclMat &c, int flags, float scale, bool conjB = false);
CV_EXPORTS void cvtColor(const oclMat &src, oclMat &dst, int code , int dcn = 0);
......
......@@ -25,6 +25,7 @@
// Xu Pang, pangxu010@163.com
// Wu Zailong, bullet@yeah.net
// Wenju He, wenju@multicorewareinc.com
// Peng Xiao, pengxiao@outlook.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
......@@ -79,6 +80,7 @@ namespace cv
extern const char *imgproc_calcHarris;
extern const char *imgproc_calcMinEigenVal;
extern const char *imgproc_convolve;
extern const char *imgproc_mulAndScaleSpectrums;
////////////////////////////////////OpenCL call wrappers////////////////////////////
template <typename T> struct index_and_sizeof;
......@@ -1585,11 +1587,151 @@ namespace cv
}
}
//////////////////////////////////mulSpectrums////////////////////////////////////////////////////
void cv::ocl::mulSpectrums(const oclMat &a, const oclMat &b, oclMat &c, int /*flags*/, float scale, bool conjB)
{
CV_Assert(a.type() == CV_32FC2);
CV_Assert(b.type() == CV_32FC2);
c.create(a.size(), CV_32FC2);
size_t lt[3] = { 16, 16, 1 };
size_t gt[3] = { a.cols, a.rows, 1 };
String kernelName = conjB ? "mulAndScaleSpectrumsKernel_CONJ":"mulAndScaleSpectrumsKernel";
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&a.data ));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&b.data ));
args.push_back( std::make_pair( sizeof(cl_float), (void *)&scale));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&c.data ));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&a.cols ));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&a.rows));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&a.step ));
Context *clCxt = Context::getContext();
openCLExecuteKernel(clCxt, &imgproc_mulAndScaleSpectrums, kernelName, gt, lt, args, -1, -1);
}
//////////////////////////////////convolve////////////////////////////////////////////////////
// Number of `grain`-sized chunks needed to cover `total` items, i.e. the
// quotient total / grain rounded up (for non-negative total and positive grain).
inline int divUp(int total, int grain)
{
    const int numerator = total + grain - 1;
    return numerator / grain;
}
// Ported from the CUDA module.
// Computes the result, tile and DFT sizes for convolving an image of
// `image_size` with a template of `templ_size`, and (re)allocates every
// scratch buffer accordingly.
//   image_size - size of the source image
//   templ_size - size of the convolution kernel / template
void cv::ocl::ConvolveBuf::create(Size image_size, Size templ_size)
{
    result_size = Size(image_size.width - templ_size.width + 1,
                       image_size.height - templ_size.height + 1);

    // A zero in either dimension of user_block_size requests automatic estimation.
    block_size = user_block_size;
    if (user_block_size.width == 0 || user_block_size.height == 0)
        block_size = estimateBlockSize(result_size, templ_size);

    // The CUDA original first rounded the DFT size up to a power of two and only
    // fell back to getOptimalDFTSize() for very large sizes. Here the optimal
    // size is always used, so the power-of-two computation was dead code (and
    // undefined for non-positive arguments) and has been removed.
    dft_size.width = getOptimalDFTSize(block_size.width + templ_size.width - 1);
    dft_size.height = getOptimalDFTSize(block_size.height + templ_size.height - 1);

    // To avoid wasting time doing small DFTs
    dft_size.width = std::max(dft_size.width, 512);
    dft_size.height = std::max(dft_size.height, 512);

    // Real-valued working buffers, one DFT tile each.
    image_block.create(dft_size, CV_32F);
    templ_block.create(dft_size, CV_32F);
    result_data.create(dft_size, CV_32F);

    // Complex spectrum buffers: dft_size.width / 2 + 1 columns per row.
    image_spect.create(dft_size.height, dft_size.width / 2 + 1, CV_32FC2);
    templ_spect.create(dft_size.height, dft_size.width / 2 + 1, CV_32FC2);
    result_spect.create(dft_size.height, dft_size.width / 2 + 1, CV_32FC2);

    // Use maximum result matrix block size for the estimated DFT block size
    block_size.width = std::min(dft_size.width - templ_size.width + 1, result_size.width);
    block_size.height = std::min(dft_size.height - templ_size.height + 1, result_size.height);
}
// Picks a tile size of one third of the result in each dimension (rounded up),
// never larger than the result itself. The template size is not used.
Size cv::ocl::ConvolveBuf::estimateBlockSize(Size result_size, Size /*templ_size*/)
{
    const int third_w = (result_size.width + 2) / 3;
    const int third_h = (result_size.height + 2) / 3;

    return Size(std::min(third_w, result_size.width),
                std::min(third_h, result_size.height));
}
// FFT-based convolution / cross-correlation of `image` with `templ`.
// Sizes all scratch buffers through buf.create(), allocates `result` as
// buf.result_size / CV_32F, transforms the padded template once and then
// processes the result tile by tile: pad an image region, forward DFT,
// multiply with the template spectrum, inverse DFT, copy the valid part out.
//   image, templ - inputs, both must be CV_32F
//   result       - output, (re)allocated here
//   ccorr        - forwarded to mulSpectrums as its conjB argument
//   buf          - scratch buffers and block-size settings
// Without HAVE_CLAMDFFT the function only raises CV_StsNotImplemented.
static void convolve_run_fft(const oclMat &image, const oclMat &templ, oclMat &result, bool ccorr, ConvolveBuf& buf)
{
#if defined HAVE_CLAMDFFT
CV_Assert(image.type() == CV_32F);
CV_Assert(templ.type() == CV_32F);
buf.create(image.size(), templ.size());
result.create(buf.result_size, CV_32F);
// Local aliases for the buffer members, to keep the code below readable.
Size& block_size = buf.block_size;
Size& dft_size = buf.dft_size;
oclMat& image_block = buf.image_block;
oclMat& templ_block = buf.templ_block;
oclMat& result_data = buf.result_data;
oclMat& image_spect = buf.image_spect;
oclMat& templ_spect = buf.templ_spect;
oclMat& result_spect = buf.result_spect;
oclMat templ_roi = templ;
// Pad the template on the bottom and right up to the size of templ_block
// (border type 0 with a default Scalar -- presumably constant zero padding; confirm).
copyMakeBorder(templ_roi, templ_block, 0, templ_block.rows - templ_roi.rows, 0,
templ_block.cols - templ_roi.cols, 0, Scalar());
// The template spectrum is computed once and reused for every tile.
cv::ocl::dft(templ_block, templ_spect, dft_size);
// Process all blocks of the result matrix
for (int y = 0; y < result.rows; y += block_size.height)
{
for (int x = 0; x < result.cols; x += block_size.width)
{
// Image region starting at (x, y): up to dft_size, clipped to the image bounds.
Size image_roi_size(std::min(x + dft_size.width, image.cols) - x,
std::min(y + dft_size.height, image.rows) - y);
Rect roi0(x, y, image_roi_size.width, image_roi_size.height);
oclMat image_roi(image, roi0);
// Pad the region on the bottom and right up to the size of image_block.
copyMakeBorder(image_roi, image_block, 0, image_block.rows - image_roi.rows,
0, image_block.cols - image_roi.cols, 0, Scalar());
cv::ocl::dft(image_block, image_spect, dft_size);
// Multiply the spectrums; the 1 / area factor is applied here rather than in the inverse DFT.
mulSpectrums(image_spect, templ_spect, result_spect, 0,
1.f / dft_size.area(), ccorr);
cv::ocl::dft(result_spect, result_data, dft_size, cv::DFT_INVERSE | cv::DFT_REAL_OUTPUT);
// Only the top-left block_size part (clipped to the result bounds) is copied out.
Size result_roi_size(std::min(x + block_size.width, result.cols) - x,
std::min(y + block_size.height, result.rows) - y);
Rect roi1(x, y, result_roi_size.width, result_roi_size.height);
Rect roi2(0, 0, result_roi_size.width, result_roi_size.height);
oclMat result_roi(result, roi1);
oclMat result_block(result_data, roi2);
result_block.copyTo(result_roi);
}
}
#else
CV_Error(CV_StsNotImplemented, "OpenCL DFT is not implemented");
// Reference every parameter so this branch does not leave them unused.
#define UNUSED(x) (void)(x);
UNUSED(image) UNUSED(templ) UNUSED(result) UNUSED(ccorr) UNUSED(buf)
#undef UNUSED
#endif
}
static void convolve_run(const oclMat &src, const oclMat &temp1, oclMat &dst, String kernelName, const char **kernelString)
{
CV_Assert(src.depth() == CV_32FC1);
......@@ -1630,13 +1772,25 @@ static void convolve_run(const oclMat &src, const oclMat &temp1, oclMat &dst, St
openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth);
}
void cv::ocl::convolve(const oclMat &x, const oclMat &t, oclMat &y)
void cv::ocl::convolve(const oclMat &x, const oclMat &t, oclMat &y, bool ccorr)
{
CV_Assert(x.depth() == CV_32F);
CV_Assert(t.depth() == CV_32F);
CV_Assert(x.type() == y.type() && x.size() == y.size());
y.create(x.size(), x.type());
String kernelName = "convolve";
convolve_run(x, t, y, kernelName, &imgproc_convolve);
if(t.cols > 17 || t.rows > 17)
{
ConvolveBuf buf;
convolve_run_fft(x, t, y, ccorr, buf);
}
else
{
CV_Assert(ccorr == false);
convolve_run(x, t, y, kernelName, &imgproc_convolve);
}
}
// FFT-based convolution (or cross-correlation when `ccorr` is true) using a
// caller-supplied ConvolveBuf, so the scratch buffers and the block size can be
// controlled and reused by the caller.
//   image, templ - inputs (convolve_run_fft requires both to be CV_32F)
//   result       - output, allocated by convolve_run_fft as buf.result_size / CV_32F
//   ccorr        - evaluate cross-correlation instead of convolution
//   buf          - buffer object; see ConvolveBuf
void cv::ocl::convolve(const oclMat &image, const oclMat &templ, oclMat &result, bool ccorr, ConvolveBuf& buf)
{
    // No result.create(image.size(), ...) here: convolve_run_fft() allocates
    // `result` with the final size itself, so an image-sized allocation would
    // be thrown away immediately and would defeat reuse of a result matrix
    // that already has the right size.
    convolve_run_fft(image, templ, result, ccorr, buf);
}
......@@ -98,11 +98,25 @@ namespace cv
// Evaluates optimal template's area threshold. If
// template's area is less than the threshold, we use naive match
// template version, otherwise FFT-based (if available)
static bool useNaive(int , int , Size )
static bool useNaive(int method, int depth, Size size)
{
// FIXME!
// always use naive until convolve is imported
#ifdef HAVE_CLAMDFFT
if (method == CV_TM_SQDIFF && (depth == CV_32F || !Context::getContext()->supportsFeature(Context::CL_DOUBLE)))
{
return true;
}
else if(method == CV_TM_CCORR || (method == CV_TM_SQDIFF && depth == CV_8U))
{
return size.height < 18 && size.width < 18;
}
else
return false;
#else
#define UNUSED(x) (void)(x);
UNUSED(method) UNUSED(depth) UNUSED(size)
#undef UNUSED
return true;
#endif
}
//////////////////////////////////////////////////////////////////////
......@@ -223,9 +237,18 @@ namespace cv
//////////////////////////////////////////////////////////////////////
// CCORR
void convolve_32F(
const oclMat &, const oclMat &, oclMat &, MatchTemplateBuf &)
const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
{
CV_Error(-1, "convolve is not fully implemented yet");
ConvolveBuf convolve_buf;
convolve_buf.user_block_size = buf.user_block_size;
if (image.oclchannels() == 1)
convolve(image, templ, result, true, convolve_buf);
else
{
oclMat result_;
convolve(image.reshape(1), templ.reshape(1), result_, true, convolve_buf);
extractFirstChannel_32F(result_, result);
}
}
void matchTemplate_CCORR(
......
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Peng Xiao, pengxiao@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
typedef float2 cfloat;
// Complex product a * b, with .x holding the real and .y the imaginary part.
inline cfloat cmulf(cfloat a, cfloat b)
{
    const float re = a.x*b.x - a.y*b.y;
    const float im = a.x*b.y + a.y*b.x;
    return (cfloat)( re, im );
}
// Complex conjugate: keeps .x and negates .y.
inline cfloat conjf(cfloat a)
{
    a.y = -a.y;
    return a;
}
// dst = a * b * scale, element by element.
// One work-item per element; `mstep` is the row pitch in bytes and is shared
// by a, b and dst.
__kernel void
mulAndScaleSpectrumsKernel(
    __global const cfloat* a,
    __global const cfloat* b,
    float scale,
    __global cfloat* dst,
    uint cols,
    uint rows,
    uint mstep
)
{
    const uint x = get_global_id(0);
    const uint y = get_global_id(1);

    // Work-items outside the matrix have nothing to do.
    if (x >= cols || y >= rows)
        return;

    const uint idx = mad24(y, mstep / sizeof(cfloat), x);
    const cfloat prod = cmulf(a[idx], b[idx]);
    dst[idx] = (cfloat)( prod.x * scale, prod.y * scale );
}
// dst = a * conj(b) * scale, element by element.
// One work-item per element; `mstep` is the row pitch in bytes and is shared
// by a, b and dst.
__kernel void
mulAndScaleSpectrumsKernel_CONJ(
    __global const cfloat* a,
    __global const cfloat* b,
    float scale,
    __global cfloat* dst,
    uint cols,
    uint rows,
    uint mstep
)
{
    const uint x = get_global_id(0);
    const uint y = get_global_id(1);

    // Work-items outside the matrix have nothing to do.
    if (x >= cols || y >= rows)
        return;

    const uint idx = mad24(y, mstep / sizeof(cfloat), x);
    const cfloat prod = cmulf(a[idx], conjf(b[idx]));
    dst[idx] = (cfloat)( prod.x * scale, prod.y * scale );
}
......@@ -103,4 +103,138 @@ INSTANTIATE_TEST_CASE_P(OCL_ImgProc, Dft, testing::Combine(
testing::Values(cv::Size(2, 3), cv::Size(5, 4), cv::Size(25, 20), cv::Size(512, 1), cv::Size(1024, 768)),
testing::Values(0, (int)cv::DFT_ROWS, (int)cv::DFT_SCALE) ));
////////////////////////////////////////////////////////////////////////////
// MulSpectrums
// Fixture for the ocl::mulSpectrums tests, parameterized by
// (matrix size, DFT flags, conjugate-second-operand switch).
PARAM_TEST_CASE(MulSpectrums, cv::Size, DftFlags, bool)
{
// size of both input spectrums
cv::Size size;
// flags value forwarded to both the OpenCL and the reference mulSpectrums
int flag;
// forwarded as the conjB argument
bool ccorr;
// random CV_32FC2 inputs, regenerated for every test
cv::Mat a, b;
virtual void SetUp()
{
size = GET_PARAM(0);
flag = GET_PARAM(1);
ccorr = GET_PARAM(2);
a = randomMat(size, CV_32FC2);
b = randomMat(size, CV_32FC2);
}
};
// Unscaled product (scale == 1) must match cv::mulSpectrums.
TEST_P(MulSpectrums, Simple)
{
    // Reference result on the host.
    cv::Mat c_gold;
    cv::mulSpectrums(a, b, c_gold, flag, ccorr);

    // Same operation on the device.
    cv::ocl::oclMat d_a(a);
    cv::ocl::oclMat d_b(b);
    cv::ocl::oclMat c;
    cv::ocl::mulSpectrums(d_a, d_b, c, flag, 1.0, ccorr);

    EXPECT_MAT_NEAR(c_gold, c, 1e-2, "");
}
// Product scaled by 1 / area must match cv::mulSpectrums followed by the same scaling.
TEST_P(MulSpectrums, Scaled)
{
    float scale = 1.f / size.area();

    // Reference result on the host: multiply, then scale.
    cv::Mat c_gold;
    cv::mulSpectrums(a, b, c_gold, flag, ccorr);
    c_gold.convertTo(c_gold, c_gold.type(), scale);

    // Same operation on the device, scaling done by the kernel.
    cv::ocl::oclMat d_a(a);
    cv::ocl::oclMat d_b(b);
    cv::ocl::oclMat c;
    cv::ocl::mulSpectrums(d_a, d_b, c, flag, scale, ccorr);

    EXPECT_MAT_NEAR(c_gold, c, 1e-2, "");
}
// Runs the MulSpectrums tests for every size in DIFFERENT_SIZES, with the flags
// fixed to 0, both without and with conjugation of the second operand.
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, MulSpectrums, testing::Combine(
DIFFERENT_SIZES,
testing::Values(DftFlags(0)),
testing::Values(false, true)));
////////////////////////////////////////////////////////
// Convolve
// Host-side reference: DFT-based convolution (or cross-correlation when
// `ccorr` is true) of A with B. C receives the top-left
// (|A.rows - B.rows| + 1) x (|A.cols - B.cols| + 1) part of the result.
static void convolveDFT(const cv::Mat& A, const cv::Mat& B, cv::Mat& C, bool ccorr = false)
{
    // (Re)allocate the output.
    const int outRows = std::abs(A.rows - B.rows) + 1;
    const int outCols = std::abs(A.cols - B.cols) + 1;
    C.create(outRows, outCols, A.type());

    // DFT size large enough to hold the full linear convolution.
    cv::Size dftSize;
    dftSize.width = cv::getOptimalDFTSize(A.cols + B.cols - 1);
    dftSize.height = cv::getOptimalDFTSize(A.rows + B.rows - 1);

    // Zero-initialized work buffers with A and B placed in their top-left corners.
    cv::Mat paddedA(dftSize, A.type(), cv::Scalar::all(0));
    cv::Mat paddedB(dftSize, B.type(), cv::Scalar::all(0));

    cv::Mat cornerA(paddedA, cv::Rect(0, 0, A.cols, A.rows));
    A.copyTo(cornerA);
    cv::Mat cornerB(paddedB, cv::Rect(0, 0, B.cols, B.rows));
    B.copyTo(cornerB);

    // Forward transforms in place; the last argument is the "nonzeroRows"
    // hint that lets cv::dft skip the all-zero padding rows.
    cv::dft(paddedA, paddedA, 0, A.rows);
    cv::dft(paddedB, paddedB, 0, B.rows);

    // Per-element product of the (packed) spectrums, stored back into paddedA.
    cv::mulSpectrums(paddedA, paddedB, paddedA, 0, ccorr);

    // Back to the spatial domain. Only the first C.rows rows are needed,
    // hence nonzeroRows == C.rows.
    cv::dft(paddedA, paddedA, cv::DFT_INVERSE + cv::DFT_SCALE, C.rows);

    // Copy the wanted part of the result to the output.
    paddedA(cv::Rect(0, 0, C.cols, C.rows)).copyTo(C);
}
// Named wrappers for the test parameters (kernel side length and the ccorr switch).
IMPLEMENT_PARAM_CLASS(KSize, int);
IMPLEMENT_PARAM_CLASS(Ccorr, bool);
// Fixture for the FFT-based ocl::convolve tests, parameterized by
// (image size, kernel side length, cross-correlation switch).
PARAM_TEST_CASE(Convolve_DFT, cv::Size, KSize, Ccorr)
{
// size of the source image
cv::Size size;
// side length of the kernel
int ksize;
// true: cross-correlation, false: convolution
bool ccorr;
// matrices available to the test bodies; SetUp() does not fill them
cv::Mat src;
cv::Mat kernel;
cv::Mat dst_gold;
virtual void SetUp()
{
size = GET_PARAM(0);
ksize = GET_PARAM(1);
ccorr = GET_PARAM(2);
}
};
// Compares ocl::convolve against the host-side DFT reference (convolveDFT)
// for a random image and a random square kernel.
TEST_P(Convolve_DFT, Accuracy)
{
    // Use the fixture's src / kernel / dst_gold members instead of declaring
    // locals with the same names, which shadowed the members and left them unused.
    src = randomMat(size, CV_32FC1, 0.0, 100.0);
    kernel = randomMat(cv::Size(ksize, ksize), CV_32FC1, 0.0, 1.0);

    cv::ocl::oclMat dst;
    cv::ocl::convolve(cv::ocl::oclMat(src), cv::ocl::oclMat(kernel), dst, ccorr);

    convolveDFT(src, kernel, dst_gold, ccorr);

    EXPECT_MAT_NEAR(dst, dst_gold, 1e-1, "");
}
// Image sizes exercised by the Convolve_DFT tests.
#define DIFFERENT_CONVOLVE_SIZES testing::Values(cv::Size(251, 257), cv::Size(113, 113), cv::Size(200, 480), cv::Size(1300, 1300))
// Every kernel size used here is larger than 17, the limit above which
// cv::ocl::convolve switches to the FFT-based implementation
// (it tests t.cols > 17 || t.rows > 17). Only cross-correlation is enabled.
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, Convolve_DFT, testing::Combine(
DIFFERENT_CONVOLVE_SIZES,
testing::Values(KSize(19), KSize(23), KSize(45)),
testing::Values(Ccorr(true)/*, Ccorr(false)*/))); // false ccorr cannot pass for some instances
#endif // HAVE_CLAMDFFT
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment