Commit 916703c6 authored by Alexey Spizhevoy

Updated optimal block size estimation in gpu::convolve()

parent e3265b0c
...@@ -762,6 +762,7 @@ namespace cv ...@@ -762,6 +762,7 @@ namespace cv
ConvolveBuf(Size image_size, Size templ_size) ConvolveBuf(Size image_size, Size templ_size)
{ create(image_size, templ_size); } { create(image_size, templ_size); }
void create(Size image_size, Size templ_size); void create(Size image_size, Size templ_size);
// Overload of create() that takes the starting block size from the caller
// instead of deriving it via estimateBlockSize(). The stored block size is
// afterwards recomputed from the chosen DFT size and clamped to the result size.
void create(Size image_size, Size templ_size, Size block_size);
private: private:
static Size estimateBlockSize(Size result_size, Size templ_size); static Size estimateBlockSize(Size result_size, Size templ_size);
......
...@@ -1520,15 +1520,23 @@ void cv::gpu::dft(const GpuMat& src, GpuMat& dst, Size dft_size, int flags) ...@@ -1520,15 +1520,23 @@ void cv::gpu::dft(const GpuMat& src, GpuMat& dst, Size dft_size, int flags)
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
// convolve // convolve
void cv::gpu::ConvolveBuf::create(Size image_size, Size templ_size) void cv::gpu::ConvolveBuf::create(Size image_size, Size templ_size)
{ {
result_size = Size(image_size.width - templ_size.width + 1, result_size = Size(image_size.width - templ_size.width + 1,
image_size.height - templ_size.height + 1); image_size.height - templ_size.height + 1);
block_size = estimateBlockSize(result_size, templ_size); create(image_size, templ_size, estimateBlockSize(result_size, templ_size));
}
void cv::gpu::ConvolveBuf::create(Size image_size, Size templ_size, Size block_size)
{
result_size = Size(image_size.width - templ_size.width + 1,
image_size.height - templ_size.height + 1);
this->block_size = block_size;
dft_size.width = getOptimalDFTSize(block_size.width + templ_size.width - 1); dft_size.width = 1 << int(ceil(std::log(block_size.width + templ_size.width - 1.) / std::log(2.)));
dft_size.height = getOptimalDFTSize(block_size.width + templ_size.height - 1); dft_size.height = 1 << int(ceil(std::log(block_size.height + templ_size.height - 1.) / std::log(2.)));
createContinuous(dft_size, CV_32F, image_block); createContinuous(dft_size, CV_32F, image_block);
createContinuous(dft_size, CV_32F, templ_block); createContinuous(dft_size, CV_32F, templ_block);
createContinuous(dft_size, CV_32F, result_data); createContinuous(dft_size, CV_32F, result_data);
...@@ -1538,34 +1546,18 @@ void cv::gpu::ConvolveBuf::create(Size image_size, Size templ_size) ...@@ -1538,34 +1546,18 @@ void cv::gpu::ConvolveBuf::create(Size image_size, Size templ_size)
createContinuous(1, spect_len, CV_32FC2, templ_spect); createContinuous(1, spect_len, CV_32FC2, templ_spect);
createContinuous(1, spect_len, CV_32FC2, result_spect); createContinuous(1, spect_len, CV_32FC2, result_spect);
block_size.width = std::min(dft_size.width - templ_size.width + 1, result_size.width); this->block_size.width = std::min(dft_size.width - templ_size.width + 1, result_size.width);
block_size.height = std::min(dft_size.height - templ_size.height + 1, result_size.height); this->block_size.height = std::min(dft_size.height - templ_size.height + 1, result_size.height);
} }
Size cv::gpu::ConvolveBuf::estimateBlockSize(Size result_size, Size templ_size) Size cv::gpu::ConvolveBuf::estimateBlockSize(Size result_size, Size templ_size)
{ {
int scale = 40; int width = (result_size.width + 2) / 3;
Size bsize_min(512, 512); int height = (result_size.height + 2) / 3;
width = std::min(width, result_size.width);
// Check whether we use Fermi generation or newer GPU height = std::min(height, result_size.height);
if (DeviceInfo().majorVersion() >= 2) return Size(width, height);
{
bsize_min.width = 1024;
bsize_min.height = 1024;
}
Size bsize(std::max(templ_size.width * scale, bsize_min.width),
std::max(templ_size.height * scale, bsize_min.height));
int blocks_per_row = (result_size.width + bsize.width - 1) / bsize.width;
int blocks_per_col = (result_size.height + bsize.height - 1) / bsize.height;
bsize.width = (result_size.width + blocks_per_row - 1) / blocks_per_row;
bsize.height = (result_size.height + blocks_per_col - 1) / blocks_per_col;
bsize.width = std::min(bsize.width, result_size.width);
bsize.height = std::min(bsize.height, result_size.height);
return bsize;
} }
......
...@@ -3704,8 +3704,9 @@ TEST_P(MatchTemplate_CCOEF_NORMED, Accuracy) ...@@ -3704,8 +3704,9 @@ TEST_P(MatchTemplate_CCOEF_NORMED, Accuracy)
cv::Mat dstGold; cv::Mat dstGold;
cv::matchTemplate(image, pattern, dstGold, CV_TM_CCOEFF_NORMED); cv::matchTemplate(image, pattern, dstGold, CV_TM_CCOEFF_NORMED);
double minValGold, maxValGold;
cv::Point minLocGold, maxLocGold; cv::Point minLocGold, maxLocGold;
cv::minMaxLoc(dstGold, NULL, NULL, &minLocGold, &maxLocGold); cv::minMaxLoc(dstGold, &minValGold, &maxValGold, &minLocGold, &maxLocGold);
cv::Mat dst; cv::Mat dst;
ASSERT_NO_THROW( ASSERT_NO_THROW(
...@@ -3727,8 +3728,57 @@ TEST_P(MatchTemplate_CCOEF_NORMED, Accuracy) ...@@ -3727,8 +3728,57 @@ TEST_P(MatchTemplate_CCOEF_NORMED, Accuracy)
INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplate_CCOEF_NORMED, testing::Combine( INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplate_CCOEF_NORMED, testing::Combine(
testing::ValuesIn(devices()), testing::ValuesIn(devices()),
testing::Values(std::make_pair(std::string("matchtemplate/source-0.png"), std::string("matchtemplate/target-0.png")), testing::Values(std::make_pair(std::string("matchtemplate/source-0.png"), std::string("matchtemplate/target-0.png")))));
std::make_pair(std::string("matchtemplate/source-1.png"), std::string("matchtemplate/target-1.png")))));
// Fixture for the CCOEFF_NORMED "no throw" test.
// The test parameter is a (device, (image file, pattern file)) tuple; SetUp()
// unpacks it and loads both images, aborting the test if either one is empty.
struct MatchTemplate_CCOEF_NORMED_NoThrow : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, std::pair<std::string, std::string> > >
{
    cv::gpu::DeviceInfo devInfo;
    std::string imageName;
    std::string patternName;
    cv::Mat image, pattern;

    virtual void SetUp()
    {
        typedef std::pair<std::string, std::string> FilePair;

        // Pull the file-name pair out of the parameter once.
        const FilePair files = std::tr1::get<1>(GetParam());

        devInfo = std::tr1::get<0>(GetParam());
        imageName = files.first;
        patternName = files.second;

        // Load the source image first, then the pattern; each must be non-empty.
        image = readImage(imageName);
        ASSERT_FALSE(image.empty());

        pattern = readImage(patternName);
        ASSERT_FALSE(pattern.empty());
    }
};
// Verifies only that the GPU CCOEFF_NORMED template matching (upload, match,
// download) completes without throwing for the parameterised image pair.
TEST_P(MatchTemplate_CCOEF_NORMED_NoThrow, NoThrow)
{
    PRINT_PARAM(devInfo);
    PRINT_PARAM(imageName);
    PRINT_PARAM(patternName);

    // No CPU reference is computed here: this test never compares values, and
    // running cv::matchTemplate / cv::minMaxLoc outside ASSERT_NO_THROW would
    // let a CPU-side exception fail a test that is about the GPU path.
    cv::Mat dst;
    ASSERT_NO_THROW(
        cv::gpu::GpuMat dev_dst;
        cv::gpu::matchTemplate(cv::gpu::GpuMat(image), cv::gpu::GpuMat(pattern), dev_dst, CV_TM_CCOEFF_NORMED);
        dev_dst.download(dst);
    );
}
// Instantiate the NoThrow suite for every device returned by devices(),
// using the source-1 / target-1 image pair as the only input.
INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplate_CCOEF_NORMED_NoThrow, testing::Combine(
testing::ValuesIn(devices()),
testing::Values(std::make_pair(std::string("matchtemplate/source-1.png"), std::string("matchtemplate/target-1.png")))));
//////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment