Commit 50b72197 authored by Alexey Spizhevoy's avatar Alexey Spizhevoy

fixed bug in gpu::matchTemplate (added normalization routine to make the GPU…

fixed bug in gpu::matchTemplate (added normalization routine to make the GPU version consistent with the CPU one), added test cases from the ticket #1341
parent 8bf5c0e7
...@@ -313,6 +313,29 @@ void matchTemplatePrepared_SQDIFF_8U( ...@@ -313,6 +313,29 @@ void matchTemplatePrepared_SQDIFF_8U(
} }
// The normAcc* functions are accurate normalization routines which make the GPU
// matchTemplate results consistent with the CPU ones
// Divides num by denum with clamping of out-of-range ratios:
//   * |num| < denum            -> num / denum
//   * |num| < 1.125 * denum    -> +1 or -1, following the sign of num
//   * anything else            -> 0
// The last case also covers denum <= 0 and NaN operands, because both
// comparisons are then false.
// NOTE(review): the 1.125 tolerance presumably absorbs rounding error in the
// accumulated sums -- confirm against the CPU implementation.
__device__ float normAcc(float num, float denum)
{
    const float magnitude = fabs(num);

    float normalized = 0;
    if (magnitude < denum)
    {
        normalized = num / denum;
    }
    else if (magnitude < denum * 1.125f)
    {
        // Slightly past the valid range: saturate to the nearest bound.
        normalized = (num > 0) ? 1 : -1;
    }
    return normalized;
}
// Variant of the clamped division used by the SQDIFF_NORMED kernel:
//   * |num| < denum            -> num / denum
//   * |num| < 1.125 * denum    -> +1 or -1, following the sign of num
//   * anything else            -> 1
// The fallback of 1 is also what is returned for denum <= 0 or NaN operands,
// since both comparisons are false in those cases.
__device__ float normAcc_SQDIFF(float num, float denum)
{
    const float magnitude = fabs(num);
    const float saturated = (num > 0) ? 1.f : -1.f;

    return (magnitude < denum)          ? num / denum
         : (magnitude < denum * 1.125f) ? saturated
                                        : 1.f;
}
template <int cn> template <int cn>
__global__ void matchTemplatePreparedKernel_SQDIFF_NORMED_8U( __global__ void matchTemplatePreparedKernel_SQDIFF_NORMED_8U(
int w, int h, const PtrStep_<unsigned long long> image_sqsum, int w, int h, const PtrStep_<unsigned long long> image_sqsum,
...@@ -327,8 +350,8 @@ __global__ void matchTemplatePreparedKernel_SQDIFF_NORMED_8U( ...@@ -327,8 +350,8 @@ __global__ void matchTemplatePreparedKernel_SQDIFF_NORMED_8U(
(image_sqsum.ptr(y + h)[(x + w) * cn] - image_sqsum.ptr(y)[(x + w) * cn]) - (image_sqsum.ptr(y + h)[(x + w) * cn] - image_sqsum.ptr(y)[(x + w) * cn]) -
(image_sqsum.ptr(y + h)[x * cn] - image_sqsum.ptr(y)[x * cn])); (image_sqsum.ptr(y + h)[x * cn] - image_sqsum.ptr(y)[x * cn]));
float ccorr = result.ptr(y)[x]; float ccorr = result.ptr(y)[x];
result.ptr(y)[x] = min(1.f, (image_sqsum_ - 2.f * ccorr + templ_sqsum) * result.ptr(y)[x] = normAcc_SQDIFF(image_sqsum_ - 2.f * ccorr + templ_sqsum,
rsqrtf(image_sqsum_ * templ_sqsum)); sqrtf(image_sqsum_ * templ_sqsum));
} }
} }
...@@ -440,7 +463,7 @@ void matchTemplatePrepared_CCOFF_8UC2( ...@@ -440,7 +463,7 @@ void matchTemplatePrepared_CCOFF_8UC2(
__global__ void matchTemplatePreparedKernel_CCOFF_8UC3( __global__ void matchTemplatePreparedKernel_CCOFF_8UC3(
int w, int h, int w, int h,
float templ_sum_scale_r, float templ_sum_scale_r,
float templ_sum_scale_g, float templ_sum_scale_g,
float templ_sum_scale_b, float templ_sum_scale_b,
const PtrStep_<unsigned int> image_sum_r, const PtrStep_<unsigned int> image_sum_r,
...@@ -463,7 +486,7 @@ __global__ void matchTemplatePreparedKernel_CCOFF_8UC3( ...@@ -463,7 +486,7 @@ __global__ void matchTemplatePreparedKernel_CCOFF_8UC3(
(image_sum_b.ptr(y + h)[x + w] - image_sum_b.ptr(y)[x + w]) - (image_sum_b.ptr(y + h)[x + w] - image_sum_b.ptr(y)[x + w]) -
(image_sum_b.ptr(y + h)[x] - image_sum_b.ptr(y)[x])); (image_sum_b.ptr(y + h)[x] - image_sum_b.ptr(y)[x]));
float ccorr = result.ptr(y)[x]; float ccorr = result.ptr(y)[x];
result.ptr(y)[x] = ccorr - image_sum_r_ * templ_sum_scale_r result.ptr(y)[x] = ccorr - image_sum_r_ * templ_sum_scale_r
- image_sum_g_ * templ_sum_scale_g - image_sum_g_ * templ_sum_scale_g
- image_sum_b_ * templ_sum_scale_b; - image_sum_b_ * templ_sum_scale_b;
} }
...@@ -484,8 +507,8 @@ void matchTemplatePrepared_CCOFF_8UC3( ...@@ -484,8 +507,8 @@ void matchTemplatePrepared_CCOFF_8UC3(
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y)); dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
matchTemplatePreparedKernel_CCOFF_8UC3<<<grid, threads>>>( matchTemplatePreparedKernel_CCOFF_8UC3<<<grid, threads>>>(
w, h, w, h,
(float)templ_sum_r / (w * h), (float)templ_sum_r / (w * h),
(float)templ_sum_g / (w * h), (float)templ_sum_g / (w * h),
(float)templ_sum_b / (w * h), (float)templ_sum_b / (w * h),
image_sum_r, image_sum_g, image_sum_b, result); image_sum_r, image_sum_g, image_sum_b, result);
cudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaGetLastError() );
...@@ -579,8 +602,8 @@ __global__ void matchTemplatePreparedKernel_CCOFF_NORMED_8U( ...@@ -579,8 +602,8 @@ __global__ void matchTemplatePreparedKernel_CCOFF_NORMED_8U(
float image_sqsum_ = (float)( float image_sqsum_ = (float)(
(image_sqsum.ptr(y + h)[x + w] - image_sqsum.ptr(y)[x + w]) - (image_sqsum.ptr(y + h)[x + w] - image_sqsum.ptr(y)[x + w]) -
(image_sqsum.ptr(y + h)[x] - image_sqsum.ptr(y)[x])); (image_sqsum.ptr(y + h)[x] - image_sqsum.ptr(y)[x]));
result.ptr(y)[x] = (ccorr - image_sum_ * templ_sum_scale) * result.ptr(y)[x] = normAcc(ccorr - image_sum_ * templ_sum_scale,
rsqrtf(templ_sqsum_scale * max(1e-3f, image_sqsum_ - weight * image_sum_ * image_sum_)); sqrtf(templ_sqsum_scale * (image_sqsum_ - weight * image_sum_ * image_sum_)));
} }
} }
...@@ -631,11 +654,12 @@ __global__ void matchTemplatePreparedKernel_CCOFF_NORMED_8UC2( ...@@ -631,11 +654,12 @@ __global__ void matchTemplatePreparedKernel_CCOFF_NORMED_8UC2(
float image_sqsum_g_ = (float)( float image_sqsum_g_ = (float)(
(image_sqsum_g.ptr(y + h)[x + w] - image_sqsum_g.ptr(y)[x + w]) - (image_sqsum_g.ptr(y + h)[x + w] - image_sqsum_g.ptr(y)[x + w]) -
(image_sqsum_g.ptr(y + h)[x] - image_sqsum_g.ptr(y)[x])); (image_sqsum_g.ptr(y + h)[x] - image_sqsum_g.ptr(y)[x]));
float ccorr = result.ptr(y)[x];
float rdenom = rsqrtf(templ_sqsum_scale * max(1e-3f, image_sqsum_r_ - weight * image_sum_r_ * image_sum_r_ float num = result.ptr(y)[x] - image_sum_r_ * templ_sum_scale_r
+ image_sqsum_g_ - weight * image_sum_g_ * image_sum_g_)); - image_sum_g_ * templ_sum_scale_g;
result.ptr(y)[x] = (ccorr - image_sum_r_ * templ_sum_scale_r float denum = sqrtf(templ_sqsum_scale * (image_sqsum_r_ - weight * image_sum_r_ * image_sum_r_
- image_sum_g_ * templ_sum_scale_g) * rdenom; + image_sqsum_g_ - weight * image_sum_g_ * image_sum_g_));
result.ptr(y)[x] = normAcc(num, denum);
} }
} }
...@@ -701,13 +725,14 @@ __global__ void matchTemplatePreparedKernel_CCOFF_NORMED_8UC3( ...@@ -701,13 +725,14 @@ __global__ void matchTemplatePreparedKernel_CCOFF_NORMED_8UC3(
float image_sqsum_b_ = (float)( float image_sqsum_b_ = (float)(
(image_sqsum_b.ptr(y + h)[x + w] - image_sqsum_b.ptr(y)[x + w]) - (image_sqsum_b.ptr(y + h)[x + w] - image_sqsum_b.ptr(y)[x + w]) -
(image_sqsum_b.ptr(y + h)[x] - image_sqsum_b.ptr(y)[x])); (image_sqsum_b.ptr(y + h)[x] - image_sqsum_b.ptr(y)[x]));
float ccorr = result.ptr(y)[x];
float rdenom = rsqrtf(templ_sqsum_scale * max(1e-3f, image_sqsum_r_ - weight * image_sum_r_ * image_sum_r_ float num = result.ptr(y)[x] - image_sum_r_ * templ_sum_scale_r
+ image_sqsum_g_ - weight * image_sum_g_ * image_sum_g_ - image_sum_g_ * templ_sum_scale_g
+ image_sqsum_b_ - weight * image_sum_b_ * image_sum_b_)); - image_sum_b_ * templ_sum_scale_b;
result.ptr(y)[x] = (ccorr - image_sum_r_ * templ_sum_scale_r float denum = sqrtf(templ_sqsum_scale * (image_sqsum_r_ - weight * image_sum_r_ * image_sum_r_
- image_sum_g_ * templ_sum_scale_g + image_sqsum_g_ - weight * image_sum_g_ * image_sum_g_
- image_sum_b_ * templ_sum_scale_b) * rdenom; + image_sqsum_b_ - weight * image_sum_b_ * image_sum_b_));
result.ptr(y)[x] = normAcc(num, denum);
} }
} }
...@@ -785,15 +810,14 @@ __global__ void matchTemplatePreparedKernel_CCOFF_NORMED_8UC4( ...@@ -785,15 +810,14 @@ __global__ void matchTemplatePreparedKernel_CCOFF_NORMED_8UC4(
float image_sqsum_a_ = (float)( float image_sqsum_a_ = (float)(
(image_sqsum_a.ptr(y + h)[x + w] - image_sqsum_a.ptr(y)[x + w]) - (image_sqsum_a.ptr(y + h)[x + w] - image_sqsum_a.ptr(y)[x + w]) -
(image_sqsum_a.ptr(y + h)[x] - image_sqsum_a.ptr(y)[x])); (image_sqsum_a.ptr(y + h)[x] - image_sqsum_a.ptr(y)[x]));
float ccorr = result.ptr(y)[x];
float rdenom = rsqrtf(templ_sqsum_scale * max(1e-3f, image_sqsum_r_ - weight * image_sum_r_ * image_sum_r_ float num = result.ptr(y)[x] - image_sum_r_ * templ_sum_scale_r - image_sum_g_ * templ_sum_scale_g
+ image_sqsum_g_ - weight * image_sum_g_ * image_sum_g_ - image_sum_b_ * templ_sum_scale_b - image_sum_a_ * templ_sum_scale_a;
+ image_sqsum_b_ - weight * image_sum_b_ * image_sum_b_ float denum = sqrtf(templ_sqsum_scale * (image_sqsum_r_ - weight * image_sum_r_ * image_sum_r_
+ image_sqsum_a_ - weight * image_sum_a_ * image_sum_a_)); + image_sqsum_g_ - weight * image_sum_g_ * image_sum_g_
result.ptr(y)[x] = (ccorr - image_sum_r_ * templ_sum_scale_r + image_sqsum_b_ - weight * image_sum_b_ * image_sum_b_
- image_sum_g_ * templ_sum_scale_g + image_sqsum_a_ - weight * image_sum_a_ * image_sum_a_));
- image_sum_b_ * templ_sum_scale_b result.ptr(y)[x] = normAcc(num, denum);
- image_sum_a_ * templ_sum_scale_a) * rdenom;
} }
} }
...@@ -850,7 +874,7 @@ __global__ void normalizeKernel_8U( ...@@ -850,7 +874,7 @@ __global__ void normalizeKernel_8U(
float image_sqsum_ = (float)( float image_sqsum_ = (float)(
(image_sqsum.ptr(y + h)[(x + w) * cn] - image_sqsum.ptr(y)[(x + w) * cn]) - (image_sqsum.ptr(y + h)[(x + w) * cn] - image_sqsum.ptr(y)[(x + w) * cn]) -
(image_sqsum.ptr(y + h)[x * cn] - image_sqsum.ptr(y)[x * cn])); (image_sqsum.ptr(y + h)[x * cn] - image_sqsum.ptr(y)[x * cn]));
result.ptr(y)[x] = result.ptr(y)[x] * rsqrtf(max(1.f, image_sqsum_) * templ_sqsum); result.ptr(y)[x] = normAcc(result.ptr(y)[x], sqrtf(image_sqsum_ * templ_sqsum));
} }
} }
......
...@@ -3594,13 +3594,8 @@ INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplate32F, testing::Combine( ...@@ -3594,13 +3594,8 @@ INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplate32F, testing::Combine(
testing::Range(1, 5), testing::Range(1, 5),
testing::Values((int)CV_TM_SQDIFF, (int)CV_TM_CCORR))); testing::Values((int)CV_TM_SQDIFF, (int)CV_TM_CCORR)));
struct MatchTemplate : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> > struct MatchTemplateBlackSource : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >
{ {
cv::Mat image;
cv::Mat pattern;
cv::Point maxLocGold;
cv::gpu::DeviceInfo devInfo; cv::gpu::DeviceInfo devInfo;
int method; int method;
...@@ -3608,26 +3603,25 @@ struct MatchTemplate : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceIn ...@@ -3608,26 +3603,25 @@ struct MatchTemplate : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceIn
{ {
devInfo = std::tr1::get<0>(GetParam()); devInfo = std::tr1::get<0>(GetParam());
method = std::tr1::get<1>(GetParam()); method = std::tr1::get<1>(GetParam());
cv::gpu::setDevice(devInfo.deviceID()); cv::gpu::setDevice(devInfo.deviceID());
image = readImage("matchtemplate/black.png");
ASSERT_FALSE(image.empty());
pattern = readImage("matchtemplate/cat.png");
ASSERT_FALSE(pattern.empty());
maxLocGold = cv::Point(284, 12);
} }
}; };
TEST_P(MatchTemplate, FindPatternInBlack) TEST_P(MatchTemplateBlackSource, Accuracy)
{ {
const char* matchTemplateMethodStr = matchTemplateMethods[method]; const char* matchTemplateMethodStr = matchTemplateMethods[method];
PRINT_PARAM(devInfo); PRINT_PARAM(devInfo);
PRINT_PARAM(matchTemplateMethodStr); PRINT_PARAM(matchTemplateMethodStr);
cv::Mat image = readImage("matchtemplate/black.png");
ASSERT_FALSE(image.empty());
cv::Mat pattern = readImage("matchtemplate/cat.png");
ASSERT_FALSE(pattern.empty());
cv::Point maxLocGold = cv::Point(284, 12);
cv::Mat dst; cv::Mat dst;
ASSERT_NO_THROW( ASSERT_NO_THROW(
...@@ -3643,10 +3637,61 @@ TEST_P(MatchTemplate, FindPatternInBlack) ...@@ -3643,10 +3637,61 @@ TEST_P(MatchTemplate, FindPatternInBlack)
ASSERT_EQ(maxLocGold, maxLoc); ASSERT_EQ(maxLocGold, maxLoc);
} }
INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplate, testing::Combine( INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplateBlackSource, testing::Combine(
testing::ValuesIn(devices()), testing::ValuesIn(devices()),
testing::Values((int)CV_TM_CCOEFF_NORMED, (int)CV_TM_CCORR_NORMED))); testing::Values((int)CV_TM_CCOEFF_NORMED, (int)CV_TM_CCORR_NORMED)));
// Fixture for the CV_TM_CCOEFF_NORMED accuracy test.
// Test parameter: (GPU device, (source image path, template image path)).
struct MatchTemplate_CCOEF_NORMED : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, std::tr1::tuple<const char*, const char*> > >
{
    cv::gpu::DeviceInfo devInfo; // device this test instance is parameterized with
    cv::Mat image, pattern;      // source image and template loaded in SetUp()

    // Selects the parameterized device and loads both images; the test is
    // aborted if either image fails to load.
    virtual void SetUp()
    {
        devInfo = std::tr1::get<0>(GetParam());

        // Make the parameterized device current so that the GPU calls in the
        // test body run on it rather than on whichever device happened to be
        // active (the other matchTemplate fixture in this file does the same).
        cv::gpu::setDevice(devInfo.deviceID());

        image = readImage(std::tr1::get<0>(std::tr1::get<1>(GetParam())));
        ASSERT_FALSE(image.empty());

        pattern = readImage(std::tr1::get<1>(std::tr1::get<1>(GetParam())));
        ASSERT_FALSE(pattern.empty());
    }
};
// Checks that cv::gpu::matchTemplate with CV_TM_CCOEFF_NORMED finds its minimum
// and maximum at the same locations as cv::matchTemplate on the CPU, and that
// the GPU response values stay within [-1, 1].
TEST_P(MatchTemplate_CCOEF_NORMED, Accuracy)
{
PRINT_PARAM(devInfo);
// CPU reference: only the extremum locations are needed, so the value
// outputs of minMaxLoc are passed as NULL.
cv::Mat dstGold;
cv::matchTemplate(image, pattern, dstGold, CV_TM_CCOEFF_NORMED);
cv::Point minLocGold, maxLocGold;
cv::minMaxLoc(dstGold, NULL, NULL, &minLocGold, &maxLocGold);
// GPU result: upload both inputs, run the match, download the response map.
cv::Mat dst;
ASSERT_NO_THROW(
cv::gpu::GpuMat dev_dst;
cv::gpu::matchTemplate(cv::gpu::GpuMat(image), cv::gpu::GpuMat(pattern), dev_dst, CV_TM_CCOEFF_NORMED);
dev_dst.download(dst);
);
cv::Point minLoc, maxLoc;
double minVal, maxVal;
cv::minMaxLoc(dst, &minVal, &maxVal, &minLoc, &maxLoc);
// Extremum locations must match the CPU reference exactly.
ASSERT_EQ(minLocGold, minLoc);
ASSERT_EQ(maxLocGold, maxLoc);
// GPU response values must not leave the [-1, 1] range.
ASSERT_LE(maxVal, 1.);
ASSERT_GE(minVal, -1.);
}
// Instantiates the test for every combination of a device from devices() and
// one of the two (source image, template image) file pairs listed below.
INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplate_CCOEF_NORMED, testing::Combine(
testing::ValuesIn(devices()),
testing::Values(std::tr1::make_tuple("matchtemplate/source-0.png", "matchtemplate/target-0.png"),
std::tr1::make_tuple("matchtemplate/source-1.png", "matchtemplate/target-1.png"))));
//////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////
// MulSpectrums // MulSpectrums
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment