Commit f36db3a0 authored by yao

more fix of mismatch

parent ad6aae45
This diff is collapsed.
......@@ -45,22 +45,28 @@
#pragma OPENCL EXTENSION cl_amd_printf : enable
#if defined (__ATI__)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#if defined (DOUBLE_SUPPORT)
#elif defined (__NVIDIA__)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#if !defined(USE_SQR_INTEGRAL) && (defined (__ATI__) || defined (__NVIDIA__))
#define TYPE_IMAGE_SQSUM double
#else
#define TYPE_IMAGE_SQSUM ulong
#define TYPE_IMAGE_SQSUM float
#endif
// Normalize CN4 to a number: 1 when the build options leave it undefined,
// 4 when it was defined (with any value).
#ifndef CN4
#define CN4 1
#else
// Drop the incoming definition first: redefining a macro with a different
// replacement list (e.g. "-D CN4" expands to 1) is not permitted without
// an #undef and is diagnosed by OpenCL/C compilers.
#undef CN4
#define CN4 4
#endif
//////////////////////////////////////////////////
// utilities
#define SQSUMS_PTR(ox, oy) mad24(gidy + oy, img_sqsums_step, gidx + img_sqsums_offset + ox)
#define SQSUMS_PTR(ox, oy) mad24(gidy + oy, img_sqsums_step, (gidx + img_sqsums_offset + ox) * CN4)
// Index into the sums buffer for the current work-item, shifted by (ox, oy):
// (gidy + oy) * img_sums_step + gidx + img_sums_offset + ox, evaluated with mad24.
// Expands in place, so gidx, gidy, img_sums_step and img_sums_offset must be
// visible wherever the macro is used.
#define SUMS_PTR(ox, oy) mad24(gidy + oy, img_sums_step, gidx + img_sums_offset + ox)
// normAcc* are accurate normalization routines which make GPU matchTemplate
// consistent with CPU one
......@@ -95,7 +101,7 @@ float normAcc_SQDIFF(float num, float denum)
__kernel
void normalizeKernel_C1_D0
(
__global const TYPE_IMAGE_SQSUM * img_sqsums,
__global const float * img_sqsums,
__global float * res,
ulong tpl_sqsum,
int res_rows,
......@@ -161,7 +167,7 @@ void matchTemplate_Prepared_SQDIFF_C1_D0
__kernel
void matchTemplate_Prepared_SQDIFF_NORMED_C1_D0
(
__global const TYPE_IMAGE_SQSUM * img_sqsums,
__global const float * img_sqsums,
__global float * res,
ulong tpl_sqsum,
int res_rows,
......@@ -702,7 +708,7 @@ void matchTemplate_Prepared_CCOFF_NORMED_C1_D0
__global const uint * img_sums,
int img_sums_offset,
int img_sums_step,
__global const TYPE_IMAGE_SQSUM * img_sqsums,
__global const float * img_sqsums,
int img_sqsums_offset,
int img_sqsums_step,
float tpl_sum,
......@@ -754,10 +760,10 @@ void matchTemplate_Prepared_CCOFF_NORMED_C4_D0
__global const uint * img_sums_c3,
int img_sums_offset,
int img_sums_step,
__global const TYPE_IMAGE_SQSUM * img_sqsums_c0,
__global const TYPE_IMAGE_SQSUM * img_sqsums_c1,
__global const TYPE_IMAGE_SQSUM * img_sqsums_c2,
__global const TYPE_IMAGE_SQSUM * img_sqsums_c3,
__global const float * img_sqsums_c0,
__global const float * img_sqsums_c1,
__global const float * img_sqsums_c2,
__global const float * img_sqsums_c3,
int img_sqsums_offset,
int img_sqsums_step,
float tpl_sum_c0,
......@@ -821,3 +827,32 @@ void matchTemplate_Prepared_CCOFF_NORMED_C4_D0
res[res_idx] = normAcc(num, denum);
}
}
//////////////////////////////////////////////////////////////////////
// extractFirstChannel
// Writes the .x component of every float4 element of `img` into the matching
// element of the float buffer `res`.
// img_step / res_step and img_offset / res_offset are divided by the element
// size before use, so they are expected in bytes.
// Work-items whose ids fall outside cols x rows do nothing.
__kernel
void extractFirstChannel
(
    const __global float4* img,
    __global float* res,
    int rows,
    int cols,
    int img_offset,
    int res_offset,
    int img_step,
    int res_step
)
{
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    // Rescale steps and offsets to element units.
    const int src_stride = img_step / sizeof(float4);
    const int dst_stride = res_step / sizeof(float);
    const int src_start = img_offset / sizeof(float4);
    const int dst_start = res_offset / sizeof(float);

    // Nothing to do for work-items outside the image.
    if (col >= cols || row >= rows)
        return;

    const __global float4* src = img + src_start;
    __global float* dst = res + dst_start;

    dst[col + row * dst_stride] = src[col + row * src_stride].x;
}
......@@ -75,7 +75,7 @@ PARAM_TEST_CASE(MatchTemplate8U, cv::Size, TemplateSize, Channels, TemplateMetho
}
};
TEST_P(MatchTemplate8U, DISABLED_Accuracy)
TEST_P(MatchTemplate8U, Accuracy)
{
std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl;
......@@ -138,18 +138,18 @@ TEST_P(MatchTemplate32F, Accuracy)
EXPECT_MAT_NEAR(dst_gold, mat_dst, templ_size.area() * 1e-1, sss);
}
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate8U,
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, MatchTemplate8U,
testing::Combine(
MTEMP_SIZES,
testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/),
testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16)), TemplateSize(cv::Size(30, 30))),
testing::Values(Channels(1), Channels(3), Channels(4)),
ALL_TEMPLATE_METHODS
)
);
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate32F, testing::Combine(
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, MatchTemplate32F, testing::Combine(
MTEMP_SIZES,
testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/),
testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16)), TemplateSize(cv::Size(30, 30))),
testing::Values(Channels(1), Channels(3), Channels(4)),
testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR))));
#endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment