Commit f36db3a0 authored by yao

more fix of mismatch

parent ad6aae45
This diff is collapsed.
...@@ -45,22 +45,28 @@ ...@@ -45,22 +45,28 @@
#pragma OPENCL EXTENSION cl_amd_printf : enable #pragma OPENCL EXTENSION cl_amd_printf : enable
#if defined (__ATI__) #if defined (DOUBLE_SUPPORT)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (__NVIDIA__) #ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable #pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif #endif
#if !defined(USE_SQR_INTEGRAL) && (defined (__ATI__) || defined (__NVIDIA__))
#define TYPE_IMAGE_SQSUM double #define TYPE_IMAGE_SQSUM double
#else #else
#define TYPE_IMAGE_SQSUM ulong #define TYPE_IMAGE_SQSUM float
#endif
#ifndef CN4
#define CN4 1
#else
#define CN4 4
#endif #endif
////////////////////////////////////////////////// //////////////////////////////////////////////////
// utilities // utilities
#define SQSUMS_PTR(ox, oy) mad24(gidy + oy, img_sqsums_step, gidx + img_sqsums_offset + ox) #define SQSUMS_PTR(ox, oy) mad24(gidy + oy, img_sqsums_step, (gidx + img_sqsums_offset + ox) * CN4)
#define SUMS_PTR(ox, oy) mad24(gidy + oy, img_sums_step, gidx + img_sums_offset + ox) #define SUMS_PTR(ox, oy) mad24(gidy + oy, img_sums_step, gidx + img_sums_offset + ox)
// normAcc* are accurate normalization routines which make GPU matchTemplate // normAcc* are accurate normalization routines which make GPU matchTemplate
// consistent with CPU one // consistent with CPU one
...@@ -95,7 +101,7 @@ float normAcc_SQDIFF(float num, float denum) ...@@ -95,7 +101,7 @@ float normAcc_SQDIFF(float num, float denum)
__kernel __kernel
void normalizeKernel_C1_D0 void normalizeKernel_C1_D0
( (
__global const TYPE_IMAGE_SQSUM * img_sqsums, __global const float * img_sqsums,
__global float * res, __global float * res,
ulong tpl_sqsum, ulong tpl_sqsum,
int res_rows, int res_rows,
...@@ -161,7 +167,7 @@ void matchTemplate_Prepared_SQDIFF_C1_D0 ...@@ -161,7 +167,7 @@ void matchTemplate_Prepared_SQDIFF_C1_D0
__kernel __kernel
void matchTemplate_Prepared_SQDIFF_NORMED_C1_D0 void matchTemplate_Prepared_SQDIFF_NORMED_C1_D0
( (
__global const TYPE_IMAGE_SQSUM * img_sqsums, __global const float * img_sqsums,
__global float * res, __global float * res,
ulong tpl_sqsum, ulong tpl_sqsum,
int res_rows, int res_rows,
...@@ -702,7 +708,7 @@ void matchTemplate_Prepared_CCOFF_NORMED_C1_D0 ...@@ -702,7 +708,7 @@ void matchTemplate_Prepared_CCOFF_NORMED_C1_D0
__global const uint * img_sums, __global const uint * img_sums,
int img_sums_offset, int img_sums_offset,
int img_sums_step, int img_sums_step,
__global const TYPE_IMAGE_SQSUM * img_sqsums, __global const float * img_sqsums,
int img_sqsums_offset, int img_sqsums_offset,
int img_sqsums_step, int img_sqsums_step,
float tpl_sum, float tpl_sum,
...@@ -754,10 +760,10 @@ void matchTemplate_Prepared_CCOFF_NORMED_C4_D0 ...@@ -754,10 +760,10 @@ void matchTemplate_Prepared_CCOFF_NORMED_C4_D0
__global const uint * img_sums_c3, __global const uint * img_sums_c3,
int img_sums_offset, int img_sums_offset,
int img_sums_step, int img_sums_step,
__global const TYPE_IMAGE_SQSUM * img_sqsums_c0, __global const float * img_sqsums_c0,
__global const TYPE_IMAGE_SQSUM * img_sqsums_c1, __global const float * img_sqsums_c1,
__global const TYPE_IMAGE_SQSUM * img_sqsums_c2, __global const float * img_sqsums_c2,
__global const TYPE_IMAGE_SQSUM * img_sqsums_c3, __global const float * img_sqsums_c3,
int img_sqsums_offset, int img_sqsums_offset,
int img_sqsums_step, int img_sqsums_step,
float tpl_sum_c0, float tpl_sum_c0,
...@@ -821,3 +827,32 @@ void matchTemplate_Prepared_CCOFF_NORMED_C4_D0 ...@@ -821,3 +827,32 @@ void matchTemplate_Prepared_CCOFF_NORMED_C4_D0
res[res_idx] = normAcc(num, denum); res[res_idx] = normAcc(num, denum);
} }
} }
//////////////////////////////////////////////////////////////////////
// extractFirstChannel
//
// Writes the .x component of each float4 element of `img` into the
// float buffer `res`, one element per work-item.
//
//   img, res               - source (float4) and destination (float) buffers
//   rows, cols             - extent checked against the work-item ids
//                            (id 1 against rows, id 0 against cols)
//   img_offset, res_offset - start offsets; divided by the element size
//                            before use (presumably passed in bytes -
//                            TODO confirm against the host-side caller)
//   img_step, res_step     - row pitches; likewise divided by the element
//                            size before use
//
// Work-items whose ids fall outside cols x rows do nothing.
__kernel
void extractFirstChannel
(
    const __global float4* img,
    __global float* res,
    int rows,
    int cols,
    int img_offset,
    int res_offset,
    int img_step,
    int res_step
)
{
    const int x = get_global_id(0);
    const int y = get_global_id(1);

    // Skip work-items that lie outside the requested region.
    if (x >= cols || y >= rows)
    {
        return;
    }

    // Convert pitches and offsets into element counts.
    const int src_pitch = img_step / sizeof(float4);
    const int dst_pitch = res_step / sizeof(float);
    const int src_first = img_offset / sizeof(float4);
    const int dst_first = res_offset / sizeof(float);

    const __global float4* src = img + src_first;
    __global float* dst = res + dst_first;

    dst[x + y * dst_pitch] = src[x + y * src_pitch].x;
}
...@@ -75,7 +75,7 @@ PARAM_TEST_CASE(MatchTemplate8U, cv::Size, TemplateSize, Channels, TemplateMetho ...@@ -75,7 +75,7 @@ PARAM_TEST_CASE(MatchTemplate8U, cv::Size, TemplateSize, Channels, TemplateMetho
} }
}; };
TEST_P(MatchTemplate8U, DISABLED_Accuracy) TEST_P(MatchTemplate8U, Accuracy)
{ {
std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl; std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl;
...@@ -138,18 +138,18 @@ TEST_P(MatchTemplate32F, Accuracy) ...@@ -138,18 +138,18 @@ TEST_P(MatchTemplate32F, Accuracy)
EXPECT_MAT_NEAR(dst_gold, mat_dst, templ_size.area() * 1e-1, sss); EXPECT_MAT_NEAR(dst_gold, mat_dst, templ_size.area() * 1e-1, sss);
} }
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate8U, INSTANTIATE_TEST_CASE_P(OCL_ImgProc, MatchTemplate8U,
testing::Combine( testing::Combine(
MTEMP_SIZES, MTEMP_SIZES,
testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/), testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16)), TemplateSize(cv::Size(30, 30))),
testing::Values(Channels(1), Channels(3), Channels(4)), testing::Values(Channels(1), Channels(3), Channels(4)),
ALL_TEMPLATE_METHODS ALL_TEMPLATE_METHODS
) )
); );
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate32F, testing::Combine( INSTANTIATE_TEST_CASE_P(OCL_ImgProc, MatchTemplate32F, testing::Combine(
MTEMP_SIZES, MTEMP_SIZES,
testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/), testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16)), TemplateSize(cv::Size(30, 30))),
testing::Values(Channels(1), Channels(3), Channels(4)), testing::Values(Channels(1), Channels(3), Channels(4)),
testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR)))); testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR))));
#endif #endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment