Commit 6c99b5c9 authored by Andrey Kamaev, committed by OpenCV Buildbot

Merge pull request #705 from bitwangyaoyao:2.4_oclFix

parents 6a6ae355 d6f1ad8c
...@@ -43,7 +43,7 @@ if(OPENCL_FOUND) ...@@ -43,7 +43,7 @@ if(OPENCL_FOUND)
set(OPENCL_LIBRARIES ${OPENCL_LIBRARY}) set(OPENCL_LIBRARIES ${OPENCL_LIBRARY})
if (X86_64) if (X86_64)
set(CLAMD_POSSIBLE_LIB_SUFFIXES lib32/import) set(CLAMD_POSSIBLE_LIB_SUFFIXES lib64/import)
elseif (X86) elseif (X86)
set(CLAMD_POSSIBLE_LIB_SUFFIXES lib32/import) set(CLAMD_POSSIBLE_LIB_SUFFIXES lib32/import)
endif() endif()
......
...@@ -18,6 +18,7 @@ foreach(cl ${cl_list}) ...@@ -18,6 +18,7 @@ foreach(cl ${cl_list})
string(REPLACE "\t" " " lines "${lines}") string(REPLACE "\t" " " lines "${lines}")
string(REGEX REPLACE "/\\*([^*]/|\\*[^/]|[^*/])*\\*/" "" lines "${lines}") # multiline comments string(REGEX REPLACE "/\\*([^*]/|\\*[^/]|[^*/])*\\*/" "" lines "${lines}") # multiline comments
string(REGEX REPLACE "/\\*([^\n])*\\*/" "" lines "${lines}") # single-line comments
string(REGEX REPLACE "[ ]*//[^\n]*\n" "\n" lines "${lines}") # single-line comments string(REGEX REPLACE "[ ]*//[^\n]*\n" "\n" lines "${lines}") # single-line comments
string(REGEX REPLACE "\n[ ]*(\n[ ]*)*" "\n" lines "${lines}") # empty lines & leading whitespace string(REGEX REPLACE "\n[ ]*(\n[ ]*)*" "\n" lines "${lines}") # empty lines & leading whitespace
string(REGEX REPLACE "^\n" "" lines "${lines}") # leading new line string(REGEX REPLACE "^\n" "" lines "${lines}") # leading new line
......
This diff is collapsed.
...@@ -953,8 +953,8 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS ...@@ -953,8 +953,8 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
//int flag = 0; //int flag = 0;
oclMat gimg1(gimg.rows, gimg.cols, CV_8UC1); oclMat gimg1(gimg.rows, gimg.cols, CV_8UC1);
oclMat gsum(totalheight, gimg.cols + 1, CV_32SC1); oclMat gsum(totalheight + 4, gimg.cols + 1, CV_32SC1);
oclMat gsqsum(totalheight, gimg.cols + 1, CV_32FC1); oclMat gsqsum(totalheight + 4, gimg.cols + 1, CV_32FC1);
//cl_mem cascadebuffer; //cl_mem cascadebuffer;
cl_mem stagebuffer; cl_mem stagebuffer;
......
This diff is collapsed.
...@@ -277,16 +277,7 @@ static void ocl_cvMoments( const void* array, CvMoments* mom, int binary ) ...@@ -277,16 +277,7 @@ static void ocl_cvMoments( const void* array, CvMoments* mom, int binary )
blocky = size.height/TILE_SIZE; blocky = size.height/TILE_SIZE;
else else
blocky = size.height/TILE_SIZE + 1; blocky = size.height/TILE_SIZE + 1;
cv::ocl::oclMat dst_m00(blocky, blockx, CV_64FC1); cv::ocl::oclMat dst_m(blocky * 10, blockx, CV_64FC1);
cv::ocl::oclMat dst_m10(blocky, blockx, CV_64FC1);
cv::ocl::oclMat dst_m01(blocky, blockx, CV_64FC1);
cv::ocl::oclMat dst_m20(blocky, blockx, CV_64FC1);
cv::ocl::oclMat dst_m11(blocky, blockx, CV_64FC1);
cv::ocl::oclMat dst_m02(blocky, blockx, CV_64FC1);
cv::ocl::oclMat dst_m30(blocky, blockx, CV_64FC1);
cv::ocl::oclMat dst_m21(blocky, blockx, CV_64FC1);
cv::ocl::oclMat dst_m12(blocky, blockx, CV_64FC1);
cv::ocl::oclMat dst_m03(blocky, blockx, CV_64FC1);
cl_mem sum = openCLCreateBuffer(src.clCxt,CL_MEM_READ_WRITE,10*sizeof(double)); cl_mem sum = openCLCreateBuffer(src.clCxt,CL_MEM_READ_WRITE,10*sizeof(double));
int tile_width = std::min(size.width,TILE_SIZE); int tile_width = std::min(size.width,TILE_SIZE);
int tile_height = std::min(size.height,TILE_SIZE); int tile_height = std::min(size.height,TILE_SIZE);
...@@ -299,25 +290,17 @@ static void ocl_cvMoments( const void* array, CvMoments* mom, int binary ) ...@@ -299,25 +290,17 @@ static void ocl_cvMoments( const void* array, CvMoments* mom, int binary )
args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step )); args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&tileSize.width )); args.push_back( make_pair( sizeof(cl_int) , (void *)&tileSize.width ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&tileSize.height )); args.push_back( make_pair( sizeof(cl_int) , (void *)&tileSize.height ));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m00.data )); args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m.data ));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m10.data )); args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_m.cols ));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m01.data )); args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_m.step ));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m20.data )); args.push_back( make_pair( sizeof(cl_int) , (void *)&blocky ));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m11.data ));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m02.data ));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m30.data ));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m21.data ));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m12.data ));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m03.data ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_m00.cols ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_m00.step ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&type )); args.push_back( make_pair( sizeof(cl_int) , (void *)&type ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&depth )); args.push_back( make_pair( sizeof(cl_int) , (void *)&depth ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&cn )); args.push_back( make_pair( sizeof(cl_int) , (void *)&cn ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&coi )); args.push_back( make_pair( sizeof(cl_int) , (void *)&coi ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&binary )); args.push_back( make_pair( sizeof(cl_int) , (void *)&binary ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&TILE_SIZE )); args.push_back( make_pair( sizeof(cl_int) , (void *)&TILE_SIZE ));
openCLExecuteKernel(dst_m00.clCxt, &moments, "CvMoments", globalThreads, localThreads, args, -1, depth); openCLExecuteKernel(dst_m.clCxt, &moments, "CvMoments", globalThreads, localThreads, args, -1, depth);
size_t localThreadss[3] = { 128, 1, 1}; size_t localThreadss[3] = { 128, 1, 1};
size_t globalThreadss[3] = { 128, 1, 1}; size_t globalThreadss[3] = { 128, 1, 1};
...@@ -327,20 +310,12 @@ static void ocl_cvMoments( const void* array, CvMoments* mom, int binary ) ...@@ -327,20 +310,12 @@ static void ocl_cvMoments( const void* array, CvMoments* mom, int binary )
args_sum.push_back( make_pair( sizeof(cl_int) , (void *)&tile_width )); args_sum.push_back( make_pair( sizeof(cl_int) , (void *)&tile_width ));
args_sum.push_back( make_pair( sizeof(cl_int) , (void *)&TILE_SIZE )); args_sum.push_back( make_pair( sizeof(cl_int) , (void *)&TILE_SIZE ));
args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&sum )); args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&sum ));
args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m00.data )); args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m.data ));
args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m10.data )); args_sum.push_back( make_pair( sizeof(cl_int) , (void *)&dst_m.step ));
args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m01.data )); openCLExecuteKernel(dst_m.clCxt, &moments, "dst_sum", globalThreadss, localThreadss, args_sum, -1, -1);
args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m20.data ));
args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m11.data ));
args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m02.data ));
args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m30.data ));
args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m21.data ));
args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m12.data ));
args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m03.data ));
openCLExecuteKernel(dst_m00.clCxt, &moments, "dst_sum", globalThreadss, localThreadss, args_sum, -1, -1);
double* dstsum = new double[10]; double* dstsum = new double[10];
memset(dstsum,0,10*sizeof(double)); memset(dstsum,0,10*sizeof(double));
openCLReadBuffer(dst_m00.clCxt,sum,(void *)dstsum,10*sizeof(double)); openCLReadBuffer(dst_m.clCxt,sum,(void *)dstsum,10*sizeof(double));
mom->m00 = dstsum[0]; mom->m00 = dstsum[0];
mom->m10 = dstsum[1]; mom->m10 = dstsum[1];
mom->m01 = dstsum[2]; mom->m01 = dstsum[2];
...@@ -351,6 +326,7 @@ static void ocl_cvMoments( const void* array, CvMoments* mom, int binary ) ...@@ -351,6 +326,7 @@ static void ocl_cvMoments( const void* array, CvMoments* mom, int binary )
mom->m21 = dstsum[7]; mom->m21 = dstsum[7];
mom->m12 = dstsum[8]; mom->m12 = dstsum[8];
mom->m03 = dstsum[9]; mom->m03 = dstsum[9];
delete [] dstsum;
icvCompleteMomentState( mom ); icvCompleteMomentState( mom );
} }
......
This diff is collapsed.
...@@ -211,10 +211,14 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa ...@@ -211,10 +211,14 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
int4 data = *(__global int4*)&sum[glb_off]; int4 data = *(__global int4*)&sum[glb_off];
int lcl_off = mad24(lcl_y, readwidth, lcl_x<<2); int lcl_off = mad24(lcl_y, readwidth, lcl_x<<2);
#if OFF
lcldata[lcl_off] = data.x; lcldata[lcl_off] = data.x;
lcldata[lcl_off+1] = data.y; lcldata[lcl_off+1] = data.y;
lcldata[lcl_off+2] = data.z; lcldata[lcl_off+2] = data.z;
lcldata[lcl_off+3] = data.w; lcldata[lcl_off+3] = data.w;
#else
vstore4(data, 0, &lcldata[lcl_off]);
#endif
} }
lcloutindex[lcl_id] = 0; lcloutindex[lcl_id] = 0;
...@@ -559,3 +563,7 @@ if(result) ...@@ -559,3 +563,7 @@ if(result)
} }
} }
*/ */
...@@ -45,22 +45,28 @@ ...@@ -45,22 +45,28 @@
#pragma OPENCL EXTENSION cl_amd_printf : enable #pragma OPENCL EXTENSION cl_amd_printf : enable
#if defined (__ATI__) #if defined (DOUBLE_SUPPORT)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (__NVIDIA__) #ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable #pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif #endif
#if !defined(USE_SQR_INTEGRAL) && (defined (__ATI__) || defined (__NVIDIA__))
#define TYPE_IMAGE_SQSUM double #define TYPE_IMAGE_SQSUM double
#else #else
#define TYPE_IMAGE_SQSUM ulong #define TYPE_IMAGE_SQSUM float
#endif
#ifndef CN4
#define CN4 1
#else
#define CN4 4
#endif #endif
////////////////////////////////////////////////// //////////////////////////////////////////////////
// utilities // utilities
#define SQSUMS_PTR(ox, oy) mad24(gidy + oy, img_sqsums_step, gidx + img_sqsums_offset + ox) #define SQSUMS_PTR(ox, oy) mad24(gidy + oy, img_sqsums_step, (gidx + img_sqsums_offset + ox) * CN4)
#define SUMS_PTR(ox, oy) mad24(gidy + oy, img_sums_step, gidx + img_sums_offset + ox) #define SUMS_PTR(ox, oy) mad24(gidy + oy, img_sums_step, gidx + img_sums_offset + ox)
// normAcc* are accurate normalization routines which make GPU matchTemplate // normAcc* are accurate normalization routines which make GPU matchTemplate
// consistent with CPU one // consistent with CPU one
...@@ -95,7 +101,7 @@ float normAcc_SQDIFF(float num, float denum) ...@@ -95,7 +101,7 @@ float normAcc_SQDIFF(float num, float denum)
__kernel __kernel
void normalizeKernel_C1_D0 void normalizeKernel_C1_D0
( (
__global const TYPE_IMAGE_SQSUM * img_sqsums, __global const float * img_sqsums,
__global float * res, __global float * res,
ulong tpl_sqsum, ulong tpl_sqsum,
int res_rows, int res_rows,
...@@ -161,7 +167,7 @@ void matchTemplate_Prepared_SQDIFF_C1_D0 ...@@ -161,7 +167,7 @@ void matchTemplate_Prepared_SQDIFF_C1_D0
__kernel __kernel
void matchTemplate_Prepared_SQDIFF_NORMED_C1_D0 void matchTemplate_Prepared_SQDIFF_NORMED_C1_D0
( (
__global const TYPE_IMAGE_SQSUM * img_sqsums, __global const float * img_sqsums,
__global float * res, __global float * res,
ulong tpl_sqsum, ulong tpl_sqsum,
int res_rows, int res_rows,
...@@ -702,7 +708,7 @@ void matchTemplate_Prepared_CCOFF_NORMED_C1_D0 ...@@ -702,7 +708,7 @@ void matchTemplate_Prepared_CCOFF_NORMED_C1_D0
__global const uint * img_sums, __global const uint * img_sums,
int img_sums_offset, int img_sums_offset,
int img_sums_step, int img_sums_step,
__global const TYPE_IMAGE_SQSUM * img_sqsums, __global const float * img_sqsums,
int img_sqsums_offset, int img_sqsums_offset,
int img_sqsums_step, int img_sqsums_step,
float tpl_sum, float tpl_sum,
...@@ -754,10 +760,10 @@ void matchTemplate_Prepared_CCOFF_NORMED_C4_D0 ...@@ -754,10 +760,10 @@ void matchTemplate_Prepared_CCOFF_NORMED_C4_D0
__global const uint * img_sums_c3, __global const uint * img_sums_c3,
int img_sums_offset, int img_sums_offset,
int img_sums_step, int img_sums_step,
__global const TYPE_IMAGE_SQSUM * img_sqsums_c0, __global const float * img_sqsums_c0,
__global const TYPE_IMAGE_SQSUM * img_sqsums_c1, __global const float * img_sqsums_c1,
__global const TYPE_IMAGE_SQSUM * img_sqsums_c2, __global const float * img_sqsums_c2,
__global const TYPE_IMAGE_SQSUM * img_sqsums_c3, __global const float * img_sqsums_c3,
int img_sqsums_offset, int img_sqsums_offset,
int img_sqsums_step, int img_sqsums_step,
float tpl_sum_c0, float tpl_sum_c0,
...@@ -821,3 +827,32 @@ void matchTemplate_Prepared_CCOFF_NORMED_C4_D0 ...@@ -821,3 +827,32 @@ void matchTemplate_Prepared_CCOFF_NORMED_C4_D0
res[res_idx] = normAcc(num, denum); res[res_idx] = normAcc(num, denum);
} }
} }
//////////////////////////////////////////////////////////////////////
// extractFirstChannel
// Copies the first (.x) component of every float4 element of `img` into the
// single-channel float buffer `res`.
//
//   img, res               - source (float4) and destination (float) buffers
//   rows, cols             - extent of the region to process; work-items outside
//                            it do nothing
//   img_offset, res_offset - start offsets; divided by the element size below,
//                            i.e. treated as byte counts
//   img_step, res_step     - row pitches; likewise divided by the element size
//
// Work-item dimension 0 selects the column, dimension 1 selects the row.
__kernel
void extractFirstChannel
(
    __global const float4 * img,
    __global float * res,
    int rows,
    int cols,
    int img_offset,
    int res_offset,
    int img_step,
    int res_step
)
{
    // Convert pitches and offsets to element units.
    const int src_pitch  = img_step   / sizeof(float4);
    const int dst_pitch  = res_step   / sizeof(float);
    const int src_origin = img_offset / sizeof(float4);
    const int dst_origin = res_offset / sizeof(float);

    const int col = get_global_id(0);
    const int row = get_global_id(1);

    // Work-items that fall outside the requested region have nothing to do.
    if (col >= cols || row >= rows)
        return;

    __global const float4 * src = img + src_origin;
    __global float * dst = res + dst_origin;

    const float4 pixel = src[col + row * src_pitch];
    dst[col + row * dst_pitch] = pixel.x;
}
This diff is collapsed.
This diff is collapsed.
...@@ -323,7 +323,7 @@ float sobel(__global unsigned char *input, int x, int y, int rows, int cols) ...@@ -323,7 +323,7 @@ float sobel(__global unsigned char *input, int x, int y, int rows, int cols)
float conv = 0; float conv = 0;
int y1 = y==0? 0 : y-1; int y1 = y==0? 0 : y-1;
int x1 = x==0? 0 : x-1; int x1 = x==0? 0 : x-1;
if(x < cols && y < rows) if(x < cols && y < rows && x > 0 && y > 0)
{ {
conv = (float)input[(y1) * cols + (x1)] * (-1) + (float)input[(y1) * cols + (x+1)] * (1) + conv = (float)input[(y1) * cols + (x1)] * (-1) + (float)input[(y1) * cols + (x+1)] * (1) +
(float)input[(y) * cols + (x1)] * (-2) + (float)input[(y) * cols + (x+1)] * (2) + (float)input[(y) * cols + (x1)] * (-2) + (float)input[(y) * cols + (x+1)] * (2) +
......
...@@ -110,7 +110,7 @@ namespace ...@@ -110,7 +110,7 @@ namespace
} }
}; };
TEST_P(BruteForceMatcher, DISABLED_Match_Single) TEST_P(BruteForceMatcher, Match_Single)
{ {
cv::ocl::BruteForceMatcher_OCL_base matcher(distType); cv::ocl::BruteForceMatcher_OCL_base matcher(distType);
...@@ -130,7 +130,7 @@ namespace ...@@ -130,7 +130,7 @@ namespace
ASSERT_EQ(0, badCount); ASSERT_EQ(0, badCount);
} }
TEST_P(BruteForceMatcher, DISABLED_KnnMatch_2_Single) TEST_P(BruteForceMatcher, KnnMatch_2_Single)
{ {
const int knn = 2; const int knn = 2;
......
...@@ -75,7 +75,7 @@ PARAM_TEST_CASE(MatchTemplate8U, cv::Size, TemplateSize, Channels, TemplateMetho ...@@ -75,7 +75,7 @@ PARAM_TEST_CASE(MatchTemplate8U, cv::Size, TemplateSize, Channels, TemplateMetho
} }
}; };
TEST_P(MatchTemplate8U, DISABLED_Accuracy) TEST_P(MatchTemplate8U, Accuracy)
{ {
std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl; std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl;
...@@ -138,18 +138,18 @@ TEST_P(MatchTemplate32F, Accuracy) ...@@ -138,18 +138,18 @@ TEST_P(MatchTemplate32F, Accuracy)
EXPECT_MAT_NEAR(dst_gold, mat_dst, templ_size.area() * 1e-1, sss); EXPECT_MAT_NEAR(dst_gold, mat_dst, templ_size.area() * 1e-1, sss);
} }
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate8U, INSTANTIATE_TEST_CASE_P(OCL_ImgProc, MatchTemplate8U,
testing::Combine( testing::Combine(
MTEMP_SIZES, MTEMP_SIZES,
testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/), testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16)), TemplateSize(cv::Size(30, 30))),
testing::Values(Channels(1), Channels(3), Channels(4)), testing::Values(Channels(1), Channels(3), Channels(4)),
ALL_TEMPLATE_METHODS ALL_TEMPLATE_METHODS
) )
); );
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate32F, testing::Combine( INSTANTIATE_TEST_CASE_P(OCL_ImgProc, MatchTemplate32F, testing::Combine(
MTEMP_SIZES, MTEMP_SIZES,
testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/), testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16)), TemplateSize(cv::Size(30, 30))),
testing::Values(Channels(1), Channels(3), Channels(4)), testing::Values(Channels(1), Channels(3), Channels(4)),
testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR)))); testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR))));
#endif #endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment