Commit 6c99b5c9 authored by Andrey Kamaev, committed by OpenCV Buildbot

Merge pull request #705 from bitwangyaoyao:2.4_oclFix

parents 6a6ae355 d6f1ad8c
......@@ -43,7 +43,7 @@ if(OPENCL_FOUND)
set(OPENCL_LIBRARIES ${OPENCL_LIBRARY})
if (X86_64)
set(CLAMD_POSSIBLE_LIB_SUFFIXES lib32/import)
set(CLAMD_POSSIBLE_LIB_SUFFIXES lib64/import)
elseif (X86)
set(CLAMD_POSSIBLE_LIB_SUFFIXES lib32/import)
endif()
......
......@@ -18,6 +18,7 @@ foreach(cl ${cl_list})
string(REPLACE "\t" " " lines "${lines}")
string(REGEX REPLACE "/\\*([^*]/|\\*[^/]|[^*/])*\\*/" "" lines "${lines}") # multiline comments
string(REGEX REPLACE "/\\*([^\n])*\\*/" "" lines "${lines}") # single-line comments
string(REGEX REPLACE "[ ]*//[^\n]*\n" "\n" lines "${lines}") # single-line comments
string(REGEX REPLACE "\n[ ]*(\n[ ]*)*" "\n" lines "${lines}") # empty lines & leading whitespace
string(REGEX REPLACE "^\n" "" lines "${lines}") # leading new line
......
This diff is collapsed.
......@@ -953,8 +953,8 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
//int flag = 0;
oclMat gimg1(gimg.rows, gimg.cols, CV_8UC1);
oclMat gsum(totalheight, gimg.cols + 1, CV_32SC1);
oclMat gsqsum(totalheight, gimg.cols + 1, CV_32FC1);
oclMat gsum(totalheight + 4, gimg.cols + 1, CV_32SC1);
oclMat gsqsum(totalheight + 4, gimg.cols + 1, CV_32FC1);
//cl_mem cascadebuffer;
cl_mem stagebuffer;
......
This diff is collapsed.
......@@ -143,7 +143,7 @@ static void icvContourMoments( CvSeq* contour, CvMoments* mom )
args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step ));
openCLExecuteKernel(dst_a.clCxt, &moments, "icvContourMoments", globalThreads, localThreads, args, -1, -1);
cv::Mat dst(dst_a);
a00 = a10 = a01 = a20 = a11 = a02 = a30 = a21 = a12 = a03 = 0.0;
if (!cv::ocl::Context::getContext()->supportsFeature(Context::CL_DOUBLE))
......@@ -277,16 +277,7 @@ static void ocl_cvMoments( const void* array, CvMoments* mom, int binary )
blocky = size.height/TILE_SIZE;
else
blocky = size.height/TILE_SIZE + 1;
cv::ocl::oclMat dst_m00(blocky, blockx, CV_64FC1);
cv::ocl::oclMat dst_m10(blocky, blockx, CV_64FC1);
cv::ocl::oclMat dst_m01(blocky, blockx, CV_64FC1);
cv::ocl::oclMat dst_m20(blocky, blockx, CV_64FC1);
cv::ocl::oclMat dst_m11(blocky, blockx, CV_64FC1);
cv::ocl::oclMat dst_m02(blocky, blockx, CV_64FC1);
cv::ocl::oclMat dst_m30(blocky, blockx, CV_64FC1);
cv::ocl::oclMat dst_m21(blocky, blockx, CV_64FC1);
cv::ocl::oclMat dst_m12(blocky, blockx, CV_64FC1);
cv::ocl::oclMat dst_m03(blocky, blockx, CV_64FC1);
cv::ocl::oclMat dst_m(blocky * 10, blockx, CV_64FC1);
cl_mem sum = openCLCreateBuffer(src.clCxt,CL_MEM_READ_WRITE,10*sizeof(double));
int tile_width = std::min(size.width,TILE_SIZE);
int tile_height = std::min(size.height,TILE_SIZE);
......@@ -299,25 +290,17 @@ static void ocl_cvMoments( const void* array, CvMoments* mom, int binary )
args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&tileSize.width ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&tileSize.height ));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m00.data ));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m10.data ));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m01.data ));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m20.data ));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m11.data ));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m02.data ));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m30.data ));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m21.data ));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m12.data ));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m03.data ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_m00.cols ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_m00.step ));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m.data ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_m.cols ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_m.step ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&blocky ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&type ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&depth ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&cn ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&coi ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&binary ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&TILE_SIZE ));
openCLExecuteKernel(dst_m00.clCxt, &moments, "CvMoments", globalThreads, localThreads, args, -1, depth);
openCLExecuteKernel(dst_m.clCxt, &moments, "CvMoments", globalThreads, localThreads, args, -1, depth);
size_t localThreadss[3] = { 128, 1, 1};
size_t globalThreadss[3] = { 128, 1, 1};
......@@ -327,20 +310,12 @@ static void ocl_cvMoments( const void* array, CvMoments* mom, int binary )
args_sum.push_back( make_pair( sizeof(cl_int) , (void *)&tile_width ));
args_sum.push_back( make_pair( sizeof(cl_int) , (void *)&TILE_SIZE ));
args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&sum ));
args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m00.data ));
args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m10.data ));
args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m01.data ));
args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m20.data ));
args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m11.data ));
args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m02.data ));
args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m30.data ));
args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m21.data ));
args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m12.data ));
args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m03.data ));
openCLExecuteKernel(dst_m00.clCxt, &moments, "dst_sum", globalThreadss, localThreadss, args_sum, -1, -1);
args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m.data ));
args_sum.push_back( make_pair( sizeof(cl_int) , (void *)&dst_m.step ));
openCLExecuteKernel(dst_m.clCxt, &moments, "dst_sum", globalThreadss, localThreadss, args_sum, -1, -1);
double* dstsum = new double[10];
memset(dstsum,0,10*sizeof(double));
openCLReadBuffer(dst_m00.clCxt,sum,(void *)dstsum,10*sizeof(double));
openCLReadBuffer(dst_m.clCxt,sum,(void *)dstsum,10*sizeof(double));
mom->m00 = dstsum[0];
mom->m10 = dstsum[1];
mom->m01 = dstsum[2];
......@@ -351,6 +326,7 @@ static void ocl_cvMoments( const void* array, CvMoments* mom, int binary )
mom->m21 = dstsum[7];
mom->m12 = dstsum[8];
mom->m03 = dstsum[9];
delete [] dstsum;
icvCompleteMomentState( mom );
}
......
This diff is collapsed.
......@@ -211,10 +211,14 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
int4 data = *(__global int4*)&sum[glb_off];
int lcl_off = mad24(lcl_y, readwidth, lcl_x<<2);
#if OFF
lcldata[lcl_off] = data.x;
lcldata[lcl_off+1] = data.y;
lcldata[lcl_off+2] = data.z;
lcldata[lcl_off+3] = data.w;
#else
vstore4(data, 0, &lcldata[lcl_off]);
#endif
}
lcloutindex[lcl_id] = 0;
......@@ -559,3 +563,7 @@ if(result)
}
}
*/
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -323,7 +323,7 @@ float sobel(__global unsigned char *input, int x, int y, int rows, int cols)
float conv = 0;
int y1 = y==0? 0 : y-1;
int x1 = x==0? 0 : x-1;
if(x < cols && y < rows)
if(x < cols && y < rows && x > 0 && y > 0)
{
conv = (float)input[(y1) * cols + (x1)] * (-1) + (float)input[(y1) * cols + (x+1)] * (1) +
(float)input[(y) * cols + (x1)] * (-2) + (float)input[(y) * cols + (x+1)] * (2) +
......
......@@ -110,7 +110,7 @@ namespace
}
};
TEST_P(BruteForceMatcher, DISABLED_Match_Single)
TEST_P(BruteForceMatcher, Match_Single)
{
cv::ocl::BruteForceMatcher_OCL_base matcher(distType);
......@@ -130,7 +130,7 @@ namespace
ASSERT_EQ(0, badCount);
}
TEST_P(BruteForceMatcher, DISABLED_KnnMatch_2_Single)
TEST_P(BruteForceMatcher, KnnMatch_2_Single)
{
const int knn = 2;
......
......@@ -75,7 +75,7 @@ PARAM_TEST_CASE(MatchTemplate8U, cv::Size, TemplateSize, Channels, TemplateMetho
}
};
TEST_P(MatchTemplate8U, DISABLED_Accuracy)
TEST_P(MatchTemplate8U, Accuracy)
{
std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl;
......@@ -138,18 +138,18 @@ TEST_P(MatchTemplate32F, Accuracy)
EXPECT_MAT_NEAR(dst_gold, mat_dst, templ_size.area() * 1e-1, sss);
}
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate8U,
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, MatchTemplate8U,
testing::Combine(
MTEMP_SIZES,
testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/),
testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16)), TemplateSize(cv::Size(30, 30))),
testing::Values(Channels(1), Channels(3), Channels(4)),
ALL_TEMPLATE_METHODS
)
);
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate32F, testing::Combine(
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, MatchTemplate32F, testing::Combine(
MTEMP_SIZES,
testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/),
testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16)), TemplateSize(cv::Size(30, 30))),
testing::Values(Channels(1), Channels(3), Channels(4)),
testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR))));
#endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment