Commit 5f3eb0fc authored by Andrey Kamaev, committed by OpenCV Buildbot

Merge pull request #628 from bitwangyaoyao:2.4_fixMoments

parents 1d321974 7f0d6d42
#if defined (DOUBLE_SUPPORT) #if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable #pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
typedef double T;
#else #else
typedef float double; typedef float double;
typedef float4 double4; typedef float4 double4;
typedef long T;
#define convert_double4 convert_float4 #define convert_double4 convert_float4
#endif #endif
//#pragma OPENCL EXTENSION cl_amd_printf:enable //#pragma OPENCL EXTENSION cl_amd_printf:enable
//#if defined (DOUBLE_SUPPORT) //#if defined (DOUBLE_SUPPORT)
__kernel void icvContourMoments(int contour_total, #define DST_ROW_A00 0
__global float* reader_oclmat_data, #define DST_ROW_A10 1
__global double* dst_a00, #define DST_ROW_A01 2
__global double* dst_a10, #define DST_ROW_A20 3
__global double* dst_a01, #define DST_ROW_A11 4
__global double* dst_a20, #define DST_ROW_A02 5
__global double* dst_a11, #define DST_ROW_A30 6
__global double* dst_a02, #define DST_ROW_A21 7
__global double* dst_a30, #define DST_ROW_A12 8
__global double* dst_a21, #define DST_ROW_A03 9
__global double* dst_a12,
__global double* dst_a03) __kernel void icvContourMoments(int contour_total,
__global float* reader_oclmat_data,
__global T* dst_a,
int dst_step)
{ {
double xi_1, yi_1, xi_12, yi_12, xi, yi, xi2, yi2, dxy, xii_1, yii_1; T xi_1, yi_1, xi_12, yi_12, xi, yi, xi2, yi2, dxy, xii_1, yii_1;
int idx = get_global_id(0); int idx = get_global_id(0);
xi_1 = *(reader_oclmat_data + (get_global_id(0) << 1)); if (idx < 0 || idx >= contour_total)
yi_1 = *(reader_oclmat_data + (get_global_id(0) << 1) + 1); return;
xi_1 = (T)(*(reader_oclmat_data + (get_global_id(0) << 1)));
yi_1 = (T)(*(reader_oclmat_data + (get_global_id(0) << 1) + 1));
xi_12 = xi_1 * xi_1; xi_12 = xi_1 * xi_1;
yi_12 = yi_1 * yi_1; yi_12 = yi_1 * yi_1;
if(idx == contour_total - 1) if(idx == contour_total - 1)
{ {
xi = *(reader_oclmat_data); xi = (T)(*(reader_oclmat_data));
yi = *(reader_oclmat_data + 1); yi = (T)(*(reader_oclmat_data + 1));
} }
else else
{ {
xi = *(reader_oclmat_data + (idx + 1) * 2); xi = (T)(*(reader_oclmat_data + (idx + 1) * 2));
yi = *(reader_oclmat_data + (idx + 1) * 2 + 1); yi = (T)(*(reader_oclmat_data + (idx + 1) * 2 + 1));
} }
xi2 = xi * xi; xi2 = xi * xi;
...@@ -44,19 +58,20 @@ __kernel void icvContourMoments(int contour_total, ...@@ -44,19 +58,20 @@ __kernel void icvContourMoments(int contour_total,
dxy = xi_1 * yi - xi * yi_1; dxy = xi_1 * yi - xi * yi_1;
xii_1 = xi_1 + xi; xii_1 = xi_1 + xi;
yii_1 = yi_1 + yi; yii_1 = yi_1 + yi;
dst_a00[idx] = dxy; dst_step /= sizeof(T);
dst_a10[idx] = dxy * xii_1; *( dst_a + DST_ROW_A00 * dst_step + idx) = dxy;
dst_a01[idx] = dxy * yii_1; *( dst_a + DST_ROW_A10 * dst_step + idx) = dxy * xii_1;
dst_a20[idx] = dxy * (xi_1 * xii_1 + xi2); *( dst_a + DST_ROW_A01 * dst_step + idx) = dxy * yii_1;
dst_a11[idx] = dxy * (xi_1 * (yii_1 + yi_1) + xi * (yii_1 + yi)); *( dst_a + DST_ROW_A20 * dst_step + idx) = dxy * (xi_1 * xii_1 + xi2);
dst_a02[idx] = dxy * (yi_1 * yii_1 + yi2); *( dst_a + DST_ROW_A11 * dst_step + idx) = dxy * (xi_1 * (yii_1 + yi_1) + xi * (yii_1 + yi));
dst_a30[idx] = dxy * xii_1 * (xi_12 + xi2); *( dst_a + DST_ROW_A02 * dst_step + idx) = dxy * (yi_1 * yii_1 + yi2);
dst_a03[idx] = dxy * yii_1 * (yi_12 + yi2); *( dst_a + DST_ROW_A30 * dst_step + idx) = dxy * xii_1 * (xi_12 + xi2);
dst_a21[idx] = *( dst_a + DST_ROW_A03 * dst_step + idx) = dxy * yii_1 * (yi_12 + yi2);
*( dst_a + DST_ROW_A21 * dst_step + idx) =
dxy * (xi_12 * (3 * yi_1 + yi) + 2 * xi * xi_1 * yii_1 + dxy * (xi_12 * (3 * yi_1 + yi) + 2 * xi * xi_1 * yii_1 +
xi2 * (yi_1 + 3 * yi)); xi2 * (yi_1 + 3 * yi));
dst_a12[idx] = *( dst_a + DST_ROW_A12 * dst_step + idx) =
dxy * (yi_12 * (3 * xi_1 + xi) + 2 * yi * yi_1 * xii_1 + dxy * (yi_12 * (3 * xi_1 + xi) + 2 * yi * yi_1 * xii_1 +
yi2 * (xi_1 + 3 * xi)); yi2 * (xi_1 + 3 * xi));
} }
......
...@@ -98,25 +98,19 @@ static void icvContourMoments( CvSeq* contour, CvMoments* mom ) ...@@ -98,25 +98,19 @@ static void icvContourMoments( CvSeq* contour, CvMoments* mom )
CvSeqReader reader; CvSeqReader reader;
int lpt = contour->total; int lpt = contour->total;
double a00, a10, a01, a20, a11, a02, a30, a21, a12, a03; double a00, a10, a01, a20, a11, a02, a30, a21, a12, a03;
int dst_type = cv::ocl::Context::getContext()->impl->double_support ? CV_64FC1 : CV_32FC1;
cvStartReadSeq( contour, &reader, 0 ); cvStartReadSeq( contour, &reader, 0 );
cv::ocl::oclMat dst_a00(1,lpt,dst_type);
cv::ocl::oclMat dst_a10(1,lpt,dst_type);
cv::ocl::oclMat dst_a01(1,lpt,dst_type);
cv::ocl::oclMat dst_a20(1,lpt,dst_type);
cv::ocl::oclMat dst_a11(1,lpt,dst_type);
cv::ocl::oclMat dst_a02(1,lpt,dst_type);
cv::ocl::oclMat dst_a30(1,lpt,dst_type);
cv::ocl::oclMat dst_a21(1,lpt,dst_type);
cv::ocl::oclMat dst_a12(1,lpt,dst_type);
cv::ocl::oclMat dst_a03(1,lpt,dst_type);
size_t reader_size = lpt << 1; size_t reader_size = lpt << 1;
cv::Mat reader_mat(1,reader_size,CV_32FC1); cv::Mat reader_mat(1,reader_size,CV_32FC1);
bool is_float = CV_SEQ_ELTYPE(contour) == CV_32FC2; bool is_float = CV_SEQ_ELTYPE(contour) == CV_32FC2;
if (!cv::ocl::Context::getContext()->impl->double_support && is_float)
{
CV_Error(CV_StsUnsupportedFormat, "Moments - double is not supported by your GPU!");
}
if( is_float ) if( is_float )
{ {
for(size_t i = 0; i < reader_size; ++i) for(size_t i = 0; i < reader_size; ++i)
...@@ -136,6 +130,7 @@ static void icvContourMoments( CvSeq* contour, CvMoments* mom ) ...@@ -136,6 +130,7 @@ static void icvContourMoments( CvSeq* contour, CvMoments* mom )
} }
} }
cv::ocl::oclMat dst_a(10, lpt, CV_64FC1);
cv::ocl::oclMat reader_oclmat(reader_mat); cv::ocl::oclMat reader_oclmat(reader_mat);
int llength = std::min(lpt,128); int llength = std::min(lpt,128);
size_t localThreads[3] = { llength, 1, 1}; size_t localThreads[3] = { llength, 1, 1};
...@@ -143,48 +138,43 @@ static void icvContourMoments( CvSeq* contour, CvMoments* mom ) ...@@ -143,48 +138,43 @@ static void icvContourMoments( CvSeq* contour, CvMoments* mom )
vector<pair<size_t , const void *> > args; vector<pair<size_t , const void *> > args;
args.push_back( make_pair( sizeof(cl_int) , (void *)&contour->total )); args.push_back( make_pair( sizeof(cl_int) , (void *)&contour->total ));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&reader_oclmat.data )); args.push_back( make_pair( sizeof(cl_mem) , (void *)&reader_oclmat.data ));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_a00.data )); args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_a.data ));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_a10.data )); cl_int dst_step = (cl_int)dst_a.step;
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_a01.data )); args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step ));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_a20.data ));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_a11.data )); openCLExecuteKernel(dst_a.clCxt, &moments, "icvContourMoments", globalThreads, localThreads, args, -1, -1);
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_a02.data ));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_a30.data )); cv::Mat dst(dst_a);
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_a21.data )); a00 = a10 = a01 = a20 = a11 = a02 = a30 = a21 = a12 = a03 = 0.0;
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_a12.data )); if (!cv::ocl::Context::getContext()->impl->double_support)
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_a03.data )); {
openCLExecuteKernel(dst_a00.clCxt, &moments, "icvContourMoments", globalThreads, localThreads, args, -1, -1); for (int i = 0; i < contour->total; ++i)
{
cv::Mat dst(dst_a00); a00 += dst.at<cl_long>(0, i);
cv::Scalar s = cv::sum(dst); a10 += dst.at<cl_long>(1, i);
a00 = s[0]; a01 += dst.at<cl_long>(2, i);
dst = dst_a10; a20 += dst.at<cl_long>(3, i);
s = cv::sum(dst); a11 += dst.at<cl_long>(4, i);
a10 = s[0];//dstsum[1]; a02 += dst.at<cl_long>(5, i);
dst = dst_a01; a30 += dst.at<cl_long>(6, i);
s = cv::sum(dst); a21 += dst.at<cl_long>(7, i);
a01 = s[0];//dstsum[2]; a12 += dst.at<cl_long>(8, i);
dst = dst_a20; a03 += dst.at<cl_long>(9, i);
s = cv::sum(dst); }
a20 = s[0];//dstsum[3]; }
dst = dst_a11; else
s = cv::sum(dst); {
a11 = s[0];//dstsum[4]; a00 = cv::sum(dst.row(0))[0];
dst = dst_a02; a10 = cv::sum(dst.row(1))[0];
s = cv::sum(dst); a01 = cv::sum(dst.row(2))[0];
a02 = s[0];//dstsum[5]; a20 = cv::sum(dst.row(3))[0];
dst = dst_a30; a11 = cv::sum(dst.row(4))[0];
s = cv::sum(dst); a02 = cv::sum(dst.row(5))[0];
a30 = s[0];//dstsum[6]; a30 = cv::sum(dst.row(6))[0];
dst = dst_a21; a21 = cv::sum(dst.row(7))[0];
s = cv::sum(dst); a12 = cv::sum(dst.row(8))[0];
a21 = s[0];//dstsum[7]; a03 = cv::sum(dst.row(9))[0];
dst = dst_a12; }
s = cv::sum(dst);
a12 = s[0];//dstsum[8];
dst = dst_a03;
s = cv::sum(dst);
a03 = s[0];//dstsum[9];
double db1_2, db1_6, db1_12, db1_24, db1_20, db1_60; double db1_2, db1_6, db1_12, db1_24, db1_20, db1_60;
if( fabs(a00) > FLT_EPSILON ) if( fabs(a00) > FLT_EPSILON )
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment