fixed cv::multiply, cv::divide

febb200e · Ilya Lavrenov · b29835a8 · febb200e · febb200e · febb200e
Commit febb200e authored Dec 22, 2013 by Ilya Lavrenov
Hide whitespace changes
Inline Side-by-side

Showing 3 changed files with 90 additions and 19 deletions

arithm.cpp modules/core/src/arithm.cpp +19 -7

arithm.cl modules/core/src/opencl/arithm.cl +27 -7

test_arithm.cpp modules/core/test/ocl/test_arithm.cpp +44 -5

No files found.
--- a/modules/core/src/arithm.cpp
+++ b/modules/core/src/arithm.cpp
@@ -915,11 +915,12 @@ void convertAndUnrollScalar( const Mat& sc, int buftype, uchar* scbuf, size_t bl

 enum { OCL_OP_ADD=0, OCL_OP_SUB=1, OCL_OP_RSUB=2, OCL_OP_ABSDIFF=3, OCL_OP_MUL=4,
       OCL_OP_MUL_SCALE=5, OCL_OP_DIV_SCALE=6, OCL_OP_RECIP_SCALE=7, OCL_OP_ADDW=8,
-       OCL_OP_AND=9, OCL_OP_OR=10, OCL_OP_XOR=11, OCL_OP_NOT=12, OCL_OP_MIN=13, OCL_OP_MAX=14 };
+       OCL_OP_AND=9, OCL_OP_OR=10, OCL_OP_XOR=11, OCL_OP_NOT=12, OCL_OP_MIN=13, OCL_OP_MAX=14,
+       OCL_OP_RDIV_SCALE=15 };

 static const char* oclop2str[] = { "OP_ADD", "OP_SUB", "OP_RSUB", "OP_ABSDIFF",
    "OP_MUL", "OP_MUL_SCALE", "OP_DIV_SCALE", "OP_RECIP_SCALE",
-    "OP_ADDW", "OP_AND", "OP_OR", "OP_XOR", "OP_NOT", "OP_MIN", "OP_MAX", 0 };
+    "OP_ADDW", "OP_AND", "OP_OR", "OP_XOR", "OP_NOT", "OP_MIN", "OP_MAX", "OP_RDIV_SCALE", 0 };

 static bool ocl_binary_op(InputArray _src1, InputArray _src2, OutputArray _dst,
                          InputArray _mask, bool bitwise, int oclop, bool haveScalar )
@@ -1301,25 +1302,27 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,

    int kercn = haveMask || haveScalar ? cn : 1;

-    char cvtstr[3][32], opts[1024];
+    char cvtstr[4][32], opts[1024];
    sprintf(opts, "-D %s%s -D %s -D srcT1=%s -D srcT2=%s "
-            "-D dstT=%s -D workT=%s -D convertToWT1=%s "
+            "-D dstT=%s -D workT=%s -D scaleT=%s -D convertToWT1=%s "
            "-D convertToWT2=%s -D convertToDT=%s%s",
            (haveMask ? "MASK_" : ""), (haveScalar ? "UNARY_OP" : "BINARY_OP"),
            oclop2str[oclop], ocl::typeToStr(CV_MAKETYPE(depth1, kercn)),
            ocl::typeToStr(CV_MAKETYPE(depth2, kercn)),
            ocl::typeToStr(CV_MAKETYPE(ddepth, kercn)),
            ocl::typeToStr(CV_MAKETYPE(wdepth, kercn)),
+            ocl::typeToStr(CV_MAKETYPE(wdepth, 1)),
            ocl::convertTypeStr(depth1, wdepth, kercn, cvtstr[0]),
            ocl::convertTypeStr(depth2, wdepth, kercn, cvtstr[1]),
            ocl::convertTypeStr(wdepth, ddepth, kercn, cvtstr[2]),
            doubleSupport ? " -D DOUBLE_SUPPORT" : "");

+    size_t usrdata_esz = CV_ELEM_SIZE(wdepth);
    const uchar* usrdata_p = (const uchar*)usrdata;
    const double* usrdata_d = (const double*)usrdata;
    float usrdata_f[3];
    int i, n = oclop == OCL_OP_MUL_SCALE || oclop == OCL_OP_DIV_SCALE ||
-        oclop == OCL_OP_RECIP_SCALE ? 1 : oclop == OCL_OP_ADDW ? 3 : 0;
+        oclop == OCL_OP_RDIV_SCALE || oclop == OCL_OP_RECIP_SCALE ? 1 : oclop == OCL_OP_ADDW ? 3 : 0;
    if( n > 0 && wdepth == CV_32F )
    {
        for( i = 0; i < n; i++ )
@@ -1352,13 +1355,20 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
        ocl::KernelArg scalararg = ocl::KernelArg(0, 0, 0, buf, esz);

        if( !haveMask )
-            k.args(src1arg, dstarg, scalararg);
+        {
+            if(n == 0)
+                k.args(src1arg, dstarg, scalararg);
+            else if(n == 1)
+                k.args(src1arg, dstarg, scalararg,
+                       ocl::KernelArg(0, 0, 0, usrdata_p, usrdata_esz));
+            else
+                CV_Error(Error::StsNotImplemented, "unsupported number of extra parameters");
+        }
        else
            k.args(src1arg, maskarg, dstarg, scalararg);
    }
    else
    {
-        size_t usrdata_esz = CV_ELEM_SIZE(wdepth);
        src2 = _src2.getUMat();
        ocl::KernelArg src2arg = ocl::KernelArg::ReadOnlyNoSize(src2, cscale);

@@ -1439,6 +1449,8 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
            swapped12 = true;
            if( oclop == OCL_OP_SUB )
                oclop = OCL_OP_RSUB;
+            if ( oclop == OCL_OP_DIV_SCALE )
+                oclop = OCL_OP_RDIV_SCALE;
        }
        else if( !checkScalar(*psrc2, type1, kind2, kind1) )
            CV_Error( CV_StsUnmatchedSizes,

--- a/modules/core/src/opencl/arithm.cl
+++ b/modules/core/src/opencl/arithm.cl
@@ -136,8 +136,12 @@

 #elif defined OP_MUL_SCALE
 #undef EXTRA_PARAMS
-#define EXTRA_PARAMS , workT scale
-#define PROCESS_ELEM dstelem = convertToDT(srcelem1 * srcelem2 * scale)
+#ifdef UNARY_OP
+#define EXTRA_PARAMS , workT srcelem2, scaleT scale
+#else
+#define EXTRA_PARAMS , scaleT scale
+#endif
+#define PROCESS_ELEM dstelem = convertToDT(srcelem1 * scale * srcelem2)

 #elif defined OP_DIV
 #define PROCESS_ELEM \
@@ -146,21 +150,36 @@

 #elif defined OP_DIV_SCALE
 #undef EXTRA_PARAMS
-#define EXTRA_PARAMS , workT scale
+#ifdef UNARY_OP
+#define EXTRA_PARAMS , workT srcelem2, scaleT scale
+#else
+#define EXTRA_PARAMS , scaleT scale
+#endif
 #define PROCESS_ELEM \
        workT e2 = srcelem2, zero = (workT)(0); \
-        dstelem = convertToDT(e2 != zero ? srcelem1 * scale / e2 : zero)
+        dstelem = convertToDT(e2 == zero ? zero : (srcelem1 * (workT)(scale) / e2))
+
+#elif defined OP_RDIV_SCALE
+#undef EXTRA_PARAMS
+#ifdef UNARY_OP
+#define EXTRA_PARAMS , workT srcelem2, scaleT scale
+#else
+#define EXTRA_PARAMS , scaleT scale
+#endif
+#define PROCESS_ELEM \
+        workT e1 = srcelem1, zero = (workT)(0); \
+        dstelem = convertToDT(e1 == zero ? zero : (srcelem2 * (workT)(scale) / e1))

 #elif defined OP_RECIP_SCALE
 #undef EXTRA_PARAMS
-#define EXTRA_PARAMS , workT scale
+#define EXTRA_PARAMS , scaleT scale
 #define PROCESS_ELEM \
        workT e1 = srcelem1, zero = (workT)(0); \
        dstelem = convertToDT(e1 != zero ? scale / e1 : zero)

 #elif defined OP_ADDW
 #undef EXTRA_PARAMS
-#define EXTRA_PARAMS , workT alpha, workT beta, workT gamma
+#define EXTRA_PARAMS , scaleT alpha, scaleT beta, scaleT gamma
 #define PROCESS_ELEM dstelem = convertToDT(srcelem1*alpha + srcelem2*beta + gamma)

 #elif defined OP_MAG
@@ -260,7 +279,8 @@ dstelem = v > (dstT)(0) ? log(v) : log(-v)
 #undef srcelem2
 #if defined OP_AND || defined OP_OR || defined OP_XOR || defined OP_ADD || defined OP_SAT_ADD || \
    defined OP_SUB || defined OP_SAT_SUB || defined OP_RSUB || defined OP_SAT_RSUB || \
-    defined OP_ABSDIFF || defined OP_CMP || defined OP_MIN || defined OP_MAX || defined OP_POW
+    defined OP_ABSDIFF || defined OP_CMP || defined OP_MIN || defined OP_MAX || defined OP_POW || \
+    defined OP_MUL || defined OP_DIV
    #undef EXTRA_PARAMS
    #define EXTRA_PARAMS , workT srcelem2
 #endif

--- a/modules/core/test/ocl/test_arithm.cpp
+++ b/modules/core/test/ocl/test_arithm.cpp
@@ -293,7 +293,7 @@ OCL_TEST_P(Mul, Mat)
    }
 }

-OCL_TEST_P(Mul, DISABLED_Scalar)
+OCL_TEST_P(Mul, Scalar)
 {
    for (int j = 0; j < test_loop_times; j++)
    {
@@ -306,7 +306,7 @@ OCL_TEST_P(Mul, DISABLED_Scalar)
    }
 }

-OCL_TEST_P(Mul, DISABLED_Mat_Scale)
+OCL_TEST_P(Mul, Mat_Scale)
 {
    for (int j = 0; j < test_loop_times; j++)
    {
@@ -319,6 +319,20 @@ OCL_TEST_P(Mul, DISABLED_Mat_Scale)
    }
 }

+OCL_TEST_P(Mul, Mat_Scalar_Scale)
+{
+    for (int j = 0; j < test_loop_times; j++)
+    {
+        generateTestData();
+
+        OCL_OFF(cv::multiply(src1_roi, val, dst1_roi, val[0]));
+        OCL_ON(cv::multiply(usrc1_roi, val, udst1_roi, val[0]));
+
+        Near(udst1_roi.depth() >= CV_32F ? 1e-2 : 1);
+    }
+}
+
+
 //////////////////////////////// Div /////////////////////////////////////////////////

 typedef ArithmTestBase Div;
@@ -335,7 +349,7 @@ OCL_TEST_P(Div, Mat)
    }
 }

-OCL_TEST_P(Div, DISABLED_Scalar)
+OCL_TEST_P(Div, Scalar)
 {
    for (int j = 0; j < test_loop_times; j++)
    {
@@ -348,6 +362,19 @@ OCL_TEST_P(Div, DISABLED_Scalar)
    }
 }

+OCL_TEST_P(Div, Scalar2)
+{
+    for (int j = 0; j < test_loop_times; j++)
+    {
+        generateTestData();
+
+        OCL_OFF(cv::divide(src1_roi, val, dst1_roi));
+        OCL_ON(cv::divide(usrc1_roi, val, udst1_roi));
+
+        Near(udst1_roi.depth() >= CV_32F ? 1e-3 : 1);
+    }
+}
+
 OCL_TEST_P(Div, Mat_Scale)
 {
    for (int j = 0; j < test_loop_times; j++)
@@ -361,8 +388,7 @@ OCL_TEST_P(Div, Mat_Scale)
    }
 }

-
-OCL_TEST_P(Div, DISABLED_Mat_Scalar_Scale)
+OCL_TEST_P(Div, Mat_Scalar_Scale)
 {
    for (int j = 0; j < test_loop_times; j++)
    {
@@ -375,6 +401,19 @@ OCL_TEST_P(Div, DISABLED_Mat_Scalar_Scale)
    }
 }

+OCL_TEST_P(Div, Recip)
+{
+    for (int j = 0; j < test_loop_times; j++)
+    {
+        generateTestData();
+
+        OCL_OFF(cv::divide(val[0], src1_roi, dst1_roi));
+        OCL_ON(cv::divide(val[0], usrc1_roi, udst1_roi));
+
+        Near(udst1_roi.depth() >= CV_32F ? 1e-3 : 1);
+    }
+}
+
 //////////////////////////////// Min/Max /////////////////////////////////////////////////

 typedef ArithmTestBase Min;