Merge pull request #1599 from ilya-lavrenov:ocl_mac_kernel_warnings

171e0e62 · Andrey Pavlenko · OpenCV Buildbot · b7f1edcc · 1be77dd2 · 171e0e62
Commit 171e0e62 authored Oct 11, 2013 by Andrey Pavlenko Committed by OpenCV Buildbot Oct 11, 2013
15 changed files
--- a/modules/core/include/opencv2/core/types_c.h
+++ b/modules/core/include/opencv2/core/types_c.h
@@ -266,7 +266,8 @@ enum {
 CV_OpenGlNotSupported=        -218,
 CV_OpenGlApiCallError=        -219,
 CV_OpenCLDoubleNotSupported=  -220,
- CV_OpenCLInitError=           -221
+ CV_OpenCLInitError=           -221,
+ CV_OpenCLNoAMDBlasFft=        -222
 };
 /****************************************************************************************\

--- a/modules/ocl/src/cl_operations.cpp
+++ b/modules/ocl/src/cl_operations.cpp
@@ -212,13 +212,35 @@ void openCLVerifyKernel(const Context *ctx, cl_kernel kernel, size_t *localThrea
 static double total_execute_time = 0;
 static double total_kernel_time = 0;
 #endif
+// Normalizes a build-options string before it is handed to the kernel lookup:
+//   * leading spaces are dropped,
+//   * every run of consecutive spaces is collapsed into a single space.
+// Only the ' ' character is treated as white space (tabs etc. are copied as is),
+// and a single trailing space, if present, is preserved.
+//
+// buildOptions - NUL-terminated options string; may be NULL.
+// Returns the normalized options; an empty string for NULL or all-space input.
+// The result's size() is exactly the number of characters kept (no NUL padding),
+// so two normalized strings can be compared directly with operator==.
+static std::string removeDuplicatedWhiteSpaces(const char * buildOptions)
+{
+    if (buildOptions == NULL)
+        return "";
+
+    const size_t length = strlen(buildOptions);
+
+    std::string opt;
+    opt.reserve(length); // upper bound: the output is never longer than the input
+
+    for (size_t sidx = 0; sidx < length; ++sidx)
+    {
+        const char c = buildOptions[sidx];
+        // A space is kept only when it follows a non-space character; this drops
+        // leading spaces (opt is still empty) and duplicates (previous is a space).
+        if (c != ' ' || (!opt.empty() && opt[opt.size() - 1] != ' '))
+            opt += c;
+    }
+
+    return opt;
+}
 void openCLExecuteKernel_(Context *ctx, const cv::ocl::ProgramEntry* source, string kernelName, size_t globalThreads[3],
                          size_t localThreads[3],  vector< pair<size_t, const void *> > &args, int channels,
                          int depth, const char *build_options)
 {
    //construct kernel name
    //The rule is functionName_Cn_Dn, C represent Channels, D Represent DataType Depth, n represent an integer number
-    //for exmaple split_C2_D2, represent the split kernel with channels =2 and dataType Depth = 2(Data type is char)
+    //for example split_C2_D3, represent the split kernel with channels = 2 and dataType Depth = 3(Data type is short)
    stringstream idxStr;
    if(channels != -1)
        idxStr << "_C" << channels;
@@ -227,7 +249,8 @@ void openCLExecuteKernel_(Context *ctx, const cv::ocl::ProgramEntry* source, str
    kernelName += idxStr.str();
    cl_kernel kernel;
-    kernel = openCLGetKernelFromSource(ctx, source, kernelName, build_options);
+    std::string fixedOptions = removeDuplicatedWhiteSpaces(build_options);
+    kernel = openCLGetKernelFromSource(ctx, source, kernelName, fixedOptions.c_str());
    if ( localThreads != NULL)
    {

--- a/modules/ocl/src/fft.cpp
+++ b/modules/ocl/src/fft.cpp
@@ -50,7 +50,7 @@ using namespace cv::ocl;
 #if !defined HAVE_CLAMDFFT
 void cv::ocl::dft(const oclMat&, oclMat&, Size, int)
 {
-    CV_Error(CV_StsNotImplemented, "OpenCL DFT is not implemented");
+    CV_Error(CV_OpenCLNoAMDBlasFft, "OpenCL DFT is not implemented");
 }
 namespace cv { namespace ocl {
    void fft_teardown();

--- a/modules/ocl/src/gemm.cpp
+++ b/modules/ocl/src/gemm.cpp
@@ -58,12 +58,12 @@ void clBlasTeardown();
 void cv::ocl::gemm(const oclMat&, const oclMat&, double,
                   const oclMat&, double, oclMat&, int)
 {
-    CV_Error(CV_StsNotImplemented, "OpenCL BLAS is not implemented");
+    CV_Error(CV_OpenCLNoAMDBlasFft, "OpenCL BLAS is not implemented");
 }
 void cv::ocl::clBlasSetup()
 {
-    CV_Error(CV_StsNotImplemented, "OpenCL BLAS is not implemented");
+    CV_Error(CV_OpenCLNoAMDBlasFft, "OpenCL BLAS is not implemented");
 }
 void cv::ocl::clBlasTeardown()

--- a/modules/ocl/src/imgproc.cpp
+++ b/modules/ocl/src/imgproc.cpp
@@ -1497,7 +1497,7 @@ namespace cv
                    openCLSafeCall(clReleaseKernel(kernel));
                    static char opt[20] = {0};
-                    sprintf(opt, " -D WAVE_SIZE=%d", (int)wave_size);
+                    sprintf(opt, "-D WAVE_SIZE=%d", (int)wave_size);
                    openCLExecuteKernel(Context::getContext(), &imgproc_clahe, kernelName, globalThreads, localThreads, args, -1, -1, opt);
                }
            }

--- a/modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl
+++ b/modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl
@@ -70,7 +70,7 @@ __kernel void arithm_absdiff_nonsaturate_binary(__global srcT *src1, int src1_st
        dstT t1 = convertToDstT(src2[src2_index]);
        dstT t2 = t0 - t1;
-        dst[dst_index] = t2 >= 0 ? t2 : -t2;
+        dst[dst_index] = t2 >= (dstT)(0) ? t2 : -t2;
    }
 }
@@ -88,6 +88,6 @@ __kernel void arithm_absdiff_nonsaturate(__global srcT *src1, int src1_step, int
        dstT t0 = convertToDstT(src1[src1_index]);
-        dst[dst_index] = t0 >= 0 ? t0 : -t0;
+        dst[dst_index] = t0 >= (dstT)(0) ? t0 : -t0;
    }
 }
--- a/modules/ocl/src/opencl/arithm_sum.cl
+++ b/modules/ocl/src/opencl/arithm_sum.cl
@@ -51,14 +51,14 @@
 #endif
 #endif
-#if defined (FUNC_SUM)
+#if FUNC_SUM
 #define FUNC(a, b) b += a;
-#endif
+#elif FUNC_ABS_SUM
-#if defined (FUNC_ABS_SUM)
+#define FUNC(a, b) b += a >= (dstT)(0) ? a : -a;
-#define FUNC(a, b) b += a >= 0 ? a : -a;
+#elif FUNC_SQR_SUM
-#endif
-#if defined (FUNC_SQR_SUM)
 #define FUNC(a, b) b += a * a;
+#else
+#error No sum function
 #endif
 /**************************************Array buffer SUM**************************************/

--- a/modules/ocl/src/opencl/filtering_laplacian.cl
+++ b/modules/ocl/src/opencl/filtering_laplacian.cl
@@ -211,7 +211,7 @@ __kernel void filter2D(
    barrier(CLK_LOCAL_MEM_FENCE);
    if(globalRow < rows && globalCol < cols)
    {
-        T_SUM sum = (T_SUM)SUM_ZERO;
+        T_SUM sum = (T_SUM)(SUM_ZERO);
        int filterIdx = 0;
        for(int i = 0; i < FILTER_SIZE; i++)
        {
@@ -291,7 +291,7 @@ __kernel void filter2D_3x3(
                T_IMG data = src[mad24(selected_row, src_step, selected_cols)];
                int con = selected_row >= 0 && selected_row < wholerows && selected_cols >= 0 && selected_cols < wholecols;
-                data = con ? data : 0;
+                data = con ? data : (T_IMG)(0);
                local_data[mad24(i, LOCAL_MEM_STEP, lX)] = data;
                if(lX < (ANX << 1))
@@ -300,7 +300,7 @@ __kernel void filter2D_3x3(
                    data  = src[mad24(selected_row, src_step, selected_cols)];
                    con = selected_row >= 0 && selected_row < wholerows && selected_cols >= 0 && selected_cols < wholecols;
-                    data = con ? data : 0;
+                    data = con ? data : (T_IMG)(0);
                    local_data[mad24(i, LOCAL_MEM_STEP, lX) + groupX_size] = data;
                }
 #else

--- a/modules/ocl/src/opencl/stereobp.cl
+++ b/modules/ocl/src/opencl/stereobp.cl
@@ -290,7 +290,7 @@ void message(__global T *us_, __global T *ds_, __global T *ls_, __global T *rs_,
    minimum += cmax_disc_term;
-    float4 sum = 0;
+    float4 sum = (float4)(0);
    prev = convert_float4(t_dst[CNDISP - 1]);
    for (int disp = CNDISP - 2; disp >= 0; disp--)
    {
@@ -308,7 +308,7 @@ void message(__global T *us_, __global T *ds_, __global T *ls_, __global T *rs_,
    t_dst[CNDISP - 1] = saturate_cast4(dst_reg);
    sum += dst_reg;
-    sum /= CNDISP;
+    sum /= (float4)(CNDISP);
 #pragma unroll
    for(int i = 0, idx = 0; i < CNDISP; ++i, idx+=msg_disp_step)
    {

--- a/modules/ocl/src/pyrlk.cpp
+++ b/modules/ocl/src/pyrlk.cpp
@@ -134,7 +134,7 @@ static void lkSparse_run(oclMat &I, oclMat &J,
            openCLSafeCall(clReleaseKernel(kernel));
            static char opt[32] = {0};
-            sprintf(opt, " -D WAVE_SIZE=%d", wave_size);
+            sprintf(opt, "-D WAVE_SIZE=%d", wave_size);
            openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads,
                                args, I.oclchannels(), I.depth(), opt);

--- a/modules/ocl/test/test_fft.cpp
+++ b/modules/ocl/test/test_fft.cpp
@@ -47,8 +47,6 @@
 using namespace std;
-#ifdef HAVE_CLAMDFFT
 ////////////////////////////////////////////////////////////////////////////
 // Dft
@@ -102,9 +100,6 @@ OCL_TEST_P(Dft, R2CthenC2R)
    EXPECT_MAT_NEAR(a, d_c, a.size().area() * 1e-4);
 }
 INSTANTIATE_TEST_CASE_P(OCL_ImgProc, Dft, testing::Combine(
                            testing::Values(cv::Size(2, 3), cv::Size(5, 4), cv::Size(25, 20), cv::Size(512, 1), cv::Size(1024, 768)),
                            testing::Values(0, (int)cv::DFT_ROWS, (int)cv::DFT_SCALE) ));
-#endif // HAVE_CLAMDFFT
--- a/modules/ocl/test/test_gemm.cpp
+++ b/modules/ocl/test/test_gemm.cpp
@@ -42,12 +42,13 @@
 //
 //M*/
 #include "test_precomp.hpp"
 using namespace std;
-#ifdef HAVE_CLAMDBLAS
 ////////////////////////////////////////////////////////////////////////////
 // GEMM
 PARAM_TEST_CASE(Gemm, int, cv::Size, int)
 {
    int      type;
@@ -81,4 +82,3 @@ INSTANTIATE_TEST_CASE_P(ocl_gemm, Gemm, testing::Combine(
                            testing::Values(CV_32FC1, CV_32FC2/*, CV_64FC1, CV_64FC2*/),
                            testing::Values(cv::Size(20, 20), cv::Size(300, 300)),
                            testing::Values(0, (int)cv::GEMM_1_T, (int)cv::GEMM_2_T, (int)(cv::GEMM_1_T + cv::GEMM_2_T))));
-#endif
--- a/modules/ocl/test/test_kalman.cpp
+++ b/modules/ocl/test/test_kalman.cpp
@@ -46,8 +46,6 @@
 #ifdef HAVE_OPENCL
-#ifdef HAVE_CLAMDBLAS
 using namespace cv;
 using namespace cv::ocl;
 using namespace cvtest;
@@ -147,6 +145,4 @@ OCL_TEST_P(Kalman, Accuracy)
 INSTANTIATE_TEST_CASE_P(OCL_Video, Kalman, Combine(Values(3, 7), Values(30)));
-#endif // HAVE_CLAMDBLAS
 #endif // HAVE_OPENCL
--- a/modules/ocl/test/test_ml.cpp
+++ b/modules/ocl/test/test_ml.cpp
@@ -128,8 +128,6 @@ INSTANTIATE_TEST_CASE_P(OCL_ML, KNN, Combine(Values(6, 5), Values(Size(200, 400)
 ////////////////////////////////SVM/////////////////////////////////////////////////
-#ifdef HAVE_CLAMDBLAS
 PARAM_TEST_CASE(SVM_OCL, int, int, int)
 {
    cv::Size size;
@@ -307,6 +305,5 @@ INSTANTIATE_TEST_CASE_P(OCL_ML, SVM_OCL, testing::Combine(
                            Values((int)CvSVM::C_SVC, (int)CvSVM::NU_SVC, (int)CvSVM::ONE_CLASS, (int)CvSVM::NU_SVR),
                            Values(2, 3, 4)
                        ));
-#endif // HAVE_CLAMDBLAS
 #endif // HAVE_OPENCL
--- a/modules/ocl/test/utility.hpp
+++ b/modules/ocl/test/utility.hpp
@@ -291,10 +291,12 @@ CV_FLAGS(DftFlags, DFT_INVERSE, DFT_SCALE, DFT_ROWS, DFT_COMPLEX_OUTPUT, DFT_REA
        } \
        catch (const cv::Exception & ex) \
        { \
-            if (ex.code != CV_OpenCLDoubleNotSupported) \
+            if (ex.code == CV_OpenCLDoubleNotSupported)\
-               throw; \
-            else \
                std::cout << "Test skipped (selected device does not support double)" << std::endl; \
+            else if (ex.code == CV_OpenCLNoAMDBlasFft) \
+                std::cout << "Test skipped (AMD Blas / Fft libraries are not available)" << std::endl; \
+            else \
+                throw; \
        } \
    } \
    \