Allow OpenCL acceleration in every OpenCV module

91ac9688 · Andrey Kamaev · d28df08e
Commit 91ac9688 authored Mar 15, 2013 by Andrey Kamaev
95 changed files
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -782,7 +782,7 @@ if(HAVE_CUDA)
  status("    Use fast math:"        CUDA_FAST_MATH THEN YES ELSE NO)
 endif()
-if(HAVE_OPENCL AND BUILD_opencv_ocl)
+if(HAVE_OPENCL)
  status("")
  status("  OpenCL")
  if(OPENCL_INCLUDE_DIR)

--- a/cmake/OpenCVModule.cmake
+++ b/cmake/OpenCVModule.cmake
@@ -432,10 +432,22 @@ macro(ocv_glob_module_sources)
  file(GLOB lib_hdrs "include/opencv2/${name}/*.hpp" "include/opencv2/${name}/*.h")
  file(GLOB lib_hdrs_detail "include/opencv2/${name}/detail/*.hpp" "include/opencv2/${name}/detail/*.h")
+  file(GLOB cl_kernels "src/opencl/*.cl")
  source_group("Src" FILES ${lib_srcs} ${lib_int_hdrs})
  source_group("Include" FILES ${lib_hdrs})
  source_group("Include\\detail" FILES ${lib_hdrs_detail})
+  if(HAVE_OPENCL AND cl_kernels)
+    ocv_include_directories(${OPENCL_INCLUDE_DIRS})
+    add_custom_command(
+      OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/kernels.cpp"
+      COMMAND ${CMAKE_COMMAND} -DCL_DIR="${CMAKE_CURRENT_SOURCE_DIR}/src/opencl" -DOUTPUT="${CMAKE_CURRENT_BINARY_DIR}/kernels.cpp" -P "${OpenCV_SOURCE_DIR}/cmake/cl2cpp.cmake"
+      DEPENDS ${cl_kernels} "${OpenCV_SOURCE_DIR}/cmake/cl2cpp.cmake")
+    source_group("Src\\OpenCL" FILES ${cl_kernels} "${CMAKE_CURRENT_BINARY_DIR}/kernels.cpp")
+    list(APPEND lib_srcs ${cl_kernels} "${CMAKE_CURRENT_BINARY_DIR}/kernels.cpp")
+  endif()
  ocv_set_module_sources(${ARGN} HEADERS ${lib_hdrs} ${lib_hdrs_detail} SOURCES ${lib_srcs} ${lib_int_hdrs})
 endmacro()
@@ -449,6 +461,9 @@ macro(ocv_create_module)
  if(NOT "${ARGN}" STREQUAL "SKIP_LINK")
    target_link_libraries(${the_module} ${OPENCV_MODULE_${the_module}_DEPS} ${OPENCV_MODULE_${the_module}_DEPS_EXT} ${OPENCV_LINKER_LIBS} ${IPP_LIBS} ${ARGN})
+    if(HAVE_OPENCL AND OPENCL_LIBRARIES)
+      target_link_libraries(${the_module} ${OPENCL_LIBRARIES})
+    endif()
  endif()
  add_dependencies(opencv_modules ${the_module})

--- a/modules/ocl/cl2cpp.cmake
+++ b/modules/ocl/cl2cpp.cmake
--- a/modules/ocl/CMakeLists.txt
+++ b/modules/ocl/CMakeLists.txt
@@ -3,45 +3,5 @@ if(NOT HAVE_OPENCL)
 endif()
 set(the_description "OpenCL-accelerated Computer Vision")
-ocv_add_module(ocl opencv_core opencv_imgproc opencv_features2d opencv_objdetect opencv_video opencv_nonfree)
+ocv_define_module(ocl opencv_core opencv_imgproc opencv_features2d opencv_objdetect opencv_video opencv_nonfree)
-ocv_module_include_directories(${OPENCL_INCLUDE_DIRS})
-file(GLOB CL_FILES "${CMAKE_CURRENT_SOURCE_DIR}/src/kernels/*.cl")
-set(kernels_cpp "${CMAKE_CURRENT_BINARY_DIR}/kernels.cpp")
-set(cl2cpp_script "${CMAKE_CURRENT_SOURCE_DIR}/cl2cpp.cmake")
-add_custom_command(
-  OUTPUT ${kernels_cpp}
-  COMMAND ${CMAKE_COMMAND} -DCL_DIR="${CMAKE_CURRENT_SOURCE_DIR}/src/kernels" -DOUTPUT="${kernels_cpp}" -P ${cl2cpp_script}
-  DEPENDS ${CL_FILES} ${cl2cpp_script})
-file(GLOB lib_hdrs     "include/opencv2/${name}/*.hpp" "include/opencv2/${name}/*.h")
-file(GLOB lib_srcs     "src/*.cpp")
-file(GLOB lib_int_hdrs "src/*.h*")
-source_group("Include"   FILES ${lib_hdrs})
-source_group("Src\\Host" FILES ${lib_srcs} ${lib_int_hdrs} ${kernels_cpp})
 ocv_warnings_disable(CMAKE_CXX_FLAGS -Wshadow)
-ocv_set_module_sources(HEADERS ${lib_hdrs} SOURCES ${lib_int_hdrs} ${lib_srcs} ${kernels_cpp})
-ocv_create_module(${OPENCL_LIBRARIES})
-ocv_add_precompiled_headers(${the_module})
-################################################################################################################
-################################      OpenCL Module Tests     ##################################################
-################################################################################################################
-file(GLOB test_srcs "test/*.cpp")
-file(GLOB test_hdrs "test/*.hpp" "test/*.h")
-ocv_add_accuracy_tests(FILES "Include" ${test_hdrs}
-                       FILES "Src" ${test_srcs})
-################################################################################################################
-################################   OpenCL Module Performance  ##################################################
-################################################################################################################
-file(GLOB perf_srcs "perf/*.cpp")
-file(GLOB perf_hdrs "perf/*.hpp" "perf/*.h")
-ocv_add_perf_tests(FILES "Include" ${perf_hdrs}
-                   FILES "Src" ${perf_srcs})
--- a/modules/ocl/src/kernels/brute_force_match.cl
+++ b/modules/ocl/src/kernels/brute_force_match.cl
--- a/modules/ocl/src/kernels/arithm_2_mat.cl
+++ b/modules/ocl/src/kernels/arithm_2_mat.cl
--- a/modules/ocl/src/kernels/arithm_LUT.cl
+++ b/modules/ocl/src/kernels/arithm_LUT.cl
--- a/modules/ocl/src/kernels/arithm_absdiff.cl
+++ b/modules/ocl/src/kernels/arithm_absdiff.cl
--- a/modules/ocl/src/kernels/arithm_add.cl
+++ b/modules/ocl/src/kernels/arithm_add.cl
--- a/modules/ocl/src/kernels/arithm_addWeighted.cl
+++ b/modules/ocl/src/kernels/arithm_addWeighted.cl
@@ -61,29 +61,29 @@ __kernel void addWeighted_D0 (__global uchar *src1,int src1_step,int src1_offset
    int y = get_global_id(1);
    if (x < cols && y < rows)
    {
        x = x << 2;
        #define dst_align (dst_offset & 3)
-        int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); 
+        int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
-        int src2_index = mad24(y, src2_step, x + src2_offset - dst_align); 
+        int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
        int dst_start  = mad24(y, dst_step, dst_offset);
        int dst_end    = mad24(y, dst_step, dst_offset + dst_step1);
        int dst_index  = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
-		uchar4 src1_data ,src2_data;
+        uchar4 src1_data ,src2_data;
-		src1_data.x= src1_index+0 >= 0 ? src1[src1_index+0] : 0;
+        src1_data.x= src1_index+0 >= 0 ? src1[src1_index+0] : 0;
-		src1_data.y= src1_index+1 >= 0 ? src1[src1_index+1] : 0;
+        src1_data.y= src1_index+1 >= 0 ? src1[src1_index+1] : 0;
-		src1_data.z= src1_index+2 >= 0 ? src1[src1_index+2] : 0;
+        src1_data.z= src1_index+2 >= 0 ? src1[src1_index+2] : 0;
-		src1_data.w= src1_index+3 >= 0 ? src1[src1_index+3] : 0;
+        src1_data.w= src1_index+3 >= 0 ? src1[src1_index+3] : 0;
-		src2_data.x= src2_index+0 >= 0 ? src2[src2_index+0] : 0;
+        src2_data.x= src2_index+0 >= 0 ? src2[src2_index+0] : 0;
-		src2_data.y= src2_index+1 >= 0 ? src2[src2_index+1] : 0;
+        src2_data.y= src2_index+1 >= 0 ? src2[src2_index+1] : 0;
-		src2_data.z= src2_index+2 >= 0 ? src2[src2_index+2] : 0;
+        src2_data.z= src2_index+2 >= 0 ? src2[src2_index+2] : 0;
-		src2_data.w= src2_index+3 >= 0 ? src2[src2_index+3] : 0;
+        src2_data.w= src2_index+3 >= 0 ? src2[src2_index+3] : 0;
        uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
 //        short4 tmp      = convert_short4_sat(src1_data) * alpha + convert_short4_sat(src2_data) * beta + gama;
@@ -117,14 +117,14 @@ __kernel void addWeighted_D2 (__global ushort *src1, int src1_step,int src1_offs
    int y = get_global_id(1);
    if (x < cols && y < rows)
    {
        x = x << 2;
        #define dst_align ((dst_offset >> 1) & 3)
-        int src1_index = mad24(y, src1_step, (x << 1) + src1_offset -( dst_align << 1)); 
+        int src1_index = mad24(y, src1_step, (x << 1) + src1_offset -( dst_align << 1));
-        int src2_index = mad24(y, src2_step, (x << 1) + src2_offset -( dst_align << 1)); 
+        int src2_index = mad24(y, src2_step, (x << 1) + src2_offset -( dst_align << 1));
        int dst_start  = mad24(y, dst_step, dst_offset);
        int dst_end    = mad24(y, dst_step, dst_offset + dst_step1);
@@ -177,14 +177,14 @@ __kernel void addWeighted_D3 (__global short *src1, int src1_step,int src1_offse
    int y = get_global_id(1);
    if (x < cols && y < rows)
    {
        x = x << 2;
        #define dst_align ((dst_offset >> 1) & 3)
-        int src1_index = mad24(y, src1_step, (x << 1) + src1_offset -( dst_align << 1)); 
+        int src1_index = mad24(y, src1_step, (x << 1) + src1_offset -( dst_align << 1));
-        int src2_index = mad24(y, src2_step, (x << 1) + src2_offset -( dst_align << 1)); 
+        int src2_index = mad24(y, src2_step, (x << 1) + src2_offset -( dst_align << 1));
        int dst_start  = mad24(y, dst_step, dst_offset);
        int dst_end    = mad24(y, dst_step, dst_offset + dst_step1);
@@ -236,18 +236,18 @@ __kernel void addWeighted_D4 (__global int *src1, int src1_step,int src1_offset,
    int y = get_global_id(1);
    if (x < cols && y < rows)
    {
        x = x << 2;
        #define bitOfInt  (sizeof(int)== 4 ? 2: 3)
        #define dst_align ((dst_offset >> bitOfInt) & 3)
-        int src1_index = mad24(y, src1_step, (x << bitOfInt) + src1_offset - (dst_align << bitOfInt)); 
+        int src1_index = mad24(y, src1_step, (x << bitOfInt) + src1_offset - (dst_align << bitOfInt));
-        int src2_index = mad24(y, src2_step, (x << bitOfInt) + src2_offset - (dst_align << bitOfInt)); 
+        int src2_index = mad24(y, src2_step, (x << bitOfInt) + src2_offset - (dst_align << bitOfInt));
        int dst_start  = mad24(y, dst_step, dst_offset);
        int dst_end    = mad24(y, dst_step, dst_offset + dst_step1);
        int dst_index  = mad24(y, dst_step, dst_offset + (x << bitOfInt) -(dst_align << bitOfInt));
@@ -256,7 +256,7 @@ __kernel void addWeighted_D4 (__global int *src1, int src1_step,int src1_offset,
    int src2_index_fix = src2_index < 0 ? 0 : src2_index;
        int4 src1_data = vload4(0, (__global int *)((__global char *)src1 + src1_index_fix));
        int4 src2_data = vload4(0, (__global int *)((__global char *)src2 + src2_index_fix));
    if(src1_index < 0)
    {
        int4 tmp;
@@ -299,16 +299,16 @@ __kernel void addWeighted_D5 (__global float *src1,int src1_step,int src1_offset
    int y = get_global_id(1);
    if (x < cols && y < rows)
    {
        x = x << 2;
        #define dst_align ((dst_offset >> 2) & 3)
-        int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2)); 
+        int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
-        int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2)); 
+        int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
        int dst_start  = mad24(y, dst_step, dst_offset);
        int dst_end    = mad24(y, dst_step, dst_offset + dst_step1);
        int dst_index  = mad24(y, dst_step, dst_offset + (x << 2) -(dst_align << 2));
@@ -361,16 +361,16 @@ __kernel void addWeighted_D6 (__global double *src1, int src1_step,int src1_offs
    int y = get_global_id(1);
    if (x < cols && y < rows)
    {
        x = x << 2;
        #define dst_align ((dst_offset >> 3) & 3)
-        int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3)); 
+        int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3));
-        int src2_index = mad24(y, src2_step, (x << 3) + src2_offset - (dst_align << 3)); 
+        int src2_index = mad24(y, src2_step, (x << 3) + src2_offset - (dst_align << 3));
        int dst_start  = mad24(y, dst_step, dst_offset);
        int dst_end    = mad24(y, dst_step, dst_offset + dst_step1);
        int dst_index  = mad24(y, dst_step, dst_offset + (x << 3) -(dst_align << 3));

--- a/modules/ocl/src/kernels/arithm_add_scalar.cl
+++ b/modules/ocl/src/kernels/arithm_add_scalar.cl
--- a/modules/ocl/src/kernels/arithm_add_scalar_mask.cl
+++ b/modules/ocl/src/kernels/arithm_add_scalar_mask.cl
--- a/modules/ocl/src/kernels/arithm_bitwise_and.cl
+++ b/modules/ocl/src/kernels/arithm_bitwise_and.cl
@@ -63,8 +63,8 @@ __kernel void arithm_bitwise_and_D0 (__global uchar *src1, int src1_step, int sr
        x = x << 2;
        #define dst_align (dst_offset & 3)
-        int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); 
+        int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
-        int src2_index = mad24(y, src2_step, x + src2_offset - dst_align); 
+        int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
        int dst_start  = mad24(y, dst_step, dst_offset);
        int dst_end    = mad24(y, dst_step, dst_offset + dst_step1);
@@ -75,14 +75,14 @@ __kernel void arithm_bitwise_and_D0 (__global uchar *src1, int src1_step, int sr
     uchar4 src2_data = vload4(0, src2 + src2_index_fix);
     if(src1_index < 0)
-     {     
+     {
        uchar4 tmp;
        tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
        src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
-     }                 
+     }
-     if(src2_index < 0)  
+     if(src2_index < 0)
-     {                         
+     {
-        uchar4 tmp;                   
+        uchar4 tmp;
        tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
        src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
     }
@@ -113,8 +113,8 @@ __kernel void arithm_bitwise_and_D1 (__global char *src1, int src1_step, int src
        x = x << 2;
        #define dst_align (dst_offset & 3)
-        int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); 
+        int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
-        int src2_index = mad24(y, src2_step, x + src2_offset - dst_align); 
+        int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
        int dst_start  = mad24(y, dst_step, dst_offset);
        int dst_end    = mad24(y, dst_step, dst_offset + dst_step1);
@@ -126,14 +126,14 @@ __kernel void arithm_bitwise_and_D1 (__global char *src1, int src1_step, int src
     char4 src2_data = vload4(0, src2 + src2_index_fix);
     if(src1_index < 0)
-     {     
+     {
        char4 tmp;
        tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
        src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
-     }                 
+     }
-     if(src2_index < 0)  
+     if(src2_index < 0)
-     {                         
+     {
-        char4 tmp;                   
+        char4 tmp;
        tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
        src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
     }
@@ -164,8 +164,8 @@ __kernel void arithm_bitwise_and_D2 (__global ushort *src1, int src1_step, int s
        x = x << 2;
        #define dst_align ((dst_offset >> 1) & 3)
-        int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); 
+        int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
-        int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); 
+        int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
        int dst_start  = mad24(y, dst_step, dst_offset);
        int dst_end    = mad24(y, dst_step, dst_offset + dst_step1);
@@ -177,14 +177,14 @@ __kernel void arithm_bitwise_and_D2 (__global ushort *src1, int src1_step, int s
        ushort4 src2_data = vload4(0, (__global ushort *)((__global char *)src2 + src2_index_fix));
     if(src1_index < 0)
-     {     
+     {
        ushort4 tmp;
        tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
        src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
-     }                 
+     }
-     if(src2_index < 0)  
+     if(src2_index < 0)
-     {                         
+     {
-        ushort4 tmp;                   
+        ushort4 tmp;
        tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
        src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
     }
@@ -216,8 +216,8 @@ __kernel void arithm_bitwise_and_D3 (__global short *src1, int src1_step, int sr
        x = x << 2;
        #define dst_align ((dst_offset >> 1) & 3)
-        int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); 
+        int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
-        int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); 
+        int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
        int dst_start  = mad24(y, dst_step, dst_offset);
        int dst_end    = mad24(y, dst_step, dst_offset + dst_step1);
@@ -229,14 +229,14 @@ __kernel void arithm_bitwise_and_D3 (__global short *src1, int src1_step, int sr
        short4 src2_data = vload4(0, (__global short *)((__global char *)src2 + src2_index_fix));
     if(src1_index < 0)
-     {     
+     {
        short4 tmp;
        tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
        src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
-     }                 
+     }
-     if(src2_index < 0)  
+     if(src2_index < 0)
-     {                         
+     {
-        short4 tmp;                   
+        short4 tmp;
        tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
        src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
     }
@@ -320,4 +320,3 @@ __kernel void arithm_bitwise_and_D6 (__global char *src1, int src1_step, int src
    }
 }
 #endif
--- a/modules/ocl/src/kernels/arithm_bitwise_and_mask.cl
+++ b/modules/ocl/src/kernels/arithm_bitwise_and_mask.cl
@@ -1135,4 +1135,3 @@ __kernel void arithm_bitwise_and_with_mask_C4_D6 (__global char *src1, int src1_
    }
 }
 #endif
--- a/modules/ocl/src/kernels/arithm_bitwise_and_scalar.cl
+++ b/modules/ocl/src/kernels/arithm_bitwise_and_scalar.cl
--- a/modules/ocl/src/kernels/arithm_bitwise_and_scalar_mask.cl
+++ b/modules/ocl/src/kernels/arithm_bitwise_and_scalar_mask.cl
@@ -1055,4 +1055,3 @@ __kernel void arithm_s_bitwise_and_with_mask_C4_D6 (__global short *src1, int sr
    }
 }
 #endif
--- a/modules/ocl/src/kernels/arithm_bitwise_not.cl
+++ b/modules/ocl/src/kernels/arithm_bitwise_not.cl
@@ -62,7 +62,7 @@ __kernel void arithm_bitwise_not_D0 (__global uchar *src1, int src1_step, int sr
        x = x << 2;
        #define dst_align (dst_offset & 3)
-        int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); 
+        int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
        int dst_start  = mad24(y, dst_step, dst_offset);
        int dst_end    = mad24(y, dst_step, dst_offset + dst_step1);
@@ -72,7 +72,7 @@ __kernel void arithm_bitwise_not_D0 (__global uchar *src1, int src1_step, int sr
        uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
        uchar4 tmp_data = ~ src1_data;
  /*  if(src1_index < 0)
    {
      uchar4 tmp;
@@ -102,7 +102,7 @@ __kernel void arithm_bitwise_not_D1 (__global char *src1, int src1_step, int src
        x = x << 2;
        #define dst_align (dst_offset & 3)
-        int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); 
+        int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
        int dst_start  = mad24(y, dst_step, dst_offset);
        int dst_end    = mad24(y, dst_step, dst_offset + dst_step1);
@@ -136,7 +136,7 @@ __kernel void arithm_bitwise_not_D2 (__global ushort *src1, int src1_step, int s
        x = x << 2;
        #define dst_align ((dst_offset >> 1) & 3)
-        int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); 
+        int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
        int dst_start  = mad24(y, dst_step, dst_offset);
        int dst_end    = mad24(y, dst_step, dst_offset + dst_step1);
@@ -171,7 +171,7 @@ __kernel void arithm_bitwise_not_D3 (__global short *src1, int src1_step, int sr
        x = x << 2;
        #define dst_align ((dst_offset >> 1) & 3)
-        int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); 
+        int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
        int dst_start  = mad24(y, dst_step, dst_offset);
        int dst_end    = mad24(y, dst_step, dst_offset + dst_step1);
@@ -245,14 +245,13 @@ __kernel void arithm_bitwise_not_D6 (__global char *src, int src_step, int src_o
    {
        int src_index = mad24(y, src_step, (x << 3) + src_offset);
        int dst_index = mad24(y, dst_step,  (x << 3) + dst_offset);
        char8 data;
        data = *((__global char8 *)((__global char *)src + src_index));
        data = ~ data;
        *((__global char8 *)((__global char *)dst + dst_index)) = data;
    }
 }
 #endif
--- a/modules/ocl/src/kernels/arithm_bitwise_or.cl
+++ b/modules/ocl/src/kernels/arithm_bitwise_or.cl
@@ -63,8 +63,8 @@ __kernel void arithm_bitwise_or_D0 (__global uchar *src1, int src1_step, int src
        x = x << 2;
        #define dst_align (dst_offset & 3)
-        int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); 
+        int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
-        int src2_index = mad24(y, src2_step, x + src2_offset - dst_align); 
+        int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
        int dst_start  = mad24(y, dst_step, dst_offset);
        int dst_end    = mad24(y, dst_step, dst_offset + dst_step1);
@@ -111,8 +111,8 @@ __kernel void arithm_bitwise_or_D1 (__global char *src1, int src1_step, int src1
        x = x << 2;
        #define dst_align (dst_offset & 3)
-        int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); 
+        int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
-        int src2_index = mad24(y, src2_step, x + src2_offset - dst_align); 
+        int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
        int dst_start  = mad24(y, dst_step, dst_offset);
        int dst_end    = mad24(y, dst_step, dst_offset + dst_step1);
@@ -148,8 +148,8 @@ __kernel void arithm_bitwise_or_D2 (__global ushort *src1, int src1_step, int sr
        x = x << 2;
        #define dst_align ((dst_offset >> 1) & 3)
-        int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); 
+        int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
-        int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); 
+        int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
        int dst_start  = mad24(y, dst_step, dst_offset);
        int dst_end    = mad24(y, dst_step, dst_offset + dst_step1);
@@ -186,8 +186,8 @@ __kernel void arithm_bitwise_or_D3 (__global short *src1, int src1_step, int src
        x = x << 2;
        #define dst_align ((dst_offset >> 1) & 3)
-        int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); 
+        int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
-        int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); 
+        int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
        int dst_start  = mad24(y, dst_step, dst_offset);
        int dst_end    = mad24(y, dst_step, dst_offset + dst_step1);
@@ -276,4 +276,3 @@ __kernel void arithm_bitwise_or_D6 (__global char *src1, int src1_step, int src1
    }
 }
 #endif
--- a/modules/ocl/src/kernels/arithm_bitwise_or_mask.cl
+++ b/modules/ocl/src/kernels/arithm_bitwise_or_mask.cl
@@ -1135,4 +1135,3 @@ __kernel void arithm_bitwise_or_with_mask_C4_D6 (__global char *src1, int src1_s
    }
 }
 #endif
--- a/modules/ocl/src/kernels/arithm_bitwise_or_scalar.cl
+++ b/modules/ocl/src/kernels/arithm_bitwise_or_scalar.cl
@@ -911,4 +911,3 @@ __kernel void arithm_s_bitwise_or_C4_D6 (__global short *src1, int src1_step, in
    }
 }
 #endif
--- a/modules/ocl/src/kernels/arithm_bitwise_or_scalar_mask.cl
+++ b/modules/ocl/src/kernels/arithm_bitwise_or_scalar_mask.cl
@@ -1078,4 +1078,3 @@ __kernel void arithm_s_bitwise_or_with_mask_C4_D6 (__global short *src1, int src
    }
 }
 #endif
--- a/modules/ocl/src/kernels/arithm_bitwise_xor.cl
+++ b/modules/ocl/src/kernels/arithm_bitwise_xor.cl
@@ -63,8 +63,8 @@ __kernel void arithm_bitwise_xor_D0 (__global uchar *src1, int src1_step, int sr
        x = x << 2;
        #define dst_align (dst_offset & 3)
-        int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); 
+        int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
-        int src2_index = mad24(y, src2_step, x + src2_offset - dst_align); 
+        int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
        int dst_start  = mad24(y, dst_step, dst_offset);
        int dst_end    = mad24(y, dst_step, dst_offset + dst_step1);
@@ -76,14 +76,14 @@ __kernel void arithm_bitwise_xor_D0 (__global uchar *src1, int src1_step, int sr
        uchar4 src2_data = vload4(0, src2 + src2_index_fix);
     if(src1_index < 0)
-     {     
+     {
        uchar4 tmp;
        tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
        src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
-     }                 
+     }
-     if(src2_index < 0)  
+     if(src2_index < 0)
-     {                         
+     {
-        uchar4 tmp;                   
+        uchar4 tmp;
        tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
        src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
     }
@@ -113,8 +113,8 @@ __kernel void arithm_bitwise_xor_D1 (__global char *src1, int src1_step, int src
        x = x << 2;
        #define dst_align (dst_offset & 3)
-        int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); 
+        int src1_index = mad24(y, src1_step, x + src1_offset - dst_align);
-        int src2_index = mad24(y, src2_step, x + src2_offset - dst_align); 
+        int src2_index = mad24(y, src2_step, x + src2_offset - dst_align);
        int dst_start  = mad24(y, dst_step, dst_offset);
        int dst_end    = mad24(y, dst_step, dst_offset + dst_step1);
@@ -126,14 +126,14 @@ __kernel void arithm_bitwise_xor_D1 (__global char *src1, int src1_step, int src
        char4 src2_data = vload4(0, src2 + src2_index_fix);
     if(src1_index < 0)
-     {     
+     {
        char4 tmp;
        tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
        src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
-     }                 
+     }
-     if(src2_index < 0)  
+     if(src2_index < 0)
-     {                         
+     {
-        char4 tmp;                   
+        char4 tmp;
        tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
        src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
     }
@@ -164,8 +164,8 @@ __kernel void arithm_bitwise_xor_D2 (__global ushort *src1, int src1_step, int s
        x = x << 2;
        #define dst_align ((dst_offset >> 1) & 3)
-        int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); 
+        int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
-        int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); 
+        int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
        int dst_start  = mad24(y, dst_step, dst_offset);
        int dst_end    = mad24(y, dst_step, dst_offset + dst_step1);
@@ -177,14 +177,14 @@ __kernel void arithm_bitwise_xor_D2 (__global ushort *src1, int src1_step, int s
        ushort4 src2_data = vload4(0, (__global ushort *)((__global char *)src2 + src2_index_fix));
     if(src1_index < 0)
-     {     
+     {
        ushort4 tmp;
        tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
        src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
-     }                 
+     }
-     if(src2_index < 0)  
+     if(src2_index < 0)
-     {                         
+     {
-        ushort4 tmp;                   
+        ushort4 tmp;
        tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
        src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
     }
@@ -216,8 +216,8 @@ __kernel void arithm_bitwise_xor_D3 (__global short *src1, int src1_step, int sr
        x = x << 2;
        #define dst_align ((dst_offset >> 1) & 3)
-        int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); 
+        int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
-        int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); 
+        int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
        int dst_start  = mad24(y, dst_step, dst_offset);
        int dst_end    = mad24(y, dst_step, dst_offset + dst_step1);
@@ -231,14 +231,14 @@ __kernel void arithm_bitwise_xor_D3 (__global short *src1, int src1_step, int sr
        short4 dst_data = *((__global short4 *)((__global char *)dst + dst_index));
     if(src1_index < 0)
-     {     
+     {
        short4 tmp;
        tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
        src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
-     }                 
+     }
-     if(src2_index < 0)  
+     if(src2_index < 0)
-     {                         
+     {
-        short4 tmp;                   
+        short4 tmp;
        tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
        src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
     }
@@ -324,4 +324,3 @@ __kernel void arithm_bitwise_xor_D6 (__global char *src1, int src1_step, int src
    }
 }
 #endif
--- a/modules/ocl/src/kernels/arithm_bitwise_xor_mask.cl
+++ b/modules/ocl/src/kernels/arithm_bitwise_xor_mask.cl
@@ -1135,4 +1135,3 @@ __kernel void arithm_bitwise_xor_with_mask_C4_D6 (__global char *src1, int src1_
    }
 }
 #endif
--- a/modules/ocl/src/kernels/arithm_bitwise_xor_scalar.cl
+++ b/modules/ocl/src/kernels/arithm_bitwise_xor_scalar.cl
--- a/modules/ocl/src/kernels/arithm_bitwise_xor_scalar_mask.cl
+++ b/modules/ocl/src/kernels/arithm_bitwise_xor_scalar_mask.cl
@@ -1055,4 +1055,3 @@ __kernel void arithm_s_bitwise_xor_with_mask_C4_D6 (__global short *src1, int sr
    }
 }
 #endif
--- a/modules/ocl/src/kernels/arithm_cartToPolar.cl
+++ b/modules/ocl/src/kernels/arithm_cartToPolar.cl
--- a/modules/ocl/src/kernels/arithm_compare_eq.cl
+++ b/modules/ocl/src/kernels/arithm_compare_eq.cl
--- a/modules/ocl/src/kernels/arithm_compare_ne.cl
+++ b/modules/ocl/src/kernels/arithm_compare_ne.cl
--- a/modules/ocl/src/kernels/arithm_div.cl
+++ b/modules/ocl/src/kernels/arithm_div.cl
@@ -455,5 +455,3 @@ __kernel void arithm_s_div_D6 (__global double *src, int src_step, int src_offse
    }
 }
 #endif
--- a/modules/ocl/src/kernels/arithm_exp.cl
+++ b/modules/ocl/src/kernels/arithm_exp.cl
--- a/modules/ocl/src/kernels/arithm_flip.cl
+++ b/modules/ocl/src/kernels/arithm_flip.cl
--- a/modules/ocl/src/kernels/arithm_flip_rc.cl
+++ b/modules/ocl/src/kernels/arithm_flip_rc.cl
--- a/modules/ocl/src/kernels/arithm_log.cl
+++ b/modules/ocl/src/kernels/arithm_log.cl
--- a/modules/ocl/src/kernels/arithm_magnitude.cl
+++ b/modules/ocl/src/kernels/arithm_magnitude.cl
--- a/modules/ocl/src/kernels/arithm_magnitudeSqr.cl
+++ b/modules/ocl/src/kernels/arithm_magnitudeSqr.cl
@@ -60,17 +60,17 @@ __kernel void magnitudeSqr_C1_D5 (__global float *src1,int src1_step,int src1_of
    int y = get_global_id(1);
    if (x < cols && y < rows)
    {
        x = x << 2;
        #define dst_align ((dst_offset >> 2) & 3)
-        int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2)); 
+        int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2));
-        int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2)); 
+        int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2));
        int dst_start  = mad24(y, dst_step, dst_offset);
        int dst_end    = mad24(y, dst_step, dst_offset + dst_step1);
        int dst_index  = mad24(y, dst_step, dst_offset + (x << 2) -(dst_align << 2));
@@ -125,16 +125,16 @@ __kernel void magnitudeSqr_C2_D5 (__global float *src1,int src1_step,int src1_of
    int y = get_global_id(1);
    if (x < cols && y < rows)
    {
        x = x << 2;
        #define dst_align ((dst_offset >> 2) & 3)
-        int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3)); 
+        int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3));
        int dst_start  = mad24(y, dst_step, dst_offset);
        int dst_end    = mad24(y, dst_step, dst_offset + dst_step1);
        int dst_index  = mad24(y, dst_step, dst_offset + (x << 2) -(dst_align << 2));
@@ -148,8 +148,8 @@ __kernel void magnitudeSqr_C2_D5 (__global float *src1,int src1_step,int src1_of
          src1_data.s01234567 = src1_data.s45670123;
    if(src1_index== -2)
          src1_data.s01234567 = src1_data.s23456701;
        float4 dst_data = *((__global float4 *)((__global char *)dst + dst_index));

--- a/modules/ocl/src/kernels/arithm_minMax.cl
+++ b/modules/ocl/src/kernels/arithm_minMax.cl
--- a/modules/ocl/src/kernels/arithm_minMaxLoc.cl
+++ b/modules/ocl/src/kernels/arithm_minMaxLoc.cl
--- a/modules/ocl/src/kernels/arithm_minMaxLoc_mask.cl
+++ b/modules/ocl/src/kernels/arithm_minMaxLoc_mask.cl
@@ -240,4 +240,3 @@ __kernel void arithm_op_minMaxLoc_mask (int cols,int invalid_cols,int offset,int
       dst[gid + 3 * groupnum] = CONVERT_RES_TYPE(lm_maxloc[0]);
   }
 }
--- a/modules/ocl/src/kernels/arithm_minMax_mask.cl
+++ b/modules/ocl/src/kernels/arithm_minMax_mask.cl
@@ -194,4 +194,3 @@ __kernel void arithm_op_minMax_mask (int cols,int invalid_cols,int offset,int el
       dst[gid + groupnum] = localmem_max[0];
   }
 }
--- a/modules/ocl/src/kernels/arithm_mul.cl
+++ b/modules/ocl/src/kernels/arithm_mul.cl
--- a/modules/ocl/src/kernels/arithm_nonzero.cl
+++ b/modules/ocl/src/kernels/arithm_nonzero.cl
--- a/modules/ocl/src/kernels/arithm_phase.cl
+++ b/modules/ocl/src/kernels/arithm_phase.cl
--- a/modules/ocl/src/kernels/arithm_polarToCart.cl
+++ b/modules/ocl/src/kernels/arithm_polarToCart.cl
--- a/modules/ocl/src/kernels/arithm_pow.cl
+++ b/modules/ocl/src/kernels/arithm_pow.cl
--- a/modules/ocl/src/kernels/arithm_sub.cl
+++ b/modules/ocl/src/kernels/arithm_sub.cl
--- a/modules/ocl/src/kernels/arithm_sub_scalar.cl
+++ b/modules/ocl/src/kernels/arithm_sub_scalar.cl
--- a/modules/ocl/src/kernels/arithm_sub_scalar_mask.cl
+++ b/modules/ocl/src/kernels/arithm_sub_scalar_mask.cl
--- a/modules/ocl/src/kernels/arithm_sum.cl
+++ b/modules/ocl/src/kernels/arithm_sum.cl
@@ -203,4 +203,3 @@ __kernel void arithm_op_sum (int cols,int invalid_cols,int offset,int elemnum,in
       dst[gid] = localmem_sum[0];
   }
 }
--- a/modules/ocl/src/kernels/arithm_sum_3.cl
+++ b/modules/ocl/src/kernels/arithm_sum_3.cl
@@ -245,4 +245,3 @@ __kernel void arithm_op_sum_3 (int cols,int invalid_cols,int offset,int elemnum,
       dst[gid*3+2] = localmem_sum3[0];
   }
 }
--- a/modules/ocl/src/kernels/arithm_transpose.cl
+++ b/modules/ocl/src/kernels/arithm_transpose.cl
--- a/modules/ocl/src/kernels/blend_linear.cl
+++ b/modules/ocl/src/kernels/blend_linear.cl
@@ -15,7 +15,7 @@
 // Third party copyrights are property of their respective owners.
 //
 // @Authors
-//    Liu Liujun, liujun@multicorewareinc.com 
+//    Liu Liujun, liujun@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -61,7 +61,7 @@ __kernel void BlendLinear_C1_D0(
        int pos = mad24(idy,istep >> 2,idx);
        int wpos = mad24(idy,wstep >> 2,idx);
        float4 w1 = weight1[wpos], w2 = weight2[wpos];
-        dst[pos] = convert_uchar4((convert_float4(img1[pos]) * w1 + 
+        dst[pos] = convert_uchar4((convert_float4(img1[pos]) * w1 +
            convert_float4(img2[pos]) * w2) / (w1 + w2 + 1e-5f));
    }
 }
@@ -86,7 +86,7 @@ __kernel void BlendLinear_C4_D0(
        int wpos = mad24(idy,wstep, idx);
        float w1 = weight1[wpos];
        float w2 = weight2[wpos];
-        dst[pos] = convert_uchar4((convert_float4(img1[pos]) * w1 + 
+        dst[pos] = convert_uchar4((convert_float4(img1[pos]) * w1 +
            convert_float4(img2[pos]) * w2) / (w1 + w2 + 1e-5f));
    }
 }
@@ -138,4 +138,3 @@ __kernel void BlendLinear_C4_D5(
        dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f);
    }
 }
--- a/modules/ocl/src/opencl/brute_force_match.cl
+++ b/modules/ocl/src/opencl/brute_force_match.cl
--- a/modules/ocl/src/kernels/build_warps.cl
+++ b/modules/ocl/src/kernels/build_warps.cl
@@ -234,4 +234,3 @@ __kernel
        map_y[y * step_y + x] = ycoo;
    }
 }
--- a/modules/ocl/src/kernels/convertC3C4.cl
+++ b/modules/ocl/src/kernels/convertC3C4.cl
--- a/modules/ocl/src/kernels/cvt_color.cl
+++ b/modules/ocl/src/kernels/cvt_color.cl
--- a/modules/ocl/src/kernels/filter_sep_col.cl
+++ b/modules/ocl/src/kernels/filter_sep_col.cl
--- a/modules/ocl/src/kernels/filter_sep_row.cl
+++ b/modules/ocl/src/kernels/filter_sep_row.cl
@@ -466,5 +466,3 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_
        dst[start_addr] = sum;
    }
 }
--- a/modules/ocl/src/kernels/filtering_boxFilter.cl
+++ b/modules/ocl/src/kernels/filtering_boxFilter.cl
--- a/modules/ocl/src/kernels/filtering_laplacian.cl
+++ b/modules/ocl/src/kernels/filtering_laplacian.cl
--- a/modules/ocl/src/kernels/filtering_morph.cl
+++ b/modules/ocl/src/kernels/filtering_morph.cl
--- a/modules/ocl/src/kernels/haarobjectdetect.cl
+++ b/modules/ocl/src/kernels/haarobjectdetect.cl
@@ -559,7 +559,3 @@ if(result)
 }
 }
 */
--- a/modules/ocl/src/kernels/haarobjectdetect_scaled2.cl
+++ b/modules/ocl/src/kernels/haarobjectdetect_scaled2.cl
@@ -283,4 +283,3 @@ __kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuH
        newnode[counter].alpha[0] = t1.alpha[0];
        newnode[counter].alpha[1] = t1.alpha[1];
 }
--- a/modules/ocl/src/kernels/imgproc_bilateral.cl
+++ b/modules/ocl/src/kernels/imgproc_bilateral.cl
--- a/modules/ocl/src/kernels/imgproc_calcHarris.cl
+++ b/modules/ocl/src/kernels/imgproc_calcHarris.cl
--- a/modules/ocl/src/kernels/imgproc_calcMinEigenVal.cl
+++ b/modules/ocl/src/kernels/imgproc_calcMinEigenVal.cl
--- a/modules/ocl/src/kernels/imgproc_canny.cl
+++ b/modules/ocl/src/kernels/imgproc_canny.cl
--- a/modules/ocl/src/kernels/imgproc_columnsum.cl
+++ b/modules/ocl/src/kernels/imgproc_columnsum.cl
--- a/modules/ocl/src/kernels/imgproc_convolve.cl
+++ b/modules/ocl/src/kernels/imgproc_convolve.cl
@@ -107,5 +107,3 @@ __kernel void convolve_D5 (__global float *src, __global float *temp1, __global
        dst[gy*(dst_step >> 2)+gx] = res;
   }
 }
--- a/modules/ocl/src/kernels/imgproc_copymakeboder.cl
+++ b/modules/ocl/src/kernels/imgproc_copymakeboder.cl
--- a/modules/ocl/src/kernels/imgproc_histogram.cl
+++ b/modules/ocl/src/kernels/imgproc_histogram.cl
@@ -267,4 +267,3 @@ __kernel __attribute__((reqd_work_group_size(256,1,1)))void equalizeHist(
    }
 }
 */
--- a/modules/ocl/src/kernels/imgproc_integral.cl
+++ b/modules/ocl/src/kernels/imgproc_integral.cl
--- a/modules/ocl/src/kernels/imgproc_integral_sum.cl
+++ b/modules/ocl/src/kernels/imgproc_integral_sum.cl
--- a/modules/ocl/src/kernels/imgproc_median.cl
+++ b/modules/ocl/src/kernels/imgproc_median.cl
@@ -484,4 +484,3 @@ __kernel void medianFilter5_C1_D5(__global float * src, __global float * dst,  i
        dst[dstOffset + get_global_id(1)*dstStep + get_global_id(0)]=p12;
 }
 #undef op(a,b)
--- a/modules/ocl/src/kernels/imgproc_remap.cl
+++ b/modules/ocl/src/kernels/imgproc_remap.cl
--- a/modules/ocl/src/kernels/imgproc_resize.cl
+++ b/modules/ocl/src/kernels/imgproc_resize.cl
@@ -411,4 +411,3 @@ __kernel void resizeNN_C4_D5(__global float4 * dst, __global float4 * src,
        dst[dpos] = src[spos];
 }
--- a/modules/ocl/src/kernels/imgproc_threshold.cl
+++ b/modules/ocl/src/kernels/imgproc_threshold.cl
@@ -150,4 +150,3 @@ __kernel void threshold_C1_D5(__global const float * restrict src, __global floa
        }
    }
 }
--- a/modules/ocl/src/kernels/imgproc_warpAffine.cl
+++ b/modules/ocl/src/kernels/imgproc_warpAffine.cl
--- a/modules/ocl/src/kernels/imgproc_warpPerspective.cl
+++ b/modules/ocl/src/kernels/imgproc_warpPerspective.cl
@@ -682,4 +682,3 @@ __kernel void warpPerspectiveCubic_C4_D5(__global float4 * src, __global float4
        }
   }
 }
--- a/modules/ocl/src/kernels/interpolate_frames.cl
+++ b/modules/ocl/src/kernels/interpolate_frames.cl
--- a/modules/ocl/src/kernels/match_template.cl
+++ b/modules/ocl/src/kernels/match_template.cl
@@ -821,4 +821,3 @@ void matchTemplate_Prepared_CCOFF_NORMED_C4_D0
        res[res_idx] = normAcc(num, denum);
    }
 }
--- a/modules/ocl/src/kernels/meanShift.cl
+++ b/modules/ocl/src/kernels/meanShift.cl
@@ -240,4 +240,3 @@ __kernel void meanshiftproc_kernel( __global uchar4* in, __global uchar4* outr,
 //        outsp[basesp] =(short2)((short)x0,(short)y0);
    }
 }
--- a/modules/ocl/src/kernels/merge_mat.cl
+++ b/modules/ocl/src/kernels/merge_mat.cl
--- a/modules/ocl/src/kernels/moments.cl
+++ b/modules/ocl/src/kernels/moments.cl
@@ -27,7 +27,7 @@ typedef long T;
 #define DST_ROW_A03     9
 __kernel void icvContourMoments(int contour_total,
-                                __global float* reader_oclmat_data, 
+                                __global float* reader_oclmat_data,
                                __global T* dst_a,
                                int dst_step)
 {
@@ -58,7 +58,7 @@ __kernel void icvContourMoments(int contour_total,
    dxy = xi_1 * yi - xi * yi_1;
    xii_1 = xi_1 + xi;
    yii_1 = yi_1 + yi;
    dst_step /= sizeof(T);
    *( dst_a + DST_ROW_A00 * dst_step + idx) = dxy;
    *( dst_a + DST_ROW_A10 * dst_step + idx) = dxy * xii_1;

--- a/modules/ocl/src/kernels/nonfree_surf.cl
+++ b/modules/ocl/src/kernels/nonfree_surf.cl
--- a/modules/ocl/src/kernels/objdetect_hog.cl
+++ b/modules/ocl/src/kernels/objdetect_hog.cl
--- a/modules/ocl/src/kernels/operator_convertTo.cl
+++ b/modules/ocl/src/kernels/operator_convertTo.cl
--- a/modules/ocl/src/kernels/operator_copyToM.cl
+++ b/modules/ocl/src/kernels/operator_copyToM.cl
--- a/modules/ocl/src/kernels/operator_setTo.cl
+++ b/modules/ocl/src/kernels/operator_setTo.cl
--- a/modules/ocl/src/kernels/operator_setToM.cl
+++ b/modules/ocl/src/kernels/operator_setToM.cl
@@ -57,4 +57,3 @@ __kernel void set_to_with_mask(
        }
 }
--- a/modules/ocl/src/kernels/pyr_down.cl
+++ b/modules/ocl/src/kernels/pyr_down.cl
--- a/modules/ocl/src/kernels/pyr_up.cl
+++ b/modules/ocl/src/kernels/pyr_up.cl
--- a/modules/ocl/src/kernels/pyrlk.cl
+++ b/modules/ocl/src/kernels/pyrlk.cl
--- a/modules/ocl/src/kernels/pyrlk_no_image.cl
+++ b/modules/ocl/src/kernels/pyrlk_no_image.cl
--- a/modules/ocl/src/kernels/split_mat.cl
+++ b/modules/ocl/src/kernels/split_mat.cl
--- a/modules/ocl/src/kernels/stereobm.cl
+++ b/modules/ocl/src/kernels/stereobm.cl