Merge branch 'master' of code.opencv.org:opencv

4c15465f · Alexander Mordvintesv · a4bffd96 · bf4c1df0 · 4c15465f · 4c15465f
Commit 4c15465f authored Aug 18, 2012 by Alexander Mordvintesv
30 changed files
--- a/android/android.toolchain.cmake
+++ b/android/android.toolchain.cmake
--- a/doc/_themes/blue/static/default.css_t
+++ b/doc/_themes/blue/static/default.css_t
@@ -175,6 +175,8 @@ a:hover {
 div.body p, div.body dd, div.body li {
    text-align: justify;
    line-height: 130%;
+    margin-top: 1em;
+    margin-bottom: 1em;
 }
 div.body h1,
@@ -327,16 +329,16 @@ table.field-list {
    margin-top: 20px;
 }
-ul.simple {
+/*ul.simple {
    list-style: none;
-}
+}*/
 em.menuselection, em.guilabel {
    font-family: {{ theme_guifont }};
 }
 .enumeratevisibleitemswithsquare ul {
-list-style: square; 
+list-style: square;
 margin-bottom: 0px;
 margin-left: 0px;
 margin-right: 0px;
@@ -349,25 +351,25 @@ margin-left: 0px;
 margin-right: 0px;
 margin-top: 0.2em;
 }
 .enumeratevisibleitemswithsquare p {
 margin-bottom: 0pt;
 margin-top: 1pt;
 }
 .enumeratevisibleitemswithsquare dl{
 margin-bottom: 0px;
 margin-left: 0px;
 margin-right: 0px;
 margin-top: 0px;
 }
 .toctableopencv
 {
-   width: 100% ; 
+   width: 100% ;
   table-layout: fixed;
 }
  .toctableopencv colgroup col:first-child
  {
@@ -375,12 +377,17 @@ margin-top: 0px;
    max-width: 100pt !important;
    min-width: 100pt !important;
  }
-  .toctableopencv colgroup col:nth-child(2) 
+  .toctableopencv colgroup col:nth-child(2)
  {
    width: 100% !important;
  }
 div.body ul.search li {
    text-align: left;
 }
+div.linenodiv {
+    min-width: 1em;
+    text-align: right;
+}
\ No newline at end of file
--- a/doc/tutorials/introduction/android_binary_package/O4A_SDK.rst
+++ b/doc/tutorials/introduction/android_binary_package/O4A_SDK.rst
@@ -36,21 +36,21 @@ The structure of package contents looks as follows:
    OpenCV-2.4.2-android-sdk
    |_ apk
-    |   |_ OpenCV_2.4.2_binary_pack_XXX.apk 
+    |   |_ OpenCV_2.4.2_binary_pack_XXX.apk
-    |   |_ OpenCV_2.4.2_Manager.apk 
+    |   |_ OpenCV_2.4.2_Manager.apk
    |
    |_ doc
    |_ samples
    |_ sdk
-    |    |_ etc 
+    |    |_ etc
-    |    |_ java 
+    |    |_ java
-    |    |_ native 
+    |    |_ native
-    |          |_ 3rdparty 
+    |          |_ 3rdparty
-    |          |_ jni 
+    |          |_ jni
-    |          |_ libs 
+    |          |_ libs
-    |               |_ armeabi 
+    |               |_ armeabi
-    |               |_ armeabi-v7a 
+    |               |_ armeabi-v7a
-    |               |_ x86 
+    |               |_ x86
    |
    |_ license.txt
    |_ README.android
@@ -64,11 +64,11 @@ The structure of package contents looks as follows:
 * :file:`sdk/etc` folder contains Haar and LBP cascades distributed with OpenCV.
 * :file:`apk` folder contains Android packages that should be installed on the target Android device to enable OpenCV library access via OpenCV Manager API (see details below).
  On production devices that have access to Google Play Market (and internet) these packages will be installed from Market on the first start of an application using OpenCV Manager API.
  But dev kits without Market or internet require these packages to be installed manually.
  (Install the `Manager.apk` and the corresponding `binary_pack.apk` depending on the device CPU, the Manager GUI provides this info).
  **Note**: installation from the internet is preferable, since we may publish updated versions of these packages on the Market.
 * :file:`samples` folder contains sample applications projects and their prebuilt packages (APK).
@@ -76,7 +76,7 @@ The structure of package contents looks as follows:
 * :file:`doc` folder contains various OpenCV documentation in PDF format.
  It's also available online at http://docs.opencv.org.
  **Note**: the most recent docs (nightly build) are at http://docs.opencv.org/trunk/.
  Generally, it's more up-to-date, but can refer to not-yet-released functionality.
@@ -94,10 +94,10 @@ Starting version 2.4.2 `OpenCV4Android SDK` uses `OpenCV Manager` API for librar
 For additional information on OpenCV Manager see the:
-* |OpenCV4Android_Slides|_ 
+* |OpenCV4Android_Slides|_
-* |OpenCV4Android_Reference|_ 
+* |OpenCV4Android_Reference|_
     ..
@@ -196,15 +196,15 @@ Open OpenCV library and samples in Eclipse
   However, **all these errors are only false-alarms**!
   Just give Eclipse a minute to complete initialization.
   In some cases these errors disappear after :menuselection:`Project --> Clean... --> Clean all --> OK`
   or after pressing :kbd:`F5` (for Refresh action) when selecting error-label-marked projects in :guilabel:`Package Explorer`.
   Sometimes more advanced manipulations are required:
-   * The provided projects are configured for ``API 11`` target (and ``API 9`` for the library) that can be missing platform in your Android SDK.
+   The provided projects are configured for the ``API 11`` target (and ``API 9`` for the library), a platform that may be missing from your Android SDK.
-     After right click on any project select  :guilabel:`Properties` and then :guilabel:`Android` on the left pane.
+   Right-click any project, select :guilabel:`Properties`, and then :guilabel:`Android` in the left pane.
-     Click some target with `API Level` 11 or higher:
+   Click some target with `API Level` 11 or higher:
      .. image:: images/eclipse_8a_target.png
         :alt: Updating target
@@ -239,10 +239,10 @@ Well, running samples from Eclipse is very simple:
 * Connect your device with :command:`adb` tool from Android SDK or create an emulator with camera support.
-   * See `Managing Virtual Devices
+  * See `Managing Virtual Devices
-     <http://developer.android.com/guide/developing/devices/index.html>`_ document for help with Android Emulator.
+    <http://developer.android.com/guide/developing/devices/index.html>`_ document for help with Android Emulator.
-   * See `Using Hardware Devices
+  * See `Using Hardware Devices
-     <http://developer.android.com/guide/developing/device.html>`_ for help with real devices (not emulators).
+    <http://developer.android.com/guide/developing/device.html>`_ for help with real devices (not emulators).
 * Select project you want to start in :guilabel:`Package Explorer` and just press :kbd:`Ctrl + F11` or select option :menuselection:`Run --> Run` from the main menu, or click :guilabel:`Run` button on the toolbar.
@@ -263,33 +263,33 @@ Well, running samples from Eclipse is very simple:
  .. image:: images/android_emulator_opencv_manager_fail.png
     :alt: You will see this message if you have no OpenCV Manager installed
     :align: center
  To get rid of the message you will need to install `OpenCV Manager` and the appropriate `OpenCV binary pack`.
  Simply tap :menuselection:`Yes` if you have *Google Play Market* installed on your device/emulator. It will redirect you to the corresponding page on *Google Play Market*.
  If you have no access to the *Market*, which is often the case with emulators - you will need to install the packages from OpenCV4Android SDK folder manually. Open the console/terminal and type in the following two commands:
  .. code-block:: sh
    :linenos:
    <Android SDK path>/platform-tools/adb install <OpenCV4Android SDK path>/apk/OpenCV_2.4.2_Manager.apk
    <Android SDK path>/platform-tools/adb install <OpenCV4Android SDK path>/apk/OpenCV_2.4.2_binary_pack_armv7a.apk
  If you're running Windows, that will probably look like this:
  .. image:: images/install_opencv_manager_with_adb.png
     :alt: Run these commands in the console to install OpenCV Manager
     :align: center
  When done, you will be able to run OpenCV samples on your device/emulator seamlessly.
 * Here is ``Tutorial 2 - Use OpenCV Camera`` sample, running on top of stock camera-preview of the emulator.
  .. image:: images/emulator_canny.png
     :height: 600px
     :alt: Tutorial 1 Basic - 1. Add OpenCV - running Canny
     :align: center
 What's next
 ===========

--- a/doc/tutorials/introduction/android_binary_package/android_dev_intro.rst
+++ b/doc/tutorials/introduction/android_binary_package/android_dev_intro.rst
--- a/doc/tutorials/introduction/android_binary_package/dev_with_OCV_on_Android.rst
+++ b/doc/tutorials/introduction/android_binary_package/dev_with_OCV_on_Android.rst
--- a/modules/core/include/opencv2/core/core.hpp
+++ b/modules/core/include/opencv2/core/core.hpp
@@ -4620,6 +4620,34 @@ public:
 CV_EXPORTS void parallel_for_(const Range& range, const ParallelLoopBody& body);
+/////////////////////////// Synchronization Primitives ///////////////////////////////
+class CV_EXPORTS Mutex  // Copyable handle to a shared, reference-counted platform lock (Mutex::Impl).
+{
+public:
+    Mutex();                             // creates a new underlying lock owned by this handle
+    ~Mutex();                            // releases this handle's reference to the underlying lock
+    Mutex(const Mutex& m);               // shares m's underlying lock; both handles then guard the same lock
+    Mutex& operator = (const Mutex& m);  // rebinds this handle to m's underlying lock
+    void lock();                         // forwards to the underlying lock's lock()
+    bool trylock();                      // non-blocking attempt; true if the lock was acquired
+    void unlock();                       // forwards to the underlying lock's unlock()
+    struct Impl;                         // platform-specific implementation, defined in the .cpp
+protected:
+    Impl* impl;                          // shared implementation pointer
+};
+// Scoped lock guard: locks the given Mutex on construction and unlocks it on
+// destruction, so the lock is released on every exit path of the enclosing scope.
+// Only a pointer to the Mutex is stored, so the Mutex must outlive the guard.
+class CV_EXPORTS AutoLock
+{
+public:
+    AutoLock(Mutex& m) : mutex(&m) { mutex->lock(); }
+    ~AutoLock() { mutex->unlock(); }
+protected:
+    Mutex* mutex;
+private:
+    // Non-copyable (declared, intentionally never defined): a copied guard
+    // would call unlock() a second time on the same mutex when destroyed.
+    AutoLock(const AutoLock&);
+    AutoLock& operator = (const AutoLock&);
+};
 }
 #endif // __cplusplus

--- a/modules/core/src/parallel.cpp
+++ b/modules/core/src/parallel.cpp
@@ -42,6 +42,16 @@
 #include "precomp.hpp"
+#if !defined HAVE_TBB && !defined HAVE_OPENMP && !defined HAVE_GCD && !defined HAVE_CONCURRENCY  // no parallel backend chosen by the build
+#ifdef __APPLE__
+#define HAVE_GCD  // default backend on Apple platforms (presumably Grand Central Dispatch)
+#elif defined _MSC_VER && _MSC_VER >= 1600  // Visual C++ 2010 or newer
+#define HAVE_CONCURRENCY  // selects the <ppl.h> include that follows
+#endif
+#endif
 #ifdef HAVE_CONCURRENCY
 #  include <ppl.h>
 #elif defined HAVE_OPENMP

--- a/modules/core/src/system.cpp
+++ b/modules/core/src/system.cpp
@@ -930,4 +930,104 @@ BOOL WINAPI DllMain( HINSTANCE, DWORD  fdwReason, LPVOID )
 }
 #endif
+// The OSSpinLock header is included at global scope on purpose: including a
+// system header inside namespace cv would declare its names as cv::... and,
+// because the header is include-guarded, hide them from later global includes.
+#if !(defined WIN32 || defined _WIN32 || defined WINCE) && defined __APPLE__
+#include <libkern/OSAtomic.h>
+#endif
+namespace cv
+{
+// Mutex::Impl is the platform-specific lock shared by all copies of a Mutex.
+// Every variant exposes lock()/trylock()/unlock() plus an intrusive `refcount`
+// that starts at 1 for the Mutex that created it.
+#if defined WIN32 || defined _WIN32 || defined WINCE
+// Windows: backed by a CRITICAL_SECTION.
+struct Mutex::Impl
+{
+    Impl() { InitializeCriticalSection(&cs); refcount = 1; }
+    ~Impl() { DeleteCriticalSection(&cs); }
+    void lock() { EnterCriticalSection(&cs); }
+    bool trylock() { return TryEnterCriticalSection(&cs) != 0; }
+    void unlock() { LeaveCriticalSection(&cs); }
+    CRITICAL_SECTION cs;
+    int refcount;
+};
+#elif defined __APPLE__
+// Apple: backed by an OSSpinLock (needs no explicit destruction).
+struct Mutex::Impl
+{
+    Impl() { sl = OS_SPINLOCK_INIT; refcount = 1; }
+    ~Impl() {}
+    void lock() { OSSpinLockLock(&sl); }
+    bool trylock() { return OSSpinLockTry(&sl); }
+    void unlock() { OSSpinLockUnlock(&sl); }
+    OSSpinLock sl;
+    int refcount;
+};
+#elif defined __linux__ && !defined ANDROID
+// Linux (non-Android): backed by a pthread spinlock.
+struct Mutex::Impl
+{
+    Impl() { pthread_spin_init(&sl, 0); refcount = 1; }
+    ~Impl() { pthread_spin_destroy(&sl); }
+    void lock() { pthread_spin_lock(&sl); }
+    // pthread_spin_trylock returns 0 when the lock was acquired.
+    bool trylock() { return pthread_spin_trylock(&sl) == 0; }
+    void unlock() { pthread_spin_unlock(&sl); }
+    pthread_spinlock_t sl;
+    int refcount;
+};
+#else
+// Everything else (including Android): pthread mutex with default attributes.
+struct Mutex::Impl
+{
+    Impl() { pthread_mutex_init(&sl, 0); refcount = 1; }
+    ~Impl() { pthread_mutex_destroy(&sl); }
+    void lock() { pthread_mutex_lock(&sl); }
+    // pthread_mutex_trylock returns 0 when the lock was acquired.
+    bool trylock() { return pthread_mutex_trylock(&sl) == 0; }
+    void unlock() { pthread_mutex_unlock(&sl); }
+    pthread_mutex_t sl;
+    int refcount;
+};
+#endif
+// Creates a fresh underlying lock; Impl's constructor sets refcount to 1.
+Mutex::Mutex()
+{
+    impl = new Mutex::Impl;
+}
+// Drops this handle's reference. CV_XADD is used as a fetch-and-add here, so a
+// previous value of 1 means this handle was the last owner and Impl is freed.
+Mutex::~Mutex()
+{
+    if( CV_XADD(&impl->refcount, -1) == 1 )
+        delete impl;
+    impl = 0;
+}
+// Shares m's underlying lock: both handles lock/unlock the same Impl.
+Mutex::Mutex(const Mutex& m)
+{
+    impl = m.impl;
+    CV_XADD(&impl->refcount, 1);
+}
+// Rebinds this handle to m's lock. The new reference is taken BEFORE the old
+// one is released so that self-assignment never drives the count to zero.
+Mutex& Mutex::operator = (const Mutex& m)
+{
+    CV_XADD(&m.impl->refcount, 1);
+    if( CV_XADD(&impl->refcount, -1) == 1 )
+        delete impl;
+    impl = m.impl;
+    return *this;
+}
+// Thin forwarders to the platform implementation.
+void Mutex::lock() { impl->lock(); }
+void Mutex::unlock() { impl->unlock(); }
+bool Mutex::trylock() { return impl->trylock(); }
+}
 /* End of file. */
\ No newline at end of file
--- a/modules/gpu/doc/video.rst
+++ b/modules/gpu/doc/video.rst
@@ -653,7 +653,7 @@ gpu::GMG_GPU
 ------------
 .. ocv:class:: gpu::GMG_GPU
-Class used for background/foreground segmentation. ::
+  Class used for background/foreground segmentation. ::
    class GMG_GPU
    {
@@ -677,9 +677,9 @@ Class used for background/foreground segmentation. ::
        ...
    };
-The class discriminates between foreground and background pixels by building and maintaining a model of the background. Any pixel which does not fit this model is then deemed to be foreground. The class implements algorithm described in [GMG2012]_.
+  The class discriminates between foreground and background pixels by building and maintaining a model of the background. Any pixel which does not fit this model is then deemed to be foreground. The class implements the algorithm described in [GMG2012]_.
-Here are important members of the class that control the algorithm, which you can set after constructing the class instance:
+  Here are important members of the class that control the algorithm, which you can set after constructing the class instance:
    .. ocv:member:: int maxFeatures

--- a/modules/gpu/misc/mark_nvidia.py
+++ b/modules/gpu/misc/mark_nvidia.py
 import sys, re
 spaces = '[\s]*'
-symbols = '[\s\w\d,=:|]*'
+symbols = '[\s\w\d,.=:|]*'
 def pattern1(prefix, test):
    return re.compile(spaces + 'perf::' + prefix + '/' + test + '::' + '\(' + symbols + '\)' + spaces)

--- a/modules/ocl/CMakeLists.txt
+++ b/modules/ocl/CMakeLists.txt
@@ -4,7 +4,7 @@ if(NOT HAVE_OPENCL)
 endif()
 set(the_description "OpenCL-accelerated Computer Vision")
-ocv_add_module(ocl opencv_core opencv_imgproc opencv_calib3d opencv_objdetect opencv_video opencv_nonfree)
+ocv_add_module(ocl opencv_core opencv_imgproc opencv_calib3d opencv_objdetect opencv_video opencv_nonfree opencv_ts)
 ocv_module_include_directories()

--- a/modules/ocl/perf/test_filters.cpp
+++ b/modules/ocl/perf/test_filters.cpp
@@ -325,7 +325,7 @@ PARAM_TEST_CASE(LaplacianTestBase, MatType, int)
 		ksize = GET_PARAM(1);
 		cv::RNG& rng = TS::ptr()->get_rng();
-		cv::Size size = cv::Size(2560, 2560);
+		cv::Size size = cv::Size(MWIDTH, MHEIGHT);
 		mat  = randomMat(rng, size, type, 5, 16, false);
 		dst  = randomMat(rng, size, type, 5, 16, false);
@@ -468,7 +468,7 @@ PARAM_TEST_CASE(ErodeDilateBase, MatType, bool)
 		//  iterations = GET_PARAM(1);
 		cv::RNG& rng = TS::ptr()->get_rng();
-		cv::Size size = cv::Size(2560, 2560);
+		cv::Size size = cv::Size(MWIDTH, MHEIGHT);
 		mat1 = randomMat(rng, size, type, 5, 16, false);
 		dst  = randomMat(rng, size, type, 5, 16, false);
@@ -679,7 +679,7 @@ PARAM_TEST_CASE(Sobel, MatType, int, int, int, int)
 		dx = 2; dy=0;
 		cv::RNG& rng = TS::ptr()->get_rng();
-		cv::Size size = cv::Size(2560, 2560);
+		cv::Size size = cv::Size(MWIDTH, MHEIGHT);
 		mat1 = randomMat(rng, size, type, 5, 16, false);
 		dst  = randomMat(rng, size, type, 5, 16, false);
@@ -817,7 +817,7 @@ PARAM_TEST_CASE(Scharr, MatType, int, int, int)
 		dx = 1; dy=0;
 		cv::RNG& rng = TS::ptr()->get_rng();
-		cv::Size size = cv::Size(2560, 2560);
+		cv::Size size = cv::Size(MWIDTH, MHEIGHT);
 		mat1 = randomMat(rng, size, type, 5, 16, false);
 		dst  = randomMat(rng, size, type, 5, 16, false);
@@ -956,7 +956,7 @@ PARAM_TEST_CASE(GaussianBlur, MatType, cv::Size, int)
 		bordertype = GET_PARAM(2);
 		cv::RNG& rng = TS::ptr()->get_rng();
-		cv::Size size = cv::Size(2560, 2560);
+		cv::Size size = cv::Size(MWIDTH, MHEIGHT);
 		sigma1 = rng.uniform(0.1, 1.0); 
 		sigma2 = rng.uniform(0.1, 1.0);

--- a/modules/ocl/src/filtering.cpp
+++ b/modules/ocl/src/filtering.cpp
--- a/modules/ocl/src/imgproc.cpp
+++ b/modules/ocl/src/imgproc.cpp
@@ -260,7 +260,7 @@ namespace cv
            CV_Assert((!map2.data || map2.size()== map1.size()));
            dst.create(map1.size(), src.type());
            string kernelName;
@@ -394,8 +394,15 @@ namespace cv
                args.push_back( make_pair(sizeof(cl_int),(void*)&map1.cols));
                args.push_back( make_pair(sizeof(cl_int),(void*)&map1.rows));
                args.push_back( make_pair(sizeof(cl_int), (void *)&cols));
-                args.push_back( make_pair(sizeof(cl_double4),(void*)&borderValue));
+                if(src.clCxt -> impl -> double_support != 0)
-            }
+                {
+                    args.push_back( make_pair(sizeof(cl_double4),(void*)&borderValue));
+                }
+                else
+                {
+                    args.push_back( make_pair(sizeof(cl_float4),(void*)&borderValue));
+                }
+              }
            openCLExecuteKernel(clCxt,&imgproc_remap,kernelName,globalThreads,localThreads,args,src.channels(),src.depth());
    }	

--- a/modules/ocl/src/initialization.cpp
+++ b/modules/ocl/src/initialization.cpp
@@ -44,9 +44,9 @@
 //M*/
 #include "precomp.hpp"
-#include "threadsafe.h"
+#include "Threadsafe.h"
 #include <iomanip>
-#include "binarycaching.hpp"
+#include "binaryCaching.hpp"
 using namespace cv;
 using namespace cv::ocl;

--- a/modules/ocl/src/kernels/filter_sep_col.cl
+++ b/modules/ocl/src/kernels/filter_sep_col.cl
@@ -90,9 +90,9 @@ Niko
 ***********************************************************************************/
-__kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void col_filter_C1_D0
+__kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void col_filter
-						(__global const float * restrict src, 
+						(__global const GENTYPE_SRC * restrict src, 
-						 __global uchar * dst,
+						 __global GENTYPE_DST * dst,
                         const int dst_cols,
                         const int dst_rows, 
 						 const int src_whole_cols,
@@ -111,10 +111,10 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void col_filter_
 	int start_addr = mad24(y,src_step_in_pixel,x);
 	int end_addr = mad24(src_whole_rows - 1,src_step_in_pixel,src_whole_cols);
 	int i;
-	float sum;
+	GENTYPE_SRC sum;
-	float temp[READ_TIMES_COL];
+	GENTYPE_SRC temp[READ_TIMES_COL];
-	__local float LDS_DAT[LSIZE1*READ_TIMES_COL][LSIZE0+1];
+	__local GENTYPE_SRC LDS_DAT[LSIZE1*READ_TIMES_COL][LSIZE0+1];
 	//read pixels from src
 	for(i = 0;i<READ_TIMES_COL;i++)
@@ -141,170 +141,6 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void col_filter_
 	if((x<dst_cols) & (y<dst_rows))
 	{
 		start_addr = mad24(y,dst_step_in_pixel,x+dst_offset_in_pixel);
-		dst[start_addr] = convert_uchar_sat(sum);
+		dst[start_addr] = convert_to_DST(sum);
-	}
-}
-__kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void col_filter_C4_D0
-						(__global const float4 * restrict src, 
-						 __global uchar4 * dst,
-                         const int dst_cols,
-                         const int dst_rows, 
-						 const int src_whole_cols,
-						 const int src_whole_rows,
-                         const int src_step_in_pixel, 
-                         //const int src_offset_x, 
-                         //const int src_offset_y, 
-                         const int dst_step_in_pixel,
-                         const int dst_offset_in_pixel,
-                         __constant float * mat_kernel __attribute__((max_constant_size(4*(2*RADIUSY+1)))))
-{
-	int x = get_global_id(0);
-	int y = get_global_id(1);
-	int l_x = get_local_id(0);
-	int l_y = get_local_id(1);
-	int start_addr = mad24(y,src_step_in_pixel,x);
-	int end_addr = mad24(src_whole_rows - 1,src_step_in_pixel,src_whole_cols);
-	int i;
-	float4 sum;
-	float4 temp[READ_TIMES_COL];
-	__local float4 LDS_DAT[LSIZE1*READ_TIMES_COL][LSIZE0+1];
-	//read pixels from src
-	for(i = 0;i<READ_TIMES_COL;i++)
-	{
-		int current_addr = start_addr+i*LSIZE1*src_step_in_pixel;
-		current_addr = current_addr < end_addr ? current_addr : 0;
-		temp[i] = src[current_addr];
-	}
-	//save pixels to lds
-	for(i = 0;i<READ_TIMES_COL;i++)
-	{
-		LDS_DAT[l_y+i*LSIZE1][l_x] = temp[i];
-	}
-	barrier(CLK_LOCAL_MEM_FENCE);
-	//read pixels from lds and calculate the result
-	sum = LDS_DAT[l_y+RADIUSY][l_x]*mat_kernel[RADIUSY];
-	for(i=1;i<=RADIUSY;i++)
-	{
-		temp[0]=LDS_DAT[l_y+RADIUSY-i][l_x];
-		temp[1]=LDS_DAT[l_y+RADIUSY+i][l_x];
-		sum += temp[0] * mat_kernel[RADIUSY-i]+temp[1] * mat_kernel[RADIUSY+i];
-	}
-	//write the result to dst
-	if((x<dst_cols) & (y<dst_rows))
-	{
-		start_addr = mad24(y,dst_step_in_pixel,x+dst_offset_in_pixel);
-		dst[start_addr] = convert_uchar4_sat(sum);
-	}
-}
-__kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void col_filter_C1_D5
-						(__global const float * restrict src, 
-						 __global float * dst,
-                         const int dst_cols,
-                         const int dst_rows, 
-						 const int src_whole_cols,
-						 const int src_whole_rows,
-                         const int src_step_in_pixel, 
-                         //const int src_offset_x, 
-                         //const int src_offset_y, 
-                         const int dst_step_in_pixel,
-                         const int dst_offset_in_pixel,
-                         __constant float * mat_kernel __attribute__((max_constant_size(4*(2*RADIUSY+1)))))
-{
-	int x = get_global_id(0);
-	int y = get_global_id(1);
-	int l_x = get_local_id(0);
-	int l_y = get_local_id(1);
-	int start_addr = mad24(y,src_step_in_pixel,x);
-	int end_addr = mad24(src_whole_rows - 1,src_step_in_pixel,src_whole_cols);
-	int i;
-	float sum;
-	float temp[READ_TIMES_COL];
-	__local float LDS_DAT[LSIZE1*READ_TIMES_COL][LSIZE0+1];
-	//read pixels from src
-	for(i = 0;i<READ_TIMES_COL;i++)
-	{
-		int current_addr = start_addr+i*LSIZE1*src_step_in_pixel;
-		current_addr = current_addr < end_addr ? current_addr : 0;
-		temp[i] = src[current_addr];
-	}
-	//save pixels to lds
-	for(i = 0;i<READ_TIMES_COL;i++)
-	{
-		LDS_DAT[l_y+i*LSIZE1][l_x] = temp[i];
-	}
-	barrier(CLK_LOCAL_MEM_FENCE);
-	//read pixels from lds and calculate the result
-	sum = LDS_DAT[l_y+RADIUSY][l_x]*mat_kernel[RADIUSY];
-	for(i=1;i<=RADIUSY;i++)
-	{
-		temp[0]=LDS_DAT[l_y+RADIUSY-i][l_x];
-		temp[1]=LDS_DAT[l_y+RADIUSY+i][l_x];
-		sum += temp[0] * mat_kernel[RADIUSY-i]+temp[1] * mat_kernel[RADIUSY+i];
-	}
-	//write the result to dst
-	if((x<dst_cols) & (y<dst_rows))
-	{
-		start_addr = mad24(y,dst_step_in_pixel,x+dst_offset_in_pixel);
-		dst[start_addr] = sum;
-	}
-}
-__kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void col_filter_C4_D5
-						(__global const float4 * restrict src, 
-						 __global float4 * dst,
-                         const int dst_cols,
-                         const int dst_rows, 
-						 const int src_whole_cols,
-						 const int src_whole_rows,
-                         const int src_step_in_pixel, 
-                         //const int src_offset_x, 
-                         //const int src_offset_y, 
-                         const int dst_step_in_pixel,
-                         const int dst_offset_in_pixel,
-                         __constant float * mat_kernel __attribute__((max_constant_size(4*(2*RADIUSY+1)))))
-{
-	int x = get_global_id(0);
-	int y = get_global_id(1);
-	int l_x = get_local_id(0);
-	int l_y = get_local_id(1);
-	int start_addr = mad24(y,src_step_in_pixel,x);
-	int end_addr = mad24(src_whole_rows - 1,src_step_in_pixel,src_whole_cols);
-	int i;
-	float4 sum;
-	float4 temp[READ_TIMES_COL];
-	__local float4 LDS_DAT[LSIZE1*READ_TIMES_COL][LSIZE0+1];
-	//read pixels from src
-	for(i = 0;i<READ_TIMES_COL;i++)
-	{
-		int current_addr = start_addr+i*LSIZE1*src_step_in_pixel;
-		current_addr = current_addr < end_addr ? current_addr : 0;
-		temp[i] = src[current_addr];
-	}
-	//save pixels to lds
-	for(i = 0;i<READ_TIMES_COL;i++)
-	{
-		LDS_DAT[l_y+i*LSIZE1][l_x] = temp[i];
-	}
-	barrier(CLK_LOCAL_MEM_FENCE);
-	//read pixels from lds and calculate the result
-	sum = LDS_DAT[l_y+RADIUSY][l_x]*mat_kernel[RADIUSY];
-	for(i=1;i<=RADIUSY;i++)
-	{
-		temp[0]=LDS_DAT[l_y+RADIUSY-i][l_x];
-		temp[1]=LDS_DAT[l_y+RADIUSY+i][l_x];
-		sum += temp[0] * mat_kernel[RADIUSY-i]+temp[1] * mat_kernel[RADIUSY+i];
-	}
-	//write the result to dst
-	if((x<dst_cols) & (y<dst_rows))
-	{
-		start_addr = mad24(y,dst_step_in_pixel,x+dst_offset_in_pixel);
-		dst[start_addr] = sum;
 	}
 }
--- a/modules/ocl/src/kernels/filtering_dilateFilter.cl
+++ b/modules/ocl/src/kernels/filtering_dilateFilter.cl
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Zhang Ying, zhangying913@gmail.com
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other GpuMaterials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-#pragma OPENCL FP_CONTRACT ON
-#define UCHAR_MIN 0
-__kernel void dilate_C4_D5(__global const float4 * restrict src, __global float4 *dst, int srcOffset, int dstOffset, 
-					int mincols, int maxcols, int minrows, int maxrows, int cols, int rows, 
-					int srcStep, int dstStep, __constant uchar * mat_kernel, int src_whole_cols, int src_whole_rows)
-{
-    int mX = get_global_id(0);
-    int mY = get_global_id(1);
-    int kX = mX - anX, kY = mY - anY;
-	int end_addr = mad24(src_whole_rows-1,srcStep,src_whole_cols);
-    float4 maxVal = (float4)(-FLT_MAX);
-	  int k=0;
-	  for(int i=0;i<ksY;i++, kY++ , kX = mX - anX)
-    {
-        for(int j=0;j<ksX; j++, kX++)
-        {
-			int current_addr = mad24(kY,srcStep,kX) + srcOffset;
-			current_addr = ((current_addr < end_addr) && (current_addr > 0)) ? current_addr : 0;
-			float4 v = src[current_addr];		
-			uchar now = mat_kernel[k++];
-		    float4 flag = (kX >= mincols & kX <= maxcols & kY >= minrows & kY <= maxrows & now != 0) ? v : (float4)(-FLT_MAX);
-            maxVal = max(maxVal , flag);
-        }
-    }
-	  if(mX < cols && mY < rows)
-        dst[mY * dstStep + mX + dstOffset] = (maxVal);		   
-}
-__kernel void dilate_C1_D5(__global float4 * src, __global float *dst, int srcOffset, int dstOffset, 
-					int mincols, int maxcols, int minrows, int maxrows, int cols, int rows, 
-					int srcStep, int dstStep, __constant uchar * mat_kernel, int src_whole_cols, int src_whole_rows)
-{
-    int mX = (get_global_id(0)<<2) - (dstOffset&3);
-    int mY = get_global_id(1);
-    int kX = mX - anX, kY = mY - anY;
-	int end_addr = mad24(src_whole_rows-1,srcStep,src_whole_cols);
-    float4 maxVal = (float4)(-FLT_MAX);
-	  int k=0;
-	  for(int i=0;i<ksY;i++, kY++ , kX = mX - anX)
-    {
-        for(int j=0;j<ksX;j++, kX++)
-        {
-			int start = mad24(kY,srcStep,kX) + srcOffset;
-			start = ((start < end_addr) && (start > 0)) ? start : 0;
-			int start2 = ((start + 4 < end_addr) && (start > 0)) ? start + 4 : 0;
-			    float8 sVal = (float8)(src[start>>2], src[start2>>2]);
-		    	float sAry[8]= {sVal.s0, sVal.s1, sVal.s2, sVal.s3, sVal.s4, sVal.s5, sVal.s6, sVal.s7};
-		    	int det = start & 3;
-	    		float4 v=(float4)(sAry[det], sAry[det+1], sAry[det+2], sAry[det+3]);		
-			    uchar now = mat_kernel[k++];
-			    float4 flag = (kY >= minrows & kY <= maxrows & now != 0) ? v : maxVal;
-			    flag.x = (kX >= mincols & kX <= maxcols) ? flag.x : -FLT_MAX;
-			    flag.y = (kX+1 >= mincols & kX+1 <= maxcols) ? flag.y : -FLT_MAX;
-			    flag.z = (kX+2 >= mincols & kX+2 <= maxcols) ? flag.z : -FLT_MAX;
-			    flag.w = (kX+3 >= mincols & kX+3 <= maxcols) ? flag.w : -FLT_MAX;
-          maxVal = max(maxVal , flag);
-        }
-    }
-    if(mY < rows && mX < cols)
-	  {
-		    __global float4* d = (__global float4*)(dst + mY * dstStep + mX + dstOffset);
-		    float4 dVal = *d;
-    		maxVal.x = (mX >=0 & mX < cols) ? maxVal.x : dVal.x;
-    		maxVal.y = (mX+1 >=0 & mX+1 < cols) ? maxVal.y : dVal.y;
-    		maxVal.z = (mX+2 >=0 & mX+2 < cols) ? maxVal.z : dVal.z;
-    		maxVal.w = (mX+3 >=0 & mX+3 < cols) ? maxVal.w : dVal.w;
-        *d = (maxVal);	
-	  }
-}
-__kernel void dilate_C1_D0(__global const uchar4 * restrict src, __global uchar *dst, int srcOffset, int dstOffset, 
-					int mincols, int maxcols, int minrows, int maxrows, int cols, int rows, 
-					int srcStep, int dstStep, __constant uchar * mat_kernel, int src_whole_cols, int src_whole_rows)
-{
-    int mX = (get_global_id(0)<<2) - (dstOffset&3);;
-    int mY = get_global_id(1);
-    int kX = mX - anX, kY = mY - anY;
-	int end_addr = mad24(src_whole_rows-1,srcStep,src_whole_cols);
-    uchar4 maxVal = (uchar4)(UCHAR_MIN);
-	  int k=0;
-	  for(int i=0;i<ksY;i++, kY++ , kX = mX - anX)
-    {
-        for(int j=0;j<ksX;j++, kX++)
-        {
-			    int start = mad24(kY,srcStep,kX) + srcOffset;
-				start = ((start < end_addr) && (start > 0)) ? start : 0;
-				int start2 = ((start + 4 < end_addr) && (start > 0)) ? start + 4 : 0;
-			    uchar8 sVal = (uchar8)(src[start>>2], src[start2>>2]);
-			    uchar sAry[8]= {sVal.s0, sVal.s1, sVal.s2, sVal.s3, sVal.s4, sVal.s5, sVal.s6, sVal.s7};
-			    int det = start & 3;
-			    uchar4 v=(uchar4)(sAry[det], sAry[det+1], sAry[det+2], sAry[det+3]);
-			    uchar4 flag = (kY >= minrows & kY <= maxrows & mat_kernel[k++] != 0) ? v : maxVal;
-			    flag.x = (kX >= mincols & kX <= maxcols) ? flag.x : UCHAR_MIN;
-			    flag.y = (kX+1 >= mincols & kX+1 <= maxcols) ? flag.y : UCHAR_MIN;
-			    flag.z = (kX+2 >= mincols & kX+2 <= maxcols) ? flag.z : UCHAR_MIN;
-			    flag.w = (kX+3 >= mincols & kX+3 <= maxcols) ? flag.w : UCHAR_MIN;			
-          maxVal = max(maxVal , flag);
-        }
-    }
-	  if(mY < rows)
-	  {
-		    __global uchar4* d = (__global uchar4*)(dst + mY * dstStep + mX + dstOffset);
-		    uchar4 dVal = *d;
-    		maxVal.x = (mX >=0 & mX < cols) ? maxVal.x : dVal.x;
-    		maxVal.y = (mX+1 >=0 & mX+1 < cols) ? maxVal.y : dVal.y;
-    		maxVal.z = (mX+2 >=0 & mX+2 < cols) ? maxVal.z : dVal.z;
-    		maxVal.w = (mX+3 >=0 & mX+3 < cols) ? maxVal.w : dVal.w;
-        *d = (maxVal);	
-	  }
-}
-__kernel void dilate_C4_D0(__global const uchar4 * restrict src, __global uchar4 *dst, int srcOffset, int dstOffset, 
-					int mincols, int maxcols, int minrows, int maxrows, int cols, int rows, 
-					int srcStep, int dstStep, __constant uchar * mat_kernel, int src_whole_cols, int src_whole_rows)
-{
-    int mX = get_global_id(0);
-    int mY = get_global_id(1);
-    int kX = mX - anX, kY = mY - anY;
-	int end_addr = mad24(src_whole_rows-1,srcStep,src_whole_cols);
-    uchar4 maxVal = (uchar4)(UCHAR_MIN);
-	  int k=0;
-	  for(int i=0;i<ksY;i++, kY++ , kX = mX - anX)
-    {
-        for(int j=0;j<ksX;j++, kX++)
-        {
-			int current_addr = mad24(kY,srcStep,kX) + srcOffset;
-			current_addr = ((current_addr < end_addr) && (current_addr > 0)) ? current_addr : 0;		
-			uchar4 v = src[current_addr];
-			    uchar now = mat_kernel[k++];
-			    uchar4 flag = (kX >= mincols & kX <= maxcols & kY >= minrows & kY <= maxrows & now != 0) ? v : maxVal;
-          maxVal = max(maxVal , flag);
-        }
-    }
-	  if(mX < cols && mY < rows)
-        dst[mY * dstStep + mX + dstOffset] = (maxVal);		   
-}
--- a/modules/ocl/src/kernels/filtering_erodeFilter.cl
+++ b/modules/ocl/src/kernels/filtering_erodeFilter.cl
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// @Authors
-//    Niko Li, newlife20080214@gmail.com
-//    Zero Lin, zero.lin@amd.com
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other oclMaterials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors as is and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//
-__kernel void erode_C4_D5(__global const float4 * restrict src, __global float4 *dst, int srcOffset, int dstOffset, 
-					int mincols, int maxcols, int minrows, int maxrows, int cols, int rows, 
-					int srcStep, int dstStep, __constant uchar * mat_kernel, int src_whole_cols, int src_whole_rows)
-{
-    int mX = get_global_id(0);
-    int mY = get_global_id(1);
-    int kX = mX - anX, kY = mY - anY;
-	int end_addr = mad24(src_whole_rows-1,srcStep,src_whole_cols);
-    float4 minVal = (float4)(3.4e+38);
-	int k=0;
-	for(int i=0;i<ksY;i++, kY++ , kX = mX - anX)
-    {
-        for(int j=0;j<ksX; j++, kX++)
-        {
-			int current_addr = mad24(kY,srcStep,kX) + srcOffset;
-			current_addr = ((current_addr < end_addr) && (current_addr > 0)) ? current_addr : 0;
-			float4 v = src[current_addr];
-			uchar now = mat_kernel[k++];
-			float4 flag = (kX >= mincols & kX <= maxcols & kY >= minrows & kY <= maxrows & now != 0) ? v : (float4)(3.4e+38);
-            minVal = min(minVal , flag);
-        }
-    }
-	if(mX < cols && mY < rows)
-        dst[mY * dstStep + mX + dstOffset] = (minVal);		   
-}
-__kernel void erode_C1_D5(__global float4 * src, __global float *dst, int srcOffset, int dstOffset, 
-					int mincols, int maxcols, int minrows, int maxrows, int cols, int rows, 
-					int srcStep, int dstStep, __constant uchar * mat_kernel, int src_whole_cols, int src_whole_rows)
-{
-    int mX = (get_global_id(0)<<2) - (dstOffset&3);
-    int mY = get_global_id(1);
-    int kX = mX - anX, kY = mY - anY;
-	int end_addr = mad24(src_whole_rows-1,srcStep,src_whole_cols);
-    float4 minVal = (float4)(3.4e+38);
-	int k=0;
-	for(int i=0;i<ksY;i++, kY++ , kX = mX - anX)
-    {
-        for(int j=0;j<ksX;j++, kX++)
-        {
-			int start = mad24(kY,srcStep,kX) + srcOffset;
-			start = ((start < end_addr) && (start > 0)) ? start : 0;
-			int start2 = ((start + 4 < end_addr) && (start > 0)) ? start + 4 : 0;
-			float8 sVal = (float8)(src[start>>2], src[start2>>2]);
-			float sAry[8]= {sVal.s0, sVal.s1, sVal.s2, sVal.s3, sVal.s4, sVal.s5, sVal.s6, sVal.s7};
-			int det = start & 3;
-			float4 v=(float4)(sAry[det], sAry[det+1], sAry[det+2], sAry[det+3]);		
-			uchar now = mat_kernel[k++];
-			float4 flag = (kY >= minrows & kY <= maxrows & now != 0) ? v : (float4)(3.4e+38);
-			flag.x = (kX >= mincols & kX <= maxcols) ? flag.x : 3.4e+38;
-			flag.y = (kX+1 >= mincols & kX+1 <= maxcols) ? flag.y : 3.4e+38;
-			flag.z = (kX+2 >= mincols & kX+2 <= maxcols) ? flag.z : 3.4e+38;
-			flag.w = (kX+3 >= mincols & kX+3 <= maxcols) ? flag.w : 3.4e+38;
-            minVal = min(minVal , flag);
-        }
-    }
-	if(mY < rows && mX < cols)
-	{
-		__global float4* d = (__global float4*)(dst + mY * dstStep + mX + dstOffset);
-		float4 dVal = *d;
-		minVal.x = (mX >=0 & mX < cols) ? minVal.x : dVal.x;
-		minVal.y = (mX+1 >=0 & mX+1 < cols) ? minVal.y : dVal.y;
-		minVal.z = (mX+2 >=0 & mX+2 < cols) ? minVal.z : dVal.z;
-		minVal.w = (mX+3 >=0 & mX+3 < cols) ? minVal.w : dVal.w;
-        *d = (minVal);	
-	}
-}
-__kernel void erode_C1_D0(__global const uchar4 * restrict src, __global uchar *dst, int srcOffset, int dstOffset, 
-					int mincols, int maxcols, int minrows, int maxrows, int cols, int rows, 
-					int srcStep, int dstStep, __constant uchar * mat_kernel, int src_whole_cols, int src_whole_rows)
-{
-    int mX = (get_global_id(0)<<2) - (dstOffset&3);
-    int mY = get_global_id(1);
-    int kX = mX - anX, kY = mY - anY;
-	int end_addr = mad24(src_whole_rows-1,srcStep,src_whole_cols);
-    uchar4 minVal = (uchar4)(0xff);
-	int k=0;
-	for(int i=0;i<ksY;i++, kY++ , kX = mX - anX)
-    {
-        for(int j=0;j<ksX;j++, kX++)
-        {
-			int start = mad24(kY,srcStep,kX) + srcOffset;
-			start = ((start < end_addr) && (start > 0)) ? start : 0;
-			int start2 = ((start + 4 < end_addr) && (start > 0)) ? start + 4 : 0;
-			uchar8 sVal = (uchar8)(src[start>>2], src[start2>>2]);
-			uchar sAry[8]= {sVal.s0, sVal.s1, sVal.s2, sVal.s3, sVal.s4, sVal.s5, sVal.s6, sVal.s7};
-			int det = start & 3;
-			uchar4 v=(uchar4)(sAry[det], sAry[det+1], sAry[det+2], sAry[det+3]);
-			uchar4 flag = (kY >= minrows & kY <= maxrows & mat_kernel[k++] != 0) ? v : (uchar4)(0xff);
-			flag.x = (kX >= mincols & kX <= maxcols) ? flag.x : 0xff;
-			flag.y = (kX+1 >= mincols & kX+1 <= maxcols) ? flag.y : 0xff;
-			flag.z = (kX+2 >= mincols & kX+2 <= maxcols) ? flag.z : 0xff;
-			flag.w = (kX+3 >= mincols & kX+3 <= maxcols) ? flag.w : 0xff;			
-            minVal = min(minVal , flag);
-        }
-    }
-	if(mY < rows)
-	{
-		__global uchar4* d = (__global uchar4*)(dst + mY * dstStep + mX + dstOffset);
-		uchar4 dVal = *d;
-		minVal.x = (mX >=0 & mX < cols) ? minVal.x : dVal.x;
-		minVal.y = (mX+1 >=0 & mX+1 < cols) ? minVal.y : dVal.y;
-		minVal.z = (mX+2 >=0 & mX+2 < cols) ? minVal.z : dVal.z;
-		minVal.w = (mX+3 >=0 & mX+3 < cols) ? minVal.w : dVal.w;
-        *d = (minVal);	
-	}
-}
-__kernel void erode_C4_D0(__global const uchar4 * restrict src, __global uchar4 *dst, int srcOffset, int dstOffset, 
-					int mincols, int maxcols, int minrows, int maxrows, int cols, int rows, 
-					int srcStep, int dstStep, __constant uchar * mat_kernel, int src_whole_cols, int src_whole_rows)
-{
-    int mX = get_global_id(0);
-    int mY = get_global_id(1);
-    int kX = mX - anX, kY = mY - anY;
-	int end_addr = mad24(src_whole_rows-1,srcStep,src_whole_cols);
-    uchar4 minVal = (uchar4)(0xff);
-	int k=0;
-	for(int i=0;i<ksY;i++, kY++ , kX = mX - anX)
-    {
-        for(int j=0;j<ksX;j++, kX++)
-        {
-			int current_addr = mad24(kY,srcStep,kX) + srcOffset;
-			current_addr = ((current_addr < end_addr) && (current_addr > 0)) ? current_addr : 0;		
-			uchar4 v = src[current_addr];
-			uchar now = mat_kernel[k++];
-			uchar4 flag = (kX >= mincols & kX <= maxcols & kY >= minrows & kY <= maxrows & now != 0) ? v : (uchar4)(0xff);
-            minVal = min(minVal , flag);
-        }
-    }
-	if(mX < cols && mY < rows)
-        dst[mY * dstStep + mX + dstOffset] = (minVal);		   
-}
--- a/modules/ocl/src/kernels/filtering_morph.cl
+++ b/modules/ocl/src/kernels/filtering_morph.cl
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Niko Li, newlife20080214@gmail.com
+//    Zero Lin, zero.lin@amd.com
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other oclMaterials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//
+// Reduction applied across the structuring element: erosion keeps the minimum,
+// dilation keeps the maximum. ERODE / DILATE are not defined in this file;
+// presumably exactly one of them is passed as a build option -- confirm against the host code.
+#ifdef ERODE
+#define MORPH_OP(A,B) min((A),(B))
+#endif
+#ifdef DILATE
+#define MORPH_OP(A,B) max((A),(B))
+#endif
+//BORDER_CONSTANT:      iiiiii|abcdefgh|iiiiiii
+// ELEM yields elem1 when index i lies outside [l_edge, r_edge) and elem2 otherwise.
+// The condition and the whole expansion are parenthesized so the macro keeps its
+// meaning when it is embedded in a larger expression.
+#define ELEM(i,l_edge,r_edge,elem1,elem2) ((((i)<(l_edge)) | ((i) >= (r_edge))) ? (elem1) : (elem2))
+#ifndef GENTYPE
+// Erode/dilate kernel for single-channel 8-bit data. Each work-item produces four
+// horizontally adjacent output pixels.
+//
+// Phase 1: the work-group stages a tile of the source into local memory as uchar4
+//          words (two words per work-item). Every pixel whose coordinates fall outside
+//          the whole source image is replaced by VAL (constant border).
+// Phase 2: each work-item folds MORPH_OP over the (2*RADIUSY+1) x (2*RADIUSX+1) window,
+//          skipping positions whose mat_kernel entry is zero.
+// Phase 3: the four results are written to dst, clipped against cols/rows.
+//
+// src                 source pixels, addressed relative to the whole (un-cropped) image
+// dst                 destination pixels
+// src_offset_x/_y     added to group-relative coordinates to obtain source coordinates
+// cols, rows          bounds used to clip the output writes
+// src_step_in_pixel   row stride used when computing source addresses
+// dst_step_in_pixel   row stride used when computing destination addresses
+// mat_kernel          structuring element, row-major, (2*RADIUSY+1) rows of (2*RADIUSX+1)
+// src_whole_cols/rows extent of the source used for the border test
+// dst_offset_in_pixel added to the output column when computing the destination address
+//
+// LSIZE0, LSIZE1, RADIUSX, RADIUSY and VAL are not defined in this file; presumably they
+// are supplied as build options by the host code -- confirm.
+__kernel void morph_C1_D0(__global const uchar * restrict src,
+						  __global uchar *dst,
+						  int src_offset_x, int src_offset_y,
+						  int cols, int rows,
+						  int src_step_in_pixel, int dst_step_in_pixel,
+						  __constant uchar * mat_kernel,
+						  int src_whole_cols, int src_whole_rows,
+						  int dst_offset_in_pixel)
+{
+	int l_x = get_local_id(0);
+	int l_y = get_local_id(1);
+	// First output column / row covered by this work-group (4 pixels per work-item in x).
+	int x = get_group_id(0)*4*LSIZE0;
+	int y = get_group_id(1)*LSIZE1;
+	// Horizontal tile extent, rounded down to a multiple of 4 so it can be read as uchar4 words.
+	int start_x = (x+src_offset_x-RADIUSX) & 0xfffffffc;
+	int end_x = (x+src_offset_x+LSIZE0*4+RADIUSX) & 0xfffffffc;
+	// Tile width counted in uchar4 words.
+	int width = (end_x -start_x+4)>>2;
+	// Byte position of the left-most window pixel inside its uchar4 word.
+	int offset = (src_offset_x-RADIUSX) & 3;
+	int start_y = y+src_offset_y-RADIUSY;
+	// Each work-item stages two words: slot point1 and slot point2 of the local tile.
+	int point1 = mad24(l_y,LSIZE0,l_x);
+	int point2 = point1 + LSIZE0*LSIZE1;
+	int tl_x = (point1 % width)<<2;
+	int tl_y = point1 / width;
+	int tl_x2 = (point2 % width)<<2;
+	int tl_y2 = point2 / width;
+	int cur_x = start_x + tl_x;
+	int cur_y = start_y + tl_y;
+	int cur_x2 = start_x + tl_x2;
+	int cur_y2 = start_y + tl_y2;
+	int start_addr = mad24(cur_y,src_step_in_pixel,cur_x);
+	int start_addr2 = mad24(cur_y2,src_step_in_pixel,cur_x2);
+	uchar4 temp0,temp1;
+	__local uchar4 LDS_DAT[2*LSIZE1*LSIZE0];
+	int end_addr = mad24(src_whole_rows - 1,src_step_in_pixel,src_whole_cols);
+	//read pixels from src
+	// Addresses outside (0, end_addr) are redirected to element 0 so the load itself stays
+	// inside the buffer; the loaded value is overwritten by VAL below when it is out of range.
+	start_addr = ((start_addr < end_addr) && (start_addr > 0)) ? start_addr : 0;
+	start_addr2 = ((start_addr2 < end_addr) && (start_addr2 > 0)) ? start_addr2 : 0;
+	temp0 = *(__global uchar4*)&src[start_addr];
+	temp1 = *(__global uchar4*)&src[start_addr2];
+	//judge if read out of boundary
+	temp0.x= ELEM(cur_x,0,src_whole_cols,VAL,temp0.x);
+	temp0.y= ELEM(cur_x+1,0,src_whole_cols,VAL,temp0.y);
+	temp0.z= ELEM(cur_x+2,0,src_whole_cols,VAL,temp0.z);
+	temp0.w= ELEM(cur_x+3,0,src_whole_cols,VAL,temp0.w);
+	temp0= ELEM(cur_y,0,src_whole_rows,(uchar4)VAL,temp0);
+	temp1.x= ELEM(cur_x2,0,src_whole_cols,VAL,temp1.x);
+	temp1.y= ELEM(cur_x2+1,0,src_whole_cols,VAL,temp1.y);
+	temp1.z= ELEM(cur_x2+2,0,src_whole_cols,VAL,temp1.z);
+	temp1.w= ELEM(cur_x2+3,0,src_whole_cols,VAL,temp1.w);
+	temp1= ELEM(cur_y2,0,src_whole_rows,(uchar4)VAL,temp1);
+	LDS_DAT[point1] = temp0;
+	LDS_DAT[point2] = temp1;
+	// All tile words must be visible to the whole group before any window is read.
+	barrier(CLK_LOCAL_MEM_FENCE);
+	uchar4 res = (uchar4)VAL;
+	for(int i=0;i<2*RADIUSY+1;i++)
+		for(int j=0;j<2*RADIUSX+1;j++)
+		{
+			// vload4 on a uchar pointer reads four consecutive bytes at an arbitrary byte offset,
+			// i.e. the window column j for all four output pixels of this work-item.
+			res =mat_kernel[i*(2*RADIUSX+1)+j]? MORPH_OP(res,vload4(0,(__local uchar*)&LDS_DAT[mad24((l_y+i),width,l_x)]+offset+j)):res;
+		}
+	int gidx = get_global_id(0)<<2;
+	int gidy = get_global_id(1);
+	int out_addr = mad24(gidy,dst_step_in_pixel,gidx+dst_offset_in_pixel);
+	if(gidy<rows)
+	{
+		if(gidx+3<cols)
+		{
+			// The original test was written as (dst_offset_in_pixel&3==0), which parses as
+			// dst_offset_in_pixel & (3==0) and is therefore always false, so the vector store
+			// was never taken. Test the alignment of the actual store address instead: a
+			// uchar4 store needs a 4-byte aligned address (assumes the dst base pointer
+			// itself is at least 4-byte aligned).
+			if((out_addr&3)==0)
+			{
+				*(__global uchar4*)&dst[out_addr] = res;
+			}
+			else
+			{
+				dst[out_addr] = res.x;
+				dst[out_addr+1] = res.y;
+				dst[out_addr+2] = res.z;
+				dst[out_addr+3] = res.w;
+			}
+		}
+		else if(gidx+2<cols)
+		{
+			// Only three of the four pixels lie inside the output.
+			dst[out_addr] = res.x;
+			dst[out_addr+1] = res.y;
+			dst[out_addr+2] = res.z;
+		}
+		else if(gidx+1<cols)
+		{
+			dst[out_addr] = res.x;
+			dst[out_addr+1] = res.y;
+		}
+		else if(gidx<cols)
+		{
+			dst[out_addr] = res.x;
+		}
+	}
+}
+#else
+// Generic erode/dilate kernel: one output element of type GENTYPE per work-item.
+//
+// The work-group first copies a tile of the source into local memory (two elements per
+// work-item), replacing every element whose coordinates fall outside the whole source
+// image by VAL. After the barrier each work-item folds MORPH_OP over the
+// (2*RADIUSY+1) x (2*RADIUSX+1) window, skipping positions whose mat_kernel entry is zero,
+// and writes the result if (gidx, gidy) lies inside cols x rows.
+//
+// GENTYPE, LSIZE0, LSIZE1, RADIUSX, RADIUSY and VAL are not defined in this file;
+// presumably they are supplied as build options by the host code -- confirm.
+__kernel void morph(__global const GENTYPE * restrict src,
+						  __global GENTYPE *dst, 
+						  int src_offset_x, int src_offset_y, 
+						  int cols, int rows, 
+					      int src_step_in_pixel, int dst_step_in_pixel, 
+						  __constant uchar * mat_kernel, 
+						  int src_whole_cols, int src_whole_rows,
+						  int dst_offset_in_pixel)
+{
+	int l_x = get_local_id(0);
+	int l_y = get_local_id(1);
+	// First output column / row covered by this work-group.
+	int x = get_group_id(0)*LSIZE0;
+	int y = get_group_id(1)*LSIZE1;
+	// Source tile origin: the group origin shifted by the source offset and the window radius.
+	int start_x = x+src_offset_x-RADIUSX;
+	int end_x = x + src_offset_x+LSIZE0+RADIUSX;
+	// Row pitch of the local tile, in elements (LSIZE0 + 2*RADIUSX + 1).
+	int width = end_x -start_x+1;
+	int start_y = y+src_offset_y-RADIUSY;
+	// Each work-item stages two elements: slot point1 and slot point2 of the local tile.
+	int point1 = mad24(l_y,LSIZE0,l_x);
+	int point2 = point1 + LSIZE0*LSIZE1;
+	int tl_x = point1 % width;
+	int tl_y = point1 / width;
+	int tl_x2 = point2 % width;
+	int tl_y2 = point2 / width;
+	int cur_x = start_x + tl_x;
+	int cur_y = start_y + tl_y;
+	int cur_x2 = start_x + tl_x2;
+	int cur_y2 = start_y + tl_y2;
+	int start_addr = mad24(cur_y,src_step_in_pixel,cur_x);
+	int start_addr2 = mad24(cur_y2,src_step_in_pixel,cur_x2);
+	GENTYPE temp0,temp1;
+	// NOTE(review): the tile holds 2*LSIZE1*LSIZE0 elements while the window loop below reads
+	// rows l_y+i with pitch `width`; presumably the host picks sizes so this fits -- confirm.
+	__local GENTYPE LDS_DAT[2*LSIZE1*LSIZE0];
+	int end_addr = mad24(src_whole_rows - 1,src_step_in_pixel,src_whole_cols);
+	//read pixels from src
+	// Addresses outside (0, end_addr) are redirected to element 0 so the load stays inside
+	// the buffer; out-of-image values are replaced by VAL right after the load.
+	start_addr = ((start_addr < end_addr) && (start_addr > 0)) ? start_addr : 0;
+	start_addr2 = ((start_addr2 < end_addr) && (start_addr2 > 0)) ? start_addr2 : 0;
+	temp0 = src[start_addr];
+	temp1 = src[start_addr2];
+	//judge if read out of boundary
+	temp0= ELEM(cur_x,0,src_whole_cols,(GENTYPE)VAL,temp0);
+	temp0= ELEM(cur_y,0,src_whole_rows,(GENTYPE)VAL,temp0);
+	temp1= ELEM(cur_x2,0,src_whole_cols,(GENTYPE)VAL,temp1);
+	temp1= ELEM(cur_y2,0,src_whole_rows,(GENTYPE)VAL,temp1);
+	LDS_DAT[point1] = temp0;
+	LDS_DAT[point2] = temp1;
+	// Make every staged element visible to the whole work-group before reading windows.
+	barrier(CLK_LOCAL_MEM_FENCE);
+	GENTYPE res = (GENTYPE)VAL;
+	for(int i=0;i<2*RADIUSY+1;i++)
+		for(int j=0;j<2*RADIUSX+1;j++)
+		{
+			// Only positions enabled in the structuring element contribute to the result.
+			res =mat_kernel[i*(2*RADIUSX+1)+j]? MORPH_OP(res,LDS_DAT[mad24(l_y+i,width,l_x+j)]):res;
+		}
+	int gidx = get_global_id(0);
+	int gidy = get_global_id(1);
+	int out_addr = mad24(gidy,dst_step_in_pixel,gidx+dst_offset_in_pixel);
+	// Work-items outside the output rectangle take part in staging but write nothing.
+	if(gidx<cols && gidy<rows)
+	{
+		dst[out_addr] = res;
+	}
+}
+#endif
--- a/modules/ocl/src/kernels/imgproc_remap.cl
+++ b/modules/ocl/src/kernels/imgproc_remap.cl
--- a/modules/ocl/src/matrix_operations.cpp
+++ b/modules/ocl/src/matrix_operations.cpp
@@ -588,6 +588,13 @@ void set_to_withoutmask_run(const oclMat &dst, const Scalar &scalar, string kern
 			sprintf(compile_option, "-D GENTYPE=int");
 			args.push_back( make_pair( sizeof(cl_int) , (void *)&val.ival.s[0] ));
 			break;
+		case 2:
+			sprintf(compile_option, "-D GENTYPE=int2");
+			cl_int2 i2val;
+			i2val.s[0] = val.ival.s[0];
+			i2val.s[1] = val.ival.s[1];
+			args.push_back( make_pair( sizeof(cl_int2) , (void *)&i2val ));
+			break;
 		case 4:
 			sprintf(compile_option, "-D GENTYPE=int4");
 			args.push_back( make_pair( sizeof(cl_int4) , (void *)&val.ival ));

--- a/modules/ocl/src/threadsafe.cpp
+++ b/modules/ocl/src/threadsafe.cpp
@@ -44,7 +44,7 @@
 //M*/
 #include "precomp.hpp"
-#include "threadsafe.h"
+#include "Threadsafe.h"
 CriticalSection::CriticalSection()
 {

--- a/modules/ocl/test/test_imgproc.cpp
+++ b/modules/ocl/test/test_imgproc.cpp
@@ -958,7 +958,7 @@ TEST_P(Remap, Mat)
    if((interpolation == 1 && map1Type == CV_16SC2) ||(interpolation == 1 && map1Type == CV_16SC1 && map2Type == CV_16SC1))
    {
        cout << "LINEAR don't support the map1Type and map2Type" << endl;
-        return;
+        return;                
    }
    int bordertype[] = {cv::BORDER_CONSTANT,cv::BORDER_REPLICATE/*,BORDER_REFLECT,BORDER_WRAP,BORDER_REFLECT_101*/};
    const char* borderstr[]={"BORDER_CONSTANT", "BORDER_REPLICATE"/*, "BORDER_REFLECT","BORDER_WRAP","BORDER_REFLECT_101"*/};

--- a/samples/android/color-blob-detection/src/org/opencv/samples/colorblobdetect/ColorBlobDetector.java
+++ b/samples/android/color-blob-detection/src/org/opencv/samples/colorblobdetect/ColorBlobDetector.java
@@ -115,5 +115,5 @@ public class ColorBlobDetector
 	// Color radius for range checking in HSV color space
 	private Scalar mColorRadius = new Scalar(25,50,50,0);
 	private Mat mSpectrum = new Mat();
-	private List<MatOfPoint> mContours = new ArrayList<MatOfPoint>();;
+	private List<MatOfPoint> mContours = new ArrayList<MatOfPoint>();
 }
--- a/samples/android/color-blob-detection/src/org/opencv/samples/colorblobdetect/SampleCvViewBase.java
+++ b/samples/android/color-blob-detection/src/org/opencv/samples/colorblobdetect/SampleCvViewBase.java
@@ -52,7 +52,7 @@ public abstract class SampleCvViewBase extends SurfaceView implements SurfaceHol
    }
    public void setupCamera(int width, int height) {
-        Log.i(TAG, "setupCamera("+width+", "+height+")");
+    	Log.i(TAG, "setupCamera("+width+", "+height+")");
        synchronized (this) {
            if (mCamera != null && mCamera.isOpened()) {
                List<Size> sizes = mCamera.getSupportedPreviewSizes();
@@ -111,11 +111,12 @@ public abstract class SampleCvViewBase extends SurfaceView implements SurfaceHol
                bmp = processFrame(mCamera);
            }
            if (bmp != null) {
                Canvas canvas = mHolder.lockCanvas();
                if (canvas != null) {
-                    canvas.drawBitmap(bmp, (canvas.getWidth() - bmp.getWidth()) / 2, (canvas.getHeight() - bmp.getHeight()) / 2, null);
+                	canvas.drawColor(0, android.graphics.PorterDuff.Mode.CLEAR);
+                	canvas.drawBitmap(bmp, (canvas.getWidth() - bmp.getWidth()) / 2, (canvas.getHeight() - bmp.getHeight()) / 2, null);
                    mHolder.unlockCanvasAndPost(canvas);
                }
                bmp.recycle();

--- a/samples/android/image-manipulations/src/org/opencv/samples/imagemanipulations/ImageManipulationsView.java
+++ b/samples/android/image-manipulations/src/org/opencv/samples/imagemanipulations/ImageManipulationsView.java
@@ -127,7 +127,7 @@ class ImageManipulationsView extends SampleCvViewBase {
        case ImageManipulationsActivity.VIEW_MODE_HIST:
            capture.retrieve(mRgba, Highgui.CV_CAP_ANDROID_COLOR_FRAME_RGBA);
-            if (mSizeRgba == null)
+            if ((mSizeRgba == null) || (mRgba.cols() != mSizeRgba.width) || (mRgba.height() != mSizeRgba.height))
                CreateAuxiliaryMats();
            int thikness = (int) (mSizeRgba.width / (mHistSizeNum + 10) / 5);
            if(thikness > 5) thikness = 5;
@@ -171,7 +171,7 @@ class ImageManipulationsView extends SampleCvViewBase {
        case ImageManipulationsActivity.VIEW_MODE_CANNY:
            capture.retrieve(mRgba, Highgui.CV_CAP_ANDROID_COLOR_FRAME_RGBA);
-            if (mRgbaInnerWindow == null || mGrayInnerWindow == null)
+            if ((mRgbaInnerWindow == null) || (mGrayInnerWindow == null) || (mRgba.cols() != mSizeRgba.width) || (mRgba.height() != mSizeRgba.height))
                CreateAuxiliaryMats();
            Imgproc.Canny(mRgbaInnerWindow, mIntermediateMat, 80, 90);
            Imgproc.cvtColor(mIntermediateMat, mRgbaInnerWindow, Imgproc.COLOR_GRAY2BGRA, 4);
@@ -181,7 +181,7 @@ class ImageManipulationsView extends SampleCvViewBase {
            capture.retrieve(mRgba, Highgui.CV_CAP_ANDROID_COLOR_FRAME_RGBA);
            capture.retrieve(mGray, Highgui.CV_CAP_ANDROID_GREY_FRAME);
-            if (mRgbaInnerWindow == null || mGrayInnerWindow == null)
+            if ((mRgbaInnerWindow == null) || (mGrayInnerWindow == null) || (mRgba.cols() != mSizeRgba.width) || (mRgba.height() != mSizeRgba.height))
                CreateAuxiliaryMats();
            Imgproc.Sobel(mGrayInnerWindow, mIntermediateMat, CvType.CV_8U, 1, 1);
@@ -196,7 +196,7 @@ class ImageManipulationsView extends SampleCvViewBase {
        case ImageManipulationsActivity.VIEW_MODE_ZOOM:
            capture.retrieve(mRgba, Highgui.CV_CAP_ANDROID_COLOR_FRAME_RGBA);
-            if (mZoomCorner == null || mZoomWindow == null)
+            if ((mZoomCorner == null) || (mZoomWindow == null) || (mRgba.cols() != mSizeRgba.width) || (mRgba.height() != mSizeRgba.height))
                CreateAuxiliaryMats();
            Imgproc.resize(mZoomWindow, mZoomCorner, mZoomCorner.size());
@@ -206,7 +206,7 @@ class ImageManipulationsView extends SampleCvViewBase {
        case ImageManipulationsActivity.VIEW_MODE_PIXELIZE:
            capture.retrieve(mRgba, Highgui.CV_CAP_ANDROID_COLOR_FRAME_RGBA);
-            if (mRgbaInnerWindow == null)
+            if ((mRgbaInnerWindow == null) || (mRgba.cols() != mSizeRgba.width) || (mRgba.height() != mSizeRgba.height))
                CreateAuxiliaryMats();
            Imgproc.resize(mRgbaInnerWindow, mIntermediateMat, mSize0, 0.1, 0.1, Imgproc.INTER_NEAREST);
            Imgproc.resize(mIntermediateMat, mRgbaInnerWindow, mSizeRgbaInner, 0., 0., Imgproc.INTER_NEAREST);
@@ -214,7 +214,7 @@ class ImageManipulationsView extends SampleCvViewBase {
        case ImageManipulationsActivity.VIEW_MODE_POSTERIZE:
            capture.retrieve(mRgba, Highgui.CV_CAP_ANDROID_COLOR_FRAME_RGBA);
-            if (mRgbaInnerWindow == null)
+            if ((mRgbaInnerWindow == null) || (mRgba.cols() != mSizeRgba.width) || (mRgba.height() != mSizeRgba.height))
                CreateAuxiliaryMats();
            /*
            Imgproc.cvtColor(mRgbaInnerWindow, mIntermediateMat, Imgproc.COLOR_RGBA2RGB);

--- a/samples/android/tutorial-0-androidcamera/src/org/opencv/samples/tutorial0/Sample0Base.java
+++ b/samples/android/tutorial-0-androidcamera/src/org/opencv/samples/tutorial0/Sample0Base.java
@@ -10,12 +10,12 @@ import android.view.MenuItem;
 import android.view.Window;
 public class Sample0Base extends Activity {
-    private static final String TAG            = "Sample::Activity";
+	private static final String TAG            = "Sample::Activity";
    private MenuItem            mItemPreviewRGBA;
    private MenuItem            mItemPreviewGray;
-    private Sample0View mView;
+    private Sample0View         mView;
    public Sample0Base() {
        Log.i(TAG, "Instantiated new " + this.getClass());

--- a/samples/android/tutorial-2-opencvcamera/src/org/opencv/samples/tutorial2/Sample2View.java
+++ b/samples/android/tutorial-2-opencvcamera/src/org/opencv/samples/tutorial2/Sample2View.java
@@ -44,7 +44,7 @@ class Sample2View extends SampleCvViewBase {
            break;
        case Sample2NativeCamera.VIEW_MODE_RGBA:
            capture.retrieve(mRgba, Highgui.CV_CAP_ANDROID_COLOR_FRAME_RGBA);
-            Core.putText(mRgba, "OpenCV + Android", new Point(10, 100), 3, 2, new Scalar(255, 0, 0, 255), 3);
+            Core.putText(mRgba, "OpenCV+Android", new Point(10, 50), 3, 1, new Scalar(255, 0, 0, 255), 2);
            break;
        case Sample2NativeCamera.VIEW_MODE_CANNY:
            capture.retrieve(mGray, Highgui.CV_CAP_ANDROID_GREY_FRAME);

--- a/samples/android/tutorial-2-opencvcamera/src/org/opencv/samples/tutorial2/SampleCvViewBase.java
+++ b/samples/android/tutorial-2-opencvcamera/src/org/opencv/samples/tutorial2/SampleCvViewBase.java
@@ -115,7 +115,8 @@ public abstract class SampleCvViewBase extends SurfaceView implements SurfaceHol
            if (bmp != null) {
                Canvas canvas = mHolder.lockCanvas();
                if (canvas != null) {
-                    canvas.drawBitmap(bmp, (canvas.getWidth() - bmp.getWidth()) / 2, (canvas.getHeight() - bmp.getHeight()) / 2, null);
+                	canvas.drawColor(0, android.graphics.PorterDuff.Mode.CLEAR);
+                	canvas.drawBitmap(bmp, (canvas.getWidth()-bmp.getWidth()) / 2, (canvas.getHeight()-bmp.getHeight()) / 2, null);
                    mHolder.unlockCanvasAndPost(canvas);
                }
                bmp.recycle();

--- a/samples/ocl/squares.cpp
+++ b/samples/ocl/squares.cpp
+// The "Square Detector" program.
+// It loads several images sequentially and tries to find squares in
+// each image
+#include "opencv2/core/core.hpp"
+#include "opencv2/imgproc/imgproc.hpp"
+#include "opencv2/highgui/highgui.hpp"
+#include "opencv2/ocl/ocl.hpp"
+#include <iostream>
+#include <math.h>
+#include <string.h>
+using namespace cv;
+using namespace std;
+// Prints a short description of the sample, how to invoke it, and the OpenCV version.
+void help()
+{
+    // The version is streamed directly: iostreams do not interpret printf-style "%s",
+    // so the former "%s" placeholder was printed literally.
+    cout <<
+        "\nA program using OCL module pyramid scaling, Canny, dilate functions, threshold, split; cpu contours, contour simpification and\n"
+        "memory storage (it's got it all folks) to find\n"
+        "squares in a list of images pic1-6.png\n"
+        "Returns sequence of squares detected on the image.\n"
+        "the sequence is stored in the specified memory storage\n"
+        "Call:\n"
+        "./squares\n"
+        "Using OpenCV version " << CV_VERSION << "\n" << endl;
+}
+// thresh: upper hysteresis threshold passed to Canny in findSquares.
+// N:      number of threshold levels tried per colour plane in findSquares.
+int thresh = 50, N = 11;
+// Title of the window used by namedWindow / imshow.
+const char* wndname = "OpenCL Square Detection Demo";
+// Cosine of the angle at pt0 formed by the vectors pt0->pt1 and pt0->pt2.
+// A small epsilon under the square root keeps the division defined when
+// either vector has zero length.
+double angle( Point pt1, Point pt2, Point pt0 )
+{
+    const double ax = pt1.x - pt0.x;
+    const double ay = pt1.y - pt0.y;
+    const double bx = pt2.x - pt0.x;
+    const double by = pt2.y - pt0.y;
+    const double dot = ax*bx + ay*by;
+    const double lenProduct2 = (ax*ax + ay*ay)*(bx*bx + by*by);
+    return dot/sqrt(lenProduct2 + 1e-10);
+}
+// Detects square-like quadrilaterals in `image` and stores them in `squares`
+// (the vector is cleared first; each entry holds the four approximated vertices).
+// Pyramid filtering, channel split, Canny, dilate and threshold go through the ocl module;
+// contour extraction and the polygon tests run on cv::Mat data.
+void findSquares( const Mat& image, vector<vector<Point> >& squares )
+{
+    squares.clear();
+    Mat gray;
+    cv::ocl::oclMat pyr_ocl, timg_ocl, gray0_ocl, gray_ocl;
+    // down-scale and upscale the image to filter out the noise
+    ocl::pyrDown(ocl::oclMat(image), pyr_ocl);
+    ocl::pyrUp(pyr_ocl, timg_ocl);
+    vector<vector<Point> > contours;
+    vector<cv::ocl::oclMat> gray0s;
+    ocl::split(timg_ocl, gray0s); // split 3 channels into a vector of oclMat
+    // find squares in every color plane of the image
+    // (the loop assumes the input has three channels)
+    for( int c = 0; c < 3; c++ )
+    {
+        gray0_ocl = gray0s[c];
+        // try several threshold levels
+        for( int l = 0; l < N; l++ )
+        {
+            // hack: use Canny instead of zero threshold level.
+            // Canny helps to catch squares with gradient shading
+            if( l == 0 )
+            {
+                // do canny on OpenCL device
+                // apply Canny. The upper threshold is the global `thresh`
+                // and the lower is set to 0 (which forces edges merging)
+                cv::ocl::Canny(gray0_ocl, gray_ocl, 0, thresh, 5);
+                // dilate canny output to remove potential
+                // holes between edge segments
+                ocl::dilate(gray_ocl, gray_ocl, Mat(), Point(-1,-1));
+                // copy the result into a cv::Mat for findContours
+                gray = Mat(gray_ocl);
+            }
+            else
+            {
+                // apply threshold if l!=0 (THRESH_BINARY):
+                //     tgray(x,y) = gray(x,y) > (l+1)*255/N ? 255 : 0
+                cv::ocl::threshold(gray0_ocl, gray_ocl, (l+1)*255/N, 255, THRESH_BINARY);
+                // copy the result into a cv::Mat for findContours
+                gray = gray_ocl;
+            }
+            // find contours and store them all as a list
+            findContours(gray, contours, CV_RETR_LIST, CV_CHAIN_APPROX_SIMPLE);
+            vector<Point> approx;
+            // test each contour
+            for( size_t i = 0; i < contours.size(); i++ )
+            {
+                // approximate contour with accuracy proportional
+                // to the contour perimeter
+                approxPolyDP(Mat(contours[i]), approx, arcLength(Mat(contours[i]), true)*0.02, true);
+                // square contours should have 4 vertices after approximation
+                // relatively large area (to filter out noisy contours)
+                // and be convex.
+                // Note: absolute value of an area is used because
+                // area may be positive or negative - in accordance with the
+                // contour orientation
+                if( approx.size() == 4 &&
+                    fabs(contourArea(Mat(approx))) > 1000 &&
+                    isContourConvex(Mat(approx)) )
+                {
+                    double maxCosine = 0;
+                    // j = 2..4 visits the corners at approx[1], approx[2] and approx[3]
+                    for( int j = 2; j < 5; j++ )
+                    {
+                        // find the maximum cosine of the angle between joint edges
+                        double cosine = fabs(angle(approx[j%4], approx[j-2], approx[j-1]));
+                        maxCosine = MAX(maxCosine, cosine);
+                    }
+                    // if cosines of all angles are small
+                    // (all angles are ~90 degree) then write quadrangle
+                    // vertices to resultant sequence
+                    if( maxCosine < 0.3 )
+                        squares.push_back(approx);
+                }
+            }
+        }
+    }
+}
+// Outlines every quadrilateral in `squares` on `image` (green, 3 px, anti-aliased)
+// and then shows the image in the demo window.
+void drawSquares( Mat& image, const vector<vector<Point> >& squares )
+{
+    for( size_t idx = 0; idx < squares.size(); ++idx )
+    {
+        const vector<Point>& quad = squares[idx];
+        // polylines expects arrays of point pointers and point counts; pass one of each.
+        const Point* vertices = &quad[0];
+        int vertexCount = (int)quad.size();
+        polylines(image, &vertices, &vertexCount, 1, true, Scalar(0,255,0), 3, CV_AA);
+    }
+    imshow(wndname, image);
+}
+// Runs the square detector over a fixed list of images, showing each result
+// until a key is pressed; Esc ends the demo early.
+int main(int /*argc*/, char** /*argv*/)
+{
+    //ocl::setBinpath("F:/kernel_bin");
+    // Query the OpenCL devices; the assertion fails when getDevice returns zero.
+    vector<ocl::Info> info;
+    CV_Assert(ocl::getDevice(info));
+    // Null-terminated list of input file names.
+    static const char* names[] = { "pic1.png", "pic2.png", "pic3.png",
+        "pic4.png", "pic5.png", "pic6.png", 0 };
+    help();
+    namedWindow( wndname, 1 );
+    vector<vector<Point> > squares;
+    for( const char* const* name = names; *name != 0; ++name )
+    {
+        Mat image = imread(*name, 1);
+        if( image.empty() )
+        {
+            cout << "Couldn't load " << *name << endl;
+            continue;
+        }
+        findSquares(image, squares);
+        drawSquares(image, squares);
+        // Wait for a key press; key code 27 (Esc) stops the loop.
+        const int key = waitKey();
+        if( (char)key == 27 )
+            break;
+    }
+    return 0;
+}