diff --git a/modules/imgproc/src/opencl/pyr_down.cl b/modules/imgproc/src/opencl/pyr_down.cl
index 6ba0cc691d66165e61f74769ceb63846220fdcf5..b8b06b712be6bce5c60cfdd8bc40e15026dd761d 100644
--- a/modules/imgproc/src/opencl/pyr_down.cl
+++ b/modules/imgproc/src/opencl/pyr_down.cl
@@ -51,6 +51,22 @@
 #endif
 #endif
 
+#if defined BORDER_REPLICATE
+// aaaaaa|abcdefgh|hhhhhhh : indices outside [0, maxV-1] snap to the nearest edge
+#define EXTRAPOLATE(x, maxV) clamp((x), 0, (maxV)-1)
+#elif defined BORDER_WRAP
+// cdefgh|abcdefgh|abcdefg : periodic; result is non-negative only for x >= -maxV
+#define EXTRAPOLATE(x, maxV) (( (x) + (maxV) ) % (maxV))
+#elif defined BORDER_REFLECT
+// fedcba|abcdefgh|hgfedcb : clamp keeps the index valid when x is too far out for one reflection
+#define EXTRAPOLATE(x, maxV) clamp(min(((maxV)-1)*2-(x)+1, max((x),-(x)-1) ), 0, (maxV)-1)
+#elif defined BORDER_REFLECT_101 || defined BORDER_REFLECT101
+// gfedcb|abcdefgh|gfedcba : clamp keeps the index valid when x is too far out for one reflection
+#define EXTRAPOLATE(x, maxV) clamp(min(((maxV)-1)*2-(x), max((x),-(x)) ), 0, (maxV)-1)
+#else
+#error No extrapolation method
+#endif
+
 #if cn != 3
 #define loadpix(addr)  *(__global const T*)(addr)
 #define storepix(val, addr)  *(__global T*)(addr) = (val)
@@ -61,37 +77,9 @@
 #define PIXSIZE ((int)sizeof(T1)*3)
 #endif
 
-#define noconvert
-
-inline int idx_row_low(int y, int last_row)
-{
-    return abs(y) % (last_row + 1);
-}
-
-inline int idx_row_high(int y, int last_row)
-{
-    return abs(last_row - (int)abs(last_row - y)) % (last_row + 1);
-}
-
-inline int idx_row(int y, int last_row)
-{
-    return idx_row_low(idx_row_high(y, last_row), last_row);
-}
-
-inline int idx_col_low(int x, int last_col)
-{
-    return abs(x) % (last_col + 1);
-}
+#define SRC(_x,_y) convertToFT(loadpix(srcData + mad24((_y), src_step, PIXSIZE * (_x)))) // pixel at column _x, row _y, passed through convertToFT
 
-inline int idx_col_high(int x, int last_col)
-{
-    return abs(last_col - (int)abs(last_col - x)) % (last_col + 1);
-}
-
-inline int idx_col(int x, int last_col)
-{
-    return idx_col_low(idx_col_high(x, last_col), last_col);
-}
+#define noconvert
 
 __kernel void pyrDown(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols,
                          __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols)
@@ -99,7 +87,7 @@ __kernel void pyrDown(__global const uchar * src, int src_step, int src_offset,
     const int x = get_global_id(0);
     const int y = get_group_id(1);
 
-    __local FT smem[256 + 4];
+    __local FT smem[LOCAL_SIZE + 4];
     __global uchar * dstData = dst + dst_offset;
     __global const uchar * srcData = src + src_offset;
 
@@ -109,16 +97,14 @@ __kernel void pyrDown(__global const uchar * src, int src_step, int src_offset,
     FT co3 = 0.0625f;
 
     const int src_y = 2*y;
-    const int last_row = src_rows - 1;
-    const int last_col = src_cols - 1;
 
     if (src_y >= 2 && src_y < src_rows - 2 && x >= 2 && x < src_cols - 2)
     {
-        sum =       co3 * convertToFT(loadpix(srcData + (src_y - 2) * src_step + x * PIXSIZE));
-        sum = sum + co2 * convertToFT(loadpix(srcData + (src_y - 1) * src_step + x * PIXSIZE));
-        sum = sum + co1 * convertToFT(loadpix(srcData + (src_y    ) * src_step + x * PIXSIZE));
-        sum = sum + co2 * convertToFT(loadpix(srcData + (src_y + 1) * src_step + x * PIXSIZE));
-        sum = sum + co3 * convertToFT(loadpix(srcData + (src_y + 2) * src_step + x * PIXSIZE));
+        sum =       co3 * SRC(x, src_y - 2);
+        sum = sum + co2 * SRC(x, src_y - 1);
+        sum = sum + co1 * SRC(x, src_y    );
+        sum = sum + co2 * SRC(x, src_y + 1);
+        sum = sum + co3 * SRC(x, src_y + 2);
 
         smem[2 + get_local_id(0)] = sum;
 
@@ -126,66 +112,62 @@ __kernel void pyrDown(__global const uchar * src, int src_step, int src_offset,
         {
             const int left_x = x - 2;
 
-            sum =       co3 * convertToFT(loadpix(srcData + (src_y - 2) * src_step + left_x * PIXSIZE));
-            sum = sum + co2 * convertToFT(loadpix(srcData + (src_y - 1) * src_step + left_x * PIXSIZE));
-            sum = sum + co1 * convertToFT(loadpix(srcData + (src_y    ) * src_step + left_x * PIXSIZE));
-            sum = sum + co2 * convertToFT(loadpix(srcData + (src_y + 1) * src_step + left_x * PIXSIZE));
-            sum = sum + co3 * convertToFT(loadpix(srcData + (src_y + 2) * src_step + left_x * PIXSIZE));
+            sum =       co3 * SRC(left_x, src_y - 2);
+            sum = sum + co2 * SRC(left_x, src_y - 1);
+            sum = sum + co1 * SRC(left_x, src_y    );
+            sum = sum + co2 * SRC(left_x, src_y + 1);
+            sum = sum + co3 * SRC(left_x, src_y + 2);
 
             smem[get_local_id(0)] = sum;
         }
 
-        if (get_local_id(0) > 253)
+        if (get_local_id(0) > LOCAL_SIZE - 3)
         {
             const int right_x = x + 2;
 
-            sum =       co3 * convertToFT(loadpix(srcData + (src_y - 2) * src_step + right_x * PIXSIZE));
-            sum = sum + co2 * convertToFT(loadpix(srcData + (src_y - 1) * src_step + right_x * PIXSIZE));
-            sum = sum + co1 * convertToFT(loadpix(srcData + (src_y    ) * src_step + right_x * PIXSIZE));
-            sum = sum + co2 * convertToFT(loadpix(srcData + (src_y + 1) * src_step + right_x * PIXSIZE));
-            sum = sum + co3 * convertToFT(loadpix(srcData + (src_y + 2) * src_step + right_x * PIXSIZE));
+            sum =       co3 * SRC(right_x, src_y - 2);
+            sum = sum + co2 * SRC(right_x, src_y - 1);
+            sum = sum + co1 * SRC(right_x, src_y    );
+            sum = sum + co2 * SRC(right_x, src_y + 1);
+            sum = sum + co3 * SRC(right_x, src_y + 2);
 
             smem[4 + get_local_id(0)] = sum;
         }
     }
     else
     {
-        int col = idx_col(x, last_col);
+        int col = EXTRAPOLATE(x, src_cols);
 
-        sum =       co3 * convertToFT(loadpix(srcData + idx_row(src_y - 2, last_row) * src_step + col * PIXSIZE));
-        sum = sum + co2 * convertToFT(loadpix(srcData + idx_row(src_y - 1, last_row) * src_step + col * PIXSIZE));
-        sum = sum + co1 * convertToFT(loadpix(srcData + idx_row(src_y    , last_row) * src_step + col * PIXSIZE));
-        sum = sum + co2 * convertToFT(loadpix(srcData + idx_row(src_y + 1, last_row) * src_step + col * PIXSIZE));
-        sum = sum + co3 * convertToFT(loadpix(srcData + idx_row(src_y + 2, last_row) * src_step + col * PIXSIZE));
+        sum =       co3 * SRC(col, EXTRAPOLATE(src_y - 2, src_rows));
+        sum = sum + co2 * SRC(col, EXTRAPOLATE(src_y - 1, src_rows));
+        sum = sum + co1 * SRC(col, EXTRAPOLATE(src_y    , src_rows));
+        sum = sum + co2 * SRC(col, EXTRAPOLATE(src_y + 1, src_rows));
+        sum = sum + co3 * SRC(col, EXTRAPOLATE(src_y + 2, src_rows));
 
         smem[2 + get_local_id(0)] = sum;
 
         if (get_local_id(0) < 2)
         {
-            const int left_x = x - 2;
+            col = EXTRAPOLATE(x - 2, src_cols);
 
-            col = idx_col(left_x, last_col);
-
-            sum =       co3 * convertToFT(loadpix(srcData + idx_row(src_y - 2, last_row) * src_step + col * PIXSIZE));
-            sum = sum + co2 * convertToFT(loadpix(srcData + idx_row(src_y - 1, last_row) * src_step + col * PIXSIZE));
-            sum = sum + co1 * convertToFT(loadpix(srcData + idx_row(src_y    , last_row) * src_step + col * PIXSIZE));
-            sum = sum + co2 * convertToFT(loadpix(srcData + idx_row(src_y + 1, last_row) * src_step + col * PIXSIZE));
-            sum = sum + co3 * convertToFT(loadpix(srcData + idx_row(src_y + 2, last_row) * src_step + col * PIXSIZE));
+            sum =       co3 * SRC(col, EXTRAPOLATE(src_y - 2, src_rows));
+            sum = sum + co2 * SRC(col, EXTRAPOLATE(src_y - 1, src_rows));
+            sum = sum + co1 * SRC(col, EXTRAPOLATE(src_y    , src_rows));
+            sum = sum + co2 * SRC(col, EXTRAPOLATE(src_y + 1, src_rows));
+            sum = sum + co3 * SRC(col, EXTRAPOLATE(src_y + 2, src_rows));
 
             smem[get_local_id(0)] = sum;
         }
 
-        if (get_local_id(0) > 253)
+        if (get_local_id(0) > LOCAL_SIZE - 3)
         {
-            const int right_x = x + 2;
-
-            col = idx_col(right_x, last_col);
+            col = EXTRAPOLATE(x + 2, src_cols);
 
-            sum =       co3 * convertToFT(loadpix(srcData + idx_row(src_y - 2, last_row) * src_step + col * PIXSIZE));
-            sum = sum + co2 * convertToFT(loadpix(srcData + idx_row(src_y - 1, last_row) * src_step + col * PIXSIZE));
-            sum = sum + co1 * convertToFT(loadpix(srcData + idx_row(src_y    , last_row) * src_step + col * PIXSIZE));
-            sum = sum + co2 * convertToFT(loadpix(srcData + idx_row(src_y + 1, last_row) * src_step + col * PIXSIZE));
-            sum = sum + co3 * convertToFT(loadpix(srcData + idx_row(src_y + 2, last_row) * src_step + col * PIXSIZE));
+            sum =       co3 * SRC(col, EXTRAPOLATE(src_y - 2, src_rows));
+            sum = sum + co2 * SRC(col, EXTRAPOLATE(src_y - 1, src_rows));
+            sum = sum + co1 * SRC(col, EXTRAPOLATE(src_y    , src_rows));
+            sum = sum + co2 * SRC(col, EXTRAPOLATE(src_y + 1, src_rows));
+            sum = sum + co3 * SRC(col, EXTRAPOLATE(src_y + 2, src_rows));
 
             smem[4 + get_local_id(0)] = sum;
         }
@@ -193,7 +175,7 @@ __kernel void pyrDown(__global const uchar * src, int src_step, int src_offset,
 
     barrier(CLK_LOCAL_MEM_FENCE);
 
-    if (get_local_id(0) < 128)
+    if (get_local_id(0) < LOCAL_SIZE / 2)
     {
         const int tid2 = get_local_id(0) * 2;
 
diff --git a/modules/imgproc/src/pyramids.cpp b/modules/imgproc/src/pyramids.cpp
index 42464c1a5d5e2b126ff2d6ae3e3594bfe9c591c5..d1ed92d5d9123f83269bc37c9cba70ede56743c6 100644
--- a/modules/imgproc/src/pyramids.cpp
+++ b/modules/imgproc/src/pyramids.cpp
@@ -407,11 +407,8 @@ static bool ocl_pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, in
 {
     int type = _src.type(), depth = CV_MAT_DEPTH(type), channels = CV_MAT_CN(type);
 
-    if (channels > 4 || borderType != BORDER_DEFAULT)
-        return false;
-
     bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
-    if ((depth == CV_64F) && !(doubleSupport))
+    if (channels > 4 || (depth == CV_64F && !doubleSupport) || borderType < BORDER_REPLICATE || borderType > BORDER_REFLECT_101) // reject modes the kernel cannot extrapolate or that fall outside borderMap[]
         return false;
 
     Size ssize = _src.size();
@@ -425,15 +422,18 @@ static bool ocl_pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, in
     UMat dst = _dst.getUMat();
 
     int float_depth = depth == CV_64F ? CV_64F : CV_32F;
+    const int local_size = 256;
+    const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP",
+                                       "BORDER_REFLECT_101" };
     char cvt[2][50];
     String buildOptions = format(
             "-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s "
-            "-D T1=%s -D cn=%d",
+            "-D T1=%s -D cn=%d -D %s -D LOCAL_SIZE=%d",
             ocl::typeToStr(type), ocl::typeToStr(CV_MAKETYPE(float_depth, channels)),
             ocl::convertTypeStr(float_depth, depth, channels, cvt[0]),
             ocl::convertTypeStr(depth, float_depth, channels, cvt[1]),
             doubleSupport ? " -D DOUBLE_SUPPORT" : "",
-            ocl::typeToStr(depth), channels
+            ocl::typeToStr(depth), channels, borderMap[borderType], local_size
     );
     ocl::Kernel k("pyrDown", ocl::imgproc::pyr_down_oclsrc, buildOptions);
     if (k.empty())
@@ -441,7 +441,7 @@ static bool ocl_pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, in
 
     k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst));
 
-    size_t localThreads[2]  = { 256, 1 };
+    size_t localThreads[2]  = { local_size, 1 };
     size_t globalThreads[2] = { src.cols, dst.rows };
     return k.run(2, globalThreads, localThreads, false);
 }
diff --git a/modules/imgproc/test/ocl/test_pyramids.cpp b/modules/imgproc/test/ocl/test_pyramids.cpp
index 113349b30253f102dce8cb6321f965243ad780cb..a129c7f771acd92f4bd4c630eefb2d11cb94b058 100644
--- a/modules/imgproc/test/ocl/test_pyramids.cpp
+++ b/modules/imgproc/test/ocl/test_pyramids.cpp
@@ -52,9 +52,9 @@
 namespace cvtest {
 namespace ocl {
 
-PARAM_TEST_CASE(PyrTestBase, MatDepth, Channels, bool)
+PARAM_TEST_CASE(PyrTestBase, MatDepth, Channels, BorderType, bool)
 {
-    int depth, channels;
+    int depth, channels, borderType;
     bool use_roi;
 
     TEST_DECLARE_INPUT_PARAMETER(src);
@@ -64,7 +64,8 @@ PARAM_TEST_CASE(PyrTestBase, MatDepth, Channels, bool)
     {
         depth = GET_PARAM(0);
         channels = GET_PARAM(1);
-        use_roi = GET_PARAM(2);
+        borderType = GET_PARAM(2);
+        use_roi = GET_PARAM(3);
     }
 
     void generateTestData(Size src_roiSize, Size dst_roiSize)
@@ -99,8 +100,8 @@ OCL_TEST_P(PyrDown, Mat)
         dst_roiSize = dst_roiSize.area() == 0 ? Size((src_roiSize.width + 1) / 2, (src_roiSize.height + 1) / 2) : dst_roiSize;
         generateTestData(src_roiSize, dst_roiSize);
 
-        OCL_OFF(pyrDown(src_roi, dst_roi, dst_roiSize));
-        OCL_ON(pyrDown(usrc_roi, udst_roi, dst_roiSize));
+        OCL_OFF(pyrDown(src_roi, dst_roi, dst_roiSize, borderType));
+        OCL_ON(pyrDown(usrc_roi, udst_roi, dst_roiSize, borderType));
 
         Near(depth == CV_32F ? 1e-4f : 1.0f);
     }
@@ -109,6 +110,8 @@ OCL_TEST_P(PyrDown, Mat)
 OCL_INSTANTIATE_TEST_CASE_P(ImgprocPyr, PyrDown, Combine(
                             Values(CV_8U, CV_16U, CV_16S, CV_32F, CV_64F),
                             Values(1, 2, 3, 4),
+                            Values((BorderType)BORDER_REPLICATE,
+                            (BorderType)BORDER_REFLECT, (BorderType)BORDER_REFLECT_101),
                             Bool()
                             ));
 
@@ -124,8 +127,8 @@ OCL_TEST_P(PyrUp, Mat)
         Size dst_roiSize = Size(2 * src_roiSize.width, 2 * src_roiSize.height);
         generateTestData(src_roiSize, dst_roiSize);
 
-        OCL_OFF(pyrUp(src_roi, dst_roi, dst_roiSize));
-        OCL_ON(pyrUp(usrc_roi, udst_roi, dst_roiSize));
+        OCL_OFF(pyrUp(src_roi, dst_roi, dst_roiSize, borderType));
+        OCL_ON(pyrUp(usrc_roi, udst_roi, dst_roiSize, borderType));
 
         Near(depth == CV_32F ? 1e-4f : 1.0f);
     }
@@ -134,6 +137,7 @@ OCL_TEST_P(PyrUp, Mat)
 OCL_INSTANTIATE_TEST_CASE_P(ImgprocPyr, PyrUp, Combine(
                             Values(CV_8U, CV_16U, CV_16S, CV_32F, CV_64F),
                             Values(1, 2, 3, 4),
+                            Values((BorderType)BORDER_REFLECT_101),
                             Bool()
                             ));