Commit 0913dd7f authored by Andrey Pavlenko's avatar Andrey Pavlenko Committed by OpenCV Buildbot

Merge pull request #2055 from vbystricky:ocl_sepFilter2D

parents 2d2312d7 26d53c74
......@@ -42,6 +42,7 @@
#include "precomp.hpp"
#include "opencl_kernels.hpp"
#include <sstream>
/****************************************************************************************\
Base Image Filter
......@@ -3314,6 +3315,246 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth,
}
return kernel.run(2, globalsize, localsize, true);
}
static bool ocl_sepRowFilter2D( UMat &src, UMat &buf, Mat &kernelX, int anchor, int borderType, bool sync)
{
int type = src.type();
int cn = CV_MAT_CN(type);
int sdepth = CV_MAT_DEPTH(type);
Size bufSize = buf.size();
#ifdef ANDROID
size_t localsize[2] = {16, 10};
#else
size_t localsize[2] = {16, 16};
#endif
size_t globalsize[2] = {DIVUP(bufSize.width, localsize[0]) * localsize[0], DIVUP(bufSize.height, localsize[1]) * localsize[1]};
if (CV_8U == sdepth)
{
switch (cn)
{
case 1:
globalsize[0] = DIVUP((bufSize.width + 3) >> 2, localsize[0]) * localsize[0];
break;
case 2:
globalsize[0] = DIVUP((bufSize.width + 1) >> 1, localsize[0]) * localsize[0];
break;
case 4:
globalsize[0] = DIVUP(bufSize.width, localsize[0]) * localsize[0];
break;
}
}
int radiusX = anchor;
int radiusY = (int)((buf.rows - src.rows) >> 1);
bool isIsolatedBorder = (borderType & BORDER_ISOLATED) != 0;
const char* btype = NULL;
switch (borderType & ~BORDER_ISOLATED)
{
case BORDER_CONSTANT:
btype = "BORDER_CONSTANT";
break;
case BORDER_REPLICATE:
btype = "BORDER_REPLICATE";
break;
case BORDER_REFLECT:
btype = "BORDER_REFLECT";
break;
case BORDER_WRAP:
btype = "BORDER_WRAP";
break;
case BORDER_REFLECT101:
btype = "BORDER_REFLECT_101";
break;
default:
return false;
}
bool extra_extrapolation = src.rows < (int)((-radiusY + globalsize[1]) >> 1) + 1;
extra_extrapolation |= src.rows < radiusY;
extra_extrapolation |= src.cols < (int)((-radiusX + globalsize[0] + 8 * localsize[0] + 3) >> 1) + 1;
extra_extrapolation |= src.cols < radiusX;
cv::String build_options = cv::format("-D RADIUSX=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D %s -D %s",
radiusX, (int)localsize[0], (int)localsize[1], cn,
btype,
extra_extrapolation ? "EXTRA_EXTRAPOLATION" : "NO_EXTRA_EXTRAPOLATION",
isIsolatedBorder ? "BORDER_ISOLATED" : "NO_BORDER_ISOLATED");
Size srcWholeSize; Point srcOffset;
src.locateROI(srcWholeSize, srcOffset);
std::stringstream strKernel;
strKernel << "row_filter";
if (-1 != cn)
strKernel << "_C" << cn;
if (-1 != sdepth)
strKernel << "_D" << sdepth;
ocl::Kernel kernelRow;
if (!kernelRow.create(strKernel.str().c_str(), cv::ocl::imgproc::filterSepRow_oclsrc, build_options))
return false;
int idxArg = 0;
idxArg = kernelRow.set(idxArg, ocl::KernelArg::PtrReadOnly(src));
idxArg = kernelRow.set(idxArg, (int)(src.step / src.elemSize()));
idxArg = kernelRow.set(idxArg, srcOffset.x);
idxArg = kernelRow.set(idxArg, srcOffset.y);
idxArg = kernelRow.set(idxArg, src.cols);
idxArg = kernelRow.set(idxArg, src.rows);
idxArg = kernelRow.set(idxArg, srcWholeSize.width);
idxArg = kernelRow.set(idxArg, srcWholeSize.height);
idxArg = kernelRow.set(idxArg, ocl::KernelArg::PtrWriteOnly(buf));
idxArg = kernelRow.set(idxArg, (int)(buf.step / buf.elemSize()));
idxArg = kernelRow.set(idxArg, buf.cols);
idxArg = kernelRow.set(idxArg, buf.rows);
idxArg = kernelRow.set(idxArg, radiusY);
idxArg = kernelRow.set(idxArg, ocl::KernelArg::PtrReadOnly(kernelX.getUMat(ACCESS_READ)));
return kernelRow.run(2, globalsize, localsize, sync);
}
static bool ocl_sepColFilter2D(UMat &buf, UMat &dst, Mat &kernelY, int anchor, bool sync)
{
#ifdef ANDROID
size_t localsize[2] = {16, 10};
#else
size_t localsize[2] = {16, 16};
#endif
size_t globalsize[2] = {0, 0};
int type = dst.type();
int cn = CV_MAT_CN(type);
int ddepth = CV_MAT_DEPTH(type);
Size sz = dst.size();
globalsize[1] = DIVUP(sz.height, localsize[1]) * localsize[1];
cv::String build_options;
if (CV_8U == ddepth)
{
switch (cn)
{
case 1:
globalsize[0] = DIVUP(sz.width, localsize[0]) * localsize[0];
build_options = cv::format("-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
anchor, (int)localsize[0], (int)localsize[1], cn, "float", "uchar", "convert_uchar_sat");
break;
case 2:
globalsize[0] = DIVUP((sz.width + 1) / 2, localsize[0]) * localsize[0];
build_options = cv::format("-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
anchor, (int)localsize[0], (int)localsize[1], cn, "float2", "uchar2", "convert_uchar2_sat");
break;
case 3:
case 4:
globalsize[0] = DIVUP(sz.width, localsize[0]) * localsize[0];
build_options = cv::format("-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
anchor, (int)localsize[0], (int)localsize[1], cn, "float4", "uchar4", "convert_uchar4_sat");
break;
}
}
else
{
globalsize[0] = DIVUP(sz.width, localsize[0]) * localsize[0];
switch (dst.type())
{
case CV_32SC1:
build_options = cv::format("-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
anchor, (int)localsize[0], (int)localsize[1], cn, "float", "int", "convert_int_sat");
break;
case CV_32SC3:
case CV_32SC4:
build_options = cv::format("-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
anchor, (int)localsize[0], (int)localsize[1], cn, "float4", "int4", "convert_int4_sat");
break;
case CV_32FC1:
build_options = cv::format("-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
anchor, (int)localsize[0], (int)localsize[1], cn, "float", "float", "");
break;
case CV_32FC3:
case CV_32FC4:
build_options = cv::format("-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
anchor, (int)localsize[0], (int)localsize[1], cn, "float4", "float4", "");
break;
}
}
ocl::Kernel kernelCol;
if (!kernelCol.create("col_filter", cv::ocl::imgproc::filterSepCol_oclsrc, build_options))
return false;
int idxArg = 0;
idxArg = kernelCol.set(idxArg, ocl::KernelArg::PtrReadOnly(buf));
idxArg = kernelCol.set(idxArg, (int)(buf.step / buf.elemSize()));
idxArg = kernelCol.set(idxArg, buf.cols);
idxArg = kernelCol.set(idxArg, buf.rows);
idxArg = kernelCol.set(idxArg, ocl::KernelArg::PtrWriteOnly(dst));
idxArg = kernelCol.set(idxArg, (int)(dst.offset / dst.elemSize()));
idxArg = kernelCol.set(idxArg, (int)(dst.step / dst.elemSize()));
idxArg = kernelCol.set(idxArg, dst.cols);
idxArg = kernelCol.set(idxArg, dst.rows);
idxArg = kernelCol.set(idxArg, ocl::KernelArg::PtrReadOnly(kernelY.getUMat(ACCESS_READ)));
return kernelCol.run(2, globalsize, localsize, sync);
}
static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth,
InputArray _kernelX, InputArray _kernelY, Point anchor,
double delta, int borderType )
{
if (abs(delta)> FLT_MIN)
return false;
int type = _src.type();
if ((CV_8UC1 != type) && (CV_8UC4 == type) &&
(CV_32FC1 != type) && (CV_32FC4 == type))
return false;
int cn = CV_MAT_CN(type);
Mat kernelX = _kernelX.getMat().reshape(1, 1);
if (1 != (kernelX.cols % 2))
return false;
Mat kernelY = _kernelY.getMat().reshape(1, 1);
if (1 != (kernelY.cols % 2))
return false;
int sdepth = CV_MAT_DEPTH(type);
if( anchor.x < 0 )
anchor.x = kernelX.cols >> 1;
if( anchor.y < 0 )
anchor.y = kernelY.cols >> 1;
if( ddepth < 0 )
ddepth = sdepth;
else if (ddepth != sdepth)
return false;
UMat src = _src.getUMat();
Size srcWholeSize; Point srcOffset;
src.locateROI(srcWholeSize, srcOffset);
if ( (0 != (srcOffset.x % 4)) ||
(0 != (src.cols % 4)) ||
(0 != ((src.step / src.elemSize()) % 4))
)
{
return false;
}
Size srcSize = src.size();
Size bufSize(srcSize.width, srcSize.height + kernelY.cols - 1);
UMat buf; buf.create(bufSize, CV_MAKETYPE(CV_32F, cn));
if (!ocl_sepRowFilter2D(src, buf, kernelX, anchor.x, borderType, true))
return false;
_dst.create(srcSize, CV_MAKETYPE(ddepth, cn));
UMat dst = _dst.getUMat();
return ocl_sepColFilter2D(buf, dst, kernelY, anchor.y, true);
}
}
cv::Ptr<cv::BaseFilter> cv::getLinearFilter(int srcType, int dstType,
......@@ -3481,6 +3722,10 @@ void cv::sepFilter2D( InputArray _src, OutputArray _dst, int ddepth,
InputArray _kernelX, InputArray _kernelY, Point anchor,
double delta, int borderType )
{
bool use_opencl = ocl::useOpenCL() && _dst.isUMat();
if( use_opencl && ocl_sepFilter2D(_src, _dst, ddepth, _kernelX, _kernelY, anchor, delta, borderType))
return;
Mat src = _src.getMat(), kernelX = _kernelX.getMat(), kernelY = _kernelY.getMat();
if( ddepth < 0 )
......
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Niko Li, newlife20080214@gmail.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//
#define READ_TIMES_COL ((2*(RADIUSY+LSIZE1)-1)/LSIZE1)
#define RADIUS 1
#if CN ==1
#define ALIGN (((RADIUS)+3)>>2<<2)
#elif CN==2
#define ALIGN (((RADIUS)+1)>>1<<1)
#elif CN==3
#define ALIGN (((RADIUS)+3)>>2<<2)
#elif CN==4
#define ALIGN (RADIUS)
#define READ_TIMES_ROW ((2*(RADIUS+LSIZE0)-1)/LSIZE0)
#endif
/**********************************************************************************
These kernels are written for separable filters such as Sobel, Scharr, GaussianBlur.
Now(6/29/2011) the kernels only support 8U data type and the anchor of the convovle
kernel must be in the center. ROI is not supported either.
Each kernels read 4 elements(not 4 pixels), save them to LDS and read the data needed
from LDS to calculate the result.
The length of the convovle kernel supported is only related to the MAX size of LDS,
which is HW related.
Niko
6/29/2011
The info above maybe obsolete.
***********************************************************************************/
__kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void col_filter
(__global const GENTYPE_SRC * restrict src,
const int src_step_in_pixel,
const int src_whole_cols,
const int src_whole_rows,
__global GENTYPE_DST * dst,
const int dst_offset_in_pixel,
const int dst_step_in_pixel,
const int dst_cols,
const int dst_rows,
__constant float * mat_kernel __attribute__((max_constant_size(4*(2*RADIUSY+1)))))
{
int x = get_global_id(0);
int y = get_global_id(1);
int l_x = get_local_id(0);
int l_y = get_local_id(1);
int start_addr = mad24(y, src_step_in_pixel, x);
int end_addr = mad24(src_whole_rows - 1, src_step_in_pixel, src_whole_cols);
int i;
GENTYPE_SRC sum, temp[READ_TIMES_COL];
__local GENTYPE_SRC LDS_DAT[LSIZE1 * READ_TIMES_COL][LSIZE0 + 1];
//read pixels from src
for(i = 0;i<READ_TIMES_COL;i++)
{
int current_addr = start_addr+i*LSIZE1*src_step_in_pixel;
current_addr = current_addr < end_addr ? current_addr : 0;
temp[i] = src[current_addr];
}
//save pixels to lds
for(i = 0;i<READ_TIMES_COL;i++)
{
LDS_DAT[l_y+i*LSIZE1][l_x] = temp[i];
}
barrier(CLK_LOCAL_MEM_FENCE);
//read pixels from lds and calculate the result
sum = LDS_DAT[l_y+RADIUSY][l_x]*mat_kernel[RADIUSY];
for(i=1;i<=RADIUSY;i++)
{
temp[0]=LDS_DAT[l_y+RADIUSY-i][l_x];
temp[1]=LDS_DAT[l_y+RADIUSY+i][l_x];
sum += temp[0] * mat_kernel[RADIUSY-i]+temp[1] * mat_kernel[RADIUSY+i];
}
//write the result to dst
if((x<dst_cols) & (y<dst_rows))
{
start_addr = mad24(y, dst_step_in_pixel, x + dst_offset_in_pixel);
dst[start_addr] = convert_to_DST(sum);
}
}
This diff is collapsed.
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "test_precomp.hpp"
#include "opencv2/ts/ocl_test.hpp"
#ifdef HAVE_OPENCL
namespace cvtest {
namespace ocl {
/////////////////////////////////////////////////////////////////////////////////////////////////
// sepFilter2D
PARAM_TEST_CASE(SepFilter2D, MatDepth, Channels, BorderType, bool, bool)
{
static const int kernelMinSize = 2;
static const int kernelMaxSize = 10;
int type;
Point anchor;
int borderType;
bool useRoi;
Mat kernelX, kernelY;
TEST_DECLARE_INPUT_PARAMETER(src)
TEST_DECLARE_OUTPUT_PARAMETER(dst)
virtual void SetUp()
{
type = CV_MAKE_TYPE(GET_PARAM(0), GET_PARAM(1));
borderType = GET_PARAM(2) | (GET_PARAM(3) ? BORDER_ISOLATED : 0);
useRoi = GET_PARAM(4);
}
void random_roi()
{
Size ksize = randomSize(kernelMinSize, kernelMaxSize);
if (1 != (ksize.width % 2))
ksize.width++;
if (1 != (ksize.height % 2))
ksize.height++;
Mat temp = randomMat(Size(ksize.width, 1), CV_MAKE_TYPE(CV_32F, 1), -MAX_VALUE, MAX_VALUE);
cv::normalize(temp, kernelX, 1.0, 0.0, NORM_L1);
temp = randomMat(Size(1, ksize.height), CV_MAKE_TYPE(CV_32F, 1), -MAX_VALUE, MAX_VALUE);
cv::normalize(temp, kernelY, 1.0, 0.0, NORM_L1);
Size roiSize = randomSize(ksize.width, MAX_VALUE, ksize.height, MAX_VALUE);
int rest = roiSize.width % 4;
if (0 != rest)
roiSize.width += (4 - rest);
Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
rest = srcBorder.lef % 4;
if (0 != rest)
srcBorder.lef += (4 - rest);
rest = srcBorder.rig % 4;
if (0 != rest)
srcBorder.rig += (4 - rest);
randomSubMat(src, src_roi, roiSize, srcBorder, type, -MAX_VALUE, MAX_VALUE);
Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
randomSubMat(dst, dst_roi, roiSize, dstBorder, type, -MAX_VALUE, MAX_VALUE);
anchor.x = -1;
anchor.y = -1;
UMAT_UPLOAD_INPUT_PARAMETER(src)
UMAT_UPLOAD_OUTPUT_PARAMETER(dst)
}
void Near(double threshold = 0.0)
{
OCL_EXPECT_MATS_NEAR(dst, threshold);
}
};
OCL_TEST_P(SepFilter2D, Mat)
{
for (int j = 0; j < test_loop_times; j++)
{
random_roi();
OCL_OFF(cv::sepFilter2D(src_roi, dst_roi, -1, kernelX, kernelY, anchor, 0.0, borderType));
OCL_ON(cv::sepFilter2D(usrc_roi, udst_roi, -1, kernelX, kernelY, anchor, 0.0, borderType));
Near(1.0);
}
}
OCL_INSTANTIATE_TEST_CASE_P(ImageProc, SepFilter2D,
Combine(
Values(CV_8U, CV_32F),
Values(1, 4),
Values(
(BorderType)BORDER_CONSTANT,
(BorderType)BORDER_REPLICATE,
(BorderType)BORDER_REFLECT,
(BorderType)BORDER_REFLECT_101),
Bool(), // BORDER_ISOLATED
Bool() // ROI
)
);
} } // namespace cvtest::ocl
#endif // HAVE_OPENCL
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment