Commit 124ede61 authored by peng xiao

Update with apavlenko's suggestions.

parent 7fe84030
...@@ -691,7 +691,7 @@ namespace cv ...@@ -691,7 +691,7 @@ namespace cv
//! returns 2D filter with the specified kernel //! returns 2D filter with the specified kernel
// supports CV_8UC1 and CV_8UC4 types // supports CV_8UC1 and CV_8UC4 types
CV_EXPORTS Ptr<BaseFilter_GPU> getLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Size &ksize, CV_EXPORTS Ptr<BaseFilter_GPU> getLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Size &ksize,
Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT); const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
//! returns the non-separable linear filter engine //! returns the non-separable linear filter engine
CV_EXPORTS Ptr<FilterEngine_GPU> createLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, CV_EXPORTS Ptr<FilterEngine_GPU> createLinearFilter_GPU(int srcType, int dstType, const Mat &kernel,
......
...@@ -572,7 +572,7 @@ void cv::ocl::morphologyEx(const oclMat &src, oclMat &dst, int op, const Mat &ke ...@@ -572,7 +572,7 @@ void cv::ocl::morphologyEx(const oclMat &src, oclMat &dst, int op, const Mat &ke
namespace namespace
{ {
typedef void (*GPUFilter2D_t)(const oclMat & , oclMat & , oclMat & , Size &, const Point, const int); typedef void (*GPUFilter2D_t)(const oclMat & , oclMat & , const oclMat & , const Size &, const Point&, const int);
class LinearFilter_GPU : public BaseFilter_GPU class LinearFilter_GPU : public BaseFilter_GPU
{ {
...@@ -591,8 +591,8 @@ public: ...@@ -591,8 +591,8 @@ public:
}; };
} }
static void GPUFilter2D(const oclMat &src, oclMat &dst, oclMat &mat_kernel, static void GPUFilter2D(const oclMat &src, oclMat &dst, const oclMat &mat_kernel,
Size &ksize, const Point anchor, const int borderType) const Size &ksize, const Point& anchor, const int borderType)
{ {
CV_Assert(src.clCxt == dst.clCxt); CV_Assert(src.clCxt == dst.clCxt);
CV_Assert((src.cols == dst.cols) && CV_Assert((src.cols == dst.cols) &&
...@@ -614,7 +614,7 @@ static void GPUFilter2D(const oclMat &src, oclMat &dst, oclMat &mat_kernel, ...@@ -614,7 +614,7 @@ static void GPUFilter2D(const oclMat &src, oclMat &dst, oclMat &mat_kernel,
size_t dst_offset_x = (dst.offset % dst.step) / dst.elemSize(); size_t dst_offset_x = (dst.offset % dst.step) / dst.elemSize();
size_t dst_offset_y = dst.offset / dst.step; size_t dst_offset_y = dst.offset / dst.step;
int paddingPixels = (int)(filterWidth/2)*2; int paddingPixels = filterWidth & (-2);
size_t localThreads[3] = {ksize_3x3 ? 256 : 16, ksize_3x3 ? 1 : 16, 1}; size_t localThreads[3] = {ksize_3x3 ? 256 : 16, ksize_3x3 ? 1 : 16, 1};
size_t globalThreads[3] = {src.wholecols, src.wholerows, 1}; size_t globalThreads[3] = {src.wholecols, src.wholerows, 1};
...@@ -626,6 +626,8 @@ static void GPUFilter2D(const oclMat &src, oclMat &dst, oclMat &mat_kernel, ...@@ -626,6 +626,8 @@ static void GPUFilter2D(const oclMat &src, oclMat &dst, oclMat &mat_kernel,
int localWidth = localThreads[0] + paddingPixels; int localWidth = localThreads[0] + paddingPixels;
int localHeight = localThreads[1] + paddingPixels; int localHeight = localThreads[1] + paddingPixels;
// 260 = divup((localThreads[0] + filterWidth * 2), 4) * 4
// 6 = (ROWS_PER_GROUP_WHICH_IS_4 + filterWidth * 2)
size_t localMemSize = ksize_3x3 ? 260 * 6 * src.elemSize() : (localWidth * localHeight) * src.elemSize(); size_t localMemSize = ksize_3x3 ? 260 * 6 * src.elemSize() : (localWidth * localHeight) * src.elemSize();
int vector_lengths[4][7] = {{4, 4, 4, 4, 4, 4, 4}, int vector_lengths[4][7] = {{4, 4, 4, 4, 4, 4, 4},
...@@ -677,15 +679,16 @@ static void GPUFilter2D(const oclMat &src, oclMat &dst, oclMat &mat_kernel, ...@@ -677,15 +679,16 @@ static void GPUFilter2D(const oclMat &src, oclMat &dst, oclMat &mat_kernel,
} }
Ptr<BaseFilter_GPU> cv::ocl::getLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Size &ksize, Ptr<BaseFilter_GPU> cv::ocl::getLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Size &ksize,
Point anchor, int borderType) const Point &anchor, int borderType)
{ {
static const GPUFilter2D_t GPUFilter2D_callers[] = {0, GPUFilter2D, 0, GPUFilter2D, GPUFilter2D}; static const GPUFilter2D_t GPUFilter2D_callers[] = {0, GPUFilter2D, 0, GPUFilter2D, GPUFilter2D};
CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC3 || srcType == CV_8UC4 || srcType == CV_32FC1 || srcType == CV_32FC3 || srcType == CV_32FC4) && dstType == srcType); CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC3 || srcType == CV_8UC4 || srcType == CV_32FC1 || srcType == CV_32FC3 || srcType == CV_32FC4) && dstType == srcType);
oclMat gpu_krnl; oclMat gpu_krnl;
Point norm_archor = anchor;
normalizeKernel(kernel, gpu_krnl, CV_32FC1); normalizeKernel(kernel, gpu_krnl, CV_32FC1);
normalizeAnchor(anchor, ksize); normalizeAnchor(norm_archor, ksize);
return Ptr<BaseFilter_GPU>(new LinearFilter_GPU(ksize, anchor, gpu_krnl, GPUFilter2D_callers[CV_MAT_CN(srcType)], return Ptr<BaseFilter_GPU>(new LinearFilter_GPU(ksize, anchor, gpu_krnl, GPUFilter2D_callers[CV_MAT_CN(srcType)],
borderType)); borderType));
......
...@@ -44,7 +44,6 @@ ...@@ -44,7 +44,6 @@
// the use of this software, even if advised of the possibility of such damage. // the use of this software, even if advised of the possibility of such damage.
// //
//M*/ //M*/
//#define BORDER_REFLECT_101
/////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////Macro for border type//////////////////////////////////////////// /////////////////////////////////Macro for border type////////////////////////////////////////////
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment