Commit 6b6cfa89 authored by Andrey Pavlenko, committed by OpenCV Buildbot

Merge pull request #2382 from ilya-lavrenov:tapi_nlmeans

parents 553673ee 9b31e6cd
...@@ -26,7 +26,7 @@ OCL_PERF_TEST(Photo, DenoisingGrayscale) ...@@ -26,7 +26,7 @@ OCL_PERF_TEST(Photo, DenoisingGrayscale)
OCL_TEST_CYCLE() OCL_TEST_CYCLE()
cv::fastNlMeansDenoising(original, result, 10); cv::fastNlMeansDenoising(original, result, 10);
SANITY_CHECK(result); SANITY_CHECK(result, 1);
} }
OCL_PERF_TEST(Photo, DenoisingColored) OCL_PERF_TEST(Photo, DenoisingColored)
...@@ -42,10 +42,10 @@ OCL_PERF_TEST(Photo, DenoisingColored) ...@@ -42,10 +42,10 @@ OCL_PERF_TEST(Photo, DenoisingColored)
OCL_TEST_CYCLE() OCL_TEST_CYCLE()
cv::fastNlMeansDenoisingColored(original, result, 10, 10); cv::fastNlMeansDenoisingColored(original, result, 10, 10);
SANITY_CHECK(result); SANITY_CHECK(result, 2);
} }
OCL_PERF_TEST(Photo, DenoisingGrayscaleMulti) OCL_PERF_TEST(Photo, DISABLED_DenoisingGrayscaleMulti)
{ {
const int imgs_count = 3; const int imgs_count = 3;
...@@ -68,7 +68,7 @@ OCL_PERF_TEST(Photo, DenoisingGrayscaleMulti) ...@@ -68,7 +68,7 @@ OCL_PERF_TEST(Photo, DenoisingGrayscaleMulti)
SANITY_CHECK(result); SANITY_CHECK(result);
} }
OCL_PERF_TEST(Photo, DenoisingColoredMulti) OCL_PERF_TEST(Photo, DISABLED_DenoisingColoredMulti)
{ {
const int imgs_count = 3; const int imgs_count = 3;
......
...@@ -39,10 +39,14 @@ ...@@ -39,10 +39,14 @@
// //
//M*/ //M*/
#include "opencv2/core/base.hpp"
#ifndef __OPENCV_DENOISING_ARRAYS_HPP__ #ifndef __OPENCV_DENOISING_ARRAYS_HPP__
#define __OPENCV_DENOISING_ARRAYS_HPP__ #define __OPENCV_DENOISING_ARRAYS_HPP__
template <class T> struct Array2d { template <class T>
struct Array2d
{
T* a; T* a;
int n1,n2; int n1,n2;
bool needToDeallocArray; bool needToDeallocArray;
...@@ -50,14 +54,16 @@ template <class T> struct Array2d { ...@@ -50,14 +54,16 @@ template <class T> struct Array2d {
Array2d(const Array2d& array2d): Array2d(const Array2d& array2d):
a(array2d.a), n1(array2d.n1), n2(array2d.n2), needToDeallocArray(false) a(array2d.a), n1(array2d.n1), n2(array2d.n2), needToDeallocArray(false)
{ {
if (array2d.needToDeallocArray) { if (array2d.needToDeallocArray)
// copy constructor for self allocating arrays not supported {
throw new std::exception(); CV_Error(Error::BadDataPtr, "Copy constructor for self allocating arrays not supported");
} }
} }
Array2d(T* _a, int _n1, int _n2): Array2d(T* _a, int _n1, int _n2):
a(_a), n1(_n1), n2(_n2), needToDeallocArray(false) {} a(_a), n1(_n1), n2(_n2), needToDeallocArray(false)
{
}
Array2d(int _n1, int _n2): Array2d(int _n1, int _n2):
n1(_n1), n2(_n2), needToDeallocArray(true) n1(_n1), n2(_n2), needToDeallocArray(true)
...@@ -65,28 +71,34 @@ template <class T> struct Array2d { ...@@ -65,28 +71,34 @@ template <class T> struct Array2d {
a = new T[n1*n2]; a = new T[n1*n2];
} }
~Array2d() { ~Array2d()
if (needToDeallocArray) { {
if (needToDeallocArray)
delete[] a; delete[] a;
}
} }
T* operator [] (int i) { T* operator [] (int i)
{
return a + i*n2; return a + i*n2;
} }
inline T* row_ptr(int i) { inline T* row_ptr(int i)
{
return (*this)[i]; return (*this)[i];
} }
}; };
template <class T> struct Array3d { template <class T>
struct Array3d
{
T* a; T* a;
int n1,n2,n3; int n1,n2,n3;
bool needToDeallocArray; bool needToDeallocArray;
Array3d(T* _a, int _n1, int _n2, int _n3): Array3d(T* _a, int _n1, int _n2, int _n3):
a(_a), n1(_n1), n2(_n2), n3(_n3), needToDeallocArray(false) {} a(_a), n1(_n1), n2(_n2), n3(_n3), needToDeallocArray(false)
{
}
Array3d(int _n1, int _n2, int _n3): Array3d(int _n1, int _n2, int _n3):
n1(_n1), n2(_n2), n3(_n3), needToDeallocArray(true) n1(_n1), n2(_n2), n3(_n3), needToDeallocArray(true)
...@@ -94,64 +106,72 @@ template <class T> struct Array3d { ...@@ -94,64 +106,72 @@ template <class T> struct Array3d {
a = new T[n1*n2*n3]; a = new T[n1*n2*n3];
} }
~Array3d() { ~Array3d()
if (needToDeallocArray) { {
if (needToDeallocArray)
delete[] a; delete[] a;
}
} }
Array2d<T> operator [] (int i) { Array2d<T> operator [] (int i)
{
Array2d<T> array2d(a + i*n2*n3, n2, n3); Array2d<T> array2d(a + i*n2*n3, n2, n3);
return array2d; return array2d;
} }
inline T* row_ptr(int i1, int i2) { inline T* row_ptr(int i1, int i2)
{
return a + i1*n2*n3 + i2*n3; return a + i1*n2*n3 + i2*n3;
} }
}; };
template <class T> struct Array4d { template <class T>
struct Array4d
{
T* a; T* a;
int n1,n2,n3,n4; int n1,n2,n3,n4;
bool needToDeallocArray; bool needToDeallocArray;
int steps[4]; int steps[4];
void init_steps() { void init_steps()
{
steps[0] = n2*n3*n4; steps[0] = n2*n3*n4;
steps[1] = n3*n4; steps[1] = n3*n4;
steps[2] = n4; steps[2] = n4;
steps[3] = 1; steps[3] = 1;
} }
Array4d(T* _a, int _n1, int _n2, int _n3, int _n4): Array4d(T* _a, int _n1, int _n2, int _n3, int _n4) :
a(_a), n1(_n1), n2(_n2), n3(_n3), n4(_n4), needToDeallocArray(false) a(_a), n1(_n1), n2(_n2), n3(_n3), n4(_n4), needToDeallocArray(false)
{ {
init_steps(); init_steps();
} }
Array4d(int _n1, int _n2, int _n3, int _n4): Array4d(int _n1, int _n2, int _n3, int _n4) :
n1(_n1), n2(_n2), n3(_n3), n4(_n4), needToDeallocArray(true) n1(_n1), n2(_n2), n3(_n3), n4(_n4), needToDeallocArray(true)
{ {
a = new T[n1*n2*n3*n4]; a = new T[n1*n2*n3*n4];
init_steps(); init_steps();
} }
~Array4d() { ~Array4d()
if (needToDeallocArray) { {
if (needToDeallocArray)
delete[] a; delete[] a;
}
} }
Array3d<T> operator [] (int i) { Array3d<T> operator [] (int i)
{
Array3d<T> array3d(a + i*n2*n3*n4, n2, n3, n4); Array3d<T> array3d(a + i*n2*n3*n4, n2, n3, n4);
return array3d; return array3d;
} }
inline T* row_ptr(int i1, int i2, int i3) { inline T* row_ptr(int i1, int i2, int i3)
{
return a + i1*n2*n3*n4 + i2*n3*n4 + i3*n4; return a + i1*n2*n3*n4 + i2*n3*n4 + i3*n4;
} }
inline int step_size(int dimension) { inline int step_size(int dimension)
{
return steps[dimension]; return steps[dimension];
} }
}; };
......
...@@ -40,14 +40,17 @@ ...@@ -40,14 +40,17 @@
//M*/ //M*/
#include "precomp.hpp" #include "precomp.hpp"
#include "opencv2/photo.hpp"
#include "opencv2/imgproc.hpp"
#include "fast_nlmeans_denoising_invoker.hpp" #include "fast_nlmeans_denoising_invoker.hpp"
#include "fast_nlmeans_multi_denoising_invoker.hpp" #include "fast_nlmeans_multi_denoising_invoker.hpp"
#include "fast_nlmeans_denoising_opencl.hpp"
void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, float h, void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, float h,
int templateWindowSize, int searchWindowSize) int templateWindowSize, int searchWindowSize)
{ {
CV_OCL_RUN(_src.dims() <= 2 && (_src.isUMat() || _dst.isUMat()),
ocl_fastNlMeansDenoising(_src, _dst, h, templateWindowSize, searchWindowSize))
Mat src = _src.getMat(); Mat src = _src.getMat();
_dst.create(src.size(), src.type()); _dst.create(src.size(), src.type());
Mat dst = _dst.getMat(); Mat dst = _dst.getMat();
...@@ -83,15 +86,20 @@ void cv::fastNlMeansDenoisingColored( InputArray _src, OutputArray _dst, ...@@ -83,15 +86,20 @@ void cv::fastNlMeansDenoisingColored( InputArray _src, OutputArray _dst,
float h, float hForColorComponents, float h, float hForColorComponents,
int templateWindowSize, int searchWindowSize) int templateWindowSize, int searchWindowSize)
{ {
Mat src = _src.getMat(); if (_src.type() != CV_8UC3)
_dst.create(src.size(), src.type()); {
Mat dst = _dst.getMat();
if (src.type() != CV_8UC3) {
CV_Error(Error::StsBadArg, "Type of input image should be CV_8UC3!"); CV_Error(Error::StsBadArg, "Type of input image should be CV_8UC3!");
return; return;
} }
CV_OCL_RUN(_src.dims() <= 2 && (_dst.isUMat() || _src.isUMat()),
ocl_fastNlMeansDenoisingColored(_src, _dst, h, hForColorComponents,
templateWindowSize, searchWindowSize))
Mat src = _src.getMat();
_dst.create(src.size(), src.type());
Mat dst = _dst.getMat();
Mat src_lab; Mat src_lab;
cvtColor(src, src_lab, COLOR_LBGR2Lab); cvtColor(src, src_lab, COLOR_LBGR2Lab);
...@@ -117,7 +125,8 @@ static void fastNlMeansDenoisingMultiCheckPreconditions( ...@@ -117,7 +125,8 @@ static void fastNlMeansDenoisingMultiCheckPreconditions(
int templateWindowSize, int searchWindowSize) int templateWindowSize, int searchWindowSize)
{ {
int src_imgs_size = static_cast<int>(srcImgs.size()); int src_imgs_size = static_cast<int>(srcImgs.size());
if (src_imgs_size == 0) { if (src_imgs_size == 0)
{
CV_Error(Error::StsBadArg, "Input images vector should not be empty!"); CV_Error(Error::StsBadArg, "Input images vector should not be empty!");
} }
...@@ -136,11 +145,11 @@ static void fastNlMeansDenoisingMultiCheckPreconditions( ...@@ -136,11 +145,11 @@ static void fastNlMeansDenoisingMultiCheckPreconditions(
"should be chosen corresponding srcImgs size!"); "should be chosen corresponding srcImgs size!");
} }
for (int i = 1; i < src_imgs_size; i++) { for (int i = 1; i < src_imgs_size; i++)
if (srcImgs[0].size() != srcImgs[i].size() || srcImgs[0].type() != srcImgs[i].type()) { if (srcImgs[0].size() != srcImgs[i].size() || srcImgs[0].type() != srcImgs[i].type())
{
CV_Error(Error::StsBadArg, "Input images should have the same size and type!"); CV_Error(Error::StsBadArg, "Input images should have the same size and type!");
} }
}
} }
void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _dst, void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _dst,
...@@ -152,12 +161,13 @@ void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _ds ...@@ -152,12 +161,13 @@ void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _ds
fastNlMeansDenoisingMultiCheckPreconditions( fastNlMeansDenoisingMultiCheckPreconditions(
srcImgs, imgToDenoiseIndex, srcImgs, imgToDenoiseIndex,
temporalWindowSize, templateWindowSize, searchWindowSize temporalWindowSize, templateWindowSize, searchWindowSize);
);
_dst.create(srcImgs[0].size(), srcImgs[0].type()); _dst.create(srcImgs[0].size(), srcImgs[0].type());
Mat dst = _dst.getMat(); Mat dst = _dst.getMat();
switch (srcImgs[0].type()) { switch (srcImgs[0].type())
{
case CV_8U: case CV_8U:
parallel_for_(cv::Range(0, srcImgs[0].rows), parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<uchar>( FastNlMeansMultiDenoisingInvoker<uchar>(
...@@ -192,15 +202,15 @@ void cv::fastNlMeansDenoisingColoredMulti( InputArrayOfArrays _srcImgs, OutputAr ...@@ -192,15 +202,15 @@ void cv::fastNlMeansDenoisingColoredMulti( InputArrayOfArrays _srcImgs, OutputAr
fastNlMeansDenoisingMultiCheckPreconditions( fastNlMeansDenoisingMultiCheckPreconditions(
srcImgs, imgToDenoiseIndex, srcImgs, imgToDenoiseIndex,
temporalWindowSize, templateWindowSize, searchWindowSize temporalWindowSize, templateWindowSize, searchWindowSize);
);
_dst.create(srcImgs[0].size(), srcImgs[0].type()); _dst.create(srcImgs[0].size(), srcImgs[0].type());
Mat dst = _dst.getMat(); Mat dst = _dst.getMat();
int src_imgs_size = static_cast<int>(srcImgs.size()); int src_imgs_size = static_cast<int>(srcImgs.size());
if (srcImgs[0].type() != CV_8UC3) { if (srcImgs[0].type() != CV_8UC3)
{
CV_Error(Error::StsBadArg, "Type of input images should be CV_8UC3!"); CV_Error(Error::StsBadArg, "Type of input images should be CV_8UC3!");
return; return;
} }
...@@ -211,7 +221,8 @@ void cv::fastNlMeansDenoisingColoredMulti( InputArrayOfArrays _srcImgs, OutputAr ...@@ -211,7 +221,8 @@ void cv::fastNlMeansDenoisingColoredMulti( InputArrayOfArrays _srcImgs, OutputAr
std::vector<Mat> src_lab(src_imgs_size); std::vector<Mat> src_lab(src_imgs_size);
std::vector<Mat> l(src_imgs_size); std::vector<Mat> l(src_imgs_size);
std::vector<Mat> ab(src_imgs_size); std::vector<Mat> ab(src_imgs_size);
for (int i = 0; i < src_imgs_size; i++) { for (int i = 0; i < src_imgs_size; i++)
{
src_lab[i] = Mat::zeros(srcImgs[0].size(), CV_8UC3); src_lab[i] = Mat::zeros(srcImgs[0].size(), CV_8UC3);
l[i] = Mat::zeros(srcImgs[0].size(), CV_8UC1); l[i] = Mat::zeros(srcImgs[0].size(), CV_8UC1);
ab[i] = Mat::zeros(srcImgs[0].size(), CV_8UC2); ab[i] = Mat::zeros(srcImgs[0].size(), CV_8UC2);
......
...@@ -46,29 +46,35 @@ using namespace cv; ...@@ -46,29 +46,35 @@ using namespace cv;
template <typename T> static inline int calcDist(const T a, const T b); template <typename T> static inline int calcDist(const T a, const T b);
template <> inline int calcDist(const uchar a, const uchar b) { template <> inline int calcDist(const uchar a, const uchar b)
{
return (a-b) * (a-b); return (a-b) * (a-b);
} }
template <> inline int calcDist(const Vec2b a, const Vec2b b) { template <> inline int calcDist(const Vec2b a, const Vec2b b)
{
return (a[0]-b[0])*(a[0]-b[0]) + (a[1]-b[1])*(a[1]-b[1]); return (a[0]-b[0])*(a[0]-b[0]) + (a[1]-b[1])*(a[1]-b[1]);
} }
template <> inline int calcDist(const Vec3b a, const Vec3b b) { template <> inline int calcDist(const Vec3b a, const Vec3b b)
{
return (a[0]-b[0])*(a[0]-b[0]) + (a[1]-b[1])*(a[1]-b[1]) + (a[2]-b[2])*(a[2]-b[2]); return (a[0]-b[0])*(a[0]-b[0]) + (a[1]-b[1])*(a[1]-b[1]) + (a[2]-b[2])*(a[2]-b[2]);
} }
template <typename T> static inline int calcDist(const Mat& m, int i1, int j1, int i2, int j2) { template <typename T> static inline int calcDist(const Mat& m, int i1, int j1, int i2, int j2)
{
const T a = m.at<T>(i1, j1); const T a = m.at<T>(i1, j1);
const T b = m.at<T>(i2, j2); const T b = m.at<T>(i2, j2);
return calcDist<T>(a,b); return calcDist<T>(a,b);
} }
template <typename T> static inline int calcUpDownDist(T a_up, T a_down, T b_up, T b_down) { template <typename T> static inline int calcUpDownDist(T a_up, T a_down, T b_up, T b_down)
return calcDist(a_down,b_down) - calcDist(a_up, b_up); {
return calcDist(a_down, b_down) - calcDist(a_up, b_up);
} }
template <> inline int calcUpDownDist(uchar a_up, uchar a_down, uchar b_up, uchar b_down) { template <> inline int calcUpDownDist(uchar a_up, uchar a_down, uchar b_up, uchar b_down)
{
int A = a_down - b_down; int A = a_down - b_down;
int B = a_up - b_up; int B = a_up - b_up;
return (A-B)*(A+B); return (A-B)*(A+B);
...@@ -76,16 +82,37 @@ template <> inline int calcUpDownDist(uchar a_up, uchar a_down, uchar b_up, uch ...@@ -76,16 +82,37 @@ template <> inline int calcUpDownDist(uchar a_up, uchar a_down, uchar b_up, uch
template <typename T> static inline void incWithWeight(int* estimation, int weight, T p); template <typename T> static inline void incWithWeight(int* estimation, int weight, T p);
template <> inline void incWithWeight(int* estimation, int weight, uchar p) { template <> inline void incWithWeight(int* estimation, int weight, uchar p)
{
estimation[0] += weight * p; estimation[0] += weight * p;
} }
template <> inline void incWithWeight(int* estimation, int weight, Vec2b p) { template <> inline void incWithWeight(int* estimation, int weight, Vec2b p)
{
estimation[0] += weight * p[0]; estimation[0] += weight * p[0];
estimation[1] += weight * p[1]; estimation[1] += weight * p[1];
} }
template <> inline void incWithWeight(int* estimation, int weight, Vec3b p) { template <> inline void incWithWeight(int* estimation, int weight, Vec3b p)
{
estimation[0] += weight * p[0];
estimation[1] += weight * p[1];
estimation[2] += weight * p[2];
}
// int specialization: adds weight * p to the single accumulator slot.
template <> inline void incWithWeight(int* estimation, int weight, int p)
{
    *estimation += weight * p;
}
// Vec2i specialization: adds weight * p[c] to accumulator slot c for both channels.
template <> inline void incWithWeight(int* estimation, int weight, Vec2i p)
{
    for (int c = 0; c < 2; ++c)
        estimation[c] += weight * p[c];
}
template <> inline void incWithWeight(int* estimation, int weight, Vec3i p)
{
estimation[0] += weight * p[0]; estimation[0] += weight * p[0];
estimation[1] += weight * p[1]; estimation[1] += weight * p[1];
estimation[2] += weight * p[2]; estimation[2] += weight * p[2];
...@@ -93,18 +120,21 @@ template <> inline void incWithWeight(int* estimation, int weight, Vec3b p) { ...@@ -93,18 +120,21 @@ template <> inline void incWithWeight(int* estimation, int weight, Vec3b p) {
template <typename T> static inline T saturateCastFromArray(int* estimation); template <typename T> static inline T saturateCastFromArray(int* estimation);
template <> inline uchar saturateCastFromArray(int* estimation) { template <> inline uchar saturateCastFromArray(int* estimation)
{
return saturate_cast<uchar>(estimation[0]); return saturate_cast<uchar>(estimation[0]);
} }
template <> inline Vec2b saturateCastFromArray(int* estimation) { template <> inline Vec2b saturateCastFromArray(int* estimation)
{
Vec2b res; Vec2b res;
res[0] = saturate_cast<uchar>(estimation[0]); res[0] = saturate_cast<uchar>(estimation[0]);
res[1] = saturate_cast<uchar>(estimation[1]); res[1] = saturate_cast<uchar>(estimation[1]);
return res; return res;
} }
template <> inline Vec3b saturateCastFromArray(int* estimation) { template <> inline Vec3b saturateCastFromArray(int* estimation)
{
Vec3b res; Vec3b res;
res[0] = saturate_cast<uchar>(estimation[0]); res[0] = saturate_cast<uchar>(estimation[0]);
res[1] = saturate_cast<uchar>(estimation[1]); res[1] = saturate_cast<uchar>(estimation[1]);
...@@ -112,4 +142,20 @@ template <> inline Vec3b saturateCastFromArray(int* estimation) { ...@@ -112,4 +142,20 @@ template <> inline Vec3b saturateCastFromArray(int* estimation) {
return res; return res;
} }
// int specialization: the first accumulator slot is returned unchanged (no saturation).
template <> inline int saturateCastFromArray(int* estimation)
{
    return *estimation;
}
// Vec2i specialization: packs the first two accumulator slots into a Vec2i.
// NOTE(review): slot 1 is overwritten with 0 before packing, which both discards
// the second channel's accumulated value and mutates the caller's array --
// confirm this is intended.
template <> inline Vec2i saturateCastFromArray(int* estimation)
{
    estimation[1] = 0;
    const Vec2i packed(estimation);
    return packed;
}
// Vec3i specialization: packs the first three accumulator slots into a Vec3i (no saturation).
template <> inline Vec3i saturateCastFromArray(int* estimation)
{
    const Vec3i packed(estimation);
    return packed;
}
#endif #endif
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
#ifndef __OPENCV_FAST_NLMEANS_DENOISING_OPENCL_HPP__
#define __OPENCV_FAST_NLMEANS_DENOISING_OPENCL_HPP__
#include "precomp.hpp"
#include "opencl_kernels.hpp"
#ifdef HAVE_OPENCL
namespace cv {
// Tiling and launch constants used by the OpenCL fast NL-means host code in this file.
// All three are also forwarded to the kernel build options as -D defines.
enum
{
    BLOCK_ROWS = 32,   // image height is split into divUp(height, BLOCK_ROWS) blocks
    BLOCK_COLS = 32,   // image width is split into divUp(width, BLOCK_COLS) blocks
    CTA_SIZE   = 256   // local work size along dimension 0 of the denoising kernel launch
};
// Returns a / b rounded up to the next integer (for non-negative a and positive b).
static int divUp(int a, int b)
{
    const int biasedNumerator = a + b - 1;
    return biasedNumerator / b;
}
// Fills almostDist2Weight with a 1 x almostMaxDist table of CV_32SC1 weights by
// running the "calcAlmostDist2Weight" OpenCL kernel.
//
//   almostDist2Weight                   - output table, (re)created here
//   searchWindowSize                    - search window side; bounds the fixed-point multiplier
//   templateWindowSize                  - template window side; must not exceed 46340
//   h                                   - filter strength; the kernel receives 1 / (h * h * cn)
//   cn                                  - number of channels
//   almostTemplateWindowSizeSqBinShift  - output: getNearestPowerOf2(templateWindowSize^2)
//
// Returns false when FT is a 64-bit float type and the default device has no
// double support, when the kernel cannot be created, or when the launch fails.
template <typename FT>
static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindowSize, int templateWindowSize, FT h, int cn,
                                      int & almostTemplateWindowSizeSqBinShift)
{
    const int maxEstimateSumValue = searchWindowSize * searchWindowSize * 255;
    int fixedPointMult = std::numeric_limits<int>::max() / maxEstimateSumValue;
    int depth = DataType<FT>::depth;
    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;

    if (depth == CV_64F && !doubleSupport)
        return false;

    // precalc weight for every possible l2 dist between blocks
    // additional optimization of precalced weights to replace division(averaging) by binary shift
    CV_Assert(templateWindowSize <= 46340); // sqrt(INT_MAX)
    int templateWindowSizeSq = templateWindowSize * templateWindowSize;

    almostTemplateWindowSizeSqBinShift = getNearestPowerOf2(templateWindowSizeSq);
    FT almostDist2ActualDistMultiplier = (FT)(1 << almostTemplateWindowSizeSqBinShift) / templateWindowSizeSq;

    const FT WEIGHT_THRESHOLD = 1e-3f;
    int maxDist = 255 * 255 * cn;
    int almostMaxDist = (int)(maxDist / almostDist2ActualDistMultiplier + 1);
    FT den = 1.0f / (h * h * cn);

    almostDist2Weight.create(1, almostMaxDist, CV_32SC1);

    ocl::Kernel k("calcAlmostDist2Weight", ocl::photo::nlmeans_oclsrc,
                  format("-D OP_CALC_WEIGHTS -D FT=%s%s", ocl::typeToStr(depth),
                         doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
    if (k.empty())
        return false;

    k.args(ocl::KernelArg::PtrWriteOnly(almostDist2Weight), almostMaxDist,
           almostDist2ActualDistMultiplier, fixedPointMult, den, WEIGHT_THRESHOLD);

    // One work item per table entry. The conversion is explicit because
    // brace-initialising a size_t from a non-constant int is a narrowing
    // conversion, which C++11 and later reject.
    size_t globalsize[1] = { static_cast<size_t>(almostMaxDist) };
    return k.run(1, globalsize, NULL, false);
}
// OpenCL implementation of fast non-local means denoising.
//
//   _src                - input image; only CV_8UC1, CV_8UC2 and CV_8UC4 are handled
//   _dst                - output image, created with the size and type of _src
//   h                   - filter strength, forwarded to the weight-table computation
//   templateWindowSize  - template window side, forced to the odd value 2 * (size / 2) + 1
//   searchWindowSize    - search window side, forced to odd the same way
//
// Returns false when the input type is not handled, when the kernel cannot be
// created, or when the weight table cannot be computed; otherwise returns the
// result of launching the "fastNlMeansDenoising" kernel.
static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
                                     int templateWindowSize, int searchWindowSize)
{
    int type = _src.type(), cn = CV_MAT_CN(type);
    Size size = _src.size();

    // Reject everything except the three handled types. The comparisons must be
    // joined with &&: joined with || the condition is true for every type, so the
    // function would always bail out and the OpenCL path would never run.
    if ( type != CV_8UC1 && type != CV_8UC2 && type != CV_8UC4 )
        return false;

    int templateWindowHalfSize = templateWindowSize / 2;
    int searchWindowHalfSize = searchWindowSize / 2;
    templateWindowSize = templateWindowHalfSize * 2 + 1;
    searchWindowSize = searchWindowHalfSize * 2 + 1;

    int nblocksx = divUp(size.width, BLOCK_COLS), nblocksy = divUp(size.height, BLOCK_ROWS);
    int almostTemplateWindowSizeSqBinShift = -1;

    char cvt[2][40];
    String opts = format("-D OP_CALC_FASTNLMEANS -D TEMPLATE_SIZE=%d -D SEARCH_SIZE=%d"
                         " -D uchar_t=%s -D int_t=%s -D BLOCK_COLS=%d -D BLOCK_ROWS=%d"
                         " -D CTA_SIZE=%d -D TEMPLATE_SIZE2=%d -D SEARCH_SIZE2=%d"
                         " -D convert_int_t=%s -D cn=%d -D CTA_SIZE2=%d -D convert_uchar_t=%s",
                         templateWindowSize, searchWindowSize, ocl::typeToStr(type),
                         ocl::typeToStr(CV_32SC(cn)), BLOCK_COLS, BLOCK_ROWS, CTA_SIZE,
                         templateWindowHalfSize, searchWindowHalfSize,
                         ocl::convertTypeStr(CV_8U, CV_32S, cn, cvt[0]), cn,
                         CTA_SIZE >> 1, ocl::convertTypeStr(CV_32S, CV_8U, cn, cvt[1]));

    ocl::Kernel k("fastNlMeansDenoising", ocl::photo::nlmeans_oclsrc, opts);
    if (k.empty())
        return false;

    UMat almostDist2Weight;
    if (!ocl_calcAlmostDist2Weight<float>(almostDist2Weight, searchWindowSize, templateWindowSize, h, cn,
                                         almostTemplateWindowSizeSqBinShift))
        return false;
    CV_Assert(almostTemplateWindowSizeSqBinShift >= 0);

    // Pad the source by the combined half-window reach on every side, then take
    // the original-sized view back out of the padded image so the kernel is
    // handed a sub-matrix that has border pixels around it.
    UMat srcex;
    int borderSize = searchWindowHalfSize + templateWindowHalfSize;
    copyMakeBorder(_src, srcex, borderSize, borderSize, borderSize, borderSize, BORDER_DEFAULT);

    _dst.create(size, type);
    UMat dst = _dst.getUMat();

    // Scratch storage sized as the sum of the two column-sum extents.
    int searchWindowSizeSq = searchWindowSize * searchWindowSize;
    Size upColSumSize(size.width, searchWindowSizeSq * nblocksy);
    Size colSumSize(nblocksx * templateWindowSize, searchWindowSizeSq * nblocksy);
    UMat buffer(upColSumSize + colSumSize, CV_32SC(cn));

    srcex = srcex(Rect(Point(borderSize, borderSize), size));

    // NOTE(review): buffer is bound as PtrReadOnly; if the kernel writes to this
    // scratch storage the access mode should be revisited -- kernel source is
    // not visible from here.
    k.args(ocl::KernelArg::ReadOnlyNoSize(srcex), ocl::KernelArg::WriteOnly(dst),
           ocl::KernelArg::PtrReadOnly(almostDist2Weight),
           ocl::KernelArg::PtrReadOnly(buffer), almostTemplateWindowSizeSqBinShift);

    // Explicit size_t conversions: brace-initialising size_t from non-constant
    // int values is a narrowing conversion rejected by C++11 and later.
    size_t globalsize[2] = { static_cast<size_t>(nblocksx) * CTA_SIZE, static_cast<size_t>(nblocksy) };
    size_t localsize[2] = { CTA_SIZE, 1 };
    return k.run(2, globalsize, localsize, false);
}
// UMat-based colored denoising: converts the input to Lab, splits it into a
// one-channel plane (Lab channel 0) and a two-channel plane (Lab channels 1-2),
// denoises the first with strength h and the second with hForColorComponents,
// then merges the planes and converts back. Always returns true.
static bool ocl_fastNlMeansDenoisingColored( InputArray _src, OutputArray _dst,
                                      float h, float hForColorComponents,
                                      int templateWindowSize, int searchWindowSize)
{
    UMat src = _src.getUMat();
    const Size imageSize = src.size();
    const int imageType = src.type();

    _dst.create(imageSize, imageType);
    UMat dst = _dst.getUMat();

    UMat labImage;
    cvtColor(src, labImage, COLOR_LBGR2Lab);

    // planes[0] / denoisedPlanes[0]: one channel; planes[1] / denoisedPlanes[1]: two channels.
    std::vector<UMat> planes, denoisedPlanes(2);
    planes.push_back(UMat(imageSize, CV_8U));
    planes.push_back(UMat(imageSize, CV_8UC2));
    denoisedPlanes[0].create(imageSize, CV_8U);
    denoisedPlanes[1].create(imageSize, CV_8UC2);

    // Identity channel mapping, used both for the split and for the merge.
    int channelPairs[] = { 0,0, 1,1, 2,2 };
    mixChannels(std::vector<UMat>(1, labImage), planes, channelPairs, 3);

    const float strengths[2] = { h, hForColorComponents };
    for (int plane = 0; plane < 2; ++plane)
        fastNlMeansDenoising(planes[plane], denoisedPlanes[plane], strengths[plane],
                             templateWindowSize, searchWindowSize);

    UMat denoisedLab(imageSize, imageType);
    mixChannels(denoisedPlanes, std::vector<UMat>(1, denoisedLab), channelPairs, 3);

    cvtColor(denoisedLab, dst, COLOR_Lab2LBGR);
    return true;
}
}
#endif
#endif
This diff is collapsed.
...@@ -46,6 +46,8 @@ ...@@ -46,6 +46,8 @@
#include "opencv2/core/private.hpp" #include "opencv2/core/private.hpp"
#include "opencv2/core/utility.hpp" #include "opencv2/core/utility.hpp"
#include "opencv2/photo.hpp" #include "opencv2/photo.hpp"
#include "opencv2/core/ocl.hpp"
#include "opencv2/imgproc.hpp"
#ifdef HAVE_TEGRA_OPTIMIZATION #ifdef HAVE_TEGRA_OPTIMIZATION
#include "opencv2/photo/photo_tegra.hpp" #include "opencv2/photo/photo_tegra.hpp"
......
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
#include "test_precomp.hpp"
#include "opencv2/ts/ocl_test.hpp"
#ifdef HAVE_OPENCL
namespace cvtest {
namespace ocl {
// Fixture shared by the fast NL-means OpenCL tests.
// Parameter 0 is the channel count (cn); parameter 1 selects whether the
// source/destination are sub-matrices with random borders (use_roi).
PARAM_TEST_CASE(FastNlMeansDenoisingTestBase, Channels, bool)
{
    int cn;
    int templateWindowSize;
    int searchWindowSize;
    float h;
    bool use_roi;

    TEST_DECLARE_INPUT_PARAMETER(src)
    TEST_DECLARE_OUTPUT_PARAMETER(dst)

    // Fixed filter settings plus the two test parameters.
    virtual void SetUp()
    {
        h = 3.0f;
        templateWindowSize = 7;
        searchWindowSize = 21;

        cn = GET_PARAM(0);
        use_roi = GET_PARAM(1);
    }

    // Builds src/dst and their ROIs. Single-channel runs use the noisy test
    // image as ROI content; other channel counts keep the random fill.
    virtual void generateTestData()
    {
        const bool singleChannel = (cn == 1);

        Mat image;
        if (singleChannel)
        {
            image = readImage("denoising/lena_noised_gaussian_sigma=10.png", IMREAD_GRAYSCALE);
            ASSERT_FALSE(image.empty());
        }

        const int type = CV_8UC(cn);

        Size roiSize;
        if (singleChannel)
            roiSize = image.size();
        else
            roiSize = randomSize(1, MAX_VALUE);

        // The order of the random* calls below is kept as-is so the generated
        // data sequence does not change.
        Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
        randomSubMat(src, src_roi, roiSize, srcBorder, type, 0, 255);
        if (singleChannel)
        {
            image.copyTo(src_roi);
        }

        Border dstBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
        randomSubMat(dst, dst_roi, roiSize, dstBorder, type, 0, 255);

        UMAT_UPLOAD_INPUT_PARAMETER(src)
        UMAT_UPLOAD_OUTPUT_PARAMETER(dst)
    }
};
typedef FastNlMeansDenoisingTestBase FastNlMeansDenoising;

// Runs cv::fastNlMeansDenoising once with OpenCL off (Mat ROIs) and once with
// OpenCL on (UMat ROIs) on freshly generated data, test_loop_times times, and
// checks dst with OCL_EXPECT_MATS_NEAR using a threshold of 1.
OCL_TEST_P(FastNlMeansDenoising, Mat)
{
    int remaining = test_loop_times;
    while (remaining-- > 0)
    {
        generateTestData();

        OCL_OFF(cv::fastNlMeansDenoising(src_roi, dst_roi, h, templateWindowSize, searchWindowSize));
        OCL_ON(cv::fastNlMeansDenoising(usrc_roi, udst_roi, h, templateWindowSize, searchWindowSize));

        OCL_EXPECT_MATS_NEAR(dst, 1)
    }
}
typedef FastNlMeansDenoisingTestBase fastNlMeansDenoisingColored;

// Runs cv::fastNlMeansDenoisingColored once with OpenCL off (Mat ROIs) and once
// with OpenCL on (UMat ROIs) on freshly generated data, test_loop_times times,
// passing h for both strength arguments, and checks dst with
// OCL_EXPECT_MATS_NEAR using a threshold of 1.
OCL_TEST_P(fastNlMeansDenoisingColored, Mat)
{
    int remaining = test_loop_times;
    while (remaining-- > 0)
    {
        generateTestData();

        OCL_OFF(cv::fastNlMeansDenoisingColored(src_roi, dst_roi, h, h, templateWindowSize, searchWindowSize));
        OCL_ON(cv::fastNlMeansDenoisingColored(usrc_roi, udst_roi, h, h, templateWindowSize, searchWindowSize));

        OCL_EXPECT_MATS_NEAR(dst, 1)
    }
}
// FastNlMeansDenoising: channel counts 1 and 2, each with use_roi false and true.
OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoising, Combine(Values(1, 2), Bool()));
// fastNlMeansDenoisingColored: 3 channels only, with use_roi false and true.
OCL_INSTANTIATE_TEST_CASE_P(Photo, fastNlMeansDenoisingColored, Combine(Values(Channels(3)), Bool()));
} } // namespace cvtest::ocl
#endif // HAVE_OPENCL
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment