Commit 5501cfd8 authored by Vadim Pisarevsky's avatar Vadim Pisarevsky

Merge pull request #3814 from erikrk:denoising-16bit-master

parents 7ea02397 01d3df0d
......@@ -442,6 +442,10 @@ template<typename _Tp> static inline _Tp saturate_cast(int v) { return _Tp(
template<typename _Tp> static inline _Tp saturate_cast(float v) { return _Tp(v); }
/** @overload */
template<typename _Tp> static inline _Tp saturate_cast(double v) { return _Tp(v); }
/** @overload */
template<typename _Tp> static inline _Tp saturate_cast(int64 v) { return _Tp(v); }
/** @overload */
template<typename _Tp> static inline _Tp saturate_cast(uint64 v) { return _Tp(v); }
//! @cond IGNORED
......@@ -452,6 +456,8 @@ template<> inline uchar saturate_cast<uchar>(short v) { return saturate_c
template<> inline uchar saturate_cast<uchar>(unsigned v) { return (uchar)std::min(v, (unsigned)UCHAR_MAX); }
template<> inline uchar saturate_cast<uchar>(float v) { int iv = cvRound(v); return saturate_cast<uchar>(iv); }
template<> inline uchar saturate_cast<uchar>(double v) { int iv = cvRound(v); return saturate_cast<uchar>(iv); }
template<> inline uchar saturate_cast<uchar>(int64 v) { return (uchar)((uint64)v <= (uint64)UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0); }
template<> inline uchar saturate_cast<uchar>(uint64 v) { return (uchar)std::min(v, (uint64)UCHAR_MAX); }
template<> inline schar saturate_cast<schar>(uchar v) { return (schar)std::min((int)v, SCHAR_MAX); }
template<> inline schar saturate_cast<schar>(ushort v) { return (schar)std::min((unsigned)v, (unsigned)SCHAR_MAX); }
......@@ -460,6 +466,8 @@ template<> inline schar saturate_cast<schar>(short v) { return saturate_c
template<> inline schar saturate_cast<schar>(unsigned v) { return (schar)std::min(v, (unsigned)SCHAR_MAX); }
template<> inline schar saturate_cast<schar>(float v) { int iv = cvRound(v); return saturate_cast<schar>(iv); }
template<> inline schar saturate_cast<schar>(double v) { int iv = cvRound(v); return saturate_cast<schar>(iv); }
template<> inline schar saturate_cast<schar>(int64 v) { return (schar)((uint64)((int64)v-SCHAR_MIN) <= (uint64)UCHAR_MAX ? v : v > 0 ? SCHAR_MAX : SCHAR_MIN); }
template<> inline schar saturate_cast<schar>(uint64 v) { return (schar)std::min(v, (uint64)SCHAR_MAX); }
template<> inline ushort saturate_cast<ushort>(schar v) { return (ushort)std::max((int)v, 0); }
template<> inline ushort saturate_cast<ushort>(short v) { return (ushort)std::max((int)v, 0); }
......@@ -467,12 +475,16 @@ template<> inline ushort saturate_cast<ushort>(int v) { return (ushort)((
template<> inline ushort saturate_cast<ushort>(unsigned v) { return (ushort)std::min(v, (unsigned)USHRT_MAX); }
template<> inline ushort saturate_cast<ushort>(float v) { int iv = cvRound(v); return saturate_cast<ushort>(iv); }
template<> inline ushort saturate_cast<ushort>(double v) { int iv = cvRound(v); return saturate_cast<ushort>(iv); }
template<> inline ushort saturate_cast<ushort>(int64 v) { return (ushort)((uint64)v <= (uint64)USHRT_MAX ? v : v > 0 ? USHRT_MAX : 0); }
template<> inline ushort saturate_cast<ushort>(uint64 v) { return (ushort)std::min(v, (uint64)USHRT_MAX); }
template<> inline short saturate_cast<short>(ushort v) { return (short)std::min((int)v, SHRT_MAX); }
template<> inline short saturate_cast<short>(int v) { return (short)((unsigned)(v - SHRT_MIN) <= (unsigned)USHRT_MAX ? v : v > 0 ? SHRT_MAX : SHRT_MIN); }
template<> inline short saturate_cast<short>(unsigned v) { return (short)std::min(v, (unsigned)SHRT_MAX); }
template<> inline short saturate_cast<short>(float v) { int iv = cvRound(v); return saturate_cast<short>(iv); }
template<> inline short saturate_cast<short>(double v) { int iv = cvRound(v); return saturate_cast<short>(iv); }
template<> inline short saturate_cast<short>(int64 v) { return (short)((uint64)((int64)v - SHRT_MIN) <= (uint64)USHRT_MAX ? v : v > 0 ? SHRT_MAX : SHRT_MIN); }
template<> inline short saturate_cast<short>(uint64 v) { return (short)std::min(v, (uint64)SHRT_MAX); }
template<> inline int saturate_cast<int>(float v) { return cvRound(v); }
template<> inline int saturate_cast<int>(double v) { return cvRound(v); }
......
......@@ -119,7 +119,7 @@ CV_EXPORTS_W void inpaint( InputArray src, InputArray inpaintMask,
<http://www.ipol.im/pub/algo/bcm_non_local_means_denoising/> with several computational
optimizations. Noise expected to be a gaussian white noise
@param src Input 8-bit 1-channel, 2-channel or 3-channel image.
@param src Input 8-bit 1-channel, 2-channel, 3-channel or 4-channel image.
@param dst Output image with the same size and type as src .
@param templateWindowSize Size in pixels of the template patch that is used to compute weights.
Should be odd. Recommended value 7 pixels
......@@ -138,6 +138,35 @@ parameter.
CV_EXPORTS_W void fastNlMeansDenoising( InputArray src, OutputArray dst, float h = 3,
int templateWindowSize = 7, int searchWindowSize = 21);
/** @brief Perform image denoising using Non-local Means Denoising algorithm
<http://www.ipol.im/pub/algo/bcm_non_local_means_denoising/> with several computational
optimizations. Noise expected to be a gaussian white noise
@param src Input 8-bit or 16-bit (only with NORM_L1) 1-channel,
2-channel, 3-channel or 4-channel image.
@param dst Output image with the same size and type as src .
@param templateWindowSize Size in pixels of the template patch that is used to compute weights.
Should be odd. Recommended value 7 pixels
@param searchWindowSize Size in pixels of the window that is used to compute weighted average for
given pixel. Should be odd. Affect performance linearly: greater searchWindowsSize - greater
denoising time. Recommended value 21 pixels
@param h Array of parameters regulating filter strength, either one
parameter applied to all channels or one per channel in dst. Big h value
perfectly removes noise but also removes image details, smaller h
value preserves details but also preserves some noise
@param normType Type of norm used for weight calculation. Can be either NORM_L2 or NORM_L1
This function expected to be applied to grayscale images. For colored images look at
fastNlMeansDenoisingColored. Advanced usage of this functions can be manual denoising of colored
image in different colorspaces. Such approach is used in fastNlMeansDenoisingColored by converting
image to CIELAB colorspace and then separately denoise L and AB components with different h
parameter.
*/
CV_EXPORTS_W void fastNlMeansDenoising( InputArray src, OutputArray dst,
const std::vector<float>& h,
int templateWindowSize = 7, int searchWindowSize = 21,
int normType = NORM_L2);
/** @brief Modification of fastNlMeansDenoising function for colored images
@param src Input 8-bit 3-channel image.
......@@ -165,8 +194,9 @@ captured in small period of time. For example video. This version of the functio
images or for manual manipulation with colorspaces. For more details see
<http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.131.6394>
@param srcImgs Input 8-bit 1-channel, 2-channel or 3-channel images sequence. All images should
have the same type and size.
@param srcImgs Input 8-bit 1-channel, 2-channel, 3-channel or
4-channel images sequence. All images should have the same type and
size.
@param imgToDenoiseIndex Target image to denoise index in srcImgs sequence
@param temporalWindowSize Number of surrounding images to use for target image denoising. Should
be odd. Images from imgToDenoiseIndex - temporalWindowSize / 2 to
......@@ -178,14 +208,45 @@ Should be odd. Recommended value 7 pixels
@param searchWindowSize Size in pixels of the window that is used to compute weighted average for
given pixel. Should be odd. Affect performance linearly: greater searchWindowsSize - greater
denoising time. Recommended value 21 pixels
@param h Parameter regulating filter strength for luminance component. Bigger h value perfectly
removes noise but also removes image details, smaller h value preserves details but also preserves
some noise
@param h Parameter regulating filter strength. Bigger h value
perfectly removes noise but also removes image details, smaller h
value preserves details but also preserves some noise
*/
CV_EXPORTS_W void fastNlMeansDenoisingMulti( InputArrayOfArrays srcImgs, OutputArray dst,
int imgToDenoiseIndex, int temporalWindowSize,
float h = 3, int templateWindowSize = 7, int searchWindowSize = 21);
/** @brief Modification of fastNlMeansDenoising function for images sequence where consequtive images have been
captured in small period of time. For example video. This version of the function is for grayscale
images or for manual manipulation with colorspaces. For more details see
<http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.131.6394>
@param srcImgs Input 8-bit or 16-bit (only with NORM_L1) 1-channel,
2-channel, 3-channel or 4-channel images sequence. All images should
have the same type and size.
@param imgToDenoiseIndex Target image to denoise index in srcImgs sequence
@param temporalWindowSize Number of surrounding images to use for target image denoising. Should
be odd. Images from imgToDenoiseIndex - temporalWindowSize / 2 to
imgToDenoiseIndex - temporalWindowSize / 2 from srcImgs will be used to denoise
srcImgs[imgToDenoiseIndex] image.
@param dst Output image with the same size and type as srcImgs images.
@param templateWindowSize Size in pixels of the template patch that is used to compute weights.
Should be odd. Recommended value 7 pixels
@param searchWindowSize Size in pixels of the window that is used to compute weighted average for
given pixel. Should be odd. Affect performance linearly: greater searchWindowsSize - greater
denoising time. Recommended value 21 pixels
@param h Array of parameters regulating filter strength, either one
parameter applied to all channels or one per channel in dst. Big h value
perfectly removes noise but also removes image details, smaller h
value preserves details but also preserves some noise
@param normType Type of norm used for weight calculation. Can be either NORM_L2 or NORM_L1
*/
CV_EXPORTS_W void fastNlMeansDenoisingMulti( InputArrayOfArrays srcImgs, OutputArray dst,
int imgToDenoiseIndex, int temporalWindowSize,
const std::vector<float>& h,
int templateWindowSize = 7, int searchWindowSize = 21,
int normType = NORM_L2);
/** @brief Modification of fastNlMeansDenoisingMulti function for colored images sequences
@param srcImgs Input 8-bit 3-channel images sequence. All images should have the same type and
......
This diff is collapsed.
......@@ -50,13 +50,13 @@
using namespace cv;
template <typename T>
template <typename T, typename IT, typename UIT, typename D, typename WT>
struct FastNlMeansDenoisingInvoker :
public ParallelLoopBody
{
public:
FastNlMeansDenoisingInvoker(const Mat& src, Mat& dst,
int template_window_size, int search_window_size, const float h);
int template_window_size, int search_window_size, const float *h);
void operator() (const Range& range) const;
......@@ -75,9 +75,9 @@ private:
int template_window_half_size_;
int search_window_half_size_;
int fixed_point_mult_;
typename pixelInfo<WT>::sampleType fixed_point_mult_;
int almost_template_window_size_sq_bin_shift_;
std::vector<int> almost_dist2weight_;
std::vector<WT> almost_dist2weight_;
void calcDistSumsForFirstElementInRow(
int i, Array2d<int>& dist_sums,
......@@ -99,15 +99,15 @@ inline int getNearestPowerOf2(int value)
return p;
}
template <class T>
FastNlMeansDenoisingInvoker<T>::FastNlMeansDenoisingInvoker(
template <typename T, typename IT, typename UIT, typename D, typename WT>
FastNlMeansDenoisingInvoker<T, IT, UIT, D, WT>::FastNlMeansDenoisingInvoker(
const Mat& src, Mat& dst,
int template_window_size,
int search_window_size,
const float h) :
const float *h) :
src_(src), dst_(dst)
{
CV_Assert(src.channels() == sizeof(T)); //T is Vec1b or Vec2b or Vec3b
CV_Assert(src.channels() == pixelInfo<T>::channels);
template_window_half_size_ = template_window_size / 2;
search_window_half_size_ = search_window_size / 2;
......@@ -117,8 +117,10 @@ FastNlMeansDenoisingInvoker<T>::FastNlMeansDenoisingInvoker(
border_size_ = search_window_half_size_ + template_window_half_size_;
copyMakeBorder(src_, extended_src_, border_size_, border_size_, border_size_, border_size_, BORDER_DEFAULT);
const int max_estimate_sum_value = search_window_size_ * search_window_size_ * 255;
fixed_point_mult_ = std::numeric_limits<int>::max() / max_estimate_sum_value;
const IT max_estimate_sum_value =
(IT)search_window_size_ * (IT)search_window_size_ * (IT)pixelInfo<T>::sampleMax();
fixed_point_mult_ = (int)std::min<IT>(std::numeric_limits<IT>::max() / max_estimate_sum_value,
pixelInfo<WT>::sampleMax());
// precalc weight for every possible l2 dist between blocks
// additional optimization of precalced weights to replace division(averaging) by binary shift
......@@ -127,30 +129,24 @@ FastNlMeansDenoisingInvoker<T>::FastNlMeansDenoisingInvoker(
almost_template_window_size_sq_bin_shift_ = getNearestPowerOf2(template_window_size_sq);
double almost_dist2actual_dist_multiplier = ((double)(1 << almost_template_window_size_sq_bin_shift_)) / template_window_size_sq;
int max_dist = 255 * 255 * sizeof(T);
int max_dist = D::template maxDist<T>();
int almost_max_dist = (int)(max_dist / almost_dist2actual_dist_multiplier + 1);
almost_dist2weight_.resize(almost_max_dist);
const double WEIGHT_THRESHOLD = 0.001;
for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++)
{
double dist = almost_dist * almost_dist2actual_dist_multiplier;
int weight = cvRound(fixed_point_mult_ * std::exp(-dist / (h * h * sizeof(T))));
if (weight < WEIGHT_THRESHOLD * fixed_point_mult_)
weight = 0;
almost_dist2weight_[almost_dist] = weight;
almost_dist2weight_[almost_dist] =
D::template calcWeight<T, WT>(dist, h, fixed_point_mult_);
}
CV_Assert(almost_dist2weight_[0] == fixed_point_mult_);
// additional optimization init end
if (dst_.empty())
dst_ = Mat::zeros(src_.size(), src_.type());
}
template <class T>
void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const
template <typename T, typename IT, typename UIT, typename D, typename WT>
void FastNlMeansDenoisingInvoker<T, IT, UIT, D, WT>::operator() (const Range& range) const
{
int row_from = range.start;
int row_to = range.end - 1;
......@@ -215,7 +211,7 @@ void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const
dist_sums_row[x] -= col_dist_sums_row[x];
int bx = start_bx + x;
col_dist_sums_row[x] = up_col_dist_sums_row[x] + calcUpDownDist(a_up, a_down, b_up_ptr[bx], b_down_ptr[bx]);
col_dist_sums_row[x] = up_col_dist_sums_row[x] + D::template calcUpDownDist<T>(a_up, a_down, b_up_ptr[bx], b_down_ptr[bx]);
dist_sums_row[x] += col_dist_sums_row[x];
up_col_dist_sums_row[x] = col_dist_sums_row[x];
......@@ -227,9 +223,11 @@ void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const
}
// calc weights
int estimation[3], weights_sum = 0;
for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++)
IT estimation[pixelInfo<T>::channels], weights_sum[pixelInfo<WT>::channels];
for (size_t channel_num = 0; channel_num < pixelInfo<T>::channels; channel_num++)
estimation[channel_num] = 0;
for (size_t channel_num = 0; channel_num < pixelInfo<WT>::channels; channel_num++)
weights_sum[channel_num] = 0;
for (int y = 0; y < search_window_size_; y++)
{
......@@ -238,24 +236,21 @@ void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const
for (int x = 0; x < search_window_size_; x++)
{
int almostAvgDist = dist_sums_row[x] >> almost_template_window_size_sq_bin_shift_;
int weight = almost_dist2weight_[almostAvgDist];
weights_sum += weight;
WT weight = almost_dist2weight_[almostAvgDist];
T p = cur_row_ptr[border_size_ + search_window_x + x];
incWithWeight(estimation, weight, p);
incWithWeight<T, IT, WT>(estimation, weights_sum, weight, p);
}
}
for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++)
estimation[channel_num] = ((unsigned)estimation[channel_num] + weights_sum/2) / weights_sum;
dst_.at<T>(i,j) = saturateCastFromArray<T>(estimation);
divByWeightsSum<IT, UIT, pixelInfo<T>::channels, pixelInfo<WT>::channels>(estimation,
weights_sum);
dst_.at<T>(i,j) = saturateCastFromArray<T, IT>(estimation);
}
}
}
template <class T>
inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForFirstElementInRow(
template <typename T, typename IT, typename UIT, typename D, typename WT>
inline void FastNlMeansDenoisingInvoker<T, IT, UIT, D, WT>::calcDistSumsForFirstElementInRow(
int i,
Array2d<int>& dist_sums,
Array3d<int>& col_dist_sums,
......@@ -276,7 +271,7 @@ inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForFirstElementInRow(
for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++)
for (int tx = -template_window_half_size_; tx <= template_window_half_size_; tx++)
{
int dist = calcDist<T>(extended_src_,
int dist = D::template calcDist<T>(extended_src_,
border_size_ + i + ty, border_size_ + j + tx,
border_size_ + start_y + ty, border_size_ + start_x + tx);
......@@ -288,8 +283,8 @@ inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForFirstElementInRow(
}
}
template <class T>
inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForElementInFirstRow(
template <typename T, typename IT, typename UIT, typename D, typename WT>
inline void FastNlMeansDenoisingInvoker<T, IT, UIT, D, WT>::calcDistSumsForElementInFirstRow(
int i, int j, int first_col_num,
Array2d<int>& dist_sums,
Array3d<int>& col_dist_sums,
......@@ -312,7 +307,7 @@ inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForElementInFirstRow(
int by = start_by + y;
int bx = start_bx + x;
for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++)
col_dist_sums[new_last_col_num][y][x] += calcDist<T>(extended_src_, ay + ty, ax, by + ty, bx);
col_dist_sums[new_last_col_num][y][x] += D::template calcDist<T>(extended_src_, ay + ty, ax, by + ty, bx);
dist_sums[y][x] += col_dist_sums[new_last_col_num][y][x];
up_col_dist_sums[j][y][x] = col_dist_sums[new_last_col_num][y][x];
......
......@@ -28,12 +28,16 @@ static int divUp(int a, int b)
return (a + b - 1) / b;
}
template <typename FT>
static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindowSize, int templateWindowSize, FT h, int cn,
template <typename FT, typename ST, typename WT>
static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight,
int searchWindowSize, int templateWindowSize,
const FT *h, int hn, int cn, int normType,
int & almostTemplateWindowSizeSqBinShift)
{
const int maxEstimateSumValue = searchWindowSize * searchWindowSize * 255;
int fixedPointMult = std::numeric_limits<int>::max() / maxEstimateSumValue;
const WT maxEstimateSumValue = searchWindowSize * searchWindowSize *
std::numeric_limits<ST>::max();
int fixedPointMult = (int)std::min<WT>(std::numeric_limits<WT>::max() / maxEstimateSumValue,
std::numeric_limits<int>::max());
int depth = DataType<FT>::depth;
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
......@@ -48,33 +52,44 @@ static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindow
FT almostDist2ActualDistMultiplier = (FT)(1 << almostTemplateWindowSizeSqBinShift) / templateWindowSizeSq;
const FT WEIGHT_THRESHOLD = 1e-3f;
int maxDist = 255 * 255 * cn;
int maxDist = normType == NORM_L1 ? std::numeric_limits<ST>::max() * cn :
std::numeric_limits<ST>::max() * std::numeric_limits<ST>::max() * cn;
int almostMaxDist = (int)(maxDist / almostDist2ActualDistMultiplier + 1);
FT den = 1.0f / (h * h * cn);
FT den[4];
CV_Assert(hn > 0 && hn <= 4);
for (int i=0; i<hn; i++)
den[i] = 1.0f / (h[i] * h[i] * cn);
almostDist2Weight.create(1, almostMaxDist, CV_32SC1);
almostDist2Weight.create(1, almostMaxDist, CV_32SC(hn == 3 ? 4 : hn));
char buf[40];
ocl::Kernel k("calcAlmostDist2Weight", ocl::photo::nlmeans_oclsrc,
format("-D OP_CALC_WEIGHTS -D FT=%s%s", ocl::typeToStr(depth),
doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
format("-D OP_CALC_WEIGHTS -D FT=%s -D w_t=%s"
" -D wlut_t=%s -D convert_wlut_t=%s%s%s",
ocl::typeToStr(depth), ocl::typeToStr(CV_MAKE_TYPE(depth, hn)),
ocl::typeToStr(CV_32SC(hn)), ocl::convertTypeStr(depth, CV_32S, hn, buf),
doubleSupport ? " -D DOUBLE_SUPPORT" : "",
normType == NORM_L1 ? " -D ABS" : ""));
if (k.empty())
return false;
k.args(ocl::KernelArg::PtrWriteOnly(almostDist2Weight), almostMaxDist,
almostDist2ActualDistMultiplier, fixedPointMult, den, WEIGHT_THRESHOLD);
almostDist2ActualDistMultiplier, fixedPointMult,
ocl::KernelArg::Constant(den, (hn == 3 ? 4 : hn)*sizeof(FT)), WEIGHT_THRESHOLD);
size_t globalsize[1] = { almostMaxDist };
return k.run(1, globalsize, NULL, false);
}
static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
int templateWindowSize, int searchWindowSize)
static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, const float *h, int hn,
int templateWindowSize, int searchWindowSize, int normType)
{
int type = _src.type(), cn = CV_MAT_CN(type);
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
int ctaSize = ocl::Device::getDefault().isIntel() ? CTA_SIZE_INTEL : CTA_SIZE_DEFAULT;
Size size = _src.size();
if ( type != CV_8UC1 && type != CV_8UC2 && type != CV_8UC4 )
if (cn < 1 || cn > 4 || ((normType != NORM_L2 || depth != CV_8U) &&
(normType != NORM_L1 || (depth != CV_8U && depth != CV_16U))))
return false;
int templateWindowHalfWize = templateWindowSize / 2;
......@@ -84,33 +99,68 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
int nblocksx = divUp(size.width, BLOCK_COLS), nblocksy = divUp(size.height, BLOCK_ROWS);
int almostTemplateWindowSizeSqBinShift = -1;
char cvt[2][40];
char buf[4][40];
String opts = format("-D OP_CALC_FASTNLMEANS -D TEMPLATE_SIZE=%d -D SEARCH_SIZE=%d"
" -D uchar_t=%s -D int_t=%s -D BLOCK_COLS=%d -D BLOCK_ROWS=%d"
" -D pixel_t=%s -D int_t=%s -D wlut_t=%s"
" -D weight_t=%s -D convert_weight_t=%s -D sum_t=%s -D convert_sum_t=%s"
" -D BLOCK_COLS=%d -D BLOCK_ROWS=%d"
" -D CTA_SIZE=%d -D TEMPLATE_SIZE2=%d -D SEARCH_SIZE2=%d"
" -D convert_int_t=%s -D cn=%d -D convert_uchar_t=%s",
templateWindowSize, searchWindowSize, ocl::typeToStr(type),
ocl::typeToStr(CV_32SC(cn)), BLOCK_COLS, BLOCK_ROWS, ctaSize,
templateWindowHalfWize, searchWindowHalfSize,
ocl::convertTypeStr(CV_8U, CV_32S, cn, cvt[0]), cn,
ocl::convertTypeStr(CV_32S, CV_8U, cn, cvt[1]));
" -D convert_int_t=%s -D cn=%d -D psz=%d -D convert_pixel_t=%s%s",
templateWindowSize, searchWindowSize,
ocl::typeToStr(type), ocl::typeToStr(CV_32SC(cn)),
ocl::typeToStr(CV_32SC(hn)),
depth == CV_8U ? ocl::typeToStr(CV_32SC(hn)) :
format("long%s", hn > 1 ? format("%d", hn).c_str() : "").c_str(),
depth == CV_8U ? ocl::convertTypeStr(CV_32S, CV_32S, hn, buf[0]) :
format("convert_long%s", hn > 1 ? format("%d", hn).c_str() : "").c_str(),
depth == CV_8U ? ocl::typeToStr(CV_32SC(cn)) :
format("long%s", cn > 1 ? format("%d", cn).c_str() : "").c_str(),
depth == CV_8U ? ocl::convertTypeStr(depth, CV_32S, cn, buf[1]) :
format("convert_long%s", cn > 1 ? format("%d", cn).c_str() : "").c_str(),
BLOCK_COLS, BLOCK_ROWS,
ctaSize, templateWindowHalfWize, searchWindowHalfSize,
ocl::convertTypeStr(depth, CV_32S, cn, buf[2]), cn,
(depth == CV_8U ? sizeof(uchar) : sizeof(ushort)) * (cn == 3 ? 4 : cn),
ocl::convertTypeStr(CV_32S, depth, cn, buf[3]),
normType == NORM_L1 ? " -D ABS" : "");
ocl::Kernel k("fastNlMeansDenoising", ocl::photo::nlmeans_oclsrc, opts);
if (k.empty())
return false;
UMat almostDist2Weight;
if (!ocl_calcAlmostDist2Weight<float>(almostDist2Weight, searchWindowSize, templateWindowSize, h, cn,
almostTemplateWindowSizeSqBinShift))
if ((depth == CV_8U &&
!ocl_calcAlmostDist2Weight<float, uchar, int>(almostDist2Weight,
searchWindowSize, templateWindowSize,
h, hn, cn, normType,
almostTemplateWindowSizeSqBinShift)) ||
(depth == CV_16U &&
!ocl_calcAlmostDist2Weight<float, ushort, int64>(almostDist2Weight,
searchWindowSize, templateWindowSize,
h, hn, cn, normType,
almostTemplateWindowSizeSqBinShift)))
return false;
CV_Assert(almostTemplateWindowSizeSqBinShift >= 0);
UMat srcex;
int borderSize = searchWindowHalfSize + templateWindowHalfWize;
copyMakeBorder(_src, srcex, borderSize, borderSize, borderSize, borderSize, BORDER_DEFAULT);
if (cn == 3) {
srcex.create(size.height + 2*borderSize, size.width + 2*borderSize, CV_MAKE_TYPE(depth, 4));
UMat src(srcex, Rect(borderSize, borderSize, size.width, size.height));
int from_to[] = { 0,0, 1,1, 2,2 };
mixChannels(std::vector<UMat>(1, _src.getUMat()), std::vector<UMat>(1, src), from_to, 3);
copyMakeBorder(src, srcex, borderSize, borderSize, borderSize, borderSize,
BORDER_DEFAULT|BORDER_ISOLATED); // create borders in place
}
else
copyMakeBorder(_src, srcex, borderSize, borderSize, borderSize, borderSize, BORDER_DEFAULT);
_dst.create(size, type);
UMat dst = _dst.getUMat();
UMat dst;
if (cn == 3)
dst.create(size, CV_MAKE_TYPE(depth, 4));
else
dst = _dst.getUMat();
int searchWindowSizeSq = searchWindowSize * searchWindowSize;
Size upColSumSize(size.width, searchWindowSizeSq * nblocksy);
......@@ -123,7 +173,14 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
ocl::KernelArg::PtrReadOnly(buffer), almostTemplateWindowSizeSqBinShift);
size_t globalsize[2] = { nblocksx * ctaSize, nblocksy }, localsize[2] = { ctaSize, 1 };
return k.run(2, globalsize, localsize, false);
if (!k.run(2, globalsize, localsize, false)) return false;
if (cn == 3) {
int from_to[] = { 0,0, 1,1, 2,2 };
mixChannels(std::vector<UMat>(1, dst), std::vector<UMat>(1, _dst.getUMat()), from_to, 3);
}
return true;
}
static bool ocl_fastNlMeansDenoisingColored( InputArray _src, OutputArray _dst,
......
......@@ -50,14 +50,14 @@
using namespace cv;
template <typename T>
template <typename T, typename IT, typename UIT, typename D, typename WT>
struct FastNlMeansMultiDenoisingInvoker :
ParallelLoopBody
{
public:
FastNlMeansMultiDenoisingInvoker(const std::vector<Mat>& srcImgs, int imgToDenoiseIndex,
int temporalWindowSize, Mat& dst, int template_window_size,
int search_window_size, const float h);
int search_window_size, const float *h);
void operator() (const Range& range) const;
......@@ -81,9 +81,9 @@ private:
int search_window_half_size_;
int temporal_window_half_size_;
int fixed_point_mult_;
typename pixelInfo<WT>::sampleType fixed_point_mult_;
int almost_template_window_size_sq_bin_shift;
std::vector<int> almost_dist2weight;
std::vector<WT> almost_dist2weight;
void calcDistSumsForFirstElementInRow(int i, Array3d<int>& dist_sums,
Array4d<int>& col_dist_sums,
......@@ -94,19 +94,19 @@ private:
Array4d<int>& up_col_dist_sums) const;
};
template <class T>
FastNlMeansMultiDenoisingInvoker<T>::FastNlMeansMultiDenoisingInvoker(
template <typename T, typename IT, typename UIT, typename D, typename WT>
FastNlMeansMultiDenoisingInvoker<T, IT, UIT, D, WT>::FastNlMeansMultiDenoisingInvoker(
const std::vector<Mat>& srcImgs,
int imgToDenoiseIndex,
int temporalWindowSize,
cv::Mat& dst,
int template_window_size,
int search_window_size,
const float h) :
const float *h) :
dst_(dst), extended_srcs_(srcImgs.size())
{
CV_Assert(srcImgs.size() > 0);
CV_Assert(srcImgs[0].channels() == sizeof(T));
CV_Assert(srcImgs[0].channels() == pixelInfo<T>::channels);
rows_ = srcImgs[0].rows;
cols_ = srcImgs[0].cols;
......@@ -125,8 +125,10 @@ FastNlMeansMultiDenoisingInvoker<T>::FastNlMeansMultiDenoisingInvoker(
border_size_, border_size_, border_size_, border_size_, cv::BORDER_DEFAULT);
main_extended_src_ = extended_srcs_[temporal_window_half_size_];
const int max_estimate_sum_value = temporal_window_size_ * search_window_size_ * search_window_size_ * 255;
fixed_point_mult_ = std::numeric_limits<int>::max() / max_estimate_sum_value;
const IT max_estimate_sum_value =
(IT)temporal_window_size_ * (IT)search_window_size_ * (IT)search_window_size_ * (IT)pixelInfo<T>::sampleMax();
fixed_point_mult_ = (int)std::min<IT>(std::numeric_limits<IT>::max() / max_estimate_sum_value,
pixelInfo<WT>::sampleMax());
// precalc weight for every possible l2 dist between blocks
// additional optimization of precalced weights to replace division(averaging) by binary shift
......@@ -138,30 +140,24 @@ FastNlMeansMultiDenoisingInvoker<T>::FastNlMeansMultiDenoisingInvoker(
int almost_template_window_size_sq = 1 << almost_template_window_size_sq_bin_shift;
double almost_dist2actual_dist_multiplier = (double) almost_template_window_size_sq / template_window_size_sq;
int max_dist = 255 * 255 * sizeof(T);
int almost_max_dist = (int) (max_dist / almost_dist2actual_dist_multiplier + 1);
int max_dist = D::template maxDist<T>();
int almost_max_dist = (int)(max_dist / almost_dist2actual_dist_multiplier + 1);
almost_dist2weight.resize(almost_max_dist);
const double WEIGHT_THRESHOLD = 0.001;
for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++)
{
double dist = almost_dist * almost_dist2actual_dist_multiplier;
int weight = cvRound(fixed_point_mult_ * std::exp(-dist / (h * h * sizeof(T))));
if (weight < WEIGHT_THRESHOLD * fixed_point_mult_)
weight = 0;
almost_dist2weight[almost_dist] = weight;
almost_dist2weight[almost_dist] =
D::template calcWeight<T, WT>(dist, h, fixed_point_mult_);
}
CV_Assert(almost_dist2weight[0] == fixed_point_mult_);
// additional optimization init end
if (dst_.empty())
dst_ = Mat::zeros(srcImgs[0].size(), srcImgs[0].type());
}
template <class T>
void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const
template <typename T, typename IT, typename UIT, typename D, typename WT>
void FastNlMeansMultiDenoisingInvoker<T, IT, UIT, D, WT>::operator() (const Range& range) const
{
int row_from = range.start;
int row_to = range.end - 1;
......@@ -234,7 +230,7 @@ void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const
dist_sums_row[x] -= col_dist_sums_row[x];
col_dist_sums_row[x] = up_col_dist_sums_row[x] +
calcUpDownDist(a_up, a_down, b_up_ptr[start_bx + x], b_down_ptr[start_bx + x]);
D::template calcUpDownDist<T>(a_up, a_down, b_up_ptr[start_bx + x], b_down_ptr[start_bx + x]);
dist_sums_row[x] += col_dist_sums_row[x];
up_col_dist_sums_row[x] = col_dist_sums_row[x];
......@@ -247,11 +243,11 @@ void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const
}
// calc weights
int weights_sum = 0;
int estimation[3];
for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++)
IT estimation[pixelInfo<T>::channels], weights_sum[pixelInfo<WT>::channels];
for (size_t channel_num = 0; channel_num < pixelInfo<T>::channels; channel_num++)
estimation[channel_num] = 0;
for (size_t channel_num = 0; channel_num < pixelInfo<WT>::channels; channel_num++)
weights_sum[channel_num] = 0;
for (int d = 0; d < temporal_window_size_; d++)
{
......@@ -266,26 +262,22 @@ void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const
{
int almostAvgDist = dist_sums_row[x] >> almost_template_window_size_sq_bin_shift;
int weight = almost_dist2weight[almostAvgDist];
weights_sum += weight;
WT weight = almost_dist2weight[almostAvgDist];
T p = cur_row_ptr[border_size_ + search_window_x + x];
incWithWeight(estimation, weight, p);
incWithWeight<T, IT, WT>(estimation, weights_sum, weight, p);
}
}
}
for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++)
estimation[channel_num] = ((unsigned)estimation[channel_num] + weights_sum / 2) / weights_sum;
dst_.at<T>(i,j) = saturateCastFromArray<T>(estimation);
divByWeightsSum<IT, UIT, pixelInfo<T>::channels, pixelInfo<WT>::channels>(estimation,
weights_sum);
dst_.at<T>(i,j) = saturateCastFromArray<T, IT>(estimation);
}
}
}
template <class T>
inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForFirstElementInRow(
template <typename T, typename IT, typename UIT, typename D, typename WT>
inline void FastNlMeansMultiDenoisingInvoker<T, IT, UIT, D, WT>::calcDistSumsForFirstElementInRow(
int i, Array3d<int>& dist_sums, Array4d<int>& col_dist_sums, Array4d<int>& up_col_dist_sums) const
{
int j = 0;
......@@ -310,7 +302,7 @@ inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForFirstElementInRo
{
for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++)
{
int dist = calcDist<T>(
int dist = D::template calcDist<T>(
main_extended_src_.at<T>(border_size_ + i + ty, border_size_ + j + tx),
cur_extended_src.at<T>(border_size_ + start_y + ty, border_size_ + start_x + tx));
......@@ -325,8 +317,8 @@ inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForFirstElementInRo
}
}
template <class T>
inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForElementInFirstRow(
template <typename T, typename IT, typename UIT, typename D, typename WT>
inline void FastNlMeansMultiDenoisingInvoker<T, IT, UIT, D, WT>::calcDistSumsForElementInFirstRow(
int i, int j, int first_col_num, Array3d<int>& dist_sums,
Array4d<int>& col_dist_sums, Array4d<int>& up_col_dist_sums) const
{
......@@ -353,7 +345,7 @@ inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForElementInFirstRo
int* col_dist_sums_ptr = &col_dist_sums[new_last_col_num][d][y][x];
for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++)
{
*col_dist_sums_ptr += calcDist<T>(
*col_dist_sums_ptr += D::template calcDist<T>(
main_extended_src_.at<T>(ay + ty, ax),
cur_extended_src.at<T>(by + ty, bx));
}
......
This diff is collapsed.
......@@ -13,11 +13,11 @@
namespace cvtest {
namespace ocl {
PARAM_TEST_CASE(FastNlMeansDenoisingTestBase, Channels, bool)
PARAM_TEST_CASE(FastNlMeansDenoisingTestBase, Channels, int, bool, bool)
{
int cn, templateWindowSize, searchWindowSize;
float h;
bool use_roi;
int cn, normType, templateWindowSize, searchWindowSize;
std::vector<float> h;
bool use_roi, use_image;
TEST_DECLARE_INPUT_PARAMETER(src);
TEST_DECLARE_OUTPUT_PARAMETER(dst);
......@@ -25,29 +25,46 @@ PARAM_TEST_CASE(FastNlMeansDenoisingTestBase, Channels, bool)
virtual void SetUp()
{
cn = GET_PARAM(0);
use_roi = GET_PARAM(1);
normType = GET_PARAM(1);
use_roi = GET_PARAM(2);
use_image = GET_PARAM(3);
templateWindowSize = 7;
searchWindowSize = 21;
h = 3.0f;
h.resize(cn);
for (int i=0; i<cn; i++)
h[i] = 3.0f + 0.5f*i;
}
virtual void generateTestData()
{
const int type = CV_8UC(cn);
Mat image;
if (cn == 1)
{
image = readImage("denoising/lena_noised_gaussian_sigma=10.png", IMREAD_GRAYSCALE);
if (use_image) {
image = readImage("denoising/lena_noised_gaussian_sigma=10.png",
cn == 1 ? IMREAD_GRAYSCALE : IMREAD_COLOR);
ASSERT_FALSE(image.empty());
}
const int type = CV_8UC(cn);
Size roiSize = cn == 1 ? image.size() : randomSize(1, MAX_VALUE);
Size roiSize = use_image ? image.size() : randomSize(1, MAX_VALUE);
Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
randomSubMat(src, src_roi, roiSize, srcBorder, type, 0, 255);
if (cn == 1)
image.copyTo(src_roi);
if (use_image) {
ASSERT_TRUE(cn > 0 && cn <= 4);
if (cn == 2) {
int from_to[] = { 0,0, 1,1 };
src_roi.create(roiSize, type);
mixChannels(&image, 1, &src_roi, 1, from_to, 2);
}
else if (cn == 4) {
int from_to[] = { 0,0, 1,1, 2,2, 1,3};
src_roi.create(roiSize, type);
mixChannels(&image, 1, &src_roi, 1, from_to, 4);
}
else image.copyTo(src_roi);
}
Border dstBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
randomSubMat(dst, dst_roi, roiSize, dstBorder, type, 0, 255);
......@@ -65,8 +82,23 @@ OCL_TEST_P(FastNlMeansDenoising, Mat)
{
generateTestData();
OCL_OFF(cv::fastNlMeansDenoising(src_roi, dst_roi, h, templateWindowSize, searchWindowSize));
OCL_ON(cv::fastNlMeansDenoising(usrc_roi, udst_roi, h, templateWindowSize, searchWindowSize));
OCL_OFF(cv::fastNlMeansDenoising(src_roi, dst_roi, std::vector<float>(1, h[0]), templateWindowSize, searchWindowSize, normType));
OCL_ON(cv::fastNlMeansDenoising(usrc_roi, udst_roi, std::vector<float>(1, h[0]), templateWindowSize, searchWindowSize, normType));
OCL_EXPECT_MATS_NEAR(dst, 1);
}
}
typedef FastNlMeansDenoisingTestBase FastNlMeansDenoising_hsep;
OCL_TEST_P(FastNlMeansDenoising_hsep, Mat)
{
for (int j = 0; j < test_loop_times; j++)
{
generateTestData();
OCL_OFF(cv::fastNlMeansDenoising(src_roi, dst_roi, h, templateWindowSize, searchWindowSize, normType));
OCL_ON(cv::fastNlMeansDenoising(usrc_roi, udst_roi, h, templateWindowSize, searchWindowSize, normType));
OCL_EXPECT_MATS_NEAR(dst, 1);
}
......@@ -80,15 +112,21 @@ OCL_TEST_P(FastNlMeansDenoisingColored, Mat)
{
generateTestData();
OCL_OFF(cv::fastNlMeansDenoisingColored(src_roi, dst_roi, h, h, templateWindowSize, searchWindowSize));
OCL_ON(cv::fastNlMeansDenoisingColored(usrc_roi, udst_roi, h, h, templateWindowSize, searchWindowSize));
OCL_OFF(cv::fastNlMeansDenoisingColored(src_roi, dst_roi, h[0], h[0], templateWindowSize, searchWindowSize));
OCL_ON(cv::fastNlMeansDenoisingColored(usrc_roi, udst_roi, h[0], h[0], templateWindowSize, searchWindowSize));
OCL_EXPECT_MATS_NEAR(dst, 1);
}
}
OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoising, Combine(Values(1, 2), Bool()));
OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoisingColored, Combine(Values(3, 4), Bool()));
OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoising,
Combine(Values(1, 2, 3, 4), Values((int)NORM_L2, (int)NORM_L1),
Bool(), Values(true)));
OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoising_hsep,
Combine(Values(1, 2, 3, 4), Values((int)NORM_L2, (int)NORM_L1),
Bool(), Values(true)));
OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoisingColored,
Combine(Values(3, 4), Values((int)NORM_L2), Bool(), Values(false)));
} } // namespace cvtest::ocl
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment