Commit 5501cfd8 authored by Vadim Pisarevsky

Merge pull request #3814 from erikrk:denoising-16bit-master

parents 7ea02397 01d3df0d
...@@ -442,6 +442,10 @@ template<typename _Tp> static inline _Tp saturate_cast(int v) { return _Tp( ...@@ -442,6 +442,10 @@ template<typename _Tp> static inline _Tp saturate_cast(int v) { return _Tp(
template<typename _Tp> static inline _Tp saturate_cast(float v) { return _Tp(v); } template<typename _Tp> static inline _Tp saturate_cast(float v) { return _Tp(v); }
/** @overload */ /** @overload */
template<typename _Tp> static inline _Tp saturate_cast(double v) { return _Tp(v); } template<typename _Tp> static inline _Tp saturate_cast(double v) { return _Tp(v); }
// Generic fallback for int64 input: the value is converted to _Tp with a plain cast;
// this overload itself performs no clamping.
/** @overload */
template<typename _Tp> static inline _Tp saturate_cast(int64 v) { return _Tp(v); }
// Generic fallback for uint64 input: the value is converted to _Tp with a plain cast;
// this overload itself performs no clamping.
/** @overload */
template<typename _Tp> static inline _Tp saturate_cast(uint64 v) { return _Tp(v); }
//! @cond IGNORED //! @cond IGNORED
...@@ -452,6 +456,8 @@ template<> inline uchar saturate_cast<uchar>(short v) { return saturate_c ...@@ -452,6 +456,8 @@ template<> inline uchar saturate_cast<uchar>(short v) { return saturate_c
template<> inline uchar saturate_cast<uchar>(unsigned v) { return (uchar)std::min(v, (unsigned)UCHAR_MAX); } template<> inline uchar saturate_cast<uchar>(unsigned v) { return (uchar)std::min(v, (unsigned)UCHAR_MAX); }
template<> inline uchar saturate_cast<uchar>(float v) { int iv = cvRound(v); return saturate_cast<uchar>(iv); } template<> inline uchar saturate_cast<uchar>(float v) { int iv = cvRound(v); return saturate_cast<uchar>(iv); }
template<> inline uchar saturate_cast<uchar>(double v) { int iv = cvRound(v); return saturate_cast<uchar>(iv); } template<> inline uchar saturate_cast<uchar>(double v) { int iv = cvRound(v); return saturate_cast<uchar>(iv); }
// Saturating int64 -> uchar conversion: negative values become 0,
// values above UCHAR_MAX become UCHAR_MAX, everything else is kept as is.
template<> inline uchar saturate_cast<uchar>(int64 v)
{
    if (v < 0)
        return (uchar)0;
    if (v > (int64)UCHAR_MAX)
        return (uchar)UCHAR_MAX;
    return (uchar)v;
}
// Saturating uint64 -> uchar conversion: values above UCHAR_MAX become UCHAR_MAX.
template<> inline uchar saturate_cast<uchar>(uint64 v)
{
    const uint64 limit = (uint64)UCHAR_MAX;
    return (uchar)(v > limit ? limit : v);
}
template<> inline schar saturate_cast<schar>(uchar v) { return (schar)std::min((int)v, SCHAR_MAX); } template<> inline schar saturate_cast<schar>(uchar v) { return (schar)std::min((int)v, SCHAR_MAX); }
template<> inline schar saturate_cast<schar>(ushort v) { return (schar)std::min((unsigned)v, (unsigned)SCHAR_MAX); } template<> inline schar saturate_cast<schar>(ushort v) { return (schar)std::min((unsigned)v, (unsigned)SCHAR_MAX); }
...@@ -460,6 +466,8 @@ template<> inline schar saturate_cast<schar>(short v) { return saturate_c ...@@ -460,6 +466,8 @@ template<> inline schar saturate_cast<schar>(short v) { return saturate_c
template<> inline schar saturate_cast<schar>(unsigned v) { return (schar)std::min(v, (unsigned)SCHAR_MAX); } template<> inline schar saturate_cast<schar>(unsigned v) { return (schar)std::min(v, (unsigned)SCHAR_MAX); }
template<> inline schar saturate_cast<schar>(float v) { int iv = cvRound(v); return saturate_cast<schar>(iv); } template<> inline schar saturate_cast<schar>(float v) { int iv = cvRound(v); return saturate_cast<schar>(iv); }
template<> inline schar saturate_cast<schar>(double v) { int iv = cvRound(v); return saturate_cast<schar>(iv); } template<> inline schar saturate_cast<schar>(double v) { int iv = cvRound(v); return saturate_cast<schar>(iv); }
// Saturating int64 -> schar conversion: values outside [SCHAR_MIN, SCHAR_MAX] clamp to the nearest bound.
// The range test biases v by -SCHAR_MIN so the valid range maps onto [0, UCHAR_MAX] and a single
// unsigned comparison suffices. The bias is applied in uint64, where wrap-around is well defined;
// applying it in int64 would overflow (undefined behaviour) for v close to the int64 maximum.
template<> inline schar saturate_cast<schar>(int64 v) { return (schar)(((uint64)v - (uint64)SCHAR_MIN) <= (uint64)UCHAR_MAX ? v : v > 0 ? SCHAR_MAX : SCHAR_MIN); }
// Saturating uint64 -> schar conversion: values above SCHAR_MAX become SCHAR_MAX.
template<> inline schar saturate_cast<schar>(uint64 v)
{
    const uint64 limit = (uint64)SCHAR_MAX;
    return (schar)(v > limit ? limit : v);
}
template<> inline ushort saturate_cast<ushort>(schar v) { return (ushort)std::max((int)v, 0); } template<> inline ushort saturate_cast<ushort>(schar v) { return (ushort)std::max((int)v, 0); }
template<> inline ushort saturate_cast<ushort>(short v) { return (ushort)std::max((int)v, 0); } template<> inline ushort saturate_cast<ushort>(short v) { return (ushort)std::max((int)v, 0); }
...@@ -467,12 +475,16 @@ template<> inline ushort saturate_cast<ushort>(int v) { return (ushort)(( ...@@ -467,12 +475,16 @@ template<> inline ushort saturate_cast<ushort>(int v) { return (ushort)((
template<> inline ushort saturate_cast<ushort>(unsigned v) { return (ushort)std::min(v, (unsigned)USHRT_MAX); } template<> inline ushort saturate_cast<ushort>(unsigned v) { return (ushort)std::min(v, (unsigned)USHRT_MAX); }
template<> inline ushort saturate_cast<ushort>(float v) { int iv = cvRound(v); return saturate_cast<ushort>(iv); } template<> inline ushort saturate_cast<ushort>(float v) { int iv = cvRound(v); return saturate_cast<ushort>(iv); }
template<> inline ushort saturate_cast<ushort>(double v) { int iv = cvRound(v); return saturate_cast<ushort>(iv); } template<> inline ushort saturate_cast<ushort>(double v) { int iv = cvRound(v); return saturate_cast<ushort>(iv); }
// Saturating int64 -> ushort conversion: negative values become 0,
// values above USHRT_MAX become USHRT_MAX, everything else is kept as is.
template<> inline ushort saturate_cast<ushort>(int64 v)
{
    if (v < 0)
        return (ushort)0;
    if (v > (int64)USHRT_MAX)
        return (ushort)USHRT_MAX;
    return (ushort)v;
}
// Saturating uint64 -> ushort conversion: values above USHRT_MAX become USHRT_MAX.
template<> inline ushort saturate_cast<ushort>(uint64 v)
{
    const uint64 limit = (uint64)USHRT_MAX;
    return (ushort)(v > limit ? limit : v);
}
template<> inline short saturate_cast<short>(ushort v) { return (short)std::min((int)v, SHRT_MAX); } template<> inline short saturate_cast<short>(ushort v) { return (short)std::min((int)v, SHRT_MAX); }
template<> inline short saturate_cast<short>(int v) { return (short)((unsigned)(v - SHRT_MIN) <= (unsigned)USHRT_MAX ? v : v > 0 ? SHRT_MAX : SHRT_MIN); } template<> inline short saturate_cast<short>(int v) { return (short)((unsigned)(v - SHRT_MIN) <= (unsigned)USHRT_MAX ? v : v > 0 ? SHRT_MAX : SHRT_MIN); }
template<> inline short saturate_cast<short>(unsigned v) { return (short)std::min(v, (unsigned)SHRT_MAX); } template<> inline short saturate_cast<short>(unsigned v) { return (short)std::min(v, (unsigned)SHRT_MAX); }
template<> inline short saturate_cast<short>(float v) { int iv = cvRound(v); return saturate_cast<short>(iv); } template<> inline short saturate_cast<short>(float v) { int iv = cvRound(v); return saturate_cast<short>(iv); }
template<> inline short saturate_cast<short>(double v) { int iv = cvRound(v); return saturate_cast<short>(iv); } template<> inline short saturate_cast<short>(double v) { int iv = cvRound(v); return saturate_cast<short>(iv); }
// Saturating int64 -> short conversion: values outside [SHRT_MIN, SHRT_MAX] clamp to the nearest bound.
// The range test biases v by -SHRT_MIN so the valid range maps onto [0, USHRT_MAX] and a single
// unsigned comparison suffices. The bias is applied in uint64, where wrap-around is well defined;
// applying it in int64 would overflow (undefined behaviour) for v close to the int64 maximum.
template<> inline short saturate_cast<short>(int64 v) { return (short)(((uint64)v - (uint64)SHRT_MIN) <= (uint64)USHRT_MAX ? v : v > 0 ? SHRT_MAX : SHRT_MIN); }
// Saturating uint64 -> short conversion: values above SHRT_MAX become SHRT_MAX.
template<> inline short saturate_cast<short>(uint64 v)
{
    const uint64 limit = (uint64)SHRT_MAX;
    return (short)(v > limit ? limit : v);
}
template<> inline int saturate_cast<int>(float v) { return cvRound(v); } template<> inline int saturate_cast<int>(float v) { return cvRound(v); }
template<> inline int saturate_cast<int>(double v) { return cvRound(v); } template<> inline int saturate_cast<int>(double v) { return cvRound(v); }
......
...@@ -119,7 +119,7 @@ CV_EXPORTS_W void inpaint( InputArray src, InputArray inpaintMask, ...@@ -119,7 +119,7 @@ CV_EXPORTS_W void inpaint( InputArray src, InputArray inpaintMask,
<http://www.ipol.im/pub/algo/bcm_non_local_means_denoising/> with several computational <http://www.ipol.im/pub/algo/bcm_non_local_means_denoising/> with several computational
optimizations. Noise expected to be a gaussian white noise optimizations. Noise expected to be a gaussian white noise
@param src Input 8-bit 1-channel, 2-channel or 3-channel image. @param src Input 8-bit 1-channel, 2-channel, 3-channel or 4-channel image.
@param dst Output image with the same size and type as src . @param dst Output image with the same size and type as src .
@param templateWindowSize Size in pixels of the template patch that is used to compute weights. @param templateWindowSize Size in pixels of the template patch that is used to compute weights.
Should be odd. Recommended value 7 pixels Should be odd. Recommended value 7 pixels
...@@ -138,6 +138,35 @@ parameter. ...@@ -138,6 +138,35 @@ parameter.
CV_EXPORTS_W void fastNlMeansDenoising( InputArray src, OutputArray dst, float h = 3, CV_EXPORTS_W void fastNlMeansDenoising( InputArray src, OutputArray dst, float h = 3,
int templateWindowSize = 7, int searchWindowSize = 21); int templateWindowSize = 7, int searchWindowSize = 21);
/** @brief Performs image denoising using the Non-local Means Denoising algorithm
<http://www.ipol.im/pub/algo/bcm_non_local_means_denoising/> with several computational
optimizations. The noise is expected to be Gaussian white noise.
@param src Input 8-bit or 16-bit (only with NORM_L1) 1-channel,
2-channel, 3-channel or 4-channel image.
@param dst Output image with the same size and type as src .
@param templateWindowSize Size in pixels of the template patch that is used to compute weights.
Should be odd. Recommended value is 7 pixels.
@param searchWindowSize Size in pixels of the window that is used to compute the weighted average
for a given pixel. Should be odd. Affects performance linearly: the greater searchWindowSize,
the greater the denoising time. Recommended value is 21 pixels.
@param h Array of parameters regulating filter strength, either one
parameter applied to all channels or one per channel in dst. A bigger h
value removes more noise but also removes image details; a smaller h
value preserves details but also preserves some noise.
@param normType Type of norm used for weight calculation. Can be either NORM_L2 or NORM_L1.

This function is expected to be applied to grayscale images. For colored images look at
fastNlMeansDenoisingColored. An advanced use of this function is manual denoising of a colored
image in different colorspaces. Such an approach is used in fastNlMeansDenoisingColored by converting
the image to the CIELAB colorspace and then separately denoising the L and AB components with
different h parameters.
*/
CV_EXPORTS_W void fastNlMeansDenoising( InputArray src, OutputArray dst,
const std::vector<float>& h,
int templateWindowSize = 7, int searchWindowSize = 21,
int normType = NORM_L2);
/** @brief Modification of fastNlMeansDenoising function for colored images /** @brief Modification of fastNlMeansDenoising function for colored images
@param src Input 8-bit 3-channel image. @param src Input 8-bit 3-channel image.
...@@ -165,8 +194,9 @@ captured in small period of time. For example video. This version of the functio ...@@ -165,8 +194,9 @@ captured in small period of time. For example video. This version of the functio
images or for manual manipulation with colorspaces. For more details see images or for manual manipulation with colorspaces. For more details see
<http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.131.6394> <http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.131.6394>
@param srcImgs Input 8-bit 1-channel, 2-channel or 3-channel images sequence. All images should @param srcImgs Input 8-bit 1-channel, 2-channel, 3-channel or
have the same type and size. 4-channel images sequence. All images should have the same type and
size.
@param imgToDenoiseIndex Target image to denoise index in srcImgs sequence @param imgToDenoiseIndex Target image to denoise index in srcImgs sequence
@param temporalWindowSize Number of surrounding images to use for target image denoising. Should @param temporalWindowSize Number of surrounding images to use for target image denoising. Should
be odd. Images from imgToDenoiseIndex - temporalWindowSize / 2 to be odd. Images from imgToDenoiseIndex - temporalWindowSize / 2 to
...@@ -178,14 +208,45 @@ Should be odd. Recommended value 7 pixels ...@@ -178,14 +208,45 @@ Should be odd. Recommended value 7 pixels
@param searchWindowSize Size in pixels of the window that is used to compute weighted average for @param searchWindowSize Size in pixels of the window that is used to compute weighted average for
given pixel. Should be odd. Affect performance linearly: greater searchWindowsSize - greater given pixel. Should be odd. Affect performance linearly: greater searchWindowsSize - greater
denoising time. Recommended value 21 pixels denoising time. Recommended value 21 pixels
@param h Parameter regulating filter strength for luminance component. Bigger h value perfectly @param h Parameter regulating filter strength. Bigger h value
removes noise but also removes image details, smaller h value preserves details but also preserves perfectly removes noise but also removes image details, smaller h
some noise value preserves details but also preserves some noise
*/ */
CV_EXPORTS_W void fastNlMeansDenoisingMulti( InputArrayOfArrays srcImgs, OutputArray dst, CV_EXPORTS_W void fastNlMeansDenoisingMulti( InputArrayOfArrays srcImgs, OutputArray dst,
int imgToDenoiseIndex, int temporalWindowSize, int imgToDenoiseIndex, int temporalWindowSize,
float h = 3, int templateWindowSize = 7, int searchWindowSize = 21); float h = 3, int templateWindowSize = 7, int searchWindowSize = 21);
/** @brief Modification of the fastNlMeansDenoising function for image sequences where consecutive
images have been captured in a small period of time, for example video. This version of the
function is for grayscale images or for manual manipulation with colorspaces. For more details see
<http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.131.6394>
@param srcImgs Input 8-bit or 16-bit (only with NORM_L1) 1-channel,
2-channel, 3-channel or 4-channel images sequence. All images should
have the same type and size.
@param imgToDenoiseIndex Index of the target image to denoise in the srcImgs sequence.
@param temporalWindowSize Number of surrounding images to use for target image denoising. Should
be odd. Images from imgToDenoiseIndex - temporalWindowSize / 2 to
imgToDenoiseIndex + temporalWindowSize / 2 from srcImgs will be used to denoise the
srcImgs[imgToDenoiseIndex] image.
@param dst Output image with the same size and type as srcImgs images.
@param templateWindowSize Size in pixels of the template patch that is used to compute weights.
Should be odd. Recommended value is 7 pixels.
@param searchWindowSize Size in pixels of the window that is used to compute the weighted average
for a given pixel. Should be odd. Affects performance linearly: the greater searchWindowSize,
the greater the denoising time. Recommended value is 21 pixels.
@param h Array of parameters regulating filter strength, either one
parameter applied to all channels or one per channel in dst. A bigger h
value removes more noise but also removes image details; a smaller h
value preserves details but also preserves some noise.
@param normType Type of norm used for weight calculation. Can be either NORM_L2 or NORM_L1.
*/
CV_EXPORTS_W void fastNlMeansDenoisingMulti( InputArrayOfArrays srcImgs, OutputArray dst,
int imgToDenoiseIndex, int temporalWindowSize,
const std::vector<float>& h,
int templateWindowSize = 7, int searchWindowSize = 21,
int normType = NORM_L2);
/** @brief Modification of fastNlMeansDenoisingMulti function for colored images sequences /** @brief Modification of fastNlMeansDenoisingMulti function for colored images sequences
@param srcImgs Input 8-bit 3-channel images sequence. All images should have the same type and @param srcImgs Input 8-bit 3-channel images sequence. All images should have the same type and
......
This diff is collapsed.
...@@ -50,13 +50,13 @@ ...@@ -50,13 +50,13 @@
using namespace cv; using namespace cv;
template <typename T> template <typename T, typename IT, typename UIT, typename D, typename WT>
struct FastNlMeansDenoisingInvoker : struct FastNlMeansDenoisingInvoker :
public ParallelLoopBody public ParallelLoopBody
{ {
public: public:
FastNlMeansDenoisingInvoker(const Mat& src, Mat& dst, FastNlMeansDenoisingInvoker(const Mat& src, Mat& dst,
int template_window_size, int search_window_size, const float h); int template_window_size, int search_window_size, const float *h);
void operator() (const Range& range) const; void operator() (const Range& range) const;
...@@ -75,9 +75,9 @@ private: ...@@ -75,9 +75,9 @@ private:
int template_window_half_size_; int template_window_half_size_;
int search_window_half_size_; int search_window_half_size_;
int fixed_point_mult_; typename pixelInfo<WT>::sampleType fixed_point_mult_;
int almost_template_window_size_sq_bin_shift_; int almost_template_window_size_sq_bin_shift_;
std::vector<int> almost_dist2weight_; std::vector<WT> almost_dist2weight_;
void calcDistSumsForFirstElementInRow( void calcDistSumsForFirstElementInRow(
int i, Array2d<int>& dist_sums, int i, Array2d<int>& dist_sums,
...@@ -99,15 +99,15 @@ inline int getNearestPowerOf2(int value) ...@@ -99,15 +99,15 @@ inline int getNearestPowerOf2(int value)
return p; return p;
} }
template <class T> template <typename T, typename IT, typename UIT, typename D, typename WT>
FastNlMeansDenoisingInvoker<T>::FastNlMeansDenoisingInvoker( FastNlMeansDenoisingInvoker<T, IT, UIT, D, WT>::FastNlMeansDenoisingInvoker(
const Mat& src, Mat& dst, const Mat& src, Mat& dst,
int template_window_size, int template_window_size,
int search_window_size, int search_window_size,
const float h) : const float *h) :
src_(src), dst_(dst) src_(src), dst_(dst)
{ {
CV_Assert(src.channels() == sizeof(T)); //T is Vec1b or Vec2b or Vec3b CV_Assert(src.channels() == pixelInfo<T>::channels);
template_window_half_size_ = template_window_size / 2; template_window_half_size_ = template_window_size / 2;
search_window_half_size_ = search_window_size / 2; search_window_half_size_ = search_window_size / 2;
...@@ -117,8 +117,10 @@ FastNlMeansDenoisingInvoker<T>::FastNlMeansDenoisingInvoker( ...@@ -117,8 +117,10 @@ FastNlMeansDenoisingInvoker<T>::FastNlMeansDenoisingInvoker(
border_size_ = search_window_half_size_ + template_window_half_size_; border_size_ = search_window_half_size_ + template_window_half_size_;
copyMakeBorder(src_, extended_src_, border_size_, border_size_, border_size_, border_size_, BORDER_DEFAULT); copyMakeBorder(src_, extended_src_, border_size_, border_size_, border_size_, border_size_, BORDER_DEFAULT);
const int max_estimate_sum_value = search_window_size_ * search_window_size_ * 255; const IT max_estimate_sum_value =
fixed_point_mult_ = std::numeric_limits<int>::max() / max_estimate_sum_value; (IT)search_window_size_ * (IT)search_window_size_ * (IT)pixelInfo<T>::sampleMax();
fixed_point_mult_ = (int)std::min<IT>(std::numeric_limits<IT>::max() / max_estimate_sum_value,
pixelInfo<WT>::sampleMax());
// precalc weight for every possible l2 dist between blocks // precalc weight for every possible l2 dist between blocks
// additional optimization of precalced weights to replace division(averaging) by binary shift // additional optimization of precalced weights to replace division(averaging) by binary shift
...@@ -127,30 +129,24 @@ FastNlMeansDenoisingInvoker<T>::FastNlMeansDenoisingInvoker( ...@@ -127,30 +129,24 @@ FastNlMeansDenoisingInvoker<T>::FastNlMeansDenoisingInvoker(
almost_template_window_size_sq_bin_shift_ = getNearestPowerOf2(template_window_size_sq); almost_template_window_size_sq_bin_shift_ = getNearestPowerOf2(template_window_size_sq);
double almost_dist2actual_dist_multiplier = ((double)(1 << almost_template_window_size_sq_bin_shift_)) / template_window_size_sq; double almost_dist2actual_dist_multiplier = ((double)(1 << almost_template_window_size_sq_bin_shift_)) / template_window_size_sq;
int max_dist = 255 * 255 * sizeof(T); int max_dist = D::template maxDist<T>();
int almost_max_dist = (int)(max_dist / almost_dist2actual_dist_multiplier + 1); int almost_max_dist = (int)(max_dist / almost_dist2actual_dist_multiplier + 1);
almost_dist2weight_.resize(almost_max_dist); almost_dist2weight_.resize(almost_max_dist);
const double WEIGHT_THRESHOLD = 0.001;
for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++) for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++)
{ {
double dist = almost_dist * almost_dist2actual_dist_multiplier; double dist = almost_dist * almost_dist2actual_dist_multiplier;
int weight = cvRound(fixed_point_mult_ * std::exp(-dist / (h * h * sizeof(T)))); almost_dist2weight_[almost_dist] =
D::template calcWeight<T, WT>(dist, h, fixed_point_mult_);
if (weight < WEIGHT_THRESHOLD * fixed_point_mult_)
weight = 0;
almost_dist2weight_[almost_dist] = weight;
} }
CV_Assert(almost_dist2weight_[0] == fixed_point_mult_);
// additional optimization init end // additional optimization init end
if (dst_.empty()) if (dst_.empty())
dst_ = Mat::zeros(src_.size(), src_.type()); dst_ = Mat::zeros(src_.size(), src_.type());
} }
template <class T> template <typename T, typename IT, typename UIT, typename D, typename WT>
void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const void FastNlMeansDenoisingInvoker<T, IT, UIT, D, WT>::operator() (const Range& range) const
{ {
int row_from = range.start; int row_from = range.start;
int row_to = range.end - 1; int row_to = range.end - 1;
...@@ -215,7 +211,7 @@ void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const ...@@ -215,7 +211,7 @@ void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const
dist_sums_row[x] -= col_dist_sums_row[x]; dist_sums_row[x] -= col_dist_sums_row[x];
int bx = start_bx + x; int bx = start_bx + x;
col_dist_sums_row[x] = up_col_dist_sums_row[x] + calcUpDownDist(a_up, a_down, b_up_ptr[bx], b_down_ptr[bx]); col_dist_sums_row[x] = up_col_dist_sums_row[x] + D::template calcUpDownDist<T>(a_up, a_down, b_up_ptr[bx], b_down_ptr[bx]);
dist_sums_row[x] += col_dist_sums_row[x]; dist_sums_row[x] += col_dist_sums_row[x];
up_col_dist_sums_row[x] = col_dist_sums_row[x]; up_col_dist_sums_row[x] = col_dist_sums_row[x];
...@@ -227,9 +223,11 @@ void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const ...@@ -227,9 +223,11 @@ void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const
} }
// calc weights // calc weights
int estimation[3], weights_sum = 0; IT estimation[pixelInfo<T>::channels], weights_sum[pixelInfo<WT>::channels];
for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++) for (size_t channel_num = 0; channel_num < pixelInfo<T>::channels; channel_num++)
estimation[channel_num] = 0; estimation[channel_num] = 0;
for (size_t channel_num = 0; channel_num < pixelInfo<WT>::channels; channel_num++)
weights_sum[channel_num] = 0;
for (int y = 0; y < search_window_size_; y++) for (int y = 0; y < search_window_size_; y++)
{ {
...@@ -238,24 +236,21 @@ void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const ...@@ -238,24 +236,21 @@ void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const
for (int x = 0; x < search_window_size_; x++) for (int x = 0; x < search_window_size_; x++)
{ {
int almostAvgDist = dist_sums_row[x] >> almost_template_window_size_sq_bin_shift_; int almostAvgDist = dist_sums_row[x] >> almost_template_window_size_sq_bin_shift_;
int weight = almost_dist2weight_[almostAvgDist]; WT weight = almost_dist2weight_[almostAvgDist];
weights_sum += weight;
T p = cur_row_ptr[border_size_ + search_window_x + x]; T p = cur_row_ptr[border_size_ + search_window_x + x];
incWithWeight(estimation, weight, p); incWithWeight<T, IT, WT>(estimation, weights_sum, weight, p);
} }
} }
for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++) divByWeightsSum<IT, UIT, pixelInfo<T>::channels, pixelInfo<WT>::channels>(estimation,
estimation[channel_num] = ((unsigned)estimation[channel_num] + weights_sum/2) / weights_sum; weights_sum);
dst_.at<T>(i,j) = saturateCastFromArray<T, IT>(estimation);
dst_.at<T>(i,j) = saturateCastFromArray<T>(estimation);
} }
} }
} }
template <class T> template <typename T, typename IT, typename UIT, typename D, typename WT>
inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForFirstElementInRow( inline void FastNlMeansDenoisingInvoker<T, IT, UIT, D, WT>::calcDistSumsForFirstElementInRow(
int i, int i,
Array2d<int>& dist_sums, Array2d<int>& dist_sums,
Array3d<int>& col_dist_sums, Array3d<int>& col_dist_sums,
...@@ -276,7 +271,7 @@ inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForFirstElementInRow( ...@@ -276,7 +271,7 @@ inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForFirstElementInRow(
for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++) for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++)
for (int tx = -template_window_half_size_; tx <= template_window_half_size_; tx++) for (int tx = -template_window_half_size_; tx <= template_window_half_size_; tx++)
{ {
int dist = calcDist<T>(extended_src_, int dist = D::template calcDist<T>(extended_src_,
border_size_ + i + ty, border_size_ + j + tx, border_size_ + i + ty, border_size_ + j + tx,
border_size_ + start_y + ty, border_size_ + start_x + tx); border_size_ + start_y + ty, border_size_ + start_x + tx);
...@@ -288,8 +283,8 @@ inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForFirstElementInRow( ...@@ -288,8 +283,8 @@ inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForFirstElementInRow(
} }
} }
template <class T> template <typename T, typename IT, typename UIT, typename D, typename WT>
inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForElementInFirstRow( inline void FastNlMeansDenoisingInvoker<T, IT, UIT, D, WT>::calcDistSumsForElementInFirstRow(
int i, int j, int first_col_num, int i, int j, int first_col_num,
Array2d<int>& dist_sums, Array2d<int>& dist_sums,
Array3d<int>& col_dist_sums, Array3d<int>& col_dist_sums,
...@@ -312,7 +307,7 @@ inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForElementInFirstRow( ...@@ -312,7 +307,7 @@ inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForElementInFirstRow(
int by = start_by + y; int by = start_by + y;
int bx = start_bx + x; int bx = start_bx + x;
for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++) for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++)
col_dist_sums[new_last_col_num][y][x] += calcDist<T>(extended_src_, ay + ty, ax, by + ty, bx); col_dist_sums[new_last_col_num][y][x] += D::template calcDist<T>(extended_src_, ay + ty, ax, by + ty, bx);
dist_sums[y][x] += col_dist_sums[new_last_col_num][y][x]; dist_sums[y][x] += col_dist_sums[new_last_col_num][y][x];
up_col_dist_sums[j][y][x] = col_dist_sums[new_last_col_num][y][x]; up_col_dist_sums[j][y][x] = col_dist_sums[new_last_col_num][y][x];
......
...@@ -28,12 +28,16 @@ static int divUp(int a, int b) ...@@ -28,12 +28,16 @@ static int divUp(int a, int b)
return (a + b - 1) / b; return (a + b - 1) / b;
} }
template <typename FT> template <typename FT, typename ST, typename WT>
static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindowSize, int templateWindowSize, FT h, int cn, static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight,
int searchWindowSize, int templateWindowSize,
const FT *h, int hn, int cn, int normType,
int & almostTemplateWindowSizeSqBinShift) int & almostTemplateWindowSizeSqBinShift)
{ {
const int maxEstimateSumValue = searchWindowSize * searchWindowSize * 255; const WT maxEstimateSumValue = searchWindowSize * searchWindowSize *
int fixedPointMult = std::numeric_limits<int>::max() / maxEstimateSumValue; std::numeric_limits<ST>::max();
int fixedPointMult = (int)std::min<WT>(std::numeric_limits<WT>::max() / maxEstimateSumValue,
std::numeric_limits<int>::max());
int depth = DataType<FT>::depth; int depth = DataType<FT>::depth;
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
...@@ -48,33 +52,44 @@ static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindow ...@@ -48,33 +52,44 @@ static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindow
FT almostDist2ActualDistMultiplier = (FT)(1 << almostTemplateWindowSizeSqBinShift) / templateWindowSizeSq; FT almostDist2ActualDistMultiplier = (FT)(1 << almostTemplateWindowSizeSqBinShift) / templateWindowSizeSq;
const FT WEIGHT_THRESHOLD = 1e-3f; const FT WEIGHT_THRESHOLD = 1e-3f;
int maxDist = 255 * 255 * cn; int maxDist = normType == NORM_L1 ? std::numeric_limits<ST>::max() * cn :
std::numeric_limits<ST>::max() * std::numeric_limits<ST>::max() * cn;
int almostMaxDist = (int)(maxDist / almostDist2ActualDistMultiplier + 1); int almostMaxDist = (int)(maxDist / almostDist2ActualDistMultiplier + 1);
FT den = 1.0f / (h * h * cn); FT den[4];
CV_Assert(hn > 0 && hn <= 4);
for (int i=0; i<hn; i++)
den[i] = 1.0f / (h[i] * h[i] * cn);
almostDist2Weight.create(1, almostMaxDist, CV_32SC1); almostDist2Weight.create(1, almostMaxDist, CV_32SC(hn == 3 ? 4 : hn));
char buf[40];
ocl::Kernel k("calcAlmostDist2Weight", ocl::photo::nlmeans_oclsrc, ocl::Kernel k("calcAlmostDist2Weight", ocl::photo::nlmeans_oclsrc,
format("-D OP_CALC_WEIGHTS -D FT=%s%s", ocl::typeToStr(depth), format("-D OP_CALC_WEIGHTS -D FT=%s -D w_t=%s"
doubleSupport ? " -D DOUBLE_SUPPORT" : "")); " -D wlut_t=%s -D convert_wlut_t=%s%s%s",
ocl::typeToStr(depth), ocl::typeToStr(CV_MAKE_TYPE(depth, hn)),
ocl::typeToStr(CV_32SC(hn)), ocl::convertTypeStr(depth, CV_32S, hn, buf),
doubleSupport ? " -D DOUBLE_SUPPORT" : "",
normType == NORM_L1 ? " -D ABS" : ""));
if (k.empty()) if (k.empty())
return false; return false;
k.args(ocl::KernelArg::PtrWriteOnly(almostDist2Weight), almostMaxDist, k.args(ocl::KernelArg::PtrWriteOnly(almostDist2Weight), almostMaxDist,
almostDist2ActualDistMultiplier, fixedPointMult, den, WEIGHT_THRESHOLD); almostDist2ActualDistMultiplier, fixedPointMult,
ocl::KernelArg::Constant(den, (hn == 3 ? 4 : hn)*sizeof(FT)), WEIGHT_THRESHOLD);
size_t globalsize[1] = { almostMaxDist }; size_t globalsize[1] = { almostMaxDist };
return k.run(1, globalsize, NULL, false); return k.run(1, globalsize, NULL, false);
} }
static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h, static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, const float *h, int hn,
int templateWindowSize, int searchWindowSize) int templateWindowSize, int searchWindowSize, int normType)
{ {
int type = _src.type(), cn = CV_MAT_CN(type); int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
int ctaSize = ocl::Device::getDefault().isIntel() ? CTA_SIZE_INTEL : CTA_SIZE_DEFAULT; int ctaSize = ocl::Device::getDefault().isIntel() ? CTA_SIZE_INTEL : CTA_SIZE_DEFAULT;
Size size = _src.size(); Size size = _src.size();
if ( type != CV_8UC1 && type != CV_8UC2 && type != CV_8UC4 ) if (cn < 1 || cn > 4 || ((normType != NORM_L2 || depth != CV_8U) &&
(normType != NORM_L1 || (depth != CV_8U && depth != CV_16U))))
return false; return false;
int templateWindowHalfWize = templateWindowSize / 2; int templateWindowHalfWize = templateWindowSize / 2;
...@@ -84,33 +99,68 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h, ...@@ -84,33 +99,68 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
int nblocksx = divUp(size.width, BLOCK_COLS), nblocksy = divUp(size.height, BLOCK_ROWS); int nblocksx = divUp(size.width, BLOCK_COLS), nblocksy = divUp(size.height, BLOCK_ROWS);
int almostTemplateWindowSizeSqBinShift = -1; int almostTemplateWindowSizeSqBinShift = -1;
char cvt[2][40]; char buf[4][40];
String opts = format("-D OP_CALC_FASTNLMEANS -D TEMPLATE_SIZE=%d -D SEARCH_SIZE=%d" String opts = format("-D OP_CALC_FASTNLMEANS -D TEMPLATE_SIZE=%d -D SEARCH_SIZE=%d"
" -D uchar_t=%s -D int_t=%s -D BLOCK_COLS=%d -D BLOCK_ROWS=%d" " -D pixel_t=%s -D int_t=%s -D wlut_t=%s"
" -D weight_t=%s -D convert_weight_t=%s -D sum_t=%s -D convert_sum_t=%s"
" -D BLOCK_COLS=%d -D BLOCK_ROWS=%d"
" -D CTA_SIZE=%d -D TEMPLATE_SIZE2=%d -D SEARCH_SIZE2=%d" " -D CTA_SIZE=%d -D TEMPLATE_SIZE2=%d -D SEARCH_SIZE2=%d"
" -D convert_int_t=%s -D cn=%d -D convert_uchar_t=%s", " -D convert_int_t=%s -D cn=%d -D psz=%d -D convert_pixel_t=%s%s",
templateWindowSize, searchWindowSize, ocl::typeToStr(type), templateWindowSize, searchWindowSize,
ocl::typeToStr(CV_32SC(cn)), BLOCK_COLS, BLOCK_ROWS, ctaSize, ocl::typeToStr(type), ocl::typeToStr(CV_32SC(cn)),
templateWindowHalfWize, searchWindowHalfSize, ocl::typeToStr(CV_32SC(hn)),
ocl::convertTypeStr(CV_8U, CV_32S, cn, cvt[0]), cn, depth == CV_8U ? ocl::typeToStr(CV_32SC(hn)) :
ocl::convertTypeStr(CV_32S, CV_8U, cn, cvt[1])); format("long%s", hn > 1 ? format("%d", hn).c_str() : "").c_str(),
depth == CV_8U ? ocl::convertTypeStr(CV_32S, CV_32S, hn, buf[0]) :
format("convert_long%s", hn > 1 ? format("%d", hn).c_str() : "").c_str(),
depth == CV_8U ? ocl::typeToStr(CV_32SC(cn)) :
format("long%s", cn > 1 ? format("%d", cn).c_str() : "").c_str(),
depth == CV_8U ? ocl::convertTypeStr(depth, CV_32S, cn, buf[1]) :
format("convert_long%s", cn > 1 ? format("%d", cn).c_str() : "").c_str(),
BLOCK_COLS, BLOCK_ROWS,
ctaSize, templateWindowHalfWize, searchWindowHalfSize,
ocl::convertTypeStr(depth, CV_32S, cn, buf[2]), cn,
(depth == CV_8U ? sizeof(uchar) : sizeof(ushort)) * (cn == 3 ? 4 : cn),
ocl::convertTypeStr(CV_32S, depth, cn, buf[3]),
normType == NORM_L1 ? " -D ABS" : "");
ocl::Kernel k("fastNlMeansDenoising", ocl::photo::nlmeans_oclsrc, opts); ocl::Kernel k("fastNlMeansDenoising", ocl::photo::nlmeans_oclsrc, opts);
if (k.empty()) if (k.empty())
return false; return false;
UMat almostDist2Weight; UMat almostDist2Weight;
if (!ocl_calcAlmostDist2Weight<float>(almostDist2Weight, searchWindowSize, templateWindowSize, h, cn, if ((depth == CV_8U &&
almostTemplateWindowSizeSqBinShift)) !ocl_calcAlmostDist2Weight<float, uchar, int>(almostDist2Weight,
searchWindowSize, templateWindowSize,
h, hn, cn, normType,
almostTemplateWindowSizeSqBinShift)) ||
(depth == CV_16U &&
!ocl_calcAlmostDist2Weight<float, ushort, int64>(almostDist2Weight,
searchWindowSize, templateWindowSize,
h, hn, cn, normType,
almostTemplateWindowSizeSqBinShift)))
return false; return false;
CV_Assert(almostTemplateWindowSizeSqBinShift >= 0); CV_Assert(almostTemplateWindowSizeSqBinShift >= 0);
UMat srcex; UMat srcex;
int borderSize = searchWindowHalfSize + templateWindowHalfWize; int borderSize = searchWindowHalfSize + templateWindowHalfWize;
if (cn == 3) {
srcex.create(size.height + 2*borderSize, size.width + 2*borderSize, CV_MAKE_TYPE(depth, 4));
UMat src(srcex, Rect(borderSize, borderSize, size.width, size.height));
int from_to[] = { 0,0, 1,1, 2,2 };
mixChannels(std::vector<UMat>(1, _src.getUMat()), std::vector<UMat>(1, src), from_to, 3);
copyMakeBorder(src, srcex, borderSize, borderSize, borderSize, borderSize,
BORDER_DEFAULT|BORDER_ISOLATED); // create borders in place
}
else
copyMakeBorder(_src, srcex, borderSize, borderSize, borderSize, borderSize, BORDER_DEFAULT); copyMakeBorder(_src, srcex, borderSize, borderSize, borderSize, borderSize, BORDER_DEFAULT);
_dst.create(size, type); _dst.create(size, type);
UMat dst = _dst.getUMat(); UMat dst;
if (cn == 3)
dst.create(size, CV_MAKE_TYPE(depth, 4));
else
dst = _dst.getUMat();
int searchWindowSizeSq = searchWindowSize * searchWindowSize; int searchWindowSizeSq = searchWindowSize * searchWindowSize;
Size upColSumSize(size.width, searchWindowSizeSq * nblocksy); Size upColSumSize(size.width, searchWindowSizeSq * nblocksy);
...@@ -123,7 +173,14 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h, ...@@ -123,7 +173,14 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
ocl::KernelArg::PtrReadOnly(buffer), almostTemplateWindowSizeSqBinShift); ocl::KernelArg::PtrReadOnly(buffer), almostTemplateWindowSizeSqBinShift);
size_t globalsize[2] = { nblocksx * ctaSize, nblocksy }, localsize[2] = { ctaSize, 1 }; size_t globalsize[2] = { nblocksx * ctaSize, nblocksy }, localsize[2] = { ctaSize, 1 };
return k.run(2, globalsize, localsize, false); if (!k.run(2, globalsize, localsize, false)) return false;
if (cn == 3) {
int from_to[] = { 0,0, 1,1, 2,2 };
mixChannels(std::vector<UMat>(1, dst), std::vector<UMat>(1, _dst.getUMat()), from_to, 3);
}
return true;
} }
static bool ocl_fastNlMeansDenoisingColored( InputArray _src, OutputArray _dst, static bool ocl_fastNlMeansDenoisingColored( InputArray _src, OutputArray _dst,
......
...@@ -50,14 +50,14 @@ ...@@ -50,14 +50,14 @@
using namespace cv; using namespace cv;
template <typename T> template <typename T, typename IT, typename UIT, typename D, typename WT>
struct FastNlMeansMultiDenoisingInvoker : struct FastNlMeansMultiDenoisingInvoker :
ParallelLoopBody ParallelLoopBody
{ {
public: public:
FastNlMeansMultiDenoisingInvoker(const std::vector<Mat>& srcImgs, int imgToDenoiseIndex, FastNlMeansMultiDenoisingInvoker(const std::vector<Mat>& srcImgs, int imgToDenoiseIndex,
int temporalWindowSize, Mat& dst, int template_window_size, int temporalWindowSize, Mat& dst, int template_window_size,
int search_window_size, const float h); int search_window_size, const float *h);
void operator() (const Range& range) const; void operator() (const Range& range) const;
...@@ -81,9 +81,9 @@ private: ...@@ -81,9 +81,9 @@ private:
int search_window_half_size_; int search_window_half_size_;
int temporal_window_half_size_; int temporal_window_half_size_;
int fixed_point_mult_; typename pixelInfo<WT>::sampleType fixed_point_mult_;
int almost_template_window_size_sq_bin_shift; int almost_template_window_size_sq_bin_shift;
std::vector<int> almost_dist2weight; std::vector<WT> almost_dist2weight;
void calcDistSumsForFirstElementInRow(int i, Array3d<int>& dist_sums, void calcDistSumsForFirstElementInRow(int i, Array3d<int>& dist_sums,
Array4d<int>& col_dist_sums, Array4d<int>& col_dist_sums,
...@@ -94,19 +94,19 @@ private: ...@@ -94,19 +94,19 @@ private:
Array4d<int>& up_col_dist_sums) const; Array4d<int>& up_col_dist_sums) const;
}; };
template <class T> template <typename T, typename IT, typename UIT, typename D, typename WT>
FastNlMeansMultiDenoisingInvoker<T>::FastNlMeansMultiDenoisingInvoker( FastNlMeansMultiDenoisingInvoker<T, IT, UIT, D, WT>::FastNlMeansMultiDenoisingInvoker(
const std::vector<Mat>& srcImgs, const std::vector<Mat>& srcImgs,
int imgToDenoiseIndex, int imgToDenoiseIndex,
int temporalWindowSize, int temporalWindowSize,
cv::Mat& dst, cv::Mat& dst,
int template_window_size, int template_window_size,
int search_window_size, int search_window_size,
const float h) : const float *h) :
dst_(dst), extended_srcs_(srcImgs.size()) dst_(dst), extended_srcs_(srcImgs.size())
{ {
CV_Assert(srcImgs.size() > 0); CV_Assert(srcImgs.size() > 0);
CV_Assert(srcImgs[0].channels() == sizeof(T)); CV_Assert(srcImgs[0].channels() == pixelInfo<T>::channels);
rows_ = srcImgs[0].rows; rows_ = srcImgs[0].rows;
cols_ = srcImgs[0].cols; cols_ = srcImgs[0].cols;
...@@ -125,8 +125,10 @@ FastNlMeansMultiDenoisingInvoker<T>::FastNlMeansMultiDenoisingInvoker( ...@@ -125,8 +125,10 @@ FastNlMeansMultiDenoisingInvoker<T>::FastNlMeansMultiDenoisingInvoker(
border_size_, border_size_, border_size_, border_size_, cv::BORDER_DEFAULT); border_size_, border_size_, border_size_, border_size_, cv::BORDER_DEFAULT);
main_extended_src_ = extended_srcs_[temporal_window_half_size_]; main_extended_src_ = extended_srcs_[temporal_window_half_size_];
const int max_estimate_sum_value = temporal_window_size_ * search_window_size_ * search_window_size_ * 255; const IT max_estimate_sum_value =
fixed_point_mult_ = std::numeric_limits<int>::max() / max_estimate_sum_value; (IT)temporal_window_size_ * (IT)search_window_size_ * (IT)search_window_size_ * (IT)pixelInfo<T>::sampleMax();
fixed_point_mult_ = (int)std::min<IT>(std::numeric_limits<IT>::max() / max_estimate_sum_value,
pixelInfo<WT>::sampleMax());
// precalc weight for every possible l2 dist between blocks // precalc weight for every possible l2 dist between blocks
// additional optimization of precalced weights to replace division(averaging) by binary shift // additional optimization of precalced weights to replace division(averaging) by binary shift
...@@ -138,30 +140,24 @@ FastNlMeansMultiDenoisingInvoker<T>::FastNlMeansMultiDenoisingInvoker( ...@@ -138,30 +140,24 @@ FastNlMeansMultiDenoisingInvoker<T>::FastNlMeansMultiDenoisingInvoker(
int almost_template_window_size_sq = 1 << almost_template_window_size_sq_bin_shift; int almost_template_window_size_sq = 1 << almost_template_window_size_sq_bin_shift;
double almost_dist2actual_dist_multiplier = (double) almost_template_window_size_sq / template_window_size_sq; double almost_dist2actual_dist_multiplier = (double) almost_template_window_size_sq / template_window_size_sq;
int max_dist = 255 * 255 * sizeof(T); int max_dist = D::template maxDist<T>();
int almost_max_dist = (int) (max_dist / almost_dist2actual_dist_multiplier + 1); int almost_max_dist = (int)(max_dist / almost_dist2actual_dist_multiplier + 1);
almost_dist2weight.resize(almost_max_dist); almost_dist2weight.resize(almost_max_dist);
const double WEIGHT_THRESHOLD = 0.001;
for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++) for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++)
{ {
double dist = almost_dist * almost_dist2actual_dist_multiplier; double dist = almost_dist * almost_dist2actual_dist_multiplier;
int weight = cvRound(fixed_point_mult_ * std::exp(-dist / (h * h * sizeof(T)))); almost_dist2weight[almost_dist] =
D::template calcWeight<T, WT>(dist, h, fixed_point_mult_);
if (weight < WEIGHT_THRESHOLD * fixed_point_mult_)
weight = 0;
almost_dist2weight[almost_dist] = weight;
} }
CV_Assert(almost_dist2weight[0] == fixed_point_mult_);
// additional optimization init end // additional optimization init end
if (dst_.empty()) if (dst_.empty())
dst_ = Mat::zeros(srcImgs[0].size(), srcImgs[0].type()); dst_ = Mat::zeros(srcImgs[0].size(), srcImgs[0].type());
} }
template <class T> template <typename T, typename IT, typename UIT, typename D, typename WT>
void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const void FastNlMeansMultiDenoisingInvoker<T, IT, UIT, D, WT>::operator() (const Range& range) const
{ {
int row_from = range.start; int row_from = range.start;
int row_to = range.end - 1; int row_to = range.end - 1;
...@@ -234,7 +230,7 @@ void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const ...@@ -234,7 +230,7 @@ void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const
dist_sums_row[x] -= col_dist_sums_row[x]; dist_sums_row[x] -= col_dist_sums_row[x];
col_dist_sums_row[x] = up_col_dist_sums_row[x] + col_dist_sums_row[x] = up_col_dist_sums_row[x] +
calcUpDownDist(a_up, a_down, b_up_ptr[start_bx + x], b_down_ptr[start_bx + x]); D::template calcUpDownDist<T>(a_up, a_down, b_up_ptr[start_bx + x], b_down_ptr[start_bx + x]);
dist_sums_row[x] += col_dist_sums_row[x]; dist_sums_row[x] += col_dist_sums_row[x];
up_col_dist_sums_row[x] = col_dist_sums_row[x]; up_col_dist_sums_row[x] = col_dist_sums_row[x];
...@@ -247,11 +243,11 @@ void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const ...@@ -247,11 +243,11 @@ void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const
} }
// calc weights // calc weights
int weights_sum = 0; IT estimation[pixelInfo<T>::channels], weights_sum[pixelInfo<WT>::channels];
for (size_t channel_num = 0; channel_num < pixelInfo<T>::channels; channel_num++)
int estimation[3];
for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++)
estimation[channel_num] = 0; estimation[channel_num] = 0;
for (size_t channel_num = 0; channel_num < pixelInfo<WT>::channels; channel_num++)
weights_sum[channel_num] = 0;
for (int d = 0; d < temporal_window_size_; d++) for (int d = 0; d < temporal_window_size_; d++)
{ {
...@@ -266,26 +262,22 @@ void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const ...@@ -266,26 +262,22 @@ void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const
{ {
int almostAvgDist = dist_sums_row[x] >> almost_template_window_size_sq_bin_shift; int almostAvgDist = dist_sums_row[x] >> almost_template_window_size_sq_bin_shift;
int weight = almost_dist2weight[almostAvgDist]; WT weight = almost_dist2weight[almostAvgDist];
weights_sum += weight;
T p = cur_row_ptr[border_size_ + search_window_x + x]; T p = cur_row_ptr[border_size_ + search_window_x + x];
incWithWeight(estimation, weight, p); incWithWeight<T, IT, WT>(estimation, weights_sum, weight, p);
} }
} }
} }
for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++) divByWeightsSum<IT, UIT, pixelInfo<T>::channels, pixelInfo<WT>::channels>(estimation,
estimation[channel_num] = ((unsigned)estimation[channel_num] + weights_sum / 2) / weights_sum; weights_sum);
dst_.at<T>(i,j) = saturateCastFromArray<T, IT>(estimation);
dst_.at<T>(i,j) = saturateCastFromArray<T>(estimation);
} }
} }
} }
template <class T> template <typename T, typename IT, typename UIT, typename D, typename WT>
inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForFirstElementInRow( inline void FastNlMeansMultiDenoisingInvoker<T, IT, UIT, D, WT>::calcDistSumsForFirstElementInRow(
int i, Array3d<int>& dist_sums, Array4d<int>& col_dist_sums, Array4d<int>& up_col_dist_sums) const int i, Array3d<int>& dist_sums, Array4d<int>& col_dist_sums, Array4d<int>& up_col_dist_sums) const
{ {
int j = 0; int j = 0;
...@@ -310,7 +302,7 @@ inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForFirstElementInRo ...@@ -310,7 +302,7 @@ inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForFirstElementInRo
{ {
for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++) for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++)
{ {
int dist = calcDist<T>( int dist = D::template calcDist<T>(
main_extended_src_.at<T>(border_size_ + i + ty, border_size_ + j + tx), main_extended_src_.at<T>(border_size_ + i + ty, border_size_ + j + tx),
cur_extended_src.at<T>(border_size_ + start_y + ty, border_size_ + start_x + tx)); cur_extended_src.at<T>(border_size_ + start_y + ty, border_size_ + start_x + tx));
...@@ -325,8 +317,8 @@ inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForFirstElementInRo ...@@ -325,8 +317,8 @@ inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForFirstElementInRo
} }
} }
template <class T> template <typename T, typename IT, typename UIT, typename D, typename WT>
inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForElementInFirstRow( inline void FastNlMeansMultiDenoisingInvoker<T, IT, UIT, D, WT>::calcDistSumsForElementInFirstRow(
int i, int j, int first_col_num, Array3d<int>& dist_sums, int i, int j, int first_col_num, Array3d<int>& dist_sums,
Array4d<int>& col_dist_sums, Array4d<int>& up_col_dist_sums) const Array4d<int>& col_dist_sums, Array4d<int>& up_col_dist_sums) const
{ {
...@@ -353,7 +345,7 @@ inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForElementInFirstRo ...@@ -353,7 +345,7 @@ inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForElementInFirstRo
int* col_dist_sums_ptr = &col_dist_sums[new_last_col_num][d][y][x]; int* col_dist_sums_ptr = &col_dist_sums[new_last_col_num][d][y][x];
for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++) for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++)
{ {
*col_dist_sums_ptr += calcDist<T>( *col_dist_sums_ptr += D::template calcDist<T>(
main_extended_src_.at<T>(ay + ty, ax), main_extended_src_.at<T>(ay + ty, ax),
cur_extended_src.at<T>(by + ty, bx)); cur_extended_src.at<T>(by + ty, bx));
} }
......
This diff is collapsed.
...@@ -13,11 +13,11 @@ ...@@ -13,11 +13,11 @@
namespace cvtest { namespace cvtest {
namespace ocl { namespace ocl {
PARAM_TEST_CASE(FastNlMeansDenoisingTestBase, Channels, bool) PARAM_TEST_CASE(FastNlMeansDenoisingTestBase, Channels, int, bool, bool)
{ {
int cn, templateWindowSize, searchWindowSize; int cn, normType, templateWindowSize, searchWindowSize;
float h; std::vector<float> h;
bool use_roi; bool use_roi, use_image;
TEST_DECLARE_INPUT_PARAMETER(src); TEST_DECLARE_INPUT_PARAMETER(src);
TEST_DECLARE_OUTPUT_PARAMETER(dst); TEST_DECLARE_OUTPUT_PARAMETER(dst);
...@@ -25,29 +25,46 @@ PARAM_TEST_CASE(FastNlMeansDenoisingTestBase, Channels, bool) ...@@ -25,29 +25,46 @@ PARAM_TEST_CASE(FastNlMeansDenoisingTestBase, Channels, bool)
virtual void SetUp() virtual void SetUp()
{ {
cn = GET_PARAM(0); cn = GET_PARAM(0);
use_roi = GET_PARAM(1); normType = GET_PARAM(1);
use_roi = GET_PARAM(2);
use_image = GET_PARAM(3);
templateWindowSize = 7; templateWindowSize = 7;
searchWindowSize = 21; searchWindowSize = 21;
h = 3.0f;
h.resize(cn);
for (int i=0; i<cn; i++)
h[i] = 3.0f + 0.5f*i;
} }
virtual void generateTestData() virtual void generateTestData()
{ {
const int type = CV_8UC(cn);
Mat image; Mat image;
if (cn == 1)
{ if (use_image) {
image = readImage("denoising/lena_noised_gaussian_sigma=10.png", IMREAD_GRAYSCALE); image = readImage("denoising/lena_noised_gaussian_sigma=10.png",
cn == 1 ? IMREAD_GRAYSCALE : IMREAD_COLOR);
ASSERT_FALSE(image.empty()); ASSERT_FALSE(image.empty());
} }
const int type = CV_8UC(cn); Size roiSize = use_image ? image.size() : randomSize(1, MAX_VALUE);
Size roiSize = cn == 1 ? image.size() : randomSize(1, MAX_VALUE);
Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0); Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
randomSubMat(src, src_roi, roiSize, srcBorder, type, 0, 255); randomSubMat(src, src_roi, roiSize, srcBorder, type, 0, 255);
if (cn == 1) if (use_image) {
image.copyTo(src_roi); ASSERT_TRUE(cn > 0 && cn <= 4);
if (cn == 2) {
int from_to[] = { 0,0, 1,1 };
src_roi.create(roiSize, type);
mixChannels(&image, 1, &src_roi, 1, from_to, 2);
}
else if (cn == 4) {
int from_to[] = { 0,0, 1,1, 2,2, 1,3};
src_roi.create(roiSize, type);
mixChannels(&image, 1, &src_roi, 1, from_to, 4);
}
else image.copyTo(src_roi);
}
Border dstBorder = randomBorder(0, use_roi ? MAX_VALUE : 0); Border dstBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
randomSubMat(dst, dst_roi, roiSize, dstBorder, type, 0, 255); randomSubMat(dst, dst_roi, roiSize, dstBorder, type, 0, 255);
...@@ -65,8 +82,23 @@ OCL_TEST_P(FastNlMeansDenoising, Mat) ...@@ -65,8 +82,23 @@ OCL_TEST_P(FastNlMeansDenoising, Mat)
{ {
generateTestData(); generateTestData();
OCL_OFF(cv::fastNlMeansDenoising(src_roi, dst_roi, h, templateWindowSize, searchWindowSize)); OCL_OFF(cv::fastNlMeansDenoising(src_roi, dst_roi, std::vector<float>(1, h[0]), templateWindowSize, searchWindowSize, normType));
OCL_ON(cv::fastNlMeansDenoising(usrc_roi, udst_roi, h, templateWindowSize, searchWindowSize)); OCL_ON(cv::fastNlMeansDenoising(usrc_roi, udst_roi, std::vector<float>(1, h[0]), templateWindowSize, searchWindowSize, normType));
OCL_EXPECT_MATS_NEAR(dst, 1);
}
}
typedef FastNlMeansDenoisingTestBase FastNlMeansDenoising_hsep;
OCL_TEST_P(FastNlMeansDenoising_hsep, Mat)
{
for (int j = 0; j < test_loop_times; j++)
{
generateTestData();
OCL_OFF(cv::fastNlMeansDenoising(src_roi, dst_roi, h, templateWindowSize, searchWindowSize, normType));
OCL_ON(cv::fastNlMeansDenoising(usrc_roi, udst_roi, h, templateWindowSize, searchWindowSize, normType));
OCL_EXPECT_MATS_NEAR(dst, 1); OCL_EXPECT_MATS_NEAR(dst, 1);
} }
...@@ -80,15 +112,21 @@ OCL_TEST_P(FastNlMeansDenoisingColored, Mat) ...@@ -80,15 +112,21 @@ OCL_TEST_P(FastNlMeansDenoisingColored, Mat)
{ {
generateTestData(); generateTestData();
OCL_OFF(cv::fastNlMeansDenoisingColored(src_roi, dst_roi, h, h, templateWindowSize, searchWindowSize)); OCL_OFF(cv::fastNlMeansDenoisingColored(src_roi, dst_roi, h[0], h[0], templateWindowSize, searchWindowSize));
OCL_ON(cv::fastNlMeansDenoisingColored(usrc_roi, udst_roi, h, h, templateWindowSize, searchWindowSize)); OCL_ON(cv::fastNlMeansDenoisingColored(usrc_roi, udst_roi, h[0], h[0], templateWindowSize, searchWindowSize));
OCL_EXPECT_MATS_NEAR(dst, 1); OCL_EXPECT_MATS_NEAR(dst, 1);
} }
} }
OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoising, Combine(Values(1, 2), Bool())); OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoising,
OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoisingColored, Combine(Values(3, 4), Bool())); Combine(Values(1, 2, 3, 4), Values((int)NORM_L2, (int)NORM_L1),
Bool(), Values(true)));
OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoising_hsep,
Combine(Values(1, 2, 3, 4), Values((int)NORM_L2, (int)NORM_L1),
Bool(), Values(true)));
OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoisingColored,
Combine(Values(3, 4), Values((int)NORM_L2), Bool(), Values(false)));
} } // namespace cvtest::ocl } } // namespace cvtest::ocl
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment