Commit e178294b authored by Erik Karlsson

Refactoring in preparation for 16-bit implementation of fastNlMeansDenoising

parent 5466e321
...@@ -65,17 +65,17 @@ void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, float h, ...@@ -65,17 +65,17 @@ void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, float h,
switch (src.type()) { switch (src.type()) {
case CV_8U: case CV_8U:
parallel_for_(cv::Range(0, src.rows), parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<uchar>( FastNlMeansDenoisingInvoker<uchar, int, unsigned int>(
src, dst, templateWindowSize, searchWindowSize, h)); src, dst, templateWindowSize, searchWindowSize, h));
break; break;
case CV_8UC2: case CV_8UC2:
parallel_for_(cv::Range(0, src.rows), parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<cv::Vec2b>( FastNlMeansDenoisingInvoker<cv::Vec2b, int, unsigned int>(
src, dst, templateWindowSize, searchWindowSize, h)); src, dst, templateWindowSize, searchWindowSize, h));
break; break;
case CV_8UC3: case CV_8UC3:
parallel_for_(cv::Range(0, src.rows), parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<cv::Vec3b>( FastNlMeansDenoisingInvoker<cv::Vec3b, int, unsigned int>(
src, dst, templateWindowSize, searchWindowSize, h)); src, dst, templateWindowSize, searchWindowSize, h));
break; break;
default: default:
...@@ -175,19 +175,19 @@ void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _ds ...@@ -175,19 +175,19 @@ void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _ds
{ {
case CV_8U: case CV_8U:
parallel_for_(cv::Range(0, srcImgs[0].rows), parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<uchar>( FastNlMeansMultiDenoisingInvoker<uchar, int, unsigned int>(
srcImgs, imgToDenoiseIndex, temporalWindowSize, srcImgs, imgToDenoiseIndex, temporalWindowSize,
dst, templateWindowSize, searchWindowSize, h)); dst, templateWindowSize, searchWindowSize, h));
break; break;
case CV_8UC2: case CV_8UC2:
parallel_for_(cv::Range(0, srcImgs[0].rows), parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<cv::Vec2b>( FastNlMeansMultiDenoisingInvoker<cv::Vec2b, int, unsigned int>(
srcImgs, imgToDenoiseIndex, temporalWindowSize, srcImgs, imgToDenoiseIndex, temporalWindowSize,
dst, templateWindowSize, searchWindowSize, h)); dst, templateWindowSize, searchWindowSize, h));
break; break;
case CV_8UC3: case CV_8UC3:
parallel_for_(cv::Range(0, srcImgs[0].rows), parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<cv::Vec3b>( FastNlMeansMultiDenoisingInvoker<cv::Vec3b, int, unsigned int>(
srcImgs, imgToDenoiseIndex, temporalWindowSize, srcImgs, imgToDenoiseIndex, temporalWindowSize,
dst, templateWindowSize, searchWindowSize, h)); dst, templateWindowSize, searchWindowSize, h));
break; break;
......
...@@ -50,7 +50,7 @@ ...@@ -50,7 +50,7 @@
using namespace cv; using namespace cv;
template <typename T> template <typename T, typename IT, typename UIT>
struct FastNlMeansDenoisingInvoker : struct FastNlMeansDenoisingInvoker :
public ParallelLoopBody public ParallelLoopBody
{ {
...@@ -75,20 +75,20 @@ private: ...@@ -75,20 +75,20 @@ private:
int template_window_half_size_; int template_window_half_size_;
int search_window_half_size_; int search_window_half_size_;
int fixed_point_mult_; IT fixed_point_mult_;
int almost_template_window_size_sq_bin_shift_; int almost_template_window_size_sq_bin_shift_;
std::vector<int> almost_dist2weight_; std::vector<IT> almost_dist2weight_;
void calcDistSumsForFirstElementInRow( void calcDistSumsForFirstElementInRow(
int i, Array2d<int>& dist_sums, int i, Array2d<IT>& dist_sums,
Array3d<int>& col_dist_sums, Array3d<IT>& col_dist_sums,
Array3d<int>& up_col_dist_sums) const; Array3d<IT>& up_col_dist_sums) const;
void calcDistSumsForElementInFirstRow( void calcDistSumsForElementInFirstRow(
int i, int j, int first_col_num, int i, int j, int first_col_num,
Array2d<int>& dist_sums, Array2d<IT>& dist_sums,
Array3d<int>& col_dist_sums, Array3d<IT>& col_dist_sums,
Array3d<int>& up_col_dist_sums) const; Array3d<IT>& up_col_dist_sums) const;
}; };
inline int getNearestPowerOf2(int value) inline int getNearestPowerOf2(int value)
...@@ -99,8 +99,8 @@ inline int getNearestPowerOf2(int value) ...@@ -99,8 +99,8 @@ inline int getNearestPowerOf2(int value)
return p; return p;
} }
template <class T> template <class T, typename IT, typename UIT>
FastNlMeansDenoisingInvoker<T>::FastNlMeansDenoisingInvoker( FastNlMeansDenoisingInvoker<T, IT, UIT>::FastNlMeansDenoisingInvoker(
const Mat& src, Mat& dst, const Mat& src, Mat& dst,
int template_window_size, int template_window_size,
int search_window_size, int search_window_size,
...@@ -117,8 +117,8 @@ FastNlMeansDenoisingInvoker<T>::FastNlMeansDenoisingInvoker( ...@@ -117,8 +117,8 @@ FastNlMeansDenoisingInvoker<T>::FastNlMeansDenoisingInvoker(
border_size_ = search_window_half_size_ + template_window_half_size_; border_size_ = search_window_half_size_ + template_window_half_size_;
copyMakeBorder(src_, extended_src_, border_size_, border_size_, border_size_, border_size_, BORDER_DEFAULT); copyMakeBorder(src_, extended_src_, border_size_, border_size_, border_size_, border_size_, BORDER_DEFAULT);
const int max_estimate_sum_value = search_window_size_ * search_window_size_ * 255; const IT max_estimate_sum_value = (IT)search_window_size_ * (IT)search_window_size_ * 255;
fixed_point_mult_ = std::numeric_limits<int>::max() / max_estimate_sum_value; fixed_point_mult_ = std::numeric_limits<IT>::max() / max_estimate_sum_value;
// precalc weight for every possible l2 dist between blocks // precalc weight for every possible l2 dist between blocks
// additional optimization of precalced weights to replace division(averaging) by binary shift // additional optimization of precalced weights to replace division(averaging) by binary shift
...@@ -127,7 +127,7 @@ FastNlMeansDenoisingInvoker<T>::FastNlMeansDenoisingInvoker( ...@@ -127,7 +127,7 @@ FastNlMeansDenoisingInvoker<T>::FastNlMeansDenoisingInvoker(
almost_template_window_size_sq_bin_shift_ = getNearestPowerOf2(template_window_size_sq); almost_template_window_size_sq_bin_shift_ = getNearestPowerOf2(template_window_size_sq);
double almost_dist2actual_dist_multiplier = ((double)(1 << almost_template_window_size_sq_bin_shift_)) / template_window_size_sq; double almost_dist2actual_dist_multiplier = ((double)(1 << almost_template_window_size_sq_bin_shift_)) / template_window_size_sq;
int max_dist = 255 * 255 * sizeof(T); IT max_dist = 255 * 255 * sizeof(T);
int almost_max_dist = (int)(max_dist / almost_dist2actual_dist_multiplier + 1); int almost_max_dist = (int)(max_dist / almost_dist2actual_dist_multiplier + 1);
almost_dist2weight_.resize(almost_max_dist); almost_dist2weight_.resize(almost_max_dist);
...@@ -135,7 +135,7 @@ FastNlMeansDenoisingInvoker<T>::FastNlMeansDenoisingInvoker( ...@@ -135,7 +135,7 @@ FastNlMeansDenoisingInvoker<T>::FastNlMeansDenoisingInvoker(
for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++) for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++)
{ {
double dist = almost_dist * almost_dist2actual_dist_multiplier; double dist = almost_dist * almost_dist2actual_dist_multiplier;
int weight = cvRound(fixed_point_mult_ * std::exp(-dist / (h * h * sizeof(T)))); IT weight = (IT)round(fixed_point_mult_ * std::exp(-dist / (h * h * sizeof(T))));
if (weight < WEIGHT_THRESHOLD * fixed_point_mult_) if (weight < WEIGHT_THRESHOLD * fixed_point_mult_)
weight = 0; weight = 0;
...@@ -149,21 +149,21 @@ FastNlMeansDenoisingInvoker<T>::FastNlMeansDenoisingInvoker( ...@@ -149,21 +149,21 @@ FastNlMeansDenoisingInvoker<T>::FastNlMeansDenoisingInvoker(
dst_ = Mat::zeros(src_.size(), src_.type()); dst_ = Mat::zeros(src_.size(), src_.type());
} }
template <class T> template <class T, typename IT, typename UIT>
void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const void FastNlMeansDenoisingInvoker<T, IT, UIT>::operator() (const Range& range) const
{ {
int row_from = range.start; int row_from = range.start;
int row_to = range.end - 1; int row_to = range.end - 1;
// sums of cols and rows for current pixel p // sums of cols and rows for current pixel p
Array2d<int> dist_sums(search_window_size_, search_window_size_); Array2d<IT> dist_sums(search_window_size_, search_window_size_);
// for lazy calc optimization (sum of cols for current pixel) // for lazy calc optimization (sum of cols for current pixel)
Array3d<int> col_dist_sums(template_window_size_, search_window_size_, search_window_size_); Array3d<IT> col_dist_sums(template_window_size_, search_window_size_, search_window_size_);
int first_col_num = -1; int first_col_num = -1;
// last elements of column sum (for each element in row) // last elements of column sum (for each element in row)
Array3d<int> up_col_dist_sums(src_.cols, search_window_size_, search_window_size_); Array3d<IT> up_col_dist_sums(src_.cols, search_window_size_, search_window_size_);
for (int i = row_from; i <= row_to; i++) for (int i = row_from; i <= row_to; i++)
{ {
...@@ -202,9 +202,9 @@ void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const ...@@ -202,9 +202,9 @@ void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const
for (int y = 0; y < search_window_size; y++) for (int y = 0; y < search_window_size; y++)
{ {
int * dist_sums_row = dist_sums.row_ptr(y); IT * dist_sums_row = dist_sums.row_ptr(y);
int * col_dist_sums_row = col_dist_sums.row_ptr(first_col_num, y); IT * col_dist_sums_row = col_dist_sums.row_ptr(first_col_num, y);
int * up_col_dist_sums_row = up_col_dist_sums.row_ptr(j, y); IT * up_col_dist_sums_row = up_col_dist_sums.row_ptr(j, y);
const T * b_up_ptr = extended_src_.ptr<T>(start_by - template_window_half_size_ - 1 + y); const T * b_up_ptr = extended_src_.ptr<T>(start_by - template_window_half_size_ - 1 + y);
const T * b_down_ptr = extended_src_.ptr<T>(start_by + template_window_half_size_ + y); const T * b_down_ptr = extended_src_.ptr<T>(start_by + template_window_half_size_ + y);
...@@ -215,7 +215,7 @@ void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const ...@@ -215,7 +215,7 @@ void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const
dist_sums_row[x] -= col_dist_sums_row[x]; dist_sums_row[x] -= col_dist_sums_row[x];
int bx = start_bx + x; int bx = start_bx + x;
col_dist_sums_row[x] = up_col_dist_sums_row[x] + calcUpDownDist(a_up, a_down, b_up_ptr[bx], b_down_ptr[bx]); col_dist_sums_row[x] = up_col_dist_sums_row[x] + calcUpDownDist<T, IT>(a_up, a_down, b_up_ptr[bx], b_down_ptr[bx]);
dist_sums_row[x] += col_dist_sums_row[x]; dist_sums_row[x] += col_dist_sums_row[x];
up_col_dist_sums_row[x] = col_dist_sums_row[x]; up_col_dist_sums_row[x] = col_dist_sums_row[x];
...@@ -227,39 +227,39 @@ void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const ...@@ -227,39 +227,39 @@ void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const
} }
// calc weights // calc weights
int estimation[3], weights_sum = 0; IT estimation[3], weights_sum = 0;
for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++) for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++)
estimation[channel_num] = 0; estimation[channel_num] = 0;
for (int y = 0; y < search_window_size_; y++) for (int y = 0; y < search_window_size_; y++)
{ {
const T* cur_row_ptr = extended_src_.ptr<T>(border_size_ + search_window_y + y); const T* cur_row_ptr = extended_src_.ptr<T>(border_size_ + search_window_y + y);
int* dist_sums_row = dist_sums.row_ptr(y); IT* dist_sums_row = dist_sums.row_ptr(y);
for (int x = 0; x < search_window_size_; x++) for (int x = 0; x < search_window_size_; x++)
{ {
int almostAvgDist = dist_sums_row[x] >> almost_template_window_size_sq_bin_shift_; int almostAvgDist = (int)(dist_sums_row[x] >> almost_template_window_size_sq_bin_shift_);
int weight = almost_dist2weight_[almostAvgDist]; IT weight = almost_dist2weight_[almostAvgDist];
weights_sum += weight; weights_sum += weight;
T p = cur_row_ptr[border_size_ + search_window_x + x]; T p = cur_row_ptr[border_size_ + search_window_x + x];
incWithWeight(estimation, weight, p); incWithWeight<T, IT>(estimation, weight, p);
} }
} }
for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++) for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++)
estimation[channel_num] = ((unsigned)estimation[channel_num] + weights_sum/2) / weights_sum; estimation[channel_num] = (static_cast<UIT>(estimation[channel_num]) + weights_sum/2) / weights_sum;
dst_.at<T>(i,j) = saturateCastFromArray<T>(estimation); dst_.at<T>(i,j) = saturateCastFromArray<T, IT>(estimation);
} }
} }
} }
template <class T> template <class T, typename IT, typename UIT>
inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForFirstElementInRow( inline void FastNlMeansDenoisingInvoker<T, IT, UIT>::calcDistSumsForFirstElementInRow(
int i, int i,
Array2d<int>& dist_sums, Array2d<IT>& dist_sums,
Array3d<int>& col_dist_sums, Array3d<IT>& col_dist_sums,
Array3d<int>& up_col_dist_sums) const Array3d<IT>& up_col_dist_sums) const
{ {
int j = 0; int j = 0;
...@@ -276,7 +276,7 @@ inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForFirstElementInRow( ...@@ -276,7 +276,7 @@ inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForFirstElementInRow(
for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++) for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++)
for (int tx = -template_window_half_size_; tx <= template_window_half_size_; tx++) for (int tx = -template_window_half_size_; tx <= template_window_half_size_; tx++)
{ {
int dist = calcDist<T>(extended_src_, int dist = calcDist<T, IT>(extended_src_,
border_size_ + i + ty, border_size_ + j + tx, border_size_ + i + ty, border_size_ + j + tx,
border_size_ + start_y + ty, border_size_ + start_x + tx); border_size_ + start_y + ty, border_size_ + start_x + tx);
...@@ -288,12 +288,12 @@ inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForFirstElementInRow( ...@@ -288,12 +288,12 @@ inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForFirstElementInRow(
} }
} }
template <class T> template <class T, typename IT, typename UIT>
inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForElementInFirstRow( inline void FastNlMeansDenoisingInvoker<T, IT, UIT>::calcDistSumsForElementInFirstRow(
int i, int j, int first_col_num, int i, int j, int first_col_num,
Array2d<int>& dist_sums, Array2d<IT>& dist_sums,
Array3d<int>& col_dist_sums, Array3d<IT>& col_dist_sums,
Array3d<int>& up_col_dist_sums) const Array3d<IT>& up_col_dist_sums) const
{ {
int ay = border_size_ + i; int ay = border_size_ + i;
int ax = border_size_ + j + template_window_half_size_; int ax = border_size_ + j + template_window_half_size_;
...@@ -312,7 +312,7 @@ inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForElementInFirstRow( ...@@ -312,7 +312,7 @@ inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForElementInFirstRow(
int by = start_by + y; int by = start_by + y;
int bx = start_bx + x; int bx = start_bx + x;
for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++) for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++)
col_dist_sums[new_last_col_num][y][x] += calcDist<T>(extended_src_, ay + ty, ax, by + ty, bx); col_dist_sums[new_last_col_num][y][x] += calcDist<T,IT>(extended_src_, ay + ty, ax, by + ty, bx);
dist_sums[y][x] += col_dist_sums[new_last_col_num][y][x]; dist_sums[y][x] += col_dist_sums[new_last_col_num][y][x];
up_col_dist_sums[j][y][x] = col_dist_sums[new_last_col_num][y][x]; up_col_dist_sums[j][y][x] = col_dist_sums[new_last_col_num][y][x];
......
...@@ -44,118 +44,152 @@ ...@@ -44,118 +44,152 @@
using namespace cv; using namespace cv;
template <typename T> static inline int calcDist(const T a, const T b); template <typename T, typename IT> struct calcDist_
{
static inline IT f(const T a, const T b);
};
template <> inline int calcDist(const uchar a, const uchar b) template <typename IT> struct calcDist_<uchar, IT>
{ {
return (a-b) * (a-b); static inline IT f(uchar a, uchar b)
} {
return (IT)(a-b) * (IT)(a-b);
}
};
template <> inline int calcDist(const Vec2b a, const Vec2b b) template <typename IT> struct calcDist_<Vec2b, IT>
{ {
return (a[0]-b[0])*(a[0]-b[0]) + (a[1]-b[1])*(a[1]-b[1]); static inline IT f(const Vec2b a, const Vec2b b)
} {
return (IT)(a[0]-b[0])*(IT)(a[0]-b[0]) + (IT)(a[1]-b[1])*(IT)(a[1]-b[1]);
}
};
template <typename IT> struct calcDist_<Vec3b, IT>
{
static inline IT f(const Vec3b a, const Vec3b b)
{
return
(IT)(a[0]-b[0])*(IT)(a[0]-b[0]) +
(IT)(a[1]-b[1])*(IT)(a[1]-b[1]) +
(IT)(a[2]-b[2])*(IT)(a[2]-b[2]);
}
};
template <> inline int calcDist(const Vec3b a, const Vec3b b) template <typename T, typename IT> static inline IT calcDist(const T a, const T b)
{ {
return (a[0]-b[0])*(a[0]-b[0]) + (a[1]-b[1])*(a[1]-b[1]) + (a[2]-b[2])*(a[2]-b[2]); return calcDist_<T, IT>::f(a, b);
} }
template <typename T> static inline int calcDist(const Mat& m, int i1, int j1, int i2, int j2) template <typename T, typename IT>
static inline IT calcDist(const Mat& m, int i1, int j1, int i2, int j2)
{ {
const T a = m.at<T>(i1, j1); const T a = m.at<T>(i1, j1);
const T b = m.at<T>(i2, j2); const T b = m.at<T>(i2, j2);
return calcDist<T>(a,b); return calcDist<T, IT>(a,b);
} }
template <typename T> static inline int calcUpDownDist(T a_up, T a_down, T b_up, T b_down) template <typename T, typename IT> struct calcUpDownDist_
{ {
return calcDist(a_down, b_down) - calcDist(a_up, b_up); static inline IT f(T a_up, T a_down, T b_up, T b_down)
} {
return calcDist<T, IT>(a_down, b_down) - calcDist<T, IT>(a_up, b_up);
}
};
template <> inline int calcUpDownDist(uchar a_up, uchar a_down, uchar b_up, uchar b_down) template <typename IT> struct calcUpDownDist_<uchar, IT>
{ {
int A = a_down - b_down; static inline IT f(uchar a_up, uchar a_down, uchar b_up, uchar b_down)
int B = a_up - b_up; {
IT A = a_down - b_down;
IT B = a_up - b_up;
return (A-B)*(A+B); return (A-B)*(A+B);
} }
};
template <typename T> static inline void incWithWeight(int* estimation, int weight, T p); template <typename T, typename IT>
static inline IT calcUpDownDist(T a_up, T a_down, T b_up, T b_down)
template <> inline void incWithWeight(int* estimation, int weight, uchar p)
{ {
estimation[0] += weight * p; return calcUpDownDist_<T, IT>::f(a_up, a_down, b_up, b_down);
} };
template <> inline void incWithWeight(int* estimation, int weight, Vec2b p) template <typename T, typename IT> struct incWithWeight_
{ {
estimation[0] += weight * p[0]; static inline void f(IT* estimation, IT weight, T p);
estimation[1] += weight * p[1]; };
}
template <> inline void incWithWeight(int* estimation, int weight, Vec3b p) template <typename IT> struct incWithWeight_<uchar, IT>
{
estimation[0] += weight * p[0];
estimation[1] += weight * p[1];
estimation[2] += weight * p[2];
}
template <> inline void incWithWeight(int* estimation, int weight, int p)
{ {
static inline void f(IT* estimation, IT weight, uchar p)
{
estimation[0] += weight * p; estimation[0] += weight * p;
} }
};
template <> inline void incWithWeight(int* estimation, int weight, Vec2i p) template <typename IT> struct incWithWeight_<Vec2b, IT>
{ {
static inline void f(IT* estimation, IT weight, Vec2b p)
{
estimation[0] += weight * p[0]; estimation[0] += weight * p[0];
estimation[1] += weight * p[1]; estimation[1] += weight * p[1];
} }
};
template <> inline void incWithWeight(int* estimation, int weight, Vec3i p) template <typename IT> struct incWithWeight_<Vec3b, IT>
{ {
static inline void f(IT* estimation, IT weight, Vec3b p)
{
estimation[0] += weight * p[0]; estimation[0] += weight * p[0];
estimation[1] += weight * p[1]; estimation[1] += weight * p[1];
estimation[2] += weight * p[2]; estimation[2] += weight * p[2];
}
};
template <typename T, typename IT>
static inline void incWithWeight(IT* estimation, IT weight, T p)
{
return incWithWeight_<T, IT>::f(estimation, weight, p);
} }
template <typename T> static inline T saturateCastFromArray(int* estimation); template <typename T, typename IT> struct saturateCastFromArray_
{
static inline T f(IT* estimation);
};
template <> inline uchar saturateCastFromArray(int* estimation) template <typename IT> struct saturateCastFromArray_<uchar, IT>
{ {
static inline uchar f(IT* estimation)
{
return saturate_cast<uchar>(estimation[0]); return saturate_cast<uchar>(estimation[0]);
} }
};
template <> inline Vec2b saturateCastFromArray(int* estimation) template <typename IT> struct saturateCastFromArray_<Vec2b, IT>
{ {
static inline Vec2b f(IT* estimation)
{
Vec2b res; Vec2b res;
res[0] = saturate_cast<uchar>(estimation[0]); res[0] = saturate_cast<uchar>(estimation[0]);
res[1] = saturate_cast<uchar>(estimation[1]); res[1] = saturate_cast<uchar>(estimation[1]);
return res; return res;
} }
};
template <> inline Vec3b saturateCastFromArray(int* estimation) template <typename IT> struct saturateCastFromArray_<Vec3b, IT>
{ {
static inline Vec3b f(IT* estimation)
{
Vec3b res; Vec3b res;
res[0] = saturate_cast<uchar>(estimation[0]); res[0] = saturate_cast<uchar>(estimation[0]);
res[1] = saturate_cast<uchar>(estimation[1]); res[1] = saturate_cast<uchar>(estimation[1]);
res[2] = saturate_cast<uchar>(estimation[2]); res[2] = saturate_cast<uchar>(estimation[2]);
return res; return res;
} }
};
template <> inline int saturateCastFromArray(int* estimation)
{
return estimation[0];
}
template <> inline Vec2i saturateCastFromArray(int* estimation)
{
estimation[1] = 0;
return Vec2i(estimation);
}
template <> inline Vec3i saturateCastFromArray(int* estimation) template <typename T, typename IT> static inline T saturateCastFromArray(IT* estimation)
{ {
return Vec3i(estimation); return saturateCastFromArray_<T, IT>::f(estimation);
} }
#endif #endif
...@@ -50,7 +50,7 @@ ...@@ -50,7 +50,7 @@
using namespace cv; using namespace cv;
template <typename T> template <typename T, typename IT, typename UIT>
struct FastNlMeansMultiDenoisingInvoker : struct FastNlMeansMultiDenoisingInvoker :
ParallelLoopBody ParallelLoopBody
{ {
...@@ -81,21 +81,21 @@ private: ...@@ -81,21 +81,21 @@ private:
int search_window_half_size_; int search_window_half_size_;
int temporal_window_half_size_; int temporal_window_half_size_;
int fixed_point_mult_; IT fixed_point_mult_;
int almost_template_window_size_sq_bin_shift; int almost_template_window_size_sq_bin_shift;
std::vector<int> almost_dist2weight; std::vector<IT> almost_dist2weight;
void calcDistSumsForFirstElementInRow(int i, Array3d<int>& dist_sums, void calcDistSumsForFirstElementInRow(int i, Array3d<IT>& dist_sums,
Array4d<int>& col_dist_sums, Array4d<IT>& col_dist_sums,
Array4d<int>& up_col_dist_sums) const; Array4d<IT>& up_col_dist_sums) const;
void calcDistSumsForElementInFirstRow(int i, int j, int first_col_num, void calcDistSumsForElementInFirstRow(int i, int j, int first_col_num,
Array3d<int>& dist_sums, Array4d<int>& col_dist_sums, Array3d<IT>& dist_sums, Array4d<IT>& col_dist_sums,
Array4d<int>& up_col_dist_sums) const; Array4d<IT>& up_col_dist_sums) const;
}; };
template <class T> template <class T, typename IT, typename UIT>
FastNlMeansMultiDenoisingInvoker<T>::FastNlMeansMultiDenoisingInvoker( FastNlMeansMultiDenoisingInvoker<T, IT, UIT>::FastNlMeansMultiDenoisingInvoker(
const std::vector<Mat>& srcImgs, const std::vector<Mat>& srcImgs,
int imgToDenoiseIndex, int imgToDenoiseIndex,
int temporalWindowSize, int temporalWindowSize,
...@@ -125,8 +125,9 @@ FastNlMeansMultiDenoisingInvoker<T>::FastNlMeansMultiDenoisingInvoker( ...@@ -125,8 +125,9 @@ FastNlMeansMultiDenoisingInvoker<T>::FastNlMeansMultiDenoisingInvoker(
border_size_, border_size_, border_size_, border_size_, cv::BORDER_DEFAULT); border_size_, border_size_, border_size_, border_size_, cv::BORDER_DEFAULT);
main_extended_src_ = extended_srcs_[temporal_window_half_size_]; main_extended_src_ = extended_srcs_[temporal_window_half_size_];
const int max_estimate_sum_value = temporal_window_size_ * search_window_size_ * search_window_size_ * 255; const IT max_estimate_sum_value =
fixed_point_mult_ = std::numeric_limits<int>::max() / max_estimate_sum_value; (IT)temporal_window_size_ * (IT)search_window_size_ * (IT)search_window_size_ * 255;
fixed_point_mult_ = std::numeric_limits<IT>::max() / max_estimate_sum_value;
// precalc weight for every possible l2 dist between blocks // precalc weight for every possible l2 dist between blocks
// additional optimization of precalced weights to replace division(averaging) by binary shift // additional optimization of precalced weights to replace division(averaging) by binary shift
...@@ -138,7 +139,7 @@ FastNlMeansMultiDenoisingInvoker<T>::FastNlMeansMultiDenoisingInvoker( ...@@ -138,7 +139,7 @@ FastNlMeansMultiDenoisingInvoker<T>::FastNlMeansMultiDenoisingInvoker(
int almost_template_window_size_sq = 1 << almost_template_window_size_sq_bin_shift; int almost_template_window_size_sq = 1 << almost_template_window_size_sq_bin_shift;
double almost_dist2actual_dist_multiplier = (double) almost_template_window_size_sq / template_window_size_sq; double almost_dist2actual_dist_multiplier = (double) almost_template_window_size_sq / template_window_size_sq;
int max_dist = 255 * 255 * sizeof(T); IT max_dist = 255 * 255 * sizeof(T);
int almost_max_dist = (int) (max_dist / almost_dist2actual_dist_multiplier + 1); int almost_max_dist = (int) (max_dist / almost_dist2actual_dist_multiplier + 1);
almost_dist2weight.resize(almost_max_dist); almost_dist2weight.resize(almost_max_dist);
...@@ -146,7 +147,7 @@ FastNlMeansMultiDenoisingInvoker<T>::FastNlMeansMultiDenoisingInvoker( ...@@ -146,7 +147,7 @@ FastNlMeansMultiDenoisingInvoker<T>::FastNlMeansMultiDenoisingInvoker(
for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++) for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++)
{ {
double dist = almost_dist * almost_dist2actual_dist_multiplier; double dist = almost_dist * almost_dist2actual_dist_multiplier;
int weight = cvRound(fixed_point_mult_ * std::exp(-dist / (h * h * sizeof(T)))); IT weight = (IT)round(fixed_point_mult_ * std::exp(-dist / (h * h * sizeof(T))));
if (weight < WEIGHT_THRESHOLD * fixed_point_mult_) if (weight < WEIGHT_THRESHOLD * fixed_point_mult_)
weight = 0; weight = 0;
...@@ -160,19 +161,19 @@ FastNlMeansMultiDenoisingInvoker<T>::FastNlMeansMultiDenoisingInvoker( ...@@ -160,19 +161,19 @@ FastNlMeansMultiDenoisingInvoker<T>::FastNlMeansMultiDenoisingInvoker(
dst_ = Mat::zeros(srcImgs[0].size(), srcImgs[0].type()); dst_ = Mat::zeros(srcImgs[0].size(), srcImgs[0].type());
} }
template <class T> template <class T, typename IT, typename UIT>
void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const void FastNlMeansMultiDenoisingInvoker<T, IT, UIT>::operator() (const Range& range) const
{ {
int row_from = range.start; int row_from = range.start;
int row_to = range.end - 1; int row_to = range.end - 1;
Array3d<int> dist_sums(temporal_window_size_, search_window_size_, search_window_size_); Array3d<IT> dist_sums(temporal_window_size_, search_window_size_, search_window_size_);
// for lazy calc optimization // for lazy calc optimization
Array4d<int> col_dist_sums(template_window_size_, temporal_window_size_, search_window_size_, search_window_size_); Array4d<IT> col_dist_sums(template_window_size_, temporal_window_size_, search_window_size_, search_window_size_);
int first_col_num = -1; int first_col_num = -1;
Array4d<int> up_col_dist_sums(cols_, temporal_window_size_, search_window_size_, search_window_size_); Array4d<IT> up_col_dist_sums(cols_, temporal_window_size_, search_window_size_, search_window_size_);
for (int i = row_from; i <= row_to; i++) for (int i = row_from; i <= row_to; i++)
{ {
...@@ -216,15 +217,15 @@ void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const ...@@ -216,15 +217,15 @@ void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const
for (int d = 0; d < temporal_window_size_; d++) for (int d = 0; d < temporal_window_size_; d++)
{ {
Mat cur_extended_src = extended_srcs_[d]; Mat cur_extended_src = extended_srcs_[d];
Array2d<int> cur_dist_sums = dist_sums[d]; Array2d<IT> cur_dist_sums = dist_sums[d];
Array2d<int> cur_col_dist_sums = col_dist_sums[first_col_num][d]; Array2d<IT> cur_col_dist_sums = col_dist_sums[first_col_num][d];
Array2d<int> cur_up_col_dist_sums = up_col_dist_sums[j][d]; Array2d<IT> cur_up_col_dist_sums = up_col_dist_sums[j][d];
for (int y = 0; y < search_window_size; y++) for (int y = 0; y < search_window_size; y++)
{ {
int* dist_sums_row = cur_dist_sums.row_ptr(y); IT* dist_sums_row = cur_dist_sums.row_ptr(y);
int* col_dist_sums_row = cur_col_dist_sums.row_ptr(y); IT* col_dist_sums_row = cur_col_dist_sums.row_ptr(y);
int* up_col_dist_sums_row = cur_up_col_dist_sums.row_ptr(y); IT* up_col_dist_sums_row = cur_up_col_dist_sums.row_ptr(y);
const T* b_up_ptr = cur_extended_src.ptr<T>(start_by - template_window_half_size_ - 1 + y); const T* b_up_ptr = cur_extended_src.ptr<T>(start_by - template_window_half_size_ - 1 + y);
const T* b_down_ptr = cur_extended_src.ptr<T>(start_by + template_window_half_size_ + y); const T* b_down_ptr = cur_extended_src.ptr<T>(start_by + template_window_half_size_ + y);
...@@ -234,7 +235,7 @@ void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const ...@@ -234,7 +235,7 @@ void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const
dist_sums_row[x] -= col_dist_sums_row[x]; dist_sums_row[x] -= col_dist_sums_row[x];
col_dist_sums_row[x] = up_col_dist_sums_row[x] + col_dist_sums_row[x] = up_col_dist_sums_row[x] +
calcUpDownDist(a_up, a_down, b_up_ptr[start_bx + x], b_down_ptr[start_bx + x]); calcUpDownDist<T, IT>(a_up, a_down, b_up_ptr[start_bx + x], b_down_ptr[start_bx + x]);
dist_sums_row[x] += col_dist_sums_row[x]; dist_sums_row[x] += col_dist_sums_row[x];
up_col_dist_sums_row[x] = col_dist_sums_row[x]; up_col_dist_sums_row[x] = col_dist_sums_row[x];
...@@ -247,9 +248,9 @@ void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const ...@@ -247,9 +248,9 @@ void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const
} }
// calc weights // calc weights
int weights_sum = 0; IT weights_sum = 0;
int estimation[3]; IT estimation[3];
for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++) for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++)
estimation[channel_num] = 0; estimation[channel_num] = 0;
...@@ -260,33 +261,33 @@ void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const ...@@ -260,33 +261,33 @@ void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const
{ {
const T* cur_row_ptr = esrc_d.ptr<T>(border_size_ + search_window_y + y); const T* cur_row_ptr = esrc_d.ptr<T>(border_size_ + search_window_y + y);
int* dist_sums_row = dist_sums.row_ptr(d, y); IT* dist_sums_row = dist_sums.row_ptr(d, y);
for (int x = 0; x < search_window_size_; x++) for (int x = 0; x < search_window_size_; x++)
{ {
int almostAvgDist = dist_sums_row[x] >> almost_template_window_size_sq_bin_shift; int almostAvgDist = (int)(dist_sums_row[x] >> almost_template_window_size_sq_bin_shift);
int weight = almost_dist2weight[almostAvgDist]; IT weight = almost_dist2weight[almostAvgDist];
weights_sum += weight; weights_sum += weight;
T p = cur_row_ptr[border_size_ + search_window_x + x]; T p = cur_row_ptr[border_size_ + search_window_x + x];
incWithWeight(estimation, weight, p); incWithWeight<T, IT>(estimation, weight, p);
} }
} }
} }
for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++) for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++)
estimation[channel_num] = ((unsigned)estimation[channel_num] + weights_sum / 2) / weights_sum; estimation[channel_num] = (static_cast<UIT>(estimation[channel_num]) + weights_sum / 2) / weights_sum; // ????
dst_.at<T>(i,j) = saturateCastFromArray<T>(estimation); dst_.at<T>(i,j) = saturateCastFromArray<T, IT>(estimation);
} }
} }
} }
template <class T> template <class T, typename IT, typename UIT>
inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForFirstElementInRow( inline void FastNlMeansMultiDenoisingInvoker<T, IT, UIT>::calcDistSumsForFirstElementInRow(
int i, Array3d<int>& dist_sums, Array4d<int>& col_dist_sums, Array4d<int>& up_col_dist_sums) const int i, Array3d<IT>& dist_sums, Array4d<IT>& col_dist_sums, Array4d<IT>& up_col_dist_sums) const
{ {
int j = 0; int j = 0;
...@@ -303,14 +304,14 @@ inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForFirstElementInRo ...@@ -303,14 +304,14 @@ inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForFirstElementInRo
int start_y = i + y - search_window_half_size_; int start_y = i + y - search_window_half_size_;
int start_x = j + x - search_window_half_size_; int start_x = j + x - search_window_half_size_;
int* dist_sums_ptr = &dist_sums[d][y][x]; IT* dist_sums_ptr = &dist_sums[d][y][x];
int* col_dist_sums_ptr = &col_dist_sums[0][d][y][x]; IT* col_dist_sums_ptr = &col_dist_sums[0][d][y][x];
int col_dist_sums_step = col_dist_sums.step_size(0); int col_dist_sums_step = col_dist_sums.step_size(0);
for (int tx = -template_window_half_size_; tx <= template_window_half_size_; tx++) for (int tx = -template_window_half_size_; tx <= template_window_half_size_; tx++)
{ {
for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++) for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++)
{ {
int dist = calcDist<T>( IT dist = calcDist<T, IT>(
main_extended_src_.at<T>(border_size_ + i + ty, border_size_ + j + tx), main_extended_src_.at<T>(border_size_ + i + ty, border_size_ + j + tx),
cur_extended_src.at<T>(border_size_ + start_y + ty, border_size_ + start_x + tx)); cur_extended_src.at<T>(border_size_ + start_y + ty, border_size_ + start_x + tx));
...@@ -325,10 +326,10 @@ inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForFirstElementInRo ...@@ -325,10 +326,10 @@ inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForFirstElementInRo
} }
} }
template <class T> template <class T, typename IT, typename UIT>
inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForElementInFirstRow( inline void FastNlMeansMultiDenoisingInvoker<T, IT, UIT>::calcDistSumsForElementInFirstRow(
int i, int j, int first_col_num, Array3d<int>& dist_sums, int i, int j, int first_col_num, Array3d<IT>& dist_sums,
Array4d<int>& col_dist_sums, Array4d<int>& up_col_dist_sums) const Array4d<IT>& col_dist_sums, Array4d<IT>& up_col_dist_sums) const
{ {
int ay = border_size_ + i; int ay = border_size_ + i;
int ax = border_size_ + j + template_window_half_size_; int ax = border_size_ + j + template_window_half_size_;
...@@ -350,10 +351,10 @@ inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForElementInFirstRo ...@@ -350,10 +351,10 @@ inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForElementInFirstRo
int by = start_by + y; int by = start_by + y;
int bx = start_bx + x; int bx = start_bx + x;
int* col_dist_sums_ptr = &col_dist_sums[new_last_col_num][d][y][x]; IT* col_dist_sums_ptr = &col_dist_sums[new_last_col_num][d][y][x];
for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++) for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++)
{ {
*col_dist_sums_ptr += calcDist<T>( *col_dist_sums_ptr += calcDist<T, IT>(
main_extended_src_.at<T>(ay + ty, ax), main_extended_src_.at<T>(ay + ty, ax),
cur_extended_src.at<T>(by + ty, bx)); cur_extended_src.at<T>(by + ty, bx));
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment