Commit 42db9e71 authored by Erik Karlsson's avatar Erik Karlsson

Basic 16-bit implmentation of fastNlMeansDenoising. Table-based exponetiation…

Basic 16-bit implmentation of fastNlMeansDenoising. Table-based exponetiation leads to high memory footprint and loss of precision in 16-bit mode.
parent 49e93747
......@@ -65,17 +65,32 @@ void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, float h,
switch (src.type()) {
case CV_8U:
parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<uchar, int, unsigned int>(
FastNlMeansDenoisingInvoker<uchar, int, unsigned>(
src, dst, templateWindowSize, searchWindowSize, h));
break;
case CV_8UC2:
parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<cv::Vec2b, int, unsigned int>(
FastNlMeansDenoisingInvoker<cv::Vec2b, int, unsigned>(
src, dst, templateWindowSize, searchWindowSize, h));
break;
case CV_8UC3:
parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<cv::Vec3b, int, unsigned int>(
FastNlMeansDenoisingInvoker<cv::Vec3b, int, unsigned>(
src, dst, templateWindowSize, searchWindowSize, h));
break;
case CV_16U:
parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<unsigned short, int64, uint64>(
src, dst, templateWindowSize, searchWindowSize, h));
break;
case CV_16UC2:
parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<cv::Vec<ushort, 2>, int64, uint64>(
src, dst, templateWindowSize, searchWindowSize, h));
break;
case CV_16UC3:
parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<cv::Vec<ushort, 3>, int64, uint64>(
src, dst, templateWindowSize, searchWindowSize, h));
break;
default:
......@@ -181,13 +196,31 @@ void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _ds
break;
case CV_8UC2:
parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<cv::Vec2b, int, unsigned int>(
FastNlMeansMultiDenoisingInvoker<cv::Vec2b, int, unsigned>(
srcImgs, imgToDenoiseIndex, temporalWindowSize,
dst, templateWindowSize, searchWindowSize, h));
break;
case CV_8UC3:
parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<cv::Vec3b, int, unsigned int>(
FastNlMeansMultiDenoisingInvoker<cv::Vec3b, int, unsigned>(
srcImgs, imgToDenoiseIndex, temporalWindowSize,
dst, templateWindowSize, searchWindowSize, h));
break;
case CV_16U:
parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<ushort, int64, uint64>(
srcImgs, imgToDenoiseIndex, temporalWindowSize,
dst, templateWindowSize, searchWindowSize, h));
break;
case CV_16UC2:
parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<cv::Vec<ushort, 2>, int64, uint64>(
srcImgs, imgToDenoiseIndex, temporalWindowSize,
dst, templateWindowSize, searchWindowSize, h));
break;
case CV_16UC3:
parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<cv::Vec<ushort, 3>, int64, uint64>(
srcImgs, imgToDenoiseIndex, temporalWindowSize,
dst, templateWindowSize, searchWindowSize, h));
break;
......
......@@ -123,11 +123,13 @@ FastNlMeansDenoisingInvoker<T, IT, UIT>::FastNlMeansDenoisingInvoker(
// precalc weight for every possible l2 dist between blocks
// additional optimization of precalced weights to replace division(averaging) by binary shift
// squared distances are truncated to 16 bits to get a reasonable table size
// squared distances are truncated to 24 bits to avoid unreasonable table sizes
// TODO: uses lots of memory and loses precision wtih 16-bit images ????
const size_t TABLE_MAX_BITS = 24;
CV_Assert(template_window_size_ <= 46340); // sqrt(INT_MAX)
int template_window_size_sq = template_window_size_ * template_window_size_;
almost_template_window_size_sq_bin_shift_ =
getNearestPowerOf2(template_window_size_sq) + 2*pixelInfo<T>::sampleBits() - 16;
almost_template_window_size_sq_bin_shift_ = getNearestPowerOf2(template_window_size_sq) +
std::max(2*pixelInfo<T>::sampleBits(), TABLE_MAX_BITS) - TABLE_MAX_BITS;
double almost_dist2actual_dist_multiplier = ((double)(1 << almost_template_window_size_sq_bin_shift_)) / template_window_size_sq;
IT max_dist =
......@@ -139,7 +141,7 @@ FastNlMeansDenoisingInvoker<T, IT, UIT>::FastNlMeansDenoisingInvoker(
for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++)
{
double dist = almost_dist * almost_dist2actual_dist_multiplier;
IT weight = (IT)round(fixed_point_mult_ * std::exp(-dist / (h * h * sizeof(T))));
IT weight = (IT)round(fixed_point_mult_ * std::exp(-dist / (h * h * pixelInfo<T>::channels)));
if (weight < WEIGHT_THRESHOLD * fixed_point_mult_)
weight = 0;
......@@ -232,7 +234,7 @@ void FastNlMeansDenoisingInvoker<T, IT, UIT>::operator() (const Range& range) co
// calc weights
IT estimation[3], weights_sum = 0;
for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++)
for (size_t channel_num = 0; channel_num < pixelInfo<T>::channels; channel_num++)
estimation[channel_num] = 0;
for (int y = 0; y < search_window_size_; y++)
......@@ -250,7 +252,7 @@ void FastNlMeansDenoisingInvoker<T, IT, UIT>::operator() (const Range& range) co
}
}
for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++)
for (size_t channel_num = 0; channel_num < pixelInfo<T>::channels; channel_num++)
estimation[channel_num] = (static_cast<UIT>(estimation[channel_num]) + weights_sum/2) / weights_sum;
dst_.at<T>(i,j) = saturateCastFromArray<T, IT>(estimation);
......
......@@ -131,12 +131,15 @@ FastNlMeansMultiDenoisingInvoker<T, IT, UIT>::FastNlMeansMultiDenoisingInvoker(
// precalc weight for every possible l2 dist between blocks
// additional optimization of precalced weights to replace division(averaging) by binary shift
// squared distances are truncated to 16 bits to get a reasonable table size
// squared distances are truncated to 24 bits to avoid unreasonable table sizes
// TODO: uses lots of memory and loses precision wtih 16-bit images ????
const size_t TABLE_MAX_BITS = 24;
int template_window_size_sq = template_window_size_ * template_window_size_;
almost_template_window_size_sq_bin_shift = 0;
while (1 << almost_template_window_size_sq_bin_shift < template_window_size_sq)
almost_template_window_size_sq_bin_shift++;
almost_template_window_size_sq_bin_shift += 2*pixelInfo<T>::sampleBits() - 16;
almost_template_window_size_sq_bin_shift +=
std::max(2*pixelInfo<T>::sampleBits(), TABLE_MAX_BITS) - TABLE_MAX_BITS;
int almost_template_window_size_sq = 1 << almost_template_window_size_sq_bin_shift;
double almost_dist2actual_dist_multiplier = (double) almost_template_window_size_sq / template_window_size_sq;
......@@ -150,7 +153,7 @@ FastNlMeansMultiDenoisingInvoker<T, IT, UIT>::FastNlMeansMultiDenoisingInvoker(
for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++)
{
double dist = almost_dist * almost_dist2actual_dist_multiplier;
IT weight = (IT)round(fixed_point_mult_ * std::exp(-dist / (h * h * sizeof(T))));
IT weight = (IT)round(fixed_point_mult_ * std::exp(-dist / (h * h * pixelInfo<T>::channels)));
if (weight < WEIGHT_THRESHOLD * fixed_point_mult_)
weight = 0;
......@@ -254,7 +257,7 @@ void FastNlMeansMultiDenoisingInvoker<T, IT, UIT>::operator() (const Range& rang
IT weights_sum = 0;
IT estimation[3];
for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++)
for (size_t channel_num = 0; channel_num < pixelInfo<T>::channels; channel_num++)
estimation[channel_num] = 0;
for (int d = 0; d < temporal_window_size_; d++)
......@@ -279,8 +282,8 @@ void FastNlMeansMultiDenoisingInvoker<T, IT, UIT>::operator() (const Range& rang
}
}
for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++)
estimation[channel_num] = (static_cast<UIT>(estimation[channel_num]) + weights_sum / 2) / weights_sum; // ????
for (size_t channel_num = 0; channel_num < pixelInfo<T>::channels; channel_num++)
estimation[channel_num] = (static_cast<UIT>(estimation[channel_num]) + weights_sum / 2) / weights_sum;
dst_.at<T>(i,j) = saturateCastFromArray<T, IT>(estimation);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment