Commit 058299cc authored by exoson's avatar exoson

Optimize MultiBandBlender to run faster

parent ca1975ca
...@@ -240,6 +240,7 @@ public: ...@@ -240,6 +240,7 @@ public:
bool isUMatVector() const; bool isUMatVector() const;
bool isMatx() const; bool isMatx() const;
bool isVector() const; bool isVector() const;
bool isGpuMat() const;
bool isGpuMatVector() const; bool isGpuMatVector() const;
~_InputArray(); ~_InputArray();
......
...@@ -157,6 +157,7 @@ inline bool _InputArray::isMatx() const { return kind() == _InputArray::MATX; } ...@@ -157,6 +157,7 @@ inline bool _InputArray::isMatx() const { return kind() == _InputArray::MATX; }
inline bool _InputArray::isVector() const { return kind() == _InputArray::STD_VECTOR || inline bool _InputArray::isVector() const { return kind() == _InputArray::STD_VECTOR ||
kind() == _InputArray::STD_BOOL_VECTOR || kind() == _InputArray::STD_BOOL_VECTOR ||
kind() == _InputArray::STD_ARRAY; } kind() == _InputArray::STD_ARRAY; }
inline bool _InputArray::isGpuMat() const { return kind() == _InputArray::CUDA_GPU_MAT; }
inline bool _InputArray::isGpuMatVector() const { return kind() == _InputArray::STD_VECTOR_CUDA_GPU_MAT; } inline bool _InputArray::isGpuMatVector() const { return kind() == _InputArray::STD_VECTOR_CUDA_GPU_MAT; }
//////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////
......
...@@ -145,6 +145,18 @@ private: ...@@ -145,6 +145,18 @@ private:
#if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING) #if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING)
std::vector<cuda::GpuMat> gpu_dst_pyr_laplace_; std::vector<cuda::GpuMat> gpu_dst_pyr_laplace_;
std::vector<cuda::GpuMat> gpu_dst_band_weights_; std::vector<cuda::GpuMat> gpu_dst_band_weights_;
std::vector<Point> gpu_tl_points_;
std::vector<cuda::GpuMat> gpu_imgs_with_border_;
std::vector<std::vector<cuda::GpuMat> > gpu_weight_pyr_gauss_vec_;
std::vector<std::vector<cuda::GpuMat> > gpu_src_pyr_laplace_vec_;
std::vector<std::vector<cuda::GpuMat> > gpu_ups_;
cuda::GpuMat gpu_dst_mask_;
cuda::GpuMat gpu_mask_;
cuda::GpuMat gpu_img_;
cuda::GpuMat gpu_weight_map_;
cuda::GpuMat gpu_add_mask_;
int gpu_feed_idx_;
bool gpu_initialized_;
#endif #endif
}; };
......
...@@ -221,6 +221,7 @@ MultiBandBlender::MultiBandBlender(int try_gpu, int num_bands, int weight_type) ...@@ -221,6 +221,7 @@ MultiBandBlender::MultiBandBlender(int try_gpu, int num_bands, int weight_type)
#if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING) #if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING)
can_use_gpu_ = try_gpu && cuda::getCudaEnabledDeviceCount(); can_use_gpu_ = try_gpu && cuda::getCudaEnabledDeviceCount();
gpu_feed_idx_ = 0;
#else #else
(void) try_gpu; (void) try_gpu;
can_use_gpu_ = false; can_use_gpu_ = false;
...@@ -248,6 +249,15 @@ void MultiBandBlender::prepare(Rect dst_roi) ...@@ -248,6 +249,15 @@ void MultiBandBlender::prepare(Rect dst_roi)
#if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING) #if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING)
if (can_use_gpu_) if (can_use_gpu_)
{ {
gpu_initialized_ = false;
gpu_feed_idx_ = 0;
gpu_tl_points_.clear();
gpu_weight_pyr_gauss_vec_.clear();
gpu_src_pyr_laplace_vec_.clear();
gpu_ups_.clear();
gpu_imgs_with_border_.clear();
gpu_dst_pyr_laplace_.resize(num_bands_ + 1); gpu_dst_pyr_laplace_.resize(num_bands_ + 1);
gpu_dst_pyr_laplace_[0].create(dst_roi.size(), CV_16SC3); gpu_dst_pyr_laplace_[0].create(dst_roi.size(), CV_16SC3);
gpu_dst_pyr_laplace_[0].setTo(Scalar::all(0)); gpu_dst_pyr_laplace_[0].setTo(Scalar::all(0));
...@@ -320,7 +330,37 @@ void MultiBandBlender::feed(InputArray _img, InputArray mask, Point tl) ...@@ -320,7 +330,37 @@ void MultiBandBlender::feed(InputArray _img, InputArray mask, Point tl)
int64 t = getTickCount(); int64 t = getTickCount();
#endif #endif
UMat img = _img.getUMat(); UMat img;
#if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING)
// If using gpu save the top left coordinate when running first time after prepare
if (can_use_gpu_)
{
if (!gpu_initialized_)
{
gpu_tl_points_.push_back(tl);
}
else
{
tl = gpu_tl_points_[gpu_feed_idx_];
}
}
// If _img is not a GpuMat get it as UMat from the InputArray object.
// If it is GpuMat make a dummy object with right dimensions but no data and
// get _img as a GpuMat
if (!_img.isGpuMat())
#endif
{
img = _img.getUMat();
}
#if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING)
else
{
gpu_img_ = _img.getGpuMat();
img = UMat(gpu_img_.rows, gpu_img_.cols, gpu_img_.type());
}
#endif
CV_Assert(img.type() == CV_16SC3 || img.type() == CV_8UC3); CV_Assert(img.type() == CV_16SC3 || img.type() == CV_8UC3);
CV_Assert(mask.type() == CV_8U); CV_Assert(mask.type() == CV_8U);
...@@ -357,42 +397,63 @@ void MultiBandBlender::feed(InputArray _img, InputArray mask, Point tl) ...@@ -357,42 +397,63 @@ void MultiBandBlender::feed(InputArray _img, InputArray mask, Point tl)
#if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING) #if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING)
if (can_use_gpu_) if (can_use_gpu_)
{ {
if (!gpu_initialized_)
{
gpu_imgs_with_border_.push_back(cuda::GpuMat());
gpu_weight_pyr_gauss_vec_.push_back(std::vector<cuda::GpuMat>(num_bands_+1));
gpu_src_pyr_laplace_vec_.push_back(std::vector<cuda::GpuMat>(num_bands_+1));
gpu_ups_.push_back(std::vector<cuda::GpuMat>(num_bands_));
}
// If _img is not GpuMat upload it to gpu else gpu_img_ was set already
if (!_img.isGpuMat())
{
gpu_img_.upload(img);
}
// Create the source image Laplacian pyramid // Create the source image Laplacian pyramid
cuda::GpuMat gpu_img; cuda::copyMakeBorder(gpu_img_, gpu_imgs_with_border_[gpu_feed_idx_], top, bottom,
gpu_img.upload(img); left, right, BORDER_REFLECT);
cuda::GpuMat img_with_border; gpu_imgs_with_border_[gpu_feed_idx_].convertTo(gpu_src_pyr_laplace_vec_[gpu_feed_idx_][0], CV_16S);
cuda::copyMakeBorder(gpu_img, img_with_border, top, bottom, left, right, BORDER_REFLECT);
std::vector<cuda::GpuMat> gpu_src_pyr_laplace(num_bands_ + 1);
img_with_border.convertTo(gpu_src_pyr_laplace[0], CV_16S);
for (int i = 0; i < num_bands_; ++i) for (int i = 0; i < num_bands_; ++i)
cuda::pyrDown(gpu_src_pyr_laplace[i], gpu_src_pyr_laplace[i + 1]); cuda::pyrDown(gpu_src_pyr_laplace_vec_[gpu_feed_idx_][i],
gpu_src_pyr_laplace_vec_[gpu_feed_idx_][i + 1]);
for (int i = 0; i < num_bands_; ++i) for (int i = 0; i < num_bands_; ++i)
{ {
cuda::GpuMat up; cuda::pyrUp(gpu_src_pyr_laplace_vec_[gpu_feed_idx_][i + 1], gpu_ups_[gpu_feed_idx_][i]);
cuda::pyrUp(gpu_src_pyr_laplace[i + 1], up); cuda::subtract(gpu_src_pyr_laplace_vec_[gpu_feed_idx_][i],
cuda::subtract(gpu_src_pyr_laplace[i], up, gpu_src_pyr_laplace[i]); gpu_ups_[gpu_feed_idx_][i],
gpu_src_pyr_laplace_vec_[gpu_feed_idx_][i]);
} }
// Create the weight map Gaussian pyramid // Create the weight map Gaussian pyramid only if not yet initialized
cuda::GpuMat gpu_mask; if (!gpu_initialized_)
gpu_mask.upload(mask); {
cuda::GpuMat weight_map; if (mask.isGpuMat())
std::vector<cuda::GpuMat> gpu_weight_pyr_gauss(num_bands_ + 1); {
gpu_mask_ = mask.getGpuMat();
}
else
{
gpu_mask_.upload(mask);
}
if (weight_type_ == CV_32F) if (weight_type_ == CV_32F)
{ {
gpu_mask.convertTo(weight_map, CV_32F, 1. / 255.); gpu_mask_.convertTo(gpu_weight_map_, CV_32F, 1. / 255.);
} }
else // weight_type_ == CV_16S else // weight_type_ == CV_16S
{ {
gpu_mask.convertTo(weight_map, CV_16S); gpu_mask_.convertTo(gpu_weight_map_, CV_16S);
cuda::GpuMat add_mask; cuda::compare(gpu_mask_, 0, gpu_add_mask_, CMP_NE);
cuda::compare(gpu_mask, 0, add_mask, CMP_NE); cuda::add(gpu_weight_map_, Scalar::all(1), gpu_weight_map_, gpu_add_mask_);
cuda::add(weight_map, Scalar::all(1), weight_map, add_mask);
} }
cuda::copyMakeBorder(weight_map, gpu_weight_pyr_gauss[0], top, bottom, left, right, BORDER_CONSTANT); cuda::copyMakeBorder(gpu_weight_map_, gpu_weight_pyr_gauss_vec_[gpu_feed_idx_][0], top,
bottom, left, right, BORDER_CONSTANT);
for (int i = 0; i < num_bands_; ++i) for (int i = 0; i < num_bands_; ++i)
cuda::pyrDown(gpu_weight_pyr_gauss[i], gpu_weight_pyr_gauss[i + 1]); cuda::pyrDown(gpu_weight_pyr_gauss_vec_[gpu_feed_idx_][i],
gpu_weight_pyr_gauss_vec_[gpu_feed_idx_][i + 1]);
}
int y_tl = tl_new.y - dst_roi_.y; int y_tl = tl_new.y - dst_roi_.y;
int y_br = br_new.y - dst_roi_.y; int y_br = br_new.y - dst_roi_.y;
...@@ -403,9 +464,9 @@ void MultiBandBlender::feed(InputArray _img, InputArray mask, Point tl) ...@@ -403,9 +464,9 @@ void MultiBandBlender::feed(InputArray _img, InputArray mask, Point tl)
for (int i = 0; i <= num_bands_; ++i) for (int i = 0; i <= num_bands_; ++i)
{ {
Rect rc(x_tl, y_tl, x_br - x_tl, y_br - y_tl); Rect rc(x_tl, y_tl, x_br - x_tl, y_br - y_tl);
cuda::GpuMat &_src_pyr_laplace = gpu_src_pyr_laplace[i]; cuda::GpuMat &_src_pyr_laplace = gpu_src_pyr_laplace_vec_[gpu_feed_idx_][i];
cuda::GpuMat _dst_pyr_laplace = gpu_dst_pyr_laplace_[i](rc); cuda::GpuMat _dst_pyr_laplace = gpu_dst_pyr_laplace_[i](rc);
cuda::GpuMat &_weight_pyr_gauss = gpu_weight_pyr_gauss[i]; cuda::GpuMat &_weight_pyr_gauss = gpu_weight_pyr_gauss_vec_[gpu_feed_idx_][i];
cuda::GpuMat _dst_band_weights = gpu_dst_band_weights_[i](rc); cuda::GpuMat _dst_band_weights = gpu_dst_band_weights_[i](rc);
using namespace cv::cuda::device::blend; using namespace cv::cuda::device::blend;
...@@ -420,6 +481,7 @@ void MultiBandBlender::feed(InputArray _img, InputArray mask, Point tl) ...@@ -420,6 +481,7 @@ void MultiBandBlender::feed(InputArray _img, InputArray mask, Point tl)
x_tl /= 2; y_tl /= 2; x_tl /= 2; y_tl /= 2;
x_br /= 2; y_br /= 2; x_br /= 2; y_br /= 2;
} }
++gpu_feed_idx_;
return; return;
} }
#endif #endif
...@@ -445,7 +507,7 @@ void MultiBandBlender::feed(InputArray _img, InputArray mask, Point tl) ...@@ -445,7 +507,7 @@ void MultiBandBlender::feed(InputArray _img, InputArray mask, Point tl)
UMat weight_map; UMat weight_map;
std::vector<UMat> weight_pyr_gauss(num_bands_ + 1); std::vector<UMat> weight_pyr_gauss(num_bands_ + 1);
if(weight_type_ == CV_32F) if (weight_type_ == CV_32F)
{ {
mask.getUMat().convertTo(weight_map, CV_32F, 1./255.); mask.getUMat().convertTo(weight_map, CV_32F, 1./255.);
} }
...@@ -486,7 +548,7 @@ void MultiBandBlender::feed(InputArray _img, InputArray mask, Point tl) ...@@ -486,7 +548,7 @@ void MultiBandBlender::feed(InputArray _img, InputArray mask, Point tl)
Mat _dst_pyr_laplace = dst_pyr_laplace_[i](rc).getMat(ACCESS_RW); Mat _dst_pyr_laplace = dst_pyr_laplace_[i](rc).getMat(ACCESS_RW);
Mat _weight_pyr_gauss = weight_pyr_gauss[i].getMat(ACCESS_READ); Mat _weight_pyr_gauss = weight_pyr_gauss[i].getMat(ACCESS_READ);
Mat _dst_band_weights = dst_band_weights_[i](rc).getMat(ACCESS_RW); Mat _dst_band_weights = dst_band_weights_[i](rc).getMat(ACCESS_RW);
if(weight_type_ == CV_32F) if (weight_type_ == CV_32F)
{ {
for (int y = 0; y < rc.height; ++y) for (int y = 0; y < rc.height; ++y)
{ {
...@@ -540,11 +602,15 @@ void MultiBandBlender::feed(InputArray _img, InputArray mask, Point tl) ...@@ -540,11 +602,15 @@ void MultiBandBlender::feed(InputArray _img, InputArray mask, Point tl)
void MultiBandBlender::blend(InputOutputArray dst, InputOutputArray dst_mask) void MultiBandBlender::blend(InputOutputArray dst, InputOutputArray dst_mask)
{ {
cv::UMat dst_band_weights_0;
Rect dst_rc(0, 0, dst_roi_final_.width, dst_roi_final_.height); Rect dst_rc(0, 0, dst_roi_final_.width, dst_roi_final_.height);
#if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING) #if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING)
if (can_use_gpu_) if (can_use_gpu_)
{ {
if (!gpu_initialized_)
{
gpu_ups_.push_back(std::vector<cuda::GpuMat>(num_bands_+1));
}
for (int i = 0; i <= num_bands_; ++i) for (int i = 0; i <= num_bands_; ++i)
{ {
cuda::GpuMat dst_i = gpu_dst_pyr_laplace_[i]; cuda::GpuMat dst_i = gpu_dst_pyr_laplace_[i];
...@@ -564,20 +630,50 @@ void MultiBandBlender::blend(InputOutputArray dst, InputOutputArray dst_mask) ...@@ -564,20 +630,50 @@ void MultiBandBlender::blend(InputOutputArray dst, InputOutputArray dst_mask)
// Restore image from Laplacian pyramid // Restore image from Laplacian pyramid
for (size_t i = num_bands_; i > 0; --i) for (size_t i = num_bands_; i > 0; --i)
{ {
cuda::GpuMat up; cuda::pyrUp(gpu_dst_pyr_laplace_[i], gpu_ups_[gpu_ups_.size()-1][num_bands_-i]);
cuda::pyrUp(gpu_dst_pyr_laplace_[i], up); cuda::add(gpu_ups_[gpu_ups_.size()-1][num_bands_-i],
cuda::add(up, gpu_dst_pyr_laplace_[i - 1], gpu_dst_pyr_laplace_[i - 1]); gpu_dst_pyr_laplace_[i - 1],
gpu_dst_pyr_laplace_[i - 1]);
} }
// If dst is GpuMat do masking on gpu and return dst as a GpuMat
// else download the image to cpu and return it as an ordinary Mat
if (dst.isGpuMat())
{
cuda::GpuMat &gpu_dst = dst.getGpuMatRef();
cuda::compare(gpu_dst_band_weights_[0](dst_rc), WEIGHT_EPS, gpu_dst_mask_, CMP_GT);
cuda::compare(gpu_dst_mask_, 0, gpu_mask_, CMP_EQ);
gpu_dst_pyr_laplace_[0](dst_rc).setTo(Scalar::all(0), gpu_mask_);
gpu_dst_pyr_laplace_[0](dst_rc).convertTo(gpu_dst, CV_16S);
}
else
{
gpu_dst_pyr_laplace_[0](dst_rc).download(dst_); gpu_dst_pyr_laplace_[0](dst_rc).download(dst_);
Mat dst_band_weights_0;
gpu_dst_band_weights_[0].download(dst_band_weights_0); gpu_dst_band_weights_[0].download(dst_band_weights_0);
gpu_dst_pyr_laplace_.clear(); compare(dst_band_weights_0(dst_rc), WEIGHT_EPS, dst_mask_, CMP_GT);
gpu_dst_band_weights_.clear(); Blender::blend(dst, dst_mask);
}
// Set destination Mats to 0 so new image can be blended
for (size_t i = 0; i < num_bands_ + 1; ++i)
{
gpu_dst_band_weights_[i].setTo(0);
gpu_dst_pyr_laplace_[i].setTo(Scalar::all(0));
}
gpu_feed_idx_ = 0;
gpu_initialized_ = true;
} }
else else
#endif #endif
{ {
cv::UMat dst_band_weights_0;
for (int i = 0; i <= num_bands_; ++i) for (int i = 0; i <= num_bands_; ++i)
normalizeUsingWeightMap(dst_band_weights_[i], dst_pyr_laplace_[i]); normalizeUsingWeightMap(dst_band_weights_[i], dst_pyr_laplace_[i]);
...@@ -588,11 +684,11 @@ void MultiBandBlender::blend(InputOutputArray dst, InputOutputArray dst_mask) ...@@ -588,11 +684,11 @@ void MultiBandBlender::blend(InputOutputArray dst, InputOutputArray dst_mask)
dst_pyr_laplace_.clear(); dst_pyr_laplace_.clear();
dst_band_weights_.clear(); dst_band_weights_.clear();
}
compare(dst_band_weights_0(dst_rc), WEIGHT_EPS, dst_mask_, CMP_GT); compare(dst_band_weights_0(dst_rc), WEIGHT_EPS, dst_mask_, CMP_GT);
Blender::blend(dst, dst_mask); Blender::blend(dst, dst_mask);
}
} }
......
...@@ -50,13 +50,17 @@ namespace opencv_test { namespace { ...@@ -50,13 +50,17 @@ namespace opencv_test { namespace {
detail::MultiBandBlender blender(try_cuda, 5); detail::MultiBandBlender blender(try_cuda, 5);
blender.prepare(Rect(0, 0, max(im1.cols, im2.cols), max(im1.rows, im2.rows))); blender.prepare(Rect(0, 0, max(im1.cols, im2.cols), max(im1.rows, im2.rows)));
blender.feed(im1, mask1, Point(0,0));
blender.feed(im2, mask2, Point(0,0)); // If using cuda try blending multiple times without calling prepare inbetween
for (int i = 0; i < (try_cuda ? 10 : 1); ++i) {
blender.feed(im1, mask1, Point(0, 0));
blender.feed(im2, mask2, Point(0, 0));
Mat result_s, result_mask; Mat result_s, result_mask;
blender.blend(result_s, result_mask); blender.blend(result_s, result_mask);
result_s.convertTo(result, CV_8U); result_s.convertTo(result, CV_8U);
} }
}
TEST(CUDA_MultiBandBlender, Accuracy) TEST(CUDA_MultiBandBlender, Accuracy)
{ {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment