Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
b76e8835
Commit
b76e8835
authored
Feb 12, 2017
by
Wenju He
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
improve MultiBandBlender cuda, add normalizeUsingWeight and addSrcWeight kernels
parent
642e4d97
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
288 additions
and
26 deletions
+288
-26
blenders.hpp
...s/stitching/include/opencv2/stitching/detail/blenders.hpp
+4
-0
blenders.cpp
modules/stitching/src/blenders.cpp
+172
-26
multiband_blend.cu
modules/stitching/src/cuda/multiband_blend.cu
+112
-0
No files found.
modules/stitching/include/opencv2/stitching/detail/blenders.hpp
View file @
b76e8835
...
...
@@ -142,6 +142,10 @@ private:
Rect
dst_roi_final_
;
bool
can_use_gpu_
;
int
weight_type_
;
//CV_32F or CV_16S
#if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING)
std
::
vector
<
cuda
::
GpuMat
>
gpu_dst_pyr_laplace_
;
std
::
vector
<
cuda
::
GpuMat
>
gpu_dst_band_weights_
;
#endif
};
...
...
modules/stitching/src/blenders.cpp
View file @
b76e8835
...
...
@@ -43,6 +43,23 @@
#include "precomp.hpp"
#include "opencl_kernels_stitching.hpp"
#ifdef HAVE_CUDA
namespace
cv
{
namespace
cuda
{
namespace
device
{
namespace
blend
{
void
addSrcWeightGpu16S
(
const
PtrStep
<
short
>
src
,
const
PtrStep
<
short
>
src_weight
,
PtrStep
<
short
>
dst
,
PtrStep
<
short
>
dst_weight
,
cv
::
Rect
&
rc
);
void
addSrcWeightGpu32F
(
const
PtrStep
<
short
>
src
,
const
PtrStepf
src_weight
,
PtrStep
<
short
>
dst
,
PtrStepf
dst_weight
,
cv
::
Rect
&
rc
);
void
normalizeUsingWeightMapGpu16S
(
const
PtrStep
<
short
>
weight
,
PtrStep
<
short
>
src
,
const
int
width
,
const
int
height
);
void
normalizeUsingWeightMapGpu32F
(
const
PtrStepf
weight
,
PtrStep
<
short
>
src
,
const
int
width
,
const
int
height
);
}
}}}
#endif
namespace
cv
{
namespace
detail
{
...
...
@@ -228,21 +245,46 @@ void MultiBandBlender::prepare(Rect dst_roi)
Blender
::
prepare
(
dst_roi
);
dst_pyr_laplace_
.
resize
(
num_bands_
+
1
);
dst_pyr_laplace_
[
0
]
=
dst_
;
#if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING)
if
(
can_use_gpu_
)
{
gpu_dst_pyr_laplace_
.
resize
(
num_bands_
+
1
);
gpu_dst_pyr_laplace_
[
0
].
create
(
dst_roi
.
size
(),
CV_16SC3
);
gpu_dst_pyr_laplace_
[
0
].
setTo
(
Scalar
::
all
(
0
));
dst_band_weights_
.
resize
(
num_bands_
+
1
);
dst_band_weights_
[
0
].
create
(
dst_roi
.
size
(),
weight_type_
);
dst_band_weights_
[
0
].
setTo
(
0
);
gpu_
dst_band_weights_
.
resize
(
num_bands_
+
1
);
gpu_
dst_band_weights_
[
0
].
create
(
dst_roi
.
size
(),
weight_type_
);
gpu_
dst_band_weights_
[
0
].
setTo
(
0
);
for
(
int
i
=
1
;
i
<=
num_bands_
;
++
i
)
for
(
int
i
=
1
;
i
<=
num_bands_
;
++
i
)
{
gpu_dst_pyr_laplace_
[
i
].
create
((
gpu_dst_pyr_laplace_
[
i
-
1
].
rows
+
1
)
/
2
,
(
gpu_dst_pyr_laplace_
[
i
-
1
].
cols
+
1
)
/
2
,
CV_16SC3
);
gpu_dst_band_weights_
[
i
].
create
((
gpu_dst_band_weights_
[
i
-
1
].
rows
+
1
)
/
2
,
(
gpu_dst_band_weights_
[
i
-
1
].
cols
+
1
)
/
2
,
weight_type_
);
gpu_dst_pyr_laplace_
[
i
].
setTo
(
Scalar
::
all
(
0
));
gpu_dst_band_weights_
[
i
].
setTo
(
0
);
}
}
else
#endif
{
dst_pyr_laplace_
[
i
].
create
((
dst_pyr_laplace_
[
i
-
1
].
rows
+
1
)
/
2
,
(
dst_pyr_laplace_
[
i
-
1
].
cols
+
1
)
/
2
,
CV_16SC3
);
dst_band_weights_
[
i
].
create
((
dst_band_weights_
[
i
-
1
].
rows
+
1
)
/
2
,
(
dst_band_weights_
[
i
-
1
].
cols
+
1
)
/
2
,
weight_type_
);
dst_pyr_laplace_
[
i
].
setTo
(
Scalar
::
all
(
0
));
dst_band_weights_
[
i
].
setTo
(
0
);
dst_pyr_laplace_
.
resize
(
num_bands_
+
1
);
dst_pyr_laplace_
[
0
]
=
dst_
;
dst_band_weights_
.
resize
(
num_bands_
+
1
);
dst_band_weights_
[
0
].
create
(
dst_roi
.
size
(),
weight_type_
);
dst_band_weights_
[
0
].
setTo
(
0
);
for
(
int
i
=
1
;
i
<=
num_bands_
;
++
i
)
{
dst_pyr_laplace_
[
i
].
create
((
dst_pyr_laplace_
[
i
-
1
].
rows
+
1
)
/
2
,
(
dst_pyr_laplace_
[
i
-
1
].
cols
+
1
)
/
2
,
CV_16SC3
);
dst_band_weights_
[
i
].
create
((
dst_band_weights_
[
i
-
1
].
rows
+
1
)
/
2
,
(
dst_band_weights_
[
i
-
1
].
cols
+
1
)
/
2
,
weight_type_
);
dst_pyr_laplace_
[
i
].
setTo
(
Scalar
::
all
(
0
));
dst_band_weights_
[
i
].
setTo
(
0
);
}
}
}
...
...
@@ -312,6 +354,76 @@ void MultiBandBlender::feed(InputArray _img, InputArray mask, Point tl)
int
bottom
=
br_new
.
y
-
tl
.
y
-
img
.
rows
;
int
right
=
br_new
.
x
-
tl
.
x
-
img
.
cols
;
#if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING)
if
(
can_use_gpu_
)
{
// Create the source image Laplacian pyramid
cuda
::
GpuMat
gpu_img
;
gpu_img
.
upload
(
img
);
cuda
::
GpuMat
img_with_border
;
cuda
::
copyMakeBorder
(
gpu_img
,
img_with_border
,
top
,
bottom
,
left
,
right
,
BORDER_REFLECT
);
std
::
vector
<
cuda
::
GpuMat
>
gpu_src_pyr_laplace
(
num_bands_
+
1
);
img_with_border
.
convertTo
(
gpu_src_pyr_laplace
[
0
],
CV_16S
);
for
(
int
i
=
0
;
i
<
num_bands_
;
++
i
)
cuda
::
pyrDown
(
gpu_src_pyr_laplace
[
i
],
gpu_src_pyr_laplace
[
i
+
1
]);
for
(
int
i
=
0
;
i
<
num_bands_
;
++
i
)
{
cuda
::
GpuMat
up
;
cuda
::
pyrUp
(
gpu_src_pyr_laplace
[
i
+
1
],
up
);
cuda
::
subtract
(
gpu_src_pyr_laplace
[
i
],
up
,
gpu_src_pyr_laplace
[
i
]);
}
// Create the weight map Gaussian pyramid
cuda
::
GpuMat
gpu_mask
;
gpu_mask
.
upload
(
mask
);
cuda
::
GpuMat
weight_map
;
std
::
vector
<
cuda
::
GpuMat
>
gpu_weight_pyr_gauss
(
num_bands_
+
1
);
if
(
weight_type_
==
CV_32F
)
{
gpu_mask
.
convertTo
(
weight_map
,
CV_32F
,
1.
/
255.
);
}
else
// weight_type_ == CV_16S
{
gpu_mask
.
convertTo
(
weight_map
,
CV_16S
);
cuda
::
GpuMat
add_mask
;
cuda
::
compare
(
gpu_mask
,
0
,
add_mask
,
CMP_NE
);
cuda
::
add
(
weight_map
,
Scalar
::
all
(
1
),
weight_map
,
add_mask
);
}
cuda
::
copyMakeBorder
(
weight_map
,
gpu_weight_pyr_gauss
[
0
],
top
,
bottom
,
left
,
right
,
BORDER_CONSTANT
);
for
(
int
i
=
0
;
i
<
num_bands_
;
++
i
)
cuda
::
pyrDown
(
gpu_weight_pyr_gauss
[
i
],
gpu_weight_pyr_gauss
[
i
+
1
]);
int
y_tl
=
tl_new
.
y
-
dst_roi_
.
y
;
int
y_br
=
br_new
.
y
-
dst_roi_
.
y
;
int
x_tl
=
tl_new
.
x
-
dst_roi_
.
x
;
int
x_br
=
br_new
.
x
-
dst_roi_
.
x
;
// Add weighted layer of the source image to the final Laplacian pyramid layer
for
(
int
i
=
0
;
i
<=
num_bands_
;
++
i
)
{
Rect
rc
(
x_tl
,
y_tl
,
x_br
-
x_tl
,
y_br
-
y_tl
);
cuda
::
GpuMat
&
_src_pyr_laplace
=
gpu_src_pyr_laplace
[
i
];
cuda
::
GpuMat
_dst_pyr_laplace
=
gpu_dst_pyr_laplace_
[
i
](
rc
);
cuda
::
GpuMat
&
_weight_pyr_gauss
=
gpu_weight_pyr_gauss
[
i
];
cuda
::
GpuMat
_dst_band_weights
=
gpu_dst_band_weights_
[
i
](
rc
);
using
namespace
cv
::
cuda
::
device
::
blend
;
if
(
weight_type_
==
CV_32F
)
{
addSrcWeightGpu32F
(
_src_pyr_laplace
,
_weight_pyr_gauss
,
_dst_pyr_laplace
,
_dst_band_weights
,
rc
);
}
else
{
addSrcWeightGpu16S
(
_src_pyr_laplace
,
_weight_pyr_gauss
,
_dst_pyr_laplace
,
_dst_band_weights
,
rc
);
}
x_tl
/=
2
;
y_tl
/=
2
;
x_br
/=
2
;
y_br
/=
2
;
}
return
;
}
#endif
// Create the source image Laplacian pyramid
UMat
img_with_border
;
copyMakeBorder
(
_img
,
img_with_border
,
top
,
bottom
,
left
,
right
,
...
...
@@ -322,10 +434,7 @@ void MultiBandBlender::feed(InputArray _img, InputArray mask, Point tl)
#endif
std
::
vector
<
UMat
>
src_pyr_laplace
;
if
(
can_use_gpu_
&&
img_with_border
.
depth
()
==
CV_16S
)
createLaplacePyrGpu
(
img_with_border
,
num_bands_
,
src_pyr_laplace
);
else
createLaplacePyr
(
img_with_border
,
num_bands_
,
src_pyr_laplace
);
createLaplacePyr
(
img_with_border
,
num_bands_
,
src_pyr_laplace
);
LOGLN
(
" Create the source image Laplacian pyramid, time: "
<<
((
getTickCount
()
-
t
)
/
getTickFrequency
())
<<
" sec"
);
#if ENABLE_LOG
...
...
@@ -431,20 +540,57 @@ void MultiBandBlender::feed(InputArray _img, InputArray mask, Point tl)
void
MultiBandBlender
::
blend
(
InputOutputArray
dst
,
InputOutputArray
dst_mask
)
{
for
(
int
i
=
0
;
i
<=
num_bands_
;
++
i
)
normalizeUsingWeightMap
(
dst_band_weights_
[
i
],
dst_pyr_laplace_
[
i
]
);
cv
::
UMat
dst_band_weights_0
;
Rect
dst_rc
(
0
,
0
,
dst_roi_final_
.
width
,
dst_roi_final_
.
height
);
#if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING)
if
(
can_use_gpu_
)
restoreImageFromLaplacePyrGpu
(
dst_pyr_laplace_
);
{
for
(
int
i
=
0
;
i
<=
num_bands_
;
++
i
)
{
cuda
::
GpuMat
dst_i
=
gpu_dst_pyr_laplace_
[
i
];
cuda
::
GpuMat
weight_i
=
gpu_dst_band_weights_
[
i
];
using
namespace
::
cv
::
cuda
::
device
::
blend
;
if
(
weight_type_
==
CV_32F
)
{
normalizeUsingWeightMapGpu32F
(
weight_i
,
dst_i
,
weight_i
.
cols
,
weight_i
.
rows
);
}
else
{
normalizeUsingWeightMapGpu16S
(
weight_i
,
dst_i
,
weight_i
.
cols
,
weight_i
.
rows
);
}
}
// Restore image from Laplacian pyramid
for
(
size_t
i
=
num_bands_
;
i
>
0
;
--
i
)
{
cuda
::
GpuMat
up
;
cuda
::
pyrUp
(
gpu_dst_pyr_laplace_
[
i
],
up
);
cuda
::
add
(
up
,
gpu_dst_pyr_laplace_
[
i
-
1
],
gpu_dst_pyr_laplace_
[
i
-
1
]);
}
gpu_dst_pyr_laplace_
[
0
](
dst_rc
).
download
(
dst_
);
gpu_dst_band_weights_
[
0
].
download
(
dst_band_weights_0
);
gpu_dst_pyr_laplace_
.
clear
();
gpu_dst_band_weights_
.
clear
();
}
else
#endif
{
for
(
int
i
=
0
;
i
<=
num_bands_
;
++
i
)
normalizeUsingWeightMap
(
dst_band_weights_
[
i
],
dst_pyr_laplace_
[
i
]);
restoreImageFromLaplacePyr
(
dst_pyr_laplace_
);
Rect
dst_rc
(
0
,
0
,
dst_roi_final_
.
width
,
dst_roi_final_
.
height
);
dst_
=
dst_pyr_laplace_
[
0
](
dst_rc
);
UMat
_dst_mask
;
compare
(
dst_band_weights_
[
0
](
dst_rc
),
WEIGHT_EPS
,
dst_mask_
,
CMP_GT
);
dst_pyr_laplace_
.
clear
();
dst_band_weights_
.
clear
();
dst_
=
dst_pyr_laplace_
[
0
](
dst_rc
);
dst_band_weights_0
=
dst_band_weights_
[
0
];
dst_pyr_laplace_
.
clear
();
dst_band_weights_
.
clear
();
}
compare
(
dst_band_weights_0
(
dst_rc
),
WEIGHT_EPS
,
dst_mask_
,
CMP_GT
);
Blender
::
blend
(
dst
,
dst_mask
);
}
...
...
modules/stitching/src/cuda/multiband_blend.cu
0 → 100644
View file @
b76e8835
#if !defined CUDA_DISABLER
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/types.hpp"
namespace cv { namespace cuda { namespace device
{
namespace blend
{
__global__ void addSrcWeightKernel16S(const PtrStep<short> src, const PtrStep<short> src_weight,
PtrStep<short> dst, PtrStep<short> dst_weight, int rows, int cols)
{
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;
if (y < rows && x < cols)
{
const short3 v = ((const short3*)src.ptr(y))[x];
short w = src_weight.ptr(y)[x];
((short3*)dst.ptr(y))[x].x += short((v.x * w) >> 8);
((short3*)dst.ptr(y))[x].y += short((v.y * w) >> 8);
((short3*)dst.ptr(y))[x].z += short((v.z * w) >> 8);
dst_weight.ptr(y)[x] += w;
}
}
void addSrcWeightGpu16S(const PtrStep<short> src, const PtrStep<short> src_weight,
PtrStep<short> dst, PtrStep<short> dst_weight, cv::Rect &rc)
{
dim3 threads(16, 16);
dim3 grid(divUp(rc.width, threads.x), divUp(rc.height, threads.y));
addSrcWeightKernel16S<<<grid, threads>>>(src, src_weight, dst, dst_weight, rc.height, rc.width);
cudaSafeCall(cudaGetLastError());
}
__global__ void addSrcWeightKernel32F(const PtrStep<short> src, const PtrStepf src_weight,
PtrStep<short> dst, PtrStepf dst_weight, int rows, int cols)
{
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;
if (y < rows && x < cols)
{
const short3 v = ((const short3*)src.ptr(y))[x];
float w = src_weight.ptr(y)[x];
((short3*)dst.ptr(y))[x].x += static_cast<short>(v.x * w);
((short3*)dst.ptr(y))[x].y += static_cast<short>(v.y * w);
((short3*)dst.ptr(y))[x].z += static_cast<short>(v.z * w);
dst_weight.ptr(y)[x] += w;
}
}
void addSrcWeightGpu32F(const PtrStep<short> src, const PtrStepf src_weight,
PtrStep<short> dst, PtrStepf dst_weight, cv::Rect &rc)
{
dim3 threads(16, 16);
dim3 grid(divUp(rc.width, threads.x), divUp(rc.height, threads.y));
addSrcWeightKernel32F<<<grid, threads>>>(src, src_weight, dst, dst_weight, rc.height, rc.width);
cudaSafeCall(cudaGetLastError());
}
__global__ void normalizeUsingWeightKernel16S(const PtrStep<short> weight, PtrStep<short> src,
const int width, const int height)
{
int x = (blockIdx.x * blockDim.x) + threadIdx.x;
int y = (blockIdx.y * blockDim.y) + threadIdx.y;
if (x < width && y < height)
{
const short3 v = ((short3*)src.ptr(y))[x];
short w = weight.ptr(y)[x];
((short3*)src.ptr(y))[x] = make_short3(short((v.x << 8) / w),
short((v.y << 8) / w), short((v.z << 8) / w));
}
}
void normalizeUsingWeightMapGpu16S(const PtrStep<short> weight, PtrStep<short> src,
const int width, const int height)
{
dim3 threads(16, 16);
dim3 grid(divUp(width, threads.x), divUp(height, threads.y));
normalizeUsingWeightKernel16S<<<grid, threads>>> (weight, src, width, height);
}
__global__ void normalizeUsingWeightKernel32F(const PtrStepf weight, PtrStep<short> src,
const int width, const int height)
{
int x = (blockIdx.x * blockDim.x) + threadIdx.x;
int y = (blockIdx.y * blockDim.y) + threadIdx.y;
if (x < width && y < height)
{
static const float WEIGHT_EPS = 1e-5f;
const short3 v = ((short3*)src.ptr(y))[x];
float w = weight.ptr(y)[x];
((short3*)src.ptr(y))[x] = make_short3(static_cast<short>(v.x / (w + WEIGHT_EPS)),
static_cast<short>(v.y / (w + WEIGHT_EPS)),
static_cast<short>(v.z / (w + WEIGHT_EPS)));
}
}
void normalizeUsingWeightMapGpu32F(const PtrStepf weight, PtrStep<short> src,
const int width, const int height)
{
dim3 threads(16, 16);
dim3 grid(divUp(width, threads.x), divUp(height, threads.y));
normalizeUsingWeightKernel32F<<<grid, threads>>> (weight, src, width, height);
}
}
}}}
#endif
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment