Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
4ebd91e6
Commit
4ebd91e6
authored
Dec 27, 2019
by
Alexander Alekhin
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #16226 from YashasSamaga:cuda4dnn-permute-optm
parents
89d3f95a
16bc505d
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
49 additions
and
8 deletions
+49
-8
fill_copy.cu
modules/dnn/src/cuda/fill_copy.cu
+36
-1
max_unpooling.cu
modules/dnn/src/cuda/max_unpooling.cu
+1
-1
normalize.cu
modules/dnn/src/cuda/normalize.cu
+1
-1
permute.cu
modules/dnn/src/cuda/permute.cu
+0
-0
fill_copy.hpp
modules/dnn/src/cuda4dnn/kernels/fill_copy.hpp
+6
-3
permute.hpp
modules/dnn/src/cuda4dnn/kernels/permute.hpp
+3
-0
concat.hpp
modules/dnn/src/cuda4dnn/primitives/concat.hpp
+1
-1
padding.hpp
modules/dnn/src/cuda4dnn/primitives/padding.hpp
+1
-1
No files found.
modules/dnn/src/cuda/fill.cu
→
modules/dnn/src/cuda/fill_copy.cu
View file @
4ebd91e6
...
...
@@ -21,7 +21,6 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
template <class T, std::size_t N>
__global__ void fill_vec(Span<T> output, T value) {
using vector_type = get_vector_type_t<T, N>;
auto output_vPtr = vector_type::get_pointer(output.data());
for (auto i : grid_stride_range(output.size() / vector_type::size())) {
vector_type vec;
...
...
@@ -30,6 +29,18 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
v_store(output_vPtr[i], vec);
}
}
/* CUDA kernel that copies `input` into `output` one vector at a time.
 *
 * Both buffers are viewed through `get_vector_type_t<T, N>` pointers and each
 * iteration moves a single vector with `v_load` followed by `v_store`.
 *
 * The number of vectors processed is `output.size() / vector_type::size()`, so
 * any trailing elements that do not fill a whole vector are not copied, and
 * `input` is indexed using the length of `output`.
 *
 * NOTE(review): presumably callers must guarantee that both buffers are aligned
 * for the vector type and hold a whole number of vectors — confirm against the
 * launcher.
 */
template <class T, std::size_t N>
__global__ void copy_vec(Span<T> output, View<T> input) {
    using vector_type = get_vector_type_t<T, N>;

    auto dst = vector_type::get_pointer(output.data());
    auto src = vector_type::get_pointer(input.data());

    const auto num_vectors = output.size() / vector_type::size();
    for (auto idx : grid_stride_range(num_vectors)) {
        vector_type packet;
        v_load(packet, src[idx]);
        v_store(dst[idx], packet);
    }
}
}
template <class T, std::size_t N> static
...
...
@@ -55,4 +66,28 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
// Explicit instantiations of fill for the __half and float element types.
template void fill(const Stream&, Span<__half>, __half);
template void fill(const Stream&, Span<float>, float);
/* Launches `raw::copy_vec<T, N>` on `stream` to copy `input` into `output`.
 *
 * Both buffers must satisfy `is_fully_aligned<T>(..., N)`; this is enforced
 * with CV_Assert before the kernel is launched. The launch policy is built
 * for `output.size() / N` work items.
 *
 * NOTE(review): nothing here compares `input.size()` with `output.size()`; the
 * kernel indexes `input` by the length of `output` — confirm that callers
 * always pass buffers of matching size.
 */
template <class T, std::size_t N> static
void launch_vectorized_copy(const Stream& stream, Span<T> output, View<T> input) {
    CV_Assert(is_fully_aligned<T>(output, N));
    CV_Assert(is_fully_aligned<T>(input, N));

    auto copy_kernel = raw::copy_vec<T, N>;
    auto launch_policy = make_policy(copy_kernel, output.size() / N, 0, stream);

    launch_kernel(copy_kernel, launch_policy, output, input);
}
/* Copies `input` into `output` on `stream`, using the widest vector width
 * that both buffers support.
 *
 * Widths are tried in the order 4, 2, 1: the first width for which both
 * `output` and `input` pass `is_fully_aligned<T>` is used to launch the
 * vectorized copy. Width 1 is the unconditional fallback.
 */
template <class T>
void copy(const Stream& stream, Span<T> output, View<T> input) {
    // Widest option first: four elements per vector.
    if (is_fully_aligned<T>(output, 4) && is_fully_aligned<T>(input, 4)) {
        launch_vectorized_copy<T, 4>(stream, output, input);
        return;
    }

    // Next best: two elements per vector.
    if (is_fully_aligned<T>(output, 2) && is_fully_aligned<T>(input, 2)) {
        launch_vectorized_copy<T, 2>(stream, output, input);
        return;
    }

    // Fallback: one element at a time.
    launch_vectorized_copy<T, 1>(stream, output, input);
}
// Explicit instantiations of copy for the __half and float element types.
template void copy(const Stream&, Span<__half>, View<__half>);
template void copy(const Stream&, Span<float>, View<float>);
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
modules/dnn/src/cuda/max_unpooling.cu
View file @
4ebd91e6
...
...
@@ -16,7 +16,7 @@
#include "../cuda4dnn/csl/tensor.hpp"
#include "../cuda4dnn/csl/span.hpp"
#include "../cuda4dnn/kernels/fill.hpp"
#include "../cuda4dnn/kernels/fill_copy.hpp"
#include <opencv2/core.hpp>
...
...
modules/dnn/src/cuda/normalize.cu
View file @
4ebd91e6
...
...
@@ -15,7 +15,7 @@
#include "../cuda4dnn/csl/stream.hpp"
#include "../cuda4dnn/csl/span.hpp"
#include "../cuda4dnn/kernels/fill.hpp"
#include "../cuda4dnn/kernels/fill_copy.hpp"
#include "../cuda4dnn/kernels/scale_shift.hpp"
#include <opencv2/core.hpp>
...
...
modules/dnn/src/cuda/permute.cu
View file @
4ebd91e6
This diff is collapsed.
Click to expand it.
modules/dnn/src/cuda4dnn/kernels/fill.hpp
→
modules/dnn/src/cuda4dnn/kernels/fill_copy.hpp
View file @
4ebd91e6
...
...
@@ -2,8 +2,8 @@
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#ifndef OPENCV_DNN_SRC_CUDA4DNN_KERNELS_FILL_HPP
#define OPENCV_DNN_SRC_CUDA4DNN_KERNELS_FILL_HPP
#ifndef OPENCV_DNN_SRC_CUDA4DNN_KERNELS_FILL_COPY_HPP
#define OPENCV_DNN_SRC_CUDA4DNN_KERNELS_FILL_COPY_HPP
#include "../csl/stream.hpp"
#include "../csl/span.hpp"
...
...
@@ -13,6 +13,9 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
// Declaration of the fill kernel launcher, templated on the element type T.
// Takes the stream to run on, the output span and a value of type T.
// NOTE(review): presumably writes `value` to every element of `output`; the
// definition is not visible here — confirm.
template <class T>
void fill(const csl::Stream& stream, csl::Span<T> output, T value);
// Declaration of the copy kernel launcher, templated on the element type T.
// Takes the stream to run on, the output span and the input view.
// NOTE(review): presumably copies `input` into `output` element for element;
// whether the two must have equal size is not stated here — confirm.
template <class T>
void copy(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input);
}}}}
/* namespace cv::dnn::cuda4dnn::kernels */
#endif
/* OPENCV_DNN_SRC_CUDA4DNN_KERNELS_FILL_HPP */
#endif
/* OPENCV_DNN_SRC_CUDA4DNN_KERNELS_FILL_COPY_HPP */
modules/dnn/src/cuda4dnn/kernels/permute.hpp
View file @
4ebd91e6
...
...
@@ -16,6 +16,9 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
// Declaration of the permute kernel launcher, templated on the element type T.
// Takes the stream to run on, the output tensor span, the input tensor view
// and `order`, a vector of axis indices passed by value.
// NOTE(review): presumably `order` gives the input axis for each output axis;
// the definition is not visible here — confirm.
template <class T>
void permute(const csl::Stream& stream, csl::TensorSpan<T> output, csl::TensorView<T> input, std::vector<std::size_t> order);
// Declaration of the transpose kernel launcher, templated on the element type T.
// Takes the stream to run on, the output span, the input view and the two
// widths `in_width` and `out_width`.
// NOTE(review): presumably treats the flat buffers as 2D matrices whose row
// lengths are `in_width` and `out_width`; the definition is not visible here —
// confirm.
template <class T>
void transpose(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input, std::size_t in_width, std::size_t out_width);
}}}}
/* namespace cv::dnn::cuda4dnn::kernels */
#endif
/* OPENCV_DNN_SRC_CUDA4DNN_KERNELS_PERMUTE_HPP */
modules/dnn/src/cuda4dnn/primitives/concat.hpp
View file @
4ebd91e6
...
...
@@ -10,7 +10,7 @@
#include "../csl/stream.hpp"
#include "../csl/pointer.hpp"
#include "../kernels/fill.hpp"
#include "../kernels/fill_copy.hpp"
#include "../kernels/concat.hpp"
#include <opencv2/core.hpp>
...
...
modules/dnn/src/cuda4dnn/primitives/padding.hpp
View file @
4ebd91e6
...
...
@@ -10,7 +10,7 @@
#include "../csl/stream.hpp"
#include "../csl/tensor.hpp"
#include "../kernels/fill.hpp"
#include "../kernels/fill_copy.hpp"
#include "../kernels/concat.hpp"
#include "../kernels/padding.hpp"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment