Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
a91eca6e
Commit
a91eca6e
authored
Dec 06, 2019
by
YashasSamaga
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add DIV support to EltwiseOp
parent
4b0132ed
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
58 additions
and
2 deletions
+58
-2
eltwise_ops.cu
modules/dnn/src/cuda/eltwise_ops.cu
+48
-0
eltwise_ops.hpp
modules/dnn/src/cuda4dnn/kernels/eltwise_ops.hpp
+3
-0
eltwise.hpp
modules/dnn/src/cuda4dnn/primitives/eltwise.hpp
+4
-1
eltwise_layer.cpp
modules/dnn/src/layers/eltwise_layer.cpp
+2
-1
test_onnx_importer.cpp
modules/dnn/test/test_onnx_importer.cpp
+1
-0
No files found.
modules/dnn/src/cuda/eltwise_ops.cu
View file @
a91eca6e
...
...
@@ -102,6 +102,26 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
v_store(output_vPtr[i], vec_x);
}
}
/* Element-wise division kernel operating on N-wide vectors of T.
 *
 * `output`, `x` and `y` are reinterpreted as arrays of `get_vector_type_t<T, N>`.
 * For every vector index handed out by `grid_stride_range`, one vector is loaded
 * from `x` and one from `y`, each lane of the first is divided by the matching
 * lane of the second, and the quotient vector is stored to `output`.
 *
 * Only `output.size() / vector_type::size()` vectors are visited.
 * NOTE(review): callers are presumably expected to guarantee that the size is a
 * multiple of N and that the pointers are suitably aligned - confirm at the launch site.
 */
template <class T, std::size_t N>
__global__ void eltwise_div_2_vec(Span<T> output, View<T> x, View<T> y) {
    using vector_type = get_vector_type_t<T, N>;

    auto dst_vectors = vector_type::get_pointer(output.data());
    auto numerator_vectors = vector_type::get_pointer(x.data());
    auto denominator_vectors = vector_type::get_pointer(y.data());

    const auto vector_count = output.size() / vector_type::size();
    for (auto idx : grid_stride_range(vector_count)) {
        vector_type numerator, denominator;
        v_load(numerator, numerator_vectors[idx]);
        v_load(denominator, denominator_vectors[idx]);

        /* lane-wise quotient, accumulated in place in `numerator` */
        for (int lane = 0; lane < vector_type::size(); lane++) {
            numerator.data[lane] = numerator.data[lane] / denominator.data[lane];
        }

        v_store(dst_vectors[idx], numerator);
    }
}
}
template <class T, std::size_t N>
...
...
@@ -221,4 +241,32 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
template void eltwise_prod_2(const Stream& stream, Span<__half> output, View<__half> x, View<__half> y);
template void eltwise_prod_2(const Stream& stream, Span<float> output, View<float> x, View<float> y);
/* Launches the N-wide vectorized division kernel on `stream`.
 *
 * Asserts (via CV_Assert) that `output`, `x` and `y` each satisfy
 * `is_fully_aligned<T>(..., N)` before launching, in that order.
 * The launch policy is built for `output.size() / N` work items with a
 * third `make_policy` argument of 0.
 */
template <class T, std::size_t N>
void launch_vectorized_eltwise_div_2(const Stream& stream, Span<T> output, View<T> x, View<T> y) {
    CV_Assert(is_fully_aligned<T>(output, N));
    CV_Assert(is_fully_aligned<T>(x, N));
    CV_Assert(is_fully_aligned<T>(y, N));

    auto div_kernel = raw::eltwise_div_2_vec<T, N>;
    auto launch_policy = make_policy(div_kernel, output.size() / N, 0, stream);

    launch_kernel(div_kernel, launch_policy, output, x, y);
}
/* Element-wise division entry point: dispatches to the widest vectorized
 * launcher that all three buffers qualify for.
 *
 * Asserts that `x`, `y` and `output` have the same size. A vector width of 4
 * is tried first, then 2; if neither passes `is_fully_aligned` for all of
 * `output`, `x` and `y`, the width-1 launcher is used.
 */
template <class T>
void eltwise_div_2(const Stream& stream, Span<T> output, View<T> x, View<T> y) {
    CV_Assert(x.size() == y.size());
    CV_Assert(x.size() == output.size());

    /* widest variant: all buffers usable as 4-element vectors */
    if (is_fully_aligned<T>(output, 4) && is_fully_aligned<T>(x, 4) && is_fully_aligned<T>(y, 4)) {
        launch_vectorized_eltwise_div_2<T, 4>(stream, output, x, y);
        return;
    }

    /* next best: 2-element vectors */
    if (is_fully_aligned<T>(output, 2) && is_fully_aligned<T>(x, 2) && is_fully_aligned<T>(y, 2)) {
        launch_vectorized_eltwise_div_2<T, 2>(stream, output, x, y);
        return;
    }

    /* fallback: one element per "vector" */
    launch_vectorized_eltwise_div_2<T, 1>(stream, output, x, y);
}
template void eltwise_div_2(const Stream& stream, Span<__half> output, View<__half> x, View<__half> y);
template void eltwise_div_2(const Stream& stream, Span<float> output, View<float> x, View<float> y);
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
modules/dnn/src/cuda4dnn/kernels/eltwise_ops.hpp
View file @
a91eca6e
...
...
@@ -24,6 +24,9 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
/* Declaration of eltwise_prod_2 for element type T.
 * Takes a stream, an output span and two input views `x` and `y`.
 * NOTE(review): presumably writes the element-wise product of `x` and `y`
 * to `output`; the definition is not visible here - confirm.
 */
template
<
class
T
>
void
eltwise_prod_2
(
const
csl
::
Stream
&
stream
,
csl
::
Span
<
T
>
output
,
csl
::
View
<
T
>
x
,
csl
::
View
<
T
>
y
);
/* Declaration of eltwise_div_2 for element type T: element-wise division,
 * output = x / y, issued on `stream`.
 * `x` is the numerator view and `y` the denominator view; the definition
 * asserts that `x`, `y` and `output` all have the same size.
 */
template
<
class
T
>
void
eltwise_div_2
(
const
csl
::
Stream
&
stream
,
csl
::
Span
<
T
>
output
,
csl
::
View
<
T
>
x
,
csl
::
View
<
T
>
y
);
}}}}
/* namespace cv::dnn::cuda4dnn::kernels */
#endif
/* OPENCV_DNN_SRC_CUDA4DNN_KERNELS_ELTWISE_OPS_HPP */
modules/dnn/src/cuda4dnn/primitives/eltwise.hpp
View file @
a91eca6e
...
...
@@ -24,7 +24,8 @@ namespace cv { namespace dnn { namespace cuda4dnn {
enum
class
EltwiseOpType
{
MAX
,
SUM
,
PRODUCT
PRODUCT
,
DIV
};
template
<
class
T
>
...
...
@@ -64,6 +65,7 @@ namespace cv { namespace dnn { namespace cuda4dnn {
{
case
EltwiseOpType
:
:
MAX
:
kernels
::
eltwise_max_2
<
T
>
(
stream
,
output
,
input_x
,
input_y
);
break
;
case
EltwiseOpType
:
:
PRODUCT
:
kernels
::
eltwise_prod_2
<
T
>
(
stream
,
output
,
input_x
,
input_y
);
break
;
case
EltwiseOpType
:
:
DIV
:
kernels
::
eltwise_div_2
<
T
>
(
stream
,
output
,
input_x
,
input_y
);
break
;
case
EltwiseOpType
:
:
SUM
:
if
(
coeffs
.
empty
()
||
(
coeffs
[
0
]
==
1
&&
coeffs
[
1
]
==
1
))
kernels
::
eltwise_sum_2
<
T
>
(
stream
,
output
,
input_x
,
input_y
);
...
...
@@ -89,6 +91,7 @@ namespace cv { namespace dnn { namespace cuda4dnn {
{
case
EltwiseOpType
:
:
MAX
:
kernels
::
eltwise_max_2
<
T
>
(
stream
,
output
,
output
,
input
);
break
;
case
EltwiseOpType
:
:
PRODUCT
:
kernels
::
eltwise_prod_2
<
T
>
(
stream
,
output
,
output
,
input
);
break
;
case
EltwiseOpType
:
:
DIV
:
kernels
::
eltwise_div_2
<
T
>
(
stream
,
output
,
output
,
input
);
break
;
case
EltwiseOpType
:
:
SUM
:
if
(
coeffs
.
empty
()
||
coeffs
[
i
]
==
1
)
kernels
::
eltwise_sum_2
<
T
>
(
stream
,
output
,
output
,
input
);
...
...
modules/dnn/src/layers/eltwise_layer.cpp
View file @
a91eca6e
...
...
@@ -108,7 +108,7 @@ public:
virtual
bool
supportBackend
(
int
backendId
)
CV_OVERRIDE
{
return
backendId
==
DNN_BACKEND_OPENCV
||
(
backendId
==
DNN_BACKEND_CUDA
&&
op
!=
DIV
)
||
// TODO: not implemented, see PR #15811
backendId
==
DNN_BACKEND_CUDA
||
(
backendId
==
DNN_BACKEND_HALIDE
&&
op
!=
DIV
)
||
// TODO: not implemented, see PR #15811
((((
backendId
==
DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019
&&
(
preferableTarget
!=
DNN_TARGET_OPENCL
||
coeffs
.
empty
()))
||
backendId
==
DNN_BACKEND_INFERENCE_ENGINE_NGRAPH
)
&&
!
variableChannels
));
...
...
@@ -471,6 +471,7 @@ public:
case
MAX
:
return
cuda4dnn
::
EltwiseOpType
::
MAX
;
case
SUM
:
return
cuda4dnn
::
EltwiseOpType
::
SUM
;
case
PROD
:
return
cuda4dnn
::
EltwiseOpType
::
PRODUCT
;
case
DIV
:
return
cuda4dnn
::
EltwiseOpType
::
DIV
;
}
return
cuda4dnn
::
EltwiseOpType
::
SUM
;
}();
...
...
modules/dnn/test/test_onnx_importer.cpp
View file @
a91eca6e
...
...
@@ -380,6 +380,7 @@ TEST_P(Test_ONNX_layers, Div)
normAssert
(
ref
,
out
,
""
,
default_l1
,
default_lInf
);
expectNoFallbacksFromIE
(
net
);
expectNoFallbacksFromCUDA
(
net
);
}
TEST_P
(
Test_ONNX_layers
,
DynamicReshape
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment