submodule / opencv / Commits / f6c57388

Commit f6c57388, authored Jul 12, 2019 by Alexander Alekhin

    Merge remote-tracking branch 'upstream/3.4' into merge-3.4

Parents: f663e8f9, 054c7962

Showing 11 changed files with 638 additions and 228 deletions (+638, -228).
Changed files:

    modules/dnn/perf/perf_convolution3d.cpp          +182  -0
    modules/dnn/src/layers/convolution_layer.cpp     +239  -105
    modules/dnn/src/layers/layers_common.cpp         +3    -1
    modules/dnn/src/layers/layers_common.hpp         +2    -1
    modules/dnn/src/layers/pooling_layer.cpp         +124  -60
    modules/dnn/src/onnx/onnx_importer.cpp           +22   -27
    modules/dnn/test/test_onnx_importer.cpp          +32   -9
    modules/dnn/test/test_tf_importer.cpp            +6    -6
    modules/imgproc/src/filter.simd.hpp              +4    -3
    modules/imgproc/src/smooth.simd.hpp              +16   -16
    modules/imgproc/test/test_smooth_bitexact.cpp    +8    -0
modules/dnn/perf/perf_convolution3d.cpp  (new file, mode 0 → 100644)

// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#include "perf_precomp.hpp"
#include <opencv2/dnn/shape_utils.hpp>

namespace opencv_test {

struct Conv3DParam_t {
    int kernel[3];
    struct BlobShape { int dims[5]; } shapeIn;
    int outCN;
    int groups;
    int stride[3];
    int dilation[3];
    int pad[6];
    const char* padMode;
    bool hasBias;
    double declared_flops;
};
// Details: #12142
static const Conv3DParam_t testConvolution3DConfigs[] = {
    {{3, 3, 3}, {{1, 6, 10, 38, 50}}, 6, 1, {1, 1, 1}, {1, 1, 1}, {0, 0, 0, 0, 0, 0}, "VALID", true, 26956800.},
    {{3, 3, 3}, {{1, 2, 19, 19, 19}}, 2, 2, {2, 2, 2}, {1, 1, 1}, {1, 1, 1, 1, 1, 1}, "", true, 218000.},
    {{3, 3, 3}, {{1, 2, 25, 19, 19}}, 2, 2, {1, 2, 2}, {1, 1, 1}, {2, 2, 2, 2, 2, 2}, "SAME", false, 545000.},
    {{3, 3, 3}, {{1, 11, 9, 150, 200}}, 11, 1, {1, 1, 1}, {1, 1, 1}, {0, 0, 0, 0, 0, 0}, "VALID", true, 1342562760.},
    {{3, 3, 3}, {{1, 10, 98, 10, 10}}, 10, 1, {1, 1, 1}, {1, 1, 1}, {1, 0, 1, 1, 0, 1}, "SAME", false, 53018000.},
    {{5, 5, 5}, {{1, 6, 19, 19, 19}}, 6, 2, {1, 1, 1}, {1, 1, 1}, {0, 0, 0, 0, 0, 0}, "", false, 30395250.},
    {{5, 5, 5}, {{1, 4, 50, 19, 19}}, 4, 1, {2, 2, 2}, {1, 1, 1}, {1, 1, 1, 1, 1, 1}, "VALID", false, 5893888.},
    {{5, 5, 5}, {{1, 3, 75, 75, 100}}, 3, 1, {1, 1, 1}, {1, 1, 1}, {0, 0, 0, 0, 0, 0}, "SAME", true, 1267312500.},
    {{5, 5, 5}, {{1, 2, 21, 75, 100}}, 2, 1, {1, 1, 1}, {1, 1, 1}, {0, 0, 0, 0, 0, 0}, "", true, 116103744.},
    {{5, 5, 5}, {{1, 4, 40, 75, 75}}, 4, 1, {2, 2, 2}, {1, 1, 1}, {0, 0, 0, 0, 0, 0}, "", false, 93405312.},
    {{7, 7, 7}, {{1, 6, 15, 19, 19}}, 6, 1, {2, 1, 1}, {1, 1, 1}, {3, 3, 3, 3, 3, 3}, "SAME", true, 71339376.},
    {{7, 7, 7}, {{1, 2, 38, 38, 38}}, 2, 1, {1, 2, 1}, {1, 1, 1}, {0, 0, 0, 0, 0, 0}, "", false, 44990464.},
    {{1, 1, 1}, {{1, 4, 9, 10, 10}}, 4, 1, {1, 1, 2}, {1, 1, 1}, {1, 1, 1, 1, 1, 1}, "VALID", false, 16200.},
    {{3, 1, 4}, {{1, 14, 5, 10, 10}}, 14, 1, {1, 1, 1}, {1, 1, 1}, {0, 0, 0, 0, 0, 0}, "SAME", false, 2359000.},
    {{1, 1, 1}, {{1, 8, 1, 10, 10}}, 8, 8, {1, 1, 1}, {1, 1, 1}, {1, 1, 1, 1, 1, 1}, "", true, 58752.},
    {{3, 4, 2}, {{1, 4, 8, 10, 10}}, 4, 4, {1, 2, 1}, {1, 1, 1}, {0, 0, 0, 0, 0, 0}, "", true, 166752.}
};

struct Conv3DParamID
{
    enum {
        CONV_0 = 0,
        CONV_100 = 16,
        CONV_LAST = sizeof(testConvolution3DConfigs) / sizeof(testConvolution3DConfigs[0])
    };
    int val_;
    Conv3DParamID(int val = 0) : val_(val) {}
    operator int() const { return val_; }
    static ::testing::internal::ParamGenerator<Conv3DParamID> all()
    {
#if 0
        enum { NUM = (int)CONV_LAST };
#else
        enum { NUM = (int)CONV_100 };
#endif
        Conv3DParamID v_[NUM];
        for (int i = 0; i < NUM; ++i) { v_[i] = Conv3DParamID(i); }  // reduce generated code size
        return ::testing::ValuesIn(v_, v_ + NUM);
    }
};

static inline void PrintTo(const Conv3DParamID& v, std::ostream* os)
{
    CV_Assert((int)v >= 0); CV_Assert((int)v < Conv3DParamID::CONV_LAST);
    const Conv3DParam_t& p = testConvolution3DConfigs[(int)v];

    *os << "GFLOPS=" << cv::format("%.3f", p.declared_flops * 1e-9)
        << ", K=[" << p.kernel[0] << " x " << p.kernel[1] << " x " << p.kernel[2] << "]"
        << ", IN={" << p.shapeIn.dims[0] << ", " << p.shapeIn.dims[1] << ", " << p.shapeIn.dims[2]
        << ", " << p.shapeIn.dims[3] << ", " << p.shapeIn.dims[4] << "}"
        << ", OCN=" << p.outCN;
    if (p.groups > 1)
        *os << ", G=" << p.groups;
    if (p.stride[0] * p.stride[1] * p.stride[2] != 1)
        *os << ", S=[" << p.stride[0] << " x " << p.stride[1] << " x " << p.stride[2] << "]";
    if (p.dilation[0] * p.dilation[1] * p.dilation[2] != 1)
        *os << ", D=[" << p.dilation[0] << " x " << p.dilation[1] << " x " << p.dilation[2] << "]";
    if (p.pad[0] != 0 && p.pad[1] != 0 && p.pad[2] != 0 &&
        p.pad[3] != 0 && p.pad[4] != 0 && p.pad[5] != 0)
        *os << ", P=(" << p.pad[0] << ", " << p.pad[3] << ") x ("
                       << p.pad[1] << ", " << p.pad[4] << ") x ("
                       << p.pad[2] << ", " << p.pad[5] << ")";
    if (!((std::string)p.padMode).empty())
        *os << ", PM=" << ((std::string)p.padMode);
    if (p.hasBias)
        *os << ", BIAS";
}

typedef tuple<Conv3DParamID, tuple<Backend, Target> > Conv3DTestParam_t;
typedef TestBaseWithParam<Conv3DTestParam_t> Conv3D;

PERF_TEST_P_(Conv3D, conv3d)
{
    int test_id = (int)get<0>(GetParam());
    ASSERT_GE(test_id, 0); ASSERT_LT(test_id, Conv3DParamID::CONV_LAST);
    const Conv3DParam_t& params = testConvolution3DConfigs[test_id];
    double declared_flops = params.declared_flops;

    DictValue kernel   = DictValue::arrayInt(&params.kernel[0], 3);
    DictValue stride   = DictValue::arrayInt(&params.stride[0], 3);
    DictValue pad      = DictValue::arrayInt(&params.pad[0], 6);
    DictValue dilation = DictValue::arrayInt(&params.dilation[0], 3);

    MatShape inputShape = MatShape(params.shapeIn.dims, params.shapeIn.dims + 5);
    int outChannels = params.outCN;
    int groups = params.groups;
    std::string padMode(params.padMode);

    bool hasBias = params.hasBias;
    Backend backendId = get<0>(get<1>(GetParam()));
    Target targetId = get<1>(get<1>(GetParam()));

    if (targetId != DNN_TARGET_CPU)
        throw SkipTestException("Only CPU is supported");

    int inChannels = inputShape[1];

    int sz[] = {outChannels, inChannels / groups, params.kernel[0], params.kernel[1], params.kernel[2]};
    Mat weights(5, &sz[0], CV_32F);
    randu(weights, -1.0f, 1.0f);

    LayerParams lp;
    lp.set("kernel_size", kernel);
    lp.set("pad", pad);
    if (!padMode.empty())
        lp.set("pad_mode", padMode);
    lp.set("stride", stride);
    lp.set("dilation", dilation);
    lp.set("num_output", outChannels);
    lp.set("group", groups);
    lp.set("bias_term", hasBias);
    lp.type = "Convolution";
    lp.name = "testLayer";
    lp.blobs.push_back(weights);

    if (hasBias)
    {
        Mat bias(1, outChannels, CV_32F);
        randu(bias, -1.0f, 1.0f);
        lp.blobs.push_back(bias);
    }

    int inpSz[] = {1, inChannels, inputShape[2], inputShape[3], inputShape[4]};
    Mat input(5, &inpSz[0], CV_32F);
    randu(input, -1.0f, 1.0f);

    Net net;
    net.addLayerToPrev(lp.name, lp.type, lp);

    net.setInput(input);
    net.setPreferableBackend(backendId);
    net.setPreferableTarget(targetId);

    Mat output = net.forward();

    MatShape netInputShape = shape(input);
    size_t weightsMemory = 0, blobsMemory = 0;
    net.getMemoryConsumption(netInputShape, weightsMemory, blobsMemory);
    int64 flops = net.getFLOPS(netInputShape);
    CV_Assert(flops > 0);

    std::cout
        << "IN=" << divUp(input.total() * input.elemSize(), 1u << 10) << " Kb " << netInputShape
        << "    OUT=" << divUp(output.total() * output.elemSize(), 1u << 10) << " Kb " << shape(output)
        << "    Weights(parameters): " << divUp(weightsMemory, 1u << 10) << " Kb"
        << "    MFLOPS=" << flops * 1e-6 << std::endl;

    TEST_CYCLE()
    {
        Mat res = net.forward();
    }
    EXPECT_NEAR(flops, declared_flops, declared_flops * 1e-6);
    SANITY_CHECK_NOTHING();
}

INSTANTIATE_TEST_CASE_P(/**/, Conv3D, Combine(
    Conv3DParamID::all(),
    dnnBackendsAndTargets(false, false)  // defined in ../test/test_common.hpp
));

} // namespace
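The declared_flops column above can be cross-checked against the accounting this patch adds to ConvolutionLayerImpl::getFLOPS later in this diff: two operations per multiply-accumulate over the kernel volume, plus one per output element. A minimal self-contained sketch (the helper name and the worked numbers are ours, not part of the patch):

#include <cstdint>
#include <iostream>

// flops = outTotal * (2 * kernelVolume * inputChannels + 1), mirroring
// total(outputs[i])*(CV_BIG_INT(2)*karea*inputs[i][1] + 1) from the patch.
static int64_t conv3dFlops(int64_t outTotal, int64_t kD, int64_t kH, int64_t kW, int64_t inpCn)
{
    return outTotal * (2 * kD * kH * kW * inpCn + 1);
}

int main()
{
    // First config above: K=3x3x3, IN={1,6,10,38,50}, OCN=6, VALID padding,
    // so the output spatial size is 8 x 36 x 48 and outTotal = 6*8*36*48.
    std::cout << conv3dFlops(int64_t(6) * 8 * 36 * 48, 3, 3, 3, 6) << "\n";  // prints 26956800
    return 0;
}

The printed value matches the first declared_flops entry, which is exactly what the test's EXPECT_NEAR(flops, declared_flops, declared_flops * 1e-6) verifies at runtime.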
modules/dnn/src/layers/convolution_layer.cpp

@@ -48,6 +48,7 @@
 #include "opencv2/core/hal/hal.hpp"
 #include "opencv2/core/hal/intrin.hpp"
 #include <iostream>
+#include <numeric>

 #ifdef HAVE_OPENCL
 #include "opencl_kernels_dnn.hpp"

@@ -67,7 +68,7 @@ public:
     BaseConvolutionLayerImpl(const LayerParams &params)
     {
         setParamsFrom(params);
-        getConvolutionKernelParams(params, kernel_size, pads_begin, pads_end, strides, dilations, padMode);
+        getConvolutionKernelParams(params, kernel_size, pads_begin, pads_end, strides, dilations, padMode, adjust_pads);

         numOutput = params.get<int>("num_output");
         int ngroups = params.get<int>("group", 1);

@@ -83,14 +84,14 @@ public:
             pad = Size(pads_begin[1], pads_begin[0]);
             dilation = Size(dilations[1], dilations[0]);

-            adjust_pads.push_back(params.get<int>("adj_h", 0));
-            adjust_pads.push_back(params.get<int>("adj_w", 0));
-
             adjustPad.height = adjust_pads[0];
             adjustPad.width = adjust_pads[1];
-            CV_Assert(adjustPad.width < stride.width &&
-                      adjustPad.height < stride.height);
         }
+        for (int i = 0; i < adjust_pads.size(); i++) {
+            CV_Assert(adjust_pads[i] < strides[i]);
+        }
         fusedWeights = false;
         fusedBias = false;
     }

@@ -258,11 +259,14 @@ public:
         else
#endif
         {
-            if (kernel_size.size() != 2)
-                return false;
-            return backendId == DNN_BACKEND_OPENCV ||
-                   backendId == DNN_BACKEND_HALIDE ||
-                   (backendId == DNN_BACKEND_VKCOM && haveVulkan());
+            if (kernel_size.size() == 3)
+                return (preferableTarget == DNN_TARGET_CPU && backendId == DNN_BACKEND_OPENCV);
+            else if (kernel_size.size() == 2)
+                return backendId == DNN_BACKEND_OPENCV ||
+                       backendId == DNN_BACKEND_HALIDE ||
+                       (backendId == DNN_BACKEND_VKCOM && haveVulkan());
+            else
+                return false;
         }
     }

@@ -604,8 +608,8 @@ public:
         const Mat* input_;
         const Mat* weights_;
         Mat* output_;
-        int outShape[4];
-        Size kernel_, pad_, stride_, dilation_;
+        int outShape[4]; // used only for conv2d
+        std::vector<size_t> kernel_size, pads_begin, pads_end, strides, dilations;
         int ngroups_, nstripes_;
         std::vector<int> ofstab_;
         const std::vector<float>* biasvec_;

@@ -624,14 +628,18 @@ public:
         static void run( const Mat& input, Mat& output, const Mat& weights,
                          const std::vector<float>& biasvec,
                          const std::vector<float>& reluslope,
-                         Size kernel, Size pad, Size stride, Size dilation,
+                         const std::vector<size_t>& kernel_size, const std::vector<size_t>& strides,
+                         const std::vector<size_t>& pads_begin, const std::vector<size_t>& pads_end,
+                         const std::vector<size_t>& dilations,
                          const ActivationLayer* activ, int ngroups, int nstripes )
         {
+            size_t karea = std::accumulate(kernel_size.begin(), kernel_size.end(), 1, std::multiplies<size_t>());
             CV_Assert_N(
-                       input.dims == 4 && output.dims == 4,
+                       (input.dims == 4 || input.dims == 5) && (input.dims == output.dims),
                        input.size[0] == output.size[0],
                        weights.rows == output.size[1],
-                       weights.cols == (input.size[1]/ngroups)*kernel.width*kernel.height,
+                       weights.cols == (input.size[1]/ngroups)*karea,
                        input.type() == output.type(),
                        input.type() == weights.type(),
                        input.type() == CV_32FC1,

@@ -645,26 +653,58 @@ public:
             p.output_ = &output;
             for( int i = 0; i < 4; i++ ) p.outShape[i] = output.size[i];
             p.outShape[1] /= ngroups;
-            p.kernel_ = kernel; p.pad_ = pad; p.stride_ = stride; p.dilation_ = dilation;
+
+            p.kernel_size = kernel_size; p.strides = strides; p.dilations = dilations;
+            p.pads_begin = pads_begin; p.pads_end = pads_end;
+
             p.ngroups_ = ngroups;
             p.nstripes_ = nstripes;

-            int inpCnAll = input.size[1], width = input.size[3], height = input.size[2];
+            int inpCnAll = input.size[1];
+            int depth = (input.dims == 5) ? input.size[2] : 1;
+            int width = input.size[input.dims - 1];
+            int height = input.size[input.dims - 2];
             int inpCn = inpCnAll / ngroups;
-            p.is1x1_ = kernel == Size(1,1) && pad == Size(0,0);
-            p.useAVX = checkHardwareSupport(CPU_AVX);
-            p.useAVX2 = checkHardwareSupport(CPU_AVX2);
-            p.useAVX512 = CV_CPU_HAS_SUPPORT_AVX512_SKX;
+
+            bool isConv2D = kernel_size.size() == 2;
+
+            p.is1x1_ = isConv2D && kernel_size[0] == 1 && kernel_size[1] == 1 &&
+                       pads_begin[0] == 0 && pads_begin[1] == 0;
+
+            p.useAVX    = checkHardwareSupport(CPU_AVX)  && isConv2D;
+            p.useAVX2   = checkHardwareSupport(CPU_AVX2) && isConv2D;
+            p.useAVX512 = CV_CPU_HAS_SUPPORT_AVX512_SKX  && isConv2D;

             int ncn = std::min(inpCn, (int)BLK_SIZE_CN);
-            p.ofstab_.resize(kernel.width*kernel.height*ncn);
-            int* ofstab = &p.ofstab_[0];
-            for( int k = 0; k < ncn; k++ )
-                for( int k_r = 0; k_r < kernel.height; k_r++ )
-                    for( int k_c = 0; k_c < kernel.width; k_c++ )
-                        ofstab[(k*kernel.height + k_r)*kernel.width + k_c] =
-                            (k*height + k_r*dilation.height)*width + k_c*dilation.width;
+
+            int kernel_d = !isConv2D ? kernel_size[0] : 1;
+            int kernel_h = kernel_size[kernel_size.size() - 2];
+            int kernel_w = kernel_size.back();
+
+            int dil_d = !isConv2D ? dilations[0] : 1;
+            int dil_h = dilations[dilations.size() - 2];
+            int dil_w = dilations.back();
+
+            p.ofstab_.resize(karea * ncn);
+            int* ofstab = &p.ofstab_[0];
+
+            if (isConv2D)
+            {
+                for( int k = 0; k < ncn; k++ )
+                    for( int k_r = 0; k_r < kernel_h; k_r++ )
+                        for( int k_c = 0; k_c < kernel_w; k_c++ )
+                            ofstab[(k*kernel_h + k_r)*kernel_w + k_c] =
+                                   (k*height + k_r*dil_h)*width + k_c*dil_w;
+            }
+            else
+            {
+                for( int k = 0; k < ncn; k++ )
+                    for( int k_d = 0; k_d < kernel_d; k_d++ )
+                        for( int k_r = 0; k_r < kernel_h; k_r++ )
+                            for( int k_c = 0; k_c < kernel_w; k_c++ )
+                                ofstab[(k*kernel_d*kernel_h + k_d*kernel_h + k_r)*kernel_w + k_c] =
+                                       (k*depth*height + k_d*dil_d*height + k_r*dil_h)*width + k_c*dil_w;
+            }

             p.biasvec_ = &biasvec;
             p.reluslope_ = &reluslope;

@@ -677,17 +717,39 @@ public:
         {
             const int valign = ConvolutionLayerImpl::VEC_ALIGN;
             int ngroups = ngroups_, batchSize = input_->size[0]*ngroups;
-            int outW = output_->size[3], outH = output_->size[2], outCn = output_->size[1]/ngroups;
-            int width = input_->size[3], height = input_->size[2], inpCn = input_->size[1]/ngroups;
+            bool isConv2D = input_->dims == 4;
+
+            int outW = output_->size[output_->dims - 1];
+            int outH = output_->size[output_->dims - 2];
+            int outCn = output_->size[1]/ngroups;
+
+            int depth = !isConv2D ? input_->size[2] : 1;
+            int height = input_->size[input_->dims - 2];
+            int width = input_->size[input_->dims - 1];
+            int inpCn = input_->size[1]/ngroups;
+
             const int nstripes = nstripes_;
-            int kernel_w = kernel_.width, kernel_h = kernel_.height;
-            int pad_w = pad_.width, pad_h = pad_.height;
-            int stride_w = stride_.width, stride_h = stride_.height;
-            int dilation_w = dilation_.width, dilation_h = dilation_.height;
-            int karea = kernel_w*kernel_h;
-            int i, j, k;
-            size_t inpPlaneSize = width*height;
-            size_t outPlaneSize = outW*outH;
+
+            int kernel_d = !isConv2D ? kernel_size[0] : 1;
+            int kernel_h = kernel_size[kernel_size.size() - 2];
+            int kernel_w = kernel_size.back();
+            int karea = kernel_w*kernel_h*kernel_d;
+
+            int pad_d = !isConv2D ? pads_begin[0] : 0;
+            int pad_t = pads_begin[pads_begin.size() - 2];
+            int pad_l = pads_begin.back();
+
+            int stride_d = !isConv2D ? strides[0] : 0;
+            int stride_h = strides[strides.size() - 2];
+            int stride_w = strides.back();
+
+            int dilation_d = !isConv2D ? dilations[0] : 1;
+            int dilation_h = dilations[dilations.size() - 2];
+            int dilation_w = dilations.back();
+
+            int i, j, k, d;
+            size_t inpPlaneSize = input_->total(2);
+            size_t outPlaneSize = output_->total(2);
             bool is1x1 = is1x1_;

             int stripesPerSample;

@@ -756,72 +818,125 @@ public:
                 for( int ofs0 = stripeStart; ofs0 < stripeEnd; ofs0 += BLK_SIZE )
                 {
                     int ofs, ofs1 = std::min(ofs0 + BLK_SIZE, stripeEnd);
-                    int out_i = ofs0 / outW;
-                    int out_j = ofs0 - out_i * outW;
+                    int out_d = ofs0 / (outH * outW);
+                    int out_i = (ofs0 - out_d * outH * outW) / outW;
+                    int out_j = ofs0 % outW;

                     // do im2row for a part of input tensor
                     float* rowbuf = rowbuf0;
-                    for( ofs = ofs0; ofs < ofs1; out_j = 0, ++out_i )
-                    {
-                        int delta = std::min(ofs1 - ofs, outW - out_j);
-                        int out_j1 = out_j + delta;
-                        int in_i = out_i * stride_h - pad_h;
-                        int in_j = out_j * stride_w - pad_w;
-                        const float* imgptr = data_inp0 + (cn0*height + in_i)*width + in_j;
-                        ofs += delta;
-
-                        // do im2row for a part of input tensor
-                        if( is1x1 )
-                        {
-                            for( ; out_j < out_j1; out_j++, rowbuf += vsz_a, imgptr += stride_w )
-                            {
-                                for( k = 0; k < vsz; k++ )
-                                    rowbuf[k] = imgptr[k*inpPlaneSize];
-                            }
-                        }
-                        else
-                        {
-                            bool ok_i = 0 <= in_i && in_i < height - (kernel_h-1)*dilation_h;
-                            int i0 = std::max(0, (-in_i + dilation_h-1)/dilation_h);
-                            int i1 = std::min(kernel_h, (height - in_i + dilation_h-1)/dilation_h);
-
-                            for( ; out_j < out_j1; out_j++, rowbuf += vsz_a, imgptr += stride_w, in_j += stride_w )
-                            {
-                                // this condition should be true for most of the tensor elements, i.e.
-                                // most of the time the kernel aperture is inside the tensor X-Y plane.
-                                if( ok_i && out_j + 2 <= out_j1 && 0 <= in_j && in_j + stride_w*2 <= width - (kernel_w-1)*dilation_w )
-                                {
-                                    for( k = 0; k < vsz; k++ )
-                                    {
-                                        int k1 = ofstab[k];
-                                        float v0 = imgptr[k1];
-                                        float v1 = imgptr[k1 + stride_w];
-                                        rowbuf[k] = v0;
-                                        rowbuf[k+vsz_a] = v1;
-                                    }
-                                    out_j++;
-                                    rowbuf += vsz_a;
-                                    imgptr += stride_w;
-                                    in_j += stride_w;
-                                }
-                                else
-                                {
-                                    int j0 = std::max(0, (-in_j + dilation_w-1)/dilation_w);
-                                    int j1 = std::min(kernel_w, (width - in_j + dilation_w-1)/dilation_w);
-
-                                    // here some non-continuous sub-row of the row will not be
-                                    // filled from the tensor; we need to make sure that the uncovered
-                                    // elements are explicitly set to 0's. the easiest way is to
-                                    // set all the elements to 0's before the loop.
-                                    memset(rowbuf, 0, vsz*sizeof(rowbuf[0]));
-                                    for( k = 0; k < ncn; k++ )
-                                    {
-                                        for( i = i0; i < i1; i++ )
-                                        {
-                                            for( j = j0; j < j1; j++ )
-                                            {
-                                                int imgofs = k*(width*height) + i*(dilation_h*width) + j*dilation_w;
-                                                rowbuf[(k*kernel_h + i)*kernel_w + j] = imgptr[imgofs];
-                                            }
-                                        }
-                                    }
-                                }
-                            }
-                        }
-                    }
+
+                    if (isConv2D)
+                    {
+                        for( ofs = ofs0; ofs < ofs1; out_j = 0, ++out_i )
+                        {
+                            int delta = std::min(ofs1 - ofs, outW - out_j);
+                            int out_j1 = out_j + delta;
+
+                            int in_i = out_i * stride_h - pad_t;
+                            int in_j = out_j * stride_w - pad_l;
+                            const float* imgptr = data_inp0 + (cn0*height + in_i)*width + in_j;
+                            ofs += delta;
+
+                            // do im2row for a part of input tensor
+                            if( is1x1 )
+                            {
+                                for( ; out_j < out_j1; out_j++, rowbuf += vsz_a, imgptr += stride_w )
+                                {
+                                    for( k = 0; k < vsz; k++ )
+                                        rowbuf[k] = imgptr[k*inpPlaneSize];
+                                }
+                            }
+                            else
+                            {
+                                bool ok_i = 0 <= in_i && in_i < height - (kernel_h-1)*dilation_h;
+                                int i0 = std::max(0, (-in_i + dilation_h-1)/dilation_h);
+                                int i1 = std::min(kernel_h, (height - in_i + dilation_h-1)/dilation_h);
+
+                                for( ; out_j < out_j1; out_j++, rowbuf += vsz_a, imgptr += stride_w, in_j += stride_w )
+                                {
+                                    // this condition should be true for most of the tensor elements, i.e.
+                                    // most of the time the kernel aperture is inside the tensor X-Y plane.
+                                    if( ok_i && out_j + 2 <= out_j1 && 0 <= in_j && in_j + stride_w*2 <= width - (kernel_w-1)*dilation_w )
+                                    {
+                                        for( k = 0; k < vsz; k++ )
+                                        {
+                                            int k1 = ofstab[k];
+                                            float v0 = imgptr[k1];
+                                            float v1 = imgptr[k1 + stride_w];
+                                            rowbuf[k] = v0;
+                                            rowbuf[k+vsz_a] = v1;
+                                        }
+                                        out_j++;
+                                        rowbuf += vsz_a;
+                                        imgptr += stride_w;
+                                        in_j += stride_w;
+                                    }
+                                    else
+                                    {
+                                        int j0 = std::max(0, (-in_j + dilation_w-1)/dilation_w);
+                                        int j1 = std::min(kernel_w, (width - in_j + dilation_w-1)/dilation_w);
+
+                                        // here some non-continuous sub-row of the row will not be
+                                        // filled from the tensor; we need to make sure that the uncovered
+                                        // elements are explicitly set to 0's. the easiest way is to
+                                        // set all the elements to 0's before the loop.
+                                        memset(rowbuf, 0, vsz*sizeof(rowbuf[0]));
+                                        for( k = 0; k < ncn; k++ )
+                                        {
+                                            for( i = i0; i < i1; i++ )
+                                            {
+                                                for( j = j0; j < j1; j++ )
+                                                {
+                                                    int imgofs = k*(width*height) + i*(dilation_h*width) + j*dilation_w;
+                                                    rowbuf[(k*kernel_h + i)*kernel_w + j] = imgptr[imgofs];
+                                                }
+                                            }
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                    else
+                    {
+                        for( ofs = ofs0; ofs < ofs1; out_d += (out_i + 1) / outH, out_i = (out_i + 1) % outH, out_j = 0 )
+                        {
+                            int delta = std::min(ofs1 - ofs, outW - out_j);
+                            int out_j1 = out_j + delta;
+
+                            int in_d = out_d * stride_d - pad_d;
+                            int in_i = out_i * stride_h - pad_t;
+                            int in_j = out_j * stride_w - pad_l;
+                            const float* imgptr = data_inp0 + (cn0*depth*height + in_d*height + in_i)*width + in_j;
+                            ofs += delta;
+
+                            int d0 = std::max(0, (-in_d + dilation_d - 1) / dilation_d);
+                            int d1 = std::min(kernel_d, (depth - in_d + dilation_d - 1) / dilation_d);
+
+                            int i0 = std::max(0, (-in_i + dilation_h-1)/dilation_h);
+                            int i1 = std::min(kernel_h, (height - in_i + dilation_h-1)/dilation_h);
+
+                            for( ; out_j < out_j1; out_j++, rowbuf += vsz_a, imgptr += stride_w, in_j += stride_w )
+                            {
+                                int j0 = std::max(0, (-in_j + dilation_w-1)/dilation_w);
+                                int j1 = std::min(kernel_w, (width - in_j + dilation_w-1)/dilation_w);
+
+                                // here some non-continuous sub-row of the row will not be
+                                // filled from the tensor; we need to make sure that the uncovered
+                                // elements are explicitly set to 0's. the easiest way is to
+                                // set all the elements to 0's before the loop.
+                                memset(rowbuf, 0, vsz*sizeof(rowbuf[0]));
+                                for( k = 0; k < ncn; k++ )
+                                {
+                                    for( d = d0; d < d1; d++ )
+                                    {
+                                        for( i = i0; i < i1; i++ )
+                                        {
+                                            for( j = j0; j < j1; j++ )
+                                            {
+                                                int imgofs = k*(depth*width*height) + d*dilation_d*width*height + i*(dilation_h*width) + j*dilation_w;
+                                                rowbuf[(k*kernel_d*kernel_h + d*kernel_h + i)*kernel_w + j] = imgptr[imgofs];
+                                            }
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }

@@ -1131,10 +1246,6 @@ public:
         CV_Assert_N(inputs.size() == (size_t)1, inputs[0].size[1] % blobs[0].size[1] == 0,
                     outputs.size() == 1, inputs[0].data != outputs[0].data);

-        if (inputs[0].dims == 5) {
-            CV_Error(Error::StsNotImplemented, "Convolution3D layer is not supported on OCV backend");
-        }
-
         int ngroups = inputs[0].size[1]/blobs[0].size[1];
         CV_Assert(outputs[0].size[1] % ngroups == 0);
         int outCn = blobs[0].size[0];

@@ -1163,7 +1274,7 @@ public:
         int nstripes = std::max(getNumThreads(), 1);

         ParallelConv::run(inputs[0], outputs[0], weightsMat, biasvec, reluslope,
-                          kernel, pad, stride, dilation, activ.get(), ngroups, nstripes);
+                          kernel_size, strides, pads_begin, pads_end, dilations, activ.get(), ngroups, nstripes);
     }

     virtual int64 getFLOPS(const std::vector<MatShape> &inputs,

@@ -1172,9 +1283,10 @@ public:
         CV_Assert(inputs.size() == outputs.size());

         int64 flops = 0;
+        int karea = std::accumulate(kernel_size.begin(), kernel_size.end(), 1, std::multiplies<size_t>());
         for (int i = 0; i < inputs.size(); i++)
         {
-            flops += total(outputs[i])*(CV_BIG_INT(2)*kernel.area()*inputs[i][1] + 1);
+            flops += total(outputs[i])*(CV_BIG_INT(2)*karea*inputs[i][1] + 1);
         }

         return flops;

@@ -1205,29 +1317,39 @@ public:
     virtual bool supportBackend(int backendId) CV_OVERRIDE
     {
#ifdef HAVE_INF_ENGINE
-        const int outGroupCn = blobs[0].size[1];  // Weights are in IOHW layout
+        const int outGroupCn = blobs[0].size[1];  // Weights are in IOHW or IODHW layout
         const int group = numOutput / outGroupCn;
         if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
         {
-            if (kernel_size.size() == 3)
-                CV_Error(Error::StsNotImplemented, "Unsupported deconvolution3D layer");
+            if (kernel_size.size() == 3 && preferableTarget != DNN_TARGET_CPU) {
+                return false;
+            }
-            if (adjustPad.height || adjustPad.width)
+            if (std::accumulate(adjust_pads.begin(), adjust_pads.end(), 0, std::plus<size_t>()) > 0)
             {
                 if (padMode.empty())
                 {
                     if (preferableTarget != DNN_TARGET_CPU && group != 1)
                     {
-                        if ((adjustPad.height && pad.height) || (adjustPad.width && pad.width))
-                            return false;
+                        for (int i = 0; i < adjust_pads.size(); i++) {
+                            if (adjust_pads[i] && pads_begin[i])
+                                return false;
+                        }
                     }
-                    return pad.width >= adjustPad.width && pad.height >= adjustPad.height;
+                    for (int i = 0; i < adjust_pads.size(); i++) {
+                        if (pads_end[i] < adjust_pads[i])
+                            return false;
+                    }
+                    return true;
                 }
                 else if (padMode == "SAME")
                 {
-                    return kernel.width >= pad.width + 1 + adjustPad.width &&
-                           kernel.height >= pad.height + 1 + adjustPad.height;
+                    for (int i = 0; i < adjust_pads.size(); i++) {
+                        if (kernel_size[i] < pads_begin[i] + 1 + adjust_pads[i])
+                            return false;
+                    }
+                    return true;
                 }
                 else if (padMode == "VALID")
                     return false;

@@ -1238,7 +1360,7 @@ public:
             return preferableTarget == DNN_TARGET_CPU;
         }
         if (preferableTarget == DNN_TARGET_OPENCL || preferableTarget == DNN_TARGET_OPENCL_FP16)
-            return dilation.width == 1 && dilation.height == 1;
+            return std::accumulate(dilations.begin(), dilations.end(), 1, std::multiplies<size_t>()) == 1;
         return true;
     }
     else

@@ -1825,11 +1947,14 @@ public:
#ifdef HAVE_INF_ENGINE
     virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> > &) CV_OVERRIDE
     {
-        auto ieWeights = wrapToInfEngineBlob(blobs[0], InferenceEngine::Layout::OIHW);
+        InferenceEngine::Layout layout = blobs[0].dims == 5 ? InferenceEngine::Layout::NCDHW :
+                                                              InferenceEngine::Layout::OIHW;
+        auto ieWeights = wrapToInfEngineBlob(blobs[0], layout);
         if (fusedWeights)
         {
             ieWeights = InferenceEngine::make_shared_blob<float>(
-                                InferenceEngine::Precision::FP32, InferenceEngine::Layout::OIHW,
+                                InferenceEngine::Precision::FP32, layout,
                                 ieWeights->dims());
             ieWeights->allocate();

@@ -1838,7 +1963,7 @@ public:
             transpose(weightsMat, newWeights);
         }
-        const int outGroupCn = blobs[0].size[1];  // Weights are in IOHW layout
+        const int outGroupCn = blobs[0].size[1];  // Weights are in IOHW or OIDHW layout
         const int group = numOutput / outGroupCn;

         InferenceEngine::Builder::DeconvolutionLayer ieLayer(name);

@@ -1850,12 +1975,19 @@ public:
         if (padMode.empty())
         {
-            ieLayer.setPaddingsEnd({pads_end[0] - adjust_pads[0], pads_end[1] - adjust_pads[1]});
+            std::vector<size_t> paddings_end;
+            for (int i = 0; i < pads_end.size(); i++) {
+                paddings_end.push_back(pads_end[i] - adjust_pads[i]);
+            }
+            ieLayer.setPaddingsEnd(paddings_end);
         }
         else if (padMode == "SAME")
         {
-            ieLayer.setPaddingsEnd({kernel_size[0] - pads_begin[0] - 1 - adjust_pads[0],
-                                    kernel_size[1] - pads_begin[1] - 1 - adjust_pads[1]});
+            std::vector<size_t> paddings_end;
+            for (int i = 0; i < pads_begin.size(); i++) {
+                paddings_end.push_back(kernel_size[i] - pads_begin[i] - 1 - adjust_pads[i]);
+            }
+            ieLayer.setPaddingsEnd(paddings_end);
         }
         ieLayer.setGroup((size_t)group);
         ieLayer.setOutDepth((size_t)numOutput);

@@ -1875,10 +2007,12 @@ public:
         float flops = 0;
         int outChannels = blobs[0].size[0];
+        size_t karea = std::accumulate(kernel_size.begin(), kernel_size.end(), 1, std::multiplies<size_t>());

         for (int i = 0; i < inputs.size(); i++)
         {
-            flops += CV_BIG_INT(2)*outChannels*kernel.area()*total(inputs[i]);
+            flops += CV_BIG_INT(2)*outChannels*karea*total(inputs[i]);
         }

         return flops;
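The central data structure in the 3D path above is the precomputed offset table ofstab_: for every (channel, k_d, k_r, k_c) kernel tap it stores the flat offset into a contiguous NCDHW input block. A standalone sketch of that indexing, with a round-trip check of ours (toy sizes, dilations of 1 for brevity; the patch additionally scales by dil_d/dil_h/dil_w):

#include <vector>
#include <cassert>

int main()
{
    int ncn = 2, kernel_d = 2, kernel_h = 3, kernel_w = 3;
    int depth = 5, height = 7, width = 9;

    std::vector<int> ofstab(ncn * kernel_d * kernel_h * kernel_w);
    for (int k = 0; k < ncn; k++)
        for (int k_d = 0; k_d < kernel_d; k_d++)
            for (int k_r = 0; k_r < kernel_h; k_r++)
                for (int k_c = 0; k_c < kernel_w; k_c++)
                    ofstab[(k*kernel_d*kernel_h + k_d*kernel_h + k_r)*kernel_w + k_c] =
                        (k*depth*height + k_d*height + k_r)*width + k_c;

    // Each entry equals the flat offset of input[k][k_d][k_r][k_c] in a
    // contiguous (ncn x depth x height x width) block.
    assert(ofstab[((1*kernel_d + 1)*kernel_h + 2)*kernel_w + 2] ==
           ((1*depth + 1)*height + 2)*width + 2);
    return 0;
}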
modules/dnn/src/layers/layers_common.cpp

@@ -175,11 +175,13 @@ void getPoolingKernelParams(const LayerParams &params, std::vector<size_t>& kern
 }

 void getConvolutionKernelParams(const LayerParams &params, std::vector<size_t>& kernel, std::vector<size_t>& pads_begin,
-                                std::vector<size_t>& pads_end, std::vector<size_t>& strides, std::vector<size_t>& dilations, cv::String &padMode)
+                                std::vector<size_t>& pads_end, std::vector<size_t>& strides,
+                                std::vector<size_t>& dilations, cv::String &padMode, std::vector<size_t>& adjust_pads)
 {
     util::getKernelSize(params, kernel);
     util::getStrideAndPadding(params, pads_begin, pads_end, strides, padMode, kernel.size());
     util::getParameter(params, "dilation", "dilation", dilations, true, std::vector<size_t>(kernel.size(), 1));
+    util::getParameter(params, "adj", "adj", adjust_pads, true, std::vector<size_t>(kernel.size(), 0));

     for (int i = 0; i < dilations.size(); i++)
         CV_Assert(dilations[i] > 0);
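With this change a layer definition can carry one output-padding ("adj") value per spatial dimension instead of the fixed adj_h/adj_w pair. A hypothetical usage sketch of ours (the values are illustrative; note the constructor assert above requires each adj value to stay below the matching stride):

#include <opencv2/dnn.hpp>

int main()
{
    cv::dnn::LayerParams lp;  // would configure a Deconvolution layer
    int kernel[] = {4, 4, 4}, stride[] = {2, 2, 2}, adj[] = {1, 1, 1};
    lp.set("kernel_size", cv::dnn::DictValue::arrayInt(kernel, 3));
    lp.set("stride",      cv::dnn::DictValue::arrayInt(stride, 3));
    // New in this patch: per-axis output padding, read by
    // getConvolutionKernelParams into adjust_pads.
    lp.set("adj",         cv::dnn::DictValue::arrayInt(adj, 3));
    return 0;
}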
modules/dnn/src/layers/layers_common.hpp

@@ -60,7 +60,8 @@ namespace cv
 namespace dnn
 {
 void getConvolutionKernelParams(const LayerParams &params, std::vector<size_t>& kernel, std::vector<size_t>& pads_begin,
-                                std::vector<size_t>& pads_end, std::vector<size_t>& strides, std::vector<size_t>& dilations, cv::String &padMode);
+                                std::vector<size_t>& pads_end, std::vector<size_t>& strides,
+                                std::vector<size_t>& dilations, cv::String &padMode, std::vector<size_t>& adjust_pads);

 void getPoolingKernelParams(const LayerParams &params, std::vector<size_t>& kernel, bool &globalPooling,
                             std::vector<size_t>& pads_begin, std::vector<size_t>& pads_end,
                             std::vector<size_t>& strides, cv::String &padMode);
modules/dnn/src/layers/pooling_layer.cpp

@@ -48,6 +48,7 @@
 #include "../op_vkcom.hpp"
 #include <float.h>
 #include <algorithm>
+#include <numeric>
 using std::max;
 using std::min;

@@ -179,13 +180,16 @@ public:
         }
         else
         {
-            if (!kernel_size.empty() && kernel_size.size() != 2)  // TODO Support Pooling3D
-                return false;
-            return backendId == DNN_BACKEND_OPENCV ||
-                   (backendId == DNN_BACKEND_HALIDE && haveHalide() &&
-                       (type == MAX || (type == AVE && !pad_t && !pad_l && !pad_b && !pad_r))) ||
-                   (backendId == DNN_BACKEND_VKCOM && haveVulkan() && (type == MAX || type == AVE));
+            if (kernel_size.size() == 3)
+                return (backendId == DNN_BACKEND_OPENCV && preferableTarget == DNN_TARGET_CPU);
+            if (kernel_size.empty() || kernel_size.size() == 2)
+                return backendId == DNN_BACKEND_OPENCV ||
+                       (backendId == DNN_BACKEND_HALIDE && haveHalide() &&
+                           (type == MAX || (type == AVE && !pad_t && !pad_l && !pad_b && !pad_r))) ||
+                       (backendId == DNN_BACKEND_VKCOM && haveVulkan() && (type == MAX || type == AVE));
+            else
+                return false;
         }
     }

@@ -383,19 +387,26 @@ public:
     int poolingType;
     float spatialScale;

+    std::vector<size_t> pads_begin, pads_end;
+    std::vector<size_t> kernel_size;
+    std::vector<size_t> strides;
+
     PoolingInvoker() : src(0), rois(0), dst(0), mask(0), pad_l(0), pad_t(0), pad_r(0), pad_b(0),
                        avePoolPaddedArea(false), nstripes(0),
                        computeMaxIdx(0), poolingType(MAX), spatialScale(0) {}

-    static void run(const Mat& src, const Mat& rois, Mat& dst, Mat& mask, Size kernel,
-                    Size stride, int pad_l, int pad_t, int pad_r, int pad_b, bool avePoolPaddedArea, int poolingType, float spatialScale,
+    static void run(const Mat& src, const Mat& rois, Mat& dst, Mat& mask,
+                    std::vector<size_t> kernel_size, std::vector<size_t> strides,
+                    std::vector<size_t> pads_begin, std::vector<size_t> pads_end,
+                    bool avePoolPaddedArea, int poolingType, float spatialScale,
                     bool computeMaxIdx, int nstripes)
     {
         CV_Assert_N(
                   src.isContinuous(), dst.isContinuous(),
                   src.type() == CV_32F, src.type() == dst.type(),
-                  src.dims == 4, dst.dims == 4,
+                  src.dims == 4 || src.dims == 5, dst.dims == 4 || dst.dims == 5,
                   (((poolingType == ROI || poolingType == PSROI) && dst.size[0] == rois.size[0]) || src.size[0] == dst.size[0]),
                   poolingType == PSROI || src.size[1] == dst.size[1],
                   (mask.empty() || (mask.type() == src.type() && mask.size == dst.size)));

@@ -404,13 +415,20 @@ public:
             p.src = &src;
             p.rois = &rois;
             p.dst = &dst;
+
+            p.kernel_size = kernel_size;
+            p.strides = strides;
+            p.pads_begin = pads_begin;
+            p.pads_end = pads_end;
+
             p.mask = &mask;
-            p.kernel = kernel;
-            p.stride = stride;
-            p.pad_l = pad_l;
-            p.pad_t = pad_t;
-            p.pad_r = pad_r;
-            p.pad_b = pad_b;
+            p.kernel = Size(kernel_size[1], kernel_size[0]);
+            p.stride = Size(strides[1], strides[0]);
+            p.pad_l = pads_begin.back();
+            p.pad_t = pads_begin[pads_begin.size() - 2];
+            p.pad_r = pads_end.back();
+            p.pad_b = pads_end[pads_end.size() - 2];
+
             p.avePoolPaddedArea = avePoolPaddedArea;
             p.nstripes = nstripes;
             p.computeMaxIdx = computeMaxIdx;

@@ -419,10 +437,21 @@ public:
             if( !computeMaxIdx )
             {
-                p.ofsbuf.resize(kernel.width * kernel.height);
-                for (int i = 0; i < kernel.height; ++i)
-                    for (int j = 0; j < kernel.width; ++j)
-                        p.ofsbuf[i * kernel.width + j] = src.size[3] * i + j;
+                int height = src.size[src.dims - 2];
+                int width = src.size[src.dims - 1];
+
+                int kernel_d = (kernel_size.size() == 3) ? kernel_size[0] : 1;
+                int kernel_h = kernel_size[kernel_size.size() - 2];
+                int kernel_w = kernel_size.back();
+
+                p.ofsbuf.resize(kernel_d * kernel_h * kernel_w);
+                for (int i = 0; i < kernel_d; ++i) {
+                    for (int j = 0; j < kernel_h; ++j) {
+                        for (int k = 0; k < kernel_w; ++k) {
+                            p.ofsbuf[i * kernel_h * kernel_w + j * kernel_w + k] = width * height * i + width * j + k;
+                        }
+                    }
+                }
             }

             parallel_for_(Range(0, nstripes), p, nstripes);

@@ -430,14 +459,29 @@ public:
         void operator()(const Range& r) const CV_OVERRIDE
         {
-            int channels = dst->size[1], width = dst->size[3], height = dst->size[2];
-            int inp_width = src->size[3], inp_height = src->size[2];
+            int channels = dst->size[1];
+
+            bool isPool2D = src->dims == 4;
+            int depth = !isPool2D ? dst->size[2] : 1;
+            int height = dst->size[dst->dims - 2];
+            int width = dst->size[dst->dims - 1];
+
+            int inp_depth = !isPool2D ? src->size[2] : 1;
+            int inp_height = src->size[src->dims - 2];
+            int inp_width = src->size[src->dims - 1];
+
             size_t total = dst->total();
             size_t stripeSize = (total + nstripes - 1)/nstripes;
             size_t stripeStart = r.start*stripeSize;
             size_t stripeEnd = std::min(r.end*stripeSize, total);
-            int kernel_w = kernel.width, kernel_h = kernel.height;
-            int stride_w = stride.width, stride_h = stride.height;
+
+            int kernel_d = !isPool2D ? kernel_size[0] : 1;
+            int kernel_h = kernel_size[kernel_size.size() - 2];
+            int kernel_w = kernel_size.back();
+
+            int stride_d = !isPool2D ? strides[0] : 0;
+            int stride_h = strides[strides.size() - 2];
+            int stride_w = strides.back();
+
             bool compMaxIdx = computeMaxIdx;
#if CV_SIMD128

@@ -456,9 +500,14 @@ public:
                 ofs /= width;
                 int y0 = (int)(ofs % height);
                 ofs /= height;
+
+                int d0 = (int)(ofs % depth);
+                ofs /= depth;
+
                 int c = (int)(ofs % channels);
                 int n = (int)(ofs / channels);
                 int ystart, yend;
+                int dstart = 0, dend = 1;

                 const float *srcData = 0;
                 if (poolingType == ROI)

@@ -488,15 +537,22 @@ public:
                 }
                 else
                 {
+                    int pad_d_begin = (pads_begin.size() == 3) ? pads_begin[0] : 0;
+                    dstart = d0 * stride_d - pad_d_begin;
+                    dend = min(dstart + kernel_d, (int)(inp_depth + pads_end[0]));
+
                     ystart = y0 * stride_h - pad_t;
                     yend = min(ystart + kernel_h, inp_height + pad_b);
                     srcData = src->ptr<float>(n, c);
                 }
+                int ddelta = dend - dstart;
+                dstart = max(dstart, 0);
+                dend = min(dend, inp_depth);
                 int ydelta = yend - ystart;
                 ystart = max(ystart, 0);
                 yend = min(yend, inp_height);
-                float *dstData = dst->ptr<float>(n, c, y0);
-                float *dstMaskData = mask->data ? mask->ptr<float>(n, c, y0) : 0;
+                float *dstData = &dst->ptr<float>(n, c, d0)[y0 * width];
+                float *dstMaskData = mask->data ? &mask->ptr<float>(n, c, d0)[y0 * width] : 0;

                 int delta = std::min((int)(stripeEnd - ofs0), width - x0);
                 ofs0 += delta;

@@ -516,7 +572,7 @@ public:
                         continue;
                     }
#if CV_SIMD128
-                    if( xstart > 0 && x0 + 7 < x1 && (x0 + 7) * stride_w - pad_l + kernel_w < inp_width )
+                    if( isPool2D && xstart > 0 && x0 + 7 < x1 && (x0 + 7) * stride_w - pad_l + kernel_w < inp_width )
                     {
                         if( compMaxIdx )
                         {

@@ -621,49 +677,51 @@ public:
                     if( compMaxIdx )
                     {
                         int max_index = -1;
-                        for (int y = ystart; y < yend; ++y)
-                            for (int x = xstart; x < xend; ++x)
-                            {
-                                const int index = y * inp_width + x;
-                                float val = srcData[index];
-                                if (val > max_val)
-                                {
-                                    max_val = val;
-                                    max_index = index;
-                                }
-                            }
+                        for (int d = dstart; d < dend; ++d) {
+                            for (int y = ystart; y < yend; ++y) {
+                                for (int x = xstart; x < xend; ++x) {
+                                    const int index = d * inp_width * inp_height + y * inp_width + x;
+                                    float val = srcData[index];
+                                    if (val > max_val)
+                                    {
+                                        max_val = val;
+                                        max_index = index;
+                                    }
+                                }
+                            }
+                        }
                         dstData[x0] = max_val;
                         if (dstMaskData)
                             dstMaskData[x0] = max_index;
                     }
                     else
                     {
-                        for (int y = ystart; y < yend; ++y)
-                            for (int x = xstart; x < xend; ++x)
-                            {
-                                const int index = y * inp_width + x;
-                                float val = srcData[index];
-                                max_val = std::max(max_val, val);
-                            }
+                        for (int d = dstart; d < dend; ++d) {
+                            for (int y = ystart; y < yend; ++y) {
+                                for (int x = xstart; x < xend; ++x) {
+                                    const int index = d * inp_width * inp_height + y * inp_width + x;
+                                    float val = srcData[index];
+                                    max_val = std::max(max_val, val);
+                                }
+                            }
+                        }
                         dstData[x0] = max_val;
                     }
                 }
             }
             else if (poolingType == AVE)
             {
-                for( ; x0 < x1; x0++ )
+                for( ; x0 < x1; ++x0 )
                 {
                     int xstart = x0 * stride_w - pad_l;
                     int xend = min(xstart + kernel_w, inp_width + pad_r);
                     int xdelta = xend - xstart;
                     xstart = max(xstart, 0);
                     xend = min(xend, inp_width);
-                    float inv_kernel_area = avePoolPaddedArea ? xdelta * ydelta : ((yend - ystart) * (xend - xstart));
+                    float inv_kernel_area = avePoolPaddedArea ? xdelta * ydelta * ddelta :
+                                            ((dend - dstart) * (yend - ystart) * (xend - xstart));
                     inv_kernel_area = 1.0 / inv_kernel_area;
#if CV_SIMD128
-                    if( xstart > 0 && x0 + 7 < x1 && (x0 + 7) * stride_w - pad_l + kernel_w < inp_width )
+                    if( isPool2D && xstart > 0 && x0 + 7 < x1 && (x0 + 7) * stride_w - pad_l + kernel_w < inp_width )
                     {
                         v_float32x4 sum_val0 = v_setzero_f32(), sum_val1 = v_setzero_f32();
                         v_float32x4 ikarea = v_setall_f32(inv_kernel_area);

@@ -689,14 +747,15 @@ public:
#endif
                    {
                        float sum_val = 0.f;
-                       for (int y = ystart; y < yend; ++y)
-                           for (int x = xstart; x < xend; ++x)
-                           {
-                               const int index = y * inp_width + x;
-                               float val = srcData[index];
-                               sum_val += val;
-                           }
+                       for (int d = dstart; d < dend; ++d) {
+                           for (int y = ystart; y < yend; ++y) {
+                               for (int x = xstart; x < xend; ++x) {
+                                   const int index = d * inp_width * inp_height + y * inp_width + x;
+                                   float val = srcData[index];
+                                   sum_val += val;
+                               }
+                           }
+                       }

                        dstData[x0] = sum_val*inv_kernel_area;
                    }

@@ -772,21 +831,25 @@ public:
    {
        const int nstripes = getNumThreads();
        Mat rois;
-       PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad_l, pad_t, pad_r, pad_b, avePoolPaddedArea, type, spatialScale, computeMaxIdx, nstripes);
+       PoolingInvoker::run(src, rois, dst, mask, kernel_size, strides, pads_begin, pads_end, avePoolPaddedArea, type, spatialScale, computeMaxIdx, nstripes);
    }

    void avePooling(Mat &src, Mat &dst)
    {
        const int nstripes = getNumThreads();
        Mat rois, mask;
-       PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad_l, pad_t, pad_r, pad_b, avePoolPaddedArea, type, spatialScale, computeMaxIdx, nstripes);
+       PoolingInvoker::run(src, rois, dst, mask, kernel_size, strides, pads_begin, pads_end, avePoolPaddedArea, type, spatialScale, computeMaxIdx, nstripes);
    }

    void roiPooling(const Mat &src, const Mat &rois, Mat &dst)
    {
        const int nstripes = getNumThreads();
        Mat mask;
-       PoolingInvoker::run(src, rois, dst, mask, kernel, stride, pad_l, pad_t, pad_r, pad_b, avePoolPaddedArea, type, spatialScale, computeMaxIdx, nstripes);
+       kernel_size.resize(2);
+       strides.resize(2);
+       pads_begin.resize(2);
+       pads_end.resize(2);
+       PoolingInvoker::run(src, rois, dst, mask, kernel_size, strides, pads_begin, pads_end, avePoolPaddedArea, type, spatialScale, computeMaxIdx, nstripes);
    }

    virtual Ptr<BackendNode> initMaxPoolingHalide(const std::vector<Ptr<BackendWrapper> > &inputs)

@@ -974,17 +1037,18 @@ public:
    {
        CV_UNUSED(inputs); // suppress unused variable warning
        long flops = 0;
+       size_t karea = std::accumulate(kernel_size.begin(), kernel_size.end(), 1, std::multiplies<size_t>());
        for(int i = 0; i < outputs.size(); i++)
        {
            if (type == MAX)
            {
                if (i%2 == 0)
-                   flops += total(outputs[i])*kernel.area();
+                   flops += total(outputs[i])*karea;
            }
            else
            {
-               flops += total(outputs[i])*(kernel.area() + 1);
+               flops += total(outputs[i])*(karea + 1);
            }
        }
        return flops;
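The invoker now maps a flat output offset to (n, c, d, y, x) by successive divisions, with depth fixed to 1 for 2D pooling so the extra step is a no-op. A self-contained restatement of that decomposition with a round-trip check of ours:

#include <cassert>
#include <cstddef>

int main()
{
    int width = 4, height = 3, depth = 2, channels = 5;
    // Flat NCDHW offset of the element (n=1, c=2, d=1, y=2, x=3):
    size_t ofs = (((size_t(1)*channels + 2)*depth + 1)*height + 2)*width + 3;

    int x0 = (int)(ofs % width);    ofs /= width;
    int y0 = (int)(ofs % height);   ofs /= height;
    int d0 = (int)(ofs % depth);    ofs /= depth;   // new step; depth == 1 for 2D pooling
    int c  = (int)(ofs % channels);
    int n  = (int)(ofs / channels);

    assert(x0 == 3 && y0 == 2 && d0 == 1 && c == 2 && n == 1);
    return 0;
}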
modules/dnn/src/onnx/onnx_importer.cpp

@@ -682,42 +682,37 @@ void ONNXImporter::populateNet(Net dstNet)
            layerParams.set("num_output", layerParams.blobs[0].size[1] * layerParams.get<int>("group", 1));
            layerParams.set("bias_term", node_proto.input_size() == 3);

+           if (!layerParams.has("kernel_size"))
+               CV_Error(Error::StsNotImplemented,
+                        "Required attribute 'kernel_size' is not present.");
+
            if (layerParams.has("output_shape"))
            {
                const DictValue& outShape = layerParams.get("output_shape");
+               DictValue strides = layerParams.get("stride");
+               DictValue kernel = layerParams.get("kernel_size");

-               if (outShape.size() != 4)
-                   CV_Error(Error::StsNotImplemented, "Output shape must have 4 elements.");
-
-               DictValue stride = layerParams.get("stride");
-               const int strideY = stride.getIntValue(0);
-               const int strideX = stride.getIntValue(1);
-               const int outH = outShape.getIntValue(2);
-               const int outW = outShape.getIntValue(3);
-
-               if (layerParams.get<String>("pad_mode") == "SAME")
+               String padMode;
+               std::vector<int> adjust_pads;
+               if (layerParams.has("pad_mode"))
                {
-                   layerParams.set("adj_w", (outW - 1) % strideX);
-                   layerParams.set("adj_h", (outH - 1) % strideY);
-               }
-               else if (layerParams.get<String>("pad_mode") == "VALID")
-               {
-                   if (!layerParams.has("kernel_size"))
-                       CV_Error(Error::StsNotImplemented,
-                                "Required attribute 'kernel_size' is not present.");
-
-                   DictValue kernel = layerParams.get("kernel_size");
-                   layerParams.set("adj_h", (outH - kernel.getIntValue(0)) % strideY);
-                   layerParams.set("adj_w", (outW - kernel.getIntValue(1)) % strideX);
+                   padMode = toUpperCase(layerParams.get<String>("pad_mode"));
+                   if (padMode != "SAME" && padMode != "VALID")
+                       CV_Error(Error::StsError, "Unsupported padding mode " + padMode);
+
+                   for (int i = 0; i < strides.size(); i++)
+                   {
+                       int sz = outShape.get<int>(2 + i);
+                       int stride = strides.get<int>(i);
+                       adjust_pads.push_back(padMode == "SAME" ? (sz - 1) % stride :
+                                                                 (sz - kernel.get<int>(i)) % stride);
+                   }
+                   layerParams.set("adj", DictValue::arrayInt(&adjust_pads[0], adjust_pads.size()));
                }
            }
            else if (layerParams.has("output_padding"))
            {
-               const DictValue& adj_pad = layerParams.get("output_padding");
-               if (adj_pad.size() != 2)
-                   CV_Error(Error::StsNotImplemented, "Deconvolution3D layer is not supported");
-               layerParams.set("adj_w", adj_pad.get<int>(1));
-               layerParams.set("adj_h", adj_pad.get<int>(0));
+               replaceLayerParam(layerParams, "output_padding", "adj");
            }
        }
        else if (layer_type == "Transpose")
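The importer now derives one adjustment per spatial axis from the requested output extent: (sz - 1) % stride for SAME, (sz - kernel) % stride for VALID. Worked numbers of ours: with stride 2 and output extent 9, SAME gives (9 - 1) % 2 = 0; with kernel 4, stride 2 and output extent 11, VALID gives (11 - 4) % 2 = 1. A tiny illustrative helper (the function name is ours, not the importer's):

// Mirrors the importer's per-axis formula; illustrative only.
int adjustFor(bool same, int outSz, int kernelSz, int stride)
{
    return same ? (outSz - 1) % stride
                : (outSz - kernelSz) % stride;
}
// adjustFor(true, 9, /*unused*/ 3, 2) == 0
// adjustFor(false, 11, 4, 2)          == 1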
modules/dnn/test/test_onnx_importer.cpp

@@ -100,8 +100,8 @@ TEST_P(Test_ONNX_layers, Convolution3D)
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2019010000)
    throw SkipTestException("Test is enabled starts from 2019R1");
#endif
-   if (backend != DNN_BACKEND_INFERENCE_ENGINE || target != DNN_TARGET_CPU)
-       throw SkipTestException("Only DLIE backend on CPU is supported");
+   if (target != DNN_TARGET_CPU)
+       throw SkipTestException("Only CPU is supported");
    testONNXModels("conv3d");
    testONNXModels("conv3d_bias");
 }

@@ -127,6 +127,19 @@ TEST_P(Test_ONNX_layers, Deconvolution)
    testONNXModels("deconv_adjpad_2d", npy, 0, 0, false, false);
 }

+TEST_P(Test_ONNX_layers, Deconvolution3D)
+{
+#if defined(INF_ENGINE_RELEASE)
+    applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_2018R5);
+#endif
+    if (backend != DNN_BACKEND_INFERENCE_ENGINE || target != DNN_TARGET_CPU)
+        throw SkipTestException("Only DLIE backend on CPU is supported");
+    testONNXModels("deconv3d");
+    testONNXModels("deconv3d_bias");
+    testONNXModels("deconv3d_pad");
+    testONNXModels("deconv3d_adjpad");
+}
+
 TEST_P(Test_ONNX_layers, Dropout)
 {
    testONNXModels("dropout");

@@ -185,8 +198,8 @@ TEST_P(Test_ONNX_layers, MaxPooling3D)
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2019010000)
    throw SkipTestException("Test is enabled starts from 2019R1");
#endif
-   if (backend != DNN_BACKEND_INFERENCE_ENGINE || target != DNN_TARGET_CPU)
-       throw SkipTestException("Only DLIE backend on CPU is supported");
+   if (target != DNN_TARGET_CPU)
+       throw SkipTestException("Only CPU is supported");
    testONNXModels("max_pool3d");
 }

@@ -195,11 +208,21 @@ TEST_P(Test_ONNX_layers, AvePooling3D)
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2019010000)
    throw SkipTestException("Test is enabled starts from 2019R1");
#endif
-   if (backend != DNN_BACKEND_INFERENCE_ENGINE || target != DNN_TARGET_CPU)
-       throw SkipTestException("Only DLIE backend on CPU is supported");
+   if (target != DNN_TARGET_CPU)
+       throw SkipTestException("Only CPU is supported");
    testONNXModels("ave_pool3d");
 }

+TEST_P(Test_ONNX_layers, PoolConv3D)
+{
+#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2019010000)
+    throw SkipTestException("Test is enabled starts from 2019R1");
+#endif
+    if (target != DNN_TARGET_CPU)
+        throw SkipTestException("Only CPU is supported");
+    testONNXModels("pool_conv_3d");
+}
+
 TEST_P(Test_ONNX_layers, BatchNormalization)
 {
    testONNXModels("batch_norm");

@@ -579,10 +602,10 @@ TEST_P(Test_ONNX_nets, Resnet34_kinetics)
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2019010000)
    throw SkipTestException("Test is enabled starts from 2019R1");
#endif
-   if (backend != DNN_BACKEND_INFERENCE_ENGINE || target != DNN_TARGET_CPU)
-       throw SkipTestException("Only DLIE backend on CPU is supported");
+   if (target != DNN_TARGET_CPU)
+       throw SkipTestException("Only CPU is supported");

-   String onnxmodel = findDataFile("dnn/resnet-34_kinetics.onnx", false);
+   String onnxmodel = findDataFile("dnn/resnet-34_kinetics.onnx");
    Mat image0 = imread(findDataFile("dnn/dog416.png"));
    Mat image1 = imread(findDataFile("dnn/street.png"));
modules/dnn/test/test_tf_importer.cpp

@@ -136,8 +136,8 @@ TEST_P(Test_TensorFlow_layers, Convolution3D)
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2019010000)
    throw SkipTestException("Test is enabled starts from 2019R1");
#endif
-   if (backend != DNN_BACKEND_INFERENCE_ENGINE || target != DNN_TARGET_CPU)
-       throw SkipTestException("Only DLIE backend on CPU is supported");
+   if (target != DNN_TARGET_CPU)
+       throw SkipTestException("Only CPU is supported");
    runTensorFlowNet("conv3d");
 }

@@ -243,8 +243,8 @@ TEST_P(Test_TensorFlow_layers, MaxPooling3D)
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2019010000)
    throw SkipTestException("Test is enabled starts from 2019R1");
#endif
-   if (backend != DNN_BACKEND_INFERENCE_ENGINE || target != DNN_TARGET_CPU)
-       throw SkipTestException("Only DLIE backend on CPU is supported");
+   if (target != DNN_TARGET_CPU)
+       throw SkipTestException("Only CPU is supported");
    runTensorFlowNet("max_pool3d");
 }

@@ -253,8 +253,8 @@ TEST_P(Test_TensorFlow_layers, AvePooling3D)
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2019010000)
    throw SkipTestException("Test is enabled starts from 2019R1");
#endif
-   if (backend != DNN_BACKEND_INFERENCE_ENGINE || target != DNN_TARGET_CPU)
-       throw SkipTestException("Only DLIE backend on CPU is supported");
+   if (target != DNN_TARGET_CPU)
+       throw SkipTestException("Only CPU is supported");
    runTensorFlowNet("ave_pool3d");
 }
modules/imgproc/src/filter.simd.hpp

@@ -84,6 +84,7 @@ Ptr<BaseFilter> getLinearFilter(
#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY

+typedef int CV_DECL_ALIGNED(1) unaligned_int;
 #define VEC_ALIGN CV_MALLOC_ALIGN

 int FilterEngine__start(FilterEngine& this_, const Size &_wholeSize, const Size &sz, const Point &ofs)

@@ -1049,7 +1050,7 @@ struct SymmColumnVec_32s8u
                s0 = v_muladd(v_cvt_f32(v_load(src[k] + i) + v_load(src[-k] + i)), v_setall_f32(ky[k]), s0);
            v_int32x4 s32 = v_round(s0);
            v_int16x8 s16 = v_pack(s32, s32);
-           *(int*)(dst + i) = v_reinterpret_as_s32(v_pack_u(s16, s16)).get0();
+           *(unaligned_int*)(dst + i) = v_reinterpret_as_s32(v_pack_u(s16, s16)).get0();
            i += v_int32x4::nlanes;
        }
    }

@@ -1104,7 +1105,7 @@ struct SymmColumnVec_32s8u
                s0 = v_muladd(v_cvt_f32(v_load(src[k] + i) - v_load(src[-k] + i)), v_setall_f32(ky[k]), s0);
            v_int32x4 s32 = v_round(s0);
            v_int16x8 s16 = v_pack(s32, s32);
-           *(int*)(dst + i) = v_reinterpret_as_s32(v_pack_u(s16, s16)).get0();
+           *(unaligned_int*)(dst + i) = v_reinterpret_as_s32(v_pack_u(s16, s16)).get0();
            i += v_int32x4::nlanes;
        }
    }

@@ -2129,7 +2130,7 @@ struct FilterVec_8u
                s0 = v_muladd(v_cvt_f32(v_reinterpret_as_s32(v_load_expand_q(src[k] + i))), v_setall_f32(kf[k]), s0);
            v_int32x4 s32 = v_round(s0);
            v_int16x8 s16 = v_pack(s32, s32);
-           *(int*)(dst + i) = v_reinterpret_as_s32(v_pack_u(s16, s16)).get0();
+           *(unaligned_int*)(dst + i) = v_reinterpret_as_s32(v_pack_u(s16, s16)).get0();
            i += v_int32x4::nlanes;
        }
        return i;
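All three hunks above fix the same pattern: a packed 4-byte result was stored through a plain int*, which is undefined behaviour when dst + i is not 4-byte aligned (an 8-bit destination row gives no such guarantee). Typedef-ing the pointee with 1-byte alignment makes the compiler emit an unaligned store. A minimal illustration of ours, assuming GCC/Clang, where CV_DECL_ALIGNED(1) expands to __attribute__((aligned(1))) and GCC permits reduced alignment on a typedef:

typedef int __attribute__((aligned(1))) unaligned_int;

void store4(unsigned char* dst, int packed)
{
    *(unaligned_int*)dst = packed;  // OK at any address, unlike *(int*)dst
    // A fully portable alternative would be:
    //   std::memcpy(dst, &packed, sizeof(packed));
}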
modules/imgproc/src/smooth.simd.hpp

@@ -334,7 +334,7 @@ void hlineSmooth3Naba<uint8_t, ufixedpoint16>(const uint8_t* src, int cn, const
    {
        int src_idx = borderInterpolate(-1, len, borderType);
        for (int k = 0; k < cn; k++)
-           ((uint16_t*)dst)[k] = ((uint16_t*)m)[1] * src[k] + ((uint16_t*)m)[0] * ((uint16_t)(src[cn + k]) + (uint16_t)(src[src_idx*cn + k]));
+           ((uint16_t*)dst)[k] = saturate_cast<uint16_t>(((uint16_t*)m)[1] * (uint32_t)(src[k]) + ((uint16_t*)m)[0] * ((uint32_t)(src[cn + k]) + (uint32_t)(src[src_idx*cn + k])));
    }
    else
    {

@@ -354,14 +354,14 @@ void hlineSmooth3Naba<uint8_t, ufixedpoint16>(const uint8_t* src, int cn, const
                              v_mul_wrap(vx_load_expand(src), v_mul1));
#endif
    for (; i < lencn; i++, src++, dst++)
-       *((uint16_t*)dst) = ((uint16_t*)m)[1] * src[0] + ((uint16_t*)m)[0] * ((uint16_t)(src[-cn]) + (uint16_t)(src[cn]));
+       *((uint16_t*)dst) = saturate_cast<uint16_t>(((uint16_t*)m)[1] * (uint32_t)(src[0]) + ((uint16_t*)m)[0] * ((uint32_t)(src[-cn]) + (uint32_t)(src[cn])));

    // Point that fall right from border
    if (borderType != BORDER_CONSTANT) // If BORDER_CONSTANT out of border values are equal to zero and could be skipped
    {
        int src_idx = (borderInterpolate(len, len, borderType) - (len - 1))*cn;
        for (int k = 0; k < cn; k++)
-           ((uint16_t*)dst)[k] = ((uint16_t*)m)[1] * src[k] + ((uint16_t*)m)[0] * ((uint16_t)(src[k - cn]) + (uint16_t)(src[src_idx + k]));
+           ((uint16_t*)dst)[k] = saturate_cast<uint16_t>(((uint16_t*)m)[1] * (uint32_t)(src[k]) + ((uint16_t*)m)[0] * ((uint32_t)(src[k - cn]) + (uint32_t)(src[src_idx + k])));
    }
    else
    {

@@ -896,8 +896,8 @@ void hlineSmooth5Nabcba<uint8_t, ufixedpoint16>(const uint8_t* src, int cn, cons
            int idxp2 = borderInterpolate(3, len, borderType)*cn;
            for (int k = 0; k < cn; k++)
            {
-               ((uint16_t*)dst)[k] = ((uint16_t*)m)[1] * ((uint16_t)(src[k + idxm1]) + (uint16_t)(src[k + cn])) + ((uint16_t*)m)[2] * src[k] + ((uint16_t*)m)[0] * ((uint16_t)(src[k + idxp1]) + (uint16_t)(src[k + idxm2]));
-               ((uint16_t*)dst)[k + cn] = ((uint16_t*)m)[0] * ((uint16_t)(src[k + idxm1]) + (uint16_t)(src[k + idxp2])) + ((uint16_t*)m)[1] * ((uint16_t)(src[k]) + (uint16_t)(src[k + idxp1])) + ((uint16_t*)m)[2] * src[k + cn];
+               ((uint16_t*)dst)[k] = saturate_cast<uint16_t>(((uint16_t*)m)[1] * ((uint32_t)(src[k + idxm1]) + (uint32_t)(src[k + cn])) + ((uint16_t*)m)[2] * (uint32_t)(src[k]) + ((uint16_t*)m)[0] * ((uint32_t)(src[k + idxp1]) + (uint32_t)(src[k + idxm2])));
+               ((uint16_t*)dst)[k + cn] = saturate_cast<uint16_t>(((uint16_t*)m)[0] * ((uint32_t)(src[k + idxm1]) + (uint32_t)(src[k + idxp2])) + ((uint16_t*)m)[1] * ((uint32_t)(src[k]) + (uint32_t)(src[k + idxp1])) + ((uint16_t*)m)[2] * (uint32_t)(src[k + cn]));
            }
        }
    }

@@ -907,7 +907,7 @@ void hlineSmooth5Nabcba<uint8_t, ufixedpoint16>(const uint8_t* src, int cn, cons
        for (int k = 0; k < cn; k++)
        {
            dst[k] = m[2] * src[k] + m[1] * src[k + cn] + m[0] * src[k + 2*cn];
-           ((uint16_t*)dst)[k + cn] = ((uint16_t*)m)[1] * ((uint16_t)(src[k]) + (uint16_t)(src[k + 2*cn])) + ((uint16_t*)m)[2] * src[k + cn];
+           ((uint16_t*)dst)[k + cn] = saturate_cast<uint16_t>(((uint16_t*)m)[1] * ((uint32_t)(src[k]) + (uint32_t)(src[k + 2*cn])) + ((uint16_t*)m)[2] * (uint32_t)(src[k + cn]));
            dst[k + 2*cn] = m[0] * src[k] + m[1] * src[k + cn] + m[2] * src[k + 2*cn];
        }
    else

@@ -918,9 +918,9 @@ void hlineSmooth5Nabcba<uint8_t, ufixedpoint16>(const uint8_t* src, int cn, cons
            int idxp2 = borderInterpolate(4, len, borderType)*cn;
            for (int k = 0; k < cn; k++)
            {
-               ((uint16_t*)dst)[k] = ((uint16_t*)m)[2] * src[k] + ((uint16_t*)m)[1] * ((uint16_t)(src[k + cn]) + (uint16_t)(src[k + idxm1])) + ((uint16_t*)m)[0] * ((uint16_t)(src[k + 2*cn]) + (uint16_t)(src[k + idxm2]));
-               ((uint16_t*)dst)[k + cn] = ((uint16_t*)m)[2] * src[k + cn] + ((uint16_t*)m)[1] * ((uint16_t)(src[k]) + (uint16_t)(src[k + 2*cn])) + ((uint16_t*)m)[0] * ((uint16_t)(src[k + idxm1]) + (uint16_t)(src[k + idxp1]));
-               ((uint16_t*)dst)[k + 2*cn] = ((uint16_t*)m)[0] * ((uint16_t)(src[k]) + (uint16_t)(src[k + idxp2])) + ((uint16_t*)m)[1] * ((uint16_t)(src[k + cn]) + (uint16_t)(src[k + idxp1])) + ((uint16_t*)m)[2] * src[k + 2*cn];
+               ((uint16_t*)dst)[k] = saturate_cast<uint16_t>(((uint16_t*)m)[2] * (uint32_t)(src[k]) + ((uint16_t*)m)[1] * ((uint32_t)(src[k + cn]) + (uint32_t)(src[k + idxm1])) + ((uint16_t*)m)[0] * ((uint32_t)(src[k + 2*cn]) + (uint32_t)(src[k + idxm2])));
+               ((uint16_t*)dst)[k + cn] = saturate_cast<uint16_t>(((uint16_t*)m)[2] * (uint32_t)(src[k + cn]) + ((uint16_t*)m)[1] * ((uint32_t)(src[k]) + (uint32_t)(src[k + 2*cn])) + ((uint16_t*)m)[0] * ((uint32_t)(src[k + idxm1]) + (uint32_t)(src[k + idxp1])));
+               ((uint16_t*)dst)[k + 2*cn] = saturate_cast<uint16_t>(((uint16_t*)m)[0] * ((uint32_t)(src[k]) + (uint32_t)(src[k + idxp2])) + ((uint16_t*)m)[1] * ((uint32_t)(src[k + cn]) + (uint32_t)(src[k + idxp1])) + ((uint16_t*)m)[2] * (uint32_t)(src[k + 2*cn]));
            }
        }
    }

@@ -933,8 +933,8 @@ void hlineSmooth5Nabcba<uint8_t, ufixedpoint16>(const uint8_t* src, int cn, cons
        int idxm1 = borderInterpolate(-1, len, borderType)*cn;
        for (int k = 0; k < cn; k++)
        {
-           ((uint16_t*)dst)[k] = ((uint16_t*)m)[2] * src[k] + ((uint16_t*)m)[1] * ((uint16_t)(src[cn + k]) + (uint16_t)(src[idxm1 + k])) + ((uint16_t*)m)[0] * ((uint16_t)(src[2*cn + k]) + (uint16_t)(src[idxm2 + k]));
-           ((uint16_t*)dst)[k + cn] = ((uint16_t*)m)[1] * ((uint16_t)(src[k]) + (uint16_t)(src[2*cn + k])) + ((uint16_t*)m)[2] * src[cn + k] + ((uint16_t*)m)[0] * ((uint16_t)(src[3*cn + k]) + (uint16_t)(src[idxm1 + k]));
+           ((uint16_t*)dst)[k] = saturate_cast<uint16_t>(((uint16_t*)m)[2] * (uint32_t)(src[k]) + ((uint16_t*)m)[1] * ((uint32_t)(src[cn + k]) + (uint32_t)(src[idxm1 + k])) + ((uint16_t*)m)[0] * ((uint32_t)(src[2*cn + k]) + (uint32_t)(src[idxm2 + k])));
+           ((uint16_t*)dst)[k + cn] = saturate_cast<uint16_t>(((uint16_t*)m)[1] * ((uint32_t)(src[k]) + (uint32_t)(src[2*cn + k])) + ((uint16_t*)m)[2] * (uint32_t)(src[cn + k]) + ((uint16_t*)m)[0] * ((uint32_t)(src[3*cn + k]) + (uint32_t)(src[idxm1 + k])));
        }
    }
    else

@@ -942,7 +942,7 @@ void hlineSmooth5Nabcba<uint8_t, ufixedpoint16>(const uint8_t* src, int cn, cons
        for (int k = 0; k < cn; k++)
        {
            dst[k] = m[2] * src[k] + m[1] * src[cn + k] + m[0] * src[2*cn + k];
-           ((uint16_t*)dst)[k + cn] = ((uint16_t*)m)[1] * ((uint16_t)(src[k]) + (uint16_t)(src[2*cn + k])) + ((uint16_t*)m)[2] * src[cn + k] + ((uint16_t*)m)[0] * src[3*cn + k];
+           ((uint16_t*)dst)[k + cn] = saturate_cast<uint16_t>(((uint16_t*)m)[1] * ((uint32_t)(src[k]) + (uint32_t)(src[2*cn + k])) + ((uint16_t*)m)[2] * (uint32_t)(src[cn + k]) + ((uint16_t*)m)[0] * (uint32_t)(src[3*cn + k]));
        }
    }

@@ -960,7 +960,7 @@ void hlineSmooth5Nabcba<uint8_t, ufixedpoint16>(const uint8_t* src, int cn, cons
                              v_mul_wrap(vx_load_expand(src), v_mul2));
#endif
    for (; i < lencn; i++, src++, dst++)
-       *((uint16_t*)dst) = ((uint16_t*)m)[0] * ((uint16_t)(src[-2*cn]) + (uint16_t)(src[2*cn])) + ((uint16_t*)m)[1] * ((uint16_t)(src[-cn]) + (uint16_t)(src[cn])) + ((uint16_t*)m)[2] * src[0];
+       *((uint16_t*)dst) = saturate_cast<uint16_t>(((uint16_t*)m)[0] * ((uint32_t)(src[-2*cn]) + (uint32_t)(src[2*cn])) + ((uint16_t*)m)[1] * ((uint32_t)(src[-cn]) + (uint32_t)(src[cn])) + ((uint16_t*)m)[2] * (uint32_t)(src[0]));

    // Points that fall right from border
    if (borderType != BORDER_CONSTANT) // If BORDER_CONSTANT out of border values are equal to zero and could be skipped

@@ -969,15 +969,15 @@ void hlineSmooth5Nabcba<uint8_t, ufixedpoint16>(const uint8_t* src, int cn, cons
        int idxp2 = (borderInterpolate(len + 1, len, borderType) - (len - 2))*cn;
        for (int k = 0; k < cn; k++)
        {
-           ((uint16_t*)dst)[k] = ((uint16_t*)m)[0] * ((uint16_t)(src[k - 2*cn]) + (uint16_t)(src[idxp1 + k])) + ((uint16_t*)m)[1] * ((uint16_t)(src[k - cn]) + (uint16_t)(src[k + cn])) + ((uint16_t*)m)[2] * src[k];
-           ((uint16_t*)dst)[k + cn] = ((uint16_t*)m)[0] * ((uint16_t)(src[k - cn]) + (uint16_t)(src[idxp2 + k])) + ((uint16_t*)m)[1] * ((uint16_t)(src[k]) + (uint16_t)(src[idxp1 + k])) + ((uint16_t*)m)[2] * src[k + cn];
+           ((uint16_t*)dst)[k] = saturate_cast<uint16_t>(((uint16_t*)m)[0] * ((uint32_t)(src[k - 2*cn]) + (uint32_t)(src[idxp1 + k])) + ((uint16_t*)m)[1] * ((uint32_t)(src[k - cn]) + (uint32_t)(src[k + cn])) + ((uint16_t*)m)[2] * (uint32_t)(src[k]));
+           ((uint16_t*)dst)[k + cn] = saturate_cast<uint16_t>(((uint16_t*)m)[0] * ((uint32_t)(src[k - cn]) + (uint32_t)(src[idxp2 + k])) + ((uint16_t*)m)[1] * ((uint32_t)(src[k]) + (uint32_t)(src[idxp1 + k])) + ((uint16_t*)m)[2] * (uint32_t)(src[k + cn]));
        }
    }
    else
    {
        for (int k = 0; k < cn; k++)
        {
-           ((uint16_t*)dst)[k] = ((uint16_t*)m)[0] * src[k - 2*cn] + ((uint16_t*)m)[1] * ((uint16_t)(src[k - cn]) + (uint16_t)(src[k + cn])) + ((uint16_t*)m)[2] * src[k];
+           ((uint16_t*)dst)[k] = saturate_cast<uint16_t>(((uint16_t*)m)[0] * (uint32_t)(src[k - 2*cn]) + ((uint16_t*)m)[1] * ((uint32_t)(src[k - cn]) + (uint32_t)(src[k + cn])) + ((uint16_t*)m)[2] * (uint32_t)(src[k]));
            dst[k + cn] = m[0] * src[k - cn] + m[1] * src[k] + m[2] * src[k + cn];
        }
    }
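What the saturate_cast hunks above guard against, in isolation: products of 16-bit fixed-point weights with 8-bit pixels were accumulated in uint16_t, which can wrap for bright inputs; widening the operands to uint32_t and saturating on the store cannot. A self-contained demonstration of ours (the weight value is made up; real ufixedpoint16 kernel weights differ):

#include <opencv2/core.hpp>
#include <cstdint>
#include <iostream>

int main()
{
    uint16_t w = 16000;                  // illustrative fixed-point weight
    uint8_t  a = 255, b = 255, c = 255;  // all-white pixels, as in the regression test below

    // Old pattern: the int result is truncated modulo 2^16 on the store.
    uint16_t wrapped = (uint16_t)(w * (uint16_t)(a) + w * ((uint16_t)(b) + (uint16_t)(c)));
    // New pattern: widen, then clamp to the representable range.
    uint16_t clamped = cv::saturate_cast<uint16_t>(w * (uint32_t)(a) + w * ((uint32_t)(b) + (uint32_t)(c)));

    std::cout << wrapped << " vs " << clamped << "\n";  // prints "50304 vs 65535"
    return 0;
}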
modules/imgproc/test/test_smooth_bitexact.cpp

@@ -158,4 +158,12 @@ TEST(GaussianBlur_Bitexact, Linear8U)
    }
 }

+TEST(GaussianBlur_Bitexact, regression_15015)
+{
+    Mat src(100, 100, CV_8UC3, Scalar(255, 255, 255));
+    Mat dst;
+    GaussianBlur(src, dst, Size(5, 5), 9);
+    ASSERT_EQ(0.0, cvtest::norm(dst, src, NORM_INF));
+}
+
 }} // namespace