Commit 4cb108ef, authored Jun 23, 2016 by Vitaliy Lyudvichenko

    Adding CPU parallelization for element-wise layers and im2col operation

Parent: 6d3cb808

Showing 4 changed files with 175 additions and 120 deletions:

    modules/dnn/src/layers/convolution_layer.cpp   (+4, -4)
    modules/dnn/src/layers/elementwise_layers.hpp  (+110, -93)
    modules/dnn/src/layers/op_im2col.hpp           (+59, -23)
    modules/dnn/test/test_layers.cpp               (+2, -0)
modules/dnn/src/layers/convolution_layer.cpp

@@ -179,9 +179,9 @@ namespace dnn
 #endif // HAVE_OPENCL

         if (inpBlob.type() == CV_32F)
-            im2col_cpu((float *)srcPtr, inpGroupCn, inpH, inpW, kerH, kerW, padH, padW, strideH, strideW, (float *)colMat.ptr());
+            im2col_CpuPBody<float>::run((float *)srcPtr, inpGroupCn, inpH, inpW, kerH, kerW, padH, padW, strideH, strideW, (float *)colMat.ptr());
         if (inpBlob.type() == CV_64F)
-            im2col_cpu((double *)srcPtr, inpGroupCn, inpH, inpW, kerH, kerW, padH, padW, strideH, strideW, (double *)colMat.ptr());
+            im2col_CpuPBody<double>::run((double *)srcPtr, inpGroupCn, inpH, inpW, kerH, kerW, padH, padW, strideH, strideW, (double *)colMat.ptr());
     }

     void ConvolutionLayer::computeInpOutShape(const Blob &inpBlob)
@@ -253,9 +253,9 @@ namespace dnn
         if (is1x1()) return;

         if (dstMat.type() == CV_32F)
-            col2im_cpu((float *)colMat.ptr(), inpGroupCn, inpH, inpW, kerH, kerW, padH, padW, strideH, strideW, (float *)dstMat.ptr());
+            col2im_cpu(colMat.ptr<float>(), inpGroupCn, inpH, inpW, kerH, kerW, padH, padW, strideH, strideW, dstMat.ptr<float>());
         if (dstMat.type() == CV_64F)
-            col2im_cpu((double *)colMat.ptr(), inpGroupCn, inpH, inpW, kerH, kerW, padH, padW, strideH, strideW, (double *)dstMat.ptr());
+            col2im_cpu(colMat.ptr<double>(), inpGroupCn, inpH, inpW, kerH, kerW, padH, padW, strideH, strideW, dstMat.ptr<double>());
     }
 }
 }
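
A note on the call-site pattern above: the serial free function im2col_cpu is replaced by a static run() helper on the new parallel body class, so the convolution layer never touches parallel_for_ directly and the CV_32F/CV_64F branches stay one line each. A minimal sketch of that pattern (ScalePBody is a hypothetical example body, not code from this commit):

#include <opencv2/core.hpp>

// Sketch of the "static run" dispatch idiom used by im2col_CpuPBody:
// the helper hides construction of the ParallelLoopBody and the
// parallel_for_ invocation behind one call, so the call site reads
// like the old serial function call.
template <typename T>
class ScalePBody : public cv::ParallelLoopBody
{
    T *data;
    T factor;
public:
    ScalePBody(T *data_, T factor_) : data(data_), factor(factor_) {}

    static void run(T *data, int n, T factor)
    {
        // Build the body and split [0, n) across the thread pool.
        cv::parallel_for_(cv::Range(0, n), ScalePBody<T>(data, factor));
    }

    void operator()(const cv::Range &r) const
    {
        for (int i = r.start; i < r.end; i++)
            data[i] *= factor;
    }
};

// Call site mirrors the commit's: ScalePBody<float>::run(ptr, n, 2.0f);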
modules/dnn/src/layers/elementwise_layers.hpp

@@ -55,130 +55,147 @@ using std::exp;
 using std::tanh;
 using std::pow;

 template<typename Func>
 class ElementWiseLayer : public Layer
 {
     Func func;

+    template<typename Dtype>
+    class PBody : public cv::ParallelLoopBody
+    {
+        Dtype *data;
+        Func &func;
+
+    public:
+
+        PBody(Blob &blob, Func &func_) :
+            func(func_), data(blob.ptr<Dtype>())
+        {}
+
+        void operator()(const Range &r) const
+        {
+            for (int i = r.start; i < r.end; i++)
+                data[i] = func(data[i]);
+        }
+    };
+
 public:

     ElementWiseLayer(LayerParams &_params) : func(_params) {}

     void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
     {
         outputs.resize(inputs.size());
         for (size_t i = 0; i < inputs.size(); i++)
             outputs[i].shareFrom(*inputs[i]); //no data copy
     }

     void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
     {
         for (size_t i = 0; i < inputs.size(); i++)
         {
             CV_Assert(inputs[i]->ptr() == outputs[i].ptr() && inputs[i]->type() == outputs[i].type());
+            CV_Assert(inputs[i]->matRefConst().isContinuous());

-            size_t size = outputs[i].total();
+            Range sizeRange = Range(0, outputs[i].total());

             if (outputs[i].type() == CV_32F)
             {
-                float *data = outputs[i].ptrf();
-                for (size_t j = 0; j < size; j++)
-                    data[j] = func(data[j]);
+                cv::parallel_for_(sizeRange, PBody<float>(outputs[i], func));
             }
             else if (outputs[i].type() == CV_64F)
             {
-                double *data = outputs[i].ptr<double>();
-                for (size_t j = 0; j < size; j++)
-                    data[j] = func(data[j]);
+                cv::parallel_for_(sizeRange, PBody<double>(outputs[i], func));
             }
             else
             {
                 CV_Error(Error::StsNotImplemented, "Only CV_32F and CV_64F blobs are supported");
             }
         }
     }
 };

 struct ReLUFunctor
 {
     float negative_slope;

     ReLUFunctor(LayerParams &params)
     {
         if (params.has("negative_slope"))
             negative_slope = params.get<float>("negative_slope");
         else
             negative_slope = 0.f;
     }

     template<typename TFloat>
-    inline TFloat operator()(TFloat x)
+    inline TFloat operator()(TFloat x) const
     {
         return (x >= (TFloat)0) ? x : negative_slope * x;
     }
 };

 struct TanHFunctor
 {
     TanHFunctor(LayerParams&) {}

     template<typename TFloat>
-    inline TFloat operator()(TFloat x)
+    inline TFloat operator()(TFloat x) const
     {
         return tanh(x);
     }
 };

 struct SigmoidFunctor
 {
     SigmoidFunctor(LayerParams&) {}

     template<typename TFloat>
-    inline TFloat operator()(TFloat x)
+    inline TFloat operator()(TFloat x) const
     {
         return (TFloat)1 / ((TFloat)1 + exp(-x));
     }
 };

 struct AbsValFunctor
 {
     AbsValFunctor(LayerParams&) {}

     template<typename TFloat>
-    inline TFloat operator()(TFloat x)
+    inline TFloat operator()(TFloat x) const
     {
         return abs(x);
     }
 };

 struct PowerFunctor
 {
     float power, scale, shift;

     PowerFunctor(LayerParams &params)
     {
         power = params.get<float>("power", 1.0f);
         scale = params.get<float>("scale", 1.0f);
         shift = params.get<float>("shift", 0.0f);
     }

     template<typename TFloat>
-    inline TFloat operator()(TFloat x)
+    inline TFloat operator()(TFloat x) const
     {
         return pow((TFloat)shift + (TFloat)scale * x, (TFloat)power);
     }
 };

 struct BNLLFunctor
 {
     BNLLFunctor(LayerParams&) {}

     template<typename TFloat>
-    inline TFloat operator()(TFloat x)
+    inline TFloat operator()(TFloat x) const
     {
         return log((TFloat)1 + exp(-abs(x)));
     }
 };
 }
 }
 #endif
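
For readers new to OpenCV's parallel framework: PBody above follows the standard cv::ParallelLoopBody idiom, in which cv::parallel_for_ splits an index Range into disjoint sub-ranges and invokes the body's operator() on worker threads. A self-contained sketch of the same idiom (SquareBody is illustrative, not part of the commit):

#include <opencv2/core.hpp>
#include <vector>
#include <cstdio>

// Applies an in-place element-wise operation over a buffer, exactly the
// shape of work PBody does for the activation functors.
struct SquareBody : public cv::ParallelLoopBody
{
    float *data;
    explicit SquareBody(float *data_) : data(data_) {}

    // Each worker receives a disjoint sub-range [r.start, r.end).
    void operator()(const cv::Range &r) const
    {
        for (int i = r.start; i < r.end; i++)
            data[i] = data[i] * data[i];
    }
};

int main()
{
    std::vector<float> buf(1000, 3.0f);
    cv::parallel_for_(cv::Range(0, (int)buf.size()), SquareBody(buf.data()));
    std::printf("buf[0] = %f\n", buf[0]); // prints 9.0
    return 0;
}

Because each worker writes only its own slice of the range, no synchronization is needed; this is the same property that makes the in-place data[i] = func(data[i]) update in PBody thread-safe, and it is why forward() now asserts that the blob's data is continuous before handing a raw pointer to the workers.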
modules/dnn/src/layers/op_im2col.hpp

@@ -41,6 +41,8 @@
 #ifndef __OPENCV_DNN_LAYERS_IM2COL_HPP__
 #define __OPENCV_DNN_LAYERS_IM2COL_HPP__

+#include <opencv2/core.hpp>
+#include <iostream>

 namespace cv
 {

@@ -48,33 +50,67 @@ namespace dnn
 {

 template <typename Dtype>
-void im2col_cpu(const Dtype* data_im,
-                int channels, int height, int width,
-                int kernel_h, int kernel_w,
-                int pad_h, int pad_w,
-                int stride_h, int stride_w,
-                Dtype* data_col)
-{
-    int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
-    int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
-    int channels_col = channels * kernel_h * kernel_w;
-    for (int c = 0; c < channels_col; ++c)
-    {
-        int w_offset = c % kernel_w;
-        int h_offset = (c / kernel_w) % kernel_h;
-        int c_im = c / kernel_h / kernel_w;
-        for (int h = 0; h < height_col; ++h)
-        {
-            for (int w = 0; w < width_col; ++w)
-            {
-                int h_pad = h * stride_h - pad_h + h_offset;
-                int w_pad = w * stride_w - pad_w + w_offset;
-                if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
-                    data_col[(c * height_col + h) * width_col + w] =
-                        data_im[(c_im * height + h_pad) * width + w_pad];
-                else
-                    data_col[(c * height_col + h) * width_col + w] = 0;
-            }
-        }
-    }
-}
+class im2col_CpuPBody : public cv::ParallelLoopBody
+{
+    const Dtype* data_im;
+    int channels, height, width;
+    int kernel_h, kernel_w;
+    int pad_h, pad_w;
+    int stride_h, stride_w;
+    Dtype* data_col;
+    int height_col, width_col, channels_col;
+
+public:
+
+    im2col_CpuPBody(const Dtype* data_im_,
+                    int channels_, int height_, int width_,
+                    int kernel_h_, int kernel_w_,
+                    int pad_h_, int pad_w_,
+                    int stride_h_, int stride_w_,
+                    Dtype* data_col_) :
+        data_im(data_im_),
+        channels(channels_), height(height_), width(width_),
+        kernel_h(kernel_h_), kernel_w(kernel_w_),
+        pad_h(pad_h_), pad_w(pad_w_),
+        stride_h(stride_h_), stride_w(stride_w_),
+        data_col(data_col_)
+    {
+        height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
+        width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
+        channels_col = channels * kernel_h * kernel_w;
+    }
+
+    static void run(const Dtype* data_im,
+                    int channels, int height, int width,
+                    int kernel_h, int kernel_w,
+                    int pad_h, int pad_w,
+                    int stride_h, int stride_w,
+                    Dtype* data_col)
+    {
+        im2col_CpuPBody<Dtype> pb(data_im, channels, height, width,
+                                  kernel_h, kernel_w, pad_h, pad_w,
+                                  stride_h, stride_w, data_col);
+        cv::parallel_for_(Range(0, pb.channels_col), pb);
+    }
+
+    virtual void operator()(const Range &r) const
+    {
+        for (int c = r.start; c < r.end; ++c)
+        {
+            int w_offset = c % kernel_w;
+            int h_offset = (c / kernel_w) % kernel_h;
+            int c_im = c / kernel_h / kernel_w;
+            for (int h = 0; h < height_col; ++h)
+            {
+                for (int w = 0; w < width_col; ++w)
+                {
+                    int h_pad = h * stride_h - pad_h + h_offset;
+                    int w_pad = w * stride_w - pad_w + w_offset;
+                    if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
+                        data_col[(c * height_col + h) * width_col + w] =
+                            data_im[(c_im * height + h_pad) * width + w_pad];
+                    else
+                        data_col[(c * height_col + h) * width_col + w] = 0;
+                }
+            }
+        }
+    }
+};

 template <typename Dtype>
 void col2im_cpu(const Dtype* data_col, ...
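
The parallel split in im2col_CpuPBody::run is over channels_col = channels * kernel_h * kernel_w, i.e. over whole rows of the column matrix; each row index c is written by exactly one worker, so the writes never overlap. A hedged numeric sketch of the geometry (outDim is a hypothetical helper mirroring the constructor's formulas, not part of the header):

#include <cstdio>

// Hypothetical helper reproducing the output-size formula used in
// im2col_CpuPBody's constructor.
static int outDim(int in, int pad, int kernel, int stride)
{
    return (in + 2 * pad - kernel) / stride + 1;
}

int main()
{
    // Example: 3-channel 5x5 input, 3x3 kernel, pad 1, stride 1.
    int channels = 3, height = 5, width = 5;
    int kernel = 3, pad = 1, stride = 1;

    int height_col   = outDim(height, pad, kernel, stride); // (5+2-3)/1+1 = 5
    int width_col    = outDim(width,  pad, kernel, stride); // 5
    int channels_col = channels * kernel * kernel;           // 27

    // data_col is a channels_col x (height_col*width_col) matrix; the
    // Range(0, channels_col) passed to parallel_for_ splits its 27
    // independent rows across threads.
    std::printf("data_col: %d x %d\n", channels_col, height_col * width_col);
    return 0;
}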
modules/dnn/test/test_layers.cpp

@@ -65,6 +65,8 @@ static void testLayer(String basename, bool useCaffeModel = false, bool useCommo
     String inpfile = (useCommonInputBlob) ? _tf("blob.npy") : _tf(basename + ".input.npy");
     String outfile = _tf(basename + ".npy");

+    cv::setNumThreads(cv::getNumberOfCPUs());
+
     Net net;
     {
         Ptr<Importer> importer = createCaffeImporter(prototxt, (useCaffeModel) ? caffemodel : String());
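
The two added test lines pin the thread count: cv::setNumThreads controls the pool used by cv::parallel_for_, so the newly parallelized layers actually exercise their multi-threaded paths under test. An illustrative use of the same knob (not from the commit):

#include <opencv2/core.hpp>

int main()
{
    cv::setNumThreads(cv::getNumberOfCPUs()); // one worker per logical core
    // ... exercise the parallelized DNN layers ...
    cv::setNumThreads(1);                     // force serial execution when debugging
    return 0;
}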