opencv_contrib / Commits
Commit 4cb108ef
authored Jun 23, 2016 by Vitaliy Lyudvichenko
Adding CPU parallelization for element-wise layers and im2col operation
parent 6d3cb808
Showing 4 changed files with 101 additions and 46 deletions:

    modules/dnn/src/layers/convolution_layer.cpp    (+4,  -4)
    modules/dnn/src/layers/elementwise_layers.hpp   (+52, -35)
    modules/dnn/src/layers/op_im2col.hpp            (+43, -7)
    modules/dnn/test/test_layers.cpp                (+2,  -0)
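All four changes revolve around one OpenCV primitive: cv::parallel_for_ splits a cv::Range into stripes and runs a cv::ParallelLoopBody subclass over them on OpenCV's internal thread pool. As orientation before reading the diffs, here is a minimal self-contained sketch of that pattern; it is illustrative only (ScaleBody and the buffer are made up, not part of this commit):

    #include <opencv2/core.hpp>
    #include <vector>
    #include <cstdio>

    // Illustrative only: in-place map over a buffer, the same shape as the
    // bodies introduced in this commit. Each stripe [r.start, r.end) is
    // processed by one worker; stripes never overlap, so no locking is needed.
    class ScaleBody : public cv::ParallelLoopBody
    {
        float* data;
    public:
        explicit ScaleBody(float* data_) : data(data_) {}

        void operator()(const cv::Range& r) const
        {
            for (int i = r.start; i < r.end; i++)
                data[i] *= 2.f;
        }
    };

    int main()
    {
        std::vector<float> buf(1 << 20, 1.f);

        // parallel_for_ chops [0, size) into stripes and dispatches them on
        // OpenCV's thread pool (TBB, OpenMP, pthreads, whichever was built in).
        cv::parallel_for_(cv::Range(0, (int)buf.size()), ScaleBody(buf.data()));

        std::printf("buf[0] = %g\n", buf[0]); // 2
        return 0;
    }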
modules/dnn/src/layers/convolution_layer.cpp

@@ -179,9 +179,9 @@ namespace dnn
 #endif // HAVE_OPENCL

         if (inpBlob.type() == CV_32F)
-            im2col_cpu((float *)srcPtr, inpGroupCn, inpH, inpW, kerH, kerW, padH, padW, strideH, strideW, (float *)colMat.ptr());
+            im2col_CpuPBody<float>::run((float *)srcPtr, inpGroupCn, inpH, inpW, kerH, kerW, padH, padW, strideH, strideW, (float *)colMat.ptr());
         if (inpBlob.type() == CV_64F)
-            im2col_cpu((double*)srcPtr, inpGroupCn, inpH, inpW, kerH, kerW, padH, padW, strideH, strideW, (double*)colMat.ptr());
+            im2col_CpuPBody<double>::run((double*)srcPtr, inpGroupCn, inpH, inpW, kerH, kerW, padH, padW, strideH, strideW, (double*)colMat.ptr());
     }

     void ConvolutionLayer::computeInpOutShape(const Blob &inpBlob)

@@ -253,9 +253,9 @@ namespace dnn
         if (is1x1()) return;

         if (dstMat.type() == CV_32F)
-            col2im_cpu((float*)colMat.ptr(), inpGroupCn, inpH, inpW, kerH, kerW, padH, padW, strideH, strideW, (float*)dstMat.ptr());
+            col2im_cpu(colMat.ptr<float>(), inpGroupCn, inpH, inpW, kerH, kerW, padH, padW, strideH, strideW, dstMat.ptr<float>());
         if (dstMat.type() == CV_64F)
-            col2im_cpu((double*)colMat.ptr(), inpGroupCn, inpH, inpW, kerH, kerW, padH, padW, strideH, strideW, (double*)dstMat.ptr());
+            col2im_cpu(colMat.ptr<double>(), inpGroupCn, inpH, inpW, kerH, kerW, padH, padW, strideH, strideW, dstMat.ptr<double>());
     }
 }
 }
modules/dnn/src/layers/elementwise_layers.hpp

@@ -55,12 +55,31 @@ using std::exp;
 using std::tanh;
 using std::pow;

 template<typename Func>
 class ElementWiseLayer : public Layer
 {
     Func func;

+    template<typename Dtype>
+    class PBody : public cv::ParallelLoopBody
+    {
+        Dtype *data;
+        Func &func;
+
+    public:
+
+        PBody(Blob &blob, Func &func_) :
+            func(func_), data(blob.ptr<Dtype>())
+        {}
+
+        void operator()(const Range &r) const
+        {
+            for (int i = r.start; i < r.end; i++)
+                data[i] = func(data[i]);
+        }
+    };
+
 public:

     ElementWiseLayer(LayerParams &_params) : func(_params) {}

     void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)

@@ -75,20 +94,17 @@ using std::pow;
         for (size_t i = 0; i < inputs.size(); i++)
         {
             CV_Assert(inputs[i]->ptr() == outputs[i].ptr() && inputs[i]->type() == outputs[i].type());
+            CV_Assert(inputs[i]->matRefConst().isContinuous());

-            size_t size = outputs[i].total();
+            Range sizeRange = Range(0, outputs[i].total());

             if (outputs[i].type() == CV_32F)
             {
-                float *data = outputs[i].ptrf();
-                for (size_t j = 0; j < size; j++)
-                    data[j] = func(data[j]);
+                cv::parallel_for_(sizeRange, PBody<float>(outputs[i], func));
             }
             else if (outputs[i].type() == CV_64F)
             {
-                double *data = outputs[i].ptr<double>();
-                for (size_t j = 0; j < size; j++)
-                    data[j] = func(data[j]);
+                cv::parallel_for_(sizeRange, PBody<double>(outputs[i], func));
             }
             else
             {

@@ -96,11 +112,11 @@ using std::pow;
             }
         }
     }
 };

 struct ReLUFunctor
 {
     float negative_slope;

     ReLUFunctor(LayerParams &params)

@@ -112,47 +128,47 @@ using std::pow;
     }

     template<typename TFloat>
-    inline TFloat operator()(TFloat x)
+    inline TFloat operator()(TFloat x) const
     {
         return (x >= (TFloat)0) ? x : negative_slope * x;
     }
 };

 struct TanHFunctor
 {
     TanHFunctor(LayerParams&) {}

     template<typename TFloat>
-    inline TFloat operator()(TFloat x)
+    inline TFloat operator()(TFloat x) const
     {
         return tanh(x);
     }
 };

 struct SigmoidFunctor
 {
     SigmoidFunctor(LayerParams&) {}

     template<typename TFloat>
-    inline TFloat operator()(TFloat x)
+    inline TFloat operator()(TFloat x) const
     {
         return (TFloat)1 / ((TFloat)1 + exp(-x));
     }
 };

 struct AbsValFunctor
 {
     AbsValFunctor(LayerParams&) {}

     template<typename TFloat>
-    inline TFloat operator()(TFloat x)
+    inline TFloat operator()(TFloat x) const
     {
         return abs(x);
     }
 };

 struct PowerFunctor
 {
     float power, scale, shift;

     PowerFunctor(LayerParams &params)

@@ -163,22 +179,23 @@ using std::pow;
     }

     template<typename TFloat>
-    inline TFloat operator()(TFloat x)
+    inline TFloat operator()(TFloat x) const
     {
         return pow((TFloat)shift + (TFloat)scale * x, (TFloat)power);
     }
 };

 struct BNLLFunctor
 {
     BNLLFunctor(LayerParams&) {}

     template<typename TFloat>
-    inline TFloat operator()(TFloat x)
+    inline TFloat operator()(TFloat x) const
     {
         return log((TFloat)1 + exp(-abs(x)));
     }
 };
 }
 }

 #endif
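The rewrite above is behavior-preserving: the serial loop data[j] = func(data[j]) becomes a range-split parallel map, and since each index is read and written by exactly one stripe, the in-place update is race-free (the new isContinuous() assertion guarantees the flat indexing is valid). Marking every functor's operator() const documents that applying it concurrently mutates no shared state. For comparison, a hedged sketch of the same in-place ReLU map written against the std::function overload of parallel_for_ that later OpenCV releases (3.4+, if memory serves) added; illustrative only, not part of this commit:

    #include <opencv2/core.hpp>
    #include <vector>

    int main()
    {
        std::vector<float> data(1 << 20, -1.f);
        const float negative_slope = 0.1f;

        // Same stripe-wise in-place map as ElementWiseLayer::PBody,
        // expressed as a lambda instead of a ParallelLoopBody subclass.
        cv::parallel_for_(cv::Range(0, (int)data.size()), [&](const cv::Range& r)
        {
            for (int i = r.start; i < r.end; i++)
                data[i] = data[i] >= 0.f ? data[i] : negative_slope * data[i];
        });
        return 0;
    }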
modules/dnn/src/layers/op_im2col.hpp

@@ -41,6 +41,8 @@
 #ifndef __OPENCV_DNN_LAYERS_IM2COL_HPP__
 #define __OPENCV_DNN_LAYERS_IM2COL_HPP__
+#include <opencv2/core.hpp>
+#include <iostream>

 namespace cv
 {

@@ -48,17 +50,50 @@ namespace dnn
 {

 template <typename Dtype>
-void im2col_cpu(const Dtype* data_im,
+class im2col_CpuPBody : public cv::ParallelLoopBody
+{
+    const Dtype* data_im;
+    int channels, height, width;
+    int kernel_h, kernel_w;
+    int pad_h, pad_w;
+    int stride_h, stride_w;
+    Dtype* data_col;
+    int height_col, width_col, channels_col;
+
+public:
+
+    im2col_CpuPBody(const Dtype* data_im_,
+                    int channels_, int height_, int width_,
+                    int kernel_h_, int kernel_w_,
+                    int pad_h_, int pad_w_,
+                    int stride_h_, int stride_w_,
+                    Dtype* data_col_) :
+        data_im(data_im_),
+        channels(channels_), height(height_), width(width_),
+        kernel_h(kernel_h_), kernel_w(kernel_w_),
+        pad_h(pad_h_), pad_w(pad_w_),
+        stride_h(stride_h_), stride_w(stride_w_),
+        data_col(data_col_)
+    {
+        height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
+        width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
+        channels_col = channels * kernel_h * kernel_w;
+    }
+
+    static void run(const Dtype* data_im,
                 int channels, int height, int width,
                 int kernel_h, int kernel_w,
                 int pad_h, int pad_w,
                 int stride_h, int stride_w,
                 Dtype* data_col)
 {
-    int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
-    int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
-    int channels_col = channels * kernel_h * kernel_w;
-    for (int c = 0; c < channels_col; ++c) {
+        im2col_CpuPBody<Dtype> pb(data_im, channels, height, width, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, data_col);
+        cv::parallel_for_(Range(0, pb.channels_col), pb);
+    }
+
+    virtual void operator ()(const Range &r) const
+    {
+        for (int c = r.start; c < r.end; ++c)
+        {
         int w_offset = c % kernel_w;
         int h_offset = (c / kernel_w) % kernel_h;
         int c_im = c / kernel_h / kernel_w;
 ...

@@ -74,7 +109,8 @@ void im2col_cpu(const Dtype* data_im,
             }
         }
     }
-}
+    }
+};

 template <typename Dtype>
 void col2im_cpu(const Dtype* data_col,
 ...
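Note the parallel split is over channels_col = channels * kernel_h * kernel_w: each iteration c handles one (channel, kernel-row, kernel-column) combination and fills its own height_col x width_col row of data_col, so stripes write disjoint memory. A small standalone check of the geometry and index decomposition used above (the values are illustrative, the formulas are the ones from the constructor and operator()):

    #include <cstdio>

    int main()
    {
        // Example geometry: 5x5 input, 3x3 kernel, pad 1, stride 2, 4 channels.
        int height = 5, width = 5, kernel_h = 3, kernel_w = 3;
        int pad_h = 1, pad_w = 1, stride_h = 2, stride_w = 2, channels = 4;

        // Same formulas as im2col_CpuPBody's constructor:
        int height_col   = (height + 2 * pad_h - kernel_h) / stride_h + 1;  // (5+2-3)/2+1 = 3
        int width_col    = (width  + 2 * pad_w - kernel_w) / stride_w + 1;  // 3
        int channels_col = channels * kernel_h * kernel_w;                  // 36

        // Each parallel iteration c decodes to one (channel, ky, kx) triple,
        // exactly as in operator():
        for (int c = 0; c < channels_col; ++c)
        {
            int w_offset = c % kernel_w;
            int h_offset = (c / kernel_w) % kernel_h;
            int c_im     = c / kernel_h / kernel_w;
            if (c < 3 || c == channels_col - 1)
                std::printf("c=%2d -> c_im=%d, h_offset=%d, w_offset=%d\n",
                            c, c_im, h_offset, w_offset);
        }
        std::printf("each c fills a %d x %d tile; %d disjoint rows total\n",
                    height_col, width_col, channels_col);
        return 0;
    }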
modules/dnn/test/test_layers.cpp

@@ -65,6 +65,8 @@ static void testLayer(String basename, bool useCaffeModel = false, bool useCommo
     String inpfile = (useCommonInputBlob) ? _tf("blob.npy") : _tf(basename + ".input.npy");
     String outfile = _tf(basename + ".npy");

+    cv::setNumThreads(cv::getNumberOfCPUs());
+
     Net net;
     {
         Ptr<Importer> importer = createCaffeImporter(prototxt, (useCaffeModel) ? caffemodel : String());
 ...
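The test hook is the one added line above: cv::setNumThreads(cv::getNumberOfCPUs()) sizes OpenCV's pool to the machine, so the new parallel code paths actually run multi-threaded under test. Conversely, pinning the pool to one thread is a handy way to check whether a failure is threading-related; a small illustrative snippet (not from this commit):

    #include <opencv2/core.hpp>
    #include <cstdio>

    int main()
    {
        cv::setNumThreads(cv::getNumberOfCPUs());   // as in the test change
        std::printf("pool size: %d\n", cv::getNumThreads());

        cv::setNumThreads(1);                       // effectively serial: useful
                                                    // when bisecting a suspected
                                                    // parallelization bug
        return 0;
    }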