opencv_contrib / Commits / 9e26b24d

Commit 9e26b24d, authored Apr 25, 2017 by Vadim Pisarevsky
improved speed of ENet processing.
parent 3f5b4655

Showing 7 changed files, with 80 additions and 301 deletions.
modules/dnn/samples/torch_enet.cpp              +10   -5
modules/dnn/src/layers/batch_norm_layer.cpp     +11   -11
modules/dnn/src/layers/convolution_layer.cpp     +0   -0
modules/dnn/src/layers/elementwise_layers.cpp   +46   -16
modules/dnn/src/layers/eltwise_layer.cpp         +4   -11
modules/dnn/src/layers/op_im2col.cpp             +0   -0
modules/dnn/src/layers/op_im2col.hpp             +9   -258
modules/dnn/samples/torch_enet.cpp

@@ -98,14 +98,19 @@ int main(int argc, char **argv)
     net.setBlob("", inputBlob);        //set the network input
     //! [Set input blob]
 
+    const int N = 3;
     TickMeter tm;
-    tm.start();
-    //! [Make forward pass]
-    net.forward();                          //compute output
-    //! [Make forward pass]
-    tm.stop();
+    for (int i = 0; i < N; i++)
+    {
+        TickMeter tm_;
+        tm_.start();
+        net.forward();                          //compute output
+        tm_.stop();
+        if (i == 0 || tm_.getTimeTicks() < tm.getTimeTicks())
+            tm = tm_;
+    }
 
     //! [Gather output]
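The sample now reports the best of N = 3 forward passes instead of a single run, so one-off costs (first-run allocations, cold caches) do not dominate the reported time. Below is a minimal standalone sketch of the same best-of-N pattern; it assumes only cv::TickMeter and a hypothetical work() callable standing in for net.forward():

```cpp
#include <opencv2/core/utility.hpp>

// Return the fastest of N timed runs of `work` (a hypothetical callable).
double bestTimeMilli(void (*work)(), int N = 3)
{
    cv::TickMeter best;
    for (int i = 0; i < N; i++)
    {
        cv::TickMeter tm;
        tm.start();
        work();                  // the work being measured
        tm.stop();
        // the first run seeds `best`; later runs replace it only if faster
        if (i == 0 || tm.getTimeTicks() < best.getTimeTicks())
            best = tm;
    }
    return best.getTimeMilli();
}
```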
modules/dnn/src/layers/batch_norm_layer.cpp

@@ -41,6 +41,15 @@ public:
             Mat* inp = inputs[i];
             outputs[i].create(inp->dims, &inp->size.p[0], inp->type());
         }
+
+        varMeanScale = 1.f;
+        if (!hasWeights && !hasBias) {
+            varMeanScale = *blobs[2].ptr<float>();
+            if (varMeanScale != 0)
+                varMeanScale = 1/varMeanScale;
+        }
+
+        cv::pow(blobs[1]*varMeanScale + epsilon, -0.5, invStdMat);
     }
 
     void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
@@ -52,16 +61,6 @@ public:
         int weightsBlobIndex = 2;
         int biasBlobIndex = weightsBlobIndex + hasWeights;
 
-        float varMeanScale = 1;
-        if (!hasWeights && !hasBias) {
-            varMeanScale = *blobs[2].ptr<float>();
-            if (varMeanScale != 0)
-                varMeanScale = 1/varMeanScale;
-        }
-
-        Mat invStdMat;
-        cv::pow(blobs[1]*varMeanScale + epsilon, -0.5, invStdMat);
-
         int rows = inpBlob.size[2];
         int cols = inpBlob.size[3];
@@ -92,7 +91,8 @@ public:
     }
 
     bool hasWeights, hasBias;
-    float epsilon;
+    float epsilon, varMeanScale;
+    Mat invStdMat;
 };
 
 Ptr<BatchNormLayer> BatchNormLayer::create(const LayerParams& params)
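The speedup here comes from hoisting: varMeanScale and the per-channel inverse standard deviation invStdMat = (var * varMeanScale + epsilon)^-0.5 depend only on the layer's blobs, so they are now computed once in allocate() and kept as members instead of being recomputed on every forward() call. A toy sketch of the same hoisting pattern, assuming same-sized CV_32F Mats for simplicity and omitting the Torch varMeanScale detail (this is not the actual dnn::Layer interface):

```cpp
#include <opencv2/core.hpp>

struct ToyBatchNorm
{
    cv::Mat meanBlob, varBlob;  // fixed once the model is loaded
    float epsilon;
    cv::Mat invStdMat;          // cached by allocate()

    void allocate()
    {
        // blob-only computation, hoisted out of the per-frame path
        cv::pow(varBlob + epsilon, -0.5, invStdMat);
    }

    void forward(const cv::Mat& src, cv::Mat& dst)
    {
        // per-frame work reuses the cached inverse std
        cv::multiply(src - meanBlob, invStdMat, dst);
    }
};
```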
modules/dnn/src/layers/convolution_layer.cpp

(diff collapsed, not shown)
modules/dnn/src/layers/elementwise_layers.cpp

@@ -15,8 +15,7 @@ using std::pow;
 template<typename Func>
 class ElementWiseLayer : public Func::Layer
 {
-    Func func;
 public:
     template<typename Dtype>
     class PBody : public cv::ParallelLoopBody
     {
@@ -35,9 +34,7 @@ class ElementWiseLayer : public Func::Layer
         }
     };
 
 public:
-    ElementWiseLayer(const Func &f=Func()) : func(f) {}
+    ElementWiseLayer(bool run_parallel_=false, const Func &f=Func()) : func(f), run_parallel(run_parallel_) {}
 
     void allocate(const std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
     {
@@ -58,9 +55,16 @@ public:
             Range sizeRange = Range(0, dst.total());
             CV_Assert(src.type() == CV_32F);
-            cv::parallel_for_(sizeRange, PBody<float>(dst, func));
+            PBody<float> body(dst, func);
+            if (run_parallel)
+                cv::parallel_for_(sizeRange, body);
+            else
+                body(sizeRange);
         }
     }
+
+    Func func;
+    bool run_parallel;
 };
 
 struct ReLUFunctor
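With the new run_parallel flag, each layer decides once whether cv::parallel_for_ is worth it: cheap per-element functors (ReLU, and the linear PowerFunctor1 below) now run single-threaded, because thread dispatch costs more than the work itself, while expensive ones (tanh, sigmoid, BNLL, pow) stay parallel. Calling body(sizeRange) directly works because ParallelLoopBody's operator() takes a Range. A self-contained sketch of the dispatch, assuming a functor with float operator()(float):

```cpp
#include <opencv2/core.hpp>

template <typename Func>
struct ApplyBody : public cv::ParallelLoopBody
{
    float* data;
    Func func;
    ApplyBody(float* data_, Func func_) : data(data_), func(func_) {}
    virtual void operator()(const cv::Range& r) const
    {
        for (int i = r.start; i < r.end; i++)
            data[i] = func(data[i]);
    }
};

template <typename Func>
void applyInPlace(float* data, int n, Func func, bool runParallel)
{
    ApplyBody<Func> body(data, func);
    cv::Range range(0, n);
    if (runParallel)
        cv::parallel_for_(range, body);  // pays off only for costly funcs
    else
        body(range);                     // skip thread overhead for cheap ones
}
```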
@@ -135,8 +139,24 @@ struct PowerFunctor
     template<typename TFloat>
     inline TFloat operator()(TFloat x) const
     {
-        return power == 1.0f ? (TFloat)shift + (TFloat)scale * x :
-               pow((TFloat)shift + (TFloat)scale * x, (TFloat)power);
+        return pow((TFloat)shift + (TFloat)scale * x, (TFloat)power);
     }
 };
 
+struct PowerFunctor1
+{
+    typedef PowerLayer Layer;
+
+    const float scale;
+    const float shift;
+
+    PowerFunctor1(float scale_ = 1.f, float shift_ = 0)
+        : scale(scale_), shift(shift_) {}
+
+    template<typename TFloat>
+    inline TFloat operator()(TFloat x) const
+    {
+        return (TFloat)shift + (TFloat)scale * x;
+    }
+};
@@ -165,12 +185,12 @@ public:
     void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
     {
         CV_Assert(inputs.size() == 1);
         Mat &inpBlob = *inputs[0];
 
         for (size_t ii = 0; ii < outputs.size(); ii++)
         {
             Mat &outBlob = outputs[ii];
+            CV_Assert(inpBlob.isContinuous() && outBlob.isContinuous());
 
             CV_Assert(blobs[0].total() == inpBlob.size[1]);
@@ -181,8 +201,16 @@ public:
                 Mat inpBlobPlane = getPlane(inpBlob, 0, n);
                 Mat outBlobPlane = getPlane(outBlob, 0, n);
 
-                threshold(inpBlobPlane, outBlobPlane, 0, 0, cv::THRESH_TOZERO_INV);
-                scaleAdd(outBlobPlane, slopeWeight-1, inpBlobPlane, outBlobPlane);
+                size_t i, planeTotal = inpBlobPlane.total();
+                const float* inptr = inpBlobPlane.ptr<float>();
+                float* outptr = outBlobPlane.ptr<float>();
+                for (i = 0; i < planeTotal; i++)
+                {
+                    float val = inptr[i];
+                    outptr[i] = val*(val >= 0.f ? 1.f : slopeWeight);
+                }
+                //threshold(inpBlobPlane, outBlobPlane, 0, 0, cv::THRESH_TOZERO_INV);
+                //scaleAdd(outBlobPlane, slopeWeight-1, inpBlobPlane, outBlobPlane);
             }
         }
     }
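The fused loop is algebraically the same as the old two-step version: THRESH_TOZERO_INV keeps only the non-positive part of the input, and scaleAdd then adds (slopeWeight - 1) times that part back, giving val for val >= 0 and slopeWeight*val otherwise. The rewrite computes that in one pass over each plane instead of two whole-image operations. A quick scalar equivalence check (standalone sketch with hypothetical test values):

```cpp
#include <cassert>
#include <cmath>

int main()
{
    const float slope = 0.25f;
    const float vals[] = { -2.f, -0.5f, 0.f, 3.f };
    for (int i = 0; i < 4; i++)
    {
        float v = vals[i];
        // old formulation: keep v only when v <= 0, then add (slope-1)*that
        float neg = v <= 0.f ? v : 0.f;
        float twoStep = v + (slope - 1.f) * neg;
        // new fused expression from the diff
        float fused = v * (v >= 0.f ? 1.f : slope);
        assert(std::fabs(twoStep - fused) < 1e-6f);
    }
    return 0;
}
```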
@@ -196,7 +224,7 @@ Ptr<_Layer> _Layer::create() { \
 Ptr<ReLULayer> ReLULayer::create(const LayerParams& params)
 {
     float negativeSlope = params.get<float>("negative_slope", 0.f);
-    Ptr<ReLULayer> l(new ElementWiseLayer<ReLUFunctor>(ReLUFunctor(negativeSlope)));
+    Ptr<ReLULayer> l(new ElementWiseLayer<ReLUFunctor>(false, ReLUFunctor(negativeSlope)));
     l->setParamsFrom(params);
 
     return l;
@@ -204,7 +232,7 @@ Ptr<ReLULayer> ReLULayer::create(const LayerParams& params)
 Ptr<TanHLayer> TanHLayer::create(const LayerParams& params)
 {
-    Ptr<TanHLayer> l(new ElementWiseLayer<TanHFunctor>());
+    Ptr<TanHLayer> l(new ElementWiseLayer<TanHFunctor>(true));
     l->setParamsFrom(params);
 
     return l;
@@ -212,7 +240,7 @@ Ptr<TanHLayer> TanHLayer::create(const LayerParams& params)
 Ptr<SigmoidLayer> SigmoidLayer::create(const LayerParams& params)
 {
-    Ptr<SigmoidLayer> l(new ElementWiseLayer<SigmoidFunctor>());
+    Ptr<SigmoidLayer> l(new ElementWiseLayer<SigmoidFunctor>(true));
     l->setParamsFrom(params);
 
     return l;
@@ -228,7 +256,7 @@ Ptr<AbsLayer> AbsLayer::create(const LayerParams& params)
 Ptr<BNLLLayer> BNLLLayer::create(const LayerParams& params)
 {
-    Ptr<BNLLLayer> l(new ElementWiseLayer<BNLLFunctor>());
+    Ptr<BNLLLayer> l(new ElementWiseLayer<BNLLFunctor>(true));
     l->setParamsFrom(params);
 
     return l;
@@ -239,7 +267,9 @@ Ptr<PowerLayer> PowerLayer::create(const LayerParams& params)
     float power = params.get<float>("power", 1.0f);
     float scale = params.get<float>("scale", 1.0f);
     float shift = params.get<float>("shift", 0.0f);
-    Ptr<PowerLayer> l(new ElementWiseLayer<PowerFunctor>(PowerFunctor(power, scale, shift)));
+    Ptr<PowerLayer> l(power == 1.f ?
+        (PowerLayer*)(new ElementWiseLayer<PowerFunctor1>(false, PowerFunctor1(scale, shift))) :
+        (PowerLayer*)(new ElementWiseLayer<PowerFunctor>(true, PowerFunctor(power, scale, shift))));
     l->setParamsFrom(params);
 
     return l;
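The branch on power is now resolved once per layer at creation time rather than once per element: power == 1 gets the linear PowerFunctor1 run serially, everything else gets the full pow() functor run in parallel. A compact sketch of the create-time-dispatch idea with hypothetical names; note the actual diff uses compile-time (template) dispatch, while this sketch uses virtual dispatch for brevity:

```cpp
#include <cmath>
#include <memory>

struct Op
{
    virtual float operator()(float x) const = 0;
    virtual ~Op() {}
};

struct LinearOp : Op
{
    float scale, shift;
    LinearOp(float s, float b) : scale(s), shift(b) {}
    float operator()(float x) const { return shift + scale * x; }
};

struct PowOp : Op
{
    float power, scale, shift;
    PowOp(float p, float s, float b) : power(p), scale(s), shift(b) {}
    float operator()(float x) const { return std::pow(shift + scale * x, power); }
};

// Choose the cheap specialization once; the per-element hot loop
// then contains no power==1 branch.
std::unique_ptr<Op> makeOp(float power, float scale, float shift)
{
    if (power == 1.f)
        return std::unique_ptr<Op>(new LinearOp(scale, shift));
    return std::unique_ptr<Op>(new PowOp(power, scale, shift));
}
```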
modules/dnn/src/layers/eltwise_layer.cpp
@@ -98,15 +98,14 @@ public:
     void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
     {
+        Mat& output = outputs[0];
         switch (op)
         {
             case SUM:
                 {
                     CV_Assert(coeffs.size() == 0 || coeffs.size() == inputs.size());
-                    Mat& output = outputs[0];
-                    output.setTo(0.);
                     if (0 < coeffs.size())
                     {
+                        output.setTo(0.);
                         for (size_t i = 0; i < inputs.size(); i++)
                         {
                             output += *inputs[i] * coeffs[i];
@@ -114,32 +113,26 @@ public:
                     }
                     else
                     {
-                        for (size_t i = 0; i < inputs.size(); i++)
+                        add(*inputs[0], *inputs[1], output);
+                        for (size_t i = 2; i < inputs.size(); i++)
                         {
                             output += *inputs[i];
                         }
                     }
                 }
                 break;
             case PROD:
                 {
-                    Mat& output = outputs[0];
                     output.setTo(1.);
                     for (size_t i = 0; i < inputs.size(); i++)
                     {
                         output = output.mul(*inputs[i]);
                     }
                 }
                 break;
             case MAX:
                 {
-                    Mat& output = outputs[0];
                     cv::max(*inputs[0], *inputs[1], output);
                     for (size_t i = 2; i < inputs.size(); i++)
                     {
                         cv::max(output, *inputs[i], output);
                     }
                 }
                 break;
             default:
                 CV_Assert(0);
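For SUM with no coefficients, the old code zeroed the output and then accumulated every input, which is one extra full pass over the output; the new code seeds the output directly with add(*inputs[0], *inputs[1], output) and accumulates only the remaining inputs, keeping setTo(0.) only on the coefficient path where it is still needed. MAX is seeded from the first pair the same way. A sketch of the seeded-accumulation pattern, assuming at least two equally-sized CV_32F inputs:

```cpp
#include <opencv2/core.hpp>
#include <vector>

void eltwiseSum(const std::vector<cv::Mat>& inputs, cv::Mat& output)
{
    CV_Assert(inputs.size() >= 2);
    // seed from the first pair: no separate zero-fill pass over `output`
    cv::add(inputs[0], inputs[1], output);
    for (size_t i = 2; i < inputs.size(); i++)
        output += inputs[i];
}
```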
modules/dnn/src/layers/op_im2col.cpp

(diff collapsed, not shown)
modules/dnn/src/layers/op_im2col.hpp

@@ -49,264 +49,15 @@ namespace cv
 namespace dnn
 {
 
-template <typename Dtype>
-class im2col_CpuPBody : public cv::ParallelLoopBody
-{
-    const Dtype* data_im;
-    int channels, height, width;
-    int kernel_h, kernel_w;
-    int pad_h, pad_w;
-    int stride_h, stride_w;
-    int dilation_h, dilation_w;
-    Dtype* data_col;
-    int height_col, width_col, channels_col;
-
-    im2col_CpuPBody() {}
-public:
-    static void run(const Dtype* data_im,
-                    int channels, int height, int width,
-                    int kernel_h, int kernel_w,
-                    int pad_h, int pad_w,
-                    int stride_h, int stride_w,
-                    int dilation_h, int dilation_w,
-                    int height_col, int width_col,
-                    Dtype* data_col)
-    {
-        im2col_CpuPBody<Dtype> t;
-        t.data_im = data_im;
-        t.data_col = data_col;
-        t.channels = channels; t.height = height; t.width = width;
-        t.kernel_h = kernel_h; t.kernel_w = kernel_w;
-        t.pad_h = pad_h; t.pad_w = pad_w;
-        t.stride_h = stride_h; t.stride_w = stride_w;
-        t.dilation_h = dilation_h; t.dilation_w = dilation_w;
-        t.height_col = height_col;
-        t.width_col = width_col;
-        t.channels_col = channels * kernel_h * kernel_w;
-
-        cv::parallel_for_(Range(0, t.channels_col), t);
-    }
-
-    virtual void operator ()(const Range &r) const
-    {
-        for (int c = r.start; c < r.end; ++c)
-        {
-            int w_offset = c % kernel_w;
-            int h_offset = (c / kernel_w) % kernel_h;
-            int c_im = c / kernel_h / kernel_w;
-            for (int h = 0; h < height_col; ++h)
-            {
-                for (int w = 0; w < width_col; ++w)
-                {
-                    int h_pad = h * stride_h - pad_h + h_offset * dilation_h;
-                    int w_pad = w * stride_w - pad_w + w_offset * dilation_w;
-                    if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
-                        data_col[(c * height_col + h) * width_col + w] =
-                            data_im[(c_im * height + h_pad) * width + w_pad];
-                    else
-                        data_col[(c * height_col + h) * width_col + w] = 0;
-                }
-            }
-        }
-    }
-};
-
-template <typename Dtype>
-class im2row_CpuPBody : public cv::ParallelLoopBody
-{
-    const Dtype* data_im;
-    int channels, height, width;
-    int kernel_h, kernel_w;
-    int pad_h, pad_w;
-    int stride_h, stride_w;
-    int dilation_h, dilation_w;
-    Dtype* data_col;
-    int height_col, width_col, channels_col;
-
-    im2row_CpuPBody() {}
-public:
-    static void run(const Dtype* data_im,
-                    int channels, int height, int width,
-                    int kernel_h, int kernel_w,
-                    int pad_h, int pad_w,
-                    int stride_h, int stride_w,
-                    int dilation_h, int dilation_w,
-                    int height_col, int width_col,
-                    Dtype* data_col)
-    {
-        im2row_CpuPBody<Dtype> t;
-        t.data_im = data_im;
-        t.data_col = data_col;
-        t.channels = channels; t.height = height; t.width = width;
-        t.kernel_h = kernel_h; t.kernel_w = kernel_w;
-        t.pad_h = pad_h; t.pad_w = pad_w;
-        t.stride_h = stride_h; t.stride_w = stride_w;
-        t.dilation_h = dilation_h; t.dilation_w = dilation_w;
-        t.height_col = height_col;
-        t.width_col = width_col;
-        t.channels_col = channels * kernel_h * kernel_w;
-
-        cv::parallel_for_(Range(0, t.height_col*t.width_col), t, 16);
-    }
-
-    virtual void operator ()(const Range &r) const
-    {
-        int dh = dilation_h, dw = dilation_w;
-        Dtype* data_col_ = data_col;
-        const Dtype* data_im_ = data_im;
-
-        for (int row = r.start; row < r.end; ++row)
-        {
-            int out_c = row % width_col;
-            int out_r = row / width_col;
-            int out_row_offset = row * kernel_h * kernel_w * channels;
-
-            int start_in_r = out_r * stride_h - pad_h;
-            int start_in_c = out_c * stride_w - pad_w;
-            int start_k_r = std::max(0, cvCeil(-start_in_r / (float)dilation_h));
-            int end_k_r = std::min(kernel_h, cvCeil((height - start_in_r) / (float)dilation_h));
-            int start_k_c = std::max(0, cvCeil(-start_in_c / (float)dilation_w));
-            int end_k_c = std::min(kernel_w, cvCeil((width - start_in_c) / (float)dilation_w));
-
-            for (int i_c = 0; i_c < channels; i_c++)
-            {
-                int channels_offset = i_c * width * height;
-                int out_ch_offset = i_c * kernel_h * kernel_w;
-                int in_r = start_in_r + start_k_r * dilation_h;
-
-                for (int k_r = start_k_r; k_r < end_k_r; k_r++, in_r += dh)
-                {
-                    int row_offset = in_r * width;
-                    int out_col_offset = k_r * kernel_w;
-                    int in_c = start_in_c + start_k_c * dilation_w;
-
-                    for (int k_c = start_k_c; k_c < end_k_c; k_c++, in_c += dw)
-                    {
-                        int in_index = channels_offset + row_offset + in_c;
-                        int out_index = out_row_offset + out_ch_offset + out_col_offset + k_c;
-                        data_col_[out_index] = data_im_[in_index];
-                    }
-                }
-            }
-        }
-    }
-};
-
-template <typename Dtype>
-class col2im_CpuPBody : public cv::ParallelLoopBody
-{
-    const Dtype* data_col;
-    int channels, height, width;
-    int kernel_h, kernel_w;
-    int pad_h, pad_w;
-    int stride_h, stride_w;
-    Dtype* data_im;
-    int height_col, width_col;
-
-    col2im_CpuPBody() {}
-public:
-    static void run(const Dtype* data_col,
-                    int channels, int height, int width,
-                    int kernel_h, int kernel_w,
-                    int pad_h, int pad_w,
-                    int stride_h, int stride_w,
-                    Dtype* data_im)
-    {
-        //TODO: single-threaded version switch
-        col2im_CpuPBody t;
-        t.data_col = data_col;
-        t.data_im = data_im;
-        t.channels = channels; t.height = height; t.width = width;
-        t.kernel_h = kernel_h; t.kernel_w = kernel_w;
-        t.pad_h = pad_h; t.pad_w = pad_w;
-        t.stride_h = stride_h; t.stride_w = stride_w;
-        t.height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
-        t.width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
-
-        int img_total = channels * height * width;
-        cv::parallel_for_(Range(0, img_total), t);
-    }
-
-    virtual void operator ()(const Range &r) const
-    {
-        const Dtype* data_col_ = data_col;
-        Dtype* data_im_ = data_im;
-        int coeff_h_col = (1 - stride_h * kernel_w * height_col) * width_col;
-        int coeff_w_col = (1 - stride_w * height_col * width_col);
-        for (int index = r.start; index < r.end; index++)
-        {
-            Dtype val = 0;
-            int w = index % width + pad_w;
-            int h = (index / width) % height + pad_h;
-            int c = index / (width * height);
-
-            // compute the start and end of the output
-            int w_col_start = (w < kernel_w) ? 0 : (w - kernel_w) / stride_w + 1;
-            int w_col_end = std::min(w / stride_w + 1, width_col);
-            int h_col_start = (h < kernel_h) ? 0 : (h - kernel_h) / stride_h + 1;
-            int h_col_end = std::min(h / stride_h + 1, height_col);
-
-            // equivalent implementation
-            int offset = (c * kernel_h * kernel_w + h * kernel_w + w) * height_col * width_col;
-            for (int h_col = h_col_start; h_col < h_col_end; ++h_col)
-            {
-                for (int w_col = w_col_start; w_col < w_col_end; ++w_col)
-                {
-                    val += data_col_[offset + h_col * coeff_h_col + w_col * coeff_w_col];
-                }
-            }
-            data_im_[index] = val;
-        }
-    }
-};
-
-//single-threaded version
-template <typename Dtype>
-void col2im_cpu(const Dtype* data_col,
-                int channels, int height, int width,
-                int kernel_h, int kernel_w,
-                int pad_h, int pad_w,
-                int stride_h, int stride_w,
-                int dilation_h, int dilation_w,
-                Dtype* data_im)
-{
-    int height_col = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
-    int width_col = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
-    int channels_col = channels * kernel_h * kernel_w;
-
-    std::memset(data_im, 0, height * width * channels * sizeof(Dtype));
-
-    for (int c = 0; c < channels_col; ++c)
-    {
-        int w_offset = c % kernel_w;
-        int h_offset = (c / kernel_w) % kernel_h;
-        int c_im = c / kernel_h / kernel_w;
-
-        for (int h = 0; h < height_col; ++h)
-        {
-            for (int w = 0; w < width_col; ++w)
-            {
-                int h_pad = h * stride_h - pad_h + h_offset * dilation_h;
-                int w_pad = w * stride_w - pad_w + w_offset * dilation_w;
-
-                if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
-                    data_im[(c_im * height + h_pad) * width + w_pad] +=
-                        data_col[(c * height_col + h) * width_col + w];
-            }
-        }
-    }
-}
+void im2row(const float* data_im, int channels, int height, int width,
+            int kernel_h, int kernel_w, int pad_h, int pad_w,
+            int stride_h, int stride_w, int dilation_h, int dilation_w,
+            int height_col, int width_col, float* data_col);
+
+void col2im(const float* data_col, int channels, int height, int width,
+            int kernel_h, int kernel_w, int pad_h, int pad_w,
+            int stride_h, int stride_w, int dilation_h, int dilation_w,
+            float* data_im, const int* ofsbuf);
 
 }
 }
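After this change the header only declares the float im2row()/col2im() entry points (their implementations move out of the header; the related op_im2col.cpp diff is collapsed above), so the inline template classes are gone. im2row lays each output pixel's receptive field out as one contiguous row (all kernel taps for channel 0, then channel 1, and so on), which is the layout a row-major GEMM wants when convolution is expressed as patch-matrix times weights. Below is a minimal reference sketch of that semantics, consistent with the declaration above but far less optimized than the real code:

```cpp
// Reference im2row: row r of data_col holds the kernel_h*kernel_w*channels
// samples feeding output pixel r, zero-filled where the kernel overhangs.
void im2row_ref(const float* data_im, int channels, int height, int width,
                int kernel_h, int kernel_w, int pad_h, int pad_w,
                int stride_h, int stride_w, int dilation_h, int dilation_w,
                int height_col, int width_col, float* data_col)
{
    for (int out_r = 0; out_r < height_col; out_r++)
        for (int out_c = 0; out_c < width_col; out_c++)
        {
            float* row = data_col +
                (out_r * width_col + out_c) * channels * kernel_h * kernel_w;
            for (int c = 0; c < channels; c++)
                for (int kr = 0; kr < kernel_h; kr++)
                    for (int kc = 0; kc < kernel_w; kc++)
                    {
                        int in_r = out_r * stride_h - pad_h + kr * dilation_h;
                        int in_c = out_c * stride_w - pad_w + kc * dilation_w;
                        bool inside = in_r >= 0 && in_r < height &&
                                      in_c >= 0 && in_c < width;
                        *row++ = inside
                            ? data_im[(c * height + in_r) * width + in_c]
                            : 0.f;
                    }
        }
}
```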