opencv_contrib / Commits / b97931e0

Commit b97931e0, authored Apr 26, 2017 by Vadim Pisarevsky
Merge pull request #1136 from vpisarev:dnn5

Parents: 3908909d 75789089
Showing 7 changed files with 488 additions and 445 deletions (+488 -445)
modules/dnn/samples/torch_enet.cpp             +9    -4
modules/dnn/src/layers/batch_norm_layer.cpp    +11   -11
modules/dnn/src/layers/convolution_layer.cpp   +86   -145
modules/dnn/src/layers/elementwise_layers.cpp  +46   -16
modules/dnn/src/layers/eltwise_layer.cpp       +4    -11
modules/dnn/src/layers/op_im2col.cpp           +323  -0
modules/dnn/src/layers/op_im2col.hpp           +9    -258
modules/dnn/samples/torch_enet.cpp

@@ -98,14 +98,19 @@ int main(int argc, char **argv)
     net.setBlob("", inputBlob);        //set the network input
     //! [Set input blob]
 
+    const int N = 3;
     TickMeter tm;
-    tm.start();
 
     //! [Make forward pass]
-    net.forward();                          //compute output
-    //! [Make forward pass]
-    tm.stop();
+    for( int i = 0; i < N; i++ )
+    {
+        TickMeter tm_;
+        tm_.start();
+        net.forward();                          //compute output
+        //! [Make forward pass]
+        tm_.stop();
+        if( i == 0 || tm_.getTimeTicks() < tm.getTimeTicks() )
+            tm = tm_;
+    }
 
     //! [Gather output]
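The timing change above reports the best of N = 3 forward passes instead of a single run, so one-off costs (first-pass allocations, cold caches) do not inflate the measurement. A minimal sketch of the same best-of-N pattern, factored into a helper; `bestOfN` and the `work` callable are illustrative stand-ins (for `net.forward()` in the sample), not code from the commit:

    #include <opencv2/core/utility.hpp> // cv::TickMeter (a local TickMeter works the same)

    // Run `work` N times and keep the fastest run, as in the sample's loop.
    template <typename F>
    cv::TickMeter bestOfN(F work, int N = 3)
    {
        cv::TickMeter best;
        for (int i = 0; i < N; i++)
        {
            cv::TickMeter tm;
            tm.start();
            work();      // the measured call, e.g. net.forward()
            tm.stop();
            if (i == 0 || tm.getTimeTicks() < best.getTimeTicks())
                best = tm;
        }
        return best;
    }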
modules/dnn/src/layers/batch_norm_layer.cpp

@@ -41,6 +41,15 @@ public:
             Mat* inp = inputs[i];
             outputs[i].create(inp->dims, &inp->size.p[0], inp->type());
         }
+
+        varMeanScale = 1.f;
+        if (!hasWeights && !hasBias) {
+            varMeanScale = *blobs[2].ptr<float>();
+            if (varMeanScale != 0)
+                varMeanScale = 1/varMeanScale;
+        }
+
+        cv::pow(blobs[1]*varMeanScale + epsilon, -0.5, invStdMat);
     }
 
     void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
@@ -52,16 +61,6 @@ public:
         int weightsBlobIndex = 2;
         int biasBlobIndex = weightsBlobIndex + hasWeights;
 
-        float varMeanScale = 1;
-        if (!hasWeights && !hasBias) {
-            varMeanScale = *blobs[2].ptr<float>();
-            if (varMeanScale != 0)
-                varMeanScale = 1/varMeanScale;
-        }
-
-        Mat invStdMat;
-        cv::pow(blobs[1]*varMeanScale + epsilon, -0.5, invStdMat);
-
         int rows = inpBlob.size[2];
         int cols = inpBlob.size[3];
@@ -92,7 +91,8 @@ public:
     }
 
     bool hasWeights, hasBias;
-    float epsilon;
+    float epsilon, varMeanScale;
+    Mat invStdMat;
 };
 
 Ptr<BatchNormLayer> BatchNormLayer::create(const LayerParams &params)
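The batch-norm change is a caching move: the inverse standard deviation invStdMat = pow(var * varMeanScale + epsilon, -0.5) and the varMeanScale rescaling of Caffe-style accumulated statistics (blobs[2]) are now computed once in allocate(), i.e. per shape change, instead of on every forward() call, with both promoted to class members. A sketch of the per-element transform those cached values feed, assuming the standard batch-norm formula this layer implements; `normalize` is illustrative, not a function in the file:

    // y = (x - mean_c) * invStd_c * weight_c + bias_c, per channel c;
    // invStd_c = pow(var_c * varMeanScale + epsilon, -0.5f) is precomputed,
    // weight_c/bias_c default to 1/0 when !hasWeights / !hasBias.
    static inline float normalize(float x, float mean_c, float invStd_c,
                                  float weight_c, float bias_c)
    {
        return (x - mean_c) * invStd_c * weight_c + bias_c;
    }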
modules/dnn/src/layers/convolution_layer.cpp

@@ -54,70 +54,25 @@ namespace dnn
 class BaseConvolutionLayerImpl : public ConvolutionLayer
 {
 public:
-    BaseConvolutionLayerImpl();
-    virtual void allocate(const std::vector<Mat*> &inputs, std::vector<Mat> &outputs);
-
-    void init();
-    virtual void computeInpOutShape(const Mat &inpBlob) = 0;
-    bool is1x1() const;
-
-    int numOutput, group;
-    int inpH, inpW, inpCn;
-    int outH, outW, outCn;
-    int inpGroupCn, outGroupCn;
-    int ksize;
-    std::vector<int> colRowBlobShape;
-
-    bool bias;
-    Mat colRowBlob, biasOnesBlob;
-};
-
-//TODO: simultaneously convolution and bias addition for cache optimization
-class ConvolutionLayerImpl : public BaseConvolutionLayerImpl
-{
-public:
-    virtual void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs);
-    virtual void computeInpOutShape(const Mat &inpBlob);
-
-    void im2col(const Mat &srcImg, Mat &dstCol);
-    void im2row(const Mat &srcImg, Mat &dstRow);
-};
-
-class DeConvolutionLayerImpl : public BaseConvolutionLayerImpl
-{
-public:
-    virtual void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs);
-    virtual void computeInpOutShape(const Mat &inpBlob);
-
-    void col2im(const Mat &colMat, Mat &dstImg);
-};
-
-BaseConvolutionLayerImpl::BaseConvolutionLayerImpl():
-    numOutput(-1), group(-1),
-    inpH(0), inpW(0), inpCn(0),
-    outH(0), outW(0), outCn(0),
-    inpGroupCn(0), outGroupCn(0),
-    ksize(0), bias(false)
-{
+    BaseConvolutionLayerImpl()
+    {
+        numOutput = -1;
+        group = -1;
+        inpH = inpW = inpCn = 0;
+        outH = outW = outCn = 0;
+        inpGroupCn = outGroupCn = 0;
+        ksize = 0;
+        bias = false;
 #ifdef HAVE_LAPACK
-    if (getBlasThreads() != cv::getThreadNum())
-    {
-        setBlasThreads(cv::getThreadNum());
-    }
+        int nthreads = cv::getThreadNum();
+        if (getBlasThreads() != nthreads)
+        {
+            setBlasThreads(nthreads);
+        }
 #endif
-}
-
-void BaseConvolutionLayerImpl::init()
-{
-    CV_Assert(blobs.size() >= 1 && blobs.size() <= 2);
-    CV_Assert(blobs[0].dims == 4 && blobs[0].size[3] == kernel.width && blobs[0].size[2] == kernel.height);
-    bias = (blobs.size() >= 2);
-}
-
-void BaseConvolutionLayerImpl::allocate(const std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
-{
-    CV_Assert(inputs.size() > 0);
-    init();
+    }
+
+    void allocate(const std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
+    {
+        CV_Assert(inputs.size() > 0);
+        init();
@@ -151,17 +106,40 @@ void BaseConvolutionLayerImpl::allocate(const std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
             colRowBlob.create((int)colRowBlobShape.size(), &colRowBlobShape[0], input.type());
             colRowBlob.setTo(0);
         }
-}
-
-bool BaseConvolutionLayerImpl::is1x1() const
-{
-    return (kernel.height == 1 && kernel.width == 1) &&
-           (stride.height == 1 && stride.width == 1) &&
-           (dilation.height == 1 && dilation.width == 1);
-}
-
-void ConvolutionLayerImpl::computeInpOutShape(const Mat &input)
+    }
+
+    void init()
+    {
+        CV_Assert(blobs.size() >= 1 && blobs.size() <= 2);
+        CV_Assert(blobs[0].dims == 4 && blobs[0].size[3] == kernel.width &&
+                  blobs[0].size[2] == kernel.height);
+        bias = (blobs.size() >= 2);
+    }
+
+    virtual void computeInpOutShape(const Mat &inpBlob) = 0;
+
+    bool is1x1() const
+    {
+        return (kernel.height == 1 && kernel.width == 1) &&
+               (stride.height == 1 && stride.width == 1) &&
+               (dilation.height == 1 && dilation.width == 1);
+    }
+
+    int numOutput, group;
+    int inpH, inpW, inpCn;
+    int outH, outW, outCn;
+    int inpGroupCn, outGroupCn;
+    int ksize;
+    std::vector<int> colRowBlobShape;
+
+    bool bias;
+    Mat colRowBlob, biasOnesBlob;
+};
+
+//TODO: simultaneously convolution and bias addition for cache optimization
+class ConvolutionLayerImpl : public BaseConvolutionLayerImpl
+{
+public:
+    void computeInpOutShape(const Mat &input)
     {
         CV_Assert(!bias || blobs[1].total() == (size_t)blobs[0].size[0]);
         numOutput = blobs[0].size[0];
@@ -193,10 +171,10 @@ void ConvolutionLayerImpl::computeInpOutShape(const Mat &input)
         colRowBlobShape.clear();
         colRowBlobShape.push_back(outH*outW);
         colRowBlobShape.push_back(ksize);
     }
-}
+    }
 
-void ConvolutionLayerImpl::forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
-{
+    void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
+    {
         CV_Assert(inputs.size() > 0);
 
         Mat weightsMat = blobs[0].reshape(1, outCn);
@@ -212,9 +190,9 @@ void ConvolutionLayerImpl::forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
         {
             for (int g = 0; g < group; g++)
             {
-                Mat colMat, curInp = slice(inpMat, n, _Range(g * inpGroupCn, inpGroupCn));
+                Mat curInp = slice(inpMat, n, _Range(g * inpGroupCn, inpGroupCn));
 
-                im2row(curInp, colMat);
+                im2row(curInp, colRowBlob);
 
                 _Range kerRange(g * outGroupCn, outGroupCn);
                 Mat kerMat = weightsMat.rowRange(kerRange);
@@ -222,7 +200,7 @@ void ConvolutionLayerImpl::forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
                 _Range outRange((g + n * group) * outGroupCn, outGroupCn);
                 Mat dstMat = outMat.rowRange(outRange);
 
-                dnn::gemm(kerMat, colMat, 1, dstMat, 0, GEMM_2_T);
+                dnn::gemm(kerMat, colRowBlob, 1, dstMat, 0, GEMM_2_T);
 
                 if (bias)
                 {
@@ -231,54 +209,28 @@ void ConvolutionLayerImpl::forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
                 }
             }
         }
-}
-
-void ConvolutionLayerImpl::im2col(const Mat &srcImg, Mat &dstCol)
-{
-    if (is1x1())
-    {
-        dstCol = srcImg.reshape(1, ksize);
-        return;
-    }
-
-    Mat &colMat = colRowBlob;
-    if (srcImg.type() == CV_32F)
-        im2col_CpuPBody<float>::run(srcImg.ptr<float>(), inpGroupCn, inpH, inpW, kernel.height,
-                                    kernel.width, pad.height, pad.width, stride.height, stride.width,
-                                    dilation.height, dilation.width, outH, outW, colMat.ptr<float>());
-    if (srcImg.type() == CV_64F)
-        im2col_CpuPBody<double>::run(srcImg.ptr<double>(), inpGroupCn, inpH, inpW, kernel.height,
-                                     kernel.width, pad.height, pad.width, stride.height, stride.width,
-                                     dilation.height, dilation.width, outH, outW, colMat.ptr<double>());
-
-    dstCol = colMat;
-}
-
-void ConvolutionLayerImpl::im2row(const Mat &srcImg, Mat &dstRow)
-{
-    if (is1x1())
-    {
-        dstRow = srcImg.reshape(1, ksize).t();
-        return;
-    }
-
-    Mat &colMat = colRowBlob;
-    if (srcImg.type() == CV_32F)
-        im2row_CpuPBody<float>::run(srcImg.ptr<float>(), inpGroupCn, inpH, inpW, kernel.height,
-                                    kernel.width, pad.height, pad.width, stride.height, stride.width,
-                                    dilation.height, dilation.width, outH, outW, colMat.ptr<float>());
-    if (srcImg.type() == CV_64F)
-        im2row_CpuPBody<double>::run(srcImg.ptr<double>(), inpGroupCn, inpH, inpW, kernel.height,
-                                     kernel.width, pad.height, pad.width, stride.height, stride.width,
-                                     dilation.height, dilation.width, outH, outW, colMat.ptr<double>());
-
-    dstRow = colMat;
-}
-
-//Deconvolution
-
-void DeConvolutionLayerImpl::computeInpOutShape(const Mat &inpBlob)
+    }
+
+    void im2row(const Mat &srcImg, Mat &dstRow)
+    {
+        if (is1x1())
+        {
+            transpose(srcImg.reshape(1, ksize), dstRow);
+        }
+        else
+        {
+            cv::dnn::im2row(srcImg.ptr<float>(), inpGroupCn, inpH, inpW, kernel.height,
+                            kernel.width, pad.height, pad.width, stride.height, stride.width,
+                            dilation.height, dilation.width, outH, outW, dstRow.ptr<float>());
+        }
+    }
+};
+
+class DeConvolutionLayerImpl : public BaseConvolutionLayerImpl
+{
+public:
+    void computeInpOutShape(const Mat &inpBlob)
     {
         CV_Assert(!bias || blobs[1].total() == (size_t)blobs[0].size[0]);
         numOutput = blobs[0].size[0];
@@ -302,10 +254,21 @@ void DeConvolutionLayerImpl::computeInpOutShape(const Mat &inpBlob)
         colRowBlobShape.clear();
         colRowBlobShape.push_back(ksize);
         colRowBlobShape.push_back(inpH * inpW);
-    }
 
-void DeConvolutionLayerImpl::forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
-{
+        ofsbuf.resize(ksize*3);
+        for( int k = 0; k < ksize; k++ )
+        {
+            int w_offset = k % kernel.width;
+            int h_offset = (k / kernel.width) % kernel.height;
+            int c_im = k / kernel.height / kernel.width;
+            ofsbuf[k*3] = w_offset;
+            ofsbuf[k*3+1] = h_offset;
+            ofsbuf[k*3+2] = c_im;
+        }
+    }
+
+    void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
+    {
         Mat weightsMat = blobs[0].reshape(1, inpCn);
         Mat biasesMat = bias ? blobs[1].reshape(1, outCn) : Mat();
@@ -338,44 +301,22 @@ void DeConvolutionLayerImpl::forward(std::vector<Mat *> &inputs, std::vector<Mat> &outputs)
                 }
             }
         }
     }
-}
-
-void DeConvolutionLayerImpl::col2im(const Mat &colMat, Mat &dstImg)
-{
-    if (is1x1())
-    {
-        dstImg = colMat;
-        return;
-    }
-    if (dstImg.type() == CV_32F)
-        col2im_CpuPBody<float>::run(colMat.ptr<float>(), outGroupCn, outH, outW,
-                                    kernel.height, kernel.width, pad.height, pad.width,
-                                    stride.height, stride.width, dstImg.ptr<float>());
-    if (dstImg.type() == CV_64F)
-        col2im_CpuPBody<double>::run(colMat.ptr<double>(), inpGroupCn, inpH, inpW,
-                                     kernel.height, kernel.width, pad.height, pad.width,
-                                     stride.height, stride.width, dstImg.ptr<double>());
-}
-
-//Initializers
-
-/*Ptr<BaseConvolutionLayer> ConvolutionLayer::create(Size kernel, Size stride, Size pad, Size dilation)
-{
-    ConvolutionLayerImpl *l = new ConvolutionLayerImpl();
-    l->kernel = kernel;
-    l->pad = pad;
-    l->stride = stride;
-    l->dilation = dilation;
-    return Ptr<BaseConvolutionLayer>(l);
-}
-
-Ptr<BaseConvolutionLayer> DeconvolutionLayer::create(Size kernel, Size stride, Size pad, Size dilation, Size adjustPad)
-{
-    DeConvolutionLayerImpl *l = new DeConvolutionLayerImpl();
-    l->kernel = kernel;
-    l->pad = pad;
-    l->stride = stride;
-    l->dilation = dilation;
-    l->adjustPad = adjustPad;
-    return Ptr<BaseConvolutionLayer>(l);
-}*/
+    }
+
+    void col2im(const Mat &colMat, Mat &dstImg)
+    {
+        if (is1x1())
+        {
+            dstImg = colMat;
+            return;
+        }
+        cv::dnn::col2im(colMat.ptr<float>(), outGroupCn, outH, outW, kernel.height, kernel.width,
+                        pad.height, pad.width, stride.height, stride.width,
+                        dilation.height, dilation.width, dstImg.ptr<float>(), &ofsbuf[0]);
+    }
+
+    std::vector<int> ofsbuf;
+};
 
 //Convolution and Deconvolution
 
 static void initConvDeconvLayerFromCaffe(Ptr<BaseConvolutionLayer> l, const LayerParams &params)
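The convolution rewrite above collapses the declaration/definition split into inline class bodies, drops the CV_64F and im2col code paths, and leaves a single float pipeline: im2row unrolls each output pixel's receptive field into one row of colRowBlob, and one dnn::gemm(kerMat, colRowBlob, 1, dstMat, 0, GEMM_2_T) per group computes the output. A naive reference im2row as a sketch of that layout (illustrative and simplified from the commit's version: no dilation, square padding, no fast path); row r of `rows` holds the C*kh*kw inputs that produce output pixel r, so conv is exactly Y (outCn x outH*outW) = W (outCn x C*kh*kw) * rows^T:

    #include <vector>

    static void im2rowRef(const float* img, int C, int H, int W,
                          int kh, int kw, int pad, int stride,
                          int outH, int outW, std::vector<float>& rows)
    {
        rows.assign((size_t)outH * outW * C * kh * kw, 0.f); // zeros double as padding
        for (int oy = 0; oy < outH; oy++)
            for (int ox = 0; ox < outW; ox++)
            {
                float* row = &rows[((size_t)oy * outW + ox) * C * kh * kw];
                for (int c = 0; c < C; c++)
                    for (int y = 0; y < kh; y++)
                        for (int x = 0; x < kw; x++)
                        {
                            int iy = oy * stride - pad + y;
                            int ix = ox * stride - pad + x;
                            if (iy >= 0 && iy < H && ix >= 0 && ix < W)
                                row[(c * kh + y) * kw + x] = img[(c * H + iy) * W + ix];
                        }
            }
    }

With rows laid out this way each output pixel's data is contiguous, which is why the 1x1/stride-1 case can skip the copy entirely and just transpose the reshaped input.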
modules/dnn/src/layers/elementwise_layers.cpp

@@ -15,8 +15,7 @@ using std::pow;
 template<typename Func>
 class ElementWiseLayer : public Func::Layer
 {
-    Func func;
+public:
     template<typename Dtype>
     class PBody : public cv::ParallelLoopBody
     {
@@ -35,9 +34,7 @@ class ElementWiseLayer : public Func::Layer
         }
     };
 
-public:
-    ElementWiseLayer(const Func &f=Func()) : func(f) {}
+    ElementWiseLayer(bool run_parallel_=false, const Func &f=Func()) : func(f), run_parallel(run_parallel_) {}
 
     void allocate(const std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
     {
@@ -58,9 +55,16 @@ public:
             Range sizeRange = Range(0, dst.total());
             CV_Assert(src.type() == CV_32F);
-            cv::parallel_for_(sizeRange, PBody<float>(dst, func));
+            PBody<float> body(dst, func);
+            if( run_parallel )
+                cv::parallel_for_(sizeRange, body);
+            else
+                body(sizeRange);
         }
     }
 
+    Func func;
+    bool run_parallel;
 };
 
 struct ReLUFunctor
@@ -135,8 +139,24 @@ struct PowerFunctor
     template<typename TFloat>
     inline TFloat operator()(TFloat x) const
     {
-        return power == 1.0f ? (TFloat)shift + (TFloat)scale * x :
-               pow((TFloat)shift + (TFloat)scale * x, (TFloat)power);
+        return pow((TFloat)shift + (TFloat)scale * x, (TFloat)power);
     }
 };
+
+struct PowerFunctor1
+{
+    typedef PowerLayer Layer;
+
+    const float scale;
+    const float shift;
+
+    PowerFunctor1(float scale_ = 1.f, float shift_ = 0)
+        : scale(scale_), shift(shift_) {}
+
+    template<typename TFloat>
+    inline TFloat operator()(TFloat x) const
+    {
+        return (TFloat)shift + (TFloat)scale * x;
+    }
+};
@@ -165,12 +185,12 @@ public:
     void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
     {
         CV_Assert(inputs.size() == 1);
         Mat &inpBlob = *inputs[0];
 
         for (size_t ii = 0; ii < outputs.size(); ii++)
         {
             Mat &outBlob = outputs[ii];
+            CV_Assert(inpBlob.isContinuous() && outBlob.isContinuous());
 
             CV_Assert(blobs[0].total() == inpBlob.size[1]);
@@ -181,8 +201,16 @@ public:
                 Mat inpBlobPlane = getPlane(inpBlob, 0, n);
                 Mat outBlobPlane = getPlane(outBlob, 0, n);
 
-                threshold(inpBlobPlane, outBlobPlane, 0, 0, cv::THRESH_TOZERO_INV);
-                scaleAdd(outBlobPlane, slopeWeight-1, inpBlobPlane, outBlobPlane);
+                size_t i, planeTotal = inpBlobPlane.total();
+                const float* inptr = inpBlobPlane.ptr<float>();
+                float* outptr = outBlobPlane.ptr<float>();
+                for( i = 0; i < planeTotal; i++ )
+                {
+                    float val = inptr[i];
+                    outptr[i] = val*(val >= 0.f ? 1.f : slopeWeight);
+                }
+                //threshold(inpBlobPlane, outBlobPlane, 0, 0, cv::THRESH_TOZERO_INV);
+                //scaleAdd(outBlobPlane, slopeWeight-1, inpBlobPlane, outBlobPlane);
             }
         }
     }
@@ -196,7 +224,7 @@ Ptr<_Layer> _Layer::create() { \
 Ptr<ReLULayer> ReLULayer::create(const LayerParams& params)
 {
     float negativeSlope = params.get<float>("negative_slope", 0.f);
-    Ptr<ReLULayer> l(new ElementWiseLayer<ReLUFunctor>(ReLUFunctor(negativeSlope)));
+    Ptr<ReLULayer> l(new ElementWiseLayer<ReLUFunctor>(false, ReLUFunctor(negativeSlope)));
     l->setParamsFrom(params);
 
     return l;
@@ -204,7 +232,7 @@ Ptr<ReLULayer> ReLULayer::create(const LayerParams& params)
 Ptr<TanHLayer> TanHLayer::create(const LayerParams& params)
 {
-    Ptr<TanHLayer> l(new ElementWiseLayer<TanHFunctor>());
+    Ptr<TanHLayer> l(new ElementWiseLayer<TanHFunctor>(true));
     l->setParamsFrom(params);
 
     return l;
@@ -212,7 +240,7 @@ Ptr<TanHLayer> TanHLayer::create(const LayerParams& params)
 Ptr<SigmoidLayer> SigmoidLayer::create(const LayerParams& params)
 {
-    Ptr<SigmoidLayer> l(new ElementWiseLayer<SigmoidFunctor>());
+    Ptr<SigmoidLayer> l(new ElementWiseLayer<SigmoidFunctor>(true));
     l->setParamsFrom(params);
 
     return l;
@@ -228,7 +256,7 @@ Ptr<AbsLayer> AbsLayer::create(const LayerParams& params)
 Ptr<BNLLLayer> BNLLLayer::create(const LayerParams& params)
 {
-    Ptr<BNLLLayer> l(new ElementWiseLayer<BNLLFunctor>());
+    Ptr<BNLLLayer> l(new ElementWiseLayer<BNLLFunctor>(true));
     l->setParamsFrom(params);
 
     return l;
@@ -239,7 +267,9 @@ Ptr<PowerLayer> PowerLayer::create(const LayerParams& params)
     float power = params.get<float>("power", 1.0f);
     float scale = params.get<float>("scale", 1.0f);
     float shift = params.get<float>("shift", 0.0f);
-    Ptr<PowerLayer> l(new ElementWiseLayer<PowerFunctor>(PowerFunctor(power, scale, shift)));
+    Ptr<PowerLayer> l(power == 1.f ?
+        (PowerLayer*)(new ElementWiseLayer<PowerFunctor1>(false, PowerFunctor1(scale, shift))) :
+        (PowerLayer*)(new ElementWiseLayer<PowerFunctor>(true, PowerFunctor(power, scale, shift))));
     l->setParamsFrom(params);
 
     return l;
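The recurring (false, ...) / (true, ...) constructor arguments above are the new run_parallel flag: for cheap functors (ReLU, and the power == 1 case now served by the linear PowerFunctor1) the per-element work is too small to amortize cv::parallel_for_'s scheduling overhead, so the body runs serially; transcendental functors (tanh, sigmoid, BNLL, general pow) keep the parallel path. A sketch of the dispatch, with `runElementWise` as an illustrative stand-in for the logic inside forward():

    #include <opencv2/core.hpp>

    // Either hand the body to parallel_for_ or invoke it directly on the
    // whole range -- same semantics, different scheduling cost.
    static void runElementWise(const cv::ParallelLoopBody& body, int total,
                               bool runParallel)
    {
        cv::Range all(0, total);
        if (runParallel)
            cv::parallel_for_(all, body); // pays off for tanh/sigmoid/pow
        else
            body(all);                    // cheaper for ReLU and linear power
    }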
modules/dnn/src/layers/eltwise_layer.cpp

@@ -98,15 +98,14 @@ public:
     void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
     {
+        Mat& output = outputs[0];
         switch (op)
         {
         case SUM:
             {
                 CV_Assert(coeffs.size() == 0 || coeffs.size() == inputs.size());
-                Mat& output = outputs[0];
-                output.setTo(0.);
                 if (0 < coeffs.size())
                 {
+                    output.setTo(0.);
                     for (size_t i = 0; i < inputs.size(); i++)
                     {
                         output += *inputs[i] * coeffs[i];
@@ -114,32 +113,26 @@ public:
                 }
                 else
                 {
-                    for (size_t i = 0; i < inputs.size(); i++)
+                    add(*inputs[0], *inputs[1], output);
+                    for (size_t i = 2; i < inputs.size(); i++)
                     {
                         output += *inputs[i];
                     }
                 }
             }
             break;
         case PROD:
             {
-                Mat& output = outputs[0];
                 output.setTo(1.);
                 for (size_t i = 0; i < inputs.size(); i++)
                 {
                     output = output.mul(*inputs[i]);
                 }
             }
             break;
         case MAX:
             {
-                Mat& output = outputs[0];
                 cv::max(*inputs[0], *inputs[1], output);
                 for (size_t i = 2; i < inputs.size(); i++)
                 {
                     cv::max(output, *inputs[i], output);
                 }
             }
             break;
         default:
             CV_Assert(0);
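Two small savings in the eltwise rewrite: `output` is hoisted out of the switch, and the no-coefficient SUM (like MAX) is now seeded from the first two inputs instead of setTo(0) plus a fold from index 0, dropping one full pass over the output. The SUM case in isolation (a sketch; `eltwiseSum` is illustrative, not a function in the file):

    #include <opencv2/core.hpp>
    #include <vector>

    // add() writes the first partial sum directly, so no setTo(0) pass is needed.
    static void eltwiseSum(const std::vector<cv::Mat*>& inputs, cv::Mat& output)
    {
        CV_Assert(inputs.size() >= 2);
        cv::add(*inputs[0], *inputs[1], output);
        for (size_t i = 2; i < inputs.size(); i++)
            output += *inputs[i];
    }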
modules/dnn/src/layers/op_im2col.cpp

@@ -44,3 +44,326 @@
 #include "op_im2col.hpp"
 #include "opencl_kernels_dnn.hpp"
+
+namespace cv
+{
+namespace dnn
+{
+
+#if 0
+template <typename Dtype>
+class im2col_CpuPBody : public cv::ParallelLoopBody
+{
+    const Dtype* data_im;
+    int channels, height, width;
+    int kernel_h, kernel_w;
+    int pad_h, pad_w;
+    int stride_h, stride_w;
+    int dilation_h, dilation_w;
+    Dtype* data_col;
+    int height_col, width_col, channels_col;
+
+    im2col_CpuPBody() {}
+public:
+    static void run(const Dtype* data_im,
+                    int channels, int height, int width,
+                    int kernel_h, int kernel_w,
+                    int pad_h, int pad_w,
+                    int stride_h, int stride_w,
+                    int dilation_h, int dilation_w,
+                    int height_col, int width_col,
+                    Dtype* data_col)
+    {
+        im2col_CpuPBody<Dtype> t;
+        t.data_im = data_im;
+        t.data_col = data_col;
+        t.channels = channels; t.height = height; t.width = width;
+        t.kernel_h = kernel_h; t.kernel_w = kernel_w;
+        t.pad_h = pad_h; t.pad_w = pad_w;
+        t.stride_h = stride_h; t.stride_w = stride_w;
+        t.dilation_h = dilation_h; t.dilation_w = dilation_w;
+        t.height_col = height_col;
+        t.width_col = width_col;
+        t.channels_col = channels * kernel_h * kernel_w;
+
+        cv::parallel_for_(Range(0, t.channels_col), t);
+    }
+
+    virtual void operator ()(const Range &r) const
+    {
+        for (int c = r.start; c < r.end; ++c)
+        {
+            int w_offset = c % kernel_w;
+            int h_offset = (c / kernel_w) % kernel_h;
+            int c_im = c / kernel_h / kernel_w;
+            for (int h = 0; h < height_col; ++h)
+            {
+                for (int w = 0; w < width_col; ++w)
+                {
+                    int h_pad = h * stride_h - pad_h + h_offset * dilation_h;
+                    int w_pad = w * stride_w - pad_w + w_offset * dilation_w;
+                    if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
+                        data_col[(c * height_col + h) * width_col + w] =
+                            data_im[(c_im * height + h_pad) * width + w_pad];
+                    else
+                        data_col[(c * height_col + h) * width_col + w] = 0;
+                }
+            }
+        }
+    }
+};
+#endif
+
+template <typename Dtype>
+class im2row_CpuPBody : public cv::ParallelLoopBody
+{
+    const Dtype* data_im;
+    int channels, height, width;
+    int kernel_h, kernel_w;
+    int pad_h, pad_w;
+    int stride_h, stride_w;
+    int dilation_h, dilation_w;
+    Dtype* data_col;
+    int height_col, width_col, channels_col;
+
+    im2row_CpuPBody() {}
+public:
+    static void run(const Dtype* data_im,
+                    int channels, int height, int width,
+                    int kernel_h, int kernel_w,
+                    int pad_h, int pad_w,
+                    int stride_h, int stride_w,
+                    int dilation_h, int dilation_w,
+                    int height_col, int width_col,
+                    Dtype* data_col)
+    {
+        im2row_CpuPBody<Dtype> t;
+        t.data_im = data_im;
+        t.data_col = data_col;
+        t.channels = channels; t.height = height; t.width = width;
+        t.kernel_h = kernel_h; t.kernel_w = kernel_w;
+        t.pad_h = pad_h; t.pad_w = pad_w;
+        t.stride_h = stride_h; t.stride_w = stride_w;
+        t.dilation_h = dilation_h; t.dilation_w = dilation_w;
+        t.height_col = height_col;
+        t.width_col = width_col;
+        t.channels_col = channels * kernel_h * kernel_w;
+
+        int total = t.height_col*t.width_col;
+#if 1
+        t(Range(0, total));
+#else
+        cv::parallel_for_(Range(0, total), t, 16);
+#endif
+    }
+
+    virtual void operator ()(const Range &r) const
+    {
+        int dh = dilation_h, dw = dilation_w;
+        int kh = kernel_h, kw = kernel_w;
+        Dtype* data_col_ = data_col;
+        const Dtype* data_im_ = data_im;
+        int kelems = kh*kw;
+        AutoBuffer<int> ofs_(kelems);
+        int* ofs = ofs_;
+        int k = 0;
+
+        for( int k_r = 0; k_r < kernel_h; k_r++ )
+            for( int k_c = 0; k_c < kernel_w; k_c++, k++ )
+                ofs[k] = k_r*dh*width + k_c*dw;
+
+        for (int row = r.start; row < r.end; ++row)
+        {
+            int out_c = row % width_col;
+            int out_r = row / width_col;
+            int out_row_offset = row*kh*kw*channels;
+
+            int start_in_r = out_r * stride_h - pad_h;
+            int start_in_c = out_c * stride_w - pad_w;
+            int start_k_r = std::max(0, (-start_in_r + dilation_h-1)/dilation_h);
+            int end_k_r = std::min(kh, (height - start_in_r + dilation_h-1)/dilation_h);
+            int start_k_c = std::max(0, (-start_in_c + dilation_w-1)/dilation_w);
+            int end_k_c = std::min(kw, (width - start_in_c + dilation_w-1)/dilation_w);
+
+            if( start_k_r == 0 && end_k_r == kh && start_k_c == 0 && end_k_c == kw )
+            {
+                for( int i_c = 0; i_c < channels; i_c++ )
+                {
+                    float* data_col_c = data_col_ + out_row_offset + i_c*kh*kw;
+                    const float* data_im_c = data_im_ + (i_c*height + start_in_r)*width + start_in_c;
+
+                    for( k = 0; k < kelems; k++ )
+                    {
+                        data_col_c[k] = data_im_c[ofs[k]];
+                    }
+                }
+            }
+            else
+            {
+                memset(data_col_, 0, kw*kh*channels*sizeof(data_col_[0]));
+                for (int i_c = 0; i_c < channels; i_c++)
+                {
+                    int channels_offset = i_c * width * height;
+                    int out_ch_offset = i_c*kh*kw;
+                    int in_r = start_in_r + start_k_r*dh;
+
+                    for (int k_r = start_k_r; k_r < end_k_r; k_r++, in_r += dh)
+                    {
+                        int row_offset = in_r*width;
+                        int out_col_offset = k_r*kw;
+                        int in_c = start_in_c + start_k_c*dw;
+
+                        for (int k_c = start_k_c; k_c < end_k_c; k_c++, in_c += dw)
+                        {
+                            int in_index = channels_offset + row_offset + in_c;
+                            int out_index = out_row_offset + out_ch_offset + out_col_offset + k_c;
+                            data_col_[out_index] = data_im_[in_index];
+                        }
+                    }
+                }
+            }
+        }
+    }
+};
+
+void im2row(const float* data_im, int channels, int height, int width,
+            int kernel_h, int kernel_w, int pad_h, int pad_w,
+            int stride_h, int stride_w, int dilation_h, int dilation_w,
+            int height_col, int width_col, float* data_col)
+{
+    im2row_CpuPBody<float>::run(data_im, channels, height, width,
+                                kernel_h, kernel_w, pad_h, pad_w,
+                                stride_h, stride_w, dilation_h, dilation_w,
+                                height_col, width_col, data_col);
+}
+
+#if 0
+template <typename Dtype>
+class col2im_CpuPBody : public cv::ParallelLoopBody
+{
+    const Dtype* data_col;
+    int channels, height, width;
+    int kernel_h, kernel_w;
+    int pad_h, pad_w;
+    int stride_h, stride_w;
+    Dtype* data_im;
+    int height_col, width_col;
+
+    col2im_CpuPBody() {}
+public:
+    static void run(const Dtype* data_col,
+                    int channels, int height, int width,
+                    int kernel_h, int kernel_w,
+                    int pad_h, int pad_w,
+                    int stride_h, int stride_w,
+                    Dtype* data_im)
+    {
+        //TODO: single-threaded version switch
+        col2im_CpuPBody t;
+        t.data_col = data_col;
+        t.data_im = data_im;
+        t.channels = channels; t.height = height; t.width = width;
+        t.kernel_h = kernel_h; t.kernel_w = kernel_w;
+        t.pad_h = pad_h; t.pad_w = pad_w;
+        t.stride_h = stride_h; t.stride_w = stride_w;
+        t.height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
+        t.width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
+
+        int img_total = channels * height * width;
+        cv::parallel_for_(Range(0, img_total), t);
+    }
+
+    virtual void operator ()(const Range &r) const
+    {
+        const Dtype* data_col_ = data_col;
+        Dtype* data_im_ = data_im;
+        int coeff_h_col = (1 - stride_h * kernel_w * height_col) * width_col;
+        int coeff_w_col = (1 - stride_w * height_col * width_col);
+        for (int index = r.start; index < r.end; index++)
+        {
+            Dtype val = 0;
+            int w = index % width + pad_w;
+            int h = (index / width) % height + pad_h;
+            int c = index / (width * height);
+
+            // compute the start and end of the output
+            int w_col_start = (w < kernel_w) ? 0 : (w - kernel_w) / stride_w + 1;
+            int w_col_end = std::min(w / stride_w + 1, width_col);
+            int h_col_start = (h < kernel_h) ? 0 : (h - kernel_h) / stride_h + 1;
+            int h_col_end = std::min(h / stride_h + 1, height_col);
+
+            // equivalent implementation
+            int offset =
+                (c * kernel_h * kernel_w + h * kernel_w + w) * height_col * width_col;
+            for (int h_col = h_col_start; h_col < h_col_end; ++h_col) {
+                for (int w_col = w_col_start; w_col < w_col_end; ++w_col) {
+                    val += data_col_[offset + h_col * coeff_h_col + w_col * coeff_w_col];
+                }
+            }
+            data_im_[index] = val;
+        }
+    }
+};
+#endif
+
+//single-threaded version
+template <typename Dtype>
+void col2im_cpu(const Dtype* data_col,
+                int channels, int height, int width,
+                int kernel_h, int kernel_w,
+                int pad_h, int pad_w,
+                int stride_h, int stride_w,
+                int dilation_h, int dilation_w,
+                Dtype* data_im,
+                const int* ofsbuf)
+{
+    int height_col = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
+    int width_col = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
+    int channels_col = channels * kernel_h * kernel_w;
+
+    std::memset(data_im, 0, height * width * channels * sizeof(Dtype));
+
+    for (int c = 0; c < channels_col; ++c, ofsbuf += 3)
+    {
+        //int w_offset = c % kernel_w;
+        //int h_offset = (c / kernel_w) % kernel_h;
+        //int c_im = c / kernel_h / kernel_w;
+        int w_offset = ofsbuf[0];
+        int h_offset = ofsbuf[1];
+        int c_im = ofsbuf[2];
+
+        for (int h = 0; h < height_col; ++h)
+        {
+            for (int w = 0; w < width_col; ++w)
+            {
+                int h_pad = h * stride_h - pad_h + h_offset * dilation_h;
+                int w_pad = w * stride_w - pad_w + w_offset * dilation_w;
+
+                if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
+                    data_im[(c_im * height + h_pad) * width + w_pad] +=
+                        data_col[(c * height_col + h) * width_col + w];
+            }
+        }
+    }
+}
+
+void col2im(const float* data_col, int channels, int height, int width,
+            int kernel_h, int kernel_w, int pad_h, int pad_w,
+            int stride_h, int stride_w, int dilation_h, int dilation_w,
+            float* data_im, const int* ofsbuf)
+{
+    //col2im_CpuPBody<float>::run(data_col, channels, height, width, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, data_im);
+    col2im_cpu(data_col, channels, height, width, kernel_h, kernel_w,
+               pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, data_im, ofsbuf);
+}
+
+}
+}
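The new im2row_CpuPBody splits each output row into two cases: when the kernel window lies entirely inside the image (start_k_r == 0 && end_k_r == kh && ...), every tap is a straight indexed load through a precomputed offset table, ofs[k] = k_r*dilation_h*width + k_c*dilation_w; only border windows take the clipped loop. The offset-table idea in isolation (illustrative helpers, not functions from the file):

    #include <vector>

    // Flat offsets of the kh*kw kernel taps relative to a window's top-left
    // pixel, with dilation folded in -- computed once, reused per window.
    static std::vector<int> makeTapOffsets(int kh, int kw, int dh, int dw, int width)
    {
        std::vector<int> ofs((size_t)kh * kw);
        for (int r = 0; r < kh; r++)
            for (int c = 0; c < kw; c++)
                ofs[(size_t)r * kw + c] = r * dh * width + c * dw;
        return ofs;
    }

    // Interior fast path: gather one channel's window with no bounds checks.
    static void gatherWindow(const float* imgTopLeft, const std::vector<int>& ofs,
                             float* dst)
    {
        for (size_t k = 0; k < ofs.size(); k++)
            dst[k] = imgTopLeft[ofs[k]];
    }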
modules/dnn/src/layers/op_im2col.hpp

@@ -49,264 +49,15 @@ namespace cv
 namespace dnn
 {
 
-template <typename Dtype>
-class im2col_CpuPBody : public cv::ParallelLoopBody
-{
-    const Dtype* data_im;
-    int channels, height, width;
-    int kernel_h, kernel_w;
-    int pad_h, pad_w;
-    int stride_h, stride_w;
-    int dilation_h, dilation_w;
-    Dtype* data_col;
-    int height_col, width_col, channels_col;
-
-    im2col_CpuPBody() {}
-public:
-    static void run(const Dtype* data_im,
-                    int channels, int height, int width,
-                    int kernel_h, int kernel_w,
-                    int pad_h, int pad_w,
-                    int stride_h, int stride_w,
-                    int dilation_h, int dilation_w,
-                    int height_col, int width_col,
-                    Dtype* data_col)
-    {
-        im2col_CpuPBody<Dtype> t;
-        t.data_im = data_im;
-        t.data_col = data_col;
-        t.channels = channels; t.height = height; t.width = width;
-        t.kernel_h = kernel_h; t.kernel_w = kernel_w;
-        t.pad_h = pad_h; t.pad_w = pad_w;
-        t.stride_h = stride_h; t.stride_w = stride_w;
-        t.dilation_h = dilation_h; t.dilation_w = dilation_w;
-        t.height_col = height_col;
-        t.width_col = width_col;
-        t.channels_col = channels * kernel_h * kernel_w;
-        cv::parallel_for_(Range(0, t.channels_col), t);
-    }
-
-    virtual void operator ()(const Range &r) const
-    {
-        for (int c = r.start; c < r.end; ++c)
-        {
-            int w_offset = c % kernel_w;
-            int h_offset = (c / kernel_w) % kernel_h;
-            int c_im = c / kernel_h / kernel_w;
-            for (int h = 0; h < height_col; ++h)
-            {
-                for (int w = 0; w < width_col; ++w)
-                {
-                    int h_pad = h * stride_h - pad_h + h_offset * dilation_h;
-                    int w_pad = w * stride_w - pad_w + w_offset * dilation_w;
-                    if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
-                        data_col[(c * height_col + h) * width_col + w] =
-                            data_im[(c_im * height + h_pad) * width + w_pad];
-                    else
-                        data_col[(c * height_col + h) * width_col + w] = 0;
-                }
-            }
-        }
-    }
-};
-
-template <typename Dtype>
-class im2row_CpuPBody : public cv::ParallelLoopBody
-{
-    const Dtype* data_im;
-    int channels, height, width;
-    int kernel_h, kernel_w;
-    int pad_h, pad_w;
-    int stride_h, stride_w;
-    int dilation_h, dilation_w;
-    Dtype* data_col;
-    int height_col, width_col, channels_col;
-
-    im2row_CpuPBody() {}
-public:
-    static void run(const Dtype* data_im,
-                    int channels, int height, int width,
-                    int kernel_h, int kernel_w,
-                    int pad_h, int pad_w,
-                    int stride_h, int stride_w,
-                    int dilation_h, int dilation_w,
-                    int height_col, int width_col,
-                    Dtype* data_col)
-    {
-        im2row_CpuPBody<Dtype> t;
-        t.data_im = data_im;
-        t.data_col = data_col;
-        t.channels = channels; t.height = height; t.width = width;
-        t.kernel_h = kernel_h; t.kernel_w = kernel_w;
-        t.pad_h = pad_h; t.pad_w = pad_w;
-        t.stride_h = stride_h; t.stride_w = stride_w;
-        t.dilation_h = dilation_h; t.dilation_w = dilation_w;
-        t.height_col = height_col;
-        t.width_col = width_col;
-        t.channels_col = channels * kernel_h * kernel_w;
-
-        cv::parallel_for_(Range(0, t.height_col*t.width_col), t, 16);
-    }
-
-    virtual void operator ()(const Range &r) const
-    {
-        int dh = dilation_h, dw = dilation_w;
-        Dtype* data_col_ = data_col;
-        const Dtype* data_im_ = data_im;
-
-        for (int row = r.start; row < r.end; ++row)
-        {
-            int out_c = row % width_col;
-            int out_r = row / width_col;
-            int out_row_offset = row*kernel_h*kernel_w*channels;
-
-            int start_in_r = out_r * stride_h - pad_h;
-            int start_in_c = out_c * stride_w - pad_w;
-            int start_k_r = std::max(0, cvCeil(-start_in_r / (float)dilation_h));
-            int end_k_r = std::min(kernel_h, cvCeil((height - start_in_r) / (float)dilation_h));
-            int start_k_c = std::max(0, cvCeil(-start_in_c / (float)dilation_w));
-            int end_k_c = std::min(kernel_w, cvCeil((width - start_in_c) / (float)dilation_w));
-
-            for (int i_c = 0; i_c < channels; i_c++)
-            {
-                int channels_offset = i_c * width * height;
-                int out_ch_offset = i_c*kernel_h*kernel_w;
-                int in_r = start_in_r + start_k_r*dilation_h;
-
-                for (int k_r = start_k_r; k_r < end_k_r; k_r++, in_r += dh)
-                {
-                    int row_offset = in_r*width;
-                    int out_col_offset = k_r*kernel_w;
-                    int in_c = start_in_c + start_k_c*dilation_w;
-
-                    for (int k_c = start_k_c; k_c < end_k_c; k_c++, in_c += dw)
-                    {
-                        int in_index = channels_offset + row_offset + in_c;
-                        int out_index = out_row_offset + out_ch_offset + out_col_offset + k_c;
-                        data_col_[out_index] = data_im_[in_index];
-                    }
-                }
-            }
-        }
-    }
-};
-
-template <typename Dtype>
-class col2im_CpuPBody : public cv::ParallelLoopBody
-{
-    const Dtype* data_col;
-    int channels, height, width;
-    int kernel_h, kernel_w;
-    int pad_h, pad_w;
-    int stride_h, stride_w;
-    Dtype* data_im;
-    int height_col, width_col;
-
-    col2im_CpuPBody() {}
-public:
-    static void run(const Dtype* data_col,
-                    int channels, int height, int width,
-                    int kernel_h, int kernel_w,
-                    int pad_h, int pad_w,
-                    int stride_h, int stride_w,
-                    Dtype* data_im)
-    {
-        //TODO: single-threaded version switch
-        col2im_CpuPBody t;
-        t.data_col = data_col;
-        t.data_im = data_im;
-        t.channels = channels; t.height = height; t.width = width;
-        t.kernel_h = kernel_h; t.kernel_w = kernel_w;
-        t.pad_h = pad_h; t.pad_w = pad_w;
-        t.stride_h = stride_h; t.stride_w = stride_w;
-        t.height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
-        t.width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
-
-        int img_total = channels * height * width;
-        cv::parallel_for_(Range(0, img_total), t);
-    }
-
-    virtual void operator ()(const Range &r) const
-    {
-        const Dtype* data_col_ = data_col;
-        Dtype* data_im_ = data_im;
-        int coeff_h_col = (1 - stride_h * kernel_w * height_col) * width_col;
-        int coeff_w_col = (1 - stride_w * height_col * width_col);
-        for (int index = r.start; index < r.end; index++)
-        {
-            Dtype val = 0;
-            int w = index % width + pad_w;
-            int h = (index / width) % height + pad_h;
-            int c = index / (width * height);
-
-            // compute the start and end of the output
-            int w_col_start = (w < kernel_w) ? 0 : (w - kernel_w) / stride_w + 1;
-            int w_col_end = std::min(w / stride_w + 1, width_col);
-            int h_col_start = (h < kernel_h) ? 0 : (h - kernel_h) / stride_h + 1;
-            int h_col_end = std::min(h / stride_h + 1, height_col);
-
-            // equivalent implementation
-            int offset =
-                (c * kernel_h * kernel_w + h * kernel_w + w) * height_col * width_col;
-            for (int h_col = h_col_start; h_col < h_col_end; ++h_col) {
-                for (int w_col = w_col_start; w_col < w_col_end; ++w_col) {
-                    val += data_col_[offset + h_col * coeff_h_col + w_col * coeff_w_col];
-                }
-            }
-            data_im_[index] = val;
-        }
-    }
-};
-
-//single-threaded version
-template <typename Dtype>
-void col2im_cpu(const Dtype* data_col,
-                int channels, int height, int width,
-                int kernel_h, int kernel_w,
-                int pad_h, int pad_w,
-                int stride_h, int stride_w,
-                int dilation_h, int dilation_w,
-                Dtype* data_im)
-{
-    int height_col = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
-    int width_col = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
-    int channels_col = channels * kernel_h * kernel_w;
-
-    std::memset(data_im, 0, height * width * channels * sizeof(Dtype));
-
-    for (int c = 0; c < channels_col; ++c)
-    {
-        int w_offset = c % kernel_w;
-        int h_offset = (c / kernel_w) % kernel_h;
-        int c_im = c / kernel_h / kernel_w;
-
-        for (int h = 0; h < height_col; ++h)
-        {
-            for (int w = 0; w < width_col; ++w)
-            {
-                int h_pad = h * stride_h - pad_h + h_offset * dilation_h;
-                int w_pad = w * stride_w - pad_w + w_offset * dilation_w;
-
-                if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
-                    data_im[(c_im * height + h_pad) * width + w_pad] +=
-                        data_col[(c * height_col + h) * width_col + w];
-            }
-        }
-    }
-}
+void im2row(const float* data_im, int channels, int height, int width,
+            int kernel_h, int kernel_w, int pad_h, int pad_w,
+            int stride_h, int stride_w, int dilation_h, int dilation_w,
+            int height_col, int width_col, float* data_col);
+
+void col2im(const float* data_col, int channels, int height, int width,
+            int kernel_h, int kernel_w, int pad_h, int pad_w,
+            int stride_h, int stride_w, int dilation_h, int dilation_w,
+            float* data_im, const int* ofsbuf);
 
 }
 }
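Throughout both op_im2col files the output geometry uses the dilation-aware size formula: a kernel of extent k dilated by d covers d*(k - 1) + 1 input pixels, so out = (in + 2*pad - (d*(k - 1) + 1)) / stride + 1. A self-contained helper with a worked example (the function name is illustrative):

    // e.g. in=7, k=3, pad=1, stride=1, dilation=2:
    // effective kernel = 2*(3-1)+1 = 5, out = (7 + 2 - 5)/1 + 1 = 5
    static inline int convOutputSize(int in, int k, int pad, int stride, int dilation)
    {
        return (in + 2 * pad - (dilation * (k - 1) + 1)) / stride + 1;
    }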