Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv_contrib
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv_contrib
Commits
264dfe0c
Commit
264dfe0c
authored
Jul 15, 2016
by
Anna Petrovicheva
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Added DetectionOutput implementation
parent
1f6883c1
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
1066 additions
and
0 deletions
+1066
-0
detection_output_layer.cpp
modules/dnn/src/layers/detection_output_layer.cpp
+851
-0
detection_output_layer.hpp
modules/dnn/src/layers/detection_output_layer.hpp
+215
-0
No files found.
modules/dnn/src/layers/detection_output_layer.cpp
0 → 100644
View file @
264dfe0c
/*M ///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "detection_output_layer.hpp"
#include <float.h>
#include <algorithm>
#include <iostream>
#include <sstream>
namespace
cv
{
namespace
dnn
{
void
DetectionOutputLayer
::
checkParameter
(
const
LayerParams
&
params
,
const
std
::
string
&
parameterName
)
{
if
(
!
params
.
has
(
parameterName
))
{
CV_Error
(
Error
::
StsBadArg
,
"DetectionOutput layer parameter does not contain "
+
parameterName
+
" index."
);
}
}
/**
 * Builds the layer from its parameters.
 *
 * Reads the class count, location-sharing flag, background label, box code
 * type, variance-encoding flag, keep-top-k limit, confidence threshold and
 * the NMS settings (threshold and optional top-k) from `params`.
 *
 * NOTE(review): the presence check uses the key "numClasses" while the values
 * are read through accessor-style calls (num_classes(), share_location(), ...);
 * confirm that LayerParams really exposes both forms.
 */
DetectionOutputLayer::DetectionOutputLayer(LayerParams &params) : Layer(params)
{
    checkParameter(params, "numClasses");

    _numClasses = params.num_classes();
    _shareLocation = params.share_location();

    // With shared locations a single set of boxes serves every class.
    if (_shareLocation)
    {
        _numLocClasses = 1;
    }
    else
    {
        _numLocClasses = _numClasses;
    }

    _backgroundLabelId = params.background_label_id();
    _codeType = params.code_type();
    _varianceEncodedInTarget = params.variance_encoded_in_target();
    _keepTopK = params.keep_top_k();

    // Without an explicit threshold every score passes the "> threshold" test.
    if (params.has_confidence_threshold())
    {
        _confidenceThreshold = params.confidence_threshold();
    }
    else
    {
        _confidenceThreshold = -FLT_MAX;
    }

    // Parameters used in nms.
    _nmsThreshold = params.nms_param().nms_threshold();
    CV_Assert(_nmsThreshold > 0.);

    // -1 means "no limit on the number of candidates fed to nms".
    if (params.nms_param().has_top_k())
    {
        _topK = params.nms_param().top_k();
    }
    else
    {
        _topK = -1;
    }
}
/**
 * Asserts that every input blob has the same extent as the first input along
 * each of the first `_numAxes` axes.
 *
 * @param inputs  input blobs to compare; an empty list passes trivially.
 */
void DetectionOutputLayer::checkInputs(const std::vector<Blob*> &inputs)
{
    const size_t total = inputs.size();
    for (size_t i = 0; i < total; ++i)
    {
        // Compare axis by axis against the reference blob inputs[0].
        for (size_t j = 0; j < _numAxes; ++j)
        {
            CV_Assert(inputs[i]->shape[j] == inputs[0]->shape[j]);
        }
    }
}
/**
 * Validates the input blobs and creates the initial output blob.
 *
 * @param inputs   at least three blobs are read: [0] location predictions,
 *                 [1] confidence predictions, [2] prior boxes.
 * @param outputs  outputs[0] is created with shape (1, 1, 1, 7).
 *
 * Caches the batch size (`_num`) and the number of priors (`_numPriors`) and
 * asserts that the channel counts of the location and confidence blobs agree
 * with them.
 */
void DetectionOutputLayer::allocate(const std::vector<Blob*> &inputs,
                                    std::vector<Blob> &outputs)
{
    // inputs[0], inputs[1] and inputs[2] are all dereferenced below, so a
    // non-empty list is not enough.
    CV_Assert(inputs.size() >= 3);
    CV_Assert(inputs[0]->num() == inputs[1]->num());
    _num = inputs[0]->num();

    // The prior blob height is divided by 4 (four values per box).
    _numPriors = inputs[2]->height() / 4;
    CV_Assert(_numPriors * _numLocClasses * 4 == inputs[0]->channels());
    CV_Assert(_numPriors * _numClasses == inputs[1]->channels());

    // num() and channels() are 1.
    // Since the number of bboxes to be kept is unknown before nms, we manually
    // set it to (fake) 1.
    // Each row is a 7 dimension std::vector, which stores
    // [image_id, label, confidence, xmin, ymin, xmax, ymax]
    outputs[0].create(BlobShape(1, 1, 1, 7));
}
// Formats "<prefix><label>" for the label-lookup diagnostics raised by forward().
static std::string detectionOutputLabelError(const char *prefix, int label)
{
    std::ostringstream message;
    message << prefix << label;
    return message.str();
}

/**
 * Turns raw location / confidence predictions into the final detection list.
 *
 * Steps: unpack the three input buffers, decode the location predictions
 * against the prior boxes, run per-class NMS for every image, optionally keep
 * only the `_keepTopK` best detections per image, and write one 7-value row
 * per kept detection into outputs[0]:
 * [image_id, label, confidence, xmin, ymin, xmax, ymax] (box clipped by ClipBBox).
 *
 * @param inputs   [0] location predictions, [1] confidence predictions,
 *                 [2] prior boxes.
 * @param outputs  outputs[0] receives the detections.
 */
void DetectionOutputLayer::forward(std::vector<Blob*> &inputs,
                                   std::vector<Blob> &outputs)
{
    // ptrf() hands back raw float storage; the Get* helpers take const float*.
    const float *locationData = inputs[0]->ptrf();
    const float *confidenceData = inputs[1]->ptrf();
    const float *priorData = inputs[2]->ptrf();

    // Retrieve all location predictions.
    std::vector<LabelBBox> allLocationPredictions;
    GetLocPredictions(locationData, _num, _numPriors, _numLocClasses,
                      _shareLocation, &allLocationPredictions);

    // Retrieve all confidences.
    std::vector<std::map<int, std::vector<float> > > allConfidenceScores;
    GetConfidenceScores(confidenceData, _num, _numPriors, _numClasses,
                        &allConfidenceScores);

    // Retrieve all prior bboxes. It is same within a batch since we assume all
    // images in a batch are of same dimension.
    std::vector<NormalizedBBox> priorBBoxes;
    std::vector<std::vector<float> > priorVariances;
    GetPriorBBoxes(priorData, _numPriors, &priorBBoxes, &priorVariances);

    // Decode all loc predictions to bboxes.
    std::vector<LabelBBox> allDecodedBBoxes;
    DecodeBBoxesAll(allLocationPredictions, priorBBoxes, priorVariances, _num,
                    _shareLocation, _numLocClasses, _backgroundLabelId,
                    _codeType, _varianceEncodedInTarget, &allDecodedBBoxes);

    int numKept = 0;
    std::vector<std::map<int, std::vector<int> > > allIndices;
    for (int i = 0; i < _num; ++i)
    {
        const LabelBBox &decodeBBoxes = allDecodedBBoxes[i];
        const std::map<int, std::vector<float> > &confidenceScores =
            allConfidenceScores[i];
        std::map<int, std::vector<int> > indices;
        int numDetections = 0;
        for (int c = 0; c < _numClasses; ++c)
        {
            if (c == _backgroundLabelId)
            {
                // Ignore background class.
                continue;
            }

            const std::map<int, std::vector<float> >::const_iterator scoresIt =
                confidenceScores.find(c);
            if (scoresIt == confidenceScores.end())
            {
                // Something bad happened if there are no predictions for current label.
                CV_StsError(detectionOutputLabelError(
                    "Could not find confidence predictions for label ", c).c_str());
                continue;
            }

            // Shared locations are stored under the pseudo-label -1.
            const int label = _shareLocation ? -1 : c;
            const LabelBBox::const_iterator bboxesIt = decodeBBoxes.find(label);
            if (bboxesIt == decodeBBoxes.end())
            {
                // Something bad happened if there are no predictions for current label.
                CV_StsError(detectionOutputLabelError(
                    "Could not find location predictions for label ", label).c_str());
                continue;
            }

            std::vector<int> &classIndices = indices[c];
            ApplyNMSFast(bboxesIt->second, scoresIt->second, _confidenceThreshold,
                         _nmsThreshold, _topK, &classIndices);
            numDetections += static_cast<int>(classIndices.size());
        }

        if (_keepTopK > -1 && numDetections > _keepTopK)
        {
            // Gather (score, (label, index)) for every surviving detection.
            std::vector<std::pair<float, std::pair<int, int> > > scoreIndexPairs;
            for (std::map<int, std::vector<int> >::iterator it = indices.begin();
                 it != indices.end(); ++it)
            {
                const int label = it->first;
                const std::vector<int> &labelIndices = it->second;

                const std::map<int, std::vector<float> >::const_iterator scoresIt =
                    confidenceScores.find(label);
                if (scoresIt == confidenceScores.end())
                {
                    // Something bad happened for current label.
                    CV_StsError(detectionOutputLabelError(
                        "Could not find confidence predictions for label ", label).c_str());
                    continue;
                }
                const std::vector<float> &scores = scoresIt->second;

                for (size_t j = 0; j < labelIndices.size(); ++j)
                {
                    const int idx = labelIndices[j];
                    CV_Assert(idx >= 0 && static_cast<size_t>(idx) < scores.size());
                    scoreIndexPairs.push_back(
                        std::make_pair(scores[idx], std::make_pair(label, idx)));
                }
            }

            // Keep outputs k results per image.
            std::sort(scoreIndexPairs.begin(), scoreIndexPairs.end(),
                      SortScorePairDescend<std::pair<int, int> >);
            scoreIndexPairs.resize(_keepTopK);

            // Store the new indices.
            std::map<int, std::vector<int> > newIndices;
            for (size_t j = 0; j < scoreIndexPairs.size(); ++j)
            {
                const int label = scoreIndexPairs[j].second.first;
                const int idx = scoreIndexPairs[j].second.second;
                newIndices[label].push_back(idx);
            }
            allIndices.push_back(newIndices);
            numKept += _keepTopK;
        }
        else
        {
            allIndices.push_back(indices);
            numKept += numDetections;
        }
    }

    if (numKept == 0)
    {
        // NOTE(review): outputs[0] is left untouched on this path, so it keeps
        // whatever it held before this call - confirm callers expect that.
        std::cout << "Couldn't find any detections" << std::endl;
        return;
    }

    std::vector<int> outputsShape(2, 1);
    outputsShape.push_back(numKept);
    outputsShape.push_back(7);

    // outputs holds Blob values (see allocate()), so member access uses '.'.
    // NOTE(review): reshape() is assumed to provide storage for numKept rows;
    // allocate() uses create() - confirm which call is appropriate here.
    outputs[0].reshape(outputsShape);
    float *outputsData = outputs[0].ptrf();

    int count = 0;
    for (int i = 0; i < _num; ++i)
    {
        const std::map<int, std::vector<float> > &confidenceScores =
            allConfidenceScores[i];
        const LabelBBox &decodeBBoxes = allDecodedBBoxes[i];
        for (std::map<int, std::vector<int> >::iterator it = allIndices[i].begin();
             it != allIndices[i].end(); ++it)
        {
            const int label = it->first;

            const std::map<int, std::vector<float> >::const_iterator scoresIt =
                confidenceScores.find(label);
            if (scoresIt == confidenceScores.end())
            {
                // Something bad happened if there are no predictions for current label.
                CV_StsError(detectionOutputLabelError(
                    "Could not find confidence predictions for label ", label).c_str());
                continue;
            }
            const std::vector<float> &scores = scoresIt->second;

            const int locLabel = _shareLocation ? -1 : label;
            const LabelBBox::const_iterator bboxesIt = decodeBBoxes.find(locLabel);
            if (bboxesIt == decodeBBoxes.end())
            {
                // Something bad happened if there are no predictions for current label.
                CV_StsError(detectionOutputLabelError(
                    "Could not find location predictions for label ", locLabel).c_str());
                continue;
            }
            const std::vector<NormalizedBBox> &bboxes = bboxesIt->second;

            const std::vector<int> &indices = it->second;
            for (size_t j = 0; j < indices.size(); ++j)
            {
                const int idx = indices[j];
                float *row = outputsData + count * 7;

                row[0] = i;
                row[1] = label;
                row[2] = scores[idx];

                NormalizedBBox clipBBox;
                ClipBBox(bboxes[idx], &clipBBox);
                row[3] = clipBBox.xmin();
                row[4] = clipBBox.ymin();
                row[5] = clipBBox.xmax();
                row[6] = clipBBox.ymax();

                ++count;
            }
        }
    }
}
/**
 * Returns the area of a bounding box.
 *
 * @param bbox        box to measure.
 * @param normalized  when true the area is width * height; otherwise
 *                    (width + 1) * (height + 1).
 * @return 0 for an inverted box (xmax < xmin or ymax < ymin); the stored size
 *         when `bbox.has_size()`; otherwise the area computed from the corners.
 */
float DetectionOutputLayer::BBoxSize(const NormalizedBBox &bbox, const bool normalized)
{
    // If bbox is invalid (e.g. xmax < xmin or ymax < ymin), return 0.
    const bool inverted = bbox.xmax() < bbox.xmin() || bbox.ymax() < bbox.ymin();
    if (inverted)
    {
        return 0;
    }

    // A size that was already stored on the box takes precedence.
    if (bbox.has_size())
    {
        return bbox.size();
    }

    const float width = bbox.xmax() - bbox.xmin();
    const float height = bbox.ymax() - bbox.ymin();
    if (!normalized)
    {
        // If bbox is not within range [0, 1].
        return (width + 1) * (height + 1);
    }
    return width * height;
}
/**
 * Writes a copy of `bbox` clamped to the unit square into `clipBBox`.
 *
 * Each corner coordinate is limited to [0, 1]; the stored size is cleared and
 * recomputed with BBoxSize from the clamped corners, and the `difficult` flag
 * is copied from `bbox`.
 *
 * @param bbox      source box.
 * @param clipBBox  destination box.
 */
void DetectionOutputLayer::ClipBBox(const NormalizedBBox &bbox, NormalizedBBox *clipBBox)
{
    const float lower = 0.f;
    const float upper = 1.f;

    clipBBox->set_xmin(std::max(std::min(bbox.xmin(), upper), lower));
    clipBBox->set_ymin(std::max(std::min(bbox.ymin(), upper), lower));
    clipBBox->set_xmax(std::max(std::min(bbox.xmax(), upper), lower));
    clipBBox->set_ymax(std::max(std::min(bbox.ymax(), upper), lower));

    // Drop any stored size first so BBoxSize derives it from the new corners.
    clipBBox->clear_size();
    const float clippedSize = BBoxSize(*clipBBox);
    clipBBox->set_size(clippedSize);

    clipBBox->set_difficult(bbox.difficult());
}
void
DetectionOutputLayer
::
DecodeBBox
(
const
NormalizedBBox
&
priorBBox
,
const
std
::
vector
<
float
>&
priorVariance
,
const
CodeType
codeType
,
const
bool
varianceEncodedInTarget
,
const
NormalizedBBox
&
bbox
,
NormalizedBBox
*
decodeBBox
)
{
if
(
codeType
==
PriorBoxParameter_CodeType_CORNER
)
{
if
(
varianceEncodedInTarget
)
{
// variance is encoded in target, we simply need to add the offset
// predictions.
decodeBBox
->
set_xmin
(
priorBBox
.
xmin
()
+
bbox
.
xmin
());
decodeBBox
->
set_ymin
(
priorBBox
.
ymin
()
+
bbox
.
ymin
());
decodeBBox
->
set_xmax
(
priorBBox
.
xmax
()
+
bbox
.
xmax
());
decodeBBox
->
set_ymax
(
priorBBox
.
ymax
()
+
bbox
.
ymax
());
}
else
{
// variance is encoded in bbox, we need to scale the offset accordingly.
decodeBBox
->
set_xmin
(
priorBBox
.
xmin
()
+
priorVariance
[
0
]
*
bbox
.
xmin
());
decodeBBox
->
set_ymin
(
priorBBox
.
ymin
()
+
priorVariance
[
1
]
*
bbox
.
ymin
());
decodeBBox
->
set_xmax
(
priorBBox
.
xmax
()
+
priorVariance
[
2
]
*
bbox
.
xmax
());
decodeBBox
->
set_ymax
(
priorBBox
.
ymax
()
+
priorVariance
[
3
]
*
bbox
.
ymax
());
}
}
else
if
(
codeType
==
PriorBoxParameter_CodeType_CENTER_SIZE
)
{
float
priorWidth
=
priorBBox
.
xmax
()
-
priorBBox
.
xmin
();
CV_Assert
(
priorWidth
>
0
);
float
priorHeight
=
priorBBox
.
ymax
()
-
priorBBox
.
ymin
();
CV_Assert
(
priorHeight
>
0
);
float
priorCenterX
=
(
priorBBox
.
xmin
()
+
priorBBox
.
xmax
())
/
2.
;
float
priorCenterY
=
(
priorBBox
.
ymin
()
+
priorBBox
.
ymax
())
/
2.
;
float
decodeBBoxCenterX
,
decodeBBoxCenterY
;
float
decodeBBoxWidth
,
decodeBBoxHeight
;
if
(
varianceEncodedInTarget
)
{
// variance is encoded in target, we simply need to retore the offset
// predictions.
decodeBBoxCenterX
=
bbox
.
xmin
()
*
priorWidth
+
priorCenterX
;
decodeBBoxCenterY
=
bbox
.
ymin
()
*
priorHeight
+
priorCenterY
;
decodeBBoxWidth
=
exp
(
bbox
.
xmax
())
*
priorWidth
;
decodeBBoxHeight
=
exp
(
bbox
.
ymax
())
*
priorHeight
;
}
else
{
// variance is encoded in bbox, we need to scale the offset accordingly.
decodeBBoxCenterX
=
priorVariance
[
0
]
*
bbox
.
xmin
()
*
priorWidth
+
priorCenterX
;
decodeBBoxCenterY
=
priorVariance
[
1
]
*
bbox
.
ymin
()
*
priorHeight
+
priorCenterY
;
decodeBBoxWidth
=
exp
(
priorVariance
[
2
]
*
bbox
.
xmax
())
*
priorWidth
;
decodeBBoxHeight
=
exp
(
priorVariance
[
3
]
*
bbox
.
ymax
())
*
priorHeight
;
}
decodeBBox
->
set_xmin
(
decodeBBoxCenterX
-
decodeBBoxWidth
/
2.
);
decodeBBox
->
set_ymin
(
decodeBBoxCenterY
-
decodeBBoxHeight
/
2.
);
decodeBBox
->
set_xmax
(
decodeBBoxCenterX
+
decodeBBoxWidth
/
2.
);
decodeBBox
->
set_ymax
(
decodeBBoxCenterY
+
decodeBBoxHeight
/
2.
);
}
else
{
CV_StsError
(
"Unknown LocLossType."
);
}
float
bboxSize
=
BBoxSize
(
*
decodeBBox
);
decodeBBox
->
set_size
(
bboxSize
);
}
void
DetectionOutputLayer
::
DecodeBBoxes
(
const
std
::
vector
<
NormalizedBBox
>&
priorBBoxes
,
const
std
::
vector
<
std
::
vector
<
float
>
>&
priorVariances
,
const
CodeType
codeType
,
const
bool
varianceEncodedInTarget
,
const
std
::
vector
<
NormalizedBBox
>&
bboxes
,
std
::
vector
<
NormalizedBBox
>*
decodeBBoxes
)
{
CV_Assert
(
priorBBoxes
.
size
()
==
priorVariances
.
size
());
CV_Assert
(
priorBBoxes
.
size
()
==
bboxes
.
size
());
int
numBBoxes
=
priorBBoxes
.
size
();
if
(
numBBoxes
>=
1
)
{
CV_Assert
(
priorVariances
[
0
].
size
()
==
4
);
}
decodeBBoxes
->
clear
();
for
(
int
i
=
0
;
i
<
numBBoxes
;
++
i
)
{
NormalizedBBox
decodeBBox
;
DecodeBBox
(
priorBBoxes
[
i
],
priorVariances
[
i
],
codeType
,
varianceEncodedInTarget
,
bboxes
[
i
],
&
decodeBBox
);
decodeBBoxes
->
push_back
(
decodeBBox
);
}
}
void
DetectionOutputLayer
::
DecodeBBoxesAll
(
const
std
::
vector
<
LabelBBox
>&
allLocPreds
,
const
std
::
vector
<
NormalizedBBox
>&
priorBBoxes
,
const
std
::
vector
<
std
::
vector
<
float
>
>&
priorVariances
,
const
int
num
,
const
bool
shareLocation
,
const
int
numLocClasses
,
const
int
backgroundLabelId
,
const
CodeType
codeType
,
const
bool
varianceEncodedInTarget
,
std
::
vector
<
LabelBBox
>*
allDecodeBBoxes
)
{
CV_Assert
(
allLocPreds
.
size
()
==
num
);
allDecodeBBoxes
->
clear
();
allDecodeBBoxes
->
resize
(
num
);
for
(
int
i
=
0
;
i
<
num
;
++
i
)
{
// Decode predictions into bboxes.
LabelBBox
&
decodeBBoxes
=
(
*
allDecodeBBoxes
)[
i
];
for
(
int
c
=
0
;
c
<
numLocClasses
;
++
c
)
{
int
label
=
shareLocation
?
-
1
:
c
;
if
(
label
==
backgroundLabelId
)
{
// Ignore background class.
continue
;
}
if
(
allLocPreds
[
i
].
find
(
label
)
==
allLocPreds
[
i
].
end
())
{
// Something bad happened if there are no predictions for current label.
std
::
string
error
(
"Could not find location predictions for label "
);
error
+=
std
::
string
(
label
);
CV_StsError
(
error
.
c_str
());
}
const
std
::
vector
<
NormalizedBBox
>&
labelLocPreds
=
allLocPreds
[
i
].
find
(
label
)
->
second
;
DecodeBBoxes
(
priorBBoxes
,
priorVariances
,
codeType
,
varianceEncodedInTarget
,
labelLocPreds
,
&
(
decodeBBoxes
[
label
]));
}
}
}
void
DetectionOutputLayer
::
GetPriorBBoxes
(
const
float
*
priorData
,
const
int
numPriors
,
std
::
vector
<
NormalizedBBox
>*
priorBBoxes
,
std
::
vector
<
std
::
vector
<
float
>
>*
priorVariances
)
{
priorBBoxes
->
clear
();
priorVariances
->
clear
();
for
(
int
i
=
0
;
i
<
numPriors
;
++
i
)
{
int
startIdx
=
i
*
4
;
NormalizedBBox
bbox
;
bbox
.
set_xmin
(
priorData
[
startIdx
]);
bbox
.
set_ymin
(
priorData
[
startIdx
+
1
]);
bbox
.
set_xmax
(
priorData
[
startIdx
+
2
]);
bbox
.
set_ymax
(
priorData
[
startIdx
+
3
]);
float
bboxSize
=
BBoxSize
(
bbox
);
bbox
.
set_size
(
bboxSize
);
priorBBoxes
->
push_back
(
bbox
);
}
for
(
int
i
=
0
;
i
<
numPriors
;
++
i
)
{
int
startIdx
=
(
numPriors
+
i
)
*
4
;
std
::
vector
<
float
>
var
;
for
(
int
j
=
0
;
j
<
4
;
++
j
)
{
var
.
push_back
(
priorData
[
startIdx
+
j
]);
}
priorVariances
->
push_back
(
var
);
}
}
/**
 * Scales a box by an image width and height.
 *
 * The x coordinates are multiplied by `width` and the y coordinates by
 * `height`. The stored size is cleared and recomputed; the result is treated
 * as normalized only when neither `width` nor `height` exceeds 1. The
 * `difficult` flag is copied from `bbox`.
 *
 * @param bbox       source box.
 * @param height     vertical scale factor.
 * @param width      horizontal scale factor.
 * @param scaleBBox  destination box.
 */
void DetectionOutputLayer::ScaleBBox(const NormalizedBBox &bbox,
                                     const int height,
                                     const int width,
                                     NormalizedBBox *scaleBBox)
{
    scaleBBox->set_xmin(bbox.xmin() * width);
    scaleBBox->set_ymin(bbox.ymin() * height);
    scaleBBox->set_xmax(bbox.xmax() * width);
    scaleBBox->set_ymax(bbox.ymax() * height);

    // Drop any stored size so BBoxSize derives it from the scaled corners.
    scaleBBox->clear_size();
    const bool stillNormalized = (width <= 1) && (height <= 1);
    const float scaledSize = BBoxSize(*scaleBBox, stillNormalized);
    scaleBBox->set_size(scaledSize);

    scaleBBox->set_difficult(bbox.difficult());
}
void
DetectionOutputLayer
::
GetLocPredictions
(
const
float
*
locData
,
const
int
num
,
const
int
numPredsPerClass
,
const
int
numLocClasses
,
const
bool
shareLocation
,
std
::
vector
<
LabelBBox
>*
locPreds
)
{
locPreds
->
clear
();
if
(
shareLocation
)
{
CV_Assert
(
numLocClasses
==
1
);
}
locPreds
->
resize
(
num
);
for
(
int
i
=
0
;
i
<
num
;
++
i
)
{
LabelBBox
&
labelBBox
=
(
*
locPreds
)[
i
];
for
(
int
p
=
0
;
p
<
numPredsPerClass
;
++
p
)
{
int
startIdx
=
p
*
numLocClasses
*
4
;
for
(
int
c
=
0
;
c
<
numLocClasses
;
++
c
)
{
int
label
=
shareLocation
?
-
1
:
c
;
if
(
labelBBox
.
find
(
label
)
==
labelBBox
.
end
())
{
labelBBox
[
label
].
resize
(
numPredsPerClass
);
}
labelBBox
[
label
][
p
].
set_xmin
(
locData
[
startIdx
+
c
*
4
]);
labelBBox
[
label
][
p
].
set_ymin
(
locData
[
startIdx
+
c
*
4
+
1
]);
labelBBox
[
label
][
p
].
set_xmax
(
locData
[
startIdx
+
c
*
4
+
2
]);
labelBBox
[
label
][
p
].
set_ymax
(
locData
[
startIdx
+
c
*
4
+
3
]);
}
}
locData
+=
numPredsPerClass
*
numLocClasses
*
4
;
}
}
/**
 * Unpacks the raw confidence buffer into per-image, per-class score lists.
 *
 * For each image the buffer holds `numPredsPerClass * numClasses` values,
 * read prediction by prediction with one score per class.
 *
 * @param confData          packed confidence predictions.
 * @param num               number of images.
 * @param numPredsPerClass  predictions per class.
 * @param numClasses        number of classes.
 * @param confPreds         cleared and resized to `num`; entry i maps
 *                          class -> scores in prediction order.
 */
void DetectionOutputLayer::GetConfidenceScores(const float *confData,
                                               const int num,
                                               const int numPredsPerClass,
                                               const int numClasses,
                                               std::vector<std::map<int, std::vector<float> > > *confPreds)
{
    confPreds->clear();
    confPreds->resize(num);

    const float *imageData = confData;
    for (int image = 0; image < num;
         ++image, imageData += numPredsPerClass * numClasses)
    {
        std::map<int, std::vector<float> > &labelScores = (*confPreds)[image];
        for (int p = 0; p < numPredsPerClass; ++p)
        {
            // One score per class for prediction p.
            const float *predScores = imageData + p * numClasses;
            for (int c = 0; c < numClasses; ++c)
            {
                labelScores[c].push_back(predScores[c]);
            }
        }
    }
}
void
DetectionOutputLayer
::
DecodeBBox
(
const
NormalizedBBox
&
prior_bbox
,
const
vector
<
float
>&
prior_variance
,
const
CodeType
code_type
,
const
bool
variance_encoded_in_target
,
const
NormalizedBBox
&
bbox
,
NormalizedBBox
*
decode_bbox
)
{
if
(
code_type
==
PriorBoxParameter_CodeType_CORNER
)
{
if
(
variance_encoded_in_target
)
{
// variance is encoded in target, we simply need to add the offset
// predictions.
decode_bbox
->
set_xmin
(
prior_bbox
.
xmin
()
+
bbox
.
xmin
());
decode_bbox
->
set_ymin
(
prior_bbox
.
ymin
()
+
bbox
.
ymin
());
decode_bbox
->
set_xmax
(
prior_bbox
.
xmax
()
+
bbox
.
xmax
());
decode_bbox
->
set_ymax
(
prior_bbox
.
ymax
()
+
bbox
.
ymax
());
}
else
{
// variance is encoded in bbox, we need to scale the offset accordingly.
decode_bbox
->
set_xmin
(
prior_bbox
.
xmin
()
+
prior_variance
[
0
]
*
bbox
.
xmin
());
decode_bbox
->
set_ymin
(
prior_bbox
.
ymin
()
+
prior_variance
[
1
]
*
bbox
.
ymin
());
decode_bbox
->
set_xmax
(
prior_bbox
.
xmax
()
+
prior_variance
[
2
]
*
bbox
.
xmax
());
decode_bbox
->
set_ymax
(
prior_bbox
.
ymax
()
+
prior_variance
[
3
]
*
bbox
.
ymax
());
}
}
else
if
(
code_type
==
PriorBoxParameter_CodeType_CENTER_SIZE
)
{
float
prior_width
=
prior_bbox
.
xmax
()
-
prior_bbox
.
xmin
();
CHECK_GT
(
prior_width
,
0
);
float
prior_height
=
prior_bbox
.
ymax
()
-
prior_bbox
.
ymin
();
CHECK_GT
(
prior_height
,
0
);
float
prior_center_x
=
(
prior_bbox
.
xmin
()
+
prior_bbox
.
xmax
())
/
2.
;
float
prior_center_y
=
(
prior_bbox
.
ymin
()
+
prior_bbox
.
ymax
())
/
2.
;
float
decode_bbox_center_x
,
decode_bbox_center_y
;
float
decode_bbox_width
,
decode_bbox_height
;
if
(
variance_encoded_in_target
)
{
// variance is encoded in target, we simply need to retore the offset
// predictions.
decode_bbox_center_x
=
bbox
.
xmin
()
*
prior_width
+
prior_center_x
;
decode_bbox_center_y
=
bbox
.
ymin
()
*
prior_height
+
prior_center_y
;
decode_bbox_width
=
exp
(
bbox
.
xmax
())
*
prior_width
;
decode_bbox_height
=
exp
(
bbox
.
ymax
())
*
prior_height
;
}
else
{
// variance is encoded in bbox, we need to scale the offset accordingly.
decode_bbox_center_x
=
prior_variance
[
0
]
*
bbox
.
xmin
()
*
prior_width
+
prior_center_x
;
decode_bbox_center_y
=
prior_variance
[
1
]
*
bbox
.
ymin
()
*
prior_height
+
prior_center_y
;
decode_bbox_width
=
exp
(
prior_variance
[
2
]
*
bbox
.
xmax
())
*
prior_width
;
decode_bbox_height
=
exp
(
prior_variance
[
3
]
*
bbox
.
ymax
())
*
prior_height
;
}
decode_bbox
->
set_xmin
(
decode_bbox_center_x
-
decode_bbox_width
/
2.
);
decode_bbox
->
set_ymin
(
decode_bbox_center_y
-
decode_bbox_height
/
2.
);
decode_bbox
->
set_xmax
(
decode_bbox_center_x
+
decode_bbox_width
/
2.
);
decode_bbox
->
set_ymax
(
decode_bbox_center_y
+
decode_bbox_height
/
2.
);
}
else
{
LOG
(
FATAL
)
<<
"Unknown LocLossType."
;
}
float
bbox_size
=
BBoxSize
(
*
decode_bbox
);
decode_bbox
->
set_size
(
bbox_size
);
}
void
DetectionOutputLayer
::
DecodeBBoxes
(
const
std
::
vector
<
NormalizedBBox
>&
priorBBoxes
,
const
std
::
vector
<
std
::
vector
<
float
>
>&
priorVariances
,
const
CodeType
code_type
,
const
bool
variance_encoded_in_target
,
const
std
::
vector
<
NormalizedBBox
>&
bboxes
,
std
::
vector
<
NormalizedBBox
>*
decode_bboxes
)
{
CV_Assert
(
priorBBoxes
.
size
()
==
priorVariances
.
size
());
CV_Assert
(
priorBBoxes
.
size
()
==
bboxes
.
size
());
int
num_bboxes
=
priorBBoxes
.
size
();
if
(
num_bboxes
>=
1
)
{
CV_Assert
(
priorVariances
[
0
].
size
()
==
4
);
}
decode_bboxes
->
clear
();
for
(
int
i
=
0
;
i
<
num_bboxes
;
++
i
)
{
NormalizedBBox
decode_bbox
;
DecodeBBox
(
priorBBoxes
[
i
],
priorVariances
[
i
],
code_type
,
variance_encoded_in_target
,
bboxes
[
i
],
&
decode_bbox
);
decode_bboxes
->
push_back
(
decode_bbox
);
}
}
void
DetectionOutputLayer
::
DecodeBBoxesAll
(
const
std
::
vector
<
LabelBBox
>&
all_loc_preds
,
const
std
::
vector
<
NormalizedBBox
>&
priorBBoxes
,
const
std
::
vector
<
std
::
vector
<
float
>
>&
priorVariances
,
const
int
num
,
const
bool
share_location
,
const
int
num_loc_classes
,
const
int
background_label_id
,
const
CodeType
code_type
,
const
bool
variance_encoded_in_target
,
std
::
vector
<
LabelBBox
>*
all_decode_bboxes
)
{
CV_Assert
(
all_loc_preds
.
size
()
==
num
);
all_decode_bboxes
->
clear
();
all_decode_bboxes
->
resize
(
num
);
for
(
int
i
=
0
;
i
<
num
;
++
i
)
{
// Decode predictions into bboxes.
LabelBBox
&
decode_bboxes
=
(
*
all_decode_bboxes
)[
i
];
for
(
int
c
=
0
;
c
<
num_loc_classes
;
++
c
)
{
int
label
=
share_location
?
-
1
:
c
;
if
(
label
==
background_label_id
)
{
// Ignore background class.
continue
;
}
if
(
all_loc_preds
[
i
].
find
(
label
)
==
all_loc_preds
[
i
].
end
())
{
// Something bad happened if there are no predictions for current label.
std
::
string
error
(
"Could not find location predictions for label "
);
error
+=
std
::
string
(
label
);
CV_StsError
(
error
.
c_str
());
}
const
std
::
vector
<
NormalizedBBox
>&
label_loc_preds
=
all_loc_preds
[
i
].
find
(
label
)
->
second
;
DecodeBBoxes
(
priorBBoxes
,
priorVariances
,
code_type
,
variance_encoded_in_target
,
label_loc_preds
,
&
(
decode_bboxes
[
label
]));
}
}
}
void
DetectionOutputLayer
::
ApplyNMSFast
(
const
std
::
vector
<
NormalizedBBox
>&
bboxes
,
const
std
::
vector
<
float
>&
scores
,
const
float
score_threshold
,
const
float
nms_threshold
,
const
int
top_k
,
std
::
vector
<
int
>*
indices
)
{
// Sanity check.
CHECK_EQ
(
bboxes
.
size
(),
scores
.
size
())
<<
"bboxes and scores have different size."
;
// Get top_k scores (with corresponding indices).
std
::
vector
<
std
::
pair
<
float
,
int
>
>
score_index_vec
;
GetMaxScoreIndex
(
scores
,
score_threshold
,
top_k
,
&
score_index_vec
);
// Do nms.
indices
->
clear
();
while
(
score_index_vec
.
size
()
!=
0
)
{
const
int
idx
=
score_index_vec
.
front
().
second
;
bool
keep
=
true
;
for
(
int
k
=
0
;
k
<
indices
->
size
();
++
k
)
{
if
(
keep
)
{
const
int
kept_idx
=
(
*
indices
)[
k
];
float
overlap
=
JaccardOverlap
(
bboxes
[
idx
],
bboxes
[
kept_idx
]);
keep
=
overlap
<=
nms_threshold
;
}
else
{
break
;
}
}
if
(
keep
)
{
indices
->
push_back
(
idx
);
}
score_index_vec
.
erase
(
score_index_vec
.
begin
());
}
}
/**
 * Collects (score, index) pairs for scores above a threshold, best first.
 *
 * @param scores           input scores.
 * @param threshold        only scores strictly greater than this are taken.
 * @param top_k            when > -1 and smaller than the number of collected
 *                         pairs, the result is truncated to this many entries.
 * @param score_index_vec  pairs are appended to this vector (it is not cleared
 *                         first), then the whole vector is stable-sorted by
 *                         descending score.
 */
void DetectionOutputLayer::GetMaxScoreIndex(const std::vector<float> &scores,
                                            const float threshold,
                                            const int top_k,
                                            std::vector<std::pair<float, int> > *score_index_vec)
{
    // Generate index score pairs.
    int position = 0;
    while (position < scores.size())
    {
        const float score = scores[position];
        if (score > threshold)
        {
            score_index_vec->push_back(std::pair<float, int>(score, position));
        }
        ++position;
    }

    // Sort the score pair according to the scores in descending order
    std::stable_sort(score_index_vec->begin(), score_index_vec->end(),
                     SortScorePairDescend<int>);

    // Keep top_k scores if needed.
    const bool limited = top_k > -1;
    if (limited && top_k < score_index_vec->size())
    {
        score_index_vec->resize(top_k);
    }
}
/**
 * Ordering predicate for descending score order.
 *
 * @return true when the score (`first`) of `pair1` is greater than that of
 *         `pair2`; the payload (`second`) is ignored.
 */
template<typename T>
bool DetectionOutputLayer::SortScorePairDescend(const std::pair<float, T> &pair1,
                                                const std::pair<float, T> &pair2)
{
    const float lhsScore = pair1.first;
    const float rhsScore = pair2.first;
    return rhsScore < lhsScore;
}
/**
 * Computes the intersection rectangle of two boxes.
 *
 * @param bbox1           first box.
 * @param bbox2           second box.
 * @param intersect_bbox  receives the overlapping rectangle, or all-zero
 *                        corners when the boxes do not overlap. Only the four
 *                        corner fields are written.
 */
void DetectionOutputLayer::IntersectBBox(const NormalizedBBox &bbox1,
                                         const NormalizedBBox &bbox2,
                                         NormalizedBBox *intersect_bbox)
{
    // bbox2 lies entirely to one side of bbox1.
    const bool disjoint = bbox2.xmin() > bbox1.xmax() || bbox2.xmax() < bbox1.xmin() ||
                          bbox2.ymin() > bbox1.ymax() || bbox2.ymax() < bbox1.ymin();

    // Return [0, 0, 0, 0] if there is no intersection.
    float left = 0;
    float top = 0;
    float right = 0;
    float bottom = 0;
    if (!disjoint)
    {
        left = std::max(bbox1.xmin(), bbox2.xmin());
        top = std::max(bbox1.ymin(), bbox2.ymin());
        right = std::min(bbox1.xmax(), bbox2.xmax());
        bottom = std::min(bbox1.ymax(), bbox2.ymax());
    }

    intersect_bbox->set_xmin(left);
    intersect_bbox->set_ymin(top);
    intersect_bbox->set_xmax(right);
    intersect_bbox->set_ymax(bottom);
}
float
DetectionOutputLayer
::
JaccardOverlap
(
const
NormalizedBBox
&
bbox1
,
const
NormalizedBBox
&
bbox2
,
const
bool
normalized
)
{
NormalizedBBox
intersect_bbox
;
IntersectBBox
(
bbox1
,
bbox2
,
&
intersect_bbox
);
float
intersect_width
,
intersect_height
;
if
(
normalized
)
{
intersect_width
=
intersect_bbox
.
xmax
()
-
intersect_bbox
.
xmin
();
intersect_height
=
intersect_bbox
.
ymax
()
-
intersect_bbox
.
ymin
();
}
else
{
intersect_width
=
intersect_bbox
.
xmax
()
-
intersect_bbox
.
xmin
()
+
1
;
intersect_height
=
intersect_bbox
.
ymax
()
-
intersect_bbox
.
ymin
()
+
1
;
}
if
(
intersect_width
>
0
&&
intersect_height
>
0
)
{
float
intersect_size
=
intersect_width
*
intersect_height
;
float
bbox1_size
=
BBoxSize
(
bbox1
);
float
bbox2_size
=
BBoxSize
(
bbox2
);
return
intersect_size
/
(
bbox1_size
+
bbox2_size
-
intersect_size
);
}
else
{
return
0.
;
}
}
}
}
modules/dnn/src/layers/detection_output_layer.hpp
0 → 100644
View file @
264dfe0c
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_DNN_LAYERS_DETECTION_OUTPUT_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_DETECTION_OUTPUT_LAYER_HPP__
#include "../precomp.hpp"
namespace
cv
{
namespace
dnn
{
// Layer subclass declaring the DetectionOutput layer: construction from
// LayerParams, the allocate()/forward() entry points, parameter/input checks,
// and the bounding-box helper routines (decode, clip, scale, non maximum
// suppression, overlap computation) declared below.
class DetectionOutputLayer : public Layer
{
public:
    // Per-label collection of bounding boxes (label id -> boxes).
    typedef std::map<int, std::vector<NormalizedBBox> > LabelBBox;

    // Declared ahead of the data members because _codeType is of this type:
    // a type name used in a member declaration must already be declared at
    // that point in the class.
    typedef PriorBoxParameter_CodeType CodeType;

private:
    // Layer configuration and state.
    // NOTE(review): meanings are inferred from the member names and the
    // parameter comments below - confirm against the implementation file.
    int _numClasses;
    bool _shareLocation;
    int _numLocClasses;

    int _backgroundLabelId;

    CodeType _codeType;
    bool _varianceEncodedInTarget;
    int _keepTopK;
    float _confidenceThreshold;

    int _num;
    int _numPriors;

    float _nmsThreshold;
    int _topK;

    static const size_t _numAxes = 4;

public:
    DetectionOutputLayer(LayerParams &params);

    void allocate(const std::vector<Blob*> &inputs,
                  std::vector<Blob> &outputs);
    void forward(std::vector<Blob*> &inputs,
                 std::vector<Blob> &outputs);

    void checkParameter(const LayerParams &params,
                        const std::string &parameterName);
    void checkInputs(const std::vector<Blob*> &inputs);

    // Clip the NormalizedBBox such that the range for each corner is [0, 1].
    void ClipBBox(const NormalizedBBox& bbox, NormalizedBBox* clip_bbox);

    // Decode a bbox according to a prior bbox.
    void DecodeBBox(const NormalizedBBox& prior_bbox,
                    const std::vector<float>& prior_variance,
                    const CodeType code_type,
                    const bool variance_encoded_in_target,
                    const NormalizedBBox& bbox,
                    NormalizedBBox* decode_bbox);

    // Decode a set of bboxes according to a set of prior bboxes.
    void DecodeBBoxes(const std::vector<NormalizedBBox>& prior_bboxes,
                      const std::vector<std::vector<float> >& prior_variances,
                      const CodeType code_type,
                      const bool variance_encoded_in_target,
                      const std::vector<NormalizedBBox>& bboxes,
                      std::vector<NormalizedBBox>* decode_bboxes);

    // Decode all bboxes in a batch.
    void DecodeBBoxesAll(const std::vector<LabelBBox>& all_loc_pred,
                         const std::vector<NormalizedBBox>& prior_bboxes,
                         const std::vector<std::vector<float> >& prior_variances,
                         const int num,
                         const bool share_location,
                         const int num_loc_classes,
                         const int background_label_id,
                         const CodeType code_type,
                         const bool variance_encoded_in_target,
                         std::vector<LabelBBox>* all_decode_bboxes);

    // Get prior bounding boxes from prior_data.
    //    prior_data: 1 x 2 x num_priors * 4 x 1 blob.
    //    num_priors: number of priors.
    //    prior_bboxes: stores all the prior bboxes in the format of NormalizedBBox.
    //    prior_variances: stores all the variances needed by prior bboxes.
    template<typename Dtype>
    void GetPriorBBoxes(const Dtype* prior_data,
                        const int num_priors,
                        std::vector<NormalizedBBox>* prior_bboxes,
                        std::vector<std::vector<float> >* prior_variances);

    // Scale the NormalizedBBox w.r.t. height and width.
    void ScaleBBox(const NormalizedBBox& bbox,
                   const int height,
                   const int width,
                   NormalizedBBox* scale_bbox);

    // Do non maximum suppression given bboxes and scores.
    // Inspired by Piotr Dollar's NMS implementation in EdgeBox.
    // https://goo.gl/jV3JYS
    //    bboxes: a set of bounding boxes.
    //    scores: a set of corresponding confidences.
    //    score_threshold: a threshold used to filter detection results.
    //    nms_threshold: a threshold used in non maximum suppression.
    //    top_k: if not -1, keep at most top_k picked indices.
    //    indices: the kept indices of bboxes after nms.
    void ApplyNMSFast(const std::vector<NormalizedBBox>& bboxes,
                      const std::vector<float>& scores,
                      const float score_threshold,
                      const float nms_threshold,
                      const int top_k,
                      std::vector<int>* indices);

    // Do non maximum suppression given bboxes and scores.
    //    bboxes: a set of bounding boxes.
    //    scores: a set of corresponding confidences.
    //    threshold: the threshold used in non maximum suppression.
    //    top_k: if not -1, keep at most top_k picked indices.
    //    reuse_overlaps: if true, use and update overlaps; otherwise, always
    //      compute overlap.
    //    overlaps: a temp place to optionally store the overlaps between pairs of
    //      bboxes if reuse_overlaps is true.
    //    indices: the kept indices of bboxes after nms.
    void ApplyNMS(const std::vector<NormalizedBBox>& bboxes,
                  const std::vector<float>& scores,
                  const float threshold,
                  const int top_k,
                  const bool reuse_overlaps,
                  std::map<int, std::map<int, float> >* overlaps,
                  std::vector<int>* indices);

    // Overload taking a precomputed `overlapped` flag array and an element
    // count `num`.
    // NOTE(review): the layout of `overlapped` is not documented in this
    // header - confirm against the implementation.
    void ApplyNMS(const bool* overlapped,
                  const int num,
                  std::vector<int>* indices);

    // Get confidence predictions from conf_data.
    //    conf_data: num x num_preds_per_class * num_classes blob.
    //    num: the number of images.
    //    num_preds_per_class: number of predictions per class.
    //    num_classes: number of classes.
    //    conf_scores: stores the confidence prediction, where each item contains
    //      confidence prediction for an image.
    template<typename Dtype>
    void GetConfidenceScores(const Dtype* conf_data,
                             const int num,
                             const int num_preds_per_class,
                             const int num_classes,
                             std::vector<std::map<int, std::vector<float> > >* conf_scores);

    // Get confidence predictions from conf_data.
    //    conf_data: num x num_preds_per_class * num_classes blob.
    //    num: the number of images.
    //    num_preds_per_class: number of predictions per class.
    //    num_classes: number of classes.
    //    class_major: if true, data layout is
    //      num x num_classes x num_preds_per_class; otherwise, data layout is
    //      num x num_preds_per_class * num_classes.
    //    conf_scores: stores the confidence prediction, where each item contains
    //      confidence prediction for an image.
    template<typename Dtype>
    void GetConfidenceScores(const Dtype* conf_data,
                             const int num,
                             const int num_preds_per_class,
                             const int num_classes,
                             const bool class_major,
                             std::vector<std::map<int, std::vector<float> > >* conf_scores);

    // Get location predictions from loc_data.
    //    loc_data: num x num_preds_per_class * num_loc_classes * 4 blob.
    //    num: the number of images.
    //    num_preds_per_class: number of predictions per class.
    //    num_loc_classes: number of location classes. It is 1 if share_location is
    //      true; and is equal to number of classes needed to predict otherwise.
    //    share_location: if true, all classes share the same location prediction.
    //    loc_preds: stores the location prediction, where each item contains
    //      location prediction for an image.
    template<typename Dtype>
    void GetLocPredictions(const Dtype* loc_data,
                           const int num,
                           const int num_preds_per_class,
                           const int num_loc_classes,
                           const bool share_location,
                           std::vector<LabelBBox>* loc_preds);

    // Get max scores with corresponding indices.
    //    scores: a set of scores.
    //    threshold: only consider scores higher than the threshold.
    //    top_k: if -1, keep all; otherwise, keep at most top_k.
    //    score_index_vec: store the sorted (score, index) pair.
    void GetMaxScoreIndex(const std::vector<float>& scores,
                          const float threshold,
                          const int top_k,
                          std::vector<std::pair<float, int> >* score_index_vec);

    // Comparison of two (score, payload) pairs.
    // NOTE(review): the name suggests a descending-by-score ordering
    // predicate - confirm against the implementation.
    template<typename T>
    bool SortScorePairDescend(const std::pair<float, T>& pair1,
                              const std::pair<float, T>& pair2);

    // Compute the jaccard (intersection over union IoU) overlap between two bboxes.
    float JaccardOverlap(const NormalizedBBox& bbox1,
                         const NormalizedBBox& bbox2,
                         const bool normalized = true);

    // Compute the intersection between two bboxes.
    void IntersectBBox(const NormalizedBBox& bbox1,
                       const NormalizedBBox& bbox2,
                       NormalizedBBox* intersect_bbox);

    // Compute bbox size.
    float BBoxSize(const NormalizedBBox& bbox, const bool normalized = true);
};
}
}
#endif
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment