Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
c3e6de29
Commit
c3e6de29
authored
Jul 13, 2017
by
Alexander Alekhin
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
dnn: code cleanup, refactor detection output layer
parent
544908d0
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
256 additions
and
331 deletions
+256
-331
detection_output_layer.cpp
modules/dnn/src/layers/detection_output_layer.cpp
+256
-331
No files found.
modules/dnn/src/layers/detection_output_layer.cpp
View file @
c3e6de29
...
@@ -55,29 +55,13 @@ namespace util
...
@@ -55,29 +55,13 @@ namespace util
{
{
template
<
typename
T
>
template
<
typename
T
>
std
::
string
to_string
(
T
value
)
static
inline
bool
SortScorePairDescend
(
const
std
::
pair
<
float
,
T
>&
pair1
,
{
std
::
ostringstream
stream
;
stream
<<
value
;
return
stream
.
str
();
}
template
<
typename
T
>
void
make_error
(
const
std
::
string
&
message1
,
const
T
&
message2
)
{
std
::
string
error
(
message1
);
error
+=
std
::
string
(
util
::
to_string
<
int
>
(
message2
));
CV_Error
(
Error
::
StsBadArg
,
error
.
c_str
());
}
template
<
typename
T
>
bool
SortScorePairDescend
(
const
std
::
pair
<
float
,
T
>&
pair1
,
const
std
::
pair
<
float
,
T
>&
pair2
)
const
std
::
pair
<
float
,
T
>&
pair2
)
{
{
return
pair1
.
first
>
pair2
.
first
;
return
pair1
.
first
>
pair2
.
first
;
}
}
}
}
// namespace
class
DetectionOutputLayerImpl
:
public
DetectionOutputLayer
class
DetectionOutputLayerImpl
:
public
DetectionOutputLayer
{
{
...
@@ -133,7 +117,7 @@ public:
...
@@ -133,7 +117,7 @@ public:
message
+=
" layer parameter does not contain "
;
message
+=
" layer parameter does not contain "
;
message
+=
parameterName
;
message
+=
parameterName
;
message
+=
" parameter."
;
message
+=
" parameter."
;
CV_Error
(
Error
::
StsBadArg
,
message
);
CV_Error
NoReturn
(
Error
::
StsBadArg
,
message
);
}
}
else
else
{
{
...
@@ -209,73 +193,122 @@ public:
...
@@ -209,73 +193,122 @@ public:
CV_TRACE_FUNCTION
();
CV_TRACE_FUNCTION
();
CV_TRACE_ARG_VALUE
(
name
,
"name"
,
name
.
c_str
());
CV_TRACE_ARG_VALUE
(
name
,
"name"
,
name
.
c_str
());
const
float
*
locationData
=
inputs
[
0
]
->
ptr
<
float
>
();
std
::
vector
<
LabelBBox
>
allDecodedBBoxes
;
const
float
*
confidenceData
=
inputs
[
1
]
->
ptr
<
float
>
();
std
::
vector
<
std
::
vector
<
std
::
vector
<
float
>
>
>
allConfidenceScores
;
const
float
*
priorData
=
inputs
[
2
]
->
ptr
<
float
>
();
int
num
=
inputs
[
0
]
->
size
[
0
];
int
num
=
inputs
[
0
]
->
size
[
0
];
// extract predictions from input layers
{
int
numPriors
=
inputs
[
2
]
->
size
[
2
]
/
4
;
int
numPriors
=
inputs
[
2
]
->
size
[
2
]
/
4
;
// Retrieve all location predictions.
const
float
*
locationData
=
inputs
[
0
]
->
ptr
<
float
>
();
const
float
*
confidenceData
=
inputs
[
1
]
->
ptr
<
float
>
();
const
float
*
priorData
=
inputs
[
2
]
->
ptr
<
float
>
();
// Retrieve all location predictions
std
::
vector
<
LabelBBox
>
allLocationPredictions
;
std
::
vector
<
LabelBBox
>
allLocationPredictions
;
GetLocPredictions
(
locationData
,
num
,
numPriors
,
_numLocClasses
,
GetLocPredictions
(
locationData
,
num
,
numPriors
,
_numLocClasses
,
_shareLocation
,
&
allLocationPredictions
);
_shareLocation
,
allLocationPredictions
);
// Retrieve all confidences.
// Retrieve all confidences
std
::
vector
<
std
::
vector
<
std
::
vector
<
float
>
>
>
allConfidenceScores
;
GetConfidenceScores
(
confidenceData
,
num
,
numPriors
,
_numClasses
,
allConfidenceScores
);
GetConfidenceScores
(
confidenceData
,
num
,
numPriors
,
_numClasses
,
&
allConfidenceScores
);
// Retrieve all prior bboxes. It is same within a batch since we assume all
// Retrieve all prior bboxes
// images in a batch are of same dimension.
std
::
vector
<
caffe
::
NormalizedBBox
>
priorBBoxes
;
std
::
vector
<
caffe
::
NormalizedBBox
>
priorBBoxes
;
std
::
vector
<
std
::
vector
<
float
>
>
priorVariances
;
std
::
vector
<
std
::
vector
<
float
>
>
priorVariances
;
GetPriorBBoxes
(
priorData
,
numPriors
,
&
priorBBoxes
,
&
priorVariances
);
GetPriorBBoxes
(
priorData
,
numPriors
,
priorBBoxes
,
priorVariances
);
const
bool
clip_bbox
=
false
;
// Decode all loc predictions to bboxes
// Decode all loc predictions to bboxes.
std
::
vector
<
LabelBBox
>
allDecodedBBoxes
;
DecodeBBoxesAll
(
allLocationPredictions
,
priorBBoxes
,
priorVariances
,
num
,
DecodeBBoxesAll
(
allLocationPredictions
,
priorBBoxes
,
priorVariances
,
num
,
_shareLocation
,
_numLocClasses
,
_backgroundLabelId
,
_shareLocation
,
_numLocClasses
,
_backgroundLabelId
,
_codeType
,
_varianceEncodedInTarget
,
clip_bbox
,
&
allDecodedBBoxes
);
_codeType
,
_varianceEncodedInTarget
,
false
,
allDecodedBBoxes
);
}
in
t
numKept
=
0
;
size_
t
numKept
=
0
;
std
::
vector
<
std
::
map
<
int
,
std
::
vector
<
int
>
>
>
allIndices
;
std
::
vector
<
std
::
map
<
int
,
std
::
vector
<
int
>
>
>
allIndices
;
for
(
int
i
=
0
;
i
<
num
;
++
i
)
for
(
int
i
=
0
;
i
<
num
;
++
i
)
{
{
const
LabelBBox
&
decodeBBoxes
=
allDecodedBBoxes
[
i
];
numKept
+=
processDetections_
(
allDecodedBBoxes
[
i
],
allConfidenceScores
[
i
],
allIndices
);
const
std
::
vector
<
std
::
vector
<
float
>
>&
confidenceScores
=
}
allConfidenceScores
[
i
];
std
::
map
<
int
,
std
::
vector
<
int
>
>
indices
;
if
(
numKept
==
0
)
int
numDetections
=
0
;
for
(
int
c
=
0
;
c
<
(
int
)
_numClasses
;
++
c
)
{
{
if
(
c
==
_backgroundLabelId
)
CV_ErrorNoReturn
(
Error
::
StsError
,
"Couldn't find any detections"
);
}
int
outputShape
[]
=
{
1
,
1
,
(
int
)
numKept
,
7
};
outputs
[
0
].
create
(
4
,
outputShape
,
CV_32F
);
float
*
outputsData
=
outputs
[
0
].
ptr
<
float
>
();
size_t
count
=
0
;
for
(
int
i
=
0
;
i
<
num
;
++
i
)
{
{
// Ignore background class.
count
+=
outputDetections_
(
i
,
&
outputsData
[
count
*
7
],
continue
;
allDecodedBBoxes
[
i
],
allConfidenceScores
[
i
],
allIndices
[
i
]);
}
}
if
(
confidenceScores
.
size
()
<=
c
)
CV_Assert
(
count
==
numKept
);
}
size_t
outputDetections_
(
const
int
i
,
float
*
outputsData
,
const
LabelBBox
&
decodeBBoxes
,
const
std
::
vector
<
std
::
vector
<
float
>
>&
confidenceScores
,
const
std
::
map
<
int
,
std
::
vector
<
int
>
>&
indicesMap
)
{
size_t
count
=
0
;
for
(
std
::
map
<
int
,
std
::
vector
<
int
>
>::
const_iterator
it
=
indicesMap
.
begin
();
it
!=
indicesMap
.
end
();
++
it
)
{
int
label
=
it
->
first
;
if
(
confidenceScores
.
size
()
<=
label
)
CV_ErrorNoReturn_
(
cv
::
Error
::
StsError
,
(
"Could not find confidence predictions for label %d"
,
label
));
const
std
::
vector
<
float
>&
scores
=
confidenceScores
[
label
];
int
locLabel
=
_shareLocation
?
-
1
:
label
;
LabelBBox
::
const_iterator
label_bboxes
=
decodeBBoxes
.
find
(
locLabel
);
if
(
label_bboxes
==
decodeBBoxes
.
end
())
CV_ErrorNoReturn_
(
cv
::
Error
::
StsError
,
(
"Could not find location predictions for label %d"
,
locLabel
));
const
std
::
vector
<
int
>&
indices
=
it
->
second
;
for
(
size_t
j
=
0
;
j
<
indices
.
size
();
++
j
,
++
count
)
{
{
// Something bad happened if there are no predictions for current label.
int
idx
=
indices
[
j
];
util
::
make_error
<
int
>
(
"Could not find confidence predictions for label "
,
c
);
const
caffe
::
NormalizedBBox
&
decode_bbox
=
label_bboxes
->
second
[
idx
];
outputsData
[
count
*
7
]
=
i
;
outputsData
[
count
*
7
+
1
]
=
label
;
outputsData
[
count
*
7
+
2
]
=
scores
[
idx
];
outputsData
[
count
*
7
+
3
]
=
decode_bbox
.
xmin
();
outputsData
[
count
*
7
+
4
]
=
decode_bbox
.
ymin
();
outputsData
[
count
*
7
+
5
]
=
decode_bbox
.
xmax
();
outputsData
[
count
*
7
+
6
]
=
decode_bbox
.
ymax
();
}
}
return
count
;
}
}
size_t
processDetections_
(
const
LabelBBox
&
decodeBBoxes
,
const
std
::
vector
<
std
::
vector
<
float
>
>&
confidenceScores
,
std
::
vector
<
std
::
map
<
int
,
std
::
vector
<
int
>
>
>&
allIndices
)
{
std
::
map
<
int
,
std
::
vector
<
int
>
>
indices
;
size_t
numDetections
=
0
;
for
(
int
c
=
0
;
c
<
(
int
)
_numClasses
;
++
c
)
{
if
(
c
==
_backgroundLabelId
)
continue
;
// Ignore background class.
if
(
c
>=
confidenceScores
.
size
())
CV_ErrorNoReturn_
(
cv
::
Error
::
StsError
,
(
"Could not find confidence predictions for label %d"
,
c
));
const
std
::
vector
<
float
>&
scores
=
confidenceScores
[
c
];
const
std
::
vector
<
float
>&
scores
=
confidenceScores
[
c
];
int
label
=
_shareLocation
?
-
1
:
c
;
int
label
=
_shareLocation
?
-
1
:
c
;
if
(
decodeBBoxes
.
find
(
label
)
==
decodeBBoxes
.
end
())
{
LabelBBox
::
const_iterator
label_bboxes
=
decodeBBoxes
.
find
(
label
);
// Something bad happened if there are no predictions for current label.
if
(
label_bboxes
==
decodeBBoxes
.
end
())
util
::
make_error
<
int
>
(
"Could not find location predictions for label "
,
label
);
CV_ErrorNoReturn_
(
cv
::
Error
::
StsError
,
(
"Could not find location predictions for label %d"
,
label
));
continue
;
ApplyNMSFast
(
label_bboxes
->
second
,
scores
,
_confidenceThreshold
,
_nmsThreshold
,
1.0
,
_topK
,
indices
[
c
]);
}
const
std
::
vector
<
caffe
::
NormalizedBBox
>&
bboxes
=
decodeBBoxes
.
find
(
label
)
->
second
;
ApplyNMSFast
(
bboxes
,
scores
,
_confidenceThreshold
,
_nmsThreshold
,
1.0
,
_topK
,
&
(
indices
[
c
]));
numDetections
+=
indices
[
c
].
size
();
numDetections
+=
indices
[
c
].
size
();
}
}
if
(
_keepTopK
>
-
1
&&
numDetections
>
_keepTopK
)
if
(
_keepTopK
>
-
1
&&
numDetections
>
(
size_t
)
_keepTopK
)
{
{
std
::
vector
<
std
::
pair
<
float
,
std
::
pair
<
int
,
int
>
>
>
scoreIndexPairs
;
std
::
vector
<
std
::
pair
<
float
,
std
::
pair
<
int
,
int
>
>
>
scoreIndexPairs
;
for
(
std
::
map
<
int
,
std
::
vector
<
int
>
>::
iterator
it
=
indices
.
begin
();
for
(
std
::
map
<
int
,
std
::
vector
<
int
>
>::
iterator
it
=
indices
.
begin
();
...
@@ -283,26 +316,21 @@ public:
...
@@ -283,26 +316,21 @@ public:
{
{
int
label
=
it
->
first
;
int
label
=
it
->
first
;
const
std
::
vector
<
int
>&
labelIndices
=
it
->
second
;
const
std
::
vector
<
int
>&
labelIndices
=
it
->
second
;
if
(
confidenceScores
.
size
()
<=
label
)
if
(
label
>=
confidenceScores
.
size
())
{
CV_ErrorNoReturn_
(
cv
::
Error
::
StsError
,
(
"Could not find location predictions for label %d"
,
label
));
// Something bad happened for current label.
util
::
make_error
<
int
>
(
"Could not find location predictions for label "
,
label
);
continue
;
}
const
std
::
vector
<
float
>&
scores
=
confidenceScores
[
label
];
const
std
::
vector
<
float
>&
scores
=
confidenceScores
[
label
];
for
(
size_t
j
=
0
;
j
<
labelIndices
.
size
();
++
j
)
for
(
size_t
j
=
0
;
j
<
labelIndices
.
size
();
++
j
)
{
{
size_t
idx
=
labelIndices
[
j
];
size_t
idx
=
labelIndices
[
j
];
CV_Assert
(
idx
<
scores
.
size
());
CV_Assert
(
idx
<
scores
.
size
());
scoreIndexPairs
.
push_back
(
scoreIndexPairs
.
push_back
(
std
::
make_pair
(
scores
[
idx
],
std
::
make_pair
(
label
,
idx
)));
std
::
make_pair
(
scores
[
idx
],
std
::
make_pair
(
label
,
idx
)));
}
}
}
}
// Keep outputs k results per image.
// Keep outputs k results per image.
std
::
sort
(
scoreIndexPairs
.
begin
(),
scoreIndexPairs
.
end
(),
std
::
sort
(
scoreIndexPairs
.
begin
(),
scoreIndexPairs
.
end
(),
util
::
SortScorePairDescend
<
std
::
pair
<
int
,
int
>
>
);
util
::
SortScorePairDescend
<
std
::
pair
<
int
,
int
>
>
);
scoreIndexPairs
.
resize
(
_keepTopK
);
scoreIndexPairs
.
resize
(
_keepTopK
);
// Store the new indices.
std
::
map
<
int
,
std
::
vector
<
int
>
>
newIndices
;
std
::
map
<
int
,
std
::
vector
<
int
>
>
newIndices
;
for
(
size_t
j
=
0
;
j
<
scoreIndexPairs
.
size
();
++
j
)
for
(
size_t
j
=
0
;
j
<
scoreIndexPairs
.
size
();
++
j
)
{
{
...
@@ -311,78 +339,27 @@ public:
...
@@ -311,78 +339,27 @@ public:
newIndices
[
label
].
push_back
(
idx
);
newIndices
[
label
].
push_back
(
idx
);
}
}
allIndices
.
push_back
(
newIndices
);
allIndices
.
push_back
(
newIndices
);
numKept
+=
_keepTopK
;
return
(
size_t
)
_keepTopK
;
}
}
else
else
{
{
allIndices
.
push_back
(
indices
);
allIndices
.
push_back
(
indices
);
numKept
+=
numDetections
;
return
numDetections
;
}
}
if
(
numKept
==
0
)
{
CV_ErrorNoReturn
(
Error
::
StsError
,
"Couldn't find any detections"
);
return
;
}
int
outputShape
[]
=
{
1
,
1
,
numKept
,
7
};
outputs
[
0
].
create
(
4
,
outputShape
,
CV_32F
);
float
*
outputsData
=
outputs
[
0
].
ptr
<
float
>
();
int
count
=
0
;
for
(
int
i
=
0
;
i
<
num
;
++
i
)
{
const
std
::
vector
<
std
::
vector
<
float
>
>&
confidenceScores
=
allConfidenceScores
[
i
];
const
LabelBBox
&
decodeBBoxes
=
allDecodedBBoxes
[
i
];
for
(
std
::
map
<
int
,
std
::
vector
<
int
>
>::
iterator
it
=
allIndices
[
i
].
begin
();
it
!=
allIndices
[
i
].
end
();
++
it
)
{
int
label
=
it
->
first
;
if
(
confidenceScores
.
size
()
<=
label
)
{
// Something bad happened if there are no predictions for current label.
util
::
make_error
<
int
>
(
"Could not find confidence predictions for label "
,
label
);
continue
;
}
}
const
std
::
vector
<
float
>&
scores
=
confidenceScores
[
label
];
int
locLabel
=
_shareLocation
?
-
1
:
label
;
if
(
decodeBBoxes
.
find
(
locLabel
)
==
decodeBBoxes
.
end
())
{
// Something bad happened if there are no predictions for current label.
util
::
make_error
<
int
>
(
"Could not find location predictions for label "
,
locLabel
);
continue
;
}
}
const
std
::
vector
<
caffe
::
NormalizedBBox
>&
bboxes
=
decodeBBoxes
.
find
(
locLabel
)
->
second
;
std
::
vector
<
int
>&
indices
=
it
->
second
;
for
(
size_t
j
=
0
;
j
<
indices
.
size
();
++
j
)
{
int
idx
=
indices
[
j
];
outputsData
[
count
*
7
]
=
i
;
outputsData
[
count
*
7
+
1
]
=
label
;
outputsData
[
count
*
7
+
2
]
=
scores
[
idx
];
caffe
::
NormalizedBBox
clipBBox
=
bboxes
[
idx
];
outputsData
[
count
*
7
+
3
]
=
clipBBox
.
xmin
();
outputsData
[
count
*
7
+
4
]
=
clipBBox
.
ymin
();
outputsData
[
count
*
7
+
5
]
=
clipBBox
.
xmax
();
outputsData
[
count
*
7
+
6
]
=
clipBBox
.
ymax
();
++
count
;
// **************************************************************
}
// Utility functions
}
// **************************************************************
}
}
// Compute bbox size
.
// Compute bbox size
float
BBoxSize
(
const
caffe
::
NormalizedBBox
&
bbox
,
template
<
bool
normalized
>
const
bool
normalized
=
true
)
static
float
BBoxSize
(
const
caffe
::
NormalizedBBox
&
bbox
)
{
{
if
(
bbox
.
xmax
()
<
bbox
.
xmin
()
||
bbox
.
ymax
()
<
bbox
.
ymin
())
if
(
bbox
.
xmax
()
<
bbox
.
xmin
()
||
bbox
.
ymax
()
<
bbox
.
ymin
())
{
{
// If bbox is invalid (e.g. xmax < xmin or ymax < ymin), return 0.
return
0
;
// If bbox is invalid (e.g. xmax < xmin or ymax < ymin), return 0.
return
0
;
}
}
else
else
{
{
...
@@ -407,193 +384,155 @@ public:
...
@@ -407,193 +384,155 @@ public:
}
}
}
}
// Clip the caffe::NormalizedBBox such that the range for each corner is [0, 1].
void
ClipBBox
(
const
caffe
::
NormalizedBBox
&
bbox
,
caffe
::
NormalizedBBox
*
clipBBox
)
{
clipBBox
->
set_xmin
(
std
::
max
(
std
::
min
(
bbox
.
xmin
(),
1.
f
),
0.
f
));
clipBBox
->
set_ymin
(
std
::
max
(
std
::
min
(
bbox
.
ymin
(),
1.
f
),
0.
f
));
clipBBox
->
set_xmax
(
std
::
max
(
std
::
min
(
bbox
.
xmax
(),
1.
f
),
0.
f
));
clipBBox
->
set_ymax
(
std
::
max
(
std
::
min
(
bbox
.
ymax
(),
1.
f
),
0.
f
));
clipBBox
->
clear_size
();
clipBBox
->
set_size
(
BBoxSize
(
*
clipBBox
));
clipBBox
->
set_difficult
(
bbox
.
difficult
());
}
// Decode a bbox according to a prior bbox.
// Decode a bbox according to a prior bbox
void
DecodeBBox
(
template
<
bool
variance_encoded_in_target
>
static
void
DecodeBBox
(
const
caffe
::
NormalizedBBox
&
prior_bbox
,
const
std
::
vector
<
float
>&
prior_variance
,
const
caffe
::
NormalizedBBox
&
prior_bbox
,
const
std
::
vector
<
float
>&
prior_variance
,
const
CodeType
code_type
,
const
bool
variance_encoded_in_target
,
const
CodeType
code_type
,
const
bool
clip_bbox
,
const
caffe
::
NormalizedBBox
&
bbox
,
const
bool
clip_bbox
,
const
caffe
::
NormalizedBBox
&
bbox
,
caffe
::
NormalizedBBox
*
decode_bbox
)
{
caffe
::
NormalizedBBox
&
decode_bbox
)
if
(
code_type
==
caffe
::
PriorBoxParameter_CodeType_CORNER
)
{
{
if
(
variance_encoded_in_target
)
{
float
bbox_xmin
=
variance_encoded_in_target
?
bbox
.
xmin
()
:
prior_variance
[
0
]
*
bbox
.
xmin
();
// variance is encoded in target, we simply need to add the offset
float
bbox_ymin
=
variance_encoded_in_target
?
bbox
.
ymin
()
:
prior_variance
[
1
]
*
bbox
.
ymin
();
// predictions.
float
bbox_xmax
=
variance_encoded_in_target
?
bbox
.
xmax
()
:
prior_variance
[
2
]
*
bbox
.
xmax
();
decode_bbox
->
set_xmin
(
prior_bbox
.
xmin
()
+
bbox
.
xmin
());
float
bbox_ymax
=
variance_encoded_in_target
?
bbox
.
ymax
()
:
prior_variance
[
3
]
*
bbox
.
ymax
();
decode_bbox
->
set_ymin
(
prior_bbox
.
ymin
()
+
bbox
.
ymin
());
switch
(
code_type
)
decode_bbox
->
set_xmax
(
prior_bbox
.
xmax
()
+
bbox
.
xmax
());
{
decode_bbox
->
set_ymax
(
prior_bbox
.
ymax
()
+
bbox
.
ymax
());
case
caffe
:
:
PriorBoxParameter_CodeType_CORNER
:
}
else
{
decode_bbox
.
set_xmin
(
prior_bbox
.
xmin
()
+
bbox_xmin
);
// variance is encoded in bbox, we need to scale the offset accordingly.
decode_bbox
.
set_ymin
(
prior_bbox
.
ymin
()
+
bbox_ymin
);
decode_bbox
->
set_xmin
(
decode_bbox
.
set_xmax
(
prior_bbox
.
xmax
()
+
bbox_xmax
);
prior_bbox
.
xmin
()
+
prior_variance
[
0
]
*
bbox
.
xmin
());
decode_bbox
.
set_ymax
(
prior_bbox
.
ymax
()
+
bbox_ymax
);
decode_bbox
->
set_ymin
(
break
;
prior_bbox
.
ymin
()
+
prior_variance
[
1
]
*
bbox
.
ymin
());
case
caffe
:
:
PriorBoxParameter_CodeType_CENTER_SIZE
:
decode_bbox
->
set_xmax
(
{
prior_bbox
.
xmax
()
+
prior_variance
[
2
]
*
bbox
.
xmax
());
decode_bbox
->
set_ymax
(
prior_bbox
.
ymax
()
+
prior_variance
[
3
]
*
bbox
.
ymax
());
}
}
else
if
(
code_type
==
caffe
::
PriorBoxParameter_CodeType_CENTER_SIZE
)
{
float
prior_width
=
prior_bbox
.
xmax
()
-
prior_bbox
.
xmin
();
float
prior_width
=
prior_bbox
.
xmax
()
-
prior_bbox
.
xmin
();
CV_Assert
(
prior_width
>
0
);
CV_Assert
(
prior_width
>
0
);
float
prior_height
=
prior_bbox
.
ymax
()
-
prior_bbox
.
ymin
();
float
prior_height
=
prior_bbox
.
ymax
()
-
prior_bbox
.
ymin
();
CV_Assert
(
prior_height
>
0
);
CV_Assert
(
prior_height
>
0
);
float
prior_center_x
=
(
prior_bbox
.
xmin
()
+
prior_bbox
.
xmax
())
/
2.
;
float
prior_center_x
=
(
prior_bbox
.
xmin
()
+
prior_bbox
.
xmax
())
*
.5
;
float
prior_center_y
=
(
prior_bbox
.
ymin
()
+
prior_bbox
.
ymax
())
/
2.
;
float
prior_center_y
=
(
prior_bbox
.
ymin
()
+
prior_bbox
.
ymax
())
*
.5
;
float
decode_bbox_center_x
,
decode_bbox_center_y
;
float
decode_bbox_center_x
,
decode_bbox_center_y
;
float
decode_bbox_width
,
decode_bbox_height
;
float
decode_bbox_width
,
decode_bbox_height
;
if
(
variance_encoded_in_target
)
{
decode_bbox_center_x
=
bbox_xmin
*
prior_width
+
prior_center_x
;
// variance is encoded in target, we simply need to retore the offset
decode_bbox_center_y
=
bbox_ymin
*
prior_height
+
prior_center_y
;
// predictions.
decode_bbox_width
=
exp
(
bbox_xmax
)
*
prior_width
;
decode_bbox_center_x
=
bbox
.
xmin
()
*
prior_width
+
prior_center_x
;
decode_bbox_height
=
exp
(
bbox_ymax
)
*
prior_height
;
decode_bbox_center_y
=
bbox
.
ymin
()
*
prior_height
+
prior_center_y
;
decode_bbox
.
set_xmin
(
decode_bbox_center_x
-
decode_bbox_width
*
.5
);
decode_bbox_width
=
exp
(
bbox
.
xmax
())
*
prior_width
;
decode_bbox
.
set_ymin
(
decode_bbox_center_y
-
decode_bbox_height
*
.5
);
decode_bbox_height
=
exp
(
bbox
.
ymax
())
*
prior_height
;
decode_bbox
.
set_xmax
(
decode_bbox_center_x
+
decode_bbox_width
*
.5
);
}
else
{
decode_bbox
.
set_ymax
(
decode_bbox_center_y
+
decode_bbox_height
*
.5
);
// variance is encoded in bbox, we need to scale the offset accordingly.
break
;
decode_bbox_center_x
=
}
prior_variance
[
0
]
*
bbox
.
xmin
()
*
prior_width
+
prior_center_x
;
default
:
decode_bbox_center_y
=
CV_ErrorNoReturn
(
Error
::
StsBadArg
,
"Unknown type."
);
prior_variance
[
1
]
*
bbox
.
ymin
()
*
prior_height
+
prior_center_y
;
};
decode_bbox_width
=
if
(
clip_bbox
)
exp
(
prior_variance
[
2
]
*
bbox
.
xmax
())
*
prior_width
;
{
decode_bbox_height
=
// Clip the caffe::NormalizedBBox such that the range for each corner is [0, 1]
exp
(
prior_variance
[
3
]
*
bbox
.
ymax
())
*
prior_height
;
decode_bbox
.
set_xmin
(
std
::
max
(
std
::
min
(
decode_bbox
.
xmin
(),
1.
f
),
0.
f
));
}
decode_bbox
.
set_ymin
(
std
::
max
(
std
::
min
(
decode_bbox
.
ymin
(),
1.
f
),
0.
f
));
decode_bbox
.
set_xmax
(
std
::
max
(
std
::
min
(
decode_bbox
.
xmax
(),
1.
f
),
0.
f
));
decode_bbox
->
set_xmin
(
decode_bbox_center_x
-
decode_bbox_width
/
2.
);
decode_bbox
.
set_ymax
(
std
::
max
(
std
::
min
(
decode_bbox
.
ymax
(),
1.
f
),
0.
f
));
decode_bbox
->
set_ymin
(
decode_bbox_center_y
-
decode_bbox_height
/
2.
);
}
decode_bbox
->
set_xmax
(
decode_bbox_center_x
+
decode_bbox_width
/
2.
);
decode_bbox
.
clear_size
();
decode_bbox
->
set_ymax
(
decode_bbox_center_y
+
decode_bbox_height
/
2.
);
decode_bbox
.
set_size
(
BBoxSize
<
true
>
(
decode_bbox
));
}
else
{
}
CV_Error
(
Error
::
StsBadArg
,
"Unknown LocLossType."
);
}
// Decode a set of bboxes according to a set of prior bboxes
float
bbox_size
=
BBoxSize
(
*
decode_bbox
);
static
void
DecodeBBoxes
(
decode_bbox
->
set_size
(
bbox_size
);
if
(
clip_bbox
)
{
ClipBBox
(
*
decode_bbox
,
decode_bbox
);
}
}
// Decode a set of bboxes according to a set of prior bboxes.
void
DecodeBBoxes
(
const
std
::
vector
<
caffe
::
NormalizedBBox
>&
prior_bboxes
,
const
std
::
vector
<
caffe
::
NormalizedBBox
>&
prior_bboxes
,
const
std
::
vector
<
std
::
vector
<
float
>
>&
prior_variances
,
const
std
::
vector
<
std
::
vector
<
float
>
>&
prior_variances
,
const
CodeType
code_type
,
const
bool
variance_encoded_in_target
,
const
CodeType
code_type
,
const
bool
variance_encoded_in_target
,
const
bool
clip_bbox
,
const
std
::
vector
<
caffe
::
NormalizedBBox
>&
bboxes
,
const
bool
clip_bbox
,
const
std
::
vector
<
caffe
::
NormalizedBBox
>&
bboxes
,
std
::
vector
<
caffe
::
NormalizedBBox
>*
decode_bboxes
)
{
std
::
vector
<
caffe
::
NormalizedBBox
>&
decode_bboxes
)
{
CV_Assert
(
prior_bboxes
.
size
()
==
prior_variances
.
size
());
CV_Assert
(
prior_bboxes
.
size
()
==
prior_variances
.
size
());
CV_Assert
(
prior_bboxes
.
size
()
==
bboxes
.
size
());
CV_Assert
(
prior_bboxes
.
size
()
==
bboxes
.
size
());
int
num_bboxes
=
prior_bboxes
.
size
();
size_t
num_bboxes
=
prior_bboxes
.
size
();
if
(
num_bboxes
>=
1
)
{
CV_Assert
(
num_bboxes
==
0
||
prior_variances
[
0
].
size
()
==
4
);
CV_Assert
(
prior_variances
[
0
].
size
()
==
4
);
decode_bboxes
.
clear
();
decode_bboxes
.
resize
(
num_bboxes
);
if
(
variance_encoded_in_target
)
{
for
(
int
i
=
0
;
i
<
num_bboxes
;
++
i
)
DecodeBBox
<
true
>
(
prior_bboxes
[
i
],
prior_variances
[
i
],
code_type
,
clip_bbox
,
bboxes
[
i
],
decode_bboxes
[
i
]);
}
}
decode_bboxes
->
clear
();
else
for
(
int
i
=
0
;
i
<
num_bboxes
;
++
i
)
{
{
caffe
::
NormalizedBBox
decode_bbox
;
for
(
int
i
=
0
;
i
<
num_bboxes
;
++
i
)
DecodeBBox
(
prior_bboxes
[
i
],
prior_variances
[
i
],
code_type
,
DecodeBBox
<
false
>
(
prior_bboxes
[
i
],
prior_variances
[
i
],
code_type
,
variance_encoded_in_target
,
clip_bbox
,
bboxes
[
i
],
&
decode_bbox
);
clip_bbox
,
bboxes
[
i
],
decode_bboxes
[
i
]);
decode_bboxes
->
push_back
(
decode_bbox
);
}
}
}
}
// Decode all bboxes in a batch
.
// Decode all bboxes in a batch
void
DecodeBBoxesAll
(
const
std
::
vector
<
LabelBBox
>&
all_loc_preds
,
static
void
DecodeBBoxesAll
(
const
std
::
vector
<
LabelBBox
>&
all_loc_preds
,
const
std
::
vector
<
caffe
::
NormalizedBBox
>&
prior_bboxes
,
const
std
::
vector
<
caffe
::
NormalizedBBox
>&
prior_bboxes
,
const
std
::
vector
<
std
::
vector
<
float
>
>&
prior_variances
,
const
std
::
vector
<
std
::
vector
<
float
>
>&
prior_variances
,
const
int
num
,
const
bool
share_location
,
const
int
num
,
const
bool
share_location
,
const
int
num_loc_classes
,
const
int
background_label_id
,
const
int
num_loc_classes
,
const
int
background_label_id
,
const
CodeType
code_type
,
const
bool
variance_encoded_in_target
,
const
CodeType
code_type
,
const
bool
variance_encoded_in_target
,
const
bool
clip
,
std
::
vector
<
LabelBBox
>*
all_decode_bboxes
)
{
const
bool
clip
,
std
::
vector
<
LabelBBox
>&
all_decode_bboxes
)
{
CV_Assert
(
all_loc_preds
.
size
()
==
num
);
CV_Assert
(
all_loc_preds
.
size
()
==
num
);
all_decode_bboxes
->
clear
();
all_decode_bboxes
.
clear
();
all_decode_bboxes
->
resize
(
num
);
all_decode_bboxes
.
resize
(
num
);
for
(
int
i
=
0
;
i
<
num
;
++
i
)
{
for
(
int
i
=
0
;
i
<
num
;
++
i
)
{
// Decode predictions into bboxes.
// Decode predictions into bboxes.
LabelBBox
&
decode_bboxes
=
(
*
all_decode_bboxes
)[
i
];
const
LabelBBox
&
loc_preds
=
all_loc_preds
[
i
];
for
(
int
c
=
0
;
c
<
num_loc_classes
;
++
c
)
{
LabelBBox
&
decode_bboxes
=
all_decode_bboxes
[
i
];
for
(
int
c
=
0
;
c
<
num_loc_classes
;
++
c
)
{
int
label
=
share_location
?
-
1
:
c
;
int
label
=
share_location
?
-
1
:
c
;
if
(
label
==
background_label_id
)
{
if
(
label
==
background_label_id
)
// Ignore background class.
continue
;
// Ignore background class.
continue
;
LabelBBox
::
const_iterator
label_loc_preds
=
loc_preds
.
find
(
label
);
}
if
(
label_loc_preds
==
loc_preds
.
end
())
if
(
all_loc_preds
[
i
].
find
(
label
)
==
all_loc_preds
[
i
].
end
())
{
CV_ErrorNoReturn_
(
cv
::
Error
::
StsError
,
(
"Could not find location predictions for label %d"
,
label
));
// Something bad happened if there are no predictions for current label.
util
::
make_error
<
int
>
(
"Could not find location predictions for label "
,
label
);
}
const
std
::
vector
<
caffe
::
NormalizedBBox
>&
label_loc_preds
=
all_loc_preds
[
i
].
find
(
label
)
->
second
;
DecodeBBoxes
(
prior_bboxes
,
prior_variances
,
DecodeBBoxes
(
prior_bboxes
,
prior_variances
,
code_type
,
variance_encoded_in_target
,
clip
,
code_type
,
variance_encoded_in_target
,
clip
,
label_loc_preds
,
&
(
decode_bboxes
[
label
])
);
label_loc_preds
->
second
,
decode_bboxes
[
label
]
);
}
}
}
}
}
}
// Get prior bounding boxes from prior_data
.
// Get prior bounding boxes from prior_data
// prior_data: 1 x 2 x num_priors * 4 x 1 blob.
// prior_data: 1 x 2 x num_priors * 4 x 1 blob.
// num_priors: number of priors.
// num_priors: number of priors.
// prior_bboxes: stores all the prior bboxes in the format of caffe::NormalizedBBox.
// prior_bboxes: stores all the prior bboxes in the format of caffe::NormalizedBBox.
// prior_variances: stores all the variances needed by prior bboxes.
// prior_variances: stores all the variances needed by prior bboxes.
void
GetPriorBBoxes
(
const
float
*
priorData
,
const
int
&
numPriors
,
static
void
GetPriorBBoxes
(
const
float
*
priorData
,
const
int
&
numPriors
,
std
::
vector
<
caffe
::
NormalizedBBox
>
*
priorBBoxes
,
std
::
vector
<
caffe
::
NormalizedBBox
>
&
priorBBoxes
,
std
::
vector
<
std
::
vector
<
float
>
>
*
priorVariances
)
std
::
vector
<
std
::
vector
<
float
>
>
&
priorVariances
)
{
{
priorBBoxes
->
clear
(
);
priorBBoxes
.
clear
();
priorBBoxes
.
resize
(
numPriors
);
priorVariances
->
clear
(
);
priorVariances
.
clear
();
priorVariances
.
resize
(
numPriors
);
for
(
int
i
=
0
;
i
<
numPriors
;
++
i
)
for
(
int
i
=
0
;
i
<
numPriors
;
++
i
)
{
{
int
startIdx
=
i
*
4
;
int
startIdx
=
i
*
4
;
caffe
::
NormalizedBBox
bbox
;
caffe
::
NormalizedBBox
&
bbox
=
priorBBoxes
[
i
]
;
bbox
.
set_xmin
(
priorData
[
startIdx
]);
bbox
.
set_xmin
(
priorData
[
startIdx
]);
bbox
.
set_ymin
(
priorData
[
startIdx
+
1
]);
bbox
.
set_ymin
(
priorData
[
startIdx
+
1
]);
bbox
.
set_xmax
(
priorData
[
startIdx
+
2
]);
bbox
.
set_xmax
(
priorData
[
startIdx
+
2
]);
bbox
.
set_ymax
(
priorData
[
startIdx
+
3
]);
bbox
.
set_ymax
(
priorData
[
startIdx
+
3
]);
float
bboxSize
=
BBoxSize
(
bbox
);
bbox
.
set_size
(
BBoxSize
<
true
>
(
bbox
));
bbox
.
set_size
(
bboxSize
);
priorBBoxes
->
push_back
(
bbox
);
}
}
for
(
int
i
=
0
;
i
<
numPriors
;
++
i
)
for
(
int
i
=
0
;
i
<
numPriors
;
++
i
)
{
{
int
startIdx
=
(
numPriors
+
i
)
*
4
;
int
startIdx
=
(
numPriors
+
i
)
*
4
;
std
::
vector
<
float
>
var
;
// not needed here: priorVariances[i].clear()
;
for
(
int
j
=
0
;
j
<
4
;
++
j
)
for
(
int
j
=
0
;
j
<
4
;
++
j
)
{
{
var
.
push_back
(
priorData
[
startIdx
+
j
]);
priorVariances
[
i
]
.
push_back
(
priorData
[
startIdx
+
j
]);
}
}
priorVariances
->
push_back
(
var
);
}
}
}
}
// Scale the caffe::NormalizedBBox w.r.t. height and width.
void
ScaleBBox
(
const
caffe
::
NormalizedBBox
&
bbox
,
const
int
height
,
const
int
width
,
caffe
::
NormalizedBBox
*
scaleBBox
)
{
scaleBBox
->
set_xmin
(
bbox
.
xmin
()
*
width
);
scaleBBox
->
set_ymin
(
bbox
.
ymin
()
*
height
);
scaleBBox
->
set_xmax
(
bbox
.
xmax
()
*
width
);
scaleBBox
->
set_ymax
(
bbox
.
ymax
()
*
height
);
scaleBBox
->
clear_size
();
bool
normalized
=
!
(
width
>
1
||
height
>
1
);
scaleBBox
->
set_size
(
BBoxSize
(
*
scaleBBox
,
normalized
));
scaleBBox
->
set_difficult
(
bbox
.
difficult
());
}
// Get location predictions from loc_data.
// Get location predictions from loc_data.
// loc_data: num x num_preds_per_class * num_loc_classes * 4 blob.
// loc_data: num x num_preds_per_class * num_loc_classes * 4 blob.
// num: the number of images.
// num: the number of images.
...
@@ -603,19 +542,19 @@ public:
...
@@ -603,19 +542,19 @@ public:
// share_location: if true, all classes share the same location prediction.
// share_location: if true, all classes share the same location prediction.
// loc_preds: stores the location prediction, where each item contains
// loc_preds: stores the location prediction, where each item contains
// location prediction for an image.
// location prediction for an image.
void
GetLocPredictions
(
const
float
*
locData
,
const
int
num
,
static
void
GetLocPredictions
(
const
float
*
locData
,
const
int
num
,
const
int
numPredsPerClass
,
const
int
numLocClasses
,
const
int
numPredsPerClass
,
const
int
numLocClasses
,
const
bool
shareLocation
,
std
::
vector
<
LabelBBox
>
*
locPreds
)
const
bool
shareLocation
,
std
::
vector
<
LabelBBox
>
&
locPreds
)
{
{
locPreds
->
clear
();
locPreds
.
clear
();
if
(
shareLocation
)
if
(
shareLocation
)
{
{
CV_Assert
(
numLocClasses
==
1
);
CV_Assert
(
numLocClasses
==
1
);
}
}
locPreds
->
resize
(
num
);
locPreds
.
resize
(
num
);
for
(
int
i
=
0
;
i
<
num
;
++
i
)
for
(
int
i
=
0
;
i
<
num
;
++
i
,
locData
+=
numPredsPerClass
*
numLocClasses
*
4
)
{
{
LabelBBox
&
labelBBox
=
(
*
locPreds
)
[
i
];
LabelBBox
&
labelBBox
=
locPreds
[
i
];
for
(
int
p
=
0
;
p
<
numPredsPerClass
;
++
p
)
for
(
int
p
=
0
;
p
<
numPredsPerClass
;
++
p
)
{
{
int
startIdx
=
p
*
numLocClasses
*
4
;
int
startIdx
=
p
*
numLocClasses
*
4
;
...
@@ -626,13 +565,13 @@ public:
...
@@ -626,13 +565,13 @@ public:
{
{
labelBBox
[
label
].
resize
(
numPredsPerClass
);
labelBBox
[
label
].
resize
(
numPredsPerClass
);
}
}
labelBBox
[
label
][
p
].
set_xmin
(
locData
[
startIdx
+
c
*
4
]);
caffe
::
NormalizedBBox
&
bbox
=
labelBBox
[
label
][
p
];
labelBBox
[
label
][
p
].
set_ymin
(
locData
[
startIdx
+
c
*
4
+
1
]);
bbox
.
set_xmin
(
locData
[
startIdx
+
c
*
4
]);
labelBBox
[
label
][
p
].
set_xmax
(
locData
[
startIdx
+
c
*
4
+
2
]);
bbox
.
set_ymin
(
locData
[
startIdx
+
c
*
4
+
1
]);
labelBBox
[
label
][
p
].
set_ymax
(
locData
[
startIdx
+
c
*
4
+
3
]);
bbox
.
set_xmax
(
locData
[
startIdx
+
c
*
4
+
2
]);
bbox
.
set_ymax
(
locData
[
startIdx
+
c
*
4
+
3
]);
}
}
}
}
locData
+=
numPredsPerClass
*
numLocClasses
*
4
;
}
}
}
}
...
@@ -643,25 +582,24 @@ public:
...
@@ -643,25 +582,24 @@ public:
// num_classes: number of classes.
// num_classes: number of classes.
// conf_preds: stores the confidence prediction, where each item contains
// conf_preds: stores the confidence prediction, where each item contains
// confidence prediction for an image.
// confidence prediction for an image.
void
GetConfidenceScores
(
const
float
*
confData
,
const
int
num
,
static
void
GetConfidenceScores
(
const
float
*
confData
,
const
int
num
,
const
int
numPredsPerClass
,
const
int
numClasses
,
const
int
numPredsPerClass
,
const
int
numClasses
,
std
::
vector
<
std
::
vector
<
std
::
vector
<
float
>
>
>
*
confPreds
)
std
::
vector
<
std
::
vector
<
std
::
vector
<
float
>
>
>
&
confPreds
)
{
{
confPreds
->
clear
();
confPreds
.
clear
();
confPreds
.
resize
(
num
);
confPreds
->
resize
(
num
);
for
(
int
i
=
0
;
i
<
num
;
++
i
,
confData
+=
numPredsPerClass
*
numClasses
)
for
(
int
i
=
0
;
i
<
num
;
++
i
)
{
{
std
::
vector
<
std
::
vector
<
float
>
>&
labelScores
=
(
*
confPreds
)
[
i
];
std
::
vector
<
std
::
vector
<
float
>
>&
labelScores
=
confPreds
[
i
];
labelScores
.
resize
(
numClasses
);
labelScores
.
resize
(
numClasses
);
for
(
int
p
=
0
;
p
<
numPredsPerClass
;
++
p
)
{
int
startIdx
=
p
*
numClasses
;
for
(
int
c
=
0
;
c
<
numClasses
;
++
c
)
for
(
int
c
=
0
;
c
<
numClasses
;
++
c
)
{
{
labelScores
[
c
].
push_back
(
confData
[
startIdx
+
c
]);
std
::
vector
<
float
>&
classLabelScores
=
labelScores
[
c
];
classLabelScores
.
resize
(
numPredsPerClass
);
for
(
int
p
=
0
;
p
<
numPredsPerClass
;
++
p
)
{
classLabelScores
[
p
]
=
confData
[
p
*
numClasses
+
c
];
}
}
}
}
confData
+=
numPredsPerClass
*
numClasses
;
}
}
}
}
...
@@ -674,35 +612,30 @@ public:
...
@@ -674,35 +612,30 @@ public:
// nms_threshold: a threshold used in non maximum suppression.
// nms_threshold: a threshold used in non maximum suppression.
// top_k: if not -1, keep at most top_k picked indices.
// top_k: if not -1, keep at most top_k picked indices.
// indices: the kept indices of bboxes after nms.
// indices: the kept indices of bboxes after nms.
void
ApplyNMSFast
(
const
std
::
vector
<
caffe
::
NormalizedBBox
>&
bboxes
,
static
void
ApplyNMSFast
(
const
std
::
vector
<
caffe
::
NormalizedBBox
>&
bboxes
,
const
std
::
vector
<
float
>&
scores
,
const
float
score_threshold
,
const
std
::
vector
<
float
>&
scores
,
const
float
score_threshold
,
const
float
nms_threshold
,
const
float
eta
,
const
int
top_k
,
const
float
nms_threshold
,
const
float
eta
,
const
int
top_k
,
std
::
vector
<
int
>
*
indices
)
{
std
::
vector
<
int
>
&
indices
)
// Sanity check.
{
CV_Assert
(
bboxes
.
size
()
==
scores
.
size
());
CV_Assert
(
bboxes
.
size
()
==
scores
.
size
());
// Get top_k scores (with corresponding indices).
// Get top_k scores (with corresponding indices).
std
::
vector
<
std
::
pair
<
float
,
int
>
>
score_index_vec
;
std
::
vector
<
std
::
pair
<
float
,
int
>
>
score_index_vec
;
GetMaxScoreIndex
(
scores
,
score_threshold
,
top_k
,
&
score_index_vec
);
GetMaxScoreIndex
(
scores
,
score_threshold
,
top_k
,
score_index_vec
);
// Do nms.
// Do nms.
float
adaptive_threshold
=
nms_threshold
;
float
adaptive_threshold
=
nms_threshold
;
indices
->
clear
();
indices
.
clear
();
while
(
score_index_vec
.
size
()
!=
0
)
{
while
(
score_index_vec
.
size
()
!=
0
)
{
const
int
idx
=
score_index_vec
.
front
().
second
;
const
int
idx
=
score_index_vec
.
front
().
second
;
bool
keep
=
true
;
bool
keep
=
true
;
for
(
int
k
=
0
;
k
<
indices
->
size
();
++
k
)
{
for
(
int
k
=
0
;
k
<
(
int
)
indices
.
size
()
&&
keep
;
++
k
)
{
if
(
keep
)
{
const
int
kept_idx
=
indices
[
k
];
const
int
kept_idx
=
(
*
indices
)[
k
];
float
overlap
=
JaccardOverlap
<
true
>
(
bboxes
[
idx
],
bboxes
[
kept_idx
]);
float
overlap
=
JaccardOverlap
(
bboxes
[
idx
],
bboxes
[
kept_idx
]);
keep
=
overlap
<=
adaptive_threshold
;
keep
=
overlap
<=
adaptive_threshold
;
}
else
{
break
;
}
}
if
(
keep
)
{
indices
->
push_back
(
idx
);
}
}
if
(
keep
)
indices
.
push_back
(
idx
);
score_index_vec
.
erase
(
score_index_vec
.
begin
());
score_index_vec
.
erase
(
score_index_vec
.
begin
());
if
(
keep
&&
eta
<
1
&&
adaptive_threshold
>
0.5
)
{
if
(
keep
&&
eta
<
1
&&
adaptive_threshold
>
0.5
)
{
adaptive_threshold
*=
eta
;
adaptive_threshold
*=
eta
;
...
@@ -715,74 +648,66 @@ public:
...
@@ -715,74 +648,66 @@ public:
// threshold: only consider scores higher than the threshold.
// threshold: only consider scores higher than the threshold.
// top_k: if -1, keep all; otherwise, keep at most top_k.
// top_k: if -1, keep all; otherwise, keep at most top_k.
// score_index_vec: store the sorted (score, index) pair.
// score_index_vec: store the sorted (score, index) pair.
void
GetMaxScoreIndex
(
const
std
::
vector
<
float
>&
scores
,
const
float
threshold
,
const
int
top_k
,
static
void
GetMaxScoreIndex
(
const
std
::
vector
<
float
>&
scores
,
const
float
threshold
,
const
int
top_k
,
std
::
vector
<
std
::
pair
<
float
,
int
>
>
*
score_index_vec
)
std
::
vector
<
std
::
pair
<
float
,
int
>
>
&
score_index_vec
)
{
{
CV_DbgAssert
(
score_index_vec
.
empty
());
// Generate index score pairs.
// Generate index score pairs.
for
(
size_t
i
=
0
;
i
<
scores
.
size
();
++
i
)
for
(
size_t
i
=
0
;
i
<
scores
.
size
();
++
i
)
{
{
if
(
scores
[
i
]
>
threshold
)
if
(
scores
[
i
]
>
threshold
)
{
{
score_index_vec
->
push_back
(
std
::
make_pair
(
scores
[
i
],
i
));
score_index_vec
.
push_back
(
std
::
make_pair
(
scores
[
i
],
i
));
}
}
}
}
// Sort the score pair according to the scores in descending order
// Sort the score pair according to the scores in descending order
std
::
stable_sort
(
score_index_vec
->
begin
(),
score_index_vec
->
end
(),
std
::
stable_sort
(
score_index_vec
.
begin
(),
score_index_vec
.
end
(),
util
::
SortScorePairDescend
<
int
>
);
util
::
SortScorePairDescend
<
int
>
);
// Keep top_k scores if needed.
// Keep top_k scores if needed.
if
(
top_k
>
-
1
&&
top_k
<
(
int
)
score_index_vec
->
size
())
if
(
top_k
>
-
1
&&
top_k
<
(
int
)
score_index_vec
.
size
())
{
{
score_index_vec
->
resize
(
top_k
);
score_index_vec
.
resize
(
top_k
);
}
}
}
}
// Compute the intersection between two bboxes.
// Compute the jaccard (intersection over union IoU) overlap between two bboxes.
void
IntersectBBox
(
const
caffe
::
NormalizedBBox
&
bbox1
,
template
<
bool
normalized
>
const
caffe
::
NormalizedBBox
&
bbox2
,
static
float
JaccardOverlap
(
const
caffe
::
NormalizedBBox
&
bbox1
,
caffe
::
NormalizedBBox
*
intersect_bbox
)
{
const
caffe
::
NormalizedBBox
&
bbox2
)
{
caffe
::
NormalizedBBox
intersect_bbox
;
if
(
bbox2
.
xmin
()
>
bbox1
.
xmax
()
||
bbox2
.
xmax
()
<
bbox1
.
xmin
()
||
if
(
bbox2
.
xmin
()
>
bbox1
.
xmax
()
||
bbox2
.
xmax
()
<
bbox1
.
xmin
()
||
bbox2
.
ymin
()
>
bbox1
.
ymax
()
||
bbox2
.
ymax
()
<
bbox1
.
ymin
())
bbox2
.
ymin
()
>
bbox1
.
ymax
()
||
bbox2
.
ymax
()
<
bbox1
.
ymin
())
{
{
// Return [0, 0, 0, 0] if there is no intersection.
// Return [0, 0, 0, 0] if there is no intersection.
intersect_bbox
->
set_xmin
(
0
);
intersect_bbox
.
set_xmin
(
0
);
intersect_bbox
->
set_ymin
(
0
);
intersect_bbox
.
set_ymin
(
0
);
intersect_bbox
->
set_xmax
(
0
);
intersect_bbox
.
set_xmax
(
0
);
intersect_bbox
->
set_ymax
(
0
);
intersect_bbox
.
set_ymax
(
0
);
}
}
else
else
{
{
intersect_bbox
->
set_xmin
(
std
::
max
(
bbox1
.
xmin
(),
bbox2
.
xmin
()));
intersect_bbox
.
set_xmin
(
std
::
max
(
bbox1
.
xmin
(),
bbox2
.
xmin
()));
intersect_bbox
->
set_ymin
(
std
::
max
(
bbox1
.
ymin
(),
bbox2
.
ymin
()));
intersect_bbox
.
set_ymin
(
std
::
max
(
bbox1
.
ymin
(),
bbox2
.
ymin
()));
intersect_bbox
->
set_xmax
(
std
::
min
(
bbox1
.
xmax
(),
bbox2
.
xmax
()));
intersect_bbox
.
set_xmax
(
std
::
min
(
bbox1
.
xmax
(),
bbox2
.
xmax
()));
intersect_bbox
->
set_ymax
(
std
::
min
(
bbox1
.
ymax
(),
bbox2
.
ymax
()));
intersect_bbox
.
set_ymax
(
std
::
min
(
bbox1
.
ymax
(),
bbox2
.
ymax
()));
}
}
}
// Compute the jaccard (intersection over union IoU) overlap between two bboxes.
float
JaccardOverlap
(
const
caffe
::
NormalizedBBox
&
bbox1
,
const
caffe
::
NormalizedBBox
&
bbox2
,
const
bool
normalized
=
true
)
{
caffe
::
NormalizedBBox
intersect_bbox
;
IntersectBBox
(
bbox1
,
bbox2
,
&
intersect_bbox
);
float
intersect_width
,
intersect_height
;
float
intersect_width
,
intersect_height
;
if
(
normalized
)
{
intersect_width
=
intersect_bbox
.
xmax
()
-
intersect_bbox
.
xmin
();
intersect_width
=
intersect_bbox
.
xmax
()
-
intersect_bbox
.
xmin
();
intersect_height
=
intersect_bbox
.
ymax
()
-
intersect_bbox
.
ymin
();
intersect_height
=
intersect_bbox
.
ymax
()
-
intersect_bbox
.
ymin
();
}
else
{
intersect_width
=
intersect_bbox
.
xmax
()
-
intersect_bbox
.
xmin
()
+
1
;
intersect_height
=
intersect_bbox
.
ymax
()
-
intersect_bbox
.
ymin
()
+
1
;
}
if
(
intersect_width
>
0
&&
intersect_height
>
0
)
if
(
intersect_width
>
0
&&
intersect_height
>
0
)
{
{
if
(
!
normalized
)
{
intersect_width
++
;
intersect_height
++
;
}
float
intersect_size
=
intersect_width
*
intersect_height
;
float
intersect_size
=
intersect_width
*
intersect_height
;
float
bbox1_size
=
BBoxSize
(
bbox1
);
float
bbox1_size
=
BBoxSize
<
true
>
(
bbox1
);
float
bbox2_size
=
BBoxSize
(
bbox2
);
float
bbox2_size
=
BBoxSize
<
true
>
(
bbox2
);
return
intersect_size
/
(
bbox1_size
+
bbox2_size
-
intersect_size
);
return
intersect_size
/
(
bbox1_size
+
bbox2_size
-
intersect_size
);
}
}
else
else
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment