Commit 692ba7ba authored by Aleksandr Rybnikov

Fixed SSD example, added test

parent 38dd47cf
#include <opencv2/dnn.hpp>
#include <opencv2/dnn/shape_utils.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
using namespace cv;
@@ -30,7 +31,7 @@ Mat getMean(const size_t& imageHeight, const size_t& imageWidth)
Mat preprocess(const Mat& frame)
{
Mat preprocessed;
- frame.convertTo(preprocessed, CV_32FC3);
+ frame.convertTo(preprocessed, CV_32F);
resize(preprocessed, preprocessed, Size(width, height)); //SSD accepts 300x300 RGB-images
Mat mean = getMean(width, height);
@@ -98,6 +99,8 @@ int main(int argc, char** argv)
cv::Mat frame = cv::imread(parser.get<string>("image"), -1);
if (frame.channels() == 4)
cvtColor(frame, frame, COLOR_BGRA2BGR);
//! [Prepare blob]
Mat preprocessedFrame = preprocess(frame);
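The rest of the sample is collapsed in this view. Judging from the test added at the bottom of this commit, the prepared frame is presumably turned into a 4-D blob and pushed through the network along these lines (a sketch only; the blob name ".data" and the output name "detection_out" are taken from that test, and `net` is assumed to have been created from the Caffe importer earlier in main):

Mat inputBlob = blobFromImage(preprocessedFrame); // NCHW blob, 1x3x300x300, float
net.setBlob(".data", inputBlob);                  // bind the network input
net.forward();                                    // run the SSD forward pass
Mat detections = net.getBlob("detection_out");    // 1x1xNx7 detection blob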
@@ -115,6 +115,21 @@ message PriorBoxParameter {
optional bool clip = 5 [default = true];
// Variance for adjusting the prior bboxes.
repeated float variance = 6;
// By default, we calculate img_height, img_width, step_x, step_y based on
// bottom[0] (feat) and bottom[1] (img). Unless these values are explicitly
// provided.
// Explicitly provide the img_size.
optional uint32 img_size = 7;
// Either img_size or img_h/img_w should be specified; not both.
optional uint32 img_h = 8;
optional uint32 img_w = 9;
// Explicitly provide the step size.
optional float step = 10;
// Either step or step_h/step_w should be specified; not both.
optional float step_h = 11;
optional float step_w = 12;
// Offset to the top left corner of each cell.
optional float offset = 13 [default = 0.5];
}
// Message that store parameters used by DetectionOutputLayer
@@ -126,6 +141,10 @@ message DetectionOutputParameter {
// Background label id. If there is no background class,
// set it as -1.
optional int32 background_label_id = 3 [default = 0];
// Parameters used for non maximum suppression.
optional NonMaximumSuppressionParameter nms_param = 4;
// Parameters used for saving detection results.
optional SaveOutputParameter save_output_param = 5;
// Type of coding method for bbox.
optional PriorBoxParameter.CodeType code_type = 6 [default = CORNER];
// If true, variance is encoded in target; otherwise we need to adjust the
@@ -137,11 +156,6 @@ message DetectionOutputParameter {
// Only consider detections whose confidences are larger than a threshold.
// If not provided, consider all boxes.
optional float confidence_threshold = 9;
- // Parameters used for non maximum suppression.
- // Threshold to be used in nms.
- optional float nms_threshold = 10 [default = 0.3];
- // Maximum number of results to be kept.
- optional int32 top_k = 11;
}
message Datum {
@@ -503,7 +517,7 @@ message LayerParameter {
optional LRNParameter lrn_param = 118;
optional MemoryDataParameter memory_data_param = 119;
optional MVNParameter mvn_param = 120;
- optional NormalizeBBoxParameter normalize_bbox_param = 149;
+ optional NormalizeBBoxParameter norm_param = 149;
optional PermuteParameter permute_param = 148;
optional ParameterParameter parameter_param = 145;
optional PoolingParameter pooling_param = 121;
@@ -781,6 +795,39 @@ message DataParameter {
optional uint32 prefetch = 10 [default = 4];
}
message NonMaximumSuppressionParameter {
// Threshold to be used in nms.
optional float nms_threshold = 1 [default = 0.3];
// Maximum number of results to be kept.
optional int32 top_k = 2;
// Parameter for adaptive nms.
optional float eta = 3 [default = 1.0];
}
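For context: nms_threshold is a Jaccard-overlap (IoU) threshold, top_k caps how many detections survive, and eta < 1 makes the threshold adaptive, shrinking it as boxes are accepted. A minimal greedy-NMS sketch of how these three parameters are commonly used (illustrative only, not the DetectionOutput layer's actual code):

#include <opencv2/core.hpp>
#include <algorithm>
#include <vector>

// Greedy NMS sketch: walk boxes in decreasing score order and keep a box only
// if its IoU with every previously kept box stays below the current threshold.
static std::vector<int> nmsSketch(const std::vector<cv::Rect2f>& boxes,
                                  const std::vector<float>& scores,
                                  float nmsThreshold, int topK, float eta)
{
    std::vector<int> order(boxes.size());
    for (size_t i = 0; i < order.size(); ++i) order[i] = (int)i;
    std::sort(order.begin(), order.end(),
              [&scores](int a, int b) { return scores[a] > scores[b]; });

    std::vector<int> kept;
    float threshold = nmsThreshold;
    for (size_t n = 0; n < order.size(); ++n)
    {
        int idx = order[n];
        bool suppressed = false;
        for (size_t k = 0; k < kept.size() && !suppressed; ++k)
        {
            float inter = (boxes[idx] & boxes[kept[k]]).area();
            float uni = boxes[idx].area() + boxes[kept[k]].area() - inter;
            suppressed = uni > 0.f && inter / uni > threshold;
        }
        if (suppressed) continue;
        kept.push_back(idx);
        if (topK > 0 && (int)kept.size() >= topK) break;      // top_k
        if (eta < 1.f && threshold > 0.5f) threshold *= eta;  // adaptive nms (eta)
    }
    return kept;
}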
message SaveOutputParameter {
// Output directory. If not empty, we will save the results.
optional string output_directory = 1;
// Output name prefix.
optional string output_name_prefix = 2;
// Output format.
// VOC - PASCAL VOC output format.
// COCO - MS COCO output format.
optional string output_format = 3;
// If you want to output results, must also provide the following two files.
// Otherwise, we will ignore saving results.
// label map file.
optional string label_map_file = 4;
// A file which contains a list of names and sizes with same order
// of the input DB. The file is in the following format:
// name height width
// ...
optional string name_size_file = 5;
// Number of test images. It can be less than the lines specified in
// name_size_file. For example, when we only want to evaluate on part
// of the test images.
optional uint32 num_test_image = 6;
}
message DropoutParameter {
optional float dropout_ratio = 1 [default = 0.5]; // dropout ratio
}
@@ -95,6 +95,7 @@ void initModule()
REG_RUNTIME_LAYER_CLASS(PriorBox, PriorBoxLayer);
REG_RUNTIME_LAYER_CLASS(DetectionOutput, DetectionOutputLayer);
REG_RUNTIME_LAYER_CLASS(NormalizeBBox, NormalizeBBoxLayer);
+ REG_RUNTIME_LAYER_CLASS(Normalize, NormalizeBBoxLayer);
REG_RUNTIME_LAYER_CLASS(Shift, ShiftLayer);
REG_RUNTIME_LAYER_CLASS(Padding, PaddingLayer);
REG_RUNTIME_LAYER_CLASS(Scale, ScaleLayer);
@@ -84,7 +84,7 @@ public:
CV_Assert(startAxis >= 0);
CV_Assert(endAxis >= startAxis && endAxis < (int)numAxes);
- size_t flattenedDimensionSize = total(inputs[0], startAxis, endAxis);
+ size_t flattenedDimensionSize = total(inputs[0], startAxis, endAxis + 1);
MatShape outputShapeVec;
for (int i = 0; i < startAxis; i++)
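The one-line change above fixes an off-by-one: total(shape, start, end) in shape_utils multiplies the dimensions over the half-open range [start, end), which is exactly why flattening axes startAxis..endAxis inclusive needs end = endAxis + 1. For a hypothetical input shape {1, 512, 38, 38} with startAxis = 1 and endAxis = 3, the flattened size should be 512 * 38 * 38 = 739328; the old call total(inputs[0], 1, 3) would drop the last axis and return only 512 * 38 = 19456.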
@@ -124,7 +124,7 @@ public:
MatShape shapeBefore = inputs[0], shapeAfter;
for (size_t i = 0; i < _numAxes; i++)
{
- shapeAfter[i] = shapeBefore[_order[i]];
+ shapeAfter.push_back(shapeBefore[_order[i]]);
}
outputs.clear();
@@ -132,6 +132,7 @@ public:
for (size_t i = 0; i < inputs.size(); i++)
{
CV_Assert(inputs[i][2] == shapeBefore[2] && inputs[i][3] == shapeBefore[3]);
+ CV_Assert(total(inputs[i]) == total(shapeAfter));
outputs.push_back(shapeAfter);
}
@@ -192,11 +193,11 @@ public:
CV_Assert(inp.dims == numAxes && inp.size == inputs[0]->size);
CV_Assert(out.dims == numAxes && out.size == outputs[0].size);
- for( i = 0; i < numAxes; i++ )
- {
- CV_Assert(inp.size[i] == _oldDimensionSize[i]);
- CV_Assert(out.size[i] == _newDimensionSize[i]);
- }
+ // for( i = 0; i < numAxes; i++ )
+ // {
+ // CV_Assert(inp.size[i] == _oldDimensionSize[i]);
+ // CV_Assert(out.size[i] == _newDimensionSize[i]);
+ // }
CV_Assert(inp.isContinuous() && out.isContinuous());
CV_Assert(inp.type() == CV_32F && out.type() == CV_32F);
@@ -183,6 +183,22 @@ public:
_numPriors += 1;
}
if (params.has("step_h") || params.has("step_w")) {
CV_Assert(!params.has("step"));
_stepY = getParameter<float>(params, "step_h");
CV_Assert(_stepY > 0.);
_stepX = getParameter<float>(params, "step_w");
CV_Assert(_stepX > 0.);
} else if (params.has("step")) {
const float step = getParameter<float>(params, "step");
CV_Assert(step > 0);
_stepY = step;
_stepX = step;
} else {
_stepY = 0;
_stepX = 0;
}
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -216,8 +232,14 @@ public:
int _imageWidth = inputs[1]->size[3];
int _imageHeight = inputs[1]->size[2];
- float _stepX = static_cast<float>(_imageWidth) / _layerWidth;
- float _stepY = static_cast<float>(_imageHeight) / _layerHeight;
+ float stepX, stepY;
+ if (_stepX == 0 || _stepY == 0) {
+ stepX = static_cast<float>(_imageWidth) / _layerWidth;
+ stepY = static_cast<float>(_imageHeight) / _layerHeight;
+ } else {
+ stepX = _stepX;
+ stepY = _stepY;
+ }
int _outChannelSize = _layerHeight * _layerWidth * _numPriors * 4;
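With this change an explicit step (or step_h/step_w) from the layer parameters takes precedence, and only when none was given does the layer fall back to the old behaviour of deriving the step from the image and feature-map sizes. As a hypothetical example for a 300x300 SSD input and a 38x38 feature map: the fallback gives stepX = stepY = 300 / 38 ≈ 7.89, whereas a prototxt entry step: 8 makes the layer use exactly 8 in both directions.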
@@ -231,8 +253,8 @@ public:
{
_boxWidth = _boxHeight = _minSize;
- float center_x = (w + 0.5) * _stepX;
- float center_y = (h + 0.5) * _stepY;
+ float center_x = (w + 0.5) * stepX;
+ float center_y = (h + 0.5) * stepY;
// xmin
outputPtr[idx++] = (center_x - _boxWidth / 2.) / _imageWidth;
// ymin
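Continuing the hypothetical numbers above with a _minSize of 30: for the top-left cell (w, h) = (0, 0), center_x = center_y = 0.5 * 7.89 ≈ 3.95, so xmin ≈ (3.95 - 15) / 300 ≈ -0.037 and xmax ≈ (3.95 + 15) / 300 ≈ 0.063. Prior box corners are stored normalized to the input image size (and clipped to [0, 1] later if clip is enabled).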
@@ -332,6 +354,8 @@ public:
float _boxWidth;
float _boxHeight;
+ float _stepX, _stepY;
std::vector<float> _aspectRatios;
std::vector<float> _variance;
@@ -130,4 +130,32 @@ TEST(Reproducibility_FCN, Accuracy)
}
#endif
TEST(Reproducibility_SSD, Accuracy)
{
Net net;
{
const string proto = findDataFile("dnn/ssd_vgg16.prototxt", false);
const string model = findDataFile("dnn/VGG_ILSVRC2016_SSD_300x300_iter_440000.caffemodel", false);
Ptr<Importer> importer = createCaffeImporter(proto, model);
ASSERT_TRUE(importer != NULL);
importer->populateNet(net);
}
Mat sample = imread(_tf("street.png"));
ASSERT_TRUE(!sample.empty());
if (sample.channels() == 4)
cvtColor(sample, sample, COLOR_BGRA2BGR);
sample.convertTo(sample, CV_32F);
resize(sample, sample, Size(300, 300));
Mat in_blob = blobFromImage(sample);
net.setBlob(".data", in_blob);
net.forward();
Mat out = net.getBlob("detection_out");
Mat ref = blobFromNPY(_tf("ssd_out.npy"));
normAssert(ref, out);
}
}
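The detection_out blob checked by this test follows the usual SSD DetectionOutput layout: a 1x1xNx7 blob whose rows are [image_id, label, confidence, xmin, ymin, xmax, ymax], with box corners normalized to [0, 1] relative to the input image. A small, hypothetical helper showing how such a blob could be decoded (the 0.25 confidence threshold is arbitrary; std::cout needs <iostream>):

// Sketch only: decode an SSD "detection_out" blob of shape 1x1xNx7.
static void dumpDetections(Mat out, float confThreshold = 0.25f)
{
    // View the last two axes as an N x 7 matrix of floats.
    Mat detections(out.size[2], out.size[3], CV_32F, out.ptr<float>());
    for (int i = 0; i < detections.rows; ++i)
    {
        float confidence = detections.at<float>(i, 2);
        if (confidence < confThreshold)
            continue;
        std::cout << "label " << (int)detections.at<float>(i, 1)
                  << " conf " << confidence
                  << " box [" << detections.at<float>(i, 3) << ", "
                  << detections.at<float>(i, 4) << ", "
                  << detections.at<float>(i, 5) << ", "
                  << detections.at<float>(i, 6) << "]" << std::endl;
    }
}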