Commit 6f4179d4 authored by Vadim Pisarevsky's avatar Vadim Pisarevsky

Merge pull request #1214 from arrybn:ssd_test

parents 0b845df0 692ba7ba
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
#include <opencv2/dnn.hpp>
#include <opencv2/dnn/shape_utils.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
using namespace cv;
......@@ -30,7 +31,7 @@ Mat getMean(const size_t& imageHeight, const size_t& imageWidth)
Mat preprocess(const Mat& frame)
{
Mat preprocessed;
frame.convertTo(preprocessed, CV_32FC3);
frame.convertTo(preprocessed, CV_32F);
resize(preprocessed, preprocessed, Size(width, height)); //SSD accepts 300x300 RGB-images
Mat mean = getMean(width, height);
......@@ -98,6 +99,8 @@ int main(int argc, char** argv)
cv::Mat frame = cv::imread(parser.get<string>("image"), -1);
if (frame.channels() == 4)
cvtColor(frame, frame, COLOR_BGRA2BGR);
//! [Prepare blob]
Mat preprocessedFrame = preprocess(frame);
......
......@@ -115,6 +115,21 @@ message PriorBoxParameter {
optional bool clip = 5 [default = true];
// Variance for adjusting the prior bboxes.
repeated float variance = 6;
// By default, we calculate img_height, img_width, step_x, step_y based on
// bottom[0] (feat) and bottom[1] (img). Unless these values are explicitely
// provided.
// Explicitly provide the img_size.
optional uint32 img_size = 7;
// Either img_size or img_h/img_w should be specified; not both.
optional uint32 img_h = 8;
optional uint32 img_w = 9;
// Explicitly provide the step size.
optional float step = 10;
// Either step or step_h/step_w should be specified; not both.
optional float step_h = 11;
optional float step_w = 12;
// Offset to the top left corner of each cell.
optional float offset = 13 [default = 0.5];
}
// Message that store parameters used by DetectionOutputLayer
......@@ -126,6 +141,10 @@ message DetectionOutputParameter {
// Background label id. If there is no background class,
// set it as -1.
optional int32 background_label_id = 3 [default = 0];
// Parameters used for non maximum suppression.
optional NonMaximumSuppressionParameter nms_param = 4;
// Parameters used for saving detection results.
optional SaveOutputParameter save_output_param = 5;
// Type of coding method for bbox.
optional PriorBoxParameter.CodeType code_type = 6 [default = CORNER];
// If true, variance is encoded in target; otherwise we need to adjust the
......@@ -137,11 +156,6 @@ message DetectionOutputParameter {
// Only consider detections whose confidences are larger than a threshold.
// If not provided, consider all boxes.
optional float confidence_threshold = 9;
// Parameters used for non maximum suppression.
// Threshold to be used in nms.
optional float nms_threshold = 10 [default = 0.3];
// Maximum number of results to be kept.
optional int32 top_k = 11;
}
message Datum {
......@@ -503,7 +517,7 @@ message LayerParameter {
optional LRNParameter lrn_param = 118;
optional MemoryDataParameter memory_data_param = 119;
optional MVNParameter mvn_param = 120;
optional NormalizeBBoxParameter normalize_bbox_param = 149;
optional NormalizeBBoxParameter norm_param = 149;
optional PermuteParameter permute_param = 148;
optional ParameterParameter parameter_param = 145;
optional PoolingParameter pooling_param = 121;
......@@ -781,6 +795,39 @@ message DataParameter {
optional uint32 prefetch = 10 [default = 4];
}
// Message that store parameters used by non maximum suppression.
// NOTE(review): field numbers and defaults are part of the wire format of the
// serialized .caffemodel/.prototxt — never renumber or change defaults.
message NonMaximumSuppressionParameter {
// Threshold to be used in nms.
// Boxes overlapping an already-kept box by more than this are suppressed
// (presumably an IoU/Jaccard overlap — confirm against the layer implementation).
optional float nms_threshold = 1 [default = 0.3];
// Maximum number of results to be kept.
optional int32 top_k = 2;
// Parameter for adaptive nms.
// NOTE(review): semantics of eta are defined by the NMS implementation; the
// default of 1.0 appears to mean "non-adaptive" — verify in the layer code.
optional float eta = 3 [default = 1.0];
}
// Message that store parameters used for saving detection results to disk.
// All fields are optional; when output_directory is empty, nothing is saved.
// NOTE(review): field numbers are part of the serialized wire format — never renumber.
message SaveOutputParameter {
// Output directory. If not empty, we will save the results.
optional string output_directory = 1;
// Output name prefix.
optional string output_name_prefix = 2;
// Output format.
// VOC - PASCAL VOC output format.
// COCO - MS COCO output format.
optional string output_format = 3;
// If you want to output results, must also provide the following two files.
// Otherwise, we will ignore saving results.
// label map file.
optional string label_map_file = 4;
// A file which contains a list of names and sizes with same order
// of the input DB. The file is in the following format:
// name height width
// ...
optional string name_size_file = 5;
// Number of test images. It can be less than the lines specified in
// name_size_file. For example, when we only want to evaluate on part
// of the test images.
optional uint32 num_test_image = 6;
}
// Message that store parameters used by DropoutLayer.
message DropoutParameter {
optional float dropout_ratio = 1 [default = 0.5]; // dropout ratio
}
......
......@@ -95,6 +95,7 @@ void initModule()
REG_RUNTIME_LAYER_CLASS(PriorBox, PriorBoxLayer);
REG_RUNTIME_LAYER_CLASS(DetectionOutput, DetectionOutputLayer);
REG_RUNTIME_LAYER_CLASS(NormalizeBBox, NormalizeBBoxLayer);
REG_RUNTIME_LAYER_CLASS(Normalize, NormalizeBBoxLayer);
REG_RUNTIME_LAYER_CLASS(Shift, ShiftLayer);
REG_RUNTIME_LAYER_CLASS(Padding, PaddingLayer);
REG_RUNTIME_LAYER_CLASS(Scale, ScaleLayer);
......
......@@ -84,7 +84,7 @@ public:
CV_Assert(startAxis >= 0);
CV_Assert(endAxis >= startAxis && endAxis < (int)numAxes);
size_t flattenedDimensionSize = total(inputs[0], startAxis, endAxis);
size_t flattenedDimensionSize = total(inputs[0], startAxis, endAxis + 1);
MatShape outputShapeVec;
for (int i = 0; i < startAxis; i++)
......
......@@ -124,7 +124,7 @@ public:
MatShape shapeBefore = inputs[0], shapeAfter;
for (size_t i = 0; i < _numAxes; i++)
{
shapeAfter[i] = shapeBefore[_order[i]];
shapeAfter.push_back(shapeBefore[_order[i]]);
}
outputs.clear();
......@@ -132,6 +132,7 @@ public:
for (size_t i = 0; i < inputs.size(); i++)
{
CV_Assert(inputs[i][2] == shapeBefore[2] && inputs[i][3] == shapeBefore[3]);
CV_Assert(total(inputs[i]) == total(shapeAfter));
outputs.push_back(shapeAfter);
}
......@@ -192,11 +193,11 @@ public:
CV_Assert(inp.dims == numAxes && inp.size == inputs[0]->size);
CV_Assert(out.dims == numAxes && out.size == outputs[0].size);
for( i = 0; i < numAxes; i++ )
{
CV_Assert(inp.size[i] == _oldDimensionSize[i]);
CV_Assert(out.size[i] == _newDimensionSize[i]);
}
// for( i = 0; i < numAxes; i++ )
// {
// CV_Assert(inp.size[i] == _oldDimensionSize[i]);
// CV_Assert(out.size[i] == _newDimensionSize[i]);
// }
CV_Assert(inp.isContinuous() && out.isContinuous());
CV_Assert(inp.type() == CV_32F && out.type() == CV_32F);
......
......@@ -183,6 +183,22 @@ public:
_numPriors += 1;
}
if (params.has("step_h") || params.has("step_w")) {
CV_Assert(!params.has("step"));
_stepY = getParameter<float>(params, "step_h");
CV_Assert(_stepY > 0.);
_stepX = getParameter<float>(params, "step_w");
CV_Assert(_stepX > 0.);
} else if (params.has("step")) {
const float step = getParameter<float>(params, "step");
CV_Assert(step > 0);
_stepY = step;
_stepX = step;
} else {
_stepY = 0;
_stepX = 0;
}
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
......@@ -216,8 +232,14 @@ public:
int _imageWidth = inputs[1]->size[3];
int _imageHeight = inputs[1]->size[2];
float _stepX = static_cast<float>(_imageWidth) / _layerWidth;
float _stepY = static_cast<float>(_imageHeight) / _layerHeight;
float stepX, stepY;
if (_stepX == 0 || _stepY == 0) {
stepX = static_cast<float>(_imageWidth) / _layerWidth;
stepY = static_cast<float>(_imageHeight) / _layerHeight;
} else {
stepX = _stepX;
stepY = _stepY;
}
int _outChannelSize = _layerHeight * _layerWidth * _numPriors * 4;
......@@ -231,8 +253,8 @@ public:
{
_boxWidth = _boxHeight = _minSize;
float center_x = (w + 0.5) * _stepX;
float center_y = (h + 0.5) * _stepY;
float center_x = (w + 0.5) * stepX;
float center_y = (h + 0.5) * stepY;
// xmin
outputPtr[idx++] = (center_x - _boxWidth / 2.) / _imageWidth;
// ymin
......@@ -332,6 +354,8 @@ public:
float _boxWidth;
float _boxHeight;
float _stepX, _stepY;
std::vector<float> _aspectRatios;
std::vector<float> _variance;
......
......@@ -134,4 +134,32 @@ TEST(Reproducibility_FCN, Accuracy)
}
#endif
// Regression test: runs the SSD (VGG16, 300x300) Caffe model on a fixed image
// and compares the "detection_out" blob against a pre-computed reference dump.
TEST(Reproducibility_SSD, Accuracy)
{
Net net;
{
// Load network topology + weights through the Caffe importer.
// findDataFile(..., false): the files are looked up in the test data path
// and are required (the test cannot run without them).
const string proto = findDataFile("dnn/ssd_vgg16.prototxt", false);
const string model = findDataFile("dnn/VGG_ILSVRC2016_SSD_300x300_iter_440000.caffemodel", false);
Ptr<Importer> importer = createCaffeImporter(proto, model);
ASSERT_TRUE(importer != NULL);
importer->populateNet(net);
}
Mat sample = imread(_tf("street.png"));
ASSERT_TRUE(!sample.empty());
// Drop a possible alpha channel so the input matches the 3-channel model input.
if (sample.channels() == 4)
cvtColor(sample, sample, COLOR_BGRA2BGR);
// Preprocess: float conversion + resize to the fixed 300x300 SSD input size.
// NOTE(review): no mean subtraction here — the reference ssd_out.npy must have
// been produced with the identical preprocessing for the comparison to hold.
sample.convertTo(sample, CV_32F);
resize(sample, sample, Size(300, 300));
Mat in_blob = blobFromImage(sample);
net.setBlob(".data", in_blob);
net.forward();
Mat out = net.getBlob("detection_out");
// Compare against the reference detections saved as a NumPy array.
Mat ref = blobFromNPY(_tf("ssd_out.npy"));
normAssert(ref, out);
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment