Merge pull request #1236 from arrybn:crop_mean

8ef2f717 · Vadim Pisarevsky · fd5431c3 · ea1670b5 · 8ef2f717 · 8ef2f717
Commit 8ef2f717 authored Jun 21, 2017 by Vadim Pisarevsky
7 changed files
--- a/modules/dnn/include/opencv2/dnn/dnn.hpp
+++ b/modules/dnn/include/opencv2/dnn/dnn.hpp
@@ -598,9 +598,37 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
     *  @warning This function has the same limitations as createTorchImporter().
     */
    CV_EXPORTS_W Mat readTorchBlob(const String &filename, bool isBinary = true);
+    /** @brief Creates 4-dimensional blob from image. Optionally resizes and crops @p image from center,
-    CV_EXPORTS Mat blobFromImage(const Mat& image, double scalefactor=1.0, bool swapRB=true);
+     *  subtracts @p mean values, scales values by @p scalefactor, swaps Blue and Red channels.
-    CV_EXPORTS Mat blobFromImages(const std::vector<Mat>& image, double scalefactor=1.0, bool swapRB=true);
+     *  @param image input image (with 1- or 3-channels).
+     *  @param size spatial size for output image
+     *  @param mean scalar with mean values which are subtracted from channels. Values are intended
+     *  to be in (mean-R, mean-G, mean-B) order if @p image has BGR ordering and @p swapRB is true.
+     *  @param scalefactor multiplier for @p image values.
+     *  @param swapRB flag which indicates that swapping the first and last channels
+     *  in a 3-channel image is necessary.
+     *  @details input image is resized so one side after resize is equal to the corresponding
+     *  dimension in @p size and the other one is equal or larger. Then, crop from the center is performed.
+     *  @returns 4-dimensional Mat with NCHW dimensions order.
+     */
+    CV_EXPORTS_W Mat blobFromImage(const Mat& image, double scalefactor=1.0, const Size& size = Size(),
+                                   const Scalar& mean = Scalar(), bool swapRB=true);
+    /** @brief Creates 4-dimensional blob from series of images. Optionally resizes and
+     *  crops @p images from center, subtracts @p mean values, scales values by @p scalefactor,
+     *  swaps Blue and Red channels.
+     *  @param images input images (all with 1- or 3-channels).
+     *  @param size spatial size for output image
+     *  @param mean scalar with mean values which are subtracted from channels. Values are intended
+     *  to be in (mean-R, mean-G, mean-B) order if @p images have BGR ordering and @p swapRB is true.
+     *  @param scalefactor multiplier for @p images values.
+     *  @param swapRB flag which indicates that swapping the first and last channels
+     *  in a 3-channel image is necessary.
+     *  @details each input image is resized so one side after resize is equal to the corresponding
+     *  dimension in @p size and the other one is equal or larger. Then, crop from the center is performed.
+     *  @returns 4-dimensional Mat with NCHW dimensions order.
+     */
+    CV_EXPORTS_W Mat blobFromImages(const std::vector<Mat>& images, double scalefactor=1.0,
+                                    Size size = Size(), const Scalar& mean = Scalar(), bool swapRB=true);
 //! @}
 }

--- a/modules/dnn/samples/caffe_googlenet.cpp
+++ b/modules/dnn/samples/caffe_googlenet.cpp
@@ -114,8 +114,9 @@ int main(int argc, char **argv)
        exit(-1);
    }
-    resize(img, img, Size(224, 224));                   //GoogLeNet accepts only 224x224 RGB-images
+    //GoogLeNet accepts only 224x224 RGB-images
-    Mat inputBlob = blobFromImage(img);   //Convert Mat to batch of images
+    Mat inputBlob = blobFromImage(img, 1, Size(224, 224),
+                                  Scalar(104, 117, 123));   //Convert Mat to batch of images
    //! [Prepare blob]
    //! [Set input blob]

--- a/modules/dnn/samples/squeezenet_halide.cpp
+++ b/modules/dnn/samples/squeezenet_halide.cpp
@@ -89,7 +89,7 @@ int main(int argc, char **argv)
    }
    resize(img, img, Size(227, 227));                // SqueezeNet v1.1 predict class by 3x227x227 input image.
-    Mat inputBlob = blobFromImage(img, 1.0, false);  // Convert Mat to 4-dimensional batch.
+    Mat inputBlob = blobFromImage(img, 1.0, Size(), Scalar(), false);  // Convert Mat to 4-dimensional batch.
    //! [Prepare blob]
    //! [Set input blob]

--- a/modules/dnn/samples/torch_enet.cpp
+++ b/modules/dnn/samples/torch_enet.cpp
@@ -69,7 +69,7 @@ int main(int argc, char **argv)
    if (inputImgSize != origSize)
        resize(img, img, inputImgSize);       //Resize image to input size
-    Mat inputBlob = blobFromImage(img, 1./255, true);   //Convert Mat to image batch
+    Mat inputBlob = blobFromImage(img, 1./255);   //Convert Mat to image batch
    //! [Prepare blob]
    //! [Set input blob]

--- a/modules/dnn/src/dnn.cpp
+++ b/modules/dnn/src/dnn.cpp
@@ -48,6 +48,7 @@
 #include <sstream>
 #include <iterator>
 #include <opencv2/dnn/shape_utils.hpp>
+#include <opencv2/imgproc.hpp>
 using namespace cv;
 using namespace cv::dnn;
@@ -86,14 +87,42 @@ static String toString(const T &v)
    return ss.str();
 }
-Mat blobFromImage(const Mat& image_, double scalefactor, bool swapRB)
+Mat blobFromImage(const Mat& image, double scalefactor, const Size& size,
+                  const Scalar& mean, bool swapRB)
 {
-    std::vector<Mat> images(1, image_);
+    std::vector<Mat> images(1, image);
-    return blobFromImages(images, scalefactor, swapRB);
+    return blobFromImages(images, scalefactor, size, mean, swapRB);
 }
-Mat blobFromImages(const std::vector<Mat>& images, double scalefactor, bool swapRB)
+Mat blobFromImages(const std::vector<Mat>& images_, double scalefactor, Size size,
+                   const Scalar& mean_, bool swapRB)
 {
+    std::vector<Mat> images = images_;
+    for (int i = 0; i < images.size(); i++)
+    {
+        Size imgSize = images[i].size();
+        if (size == Size())
+            size = imgSize;
+        if (size != imgSize)
+        {
+            float resizeFactor = std::max(size.width / (float)imgSize.width,
+                                          size.height / (float)imgSize.height);
+            resize(images[i], images[i], Size(), resizeFactor, resizeFactor);
+            Rect crop(Point(0.5 * (images[i].cols - size.width),
+                            0.5 * (images[i].rows - size.height)),
+                      size);
+            images[i] = images[i](crop);
+        }
+        if(images[i].depth() == CV_8U)
+            images[i].convertTo(images[i], CV_32F);
+        Scalar mean = mean_;
+        if (swapRB)
+            std::swap(mean[0], mean[2]);
+        images[i] -= mean;
+        images[i] *= scalefactor;
+    }
    size_t i, nimages = images.size();
    if(nimages == 0)
        return Mat();
@@ -109,13 +138,7 @@ Mat blobFromImages(const std::vector<Mat>& images, double scalefactor, bool swap
        for( i = 0; i < nimages; i++ )
        {
-            Mat image_ = images[i];
+            image = images[i];
-            if(image_.depth() == CV_8U)
-            {
-                image_.convertTo(image, CV_32F, scalefactor);
-            }
-            else
-                image = image_;
            CV_Assert(image.depth() == CV_32F);
            nch = image.channels();
            CV_Assert(image.dims == 2 && (nch == 3 || nch == 4));
@@ -136,13 +159,7 @@ Mat blobFromImages(const std::vector<Mat>& images, double scalefactor, bool swap
       for( i = 0; i < nimages; i++ )
       {
-           Mat image_ = images[i];
+           Mat image = images[i];
-           if(image_.depth() == CV_8U)
-           {
-               image_.convertTo(image, CV_32F, scalefactor);
-           }
-           else
-               image = image_;
           CV_Assert(image.depth() == CV_32F);
           nch = image.channels();
           CV_Assert(image.dims == 2 && (nch == 1));
@@ -154,7 +171,6 @@ Mat blobFromImages(const std::vector<Mat>& images, double scalefactor, bool swap
    return blob;
 }
 struct LayerPin
 {
    int lid;

--- a/modules/dnn/test/test_caffe_importer.cpp
+++ b/modules/dnn/test/test_caffe_importer.cpp
@@ -94,7 +94,7 @@ TEST(Reproducibility_AlexNet, Accuracy)
    if (sample.size() != inputSize)
        resize(sample, sample, inputSize);
-    net.setInput(blobFromImage(sample, 1.), "data");
+    net.setInput(blobFromImage(sample), "data");
    Mat out = net.forward("prob");
    Mat ref = blobFromNPY(_tf("caffe_alexnet_prob.npy"));
    normAssert(ref, out);
@@ -123,7 +123,7 @@ TEST(Reproducibility_FCN, Accuracy)
    std::vector<size_t> weights, blobs;
    net.getMemoryConsumption(shape(1,3,227,227), layerIds, weights, blobs);
-    net.setInput(blobFromImage(sample, 1.), "data");
+    net.setInput(blobFromImage(sample), "data");
    Mat out = net.forward("score");
    Mat ref = blobFromNPY(_tf("caffe_fcn8s_prob.npy"));
    normAssert(ref, out);

--- a/modules/dnn/test/test_tf_importer.cpp
+++ b/modules/dnn/test/test_tf_importer.cpp
@@ -40,7 +40,7 @@ TEST(Test_TensorFlow, read_inception)
    resize(sample, input, Size(224, 224));
    input -= 128; // mean sub
-    Mat inputBlob = blobFromImage(input, 1.);
+    Mat inputBlob = blobFromImage(input);
    net.setInput(inputBlob, "input");
    Mat out = net.forward("softmax2");
@@ -61,7 +61,7 @@ TEST(Test_TensorFlow, inception_accuracy)
    Mat sample = imread(_tf("grace_hopper_227.png"));
    ASSERT_TRUE(!sample.empty());
    resize(sample, sample, Size(224, 224));
-    Mat inputBlob = blobFromImage(sample, 1.);
+    Mat inputBlob = blobFromImage(sample);
    net.setInput(inputBlob, "input");
    Mat out = net.forward("softmax2");