Commit a3c6f1dc authored by Vitaliy Lyudvichenko

Updated the LSTM layer API and fixed an LSTM bug in the tanh() implementation

The LSTM bug was caused by the different behaviour of std::tanh() and a hand-crafted tanh() implemented via std::exp().
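
A minimal sketch of the kind of discrepancy the message refers to (the exact hand-crafted formulation from the original code is not shown here): an exp-based tanh overflows for large-magnitude arguments and yields NaN, while std::tanh() saturates to +/-1.

#include <cmath>
#include <cstdio>

// Hand-crafted tanh via std::exp(): exp(x) overflows to inf for large x,
// so the ratio becomes inf/inf = NaN, whereas std::tanh() saturates to +/-1.
static double tanhViaExp(double x)
{
    const double ex = std::exp(x), emx = std::exp(-x);
    return (ex - emx) / (ex + emx);
}

int main()
{
    const double xs[] = {1.0, 20.0, 1000.0};
    for (int i = 0; i < 3; i++)
        std::printf("x=%-8g std::tanh=%g  exp-based=%g\n", xs[i], std::tanh(xs[i]), tanhViaExp(xs[i]));
    return 0;
}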
parent 44a8e818
......@@ -24,7 +24,7 @@ ocv_module_include_directories(include ${PROTOBUF_INCLUDE_DIR})
OCV_OPTION(${the_module}_WITH_BLAS "Use external BLAS library to speedup processing" OFF)
include(cmake/OpenCVFindCBLAS.cmake)
ocv_glob_module_sources(${PROTOBUF_SRCS} ${PROTOBUF_HDRS} ${CBLAS_H_PATH})
ocv_glob_module_sources(${PROTOBUF_SRCS} ${PROTOBUF_HDRS} ${CBLAS_H_PROXY_PATH})
ocv_create_module(${PROTOBUF_LIBRARIES})
ocv_add_samples()
ocv_add_accuracy_tests()
......@@ -37,6 +37,7 @@ if(${the_module}_WITH_BLAS AND HAVE_BLAS)
add_definitions(-DHAVE_CBLAS=1)
ocv_module_include_directories(${${the_module}_BLAS_INCLUDE_DIR})
ocv_add_dependencies(${the_module} ${${the_module}_BLAS_LIBRARIES})
target_link_libraries(${the_module} ${${the_module}_BLAS_LIBRARIES})
if(${the_module}_BLAS_BINARIES)
ocv_install_target(${the_module} EXPORT ${the_module}_BLAS_BINARIES
......
macro(_find_file_in_dirs VAR NAME DIRS)
find_path(${VAR} ${NAME} ${DIRS} NO_DEFAULT_PATH)
set(${VAR} ${${VAR}})
set(${VAR} ${${VAR}}/${NAME})
unset(${VAR} CACHE)
endmacro()
......@@ -16,7 +16,7 @@ if(${the_module}_WITH_BLAS)
endif()
if(NOT HAVE_BLAS)
include(cmake/OpenCVFindMKL.cmake)
if(MKL_FOUND)
if(MKL_FOUND AND FALSE)
set(BLAS_INCLUDE_DIR ${MKL_INCLUDE_DIRS})
set(BLAS_LIBRARIES ${MKL_LIBRARIES} )
set(BLAS_CBLAS_H "mkl_cblas.h" )
......@@ -52,8 +52,9 @@ if(${the_module}_WITH_BLAS)
if(NOT CBLAS_H_PATH)
message(WARNING "CBLAS header '${${_bp}_CBLAS_H}' not found in '${${_bp}_INCLUDE_DIR}'")
endif()
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/cblas.h #TARGET ${the_module} PRE_BUILD
COMMAND ${CMAKE_COMMAND} ARGS -E echo "\#include \"${CBLAS_H_PATH}\"" > ${CMAKE_CURRENT_BINARY_DIR}/cblas.h
COMMENT "Adding proxy cblas.h header")
set(CBLAS_H_PROXY_PATH ${CMAKE_CURRENT_BINARY_DIR}/opencv_cblas.hpp)
set(_include_str "\#include \"${CBLAS_H_PATH}\"")
file(WRITE ${CBLAS_H_PROXY_PATH} ${_include_str})
endif()
endif()
\ No newline at end of file
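
For reference, the generated opencv_cblas.hpp proxy is expected to contain nothing but a single include forwarding to the detected CBLAS header; the path below is purely illustrative.

// opencv_cblas.hpp -- generated at configure time; the path is a hypothetical example
#include "/opt/intel/mkl/include/mkl_cblas.h"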
......@@ -101,8 +101,8 @@ namespace dnn
@f$W_{x?} \in R^{N_c \times N_x}@f$, @f$W_{h?} \in R^{N_c \times N_h}@f$, @f$b_? \in R^{N_c}@f$.
For simplicity and performance purposes we use @f$ W_x = [W_{xi}; W_{xf}; W_{xo}; W_{xg}] @f$
(i.e. @f$W_x@f$ is the vertical concatenation of the @f$ W_{x?} @f$), @f$ W_x \in R^{4N_c x N_x} @f$.
The same holds for @f$ W_h = [W_{hi}; W_{hf}; W_{ho}; W_{hg}], W_h \in R^{4N_c x N_h} @f$
(i.e. @f$W_x@f$ is the vertical concatenation of the @f$ W_{x?} @f$), @f$ W_x \in R^{4N_c \times N_x} @f$.
The same holds for @f$ W_h = [W_{hi}; W_{hf}; W_{ho}; W_{hg}], W_h \in R^{4N_c \times N_h} @f$
and for @f$ b = [b_i; b_f; b_o; b_g]@f$, @f$b \in R^{4N_c} @f$.
@param Wh is the matrix defining how the previous output is transformed to internal gates (i.e. according to the above-mentioned notation it is @f$ W_h @f$)
......@@ -111,16 +111,44 @@ namespace dnn
*/
virtual void setWeights(const Blob &Wh, const Blob &Wx, const Blob &b) = 0;
/** In the common case it uses three inputs (@f$x_t@f$, @f$h_{t-1}@f$ and @f$c_{t-1}@f$) to compute two outputs (@f$h_t@f$ and @f$c_t@f$).
@param input can contain three inputs: @f$x_t@f$, @f$h_{t-1}@f$ and @f$c_{t-1}@f$.
@param output contains the computed outputs: @f$h_t@f$ and @f$c_t@f$.
The first input @f$x_t@f$ is required.
The second and third inputs are optional: if they weren't set then the layer will use its internal @f$h_{t-1}@f$ and @f$c_{t-1}@f$ from previous calls,
but at the first call they will be filled with zeros.
The size of the last dimension of @f$x_t@f$ must be @f$N_x@f$ (@f$N_h@f$ for @f$h_{t-1}@f$ and @f$N_c@f$ for @f$c_{t-1}@f$).
Sizes of the remaining dimensions can be arbitrary, but they must be consistent among @f$x_t@f$, @f$h_{t-1}@f$ and @f$c_{t-1}@f$.
/** @brief Sets the @f$ h_{t-1} @f$ value that will be used in the next forward() calls.
* @details By default @f$ h_{t-1} @f$ is initialized with zeros and updated after each forward() call.
*/
virtual void setH(const Blob &H) = 0;
/** @brief Returns the current @f$ h_{t-1} @f$ value (deep copy). */
virtual Blob getH() const = 0;
/** @brief Sets the @f$ c_{t-1} @f$ value that will be used in the next forward() calls.
* @details By default @f$ c_{t-1} @f$ is initialized with zeros and updated after each forward() call.
*/
virtual void setC(const Blob &C) = 0;
/** @brief Returns the current @f$ c_{t-1} @f$ value (deep copy). */
virtual Blob getC() const = 0;
/** @brief Specifies whether to interpret the first dimension of the input blob as the timestamp dimension or as the sample dimension.
*
* If the flag is set to true then the shape of the input blob will be interpreted as [`T`, `N`, `[data dims]`], where `T` specifies the number of timestamps and `N` is the number of independent streams.
* In this case each forward() call will iterate through `T` timestamps and update the layer's state `T` times.
*
* If the flag is set to false then the shape of the input blob will be interpreted as [`N`, `[data dims]`].
* In this case each forward() call will make one iteration and produce one timestamp with shape [`N`, `[out dims]`].
*/
virtual void setUseTimstampsDim(bool use = true) = 0;
/** @brief If this flag is set to true then the layer will produce @f$ c_t @f$ as its second output.
* @details The shape of the second output is the same as that of the first output.
*/
virtual void setProduceCellOutput(bool produce = false) = 0;
/** In the common case it uses a single input with @f$x_t@f$ values to compute the output(s) @f$h_t@f$ (and @f$c_t@f$).
* @param input should contain the packed values @f$x_t@f$
* @param output contains computed outputs: @f$h_t@f$ (and @f$c_t@f$ if the setProduceCellOutput() flag was set to true).
*
* If setUseTimstampsDim() is set to true then @p input[0] should have at least two dimensions with the following shape: [`T`, `N`, `[data dims]`],
* where `T` specifies the number of timestamps and `N` is the number of independent streams (i.e. @f$ x_{t_0 + t}^{stream} @f$ is @p input[0][t, stream, ...]).
*
* If setUseTimstampsDim() is set to false then @p input[0] should contain a single timestamp; its shape should have the form [`N`, `[data dims]`] with at least one dimension
* (i.e. @f$ x_{t}^{stream} @f$ = @p input[0][stream, ...]).
*/
void forward(std::vector<Blob*> &input, std::vector<Blob> &output);
};
......
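A hedged sketch of driving the updated LSTMLayer API, based only on the declarations above; blob preparation, the dnn.hpp include path and the weight shapes (Wh: [4Nc x Nh], Wx: [4Nc x Nx], b: [4Nc]) are assumptions, while create(), setWeights(), the state setters and the allocate()/forward() pair match the interface shown here and the tests below.

#include <opencv2/dnn.hpp>
#include <vector>
using namespace cv;
using namespace cv::dnn;

void runLSTM(const Blob &Wh, const Blob &Wx, const Blob &b, Blob &x)
{
    Ptr<LSTMLayer> lstm = LSTMLayer::create();
    lstm->setWeights(Wh, Wx, b);
    lstm->setUseTimstampsDim(true);    // treat the input as [T, N, <data dims>]
    lstm->setProduceCellOutput(true);  // also emit c_t as a second output

    std::vector<Blob*> inputs(1, &x);
    std::vector<Blob> outputs;
    lstm->allocate(inputs, outputs);   // allocate/forward pair, as used by runLayer() in the tests
    lstm->forward(inputs, outputs);

    Blob &h_t = outputs[0];            // hidden states for all T timestamps
    Blob &c_t = outputs[1];            // cell states (present because of setProduceCellOutput)
    (void)h_t; (void)c_t;
}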
......@@ -107,6 +107,18 @@ namespace dnn
bool operator== (const BlobShape &r) const;
/** @brief Concatenates two shapes */
BlobShape operator+ (const BlobShape &r) const;
/** @brief Returns the shape of the passed Mat. */
static BlobShape like(const Mat &m);
/** @brief Returns the shape of the passed UMat. */
static BlobShape like(const UMat &m);
#ifdef CV_CXX_MOVE_SEMANTICS
//TBD
#endif
private:
cv::AutoBuffer<int,4> sz;
};
......@@ -228,6 +240,11 @@ namespace dnn
*/
Blob &reshape(const BlobShape &shape);
/** @brief Changes the shape of the blob without copying the data.
* @returns a shallow copy of the original blob with the new shape.
*/
Blob reshaped(const BlobShape &newShape) const;
/** @brief Returns type of the blob. */
int type() const;
......
......@@ -185,6 +185,16 @@ inline bool BlobShape::operator==(const BlobShape &r) const
return this->equal(r);
}
inline BlobShape BlobShape::like(const Mat &m)
{
return BlobShape(m.dims, (const int*)m.size);
}
inline BlobShape BlobShape::like(const UMat &m)
{
return BlobShape(m.dims, (const int*)m.size);
}
CV_EXPORTS std::ostream &operator<< (std::ostream &stream, const BlobShape &shape);
/////////////////////////////////////////////////////////////////////
......@@ -277,6 +287,17 @@ inline BlobShape Blob::shape() const
return BlobShape(dims(), sizes());
}
inline BlobShape BlobShape::operator+(const BlobShape &r) const
{
BlobShape newShape(this->dims() + r.dims(), (int*)NULL);
for (int i = 0; i < this->dims(); i++)
newShape[i] = (*this)[i];
for (int i = 0; i < r.dims(); i++)
newShape[this->dims() + i] = r[i];
return newShape;
}
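
For illustration, a small hedged example of how like() and operator+ compose; the resulting shape values follow directly from the definitions above.

// Sketch: like() captures a Mat's dimensions; operator+ concatenates two shapes.
cv::Mat a(2, 3, CV_32F), b(4, 5, CV_32F);
cv::dnn::BlobShape sa = cv::dnn::BlobShape::like(a);  // [2, 3]
cv::dnn::BlobShape sb = cv::dnn::BlobShape::like(b);  // [4, 5]
cv::dnn::BlobShape sc = sa + sb;                      // [2, 3, 4, 5]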
inline bool Blob::equalShape(const Blob &other) const
{
if (this->dims() != other.dims())
......@@ -366,6 +387,13 @@ inline Blob &Blob::reshape(const BlobShape &newShape)
return *this;
}
inline Blob Blob::reshaped(const BlobShape &newShape) const
{
Blob res(*this); //also, res.shareFrom(*this) could be used
res.reshape(newShape);
return res;
}
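
A brief hedged example of the non-mutating variant; the pre-existing 'blob' here is assumed, and only the reshaped() semantics come from the code above.

// Sketch: 'blob' holds 12 elements shaped [3, 4]; reshaped() returns a shallow copy
// that views the same data as [1, 12], while 'blob' keeps its original shape.
int flatDims[] = {1, 12};
cv::dnn::Blob flat = blob.reshaped(cv::dnn::BlobShape(2, flatDims));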
}
}
......
#include "op_blas.hpp"
#if HAVE_CBLAS
#include "cblas.h"
#include "opencv_cblas.hpp"
#endif
#include <iostream>
namespace cv
{
namespace dnn
......@@ -14,18 +16,19 @@ void gemm(InputArray A, InputArray B, double alpha, InputOutputArray C, double b
cv::gemm(A, B, alpha, C, beta, C, flags);
}
inline void SwapRowCols(const Mat &A, int &rows, int &cols, bool transA)
inline void SwapRowCols(const Mat &A, int &rows, int &cols, bool isTrans)
{
rows = (transA) ? A.cols : A.rows;
cols = (transA) ? A.rows : A.cols;
CV_DbgAssert(A.dims == 2);
rows = (isTrans) ? A.cols : A.rows;
cols = (isTrans) ? A.rows : A.cols;
}
void gemmCPU(const Mat &A, const Mat &B, double alpha, Mat &C, double beta, int flags /*= 0*/)
{
#if HAVE_CBLAS
int transA = flags & GEMM_1_T;
int transB = flags & GEMM_2_T;
int transC = flags & GEMM_3_T;
bool transA = static_cast<bool>(flags & GEMM_1_T);
bool transB = static_cast<bool>(flags & GEMM_2_T);
bool transC = static_cast<bool>(flags & GEMM_3_T);
int Arows, Acols, Brows, Bcols, Crows, Ccols;
SwapRowCols(A, Arows, Acols, transA);
......@@ -34,9 +37,9 @@ void gemmCPU(const Mat &A, const Mat &B, double alpha, Mat &C, double beta, int
CV_DbgAssert(!(flags & GEMM_3_T));
CV_Assert(Acols == Brows && Arows == Crows && Bcols == Ccols);
CV_DbgAssert(A.isContinuous() && B.isContinuous() && C.isContinuous());
CV_DbgAssert(A.type() == CV_32F || A.type() == CV_64F);
CV_DbgAssert(A.type() == B.type() && B.type() == C.type());
CV_Assert(A.isContinuous() && B.isContinuous() && C.isContinuous());
CV_Assert(A.type() == CV_32F || A.type() == CV_64F);
CV_Assert(A.type() == B.type() && B.type() == C.type());
if (C.type() == CV_32F)
{
......
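As a hedged sketch (not necessarily the commit's exact code), the CV_32F branch's dispatch to cblas_sgemm for row-major, continuous matrices could look as follows; the leading dimension of each matrix is its stored column count, regardless of the transpose flags.

#include <opencv2/core.hpp>
#include <cblas.h>

// Hedged sketch of the single-precision GEMM dispatch (not the commit's exact code).
static void sgemmViaCBLAS(const cv::Mat &A, const cv::Mat &B, cv::Mat &C,
                          double alpha, double beta, bool transA, bool transB)
{
    const int M = transA ? A.cols : A.rows;   // rows of op(A) and of C
    const int N = transB ? B.rows : B.cols;   // cols of op(B) and of C
    const int K = transA ? A.rows : A.cols;   // cols of op(A) == rows of op(B)

    cblas_sgemm(CblasRowMajor,
                transA ? CblasTrans : CblasNoTrans,
                transB ? CblasTrans : CblasNoTrans,
                M, N, K,
                (float)alpha, A.ptr<float>(), A.cols,
                B.ptr<float>(), B.cols,
                (float)beta, C.ptr<float>(), C.cols);
}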
......@@ -177,6 +177,23 @@ TEST(Layer_Test_Reshape_Split_Slice, Accuracy)
normAssert(input, output);
}
enum RunLayerMode
{
ALLOC_ONLY = 1,
FORWARD_ONLY = 2,
ALLOC_AND_FORWARD = 3
};
void runLayer(Ptr<Layer> layer, std::vector<Blob> &inpBlobs, std::vector<Blob> &outBlobs, int mode=ALLOC_AND_FORWARD)
{
std::vector<Blob*> inpPtrs(inpBlobs.size());
for (size_t i = 0; i < inpBlobs.size(); i++)
inpPtrs[i] = &inpBlobs[i];
if (mode & ALLOC_ONLY) layer->allocate(inpPtrs, outBlobs);
if (mode & FORWARD_ONLY) layer->forward(inpPtrs, outBlobs);
}
class Layer_LSTM_Test : public ::testing::Test
{
public:
......@@ -233,6 +250,28 @@ TEST_F(Layer_LSTM_Test, BasicTest_2)
EXPECT_EQ(outputs[1].shape(), BlobShape(1, 2, 3, Nc));
}
TEST(Layer_LSTM_Test_Accuracy_Reference_with_, CaffeRecurrent)
{
Ptr<LSTMLayer> layer = LSTMLayer::create();
Blob Wx = blobFromNPY(_tf("lstm.prototxt.w_0.npy"));
Blob Wh = blobFromNPY(_tf("lstm.prototxt.w_2.npy"));
Blob b = blobFromNPY(_tf("lstm.prototxt.w_1.npy"));
layer->setWeights(Wh, Wx, b);
Blob inp = blobFromNPY(_tf("blob.npy"));
std::vector<Blob> inputs(1, inp), outputs;
runLayer(layer, inputs, outputs, ALLOC_ONLY | FORWARD_ONLY);
Blob &h_t_gathered = outputs[0];
Blob h_t_reference = blobFromNPY(_tf("lstm.prototxt.h_1.npy"));
//h_t_gathered.reshape(h_t_reference.shape());
normAssert(h_t_reference, h_t_gathered);
}
class Layer_RNN_Test : public ::testing::Test
{
......