Merge pull request #6535 from sovrasov:lapack-hal

547a2d29 · Vadim Pisarevsky · 4142e737 · 055f5c73 · 547a2d29 · 547a2d29
Commit 547a2d29 authored Jun 16, 2016 by Vadim Pisarevsky
13 changed files
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -221,6 +221,7 @@ OCV_OPTION(WITH_VA             "Include VA support"                          OFF
 OCV_OPTION(WITH_VA_INTEL       "Include Intel VA-API/OpenCL support"         OFF  IF (UNIX AND NOT ANDROID) )
 OCV_OPTION(WITH_GDAL           "Include GDAL Support"                        OFF  IF (NOT ANDROID AND NOT IOS AND NOT WINRT) )
 OCV_OPTION(WITH_GPHOTO2        "Include gPhoto2 library support"             ON   IF (UNIX AND NOT ANDROID) )
+OCV_OPTION(WITH_LAPACK         "Include Lapack library support"              ON   IF (UNIX AND NOT ANDROID) )
 # OpenCV build components
 # ===================================================
@@ -1173,6 +1174,10 @@ if(DEFINED WITH_VA_INTEL)
 status("    Use Intel VA-API/OpenCL:"  HAVE_VA_INTEL       THEN "YES (MSDK: ${VA_INTEL_MSDK_ROOT}  OpenCL: ${VA_INTEL_IOCL_ROOT})" ELSE NO)
 endif(DEFINED WITH_VA_INTEL)
+if(DEFINED WITH_LAPACK)
+status("    Use Lapack:"      HAVE_LAPACK     THEN "YES" ELSE NO)
+endif(DEFINED WITH_LAPACK)
 status("    Use Eigen:"      HAVE_EIGEN       THEN "YES (ver ${EIGEN_WORLD_VERSION}.${EIGEN_MAJOR_VERSION}.${EIGEN_MINOR_VERSION})" ELSE NO)
 status("    Use Cuda:"       HAVE_CUDA        THEN "YES (ver ${CUDA_VERSION_STRING})" ELSE NO)
 status("    Use OpenCL:"     HAVE_OPENCL      THEN YES ELSE NO)

--- a/cmake/OpenCVFindLibsPerf.cmake
+++ b/cmake/OpenCVFindLibsPerf.cmake
@@ -2,6 +2,19 @@
 #  Detect other 3rd-party performance and math libraries
 # ----------------------------------------------------------------------------
+# --- Lapack ---
+if(WITH_LAPACK)
+  find_package(LAPACK)
+  if(LAPACK_FOUND)
+      find_path(LAPACK_INCLUDE_DIR "lapacke.h")
+      if(LAPACK_INCLUDE_DIR)
+        set(HAVE_LAPACK 1)
+        ocv_include_directories(${LAPACK_INCLUDE_DIR})
+        list(APPEND OPENCV_LINKER_LIBS ${LAPACK_LIBRARIES})
+      endif()
+  endif(LAPACK_FOUND)
+endif(WITH_LAPACK)
 # --- TBB ---
 if(WITH_TBB)
  include("${OpenCV_SOURCE_DIR}/cmake/OpenCVDetectTBB.cmake")

--- a/cmake/templates/cvconfig.h.in
+++ b/cmake/templates/cvconfig.h.in
@@ -197,3 +197,6 @@
 /* Intel VA-API/OpenCL */
 #cmakedefine HAVE_VA_INTEL
+/* Lapack */
+#cmakedefine HAVE_LAPACK
--- a/modules/core/include/opencv2/core/hal/hal.hpp
+++ b/modules/core/include/opencv2/core/hal/hal.hpp
@@ -49,17 +49,6 @@
 #include "opencv2/core/cvstd.hpp"
 #include "opencv2/core/hal/interface.h"
-//! @cond IGNORED
-#define CALL_HAL(name, fun, ...) \
-    int res = fun(__VA_ARGS__); \
-    if (res == CV_HAL_ERROR_OK) \
-        return; \
-    else if (res != CV_HAL_ERROR_NOT_IMPLEMENTED) \
-        CV_Error_(cv::Error::StsInternal, \
-            ("HAL implementation " CVAUX_STR(name) " ==> " CVAUX_STR(fun) " returned %d (0x%08x)", res, res));
-//! @endcond
 namespace cv { namespace hal {
 //! @addtogroup core_hal_functions
@@ -75,6 +64,21 @@ CV_EXPORTS int LU32f(float* A, size_t astep, int m, float* b, size_t bstep, int
 CV_EXPORTS int LU64f(double* A, size_t astep, int m, double* b, size_t bstep, int n);
 CV_EXPORTS bool Cholesky32f(float* A, size_t astep, int m, float* b, size_t bstep, int n);
 CV_EXPORTS bool Cholesky64f(double* A, size_t astep, int m, double* b, size_t bstep, int n);
+CV_EXPORTS void SVD32f(float* At, size_t astep, float* W, float* U, size_t ustep, float* Vt, size_t vstep, int m, int n, int flags);
+CV_EXPORTS void SVD64f(double* At, size_t astep, double* W, double* U, size_t ustep, double* Vt, size_t vstep, int m, int n, int flags);
+CV_EXPORTS void gemm32f(const float* src1, size_t src1_step, const float* src2, size_t src2_step,
+                        float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step,
+                        int m_a, int n_a, int n_d, int flags);
+CV_EXPORTS void gemm64f(const double* src1, size_t src1_step, const double* src2, size_t src2_step,
+                        double alpha, const double* src3, size_t src3_step, double beta, double* dst, size_t dst_step,
+                        int m_a, int n_a, int n_d, int flags);
+CV_EXPORTS void gemm32fc(const float* src1, size_t src1_step, const float* src2, size_t src2_step,
+                        float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step,
+                        int m_a, int n_a, int n_d, int flags);
+CV_EXPORTS void gemm64fc(const double* src1, size_t src1_step, const double* src2, size_t src2_step,
+                        double alpha, const double* src3, size_t src3_step, double beta, double* dst, size_t dst_step,
+                        int m_a, int n_a, int n_d, int flags);
 CV_EXPORTS int normL1_(const uchar* a, const uchar* b, int n);
 CV_EXPORTS float normL1_(const float* a, const float* b, int n);

--- a/modules/core/include/opencv2/core/hal/interface.h
+++ b/modules/core/include/opencv2/core/hal/interface.h
@@ -158,6 +158,21 @@ typedef signed char schar;
 #define CV_HAL_DFT_IS_INPLACE 1024
 //! @}
+//! @name SVD flags
+//! @{
+#define CV_HAL_SVD_NO_UV    1
+#define CV_HAL_SVD_SHORT_UV 2
+#define CV_HAL_SVD_MODIFY_A 4
+#define CV_HAL_SVD_FULL_UV  8
+//! @}
+//! @name Gemm flags
+//! @{
+#define CV_HAL_GEMM_1_T 1
+#define CV_HAL_GEMM_2_T 2
+#define CV_HAL_GEMM_3_T 4
+//! @}
 //! @}
 #endif
--- a/modules/core/include/opencv2/core/matx.hpp
+++ b/modules/core/include/opencv2/core/matx.hpp
@@ -438,7 +438,7 @@ template<typename _Tp, int m> struct Matx_DetOp
            return p;
        for( int i = 0; i < m; i++ )
            p *= temp(i, i);
-        return 1./p;
+        return p;
    }
 };

--- a/modules/core/src/hal_internal.cpp
+++ b/modules/core/src/hal_internal.cpp
--- a/modules/core/src/hal_internal.hpp
+++ b/modules/core/src/hal_internal.hpp
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+#ifndef OPENCV_CORE_HAL_INTERNAL_HPP
+#define OPENCV_CORE_HAL_INTERNAL_HPP
+#include "precomp.hpp"
+#ifdef HAVE_LAPACK
+int lapack_LU32f(float* a, size_t a_step, int m, float* b, size_t b_step, int n, int* info);
+int lapack_LU64f(double* a, size_t a_step, int m, double* b, size_t b_step, int n, int* info);
+int lapack_Cholesky32f(float* a, size_t a_step, int m, float* b, size_t b_step, int n, bool* info);
+int lapack_Cholesky64f(double* a, size_t a_step, int m, double* b, size_t b_step, int n, bool* info);
+int lapack_SVD32f(float* a, size_t a_step, float* w, float* u, size_t u_step, float* vt, size_t v_step, int m, int n, int flags);
+int lapack_SVD64f(double* a, size_t a_step, double* w, double* u, size_t u_step, double* vt, size_t v_step, int m, int n, int flags);
+int lapack_gemm32f(const float* src1, size_t src1_step, const float* src2, size_t src2_step,
+                   float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step,
+                   int m, int n, int k, int flags);
+int lapack_gemm64f(const double* src1, size_t src1_step, const double* src2, size_t src2_step,
+                   double alpha, const double* src3, size_t src3_step, double beta, double* dst, size_t dst_step,
+                   int m, int n, int k, int flags);
+int lapack_gemm32fc(const float* src1, size_t src1_step, const float* src2, size_t src2_step,
+                   float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step,
+                   int m, int n, int k, int flags);
+int lapack_gemm64fc(const double* src1, size_t src1_step, const double* src2, size_t src2_step,
+                   double alpha, const double* src3, size_t src3_step, double beta, double* dst, size_t dst_step,
+                   int m, int n, int k, int flags);
+#undef cv_hal_LU32f
+#define cv_hal_LU32f lapack_LU32f
+#undef cv_hal_LU64f
+#define cv_hal_LU64f lapack_LU64f
+#undef cv_hal_Cholesky32f
+#define cv_hal_Cholesky32f lapack_Cholesky32f
+#undef cv_hal_Cholesky64f
+#define cv_hal_Cholesky64f lapack_Cholesky64f
+#undef cv_hal_SVD32f
+#define cv_hal_SVD32f lapack_SVD32f
+#undef cv_hal_SVD64f
+#define cv_hal_SVD64f lapack_SVD64f
+#undef cv_hal_gemm32f
+#define cv_hal_gemm32f lapack_gemm32f
+#undef cv_hal_gemm64f
+#define cv_hal_gemm64f lapack_gemm64f
+#undef cv_hal_gemm32fc
+#define cv_hal_gemm32fc lapack_gemm32fc
+#undef cv_hal_gemm64fc
+#define cv_hal_gemm64fc lapack_gemm64fc
+#endif //HAVE_LAPACK
+#endif //OPENCV_CORE_HAL_INTERNAL_HPP
--- a/modules/core/src/hal_replacement.hpp
+++ b/modules/core/src/hal_replacement.hpp
@@ -472,14 +472,148 @@ inline int hal_ni_dctFree2D(cvhalDFT *context) { return CV_HAL_ERROR_NOT_IMPLEME
 #define cv_hal_dctFree2D hal_ni_dctFree2D
 //! @endcond
+/**
+Performs \f$LU\f$ decomposition of square matrix \f$A=P*L*U\f$ (where \f$P\f$ is permutation matrix) and solves matrix equation \f$A*X=B\f$.
+Function returns the \f$sign\f$ of permutation \f$P\f$ via parameter info.
+@param src1 pointer to input matrix \f$A\f$ stored in row major order. After finish of work src1 contains at least \f$U\f$ part of \f$LU\f$
+decomposition which is appropriate for determainant calculation: \f$det(A)=sign*\prod_{j=1}^{M}a_{jj}\f$.
+@param src1_step number of bytes each matrix \f$A\f$ row occupies.
+@param m size of square matrix \f$A\f$.
+@param src2 pointer to \f$M\times N\f$ matrix \f$B\f$ which is the right-hand side of system \f$A*X=B\f$. \f$B\f$ stored in row major order.
+If src2 is null pointer only \f$LU\f$ decomposition will be performed. After finish of work src2 contains solution \f$X\f$ of system \f$A*X=B\f$.
+@param src2_step number of bytes each matrix \f$B\f$ row occupies.
+@param n number of right-hand vectors in \f$M\times N\f$ matrix \f$B\f$.
+@param info indicates success of decomposition. If *info is equals to zero decomposition failed, othervise *info is equals to \f$sign\f$.
+ */
+//! @addtogroup core_hal_interface_decomp_lu LU matrix decomposition
+//! @{
+inline int hal_ni_LU32f(float* src1, size_t src1_step, int m, float* src2, size_t src2_step, int n, int* info) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_LU64f(double* src1, size_t src1_step, int m, double* src2, size_t src2_step, int n, int* info) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+//! @}
+/**
+Performs Cholesky decomposition of matrix \f$A = L*L^T\f$ and solves matrix equation \f$A*X=B\f$.
+@param src1 pointer to input matrix \f$A\f$ stored in row major order. After finish of work src1 contains lower triangular matrix \f$L\f$.
+@param src1_step number of bytes each matrix \f$A\f$ row occupies.
+@param m size of square matrix \f$A\f$.
+@param src2 pointer to \f$M\times N\f$ matrix \f$B\f$ which is the right-hand side of system \f$A*X=B\f$. B stored in row major order.
+If src2 is null pointer only Cholesky decomposition will be performed. After finish of work src2 contains solution \f$X\f$ of system \f$A*X=B\f$.
+@param src2_step number of bytes each matrix \f$B\f$ row occupies.
+@param n number of right-hand vectors in \f$M\times N\f$ matrix \f$B\f$.
+@param info indicates success of decomposition. If *info is false decomposition failed.
+ */
+//! @addtogroup core_hal_interface_decomp_cholesky Cholesky matrix decomposition
+//! @{
+inline int hal_ni_Cholesky32f(float* src1, size_t src1_step, int m, float* src2, size_t src2_step, int n, bool* info) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_Cholesky64f(double* src1, size_t src1_step, int m, double* src2, size_t src2_step, int n, bool* info) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+//! @}
+/**
+Performs singular value decomposition of \f$M\times N\f$(\f$M>N\f$) matrix \f$A = U*\Sigma*V^T\f$.
+@param src pointer to input \f$M\times N\f$ matrix \f$A\f$ stored in column major order.
+After finish of work src will be filled with rows of \f$U\f$ or not modified (depends of flag CV_HAL_SVD_MODIFY_A).
+@param src_step number of bytes each matrix \f$A\f$ column occupies.
+@param w pointer to array for singular values of matrix \f$A\f$ (i. e. first \f$N\f$ diagonal elements of matrix \f$\Sigma\f$).
+@param u pointer to output \f$M\times N\f$ or \f$M\times M\f$ matrix \f$U\f$ (size depends of flags). Pointer must be valid if flag CV_HAL_SVD_MODIFY_A not used.
+@param u_step number of bytes each matrix \f$U\f$ row occupies.
+@param vt pointer to array for \f$N\times N\f$ matrix \f$V^T\f$.
+@param vt_step number of bytes each matrix \f$V^T\f$ row occupies.
+@param m number fo rows in matrix \f$A\f$.
+@param n number of columns in matrix \f$A\f$.
+@param flags algorithm options (combination of CV_HAL_SVD_FULL_UV, ...).
+ */
+//! @addtogroup core_hal_interface_decomp_svd Singular value matrix decomposition
+//! @{
+inline int hal_ni_SVD32f(float* src, size_t src_step, float* w, float* u, size_t u_step, float* vt, size_t vt_step, int m, int n, int flags) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_SVD64f(double* src, size_t src_step, double* w, double* u, size_t u_step, double* vt, size_t vt_step, int m, int n, int flags) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+//! @}
+//! @cond IGNORED
+#define cv_hal_LU32f hal_ni_LU32f
+#define cv_hal_LU64f hal_ni_LU64f
+#define cv_hal_Cholesky32f hal_ni_Cholesky32f
+#define cv_hal_Cholesky64f hal_ni_Cholesky64f
+#define cv_hal_SVD32f hal_ni_SVD32f
+#define cv_hal_SVD64f hal_ni_SVD64f
+//! @endcond
+/**
+The function performs generalized matrix multiplication similar to the gemm functions in BLAS level 3:
+\f$D = \alpha*AB+\beta*C\f$
+@param src1 pointer to input \f$M\times N\f$ matrix \f$A\f$ or \f$A^T\f$ stored in row major order.
+@param src1_step number of bytes each matrix \f$A\f$ or \f$A^T\f$ row occupies.
+@param src2 pointer to input \f$N\times K\f$ matrix \f$B\f$ or \f$B^T\f$ stored in row major order.
+@param src2_step number of bytes each matrix \f$B\f$ or \f$B^T\f$ row occupies.
+@param alpha \f$\alpha\f$ multiplier before \f$AB\f$
+@param src3 pointer to input \f$M\times K\f$ matrix \f$C\f$ or \f$C^T\f$ stored in row major order.
+@param src3_step number of bytes each matrix \f$C\f$ or \f$C^T\f$ row occupies.
+@param beta \f$\beta\f$ multiplier before \f$C\f$
+@param dst pointer to input \f$M\times K\f$ matrix \f$D\f$ stored in row major order.
+@param dst_step number of bytes each matrix \f$D\f$ row occupies.
+@param m number of rows in matrix \f$A\f$ or \f$A^T\f$, equals to number of rows in matrix \f$D\f$
+@param n number of columns in matrix \f$A\f$ or \f$A^T\f$
+@param k number of columns in matrix \f$B\f$ or \f$B^T\f$, equals to number of columns in matrix \f$D\f$
+@param flags algorithm options (combination of CV_HAL_GEMM_1_T, ...).
+ */
+//! @addtogroup core_hal_interface_matrix_multiplication Matrix multiplication
+//! @{
+inline int hal_ni_gemm32f(const float* src1, size_t src1_step, const float* src2, size_t src2_step,
+                          float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step,
+                          int m, int n, int k, int flags) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_gemm64f(const double* src1, size_t src1_step, const double* src2, size_t src2_step,
+                          double alpha, const double* src3, size_t src3_step, double beta, double* dst, size_t dst_step,
+                          int m, int n, int k, int flags) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_gemm32fc(const float* src1, size_t src1_step, const float* src2, size_t src2_step,
+                          float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step,
+                          int m, int n, int k, int flags) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
+inline int hal_ni_gemm64fc(const double* src1, size_t src1_step, const double* src2, size_t src2_step,
+                          double alpha, const double* src3, size_t src3_step, double beta, double* dst, size_t dst_step,
+                          int m, int n, int k, int flags) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
 //! @}
+//! @cond IGNORED
+#define cv_hal_gemm32f hal_ni_gemm32f
+#define cv_hal_gemm64f hal_ni_gemm64f
+#define cv_hal_gemm32fc hal_ni_gemm32fc
+#define cv_hal_gemm64fc hal_ni_gemm64fc
+//! @endcond
+//! @}
 #if defined __GNUC__
 #  pragma GCC diagnostic pop
 #elif defined _MSC_VER
 #  pragma warning( pop )
 #endif
+#include "hal_internal.hpp"
 #include "custom_hal.hpp"
+//! @cond IGNORED
+#define CALL_HAL_RET(name, fun, retval, ...) \
+    int res = fun(__VA_ARGS__, &retval); \
+    if (res == CV_HAL_ERROR_OK) \
+        return retval; \
+    else if (res != CV_HAL_ERROR_NOT_IMPLEMENTED) \
+        CV_Error_(cv::Error::StsInternal, \
+            ("HAL implementation " CVAUX_STR(name) " ==> " CVAUX_STR(fun) " returned %d (0x%08x)", res, res));
+#define CALL_HAL(name, fun, ...) \
+    int res = fun(__VA_ARGS__); \
+    if (res == CV_HAL_ERROR_OK) \
+        return; \
+    else if (res != CV_HAL_ERROR_NOT_IMPLEMENTED) \
+        CV_Error_(cv::Error::StsInternal, \
+            ("HAL implementation " CVAUX_STR(name) " ==> " CVAUX_STR(fun) " returned %d (0x%08x)", res, res));
+//! @endcond
 #endif
--- a/modules/core/src/lapack.cpp
+++ b/modules/core/src/lapack.cpp
@@ -570,11 +570,44 @@ JacobiSVDImpl_(_Tp* At, size_t astep, _Tp* _W, _Tp* Vt, size_t vstep,
 static void JacobiSVD(float* At, size_t astep, float* W, float* Vt, size_t vstep, int m, int n, int n1=-1)
 {
-    JacobiSVDImpl_(At, astep, W, Vt, vstep, m, n, !Vt ? 0 : n1 < 0 ? n : n1, FLT_MIN, FLT_EPSILON*2);
+    hal::SVD32f(At, astep, W, NULL, astep, Vt, vstep, m, n, n1);
 }
 static void JacobiSVD(double* At, size_t astep, double* W, double* Vt, size_t vstep, int m, int n, int n1=-1)
 {
+    hal::SVD64f(At, astep, W, NULL, astep, Vt, vstep, m, n, n1);
+}
+template <typename fptype> static inline int
+decodeSVDParameters(const fptype* U, const fptype* Vt, int m, int n, int n1)
+{
+    int halSVDFlag = 0;
+    if(Vt == NULL)
+        halSVDFlag = CV_HAL_SVD_NO_UV;
+    else if(n1 <= 0 || n1 == n)
+    {
+        halSVDFlag = CV_HAL_SVD_SHORT_UV;
+        if(U == NULL)
+            halSVDFlag |= CV_HAL_SVD_MODIFY_A;
+    }
+    else if(n1 == m)
+    {
+        halSVDFlag = CV_HAL_SVD_FULL_UV;
+        if(U == NULL)
+            halSVDFlag |= CV_HAL_SVD_MODIFY_A;
+    }
+    return halSVDFlag;
+}
+void hal::SVD32f(float* At, size_t astep, float* W, float* U, size_t ustep, float* Vt, size_t vstep, int m, int n, int n1)
+{
+    CALL_HAL(SVD32f, cv_hal_SVD32f, At, astep, W, U, ustep, Vt, vstep, m, n, decodeSVDParameters(U, Vt, m, n, n1))
+    JacobiSVDImpl_(At, astep, W, Vt, vstep, m, n, !Vt ? 0 : n1 < 0 ? n : n1, FLT_MIN, FLT_EPSILON*2);
+}
+void hal::SVD64f(double* At, size_t astep, double* W, double* U, size_t ustep, double* Vt, size_t vstep, int m, int n, int n1)
+{
+    CALL_HAL(SVD64f, cv_hal_SVD64f, At, astep, W, U, ustep, Vt, vstep, m, n, decodeSVDParameters(U, Vt, m, n, n1))
    JacobiSVDImpl_(At, astep, W, Vt, vstep, m, n, !Vt ? 0 : n1 < 0 ? n : n1, DBL_MIN, DBL_EPSILON*10);
 }
@@ -745,7 +778,6 @@ double cv::determinant( InputArray _mat )
            {
                for( int i = 0; i < rows; i++ )
                    result *= a.at<float>(i,i);
-                result = 1./result;
            }
        }
    }
@@ -769,7 +801,6 @@ double cv::determinant( InputArray _mat )
            {
                for( int i = 0; i < rows; i++ )
                    result *= a.at<double>(i,i);
-                result = 1./result;
            }
        }
    }

--- a/modules/core/src/matmul.cpp
+++ b/modules/core/src/matmul.cpp
--- a/modules/core/src/matrix_decomp.cpp
+++ b/modules/core/src/matrix_decomp.cpp
@@ -89,8 +89,6 @@ LUImpl(_Tp* A, size_t astep, int m, _Tp* b, size_t bstep, int n, _Tp eps)
                for( k = 0; k < n; k++ )
                    b[j*bstep + k] += alpha*b[i*bstep + k];
        }
-        A[i*astep + i] = -d;
    }
    if( b )
@@ -101,7 +99,7 @@ LUImpl(_Tp* A, size_t astep, int m, _Tp* b, size_t bstep, int n, _Tp eps)
                _Tp s = b[i*bstep + j];
                for( k = i+1; k < m; k++ )
                    s -= A[i*astep + k]*b[k*bstep + j];
-                b[i*bstep + j] = s*A[i*astep + i];
+                b[i*bstep + j] = s/A[i*astep + i];
            }
    }
@@ -111,13 +109,19 @@ LUImpl(_Tp* A, size_t astep, int m, _Tp* b, size_t bstep, int n, _Tp eps)
 int LU32f(float* A, size_t astep, int m, float* b, size_t bstep, int n)
 {
-    return LUImpl(A, astep, m, b, bstep, n, FLT_EPSILON*10);
+    int output;
+    CALL_HAL_RET(LU32f, cv_hal_LU32f, output, A, astep, m, b, bstep, n)
+    output = LUImpl(A, astep, m, b, bstep, n, FLT_EPSILON*10);
+    return output;
 }
 int LU64f(double* A, size_t astep, int m, double* b, size_t bstep, int n)
 {
-    return LUImpl(A, astep, m, b, bstep, n, DBL_EPSILON*100);
+    int output;
+    CALL_HAL_RET(LU64f, cv_hal_LU64f, output, A, astep, m, b, bstep, n)
+    output = LUImpl(A, astep, m, b, bstep, n, DBL_EPSILON*100);
+    return output;
 }
 template<typename _Tp> static inline bool
@@ -193,14 +197,17 @@ CholImpl(_Tp* A, size_t astep, int m, _Tp* b, size_t bstep, int n)
    return true;
 }
 bool Cholesky32f(float* A, size_t astep, int m, float* b, size_t bstep, int n)
 {
+    bool output;
+    CALL_HAL_RET(Cholesky32f, cv_hal_Cholesky32f, output, A, astep, m, b, bstep, n)
    return CholImpl(A, astep, m, b, bstep, n);
 }
 bool Cholesky64f(double* A, size_t astep, int m, double* b, size_t bstep, int n)
 {
+    bool output;
+    CALL_HAL_RET(Cholesky64f, cv_hal_Cholesky64f, output, A, astep, m, b, bstep, n)
    return CholImpl(A, astep, m, b, bstep, n);
 }

--- a/modules/imgproc/src/hal_replacement.hpp
+++ b/modules/imgproc/src/hal_replacement.hpp
@@ -309,4 +309,23 @@ inline int hal_ni_warpPerspectve(int src_type, const uchar *src_data, size_t src
 #include "custom_hal.hpp"
+//! @cond IGNORED
+#define CALL_HAL_RET(name, fun, retval, ...) \
+    int res = fun(__VA_ARGS__, &retval); \
+    if (res == CV_HAL_ERROR_OK) \
+        return retval; \
+    else if (res != CV_HAL_ERROR_NOT_IMPLEMENTED) \
+        CV_Error_(cv::Error::StsInternal, \
+            ("HAL implementation " CVAUX_STR(name) " ==> " CVAUX_STR(fun) " returned %d (0x%08x)", res, res));
+#define CALL_HAL(name, fun, ...) \
+    int res = fun(__VA_ARGS__); \
+    if (res == CV_HAL_ERROR_OK) \
+        return; \
+    else if (res != CV_HAL_ERROR_NOT_IMPLEMENTED) \
+        CV_Error_(cv::Error::StsInternal, \
+            ("HAL implementation " CVAUX_STR(name) " ==> " CVAUX_STR(fun) " returned %d (0x%08x)", res, res));
+//! @endcond
 #endif