Commit b1c87936 authored by Vitaliy Lyudvichenko

Add BLAS and MKL support to the dnn module
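
To try it, configure with the new per-module option (hypothetical invocation; the option name assumes ${the_module} resolves to opencv_dnn): cmake -Dopencv_dnn_WITH_BLAS=ON .., optionally pointing MKL_ROOT_DIR at an MKL installation.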

parent cd5993c6
@@ -24,6 +24,48 @@ ocv_add_samples()
ocv_add_accuracy_tests()
ocv_add_perf_tests()
# ----------------------------------------------------------------------------
# Find BLAS library
# ----------------------------------------------------------------------------
OCV_OPTION(${the_module}_WITH_BLAS "Use external BLAS library to speed up processing" OFF)

if(${the_module}_WITH_BLAS)
  set(BLAS_CBLAS_H "cblas.h")
  include(cmake/OpenCVFindMKL.cmake)
  if(MKL_FOUND)
    set(BLAS_INCLUDE_DIR ${MKL_INCLUDE_DIRS})
    set(BLAS_CBLAS_H "mkl_cblas.h")
    set(BLAS_LIBRARIES ${MKL_LIBRARIES})
    set(BLAS_BINARIES "")
  endif()

  set(BLAS_PREF ${the_module}_BLAS)
  set(${BLAS_PREF}_INCLUDE_DIR ${BLAS_INCLUDE_DIR} CACHE PATH "Path to BLAS include dir")
  set(${BLAS_PREF}_CBLAS_H ${BLAS_CBLAS_H} CACHE STRING "Name of cblas.h")
  set(${BLAS_PREF}_LIBRARIES ${BLAS_LIBRARIES} CACHE FILEPATH "Path to BLAS libraries that will be linked with ${the_module} module")
  set(${BLAS_PREF}_BINARIES ${BLAS_BINARIES} CACHE FILEPATH "Path to BLAS binaries (.so, .dll) that will be installed with ${the_module} module")

  set(CBLAS_H ${${the_module}_BLAS_INCLUDE_DIR}/${${BLAS_PREF}_CBLAS_H})
  if(${BLAS_PREF}_INCLUDE_DIR AND NOT EXISTS ${CBLAS_H})
    message(WARNING "cblas.h at \"${CBLAS_H}\" not found")
  endif()

  ocv_module_include_directories(${${the_module}_BLAS_INCLUDE_DIR})
  list(APPEND OPENCV_MODULE_${the_module}_DEPS_EXT ${${the_module}_BLAS_LIBRARIES})
  target_link_libraries(${the_module} ${${the_module}_BLAS_LIBRARIES})
  add_definitions(-DHAVE_CBLAS)
  add_definitions(-DCBLAS_H_INCLUDE=<${${BLAS_PREF}_CBLAS_H}>)

  message(STATUS "CMAKE_CURRENT_BINARY_DIR=${CMAKE_CURRENT_BINARY_DIR}")
  add_custom_command(TARGET ${the_module} PRE_BUILD #OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/cblas.h
    COMMAND ${CMAKE_COMMAND} ARGS -E echo \"\#include <${${BLAS_PREF}_CBLAS_H}>\" > ${CMAKE_CURRENT_BINARY_DIR}/cblas.h
    COMMENT "Generating proxy cblas.h header")
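
  # The PRE_BUILD command above writes a one-line proxy header into the build
  # directory (with MKL it contains just "#include <mkl_cblas.h>"), so the C++
  # sources can unconditionally #include "cblas.h" whatever BLAS is selected.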
  if(${the_module}_BLAS_BINARIES)
    ocv_install_target(${the_module} EXPORT ${the_module}_BLAS_BINARIES
      RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} COMPONENT libs)
  endif()
endif()
# ----------------------------------------------------------------------------
# Download pre-trained models for complex testing on GoogLeNet and AlexNet
# ----------------------------------------------------------------------------
#
# The script to detect Intel(R) Math Kernel Library (MKL)
# installation/package
#
# Parameters:
#   MKL_WITH_TBB
#
# On return this will define:
#
#   HAVE_MKL          - True if Intel MKL found
#   MKL_ROOT_DIR      - root of MKL installation
#   MKL_INCLUDE_DIRS  - MKL include folder
#   MKL_LIBRARIES     - MKL libraries that are used by OpenCV
#
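# A minimal consumer sketch (hypothetical target name "my_target"; assumes the
# script is included from the module's CMakeLists.txt as in the hunk above):
#
#   include(cmake/OpenCVFindMKL.cmake)
#   if(MKL_FOUND)
#     include_directories(${MKL_INCLUDE_DIRS})
#     target_link_libraries(my_target ${MKL_LIBRARIES})
#   endif()
#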
if(NOT DEFINED MKL_USE_MULTITHREAD)
  OCV_OPTION(MKL_WITH_TBB "Use MKL with TBB multithreading" OFF)#ON IF WITH_TBB)
  OCV_OPTION(MKL_WITH_OPENMP "Use MKL with OpenMP multithreading" OFF)#ON IF WITH_OPENMP)
endif()

# check current MKL_ROOT_DIR
if(NOT MKL_ROOT_DIR OR NOT EXISTS ${MKL_ROOT_DIR}/include/mkl.h)
  set(MKLROOT_PATHS ${MKL_ROOT_DIR})
  if(DEFINED ENV{MKLROOT})
    list(APPEND MKLROOT_PATHS $ENV{MKLROOT})
  endif()
  if(WIN32)
    set(ProgramFilesx86 "ProgramFiles(x86)")
    list(APPEND MKLROOT_PATHS $ENV{${ProgramFilesx86}}/IntelSWTools/compilers_and_libraries/windows/mkl)
  endif()
  if(UNIX)
    list(APPEND MKLROOT_PATHS "/opt/intel/mkl")
  endif()
  find_path(MKL_ROOT_DIR include/mkl.h PATHS ${MKLROOT_PATHS})
endif()
set(MKL_INCLUDE_DIRS ${MKL_ROOT_DIR}/include)
set(MKL_INCLUDE_HEADERS ${MKL_INCLUDE_DIRS}/mkl.h ${MKL_INCLUDE_DIRS}/mkl_version.h)
macro(get_mkl_version VERSION_FILE)
  # read MKL version info from file
  file(STRINGS ${VERSION_FILE} STR1 REGEX "__INTEL_MKL__")
  file(STRINGS ${VERSION_FILE} STR2 REGEX "__INTEL_MKL_MINOR__")
  file(STRINGS ${VERSION_FILE} STR3 REGEX "__INTEL_MKL_UPDATE__")
  #file(STRINGS ${VERSION_FILE} STR4 REGEX "INTEL_MKL_VERSION")

  # extract info and assign to variables
  string(REGEX MATCHALL "[0-9]+" MKL_VERSION_MAJOR ${STR1})
  string(REGEX MATCHALL "[0-9]+" MKL_VERSION_MINOR ${STR2})
  string(REGEX MATCHALL "[0-9]+" MKL_VERSION_UPDATE ${STR3})
  set(MKL_VERSION_STR "${MKL_VERSION_MAJOR}.${MKL_VERSION_MINOR}.${MKL_VERSION_UPDATE}" CACHE STRING "MKL version" FORCE)
endmacro()
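
# The regexes above match #define lines in mkl_version.h, e.g. (hypothetical
# values):
#   #define __INTEL_MKL__        2017
#   #define __INTEL_MKL_MINOR__  0
#   #define __INTEL_MKL_UPDATE__ 1
# which would yield MKL_VERSION_STR "2017.0.1".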
# determine arch
if(CMAKE_CXX_SIZEOF_DATA_PTR EQUAL 8)
  set(MKL_X64 1)
  set(MKL_ARCH "intel64")

  include(CheckTypeSize)
  CHECK_TYPE_SIZE(int _sizeof_int)
  if(_sizeof_int EQUAL 4)
    set(MKL_LP64 "lp64")
  else()
    set(MKL_LP64 "ilp64")
  endif()
else()
  set(MKL_ARCH "ia32")
endif()
if(MSVC)
  set(MKL_EXT ".lib")
else()
  set(MKL_EXT ".a")
endif()

set(MKL_LIB_DIR ${MKL_ROOT_DIR}/lib/${MKL_ARCH})
set(MKL_LIBRARIES ${MKL_LIB_DIR}/mkl_core${MKL_EXT} ${MKL_LIB_DIR}/mkl_intel_${MKL_LP64}${MKL_EXT})

if(MKL_WITH_TBB)
  list(APPEND MKL_LIBRARIES ${MKL_LIB_DIR}/mkl_tbb_thread${MKL_EXT})
  list(APPEND MKL_LIBRARIES ${MKL_ROOT_DIR}/../tbb/lib/${MKL_ARCH}/tbb${MKL_EXT})
elseif(MKL_WITH_OPENMP)
  message(FATAL_ERROR "OpenMP-threaded MKL is not supported yet")
else()
  list(APPEND MKL_LIBRARIES ${MKL_LIB_DIR}/mkl_sequential${MKL_EXT})
endif()
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(MKL DEFAULT_MSG MKL_INCLUDE_HEADERS MKL_LIBRARIES)
if(MKL_FOUND)
  get_mkl_version(${MKL_INCLUDE_DIRS}/mkl_version.h)
  message(STATUS "Found MKL ${MKL_VERSION_STR} at: ${MKL_ROOT_DIR}")

  set(HAVE_MKL ON CACHE BOOL "True if MKL found")
  set(MKL_ROOT_DIR ${MKL_ROOT_DIR} CACHE PATH "Path to MKL directory")
  set(MKL_INCLUDE_DIRS ${MKL_INCLUDE_DIRS} CACHE PATH "Path to MKL include directory")
  set(MKL_LIBRARIES ${MKL_LIBRARIES} CACHE FILEPATH "MKL libraries")
else()
  set(HAVE_MKL OFF CACHE BOOL "True if MKL found")
  set(MKL_ROOT_DIR ${MKL_ROOT_DIR} CACHE PATH "Path to MKL directory")
  unset(MKL_INCLUDE_DIRS)
  unset(MKL_LIBRARIES)
endif()
\ No newline at end of file
@@ -46,6 +46,10 @@
#include "im2col.hpp"
#include <iostream>
#if HAVE_CBLAS
#include "cblas.h"
#endif
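
// HAVE_CBLAS and the proxy "cblas.h" are supplied by the module's CMake
// (see the BLAS section above) when ${the_module}_WITH_BLAS is enabled.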
namespace cv
{
namespace dnn
@@ -73,6 +77,20 @@ namespace dnn
    //TBD
    useOpenCL = params.has("use_opencl");

    //init BLAS
#if HAVE_CBLAS
    {
    #ifdef OPENBLAS_VERSION
        if (openblas_get_num_threads() != cv::getNumThreads())
        {
            openblas_set_num_threads(cv::getNumThreads());
            goto_set_num_threads(cv::getNumThreads());
        }
        //std::cout << "OpenBLAS threads " << openblas_get_num_threads() << "/" << openblas_get_num_procs() << "\n";
    #endif
    }
#endif
}
void ConvolutionLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
@@ -109,7 +127,7 @@ namespace dnn
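// Note: a 1x1 kernel can bypass im2col only when the stride is 1 as well;
// with stride > 1 the input cannot be reused directly as the column matrix,
// hence the extra condition in the hotfix below.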
inline bool ConvolutionLayer::is1x1() const
{
-    return (kerH == 1 && kerW == 1);
+    return (kerH == 1 && kerW == 1) && (strideW == 1 && strideH == 1); //hotfix for stride
}
void ConvolutionLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
@@ -130,13 +148,13 @@ namespace dnn
        Mat kerMat(outGroupCn, ksize, wgtBlob.type(), wgtBlob.ptr(g*outGroupCn));
        Mat dstMat(outGroupCn, outH*outW, outBlob.type(), outBlob.ptr(n, g*outGroupCn));

-       cv::gemm(kerMat, colMat, 1, noArray(), 0, dstMat);
+       gemmCPU(kerMat, colMat, 1, dstMat, 0);

        if (bias)
        {
            float *biasPtr = blobs[1].ptrf() + g*outGroupCn;
            Mat biasMat(outGroupCn, 1, CV_32F, biasPtr);
-           cv::gemm(biasMat, biasOnesMat, 1, dstMat, 1, dstMat);
+           gemmCPU(biasMat, biasOnesMat, 1, dstMat, 1); //TODO: gemv
        }
    }
}
@@ -223,7 +241,7 @@ namespace dnn
        Mat convMat(outGroupCn, outH*outW, convBlob.type(), convBlob.ptr(n, g*outGroupCn));
        Mat wghtMat(outGroupCn, ksize, wghtBlob.type(), wghtBlob.ptr(g*outGroupCn));

-       cv::gemm(wghtMat, convMat, 1, noArray(), 0, colMat, GEMM_1_T);
+       gemmCPU(wghtMat, convMat, 1, colMat, 0, GEMM_1_T);

        col2im(dstMat);
@@ -231,7 +249,7 @@ namespace dnn
        {
            float *biasPtr = blobs[1].ptrf() + g*inpGroupCn;
            Mat biasMat(inpGroupCn, 1, CV_32F, biasPtr);
-           cv::gemm(biasMat, biasOnesMat, 1, dstMat, 1, dstMat);
+           gemmCPU(biasMat, biasOnesMat, 1, dstMat, 1); //TODO: gemv
        }
    }
}
@@ -247,5 +265,57 @@ namespace dnn
    if (dstMat.type() == CV_64F)
        col2im_cpu((double*)colMat.ptr(), inpGroupCn, inpH, inpW, kerH, kerW, padH, padW, strideH, strideW, (double*)dstMat.ptr());
}

void gemm(InputArray A, InputArray B, double alpha, InputOutputArray C, double beta, int flags /*= 0*/)
{
    cv::gemm(A, B, alpha, C, beta, C, flags);
}

inline void SwapRowCols(const Mat &A, int &rows, int &cols, bool transA = false)
{
    rows = (transA) ? A.cols : A.rows;
    cols = (transA) ? A.rows : A.cols;
}
void gemmCPU(const Mat &A, const Mat &B, double alpha, Mat &C, double beta, int flags /*= 0*/)
{
#if HAVE_CBLAS
    bool transA = flags & GEMM_1_T;
    bool transB = flags & GEMM_2_T;
    bool transC = flags & GEMM_3_T;
    int Arows, Acols, Brows, Bcols, Crows, Ccols;
    SwapRowCols(A, Arows, Acols, transA);
    SwapRowCols(B, Brows, Bcols, transB);
    SwapRowCols(C, Crows, Ccols, transC);

    CV_DbgAssert(!(flags & GEMM_3_T));
    CV_Assert(Acols == Brows && Arows == Crows && Bcols == Ccols);
    CV_DbgAssert(A.isContinuous() && B.isContinuous() && C.isContinuous());
    CV_DbgAssert(A.type() == CV_32F || A.type() == CV_64F);
    CV_DbgAssert(A.type() == B.type() && B.type() == C.type());
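
    // cv::Mat data is row-major, hence CblasRowMajor below; the leading
    // dimension is the stored row length (A.cols, B.cols, C.cols) even when
    // an operand is logically transposed via GEMM_1_T/GEMM_2_T.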
    if (C.type() == CV_32F)
    {
        cblas_sgemm(CblasRowMajor, transA ? CblasTrans : CblasNoTrans, transB ? CblasTrans : CblasNoTrans,
                    Arows, Bcols, Acols,
                    (float)alpha, A.ptr<float>(), A.cols,
                    B.ptr<float>(), B.cols,
                    (float)beta, C.ptr<float>(), C.cols);
    }
    else if (C.type() == CV_64F)
    {
        //TODO: Should be tested
        cblas_dgemm(CblasRowMajor, transA ? CblasTrans : CblasNoTrans, transB ? CblasTrans : CblasNoTrans,
                    Arows, Bcols, Acols,
                    alpha, A.ptr<double>(), A.cols,
                    B.ptr<double>(), B.cols,
                    beta, C.ptr<double>(), C.cols);
    }
#else
    cv::gemm(A, B, alpha, C, beta, C, flags);
#endif
}
}
}
@@ -87,6 +87,10 @@ namespace dnn
    DeConvolutionLayer(LayerParams &params);
    void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
};

void gemm(InputArray A, InputArray B, double alpha, InputOutputArray C, double beta, int flags = 0);
void gemmCPU(const Mat &A, const Mat &B, double alpha, Mat &C, double beta, int flags = 0);
}
}
#endif