Commit 9a342b51 authored by Vadim Pisarevsky's avatar Vadim Pisarevsky

Merge pull request #707 from ludv1x:dnn

parents e7b5c81b 942e9205
...@@ -17,15 +17,38 @@ ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4701) ...@@ -17,15 +17,38 @@ ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4701)
# Resolve libprotobuf dependency # Resolve libprotobuf dependency
# ---------------------------------------------------------------------------- # ----------------------------------------------------------------------------
include(cmake/OpenCVFindLibProtobuf.cmake) include(cmake/OpenCVFindLibProtobuf.cmake)
ocv_glob_module_sources(${PROTOBUF_SRCS} ${PROTOBUF_HDRS})
ocv_source_group("Src\\protobuf" FILES ${PROTOBUF_SRCS} ${PROTOBUF_HDRS}) ocv_source_group("Src\\protobuf" FILES ${PROTOBUF_SRCS} ${PROTOBUF_HDRS})
ocv_module_include_directories(include ${PROTOBUF_INCLUDE_DIR}) ocv_module_include_directories(include ${PROTOBUF_INCLUDE_DIR})
# ----------------------------------------------------------------------------
# Try to find BLAS libraries
# ----------------------------------------------------------------------------
# Opt-in switch, OFF by default. Presumably the find script included below
# only searches for a BLAS implementation when this option is enabled --
# TODO confirm against cmake/OpenCVFindCBLAS.cmake.
OCV_OPTION(${the_module}_WITH_BLAS "Use external BLAS library to speedup processing" OFF)
include(cmake/OpenCVFindCBLAS.cmake)
# CBLAS_H_PROXY_PATH expands to nothing when it was not set by the include
# above; presumably it names a generated proxy header that has to be part of
# the module sources -- TODO confirm.
ocv_glob_module_sources(${PROTOBUF_SRCS} ${PROTOBUF_HDRS} ${CBLAS_H_PROXY_PATH})
ocv_create_module(${PROTOBUF_LIBRARIES}) ocv_create_module(${PROTOBUF_LIBRARIES})
ocv_add_samples() ocv_add_samples()
ocv_add_accuracy_tests() ocv_add_accuracy_tests()
ocv_add_perf_tests() ocv_add_perf_tests()
# ----------------------------------------------------------------------------
# Link BLAS
# ----------------------------------------------------------------------------
# BLAS is used only when the user enabled the option AND HAVE_BLAS is set to
# a non-false value. In both branches HAVE_CBLAS is defined (to 1 or 0), so
# the sources can test its value rather than its presence.
if(${the_module}_WITH_BLAS AND HAVE_BLAS)
# Directory-scope definition: applies to every target of this directory.
add_definitions(-DHAVE_CBLAS=1)
ocv_module_include_directories(${${the_module}_BLAS_INCLUDE_DIR})
# NOTE(review): the BLAS libraries are registered as dependencies and then
# linked explicitly as well -- confirm that both calls are required.
ocv_add_dependencies(${the_module} ${${the_module}_BLAS_LIBRARIES})
target_link_libraries(${the_module} ${${the_module}_BLAS_LIBRARIES})
if(${the_module}_BLAS_BINARIES)
# NOTE(review): the text after EXPORT is the variable *name*
# "<module>_BLAS_BINARIES", not its value -- confirm this is the intended
# way to install the BLAS runtime binaries.
ocv_install_target(${the_module} EXPORT ${the_module}_BLAS_BINARIES
RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} COMPONENT libs)
endif()
else()
add_definitions(-DHAVE_CBLAS=0)
endif()
# ---------------------------------------------------------------------------- # ----------------------------------------------------------------------------
# Download pre-trained models for complex testing on GoogLeNet and AlexNet # Download pre-trained models for complex testing on GoogLeNet and AlexNet
# ---------------------------------------------------------------------------- # ----------------------------------------------------------------------------
......
#COPYRIGHT
#
#All contributions by the University of California:
#Copyright (c) 2014, 2015, The Regents of the University of California (Regents)
#All rights reserved.
#
#All other contributions:
#Copyright (c) 2014, 2015, the respective contributors
#All rights reserved.
#
#Caffe uses a shared copyright model: each contributor holds copyright over
#their contributions to Caffe. The project versioning records all such
#contribution and copyright details. If a contributor wants to further mark
#their specific copyright on a particular contribution, they should indicate
#their copyright solely in the commit message of the change when it is
#committed.
#
#LICENSE
#
#Redistribution and use in source and binary forms, with or without
#modification, are permitted provided that the following conditions are met:
#
#1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
#THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
#ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
#WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
#DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
#ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
#(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
#LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
#ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
#(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
#SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#CONTRIBUTION AGREEMENT
#
#By contributing to the BVLC/caffe repository through pull-request, comment,
#or otherwise, the contributor releases their content to the
#license and copyright terms herein.
# Find the Atlas (and Lapack) libraries
#
# The following variables are optionally searched for defaults
# Atlas_ROOT_DIR: Base directory where all Atlas components are found
#
# The following are set after configuration is done:
# Atlas_FOUND
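# Atlas_INCLUDE_DIR (directories containing cblas.h and clapack.h)
# Atlas_LIBRARIES (LAPACK, CBLAS and ATLAS libraries)
# (no Atlas_LIBRARY_DIRS variable is set by this script)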
# Header search locations: the usual distribution directories plus the root
# and include/ subdirectory of the Atlas_ROOT_DIR environment variable.
set(Atlas_INCLUDE_SEARCH_PATHS
  /usr/include/atlas
  /usr/include/atlas-base
  $ENV{Atlas_ROOT_DIR}
  $ENV{Atlas_ROOT_DIR}/include
)

# Library search locations, built the same way as the header ones.
set(Atlas_LIB_SEARCH_PATHS
  /usr/lib/atlas
  /usr/lib/atlas-base
  $ENV{Atlas_ROOT_DIR}
  $ENV{Atlas_ROOT_DIR}/lib
)

# Headers.
find_path(Atlas_CBLAS_INCLUDE_DIR
  NAMES cblas.h
  PATHS ${Atlas_INCLUDE_SEARCH_PATHS}
)
find_path(Atlas_CLAPACK_INCLUDE_DIR
  NAMES clapack.h
  PATHS ${Atlas_INCLUDE_SEARCH_PATHS}
)

# Libraries; each NAMES list is tried in the order given.
find_library(Atlas_CBLAS_LIBRARY
  NAMES ptcblas_r ptcblas cblas_r cblas
  PATHS ${Atlas_LIB_SEARCH_PATHS}
)
find_library(Atlas_BLAS_LIBRARY
  NAMES atlas_r atlas
  PATHS ${Atlas_LIB_SEARCH_PATHS}
)
find_library(Atlas_LAPACK_LIBRARY
  NAMES alapack_r alapack lapack_atlas
  PATHS ${Atlas_LIB_SEARCH_PATHS}
)

# Every entry below has to be found for the package to count as found.
set(LOOKED_FOR
  Atlas_CBLAS_INCLUDE_DIR
  Atlas_CLAPACK_INCLUDE_DIR
  Atlas_CBLAS_LIBRARY
  Atlas_BLAS_LIBRARY
  Atlas_LAPACK_LIBRARY
)

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(Atlas DEFAULT_MSG ${LOOKED_FOR})

if(ATLAS_FOUND)
  # Aggregate the individual results into the variables consumers read.
  set(Atlas_INCLUDE_DIR ${Atlas_CBLAS_INCLUDE_DIR} ${Atlas_CLAPACK_INCLUDE_DIR})
  set(Atlas_LIBRARIES ${Atlas_LAPACK_LIBRARY} ${Atlas_CBLAS_LIBRARY} ${Atlas_BLAS_LIBRARY})
  mark_as_advanced(${LOOKED_FOR})
  message(STATUS "Found Atlas (include: ${Atlas_CBLAS_INCLUDE_DIR}, library: ${Atlas_BLAS_LIBRARY})")
endif()
\ No newline at end of file
#COPYRIGHT
#
#All contributions by the University of California:
#Copyright (c) 2014, 2015, The Regents of the University of California (Regents)
#All rights reserved.
#
#All other contributions:
#Copyright (c) 2014, 2015, the respective contributors
#All rights reserved.
#
#Caffe uses a shared copyright model: each contributor holds copyright over
#their contributions to Caffe. The project versioning records all such
#contribution and copyright details. If a contributor wants to further mark
#their specific copyright on a particular contribution, they should indicate
#their copyright solely in the commit message of the change when it is
#committed.
#
#LICENSE
#
#Redistribution and use in source and binary forms, with or without
#modification, are permitted provided that the following conditions are met:
#
#1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
#THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
#ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
#WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
#DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
#ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
#(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
#LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
#ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
#(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
#SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#CONTRIBUTION AGREEMENT
#
#By contributing to the BVLC/caffe repository through pull-request, comment,
#or otherwise, the contributor releases their content to the
#license and copyright terms herein.
# Locate OpenBLAS (cblas.h and the openblas library).
#
# Optional environment hints:
#   OpenBLAS       - installation root (library search only)
#   OpenBLAS_HOME  - installation root (header and library search)
#
# Results:
#   OpenBLAS_FOUND        - ON when both the header and the library were found
#   OpenBLAS_INCLUDE_DIR  - directory containing cblas.h
#   OpenBLAS_LIB          - path of the openblas library
set(Open_BLAS_INCLUDE_SEARCH_PATHS
  /usr/include
  /usr/include/openblas
  /usr/include/openblas-base
  /usr/local/include
  /usr/local/include/openblas
  /usr/local/include/openblas-base
  /opt/OpenBLAS/include
  $ENV{OpenBLAS_HOME}
  $ENV{OpenBLAS_HOME}/include
)

set(Open_BLAS_LIB_SEARCH_PATHS
  /lib/
  /lib/openblas-base
  /lib64/
  /usr/lib
  /usr/lib/openblas-base
  /usr/lib64
  /usr/local/lib
  /usr/local/lib64
  /opt/OpenBLAS/lib
  # Was "$ENV{OpenBLAS}cd": the stray "cd" turned the hint into a bogus path.
  $ENV{OpenBLAS}
  $ENV{OpenBLAS}/lib
  $ENV{OpenBLAS_HOME}
  $ENV{OpenBLAS_HOME}/lib
)

find_path(OpenBLAS_INCLUDE_DIR NAMES cblas.h PATHS ${Open_BLAS_INCLUDE_SEARCH_PATHS})
find_library(OpenBLAS_LIB NAMES openblas PATHS ${Open_BLAS_LIB_SEARCH_PATHS})

# Start optimistic and switch the flag off for every missing component.
set(OpenBLAS_FOUND ON)

# Check include files
if(NOT OpenBLAS_INCLUDE_DIR)
  set(OpenBLAS_FOUND OFF)
  message(STATUS "Could not find OpenBLAS include. Turning OpenBLAS_FOUND off")
endif()

# Check libraries
if(NOT OpenBLAS_LIB)
  set(OpenBLAS_FOUND OFF)
  message(STATUS "Could not find OpenBLAS lib. Turning OpenBLAS_FOUND off")
endif()

# Honor the OpenBLAS_FIND_QUIETLY / OpenBLAS_FIND_REQUIRED variables.
if(OpenBLAS_FOUND)
  if(NOT OpenBLAS_FIND_QUIETLY)
    message(STATUS "Found OpenBLAS libraries: ${OpenBLAS_LIB}")
    message(STATUS "Found OpenBLAS include: ${OpenBLAS_INCLUDE_DIR}")
  endif()
else()
  if(OpenBLAS_FIND_REQUIRED)
    message(FATAL_ERROR "Could not find OpenBLAS")
  endif()
endif()

mark_as_advanced(
  OpenBLAS_INCLUDE_DIR
  OpenBLAS_LIB
  OpenBLAS
)
\ No newline at end of file
# _find_file_in_dirs(<VAR> <NAME> <DIR> [<DIR>...])
#
# Looks for the file <NAME> in the listed directories only (no default search
# paths). On success <VAR> is set, as a normal variable, to "<dir>/<NAME>";
# on failure it is set to "<VAR>-NOTFOUND", which if() evaluates as false.
# The cache entry created by find_path() is removed again, so every call
# performs a fresh search.
macro(_find_file_in_dirs VAR NAME DIRS)
  # Drop a stale normal variable so the lookups below see find_path()'s result.
  unset(${VAR})
  # DIRS binds to the first directory only; further ones arrive in ARGN.
  find_path(${VAR} ${NAME} ${DIRS} ${ARGN} NO_DEFAULT_PATH)
  if(${VAR})
    set(${VAR} ${${VAR}}/${NAME})
  else()
    # Do not append the file name here: "<VAR>-NOTFOUND/<NAME>" would be
    # truthy and hide the failure from the caller.
    set(${VAR} ${VAR}-NOTFOUND)
  endif()
  unset(${VAR} CACHE)
endmacro()
# Selects a CBLAS implementation for the module when <module>_WITH_BLAS is on.
#
# Probing order: user-supplied ("Custom") settings, MKL, OpenBLAS, then Atlas
# (UNIX only). Results:
#   HAVE_BLAS                 - "", "Custom", "MKL", "OpenBLAS" or "Atlas"
#   <module>_BLAS_INCLUDE_DIR, <module>_BLAS_CBLAS_H,
#   <module>_BLAS_LIBRARIES, <module>_BLAS_BINARIES - cache entries describing
#                               the selected BLAS
#   CBLAS_H_PROXY_PATH        - generated header that includes the real cblas
#                               header (only set when HAVE_BLAS is non-empty)
if(${the_module}_WITH_BLAS)
  set(_bp ${the_module}_BLAS) #prefix for blas variables
  set(BLAS_CBLAS_H "cblas.h")
  set(HAVE_BLAS "")

  # Custom BLAS from user input: all three settings have to be provided.
  if(${_bp}_INCLUDE_DIR AND ${_bp}_LIBRARIES AND ${_bp}_CBLAS_H)
    set(HAVE_BLAS "Custom")
  endif()

  if(NOT HAVE_BLAS)
    include(cmake/OpenCVFindMKL.cmake)
    if(MKL_FOUND)
      set(BLAS_INCLUDE_DIR ${MKL_INCLUDE_DIRS})
      set(BLAS_LIBRARIES ${MKL_LIBRARIES})
      set(BLAS_CBLAS_H "mkl_cblas.h")
      set(HAVE_BLAS "MKL")
    endif()
  endif()

  if(NOT HAVE_BLAS)
    include(cmake/FindOpenBLAS.cmake)
    if(OpenBLAS_FOUND)
      set(BLAS_INCLUDE_DIR ${OpenBLAS_INCLUDE_DIR})
      set(BLAS_LIBRARIES ${OpenBLAS_LIB})
      set(HAVE_BLAS "OpenBLAS")
    endif()
  endif()

  if(NOT HAVE_BLAS AND UNIX)
    include(cmake/FindAtlas.cmake)
    if(ATLAS_FOUND)
      set(BLAS_INCLUDE_DIR ${Atlas_INCLUDE_DIR})
      set(BLAS_LIBRARIES ${Atlas_LIBRARIES})
      set(HAVE_BLAS "Atlas")
    endif()
  endif()

  # Publish the detected values unless the user provided a custom BLAS.
  # FORCE keeps the cache in sync with the detection result on every run.
  if(NOT HAVE_BLAS OR NOT (HAVE_BLAS STREQUAL "Custom"))
    set(${_bp}_INCLUDE_DIR ${BLAS_INCLUDE_DIR} CACHE PATH "Path to BLAS include dir" FORCE)
    set(${_bp}_CBLAS_H ${BLAS_CBLAS_H} CACHE STRING "Alternative name of cblas.h" FORCE)
    set(${_bp}_LIBRARIES ${BLAS_LIBRARIES} CACHE FILEPATH "Path to BLAS libraries that will be linked with ${the_module} module" FORCE)
    set(${_bp}_BINARIES ${BLAS_BINARIES} CACHE FILEPATH "Path to BLAS binaries (.so, .dll) that will be installed with ${the_module} module" FORCE)
  endif()

  if(HAVE_BLAS) #adding proxy cblas.h header
    _find_file_in_dirs(CBLAS_H_PATH ${${_bp}_CBLAS_H} ${${_bp}_INCLUDE_DIR})
    # The helper may hand back "<var>-NOTFOUND/<name>", which is a truthy
    # string, so test for the file itself as well; otherwise this warning
    # could never be emitted.
    if(NOT CBLAS_H_PATH OR NOT EXISTS "${CBLAS_H_PATH}")
      message(WARNING "CBLAS header '${${_bp}_CBLAS_H}' not found into '${${_bp}_INCLUDE_DIR}'")
    endif()
    # The proxy lets the sources include one fixed header name regardless of
    # which BLAS implementation was selected.
    set(CBLAS_H_PROXY_PATH ${CMAKE_CURRENT_BINARY_DIR}/opencv_cblas.hpp)
    set(_include_str "\#include \"${CBLAS_H_PATH}\"")
    file(WRITE ${CBLAS_H_PROXY_PATH} ${_include_str})
  endif()
endif()
\ No newline at end of file
#
# The script to detect Intel(R) Math Kernel Library (MKL)
# installation/package
#
# Parameters:
# MKL_WITH_TBB
#
# On return this will define:
#
# HAVE_MKL - True if Intel MKL found
# MKL_ROOT_DIR - root of MKL installation
# MKL_INCLUDE_DIRS - MKL include folder
# MKL_LIBRARIES - MKL libraries that are used by OpenCV
#
# mkl_fail()
#
# Records a failed MKL detection: creates the HAVE_MKL (OFF) and MKL_ROOT_DIR
# cache entries and removes the MKL_INCLUDE_DIRS / MKL_LIBRARIES cache entries.
# NOTE(review): both set() calls omit FORCE, so a cache entry that already
# exists (e.g. HAVE_MKL=ON from an earlier run) is not overwritten -- confirm
# that this is intended.
macro(mkl_fail)
set(HAVE_MKL OFF CACHE BOOL "True if MKL found")
set(MKL_ROOT_DIR ${MKL_ROOT_DIR} CACHE PATH "Path to MKL directory")
unset(MKL_INCLUDE_DIRS CACHE)
unset(MKL_LIBRARIES CACHE)
endmacro()
# get_mkl_version(<VERSION_FILE>)
#
# Reads the lines of VERSION_FILE that mention __INTEL_MKL__,
# __INTEL_MKL_MINOR__ and __INTEL_MKL_UPDATE__, extracts the digit runs from
# them and stores "<major>.<minor>.<update>" in the cache entry
# MKL_VERSION_STR (FORCE, so it is refreshed on every call).
# Being a macro, STR1..STR3 and MKL_VERSION_MAJOR/MINOR/UPDATE remain set in
# the caller's scope afterwards.
macro(get_mkl_version VERSION_FILE)
# read MKL version info from file
file(STRINGS ${VERSION_FILE} STR1 REGEX "__INTEL_MKL__")
file(STRINGS ${VERSION_FILE} STR2 REGEX "__INTEL_MKL_MINOR__")
file(STRINGS ${VERSION_FILE} STR3 REGEX "__INTEL_MKL_UPDATE__")
#file(STRINGS ${VERSION_FILE} STR4 REGEX "INTEL_MKL_VERSION")
# extract info and assign to variables
# NOTE(review): MATCHALL returns every digit run; if a pattern matches more
# than one line, or a line holds several numbers, the component becomes a
# ;-list -- confirm against the layout of mkl_version.h.
string(REGEX MATCHALL "[0-9]+" MKL_VERSION_MAJOR ${STR1})
string(REGEX MATCHALL "[0-9]+" MKL_VERSION_MINOR ${STR2})
string(REGEX MATCHALL "[0-9]+" MKL_VERSION_UPDATE ${STR3})
set(MKL_VERSION_STR "${MKL_VERSION_MAJOR}.${MKL_VERSION_MINOR}.${MKL_VERSION_UPDATE}" CACHE STRING "MKL version" FORCE)
endmacro()
# Threading-layer options; both default to OFF. They are declared only while
# MKL_USE_MULTITHREAD is not defined.
if(NOT DEFINED MKL_USE_MULTITHREAD)
  OCV_OPTION(MKL_WITH_TBB "Use MKL with TBB multithreading" OFF) # candidate condition: ON IF WITH_TBB
  OCV_OPTION(MKL_WITH_OPENMP "Use MKL with OpenMP multithreading" OFF) # candidate condition: ON IF WITH_OPENMP
endif()

# Check the current MKL_ROOT_DIR; search for a root only when it is unset or
# does not contain include/mkl.h.
if(NOT MKL_ROOT_DIR OR NOT EXISTS ${MKL_ROOT_DIR}/include/mkl.h)
  set(MKLROOT_PATHS ${MKL_ROOT_DIR})

  # "DEFINED ENV{MKLROOT}" tests the environment variable itself. The former
  # "DEFINED $ENV{MKLROOT}" expanded the value first and then asked whether a
  # CMake variable of that name exists, so the hint was effectively ignored.
  if(DEFINED ENV{MKLROOT})
    list(APPEND MKLROOT_PATHS $ENV{MKLROOT})
  endif()

  if(WIN32)
    # The name contains parentheses, hence the indirection through a variable.
    set(ProgramFilesx86 "ProgramFiles(x86)")
    list(APPEND MKLROOT_PATHS $ENV{${ProgramFilesx86}}/IntelSWTools/compilers_and_libraries/windows/mkl)
  endif()

  if(UNIX)
    list(APPEND MKLROOT_PATHS "/opt/intel/mkl")
  endif()

  find_path(MKL_ROOT_DIR include/mkl.h PATHS ${MKLROOT_PATHS})
endif()

# Without a root directory there is nothing more to do: record the failure
# and leave this file.
if(NOT MKL_ROOT_DIR)
  mkl_fail()
  return()
endif()
# Derive the include directory, the headers and the static library list from
# MKL_ROOT_DIR, then publish the result (MKL_FOUND, HAVE_MKL, cache entries).
set(MKL_INCLUDE_DIRS ${MKL_ROOT_DIR}/include)
set(MKL_INCLUDE_HEADERS ${MKL_INCLUDE_DIRS}/mkl.h ${MKL_INCLUDE_DIRS}/mkl_version.h)

#determine arch
if(CMAKE_CXX_SIZEOF_DATA_PTR EQUAL 8)
  set(MKL_X64 1)
  set(MKL_ARCH "intel64")

  # Interface layer: lp64 when int is 4 bytes wide, ilp64 otherwise.
  include(CheckTypeSize)
  CHECK_TYPE_SIZE(int _sizeof_int)
  if(_sizeof_int EQUAL 4)
    set(MKL_LP64 "lp64")
  else()
    set(MKL_LP64 "ilp64")
  endif()
else()
  # NOTE(review): MKL_LP64 is not set on this branch, so the interface
  # library below is named "mkl_intel_" -- confirm the correct ia32 name.
  set(MKL_ARCH "ia32")
endif()

# Static library naming convention of the toolchain.
if(MSVC)
  set(MKL_EXT ".lib")
  set(MKL_PRE "")
else()
  set(MKL_EXT ".a")
  set(MKL_PRE "lib")
endif()

set(MKL_LIB_DIR ${MKL_ROOT_DIR}/lib/${MKL_ARCH})
set(MKL_LIBRARIES ${MKL_LIB_DIR}/${MKL_PRE}mkl_core${MKL_EXT} ${MKL_LIB_DIR}/${MKL_PRE}mkl_intel_${MKL_LP64}${MKL_EXT})

# Threading layer.
if(MKL_WITH_TBB)
  list(APPEND MKL_LIBRARIES ${MKL_LIB_DIR}/${MKL_PRE}mkl_tbb_thread${MKL_EXT})
  list(APPEND MKL_LIBRARIES ${MKL_ROOT_DIR}/../tbb/lib/${MKL_ARCH}/tbb${MKL_EXT})
elseif(MKL_WITH_OPENMP)
  message(FATAL_ERROR "Multithreaded MKL is not supported yet")
else()
  list(APPEND MKL_LIBRARIES ${MKL_LIB_DIR}/${MKL_PRE}mkl_sequential${MKL_EXT})
endif()

include(FindPackageHandleStandardArgs)
# The second argument must be DEFAULT_MSG (or a custom message). Without it
# "MKL_INCLUDE_HEADERS" was taken as the failure message and only
# MKL_LIBRARIES was treated as a required variable.
find_package_handle_standard_args(MKL DEFAULT_MSG MKL_INCLUDE_HEADERS MKL_LIBRARIES)

if(MKL_FOUND)
  get_mkl_version(${MKL_INCLUDE_DIRS}/mkl_version.h)
  message(STATUS "Found MKL ${MKL_VERSION_STR} at: ${MKL_ROOT_DIR}")

  set(HAVE_MKL ON CACHE BOOL "True if MKL found")
  set(MKL_ROOT_DIR ${MKL_ROOT_DIR} CACHE PATH "Path to MKL directory")
  set(MKL_INCLUDE_DIRS ${MKL_INCLUDE_DIRS} CACHE PATH "Path to MKL include directory")
  if(NOT UNIX)
    set(MKL_LIBRARIES ${MKL_LIBRARIES} CACHE FILEPATH "MKL libarries")
  else()
    #it's ugly but helps to avoid cyclic lib problem
    # (the static archives are repeated so that the linker can resolve their
    # mutual references)
    set(MKL_LIBRARIES ${MKL_LIBRARIES} ${MKL_LIBRARIES} ${MKL_LIBRARIES} "-lpthread" "-lm" "-ldl")
    set(MKL_LIBRARIES ${MKL_LIBRARIES} CACHE STRING "MKL libarries")
  endif()
endif()
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
...@@ -48,20 +48,50 @@ namespace cv ...@@ -48,20 +48,50 @@ namespace cv
namespace dnn namespace dnn
{ {
inline BlobShape::BlobShape(int ndims, int fill) : sz( (size_t)std::max(ndims, 0) ) inline BlobShape::BlobShape()
{
sz.allocate(4);
for (size_t i = 0; i < sz.size(); i++)
sz[i] = 1;
}
inline BlobShape BlobShape::all(int ndims, int fill)
{ {
CV_Assert(ndims >= 0); CV_Assert(ndims >= 0);
BlobShape res;
res.sz.allocate(ndims);
for (int i = 0; i < ndims; i++) for (int i = 0; i < ndims; i++)
sz[i] = fill; res.sz[i] = fill;
return res;
} }
inline BlobShape::BlobShape(int ndims, const int *sizes) : sz( (size_t)std::max(ndims, 0) ) inline BlobShape::BlobShape(int ndims, const int *sizes) : sz( (size_t)std::max(ndims, 0) )
{ {
CV_Assert(ndims >= 0); CV_Assert(ndims >= 0);
if (!sizes)
return;
for (int i = 0; i < ndims; i++) for (int i = 0; i < ndims; i++)
sz[i] = sizes[i]; sz[i] = sizes[i];
} }
inline BlobShape::BlobShape(int s0) : sz(1)
{
sz[0] = s0;
}
inline BlobShape::BlobShape(int s0, int s1) : sz(2)
{
sz[0] = s0;
sz[1] = s1;
}
inline BlobShape::BlobShape(int s0, int s1, int s2) : sz(3)
{
sz[0] = s0;
sz[1] = s1;
sz[2] = s2;
}
inline BlobShape::BlobShape(int num, int cn, int rows, int cols) : sz(4) inline BlobShape::BlobShape(int num, int cn, int rows, int cols) : sz(4)
{ {
sz[0] = num; sz[0] = num;
...@@ -120,7 +150,13 @@ inline int &BlobShape::operator[] (int axis) ...@@ -120,7 +150,13 @@ inline int &BlobShape::operator[] (int axis)
return sz[(axis < 0) ? axis + dims() : axis]; return sz[(axis < 0) ? axis + dims() : axis];
} }
inline ptrdiff_t BlobShape::total() inline int BlobShape::canonicalAxis(int axis) const
{
CV_Assert(-dims() <= axis && axis < dims());
return (axis < 0) ? axis + dims() : axis;
}
inline ptrdiff_t BlobShape::total() const
{ {
if (dims() == 0) if (dims() == 0)
return 0; return 0;
...@@ -131,11 +167,52 @@ inline ptrdiff_t BlobShape::total() ...@@ -131,11 +167,52 @@ inline ptrdiff_t BlobShape::total()
return res; return res;
} }
// Returns the product of the extents of the axes in [startAxis, endAxis).
// Negative axes are counted from the end; endAxis == INT_MAX stands for
// "up to and including the last axis". An empty shape yields 0, an empty
// axis range of a non-empty shape yields 1.
inline ptrdiff_t BlobShape::total(int startAxis, int endAxis) const
{
    if (isEmpty())
        return 0;

    // Normalize the end bound first, then the start bound.
    if (endAxis == INT_MAX)
        endAxis = dims();
    else if (endAxis < 0)
        endAxis += dims();

    if (startAxis < 0)
        startAxis += dims();

    CV_Assert(0 <= startAxis && startAxis <= endAxis && endAxis <= dims());

    ptrdiff_t res = 1;
    int axis = startAxis;
    while (axis < endAxis)
    {
        res *= sz[axis];
        ++axis;
    }
    return res;
}
// Returns a new shape made of the extents of the axes in
// [startAxis, endAxis). Negative axes are counted from the end;
// endAxis == INT_MAX stands for "up to and including the last axis".
// Slicing an empty shape yields an empty shape.
inline BlobShape BlobShape::slice(int startAxis, int endAxis) const
{
    if (isEmpty())
        return BlobShape::empty();

    // Normalize the end bound first, then the start bound.
    if (endAxis == INT_MAX)
        endAxis = dims();
    else if (endAxis < 0)
        endAxis += dims();

    if (startAxis < 0)
        startAxis += dims();

    CV_Assert(0 <= startAxis && startAxis <= endAxis && endAxis <= dims());

    // Allocate the result without initial values and copy the range over.
    const int count = endAxis - startAxis;
    BlobShape res(count, (const int*)NULL);
    for (int k = 0; k < count; k++)
        res[k] = sz[startAxis + k];
    return res;
}
inline const int *BlobShape::ptr() const inline const int *BlobShape::ptr() const
{ {
return sz; return sz;
} }
inline int *BlobShape::ptr()
{
return sz;
}
inline bool BlobShape::equal(const BlobShape &other) const inline bool BlobShape::equal(const BlobShape &other) const
{ {
if (this->dims() != other.dims()) if (this->dims() != other.dims())
...@@ -155,19 +232,83 @@ inline bool BlobShape::operator==(const BlobShape &r) const ...@@ -155,19 +232,83 @@ inline bool BlobShape::operator==(const BlobShape &r) const
return this->equal(r); return this->equal(r);
} }
inline BlobShape BlobShape::like(const Mat &m)
{
return BlobShape(m.dims, (const int*)m.size);
}
inline BlobShape BlobShape::like(const UMat &m)
{
return BlobShape(m.dims, (const int*)m.size);
}
inline BlobShape BlobShape::empty()
{
return BlobShape(0, (const int*)NULL);
}
inline bool BlobShape::isEmpty() const
{
return dims() == 0;
}
inline BlobShape BlobShape::operator+(const BlobShape &r) const
{
BlobShape newShape(this->dims() + r.dims(), (int*)NULL);
for (int i = 0; i < this->dims(); i++)
newShape[i] = (*this)[i];
for (int i = 0; i < r.dims(); i++)
newShape[this->dims() + i] = r[i];
return newShape;
}
CV_EXPORTS std::ostream &operator<< (std::ostream &stream, const BlobShape &shape); CV_EXPORTS std::ostream &operator<< (std::ostream &stream, const BlobShape &shape);
///////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////
inline int Blob::canonicalAxis(int axis) const #ifndef CV_DNN_UMAT
# define CV_DNN_SWITCH_MU(cpu_expr, gpu_expr) (cpu_expr)
#else
# define CV_DNN_SWITCH_MU(cpu_expr, gpu_expr) ((state == HEAD_AT_UMAT) ? (gpu_expr) : (cpu_expr))
#endif
inline int Blob::dims() const
{ {
CV_Assert(-dims() <= axis && axis < dims()); return CV_DNN_SWITCH_MU(m.dims, um.dims);
return (axis < 0) ? axis + dims() : axis;
} }
inline int Blob::dims() const inline const int * Blob::sizes() const
{ {
return m.dims; return CV_DNN_SWITCH_MU((const int*)m.size, (const int*)um.size);
}
inline int Blob::type() const
{
return CV_DNN_SWITCH_MU(m.type(), um.type());
}
// Returns the offset, in elements, of the element at position `pos`.
//
// The first min(n, dims()) coordinates are combined with the byte steps of
// the underlying matrix (Mat or UMat, chosen by CV_DNN_SWITCH_MU).
// Coordinates the blob has no axis for (n > dims()) must be zero; axes that
// `pos` does not cover (n < dims()) are taken as position 0 and contribute
// nothing to the offset.
template<int n>
inline size_t Blob::offset(const Vec<int, n> &pos) const
{
    const MatStep &step = CV_DNN_SWITCH_MU(m.step, um.step);
    size_t ofs = 0;

    int i;
    for (i = 0; i < std::min(n, dims()); i++)
    {
        CV_DbgAssert(pos[i] >= 0 && pos[i] < size(i));
        ofs += step[i] * pos[i];
    }

    // Only the n components of `pos` may be inspected. The previous bound,
    // dims(), read past the end of `pos` whenever n < dims() and never
    // verified the surplus coordinates when n > dims().
    for (; i < n; i++)
        CV_DbgAssert(pos[i] == 0);

    // Steps are expressed in bytes; convert the byte offset to elements.
    CV_DbgAssert(ofs % elemSize() == 0);
    return ofs / elemSize();
}
inline int Blob::canonicalAxis(int axis) const
{
CV_Assert(-dims() <= axis && axis < dims());
return (axis < 0) ? axis + dims() : axis;
} }
inline int Blob::xsize(int axis) const inline int Blob::xsize(int axis) const
...@@ -196,27 +337,11 @@ inline size_t Blob::total(int startAxis, int endAxis) const ...@@ -196,27 +337,11 @@ inline size_t Blob::total(int startAxis, int endAxis) const
CV_Assert(0 <= startAxis && startAxis <= endAxis && endAxis <= dims()); CV_Assert(0 <= startAxis && startAxis <= endAxis && endAxis <= dims());
size_t size = 1; //fix: assume that slice isn't empty size_t cnt = 1; //fix: assume that slice isn't empty
for (int i = startAxis; i < endAxis; i++) for (int i = startAxis; i < endAxis; i++)
size *= (size_t)sizes()[i]; cnt *= (size_t)sizes()[i];
return size; return cnt;
}
template<int n>
inline size_t Blob::offset(const Vec<int, n> &pos) const
{
size_t ofs = 0;
int i;
for (i = 0; i < std::min(n, dims()); i++)
{
CV_DbgAssert(pos[i] >= 0 && pos[i] < size(i));
ofs = ofs * (size_t)size(i) + pos[i];
}
for (; i < dims(); i++)
ofs *= (size_t)size(i);
return ofs;
} }
inline size_t Blob::offset(int n, int cn, int row, int col) const inline size_t Blob::offset(int n, int cn, int row, int col) const
...@@ -226,20 +351,20 @@ inline size_t Blob::offset(int n, int cn, int row, int col) const ...@@ -226,20 +351,20 @@ inline size_t Blob::offset(int n, int cn, int row, int col) const
inline float *Blob::ptrf(int n, int cn, int row, int col) inline float *Blob::ptrf(int n, int cn, int row, int col)
{ {
CV_Assert(type() == CV_32F); return matRef(false).ptr<float>() + offset(n, cn, row, col);
return (float*)m.data + offset(n, cn, row, col);
} }
inline uchar *Blob::ptr(int n, int cn, int row, int col) inline uchar *Blob::ptr(int n, int cn, int row, int col)
{ {
return m.data + m.elemSize() * offset(n, cn, row, col); Mat &mat = matRef(false);
return mat.ptr() + mat.elemSize() * offset(n, cn, row, col);
} }
template<typename TFloat> template<typename Dtype>
inline TFloat* Blob::ptr(int n, int cn, int row, int col) inline Dtype* Blob::ptr(int n, int cn, int row, int col)
{ {
CV_Assert(type() == cv::DataDepth<TFloat>::value); CV_Assert(type() == cv::DataDepth<Dtype>::value);
return (TFloat*) ptr(n, cn, row, col); return (Dtype*) ptr(n, cn, row, col);
} }
inline BlobShape Blob::shape() const inline BlobShape Blob::shape() const
...@@ -260,26 +385,69 @@ inline bool Blob::equalShape(const Blob &other) const ...@@ -260,26 +385,69 @@ inline bool Blob::equalShape(const Blob &other) const
return true; return true;
} }
inline Mat& Blob::matRef() inline Mat& Blob::matRef(bool writeOnly)
{ {
#ifdef CV_DNN_UMAT
updateMat(!writeOnly);
state = HEAD_AT_MAT;
#else
(void)writeOnly;
#endif
return m; return m;
} }
inline const Mat& Blob::matRefConst() const inline const Mat& Blob::matRefConst() const
{ {
CV_DNN_UMAT_ONLY( updateMat() );
return m; return m;
} }
inline UMat &Blob::umatRef() inline UMat &Blob::umatRef(bool writeOnly)
{ {
CV_Error(Error::StsNotImplemented, ""); #ifndef CV_DNN_UMAT
CV_Error(Error::GpuNotSupported, "");
(void)writeOnly;
return *(new UMat()); return *(new UMat());
#else
updateUMat(!writeOnly);
state = HEAD_AT_UMAT;
return um;
#endif
} }
inline const UMat &Blob::umatRefConst() const inline const UMat &Blob::umatRefConst() const
{ {
CV_Error(Error::StsNotImplemented, ""); #ifndef CV_DNN_UMAT
CV_Error(Error::GpuNotSupported, "");
return *(new UMat()); return *(new UMat());
#else
updateUMat();
return um;
#endif
}
template<>
inline Mat &Blob::getRef<Mat>(bool writeOnly)
{
return matRef(writeOnly);
}
template<>
inline UMat &Blob::getRef<UMat>(bool writeOnly)
{
return umatRef(writeOnly);
}
template<>
inline const Mat &Blob::getRefConst<Mat>() const
{
return matRefConst();
}
template<>
inline const UMat &Blob::getRefConst<UMat>() const
{
return umatRefConst();
} }
inline Mat Blob::getPlane(int n, int cn) inline Mat Blob::getPlane(int n, int cn)
...@@ -313,27 +481,44 @@ inline Size Blob::size2() const ...@@ -313,27 +481,44 @@ inline Size Blob::size2() const
return Size(cols(), rows()); return Size(cols(), rows());
} }
inline int Blob::type() const inline Blob &Blob::shareFrom(const Blob &blob)
{ {
return m.depth(); this->m = blob.m;
#ifdef CV_DNN_UMAT
this->um = blob.um;
this->state = blob.state;
#endif
return *this;
} }
inline const int * Blob::sizes() const inline Blob &Blob::reshape(const BlobShape &newShape)
{ {
return &m.size[0]; if (!m.empty()) m = m.reshape(1, newShape.dims(), newShape.ptr());
#ifdef CV_DNN_UMAT
if (!um.empty()) um = um.reshape(1, newShape.dims(), newShape.ptr());
#endif
return *this;
} }
inline Blob Blob::reshaped(const BlobShape &newShape) const
{
Blob res(*this); //also, res.shareFrom(*this) could be used
res.reshape(newShape);
return res;
}
inline Blob &Blob::shareFrom(const Blob &blob) inline int Blob::elemSize() const
{ {
this->m = blob.m; return CV_ELEM_SIZE(type());
return *this;
} }
inline Blob &Blob::reshape(const BlobShape &shape) inline int Blob::getState() const
{ {
m = m.reshape(1, shape.dims(), shape.ptr()); #ifdef CV_DNN_UMAT
return *this; return this->state;
#else
return m.empty() ? UNINITIALIZED : HEAD_AT_MAT;
#endif
} }
} }
......
...@@ -95,10 +95,10 @@ private: ...@@ -95,10 +95,10 @@ private:
AutoBuffer<int64, 1> *pi; AutoBuffer<int64, 1> *pi;
AutoBuffer<double, 1> *pd; AutoBuffer<double, 1> *pd;
AutoBuffer<String, 1> *ps; AutoBuffer<String, 1> *ps;
void *p; void *pv;
}; };
DictValue(int _type, void *_p) : type(_type), p(_p) {} DictValue(int _type, void *_p) : type(_type), pv(_p) {}
void release(); void release();
}; };
......
...@@ -59,15 +59,17 @@ namespace dnn //! This namespace is used for dnn module functionlaity. ...@@ -59,15 +59,17 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
* This function automatically called on most of OpenCV builds, * This function automatically called on most of OpenCV builds,
* but you need to call it manually on some specific configurations (iOS for example). * but you need to call it manually on some specific configurations (iOS for example).
*/ */
CV_EXPORTS void initModule(); CV_EXPORTS_W void initModule();
/** @brief This class provides all data needed to initialize layer. /** @brief This class provides all data needed to initialize layer.
* *
* It includes dictionary with scalar params (which can be readed by using Dict interface), * It includes dictionary with scalar params (which can be readed by using Dict interface),
* blob params #blobs and optional meta information: #name and #type of layer instance. * blob params #blobs and optional meta information: #name and #type of layer instance.
*/ */
struct CV_EXPORTS LayerParams : public Dict class CV_EXPORTS LayerParams : public Dict
{ {
public:
//TODO: Add ability to name blob params
std::vector<Blob> blobs; //!< List of learned parameters stored as blobs. std::vector<Blob> blobs; //!< List of learned parameters stored as blobs.
String name; //!< Name of the layer instance (optional, can be used internal purposes). String name; //!< Name of the layer instance (optional, can be used internal purposes).
...@@ -77,10 +79,12 @@ namespace dnn //! This namespace is used for dnn module functionlaity. ...@@ -77,10 +79,12 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
/** @brief This interface class allows to build new Layers - are building blocks of networks. /** @brief This interface class allows to build new Layers - are building blocks of networks.
* *
* Each class, derived from Layer, must implement allocate() methods to declare own outputs and forward() to compute outputs. * Each class, derived from Layer, must implement allocate() methods to declare own outputs and forward() to compute outputs.
* Also before using the new layer into networks you must register your layer by using one of @ref LayerFactoryModule "LayerFactory" macros. * Also before using the new layer into networks you must register your layer by using one of @ref dnnLayerFactory "LayerFactory" macros.
*/ */
struct CV_EXPORTS Layer class CV_EXPORTS_W Layer
{ {
public:
//! List of learned parameters must be stored here to allow read them by using Net::getParam(). //! List of learned parameters must be stored here to allow read them by using Net::getParam().
std::vector<Blob> blobs; std::vector<Blob> blobs;
...@@ -116,7 +120,8 @@ namespace dnn //! This namespace is used for dnn module functionlaity. ...@@ -116,7 +120,8 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
String type; //!< Type name which was used for creating layer by layer factory. String type; //!< Type name which was used for creating layer by layer factory.
Layer(); Layer();
explicit Layer(const LayerParams &params); //!< Initialize only #name, #type and #blobs fields. explicit Layer(const LayerParams &params); //!< Initializes only #name, #type and #blobs fields.
void setParamsFrom(const LayerParams &params); //!< Initializes only #name, #type and #blobs fields.
virtual ~Layer(); virtual ~Layer();
}; };
...@@ -130,7 +135,7 @@ namespace dnn //! This namespace is used for dnn module functionlaity. ...@@ -130,7 +135,7 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
* *
* This class supports reference counting of its instances, i. e. copies point to the same instance. * This class supports reference counting of its instances, i. e. copies point to the same instance.
*/ */
class CV_EXPORTS Net class CV_EXPORTS_W Net
{ {
public: public:
...@@ -174,6 +179,7 @@ namespace dnn //! This namespace is used for dnn module functionlaity. ...@@ -174,6 +179,7 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
* @see setNetInputs(), Layer::inputNameToIndex(), Layer::outputNameToIndex() * @see setNetInputs(), Layer::inputNameToIndex(), Layer::outputNameToIndex()
*/ */
void connect(String outPin, String inpPin); void connect(String outPin, String inpPin);
/** @brief Connects #@p outNum output of the first layer to #@p inNum input of the second layer. /** @brief Connects #@p outNum output of the first layer to #@p inNum input of the second layer.
* @param outLayerId identifier of the first layer * @param outLayerId identifier of the first layer
* @param inpLayerId identifier of the second layer * @param inpLayerId identifier of the second layer
...@@ -181,6 +187,7 @@ namespace dnn //! This namespace is used for dnn module functionlaity. ...@@ -181,6 +187,7 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
* @param inpNum number of the second layer input * @param inpNum number of the second layer input
*/ */
void connect(int outLayerId, int outNum, int inpLayerId, int inpNum); void connect(int outLayerId, int outNum, int inpLayerId, int inpNum);
/** @brief Sets outputs names of the network input pseudo layer. /** @brief Sets outputs names of the network input pseudo layer.
* *
* Each net always has special own the network input pseudo layer with id=0. * Each net always has special own the network input pseudo layer with id=0.
...@@ -267,10 +274,10 @@ namespace dnn //! This namespace is used for dnn module functionlaity. ...@@ -267,10 +274,10 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
* @param isBinary specifies whether the network was serialized in ascii mode or binary. * @param isBinary specifies whether the network was serialized in ascii mode or binary.
* @returns Pointer to the created importer, NULL in failure cases. * @returns Pointer to the created importer, NULL in failure cases.
* *
* @warning Torch7 importer is experimental now, you need to explicitly set CMake opencv_dnn_BUILD_TORCH_IMPORTER flag to compile it. * @warning Torch7 importer is experimental now, you need to explicitly set CMake `opencv_dnn_BUILD_TORCH_IMPORTER` flag to compile it.
* *
* @note Ascii mode of Torch serializer is more preferable, because binary mode extensively use long type of C language, * @note Ascii mode of Torch serializer is more preferable, because binary mode extensively use `long` type of C language,
* which has different bit-length on different systems. * which has various bit-length on different systems.
* *
* The loading file must contain serialized <a href="https://github.com/torch/nn/blob/master/doc/module.md">nn.Module</a> object * The loading file must contain serialized <a href="https://github.com/torch/nn/blob/master/doc/module.md">nn.Module</a> object
* with importing network. Try to eliminate custom objects from serialized data to avoid importing errors. * with importing network. Try to eliminate custom objects from serialized data to avoid importing errors.
......
...@@ -86,7 +86,7 @@ inline DictValue DictValue::get<DictValue>(int idx) const ...@@ -86,7 +86,7 @@ inline DictValue DictValue::get<DictValue>(int idx) const
template<> template<>
inline int64 DictValue::get<int64>(int idx) const inline int64 DictValue::get<int64>(int idx) const
{ {
CV_Assert(idx == -1 && size() == 1 || idx >= 0 && idx < size()); CV_Assert((idx == -1 && size() == 1) || (idx >= 0 && idx < size()));
idx = (idx == -1) ? 0 : idx; idx = (idx == -1) ? 0 : idx;
if (type == Param::INT) if (type == Param::INT)
...@@ -131,7 +131,7 @@ inline bool DictValue::get<bool>(int idx) const ...@@ -131,7 +131,7 @@ inline bool DictValue::get<bool>(int idx) const
template<> template<>
inline double DictValue::get<double>(int idx) const inline double DictValue::get<double>(int idx) const
{ {
CV_Assert(idx == -1 && size() == 1 || idx >= 0 && idx < size()); CV_Assert((idx == -1 && size() == 1) || (idx >= 0 && idx < size()));
idx = (idx == -1) ? 0 : idx; idx = (idx == -1) ? 0 : idx;
if (type == Param::REAL) if (type == Param::REAL)
...@@ -159,7 +159,7 @@ template<> ...@@ -159,7 +159,7 @@ template<>
inline String DictValue::get<String>(int idx) const inline String DictValue::get<String>(int idx) const
{ {
CV_Assert(isString()); CV_Assert(isString());
CV_Assert(idx == -1 && ps->size() == 1 || idx >= 0 && idx < (int)ps->size()); CV_Assert((idx == -1 && ps->size() == 1) || (idx >= 0 && idx < (int)ps->size()));
return (*ps)[(idx == -1) ? 0 : idx]; return (*ps)[(idx == -1) ? 0 : idx];
} }
......
...@@ -50,7 +50,7 @@ namespace dnn ...@@ -50,7 +50,7 @@ namespace dnn
//! @addtogroup dnn //! @addtogroup dnn
//! @{ //! @{
//! //!
//! @defgroup LayerFactoryModule Utilities for new layers registration //! @defgroup dnnLayerFactory Utilities for New Layers Registration
//! @{ //! @{
/** @brief %Layer factory allows to create instances of registered layers. */ /** @brief %Layer factory allows to create instances of registered layers. */
...@@ -86,7 +86,7 @@ private: ...@@ -86,7 +86,7 @@ private:
* @details This macros must be placed inside the function code. * @details This macros must be placed inside the function code.
*/ */
#define REG_RUNTIME_LAYER_FUNC(type, constuctorFunc) \ #define REG_RUNTIME_LAYER_FUNC(type, constuctorFunc) \
LayerFactory::registerLayer(#type, constuctorFunc); cv::dnn::LayerFactory::registerLayer(#type, constuctorFunc);
/** @brief Registers layer class in runtime. /** @brief Registers layer class in runtime.
* @param type string, containing type name of the layer. * @param type string, containing type name of the layer.
...@@ -94,7 +94,7 @@ private: ...@@ -94,7 +94,7 @@ private:
* @details This macros must be placed inside the function code. * @details This macros must be placed inside the function code.
*/ */
#define REG_RUNTIME_LAYER_CLASS(type, class) \ #define REG_RUNTIME_LAYER_CLASS(type, class) \
LayerFactory::registerLayer(#type, _layerDynamicRegisterer<class>); cv::dnn::LayerFactory::registerLayer(#type, _layerDynamicRegisterer<class>);
/** @brief Registers layer constructor on module load time. /** @brief Registers layer constructor on module load time.
* @param type string, containing type name of the layer. * @param type string, containing type name of the layer.
...@@ -102,7 +102,7 @@ private: ...@@ -102,7 +102,7 @@ private:
* @details This macros must be placed outside the function code. * @details This macros must be placed outside the function code.
*/ */
#define REG_STATIC_LAYER_FUNC(type, constuctorFunc) \ #define REG_STATIC_LAYER_FUNC(type, constuctorFunc) \
static _LayerStaticRegisterer __LayerStaticRegisterer_##type(#type, constuctorFunc); static cv::dnn::_LayerStaticRegisterer __LayerStaticRegisterer_##type(#type, constuctorFunc);
/** @brief Registers layer class on module load time. /** @brief Registers layer class on module load time.
* @param type string, containing type name of the layer. * @param type string, containing type name of the layer.
...@@ -126,14 +126,15 @@ Ptr<Layer> _layerDynamicRegisterer(LayerParams &params) ...@@ -126,14 +126,15 @@ Ptr<Layer> _layerDynamicRegisterer(LayerParams &params)
} }
//allows automatically register created layer on module load time //allows automatically register created layer on module load time
struct _LayerStaticRegisterer class _LayerStaticRegisterer
{ {
String type; String type;
public:
_LayerStaticRegisterer(const String &type, LayerFactory::Constuctor constuctor) _LayerStaticRegisterer(const String &layerType, LayerFactory::Constuctor layerConstuctor)
{ {
this->type = type; this->type = layerType;
LayerFactory::registerLayer(type, constuctor); LayerFactory::registerLayer(layerType, layerConstuctor);
} }
~_LayerStaticRegisterer() ~_LayerStaticRegisterer()
......
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_DNN_DNN_SHAPE_UTILS_HPP__
#define __OPENCV_DNN_DNN_SHAPE_UTILS_HPP__
#include <opencv2/core.hpp>
#include <ostream>
namespace cv {
namespace dnn {
//Useful shortcut: Shape is just another name for BlobShape, both names denote the same type
typedef BlobShape Shape;
inline std::ostream &operator<< (std::ostream &s, cv::Range &r)
{
return s << "[" << r.start << ", " << r.end << ")";
}
//Reshaping
//TODO: add -1 specifier for automatic size inferring
/** @brief Reshapes the matrix @p m in place to the n-dimensional @p shape.
 *  @param m     matrix to reshape; it is overwritten with the result of its own reshape() call.
 *  @param shape target shape, its dims() and ptr() are forwarded to reshape().
 *  @details The number of channels requested from reshape() is always 1.
 */
template<typename Mat>
void reshape(Mat &m, const BlobShape &shape)
{
    const int newDims = shape.dims();
    const int *newSizes = shape.ptr();
    m = m.reshape(1, newDims, newSizes);
}
/** @brief Returns the matrix @p m reshaped to the n-dimensional @p shape, @p m itself is left untouched.
 *  @param m     source matrix.
 *  @param shape target shape, its dims() and ptr() are forwarded to reshape().
 *  @returns the result of m.reshape() called with 1 channel and the sizes of @p shape.
 */
template<typename Mat>
Mat reshaped(const Mat &m, const BlobShape &shape)
{
    const int newDims = shape.dims();
    const int *newSizes = shape.ptr();
    return m.reshape(1, newDims, newSizes);
}
//Slicing
/** @brief Helper range used as an argument of the slice() functions.
 *
 *  Both constructors are intentionally non-explicit, so either a cv::Range or a plain
 *  integer index can be passed wherever a _Range is expected.
 */
struct _Range : public cv::Range
{
    //! Wraps an existing range: copies its start and end.
    _Range(const Range &r) : cv::Range(r.start, r.end) {}

    //! Creates the range [start, start + size); by default it selects the single element @p start.
    _Range(int start, int size = 1) : cv::Range(start, start + size) {}
};
template<typename Mat>
Mat slice(const Mat &m, const _Range &r0)
{
//CV_Assert(m.dims >= 1);
cv::AutoBuffer<cv::Range, 4> ranges(m.dims);
for (int i = 1; i < m.dims; i++)
ranges[i] = Range::all();
ranges[0] = r0;
return m(&ranges[0]);
}
template<typename Mat>
Mat slice(const Mat &m, const _Range &r0, const _Range &r1)
{
CV_Assert(m.dims >= 2);
cv::AutoBuffer<cv::Range, 4> ranges(m.dims);
for (int i = 2; i < m.dims; i++)
ranges[i] = Range::all();
ranges[0] = r0;
ranges[1] = r1;
return m(&ranges[0]);
}
/** @brief Selects the sub-matrix of @p m restricted to @p r0, @p r1 and @p r2 along axes 0, 1 and 2.
 *  @param m  source matrix, it must have at least 3 dimensions (checked by CV_Assert).
 *  @param r0 range taken along axis 0.
 *  @param r1 range taken along axis 1.
 *  @param r2 range taken along axis 2.
 *  @returns m(ranges), where all axes starting from the fourth one are taken entirely (Range::all()).
 */
template<typename Mat>
Mat slice(const Mat &m, const _Range &r0, const _Range &r1, const _Range &r2)
{
    //Three ranges need at least three axes. The previous check (dims <= 3) was inverted:
    //it rejected matrices with more than 3 dimensions, which the loop below is written for,
    //and accepted matrices with fewer dimensions, for which trailing ranges were silently ignored.
    CV_Assert(m.dims >= 3);
    cv::AutoBuffer<cv::Range, 4> ranges(m.dims);
    for (int i = 3; i < m.dims; i++)
        ranges[i] = Range::all();
    ranges[0] = r0;
    ranges[1] = r1;
    ranges[2] = r2;
    return m(&ranges[0]);
}
/** @brief Selects the sub-matrix of @p m restricted to @p r0 ... @p r3 along axes 0 ... 3.
 *  @param m  source matrix, it must have at least 4 dimensions (checked by CV_Assert).
 *  @param r0 range taken along axis 0.
 *  @param r1 range taken along axis 1.
 *  @param r2 range taken along axis 2.
 *  @param r3 range taken along axis 3.
 *  @returns m(ranges), where all axes starting from the fifth one are taken entirely (Range::all()).
 */
template<typename Mat>
Mat slice(const Mat &m, const _Range &r0, const _Range &r1, const _Range &r2, const _Range &r3)
{
    //Four ranges need at least four axes. The previous check (dims <= 4) was inverted:
    //it rejected matrices with more than 4 dimensions, which the loop below is written for,
    //and accepted matrices with fewer dimensions, for which trailing ranges were silently ignored.
    CV_Assert(m.dims >= 4);
    cv::AutoBuffer<cv::Range, 4> ranges(m.dims);
    for (int i = 4; i < m.dims; i++)
        ranges[i] = Range::all();
    ranges[0] = r0;
    ranges[1] = r1;
    ranges[2] = r2;
    ranges[3] = r3;
    return m(&ranges[0]);
}
//Declaration only, the definition is not in this header.
//NOTE(review): presumably computes the shape obtained by applying the reshape mask maskShape
//to the axes srcRange of srcShape - confirm against the definition.
//By default srcRange is Range::all().
BlobShape computeShapeByReshapeMask(const BlobShape &srcShape, const BlobShape &maskShape, Range srcRange = Range::all());
}
}
#endif
#include "perf_precomp.hpp"
namespace cvtest
{
using std::tr1::tuple;
using std::tr1::get;
using std::tr1::make_tuple;
using std::make_pair;
using namespace perf;
using namespace testing;
using namespace cv;
using namespace cv::dnn;
//Stride test parameter: the enumerator value is read back as an int and used as the convolution stride
enum {STRIDE_OFF = 1, STRIDE_ON = 2};
CV_ENUM(StrideSize, STRIDE_OFF, STRIDE_ON);
//Group test parameter: the enumerator value is read back as an int and used as the number of groups
enum {GROUP_OFF = 1, GROUP_2 = 2};
CV_ENUM(GroupSize, GROUP_OFF, GROUP_2);
//Squared Size
#define SSZ(n) cv::Size(n, n)
//Pair of (input blob shape, number of output channels)
typedef std::pair<BlobShape, int> InpShapeNumOut;
typedef tuple<Size, InpShapeNumOut, GroupSize, StrideSize> ConvParam; //kernel_size, inp shape, groups, stride
typedef TestBaseWithParam<ConvParam> ConvolutionPerfTest;
//Measures forward() of the "Convolution" layer created through LayerFactory for every combination
//of kernel size, (input shape, number of outputs), group count and stride listed below.
//Only the width of the kernel Size is used, i.e. kernels are square.
//NOTE(review): the second input shape is 112 x 122; this looks like a typo of 112 x 112 - confirm before changing,
//because the value is a part of the generated test parameters.
PERF_TEST_P( ConvolutionPerfTest, perf, Combine(
Values(Size(1, 1), Size(3, 3), Size(5, 5), Size(11, 11)),
Values(make_pair(BlobShape(1, 4, 224, 224), 64),
make_pair(BlobShape(1, 64, 112, 122), 128),
make_pair(BlobShape(1, 256, 28, 28), 512)),
GroupSize::all(),
StrideSize::all())
)
{
//fixed seed
RNG rng(0);
ConvParam params = GetParam();
int ksz = get<0>(params).width;
BlobShape inpShape = get<1>(params).first;
int outCn = get<1>(params).second;
int groups = get<2>(params);
//kernels of size 11 and larger always use stride 4, the StrideSize parameter is ignored for them
int stride = (ksz >= 11) ? 4 : (int)get<3>(params);
//number of input channels is the second element of the input shape
int inpCn = inpShape[1];
//weights: outCn x (inpCn/groups) x ksz x ksz, bias: one value per output channel
Blob wgtBlob(BlobShape(outCn, inpCn/groups, ksz, ksz)), biasBlob(BlobShape(outCn, 1, 1, 1));
Blob inpBlob(inpShape);
//all blobs are filled with uniformly distributed values using bounds -1 and +1
rng.fill(biasBlob.matRef(), RNG::UNIFORM, -1, +1);
rng.fill(wgtBlob.matRef(), RNG::UNIFORM, -1, +1);
rng.fill(inpBlob.matRef(), RNG::UNIFORM, -1, +1);
LayerParams lp;
lp.set("num_output", outCn);
lp.set("group", groups);
lp.set("stride", stride);
lp.set("kernel_size", ksz);
//learned blobs are passed in the order: weights, bias
lp.blobs.reserve(2);
lp.blobs.push_back(wgtBlob);
lp.blobs.push_back(biasBlob);
std::vector<Blob*> inpBlobs(1, &inpBlob);
std::vector<Blob> outBlobs;
//the number of threads is set to the number of CPUs; the setting is not restored after the test
cv::setNumThreads(cv::getNumberOfCPUs());
Ptr<Layer> layer = cv::dnn::LayerFactory::createLayerInstance("Convolution", lp);
//allocate() is called once, outside of the measured cycle
layer->allocate(inpBlobs, outBlobs);
declare.in(inpBlob.matRef(), wgtBlob.matRef(), WARMUP_RNG).out(outBlobs[0].matRef()).tbb_threads(cv::getNumThreads());
TEST_CYCLE_N(10)
{
layer->forward(inpBlobs, outBlobs);
}
//results are not compared with any reference data
SANITY_CHECK_NOTHING();
}
}
\ No newline at end of file
#include "perf_precomp.hpp"
CV_PERF_TEST_MAIN(dnn)
#ifdef __GNUC__
# pragma GCC diagnostic ignored "-Wmissing-declarations"
# if defined __clang__ || defined __APPLE__
# pragma GCC diagnostic ignored "-Wmissing-prototypes"
# pragma GCC diagnostic ignored "-Wextra"
# endif
#endif
#ifndef __OPENCV_PERF_PRECOMP_HPP__
#define __OPENCV_PERF_PRECOMP_HPP__
#include <opencv2/ts.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/dnn.hpp>
#endif
...@@ -124,8 +124,8 @@ int main(int argc, char **argv) ...@@ -124,8 +124,8 @@ int main(int argc, char **argv)
exit(-1); exit(-1);
} }
resize(img, img, Size(224, 224)); //GoogLeNet accepts only 224x224 RGB-images resize(img, img, Size(224, 224)); //GoogLeNet accepts only 224x224 RGB-images
dnn::Blob inputBlob = dnn::Blob(img); //Convert Mat to dnn::Blob image batch dnn::Blob inputBlob = dnn::Blob::fromImages(img); //Convert Mat to dnn::Blob batch of images
//! [Prepare blob] //! [Prepare blob]
//! [Set input blob] //! [Set input blob]
......
This diff is collapsed.
...@@ -191,7 +191,7 @@ namespace ...@@ -191,7 +191,7 @@ namespace
else if (pbBlob.has_shape()) else if (pbBlob.has_shape())
{ {
const caffe::BlobShape &_shape = pbBlob.shape(); const caffe::BlobShape &_shape = pbBlob.shape();
BlobShape shape(_shape.dim_size()); BlobShape shape = BlobShape::all(_shape.dim_size());
for (int i = 0; i < _shape.dim_size(); i++) for (int i = 0; i < _shape.dim_size(); i++)
shape[i] = (int)_shape.dim(i); shape[i] = (int)_shape.dim(i);
...@@ -201,7 +201,7 @@ namespace ...@@ -201,7 +201,7 @@ namespace
else else
{ {
CV_Error(Error::StsError, "Unknown shape of input blob"); CV_Error(Error::StsError, "Unknown shape of input blob");
return BlobShape(-1); return BlobShape();
} }
} }
......
#include "../precomp.hpp"
#include "layer_loaders.hpp"
#include <opencv2/dnn/shape_utils.hpp>
#include <climits>
namespace cv
{
namespace dnn
{
//Utils
//Reads kernel, padding and stride sizes which are shared by Conv, Deconv and Pooling layers.
//  kernel: "kernel_h" and "kernel_w" if both are present, otherwise square "kernel_size" (mandatory), must be positive
//  pad:    "pad_h" and "pad_w" if both are present, otherwise square "pad" (0 by default), must be non-negative
//  stride: "stride_h" and "stride_w" if both are present, otherwise square "stride" (1 by default), must be positive
//An error is raised if the kernel size is not specified or if some value is out of its valid range.
static void getCaffeConvParams(LayerParams &params, Size &kernel, Size &pad, Size &stride)
{
    //kernel
    const bool explicitKernel = params.has("kernel_h") && params.has("kernel_w");
    if (explicitKernel)
    {
        kernel.height = params.get<int>("kernel_h");
        kernel.width = params.get<int>("kernel_w");
    }
    else
    {
        if (!params.has("kernel_size"))
            CV_Error(Error::StsBadArg, "kernel_size (or kernel_h and kernel_w) not specified");

        const int kernelSide = params.get<int>("kernel_size");
        kernel.height = kernelSide;
        kernel.width = kernelSide;
    }
    CV_Assert(kernel.height > 0 && kernel.width > 0);

    //padding
    const bool explicitPad = params.has("pad_h") && params.has("pad_w");
    if (explicitPad)
    {
        pad.height = params.get<int>("pad_h");
        pad.width = params.get<int>("pad_w");
    }
    else
    {
        const int padSide = params.get<int>("pad", 0);
        pad.height = padSide;
        pad.width = padSide;
    }
    CV_Assert(pad.height >= 0 && pad.width >= 0);

    //stride
    const bool explicitStride = params.has("stride_h") && params.has("stride_w");
    if (explicitStride)
    {
        stride.height = params.get<int>("stride_h");
        stride.width = params.get<int>("stride_w");
    }
    else
    {
        const int strideSide = params.get<int>("stride", 1);
        stride.height = strideSide;
        stride.width = strideSide;
    }
    CV_Assert(stride.height > 0 && stride.width > 0);
}
//Layers
//Convolution and Deconvolution
//Common initialization of Convolution and Deconvolution layers from Caffe parameters:
//copies name, type and blobs into the layer, reads kernel/pad/stride and validates
//"num_output", "group" and the number of learned blobs (2 with bias, 1 without).
static void initConvDeconvLayerFromCaffe(Ptr<BaseConvolutionLayer> l, LayerParams &params)
{
    l->setParamsFrom(params);
    getCaffeConvParams(params, l->kernel, l->pad, l->stride);

    bool bias = params.get<bool>("bias_term", true);
    int numOutput = params.get<int>("num_output");
    int group = params.get<int>("group", 1);

    //"group" comes from the model file: reject non-positive values before using it as a divisor,
    //otherwise group == 0 leads to an integer division by zero
    CV_Assert(group > 0 && numOutput % group == 0);
    CV_Assert((bias && l->blobs.size() == 2) || (!bias && l->blobs.size() == 1));
}
template<> //Convolution specialization
Ptr<Layer> createLayerFromCaffe<ConvolutionLayer>(LayerParams &params)
{
    //create an empty layer, then fill it with the parameters shared with Deconvolution
    Ptr<BaseConvolutionLayer> convLayer = ConvolutionLayer::create();
    initConvDeconvLayerFromCaffe(convLayer, params);

    return Ptr<Layer>(convLayer);
}
template<> //Deconvolution specialization
Ptr<Layer> createLayerFromCaffe<DeconvolutionLayer>(LayerParams &params)
{
    //create an empty layer, then fill it with the parameters shared with Convolution
    Ptr<BaseConvolutionLayer> deconvLayer = DeconvolutionLayer::create();
    initConvDeconvLayerFromCaffe(deconvLayer, params);

    return Ptr<Layer>(deconvLayer);
}
template<> //Pooling specialization
Ptr<Layer> createLayerFromCaffe<PoolingLayer>(LayerParams &params)
{
    //max pooling is used when the "pool" parameter is not specified
    int poolType = PoolingLayer::MAX;

    if (params.has("pool"))
    {
        //the method name is compared case-insensitively
        const String method = params.get<String>("pool").toLowerCase();

        if (method == "ave")
            poolType = PoolingLayer::AVE;
        else if (method == "stochastic")
            poolType = PoolingLayer::STOCHASTIC;
        else if (method != "max")
            CV_Error(Error::StsBadArg, "Unknown pooling type \"" + method + "\"");
    }

    Size kernel, stride, pad;
    getCaffeConvParams(params, kernel, pad, stride);

    return Ptr<Layer>(PoolingLayer::create(poolType, kernel, stride, pad));
}
template<> //Softmax specialization
Ptr<Layer> createLayerFromCaffe<SoftmaxLayer>(LayerParams &params)
{
    //"axis" is 1 by default
    return Ptr<Layer>(SoftmaxLayer::create(params.get<int>("axis", 1)));
}
template<> //InnerProduct specialization
Ptr<Layer> createLayerFromCaffe<InnerProductLayer>(LayerParams &params)
{
    //blobs[0] holds weights, optional blobs[1] holds bias
    const std::vector<Blob> &blobs = params.blobs;
    CV_Assert(1 <= blobs.size() && blobs.size() <= 2);

    int numOutputs = params.get<int>("num_output");
    //"num_output" comes from the model file and is used as a divisor below:
    //reject non-positive values first, otherwise num_output == 0 is an integer division by zero
    CV_Assert(numOutputs > 0);
    int innerSize = (int)blobs[0].total() / numOutputs;
    bool bias = params.get<bool>("bias_term", true);
    int axis = params.get<int>("axis", 1);

    //weights must contain exactly numOutputs x innerSize elements
    CV_Assert(blobs[0].dims() >= 2 && (size_t)(innerSize * numOutputs) == blobs[0].total());
    //if bias is used, it must be present and must contain one value per output
    CV_Assert(!bias || (blobs.size() == 2 && (size_t)numOutputs == blobs[1].total()));

    Ptr<InnerProductLayer> l = InnerProductLayer::create(axis);
    l->setParamsFrom(params);

    //bring learned blobs to the 2-dimensional form
    l->blobs[0].reshape(Shape(numOutputs, innerSize));
    if (bias)
        l->blobs[1].reshape(Shape(1, numOutputs));

    return Ptr<Layer>(l);
}
template<> //LRNLayer specialization
Ptr<Layer> createLayerFromCaffe<LRNLayer>(LayerParams& params)
{
    //normalization across channels is used by default
    const String region = params.get<String>("norm_region", "ACROSS_CHANNELS");

    int nrmType = LRNLayer::CHANNEL_NRM;
    if (region == "WITHIN_CHANNEL")
        nrmType = LRNLayer::SPATIAL_NRM;
    else if (region != "ACROSS_CHANNELS")
        CV_Error(Error::StsBadArg, "Unknown region type \"" + region + "\"");

    const int localSize = params.get<int>("local_size", 5);
    if (localSize <= 0 || localSize % 2 != 1)
        CV_Error(Error::StsBadArg, "LRN layer supports only positive odd values for local_size");

    const double alpha = params.get<double>("alpha", 1);
    const double beta = params.get<double>("beta", 0.75);

    return Ptr<Layer>(LRNLayer::create(nrmType, localSize, alpha, beta));
}
template<> //MVN specialization
Ptr<Layer> createLayerFromCaffe<MVNLayer>(LayerParams &params)
{
    //defaults: normalize_variance = true, across_channels = false, eps = 1e-9
    const bool normalizeVariance = params.get<bool>("normalize_variance", true);
    const bool acrossChannels = params.get<bool>("across_channels", false);
    const double eps = params.get<double>("eps", 1e-9);

    return Ptr<Layer>(MVNLayer::create(normalizeVariance, acrossChannels, eps));
}
/* Reshape layers */
template<> //Reshape specialization
Ptr<Layer> createLayerFromCaffe<ReshapeLayer>(LayerParams &params)
{
    const int firstAxis = params.get<int>("axis", 0);
    const int axesCount = params.get<int>("num_axes", -1);
    CV_Assert(axesCount >= -1);

    //num_axes == -1 means that the range is open-ended: [axis, INT_MAX)
    Range applyingRange(firstAxis, INT_MAX);
    if (axesCount != -1)
        applyingRange.end = firstAxis + axesCount;

    Shape newShape;
    if (!params.has("dim"))
    {
        newShape = Shape::all(0);
    }
    else
    {
        //copy all values of "dim" into the new shape
        const DictValue &dimValues = params.get("dim");
        const int dimCount = dimValues.size();

        newShape = Shape::all(dimCount);
        for (int d = 0; d < dimCount; ++d)
            newShape[d] = dimValues.get<int>(d);
    }

    return Ptr<Layer>(ReshapeLayer::create(newShape, applyingRange));
}
//Flatten is created as a Reshape layer with the fixed mask (0, -1); layer parameters are not used.
//NOTE(review): presumably 0 keeps the source dimension and -1 infers the rest - confirm in ReshapeLayer.
Ptr<Layer> createFlattenLayerFromCaffe(LayerParams&)
{
    const Shape flattenMask(0, -1);
    return Ptr<Layer>(ReshapeLayer::create(flattenMask));
}
template<> //Concat specialization
Ptr<Layer> createLayerFromCaffe<ConcatLayer>(LayerParams& params)
{
    //"axis" is 1 by default
    const int concatAxis = params.get<int>("axis", 1);
    return Ptr<Layer>(ConcatLayer::create(concatAxis));
}
template<> //Split specialization
Ptr<Layer> createLayerFromCaffe<SplitLayer>(LayerParams &params)
{
    //-1 is passed to the layer when "top_count" is not specified
    int outputsCount = -1;

    //TODO: maybe "top_count" param is useless because it can be determined by output connections number
    if (params.has("top_count"))
    {
        outputsCount = params.get<int>("top_count");
        CV_Assert(outputsCount >= 0);
    }

    return Ptr<Layer>(SplitLayer::create(outputsCount));
}
template<> //Slice specialization
Ptr<Layer> createLayerFromCaffe<SliceLayer>(LayerParams& params)
{
    const int axis = params.get<int>("axis", 1);

    if (params.has("slice_point"))
    {
        //explicit slice points: copy all of them into a vector
        const DictValue &points = params.get("slice_point");

        std::vector<int> sliceIndices;
        sliceIndices.reserve(points.size());
        for (int i = 0; i < points.size(); i++)
            sliceIndices.push_back(points.get<int>(i));

        return Ptr<Layer>(SliceLayer::create(axis, sliceIndices));
    }

    //no slice points: only the axis is passed to the layer
    return Ptr<Layer>(SliceLayer::create(axis));
}
/* Activation layers */
//Generic loader, intended for parameters-free activations:
//layer parameters are ignored and the layer is created by its create() without arguments
template <typename ParamFreeLayer>
Ptr<Layer> createLayerFromCaffe(LayerParams&)
{
    return Ptr<Layer>(ParamFreeLayer::create());
}
template<> //ReLU specialization
Ptr<Layer> createLayerFromCaffe<ReLULayer>(LayerParams& params)
{
    //"negative_slope" is 0 by default
    const float slope = params.get<float>("negative_slope", 0.f);
    return Ptr<Layer>(ReLULayer::create(slope));
}
template<> //Power specialization
Ptr<Layer> createLayerFromCaffe<PowerLayer>(LayerParams& params)
{
    //defaults: power = 1, scale = 1, shift = 0
    const float powerValue = params.get<float>("power", 1.0f);
    const float scaleValue = params.get<float>("scale", 1.0f);
    const float shiftValue = params.get<float>("shift", 0.0f);

    return Ptr<Layer>(PowerLayer::create(powerValue, scaleValue, shiftValue));
}
//Explicit instantiation
//The loader template is only declared in layer_loaders.hpp, so loaders generated from the generic
//(parameters-free) definition above have to be instantiated in this file: Sigmoid, TanH, Abs and BNLL.
//The other types in the list have explicit specializations defined above.
//NOTE(review): Pooling and Reshape loaders are not listed here; they are defined above as explicit specializations.
template Ptr<Layer> createLayerFromCaffe<ConvolutionLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<DeconvolutionLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<SoftmaxLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<InnerProductLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<LRNLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<MVNLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<ConcatLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<SliceLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<SplitLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<ReLULayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<SigmoidLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<TanHLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<AbsLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<BNLLLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<PowerLayer>(LayerParams&);
}
}
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_DNN_CAFFE_LAYER_LOADERS_HPP__
#define __OPENCV_DNN_CAFFE_LAYER_LOADERS_HPP__
#include <opencv2/dnn/all_layers.hpp>
namespace cv
{
namespace dnn
{
//Common template for Caffe layer loaders
//PublicLayer is the layer type to load; the function takes layer parameters and returns the layer as Ptr<Layer>.
//Only the declaration is provided here, definitions are not in this header.
template <typename PublicLayer>
Ptr<Layer> createLayerFromCaffe(LayerParams&);
//Loader of the Flatten layer; it is a separate function and not a specialization of the template above
Ptr<Layer> createFlattenLayerFromCaffe(LayerParams&);
}
}
#endif
\ No newline at end of file
...@@ -543,6 +543,13 @@ Layer::Layer(const LayerParams &params) ...@@ -543,6 +543,13 @@ Layer::Layer(const LayerParams &params)
} }
void Layer::setParamsFrom(const LayerParams &params)
{
blobs = params.blobs;
name = params.name;
type = params.type;
}
int Layer::inputNameToIndex(String) int Layer::inputNameToIndex(String)
{ {
return -1; return -1;
......
...@@ -40,19 +40,8 @@ ...@@ -40,19 +40,8 @@
//M*/ //M*/
#include "precomp.hpp" #include "precomp.hpp"
#include "caffe/layer_loaders.hpp"
#include "layers/concat_layer.hpp"
#include "layers/convolution_layer.hpp"
#include "layers/blank_layer.hpp" #include "layers/blank_layer.hpp"
#include "layers/elementwise_layers.hpp"
#include "layers/fully_connected_layer.hpp"
#include "layers/lrn_layer.hpp"
#include "layers/mvn_layer.hpp"
#include "layers/pooling_layer.hpp"
#include "layers/reshape_layer.hpp"
#include "layers/slice_layer.hpp"
#include "layers/softmax_layer.hpp"
#include "layers/split_layer.hpp"
namespace cv namespace cv
{ {
...@@ -76,27 +65,27 @@ void initModule() ...@@ -76,27 +65,27 @@ void initModule()
if (init.status) if (init.status)
return; return;
REG_RUNTIME_LAYER_CLASS(Slice, SliceLayer) REG_RUNTIME_LAYER_FUNC(Slice, createLayerFromCaffe<SliceLayer>);
REG_RUNTIME_LAYER_CLASS(Softmax, SoftMaxLayer) REG_RUNTIME_LAYER_FUNC(Split, createLayerFromCaffe<SplitLayer>);
REG_RUNTIME_LAYER_CLASS(Split, SplitLayer) REG_RUNTIME_LAYER_FUNC(Concat, createLayerFromCaffe<ConcatLayer>);
REG_RUNTIME_LAYER_CLASS(Reshape, ReshapeLayer) REG_RUNTIME_LAYER_FUNC(Reshape, createLayerFromCaffe<ReshapeLayer>);
REG_STATIC_LAYER_FUNC(Flatten, createFlattenLayer) REG_RUNTIME_LAYER_FUNC(Flatten, createFlattenLayerFromCaffe);
REG_RUNTIME_LAYER_CLASS(Pooling, PoolingLayer)
REG_RUNTIME_LAYER_CLASS(MVN, MVNLayer)
REG_RUNTIME_LAYER_CLASS(LRN, LRNLayer)
REG_RUNTIME_LAYER_CLASS(InnerProduct, FullyConnectedLayer)
REG_RUNTIME_LAYER_CLASS(ReLU, ElementWiseLayer<ReLUFunctor>) REG_RUNTIME_LAYER_FUNC(Convolution, createLayerFromCaffe<ConvolutionLayer>);
REG_RUNTIME_LAYER_CLASS(TanH, ElementWiseLayer<TanHFunctor>) REG_RUNTIME_LAYER_FUNC(Deconvolution, createLayerFromCaffe<DeconvolutionLayer>);
REG_RUNTIME_LAYER_CLASS(BNLL, ElementWiseLayer<BNLLFunctor>) REG_RUNTIME_LAYER_FUNC(Pooling, createLayerFromCaffe<PoolingLayer>);
REG_RUNTIME_LAYER_CLASS(Power, ElementWiseLayer<PowerFunctor>) REG_RUNTIME_LAYER_FUNC(LRN, createLayerFromCaffe<LRNLayer>);
REG_RUNTIME_LAYER_CLASS(AbsVal, ElementWiseLayer<AbsValFunctor>) REG_RUNTIME_LAYER_FUNC(InnerProduct, createLayerFromCaffe<InnerProductLayer>);
REG_RUNTIME_LAYER_CLASS(Sigmoid, ElementWiseLayer<SigmoidFunctor>) REG_RUNTIME_LAYER_FUNC(Softmax, createLayerFromCaffe<SoftmaxLayer>);
REG_RUNTIME_LAYER_CLASS(Dropout, BlankLayer) REG_RUNTIME_LAYER_FUNC(MVN, createLayerFromCaffe<MVNLayer>);
REG_RUNTIME_LAYER_CLASS(Convolution, ConvolutionLayer) REG_RUNTIME_LAYER_FUNC(ReLU, createLayerFromCaffe<ReLULayer>);
REG_RUNTIME_LAYER_CLASS(Deconvolution, DeConvolutionLayer) REG_RUNTIME_LAYER_FUNC(Sigmoid, createLayerFromCaffe<SigmoidLayer>);
REG_RUNTIME_LAYER_CLASS(Concat, ConcatLayer) REG_RUNTIME_LAYER_FUNC(TanH, createLayerFromCaffe<TanHLayer>);
REG_RUNTIME_LAYER_FUNC(BNLL, createLayerFromCaffe<BNLLLayer>);
REG_RUNTIME_LAYER_FUNC(AbsVal, createLayerFromCaffe<AbsLayer>);
REG_RUNTIME_LAYER_FUNC(Power, createLayerFromCaffe<PowerLayer>);
REG_RUNTIME_LAYER_CLASS(Dropout, BlankLayer)
init.status = true; init.status = true;
} }
......
...@@ -42,60 +42,80 @@ ...@@ -42,60 +42,80 @@
#include "../precomp.hpp" #include "../precomp.hpp"
#include "layers_common.hpp" #include "layers_common.hpp"
#include "concat_layer.hpp" #include "concat_layer.hpp"
#include <opencv2/core/ocl.hpp>
namespace cv namespace cv
{ {
namespace dnn namespace dnn
{ {
ConcatLayer::ConcatLayer(LayerParams &params) : Layer(params)
{
axis = params.get<int>("axis", 1);
CV_Assert(axis >= 0);
}
void ConcatLayer::allocate(const std::vector<Blob *> &inputs, std::vector<Blob> &outputs) ConcatLayerImpl::ConcatLayerImpl(int axis_ /*= 1*/)
{ {
CV_Assert(inputs.size() > 0); axis = axis_;
}
int refType = inputs[0]->type(); void ConcatLayerImpl::allocate(const std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
BlobShape refShape = inputs[0]->shape(); {
CV_Assert(axis < refShape.dims()); CV_Assert(inputs.size() > 0);
int axisSum = 0; BlobShape refShape = inputs[0]->shape();
for (size_t i = 0; i < inputs.size(); i++) axisIdx = inputs[0]->canonicalAxis(axis);
{
BlobShape curShape = inputs[i]->shape();
CV_Assert(curShape.dims() == refShape.dims() && inputs[i]->type() == refType); int axisSum = 0;
for (int axisId = 0; axisId < refShape.dims(); axisId++) useOpenCL = false;
{ for (size_t i = 0; i < inputs.size(); i++)
if (axisId != axis && refShape[axisId] != curShape[axisId]) {
CV_Error(Error::StsBadSize, "Inconsitent shape for ConcatLayer"); BlobShape curShape = inputs[i]->shape();
}
axisSum += curShape[axis]; CV_Assert(curShape.dims() == refShape.dims() && inputs[i]->type() == inputs[0]->type());
for (int curAxis = 0; curAxis < refShape.dims(); curAxis++)
{
if (curAxis != axisIdx && refShape[curAxis] != curShape[curAxis])
CV_Error(Error::StsBadSize, "Inconsitent shape for ConcatLayer");
} }
refShape[axis] = axisSum; axisSum += curShape[axisIdx];
outputs.resize(1); useOpenCL |= inputs[i]->getState() == Blob::HEAD_AT_MAT;
outputs[0].create(refShape);
} }
void ConcatLayer::forward(std::vector<Blob *> &inputs, std::vector<Blob> &outputs) refShape[axisIdx] = axisSum;
{ useOpenCL &= ocl::useOpenCL();
const Mat& outMat = outputs[0].matRef(); int allocFlags = (useOpenCL) ? Blob::ALLOC_UMAT : Blob::ALLOC_MAT;
std::vector<Range> ranges(outputs[0].dims(), Range::all());
int sizeStart = 0;
for (size_t i = 0; i < inputs.size(); i++)
{
int sizeEnd = sizeStart + inputs[i]->size(axis);
ranges[axis] = Range(sizeStart, sizeEnd);
Mat outSubMat = outMat(&ranges[0]); outputs.resize(1);
inputs[i]->matRef().copyTo(outSubMat); outputs[0].create(refShape, inputs[0]->type(), allocFlags);
}
sizeStart = sizeEnd;
} void ConcatLayerImpl::forward(std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
{
#ifdef HAVE_OPENCL
if (useOpenCL)
forward_<UMat>(inputs, outputs);
else
#endif
forward_<Mat>(inputs, outputs);
}
template<typename XMat>
void ConcatLayerImpl::forward_(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
XMat& outMat = outputs[0].getRef<XMat>();
std::vector<Range> ranges(outputs[0].dims(), Range::all());
ranges[axisIdx].start = 0;
for (size_t i = 0; i < inputs.size(); i++)
{
ranges[axisIdx].end = ranges[axisIdx].start + inputs[i]->size(axisIdx);
inputs[i]->getRefConst<XMat>().copyTo(outMat(&ranges[0]));
ranges[axisIdx].start = ranges[axisIdx].end;
} }
} }
Ptr<ConcatLayer> ConcatLayer::create(int axis)
{
return Ptr<ConcatLayer>(new ConcatLayerImpl(axis));
}
}
} }
...@@ -42,20 +42,29 @@ ...@@ -42,20 +42,29 @@
#ifndef __OPENCV_DNN_LAYERS_CONCAT_LAYER_HPP__ #ifndef __OPENCV_DNN_LAYERS_CONCAT_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_CONCAT_LAYER_HPP__ #define __OPENCV_DNN_LAYERS_CONCAT_LAYER_HPP__
#include "../precomp.hpp" #include "../precomp.hpp"
#include <opencv2/dnn/all_layers.hpp>
namespace cv namespace cv
{ {
namespace dnn namespace dnn
{ {
class ConcatLayer : public Layer
{ class ConcatLayerImpl : public ConcatLayer
int axis; {
bool useOpenCL;
public: int axisIdx;
ConcatLayer(LayerParams& params);
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs); template<typename XMat>
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs); void forward_(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
};
public:
ConcatLayerImpl(int axis_ = 1);
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
};
} }
} }
#endif #endif
...@@ -42,51 +42,65 @@ ...@@ -42,51 +42,65 @@
#ifndef __OPENCV_DNN_LAYERS_CONVOLUTION_LAYER_HPP__ #ifndef __OPENCV_DNN_LAYERS_CONVOLUTION_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_CONVOLUTION_LAYER_HPP__ #define __OPENCV_DNN_LAYERS_CONVOLUTION_LAYER_HPP__
#include "../precomp.hpp" #include "../precomp.hpp"
#include <opencv2/dnn/all_layers.hpp>
namespace cv namespace cv
{ {
namespace dnn namespace dnn
{ {
//TODO: simultaneously convolution and bias addition for cache optimization
class ConvolutionLayer : public Layer
{
protected:
bool bias;
int numOutput, group;
int padH, padW;
int kerH, kerW;
int strideH, strideW;
int inpH, inpW, inpCn; //TODO: simultaneously convolution and bias addition for cache optimization
int outH, outW, outCn; class ConvolutionLayerImpl : public ConvolutionLayer
int topH, topW, topCn; //switched between inp/out on deconv/conv {
int inpGroupCn, outGroupCn; public:
int ksize;
ConvolutionLayerImpl();
virtual void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
virtual void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
virtual void init();
protected:
int numOutput, group;
int inpH, inpW, inpCn;
int outH, outW, outCn;
int topH, topW, topCn; //switched between inp/out on deconv/conv
int inpGroupCn, outGroupCn;
int ksize;
bool bias;
bool tryUseOpenCL, useOpenCL;
Blob colBlob, biasOnesBlob;
bool is1x1() const;
virtual void computeInpOutShape(const Blob &inpBlob);
template<typename XMat>
void forward_(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void im2col(const Mat &srcImg, Mat &dstCol);
void im2col(const UMat &srcImg, UMat &dstCol);
};
class DeConvolutionLayerImpl : public ConvolutionLayerImpl
{
public:
DeConvolutionLayerImpl();
virtual void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
bool useOpenCL; protected:
Mat colMat, biasOnesMat;
inline bool is1x1() const; virtual void computeInpOutShape(const Blob &inpBlob);
virtual void computeInpOutShape(const Blob &inpBlob);
void im2col(Blob &inpBlob, int imNum, int cnGroup);
public: template<typename XMat>
ConvolutionLayer() {} void forward_(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
ConvolutionLayer(LayerParams &params); void col2im(const Mat &colMat, Mat &dstImg);
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs); void col2im(const UMat &colMat, UMat &dstImg);
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs); };
};
class DeConvolutionLayer : public ConvolutionLayer //Importers
{ Ptr<Layer> createConvolutionLayerFromCaffe(LayerParams &params);
protected: Ptr<Layer> createDeconvolutionLayerFromCaffe(LayerParams &params);
void computeInpOutShape(const Blob &inpBlob);
void col2im(Mat &dstMat);
public:
DeConvolutionLayer(LayerParams &params);
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
};
} }
} }
#endif #endif
#include "../precomp.hpp"
#include "elementwise_layers.hpp"
namespace cv
{
namespace dnn
{
// Defines the zero-argument _Layer::create() factory, which returns an
// ElementWiseLayer specialised on a default-constructed _Functor.
// Trailing macro arguments are accepted but are not used by the expansion.
#define ACTIVATION_CREATOR_FOR(_Layer, _Functor, ...) \
Ptr<_Layer> _Layer::create() { \
    return Ptr<_Layer>( new ElementWiseLayer<_Functor>(_Functor()) ); }
// Builds a ReLU activation layer whose functor carries the given negative slope.
Ptr<ReLULayer> ReLULayer::create(double negativeSlope)
{
    const ReLUFunctor functor(negativeSlope);
    return Ptr<ReLULayer>(new ElementWiseLayer<ReLUFunctor>(functor));
}
// Builds a tanh activation layer backed by a default-constructed ElementWiseLayer.
Ptr<TanHLayer> TanHLayer::create()
{
    typedef ElementWiseLayer<TanHFunctor> Impl;
    return Ptr<TanHLayer>(new Impl());
}
// Builds a sigmoid activation layer backed by a default-constructed ElementWiseLayer.
Ptr<SigmoidLayer> SigmoidLayer::create()
{
    Ptr<SigmoidLayer> layer(new ElementWiseLayer<SigmoidFunctor>());
    return layer;
}
// Builds an absolute-value activation layer backed by a default-constructed ElementWiseLayer.
Ptr<AbsLayer> AbsLayer::create()
{
    typedef ElementWiseLayer<AbsValFunctor> Impl;
    return Ptr<AbsLayer>(new Impl());
}
// Builds a BNLL activation layer backed by a default-constructed ElementWiseLayer.
Ptr<BNLLLayer> BNLLLayer::create()
{
    ElementWiseLayer<BNLLFunctor> *impl = new ElementWiseLayer<BNLLFunctor>();
    return Ptr<BNLLLayer>(impl);
}
// Builds a power activation layer; the three parameters are forwarded
// unchanged to the PowerFunctor that the layer evaluates element-wise.
Ptr<PowerLayer> PowerLayer::create(double power /*= 1*/, double scale /*= 1*/, double shift /*= 0*/)
{
    return Ptr<PowerLayer>(new ElementWiseLayer<PowerFunctor>(PowerFunctor(power, scale, shift)));
}
}
}
\ No newline at end of file
...@@ -44,6 +44,11 @@ ...@@ -44,6 +44,11 @@
#include "../precomp.hpp" #include "../precomp.hpp"
#include "layers_common.hpp" #include "layers_common.hpp"
#include <cmath> #include <cmath>
#include <opencv2/dnn/all_layers.hpp>
#include <opencv2/core/ocl.hpp>
#ifdef HAVE_OPENCL
#include "modules/dnn/opencl_kernels_dnn.hpp"
#endif
namespace cv namespace cv
{ {
...@@ -55,130 +60,259 @@ using std::exp; ...@@ -55,130 +60,259 @@ using std::exp;
using std::tanh; using std::tanh;
using std::pow; using std::pow;
template<typename Func> template<typename Func>
class ElementWiseLayer : public Layer class ElementWiseLayer : public Func::Layer
{
bool useOpenCL;
Func func;
template<typename Dtype>
class PBody : public cv::ParallelLoopBody
{ {
Func func; Func &func;
Dtype *data;
public: public:
ElementWiseLayer(LayerParams &_params) : func(_params) {} PBody(Mat &mat, Func &func_) :
func(func_), data(mat.ptr<Dtype>())
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs) {}
{
outputs.resize(inputs.size());
for (size_t i = 0; i < inputs.size(); i++)
outputs[i].shareFrom(*inputs[i]); //no data copy
}
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs) void operator()(const Range &r) const
{ {
for (size_t i = 0; i < inputs.size(); i++) for (int i = r.start; i < r.end; i++)
{ data[i] = func(data[i]);
CV_Assert(inputs[i]->ptr() == outputs[i].ptr() && inputs[i]->type() == outputs[i].type());
size_t size = outputs[i].total();
if (outputs[i].type() == CV_32F)
{
float *data = outputs[i].ptrf();
for (size_t j = 0; j < size; j++)
data[j] = func(data[j]);
}
else if (outputs[i].type() == CV_64F)
{
double *data = outputs[i].ptr<double>();
for (size_t j = 0; j < size; j++)
data[j] = func(data[j]);
}
else
{
CV_Error(Error::StsNotImplemented, "Only CV_32F and CV_64F blobs are supported");
}
}
} }
}; };
public:
struct ReLUFunctor ElementWiseLayer() {}
ElementWiseLayer(const Func &f) : func(f) {}
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{ {
float negative_slope; useOpenCL = ocl::useOpenCL();
ReLUFunctor(LayerParams &params) outputs.resize(inputs.size());
for (size_t i = 0; i < inputs.size(); i++)
{ {
if (params.has("negative_slope")) outputs[i].shareFrom(*inputs[i]); //no data copy
negative_slope = params.get<float>("negative_slope");
//hotfix: shareFrom doesn't provide properly Mat/UMat switching
if (useOpenCL)
outputs[i].umatRef() = inputs[i]->umatRefConst();
else else
negative_slope = 0.f; outputs[i].matRef() = inputs[i]->matRefConst();
} }
}
template<typename TFloat> void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
inline TFloat operator()(TFloat x) {
{ #ifdef HAVE_OPENCL
return (x >= (TFloat)0) ? x : negative_slope * x; if (useOpenCL)
} forwardOCL(inputs, outputs);
}; else
#endif
forwardCPU(inputs, outputs);
}
struct TanHFunctor #ifdef HAVE_OPENCL
void forwardOCL(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{ {
TanHFunctor(LayerParams&) {} size_t wgSize = ocl::Device::getDefault().maxWorkGroupSize();
template<typename TFloat> for (size_t i = 0; i < inputs.size(); i++)
inline TFloat operator()(TFloat x)
{ {
return tanh(x); const UMat &src = inputs[i]->umatRefConst();
UMat &dst = outputs[i].umatRef();
CV_Assert(src.isContinuous() && dst.isContinuous() && !src.offset && !dst.offset);
ocl::Kernel ker;
CV_Assert(func.initKernel(ker, src));
ker.set(0, (int)src.total());
ker.set(1, ocl::KernelArg::PtrReadOnly(src));
ker.set(2, ocl::KernelArg::PtrWriteOnly(dst));
size_t gSize = src.total();
CV_Assert(ker.run(1, &gSize, &wgSize, true));
} }
}; }
#endif
struct SigmoidFunctor void forwardCPU(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{ {
SigmoidFunctor(LayerParams&) {} for (size_t i = 0; i < inputs.size(); i++)
template<typename TFloat>
inline TFloat operator()(TFloat x)
{ {
return (TFloat)1 / ((TFloat)1 + exp(-x)); const Mat &src = inputs[i]->matRefConst();
Mat &dst = outputs[i].matRef();
CV_Assert(src.ptr() == dst.ptr() && src.isContinuous());
Range sizeRange = Range(0, dst.total());
if (dst.type() == CV_32F)
{
cv::parallel_for_(sizeRange, PBody<float>(dst, func));
}
else if (dst.type() == CV_64F)
{
cv::parallel_for_(sizeRange, PBody<double>(dst, func));
}
else
{
CV_Error(Error::StsNotImplemented, "Only CV_32F and CV_64F blobs are supported");
}
} }
}; }
};
struct AbsValFunctor #ifdef HAVE_OPENCL
static String oclGetTMacro(const UMat &m)
{
return String("-DT=") + ocl::typeToStr(m.type()) + String(" ");
}
#endif
struct ReLUFunctor
{
typedef ReLULayer Layer;
double slope;
ReLUFunctor(double slope_)
: slope(slope_) {}
template<typename TFloat>
inline TFloat operator()(TFloat x) const
{ {
AbsValFunctor(LayerParams&) {} return (x >= (TFloat)0) ? x : (TFloat)slope * x;
}
template<typename TFloat> #ifdef HAVE_OPENCL
inline TFloat operator()(TFloat x) bool initKernel(ocl::Kernel &ker, const UMat &src) const
{ {
return abs(x); const char *buildoptSlope = (slope == 0) ? "-DRELU_NO_SLOPE" : "";
} String buildopt = oclGetTMacro(src) + buildoptSlope;
};
struct PowerFunctor if (!ker.create("ReLUForward", ocl::dnn::activations_oclsrc, buildopt))
return false;
if (slope != 0)
ker.set(3, (float)slope);
return true;
}
#endif
};
struct TanHFunctor
{
typedef TanHLayer Layer;
template<typename TFloat>
inline TFloat operator()(TFloat x) const
{ {
float power, scale, shift; return tanh(x);
}
PowerFunctor(LayerParams &params) #ifdef HAVE_OPENCL
{ bool initKernel(ocl::Kernel &ker, const UMat &src) const
power = params.get<float>("power", 1.0f); {
scale = params.get<float>("scale", 1.0f); if (!ker.create("TanHForward", ocl::dnn::activations_oclsrc, oclGetTMacro(src)))
shift = params.get<float>("shift", 0.0f); return false;
} return true;
}
#endif
};
template<typename TFloat> struct SigmoidFunctor
inline TFloat operator()(TFloat x) {
{ typedef SigmoidLayer Layer;
return pow((TFloat)shift + (TFloat)scale * x, (TFloat)power);
}
};
struct BNLLFunctor template<typename TFloat>
inline TFloat operator()(TFloat x) const
{ {
BNLLFunctor(LayerParams&) {} return (TFloat)1 / ((TFloat)1 + exp(-x));
}
#ifdef HAVE_OPENCL
bool initKernel(ocl::Kernel &ker, const UMat &src) const
{
if (!ker.create("SigmoidForward", ocl::dnn::activations_oclsrc, oclGetTMacro(src)))
return false;
return true;
}
#endif
};
// Element-wise absolute value. `Layer` names the public layer interface that
// ElementWiseLayer<AbsValFunctor> derives from.
struct AbsValFunctor
{
    typedef AbsLayer Layer;

    // CPU path: returns abs(value).
    template<typename TFloat>
    inline TFloat operator()(TFloat value) const
    {
        return abs(value);
    }

#ifdef HAVE_OPENCL
    // OpenCL path: creates the "AbsValForward" kernel, built with the element
    // type macro derived from src. Returns whether kernel creation succeeded.
    bool initKernel(ocl::Kernel &ker, const UMat &src) const
    {
        return ker.create("AbsValForward", ocl::dnn::activations_oclsrc, oclGetTMacro(src));
    }
#endif
};
// Binomial normal log likelihood activation: f(x) = log(1 + exp(x)).
// `Layer` names the public layer interface that ElementWiseLayer<BNLLFunctor>
// derives from.
struct BNLLFunctor
{
    typedef BNLLLayer Layer;

    // CPU path. The function is evaluated in its numerically stable form so
    // that exp() is never called with a positive argument:
    //   x >  0 : x + log(1 + exp(-x))
    //   x <= 0 :     log(1 + exp(x))
    // The previous expression, log(1 + exp(-|x|)), dropped the "+ x" term and
    // therefore returned wrong values for every positive input.
    template<typename TFloat>
    inline TFloat operator()(TFloat x) const
    {
        return (x > (TFloat)0) ? x + log((TFloat)1 + exp(-x))
                               : log((TFloat)1 + exp(x));
    }

#ifdef HAVE_OPENCL
    // OpenCL path: creates the "BNLLForward" kernel, built with the element
    // type macro derived from src. Returns false when creation fails.
    bool initKernel(ocl::Kernel &ker, const UMat &src) const
    {
        if (!ker.create("BNLLForward", ocl::dnn::activations_oclsrc, oclGetTMacro(src)))
            return false;
        return true;
    }
#endif
};
// Element-wise power activation: (shift + scale * x) ^ power.
// `Layer` names the public layer interface that ElementWiseLayer<PowerFunctor>
// derives from.
struct PowerFunctor
{
    typedef PowerLayer Layer;

    double power, scale, shift;

    PowerFunctor(double power_, double scale_ = 1, double shift_ = 0)
    {
        power = power_;
        scale = scale_;
        shift = shift_;
    }

    // CPU path: the coefficients are cast to the element type before use.
    template<typename TFloat>
    inline TFloat operator()(TFloat x) const
    {
        return pow((TFloat)shift + (TFloat)scale * x, (TFloat)power);
    }

#ifdef HAVE_OPENCL
    // OpenCL path: creates the "PowForward" kernel and, on success, binds
    // power, scale and shift (as floats) to kernel arguments 3, 4 and 5.
    bool initKernel(ocl::Kernel &ker, const UMat &src) const
    {
        const bool created = ker.create("PowForward", ocl::dnn::activations_oclsrc, oclGetTMacro(src));
        if (created)
        {
            ker.set(3, (float)power);
            ker.set(4, (float)scale);
            ker.set(5, (float)shift);
        }
        return created;
    }
#endif
};
template<typename TFloat>
inline TFloat operator()(TFloat x)
{
return log((TFloat)1 + exp(-abs(x)));
}
};
} }
} }
#endif #endif
...@@ -42,73 +42,88 @@ ...@@ -42,73 +42,88 @@
#include "../precomp.hpp" #include "../precomp.hpp"
#include "layers_common.hpp" #include "layers_common.hpp"
#include "fully_connected_layer.hpp" #include "fully_connected_layer.hpp"
#include "op_blas.hpp"
#include <opencv2/dnn/shape_utils.hpp>
#include <opencv2/core/ocl.hpp>
namespace cv namespace cv
{ {
namespace dnn namespace dnn
{ {
FullyConnectedLayer::FullyConnectedLayer(LayerParams &params) : Layer(params)
{
numOutputs = params.get<int>("num_output");
bias = params.get<bool>("bias_term", true);
axis_ = params.get<int>("axis", 1);
CV_Assert(blobs.size() == (bias ? 2U : 1U)); FullyConnectedLayerImpl::FullyConnectedLayerImpl(int axis_)
CV_Assert(blobs[0].dims() >= 2 && blobs[0].total() >= (size_t)numOutputs); {
CV_Assert(!bias || blobs[1].total() == (size_t)numOutputs); axis = axis_;
} }
void FullyConnectedLayer::allocate(const std::vector<Blob*> &input, std::vector<Blob> &output) void FullyConnectedLayerImpl::allocate(const std::vector<Blob*> &input, std::vector<Blob> &output)
{ {
CV_Assert(input.size() > 0); CV_Assert(input.size() > 0);
CV_Assert(1 <= blobs.size() && blobs.size() <= 2);
CV_Assert(blobs[0].dims() == 2);
axis = input[0]->canonicalAxis(axis_); bias = (blobs.size() >= 1);
innerSize = (int)input[0]->total(axis); axisCan = input[0]->canonicalAxis(axis);
dtype = input[0]->type();
numOutput = blobs[0].size(0);
innerSize = blobs[0].size(1);
outerSize = input[0]->total(0, axisCan);
CV_Assert((size_t)innerSize * (size_t)numOutputs == blobs[0].total()); CV_Assert((size_t)innerSize == input[0]->total(axisCan));
CV_Assert(blobs[0].size(-2) == numOutputs && blobs[0].size(-1) == innerSize); CV_Assert(!bias || (size_t)numOutput == blobs[1].total());
output.resize(input.size()); useOpenCL = ocl::useOpenCL();
for (size_t i = 0; i < input.size(); i++) int allocFlags = useOpenCL ? Blob::ALLOC_UMAT : Blob::ALLOC_UMAT;
{
if (i != 0)
CV_Assert(input[i]->equalShape(*input[0]));
this->reshape(*input[i], output[i]); biasOnesBlob.create(Shape(outerSize, 1), dtype, allocFlags);
} biasOnesBlob.setTo(1);
}
void FullyConnectedLayer::reshape(const Blob &inp, Blob &out) output.resize(input.size());
for (size_t i = 0; i < input.size(); i++)
{ {
BlobShape inpShape = inp.shape(); CV_Assert(i == 0 || (input[i]->equalShape(*input[0]) && input[i]->type() == dtype));
BlobShape outShape(axis+1, inpShape.ptr()); Shape outShape = input[i]->shape().slice(0, axis) + Shape(numOutput);
outShape[axis] = numOutputs; output[i].create(outShape, dtype, allocFlags);
}
}
out.create(outShape, inp.type()); void FullyConnectedLayerImpl::forward(std::vector<Blob*> &input, std::vector<Blob> &output)
{
#ifdef HAVE_OPENCL
if (useOpenCL)
forward_<UMat>(input, output);
else
#endif
forward_<Mat>(input, output);
}
template<typename XMat>
void FullyConnectedLayerImpl::forward_(std::vector<Blob *> &input, std::vector<Blob> &output)
{
const XMat &weight = blobs[0].getRefConst<XMat>();
const XMat *biasMat = NULL, *biasOnesMat = NULL;
if (bias)
{
biasOnesMat = &biasOnesBlob.getRefConst<XMat>();
biasMat = &blobs[1].getRefConst<XMat>();
} }
void FullyConnectedLayer::forward(std::vector<Blob*> &input, std::vector<Blob> &output) for (size_t i = 0; i < input.size(); i++)
{ {
for (size_t i = 0; i < input.size(); i++) const XMat srcMat = reshaped(input[i]->getRefConst<XMat>(), Shape(outerSize, innerSize));
{ XMat dstMat = reshaped(output[i].getRef<XMat>(), Shape(outerSize, numOutput));
int M = (int)input[i]->total(0, axis); dnn::gemm(srcMat, weight, 1, dstMat, 0, GEMM_2_T);
int N = numOutputs;
int K = innerSize; if (bias)
dnn::gemm(*biasOnesMat, *biasMat, 1, dstMat, 1);
Mat srcMat(M, K, input[i]->type(), input[i]->ptrf());
Mat weight(N, K, blobs[0].type(), blobs[0].ptrf());
Mat dstMat(M, N, output[i].type(), output[i].ptrf());
//important: Caffe stores weights as transposed array
cv::gemm(srcMat, weight, 1, noArray(), 0, dstMat, GEMM_2_T);
if (bias)
{
Mat biasOnesMat = Mat::ones(M, 1, CV_32F);
Mat biasMat(1, N, CV_32F, blobs[1].ptrf());
cv::gemm(biasOnesMat, biasMat, 1, dstMat, 1, dstMat);
}
}
} }
} }
// Factory for the inner-product layer: constructs a FullyConnectedLayerImpl
// for the given axis and returns it through the InnerProductLayer interface.
Ptr<InnerProductLayer> InnerProductLayer::create(int axis)
{
    Ptr<InnerProductLayer> layer(new FullyConnectedLayerImpl(axis));
    return layer;
}
}
} }
...@@ -42,26 +42,30 @@ ...@@ -42,26 +42,30 @@
#ifndef __OPENCV_DNN_LAYERS_FULLY_CONNECTED_LAYER_HPP__ #ifndef __OPENCV_DNN_LAYERS_FULLY_CONNECTED_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_FULLY_CONNECTED_LAYER_HPP__ #define __OPENCV_DNN_LAYERS_FULLY_CONNECTED_LAYER_HPP__
#include "../precomp.hpp" #include "../precomp.hpp"
#include <opencv2/dnn/all_layers.hpp>
namespace cv namespace cv
{ {
namespace dnn namespace dnn
{ {
class FullyConnectedLayer : public Layer
{
bool bias;
int numOutputs;
int axis_, axis;
int innerSize; class FullyConnectedLayerImpl : public InnerProductLayer
{
int axisCan, dtype;
int numOutput, innerSize, outerSize;
bool bias, useOpenCL;
Blob biasOnesBlob;
template<typename XMat>
void forward_(std::vector<Blob*> &input, std::vector<Blob> &output);
public:
void reshape(const Blob &inp, Blob &out); FullyConnectedLayerImpl(int axisCan = 1);
void allocate(const std::vector<Blob*> &input, std::vector<Blob> &output);
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
};
public:
FullyConnectedLayer(LayerParams &params);
void allocate(const std::vector<Blob*> &input, std::vector<Blob> &output);
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
};
} }
} }
#endif #endif
...@@ -46,44 +46,5 @@ namespace cv ...@@ -46,44 +46,5 @@ namespace cv
namespace dnn namespace dnn
{ {
// Reads kernel size, padding and stride from the layer parameters.
//
// Each quantity may be given per dimension ("*_h" and "*_w" together) or as a
// single value applied to both dimensions. The kernel size is mandatory;
// padding defaults to 0 and stride to 1. Raises StsBadArg when no kernel size
// is present, and asserts that the resulting values are in range.
void getKernelParams(LayerParams &params, int &kernelH, int &kernelW, int &padH, int &padW, int &strideH, int &strideW)
{
    const bool explicitKernel = params.has("kernel_h") && params.has("kernel_w");
    if (explicitKernel)
    {
        kernelH = params.get<int>("kernel_h");
        kernelW = params.get<int>("kernel_w");
    }
    else
    {
        if (!params.has("kernel_size"))
            CV_Error(cv::Error::StsBadArg, "kernel_size (or kernel_h and kernel_w) not specified");

        kernelW = params.get<int>("kernel_size");
        kernelH = kernelW;
    }

    const bool explicitPad = params.has("pad_h") && params.has("pad_w");
    if (!explicitPad)
    {
        padW = params.get<int>("pad", 0);
        padH = padW;
    }
    else
    {
        padH = params.get<int>("pad_h");
        padW = params.get<int>("pad_w");
    }

    const bool explicitStride = params.has("stride_h") && params.has("stride_w");
    if (!explicitStride)
    {
        strideW = params.get<int>("stride", 1);
        strideH = strideW;
    }
    else
    {
        strideH = params.get<int>("stride_h");
        strideW = params.get<int>("stride_w");
    }

    CV_Assert(kernelH > 0 && kernelW > 0 && padH >= 0 && padW >= 0 && strideH > 0 && strideW > 0);
}
} }
} }
...@@ -42,14 +42,14 @@ ...@@ -42,14 +42,14 @@
#ifndef __OPENCV_DNN_LAYERS_LAYERS_COMMON_HPP__ #ifndef __OPENCV_DNN_LAYERS_LAYERS_COMMON_HPP__
#define __OPENCV_DNN_LAYERS_LAYERS_COMMON_HPP__ #define __OPENCV_DNN_LAYERS_LAYERS_COMMON_HPP__
#include <opencv2/dnn.hpp> #include <opencv2/dnn.hpp>
#include "op_blas.hpp"
#include "op_im2col.hpp"
namespace cv namespace cv
{ {
namespace dnn namespace dnn
{ {
void getKernelParams(LayerParams &params, int &kernelH, int &kernelW, int &padH, int &padW, int &strideH, int &strideW);
} }
} }
......
...@@ -42,123 +42,213 @@ ...@@ -42,123 +42,213 @@
#include "../precomp.hpp" #include "../precomp.hpp"
#include "layers_common.hpp" #include "layers_common.hpp"
#include "lrn_layer.hpp" #include "lrn_layer.hpp"
#include "modules/dnn/opencl_kernels_dnn.hpp"
#include <opencv2/imgproc.hpp> #include <opencv2/imgproc.hpp>
#include <opencv2/core/ocl.hpp>
#include <opencv2/dnn/shape_utils.hpp>
#include <algorithm> #include <algorithm>
namespace cv namespace cv
{ {
namespace dnn namespace dnn
{ {
LRNLayer::LRNLayer(LayerParams &params) : Layer(params)
{
String nrmType = params.get<String>("norm_region", "ACROSS_CHANNELS");
if (nrmType == "ACROSS_CHANNELS")
type = CHANNEL_NRM;
else if (nrmType == "WITHIN_CHANNEL")
type = SPATIAL_NRM;
else
CV_Error(Error::StsBadArg, "Unknown region type \"" + nrmType + "\"");
size = params.get<int>("local_size", 5);
if (size % 2 != 1 || size <= 0)
CV_Error(Error::StsBadArg, "LRN layer supports only positive odd values for local_size");
alpha = params.get<double>("alpha", 1);
beta = params.get<double>("beta", 0.75);
}
void LRNLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs) LRNLayerImpl::LRNLayerImpl(int type_, int size_, double alpha_, double beta_)
{ {
CV_Assert(inputs.size() == 1); type = type_;
outputs.resize(1); size = size_;
alpha = alpha_;
beta = beta_;
}
Vec4i shape = inputs[0]->shape4(); void LRNLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
outputs[0].create(shape); {
CV_Assert(inputs.size() == 1 && inputs[0]->dims() == 4);
CV_Assert(type == CHANNEL_NRM || type == SPATIAL_NRM);
useOpenCL = cv::ocl::useOpenCL();
shape[0] = 1; //maybe make shape[0] = 1 too if (type == SPATIAL_NRM && !useOpenCL)
bufBlob.create(shape); buf.create(inputs[0]->shape().slice(2), inputs[0]->type(), Blob::ALLOC_MAT);
} if (type == CHANNEL_NRM && useOpenCL)
buf.create(inputs[0]->shape().slice(2), inputs[0]->type(), Blob::ALLOC_UMAT);
void LRNLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs) outputs.resize(1);
outputs[0].create(inputs[0]->shape(), inputs[0]->type());
}
void LRNLayerImpl::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
Blob &src = *inputs[0];
Blob &dst = outputs[0];
switch (type)
{ {
Blob &src = *inputs[0]; case CHANNEL_NRM:
Blob &dst = outputs[0]; channelNoramlization(src, dst);
break;
case SPATIAL_NRM:
spatialNormalization(src, dst);
break;
default:
CV_Error(Error::StsNotImplemented, "Unimplemented mode of LRN layer");
break;
}
}
switch (type) template<typename XMat>
{ static XMat getPlane(XMat &m, int n, int cn)
case CHANNEL_NRM: {
channelNoramlization(src, dst); return reshaped(slice(m, n, cn), BlobShape::like(m).slice(2));
break; }
case SPATIAL_NRM:
spatialNormalization(src, dst); void LRNLayerImpl::channelNoramlization(Blob &src, Blob &dst)
break; {
default: if (!useOpenCL)
CV_Error(cv::Error::StsNotImplemented, "Unimplemented mode of LRN layer"); channelNoramlization_<Mat>(src, dst);
break; else
} {
//channelNoramlization_ocl(src.getRefConst<UMat>(), dst.getRef<UMat>()); //consumes a lot of memory
channelNoramlization_<UMat>(src, dst);
} }
}
void LRNLayer::channelNoramlization(Blob &srcBlob, Blob &dstBlob) template<typename XMat>
void LRNLayerImpl::channelNoramlization_(Blob &srcBlob, Blob &dstBlob)
{
int num = srcBlob.num();
int channels = srcBlob.channels();
int ksize = (size - 1) / 2;
XMat srcMat = srcBlob.getRefConst<XMat>();
XMat dstMat = dstBlob.getRef<XMat>();
for (int n = 0; n < num; n++)
{ {
CV_DbgAssert(srcBlob.ptr() != dstBlob.ptr()); XMat accum = getPlane(dstMat, n, channels-1); //trick for memory saving
accum.setTo(0);
int num = srcBlob.num(); for (int cn = 0; cn < std::min(ksize, channels); cn++)
int channels = srcBlob.channels(); cv::accumulateSquare(getPlane(srcMat, n, cn), accum);
int ksize = (size - 1) / 2;
for (int n = 0; n < num; n++) for (int cn = 0; cn < channels; cn++)
{ {
Mat accum = dstBlob.getPlane(n, channels-1); //trick for memory saving if (cn + ksize < channels)
accum.setTo(0); {
cv::accumulateSquare(getPlane(srcMat, n, cn + ksize), accum);
for (int cn = 0; cn < std::min(ksize, channels); cn++) }
cv::accumulateSquare(srcBlob.getPlane(n, cn), accum);
for (int cn = 0; cn < channels; cn++) if (cn - ksize - 1 >= 0)
{ {
if (cn + ksize < channels) //subtractSquare
{ XMat left = getPlane(srcMat, n, cn - ksize - 1);
cv::accumulateSquare(srcBlob.getPlane(n, cn + ksize), accum); cv::pow(left, 2, left);
} cv::subtract(accum, left, accum);
if (cn - ksize - 1 >= 0)
{
Mat left = srcBlob.getPlane(n, cn - ksize - 1);
cv::subtract(accum, left.mul(left), accum); //subtractSquare
}
Mat dst = dstBlob.getPlane(n, cn);
accum.convertTo(dst, dst.type(), alpha/size, 1);
cv::pow(dst, beta, dst);
cv::divide(srcBlob.getPlane(n, cn), dst, dst);
} }
XMat dst = getPlane(dstMat, n, cn);
accum.convertTo(dst, dst.type(), alpha/size, 1);
cv::pow(dst, beta, dst);
cv::divide(getPlane(srcMat, n, cn), dst, dst);
} }
} }
}
void LRNLayer::spatialNormalization(Blob &srcBlob, Blob &dstBlob) bool LRNLayerImpl::channelNoramlization_ocl(const UMat &src, UMat &dst)
{ {
int num = srcBlob.num(); #ifdef HAVE_OPENCL
int channels = srcBlob.channels(); if (src.offset != 0 || dst.offset != 0) //TODO: add offset
return false;
String buildOpts = String("-DT=") + ocl::typeToStr(src.type());
ocl::Kernel kerScale("LRNFillScale", ocl::dnn::lrn_oclsrc, buildOpts);
if (kerScale.empty())
return false;
ocl::Kernel kerOutput("LRNComputeOutput", ocl::dnn::lrn_oclsrc, buildOpts);
if (kerOutput.empty())
return false;
Shape shape = Shape::like(src);
int ksize = (size - 1) / 2;
size_t wgSize = ocl::Device::getDefault().maxWorkGroupSize();
UMat &scaleBuf = buf.umatRef();
size_t nthreads = (size_t)(shape.total() / shape[1]);
kerScale.args((int)nthreads,
ocl::KernelArg::PtrReadOnly(src), shape[0], shape[1], shape[2], shape[3],
size, (float)(alpha/size), (float)ksize, ocl::KernelArg::PtrWriteOnly(scaleBuf));
if (!kerScale.run(1, &nthreads, &wgSize, true))
return false;
nthreads = (size_t)shape.total();
kerOutput.args((int)nthreads,
ocl::KernelArg::PtrReadOnly(src), ocl::KernelArg::PtrReadOnly(scaleBuf),
-beta, ocl::KernelArg::PtrWriteOnly(dst) );
if (!kerOutput.run(1, &nthreads, &wgSize, true))
return false;
return true;
#else
(void)src;
(void)dst;
return false;
#endif
}
// Dispatches within-channel normalization to the UMat implementation when
// OpenCL is in use, and to the Mat implementation otherwise.
void LRNLayerImpl::spatialNormalization(Blob &src, Blob &dst)
{
    if (useOpenCL)
    {
        spatialNormalization_<UMat>(src, dst);
        return;
    }

    spatialNormalization_<Mat>(src, dst);
}
//TODO: fix cv::boxFilter with BORDER_ISOLATED flag in CPU mode
// Mat specialization: copies src into the layer's scratch blob and runs the
// unnormalized squared box filter on that copy, using BORDER_CONSTANT
// (without BORDER_ISOLATED) and a size x size window.
template<>
void LRNLayerImpl::sqrBoxFilter_<Mat>(const Mat &src, Mat &dst)
{
    Mat scratch = buf.getRef<Mat>();
    src.copyTo(scratch);

    const Size window(size, size);
    const Point anchor(-1, -1);
    cv::sqrBoxFilter(scratch, dst, dst.depth(), window, anchor, false, BORDER_CONSTANT);
}
// UMat specialization: runs the unnormalized squared box filter directly on
// src with a size x size window, using BORDER_CONSTANT | BORDER_ISOLATED.
template<>
void LRNLayerImpl::sqrBoxFilter_<UMat>(const UMat &src, UMat &dst)
{
    const int borderMode = BORDER_CONSTANT | BORDER_ISOLATED;
    cv::sqrBoxFilter(src, dst, dst.depth(), Size(size, size), Point(-1, -1), false, borderMode);
}
for (int n = 0; n < num; n++) template<typename XMat>
void LRNLayerImpl::spatialNormalization_(Blob &srcBlob, Blob &dstBlob)
{
int num = srcBlob.num();
int channels = srcBlob.channels();
XMat srcMat = srcBlob.getRefConst<XMat>();
XMat dstMat = dstBlob.getRef<XMat>();
for (int n = 0; n < num; n++)
{
for (int cn = 0; cn < channels; cn++)
{ {
for (int cn = 0; cn < channels; cn++) XMat src = getPlane(srcMat, n, cn);
{ XMat dst = getPlane(dstMat, n, cn);
Mat src = srcBlob.getPlane(n, cn);
Mat dst = dstBlob.getPlane(n, cn); sqrBoxFilter_(src, dst);
uchar *dataDst0 = dst.data;
dst.convertTo(dst, dst.type(), alpha/(size*size), 1);
cv::pow(srcBlob.getPlane(n, cn), 2, dst); cv::pow(dst, beta, dst);
//TODO: check border type cv::divide(src, dst, dst);
cv::boxFilter(dst, dst, dst.depth(), cv::Size(size, size), cv::Point(-1, -1), false, cv::BORDER_CONSTANT);
dst.convertTo(dst, dst.type(), alpha/(size*size), 1);
cv::pow(dst, beta, dst);
cv::divide(src, dst, dst);
CV_Assert(dataDst0 == dst.data); //debug
}
} }
} }
}
// Factory for the LRN layer: forwards all four parameters to LRNLayerImpl and
// returns the instance through the public LRNLayer interface.
Ptr<LRNLayer> LRNLayer::create(int type, int size, double alpha, double beta)
{
    Ptr<LRNLayer> layer(new LRNLayerImpl(type, size, alpha, beta));
    return layer;
}
} }
} }
...@@ -42,34 +42,36 @@ ...@@ -42,34 +42,36 @@
#ifndef __OPENCV_DNN_LAYERS_LRN_LAYER_HPP__ #ifndef __OPENCV_DNN_LAYERS_LRN_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_LRN_LAYER_HPP__ #define __OPENCV_DNN_LAYERS_LRN_LAYER_HPP__
#include "../precomp.hpp" #include "../precomp.hpp"
#include <opencv2/dnn/all_layers.hpp>
namespace cv namespace cv
{ {
namespace dnn namespace dnn
{ {
class LRNLayer : public Layer
{
enum
{
CHANNEL_NRM,
SPATIAL_NRM,
SPATIAL_CONTRAST_NRM //cuda-convnet feature
} type;
int size; class LRNLayerImpl : public LRNLayer
double alpha, beta; {
bool useOpenCL;
Blob buf;
void channelNoramlization(Blob &src, Blob &dst);
template<typename XMat>
void channelNoramlization_(Blob &src, Blob &dst);
bool channelNoramlization_ocl(const UMat &src, UMat &dst);
Blob bufBlob; void spatialNormalization(Blob &src, Blob &dst);
template<typename XMat>
void spatialNormalization_(Blob &src, Blob &dst);
template<typename XMat>
void sqrBoxFilter_(const XMat &src, XMat &dst);
void channelNoramlization(Blob &src, Blob &dst); public:
void spatialNormalization(Blob &src, Blob &dst);
public: LRNLayerImpl(int type = CHANNEL_NRM, int size = 5, double alpha = 1, double beta = 0.75);
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
};
LRNLayer(LayerParams &params);
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
};
} }
} }
#endif #endif
...@@ -42,20 +42,21 @@ ...@@ -42,20 +42,21 @@
#include "../precomp.hpp" #include "../precomp.hpp"
#include "layers_common.hpp" #include "layers_common.hpp"
#include "mvn_layer.hpp" #include "mvn_layer.hpp"
#include <opencv2/dnn/shape_utils.hpp>
namespace cv namespace cv
{ {
namespace dnn namespace dnn
{ {
MVNLayer::MVNLayer(LayerParams &params) : Layer(params) MVNLayerImpl::MVNLayerImpl(bool normVariance_, bool acrossChannels_, double eps_)
{ {
eps = params.get<double>("eps", 1e-9); normVariance = normVariance_;
acrossChannels = params.get<bool>("across_channels", false); acrossChannels = acrossChannels_;
normalizeVariance = params.get<bool>("normalize_variance", true); eps = eps_;
} }
void MVNLayer::allocate(const std::vector<Blob *> &inputs, std::vector<Blob> &outputs) void MVNLayerImpl::allocate(const std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
{ {
outputs.resize(inputs.size()); outputs.resize(inputs.size());
for (size_t i = 0; i < inputs.size(); i++) for (size_t i = 0; i < inputs.size(); i++)
...@@ -65,20 +66,17 @@ void MVNLayer::allocate(const std::vector<Blob *> &inputs, std::vector<Blob> &ou ...@@ -65,20 +66,17 @@ void MVNLayer::allocate(const std::vector<Blob *> &inputs, std::vector<Blob> &ou
} }
} }
void MVNLayer::forward(std::vector<Blob *> &inputs, std::vector<Blob> &outputs) void MVNLayerImpl::forward(std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
{ {
for (size_t inpIdx = 0; inpIdx < inputs.size(); inpIdx++) for (size_t inpIdx = 0; inpIdx < inputs.size(); inpIdx++)
{ {
Blob &inpBlob = *inputs[inpIdx]; Blob &inpBlob = *inputs[inpIdx];
Blob &outBlob = outputs[inpIdx]; Blob &outBlob = outputs[inpIdx];
int workSize[2];
int splitDim = (acrossChannels) ? 1 : 2; int splitDim = (acrossChannels) ? 1 : 2;
workSize[0] = (int)inpBlob.total(0, splitDim); Shape workSize((int)inpBlob.total(0, splitDim), (int)inpBlob.total(splitDim));
workSize[1] = (int)inpBlob.total(splitDim); Mat inpMat = reshaped(inpBlob.matRefConst(), workSize);
Mat outMat = reshaped(outBlob.matRef(), workSize);
Mat inpMat = inpBlob.matRef().reshape(1, 2, workSize);
Mat outMat = outBlob.matRef().reshape(1, 2, workSize);
Scalar mean, dev; Scalar mean, dev;
for (int i = 0; i < workSize[0]; i++) for (int i = 0; i < workSize[0]; i++)
...@@ -86,12 +84,18 @@ void MVNLayer::forward(std::vector<Blob *> &inputs, std::vector<Blob> &outputs) ...@@ -86,12 +84,18 @@ void MVNLayer::forward(std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
Mat inpRow = inpMat.row(i); Mat inpRow = inpMat.row(i);
Mat outRow = outMat.row(i); Mat outRow = outMat.row(i);
cv::meanStdDev(inpRow, mean, (normalizeVariance) ? dev : noArray()); cv::meanStdDev(inpRow, mean, (normVariance) ? dev : noArray());
double alpha = (normalizeVariance) ? 1/(eps + dev[0]) : 1; double alpha = (normVariance) ? 1/(eps + dev[0]) : 1;
inpRow.convertTo(outRow, outRow.type(), alpha, -mean[0] * alpha); inpRow.convertTo(outRow, outRow.type(), alpha, -mean[0] * alpha);
} }
} }
} }
Ptr<MVNLayer> MVNLayer::create(bool normVariance, bool acrossChannels, double eps)
{
return Ptr<MVNLayer>(new MVNLayerImpl(normVariance, acrossChannels, eps));
}
} }
} }
...@@ -42,20 +42,18 @@ ...@@ -42,20 +42,18 @@
#ifndef __OPENCV_DNN_LAYERS_MVN_LAYER_HPP__ #ifndef __OPENCV_DNN_LAYERS_MVN_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_MVN_LAYER_HPP__ #define __OPENCV_DNN_LAYERS_MVN_LAYER_HPP__
#include "../precomp.hpp" #include "../precomp.hpp"
#include <opencv2/dnn/all_layers.hpp>
namespace cv namespace cv
{ {
namespace dnn namespace dnn
{ {
class MVNLayer : public Layer class MVNLayerImpl : public MVNLayer
{ {
double eps;
bool acrossChannels, normalizeVariance;
public: public:
MVNLayer(LayerParams &params); MVNLayerImpl(bool normVariance_ = true, bool acrossChannels_ = false, double eps_ = 1e-9);
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs); void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs); void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
}; };
......
#include "op_blas.hpp"
#if HAVE_CBLAS
#include "opencv_cblas.hpp"
#endif
#include <iostream>
namespace cv
{
namespace dnn
{
// Generalized matrix multiply front-end.
// When C wraps a cv::Mat the work is handed to gemmCPU(); for any other kind of
// array the call is forwarded to cv::gemm(), with C serving as the destination.
//   A, B   - input matrices
//   alpha  - scale applied to the A*B product
//   C      - accumulator on input, result on output
//   beta   - scale applied to the incoming contents of C
//   flags  - GEMM_*_T transpose flags, passed through unchanged
void gemm(InputArray A, InputArray B, double alpha, InputOutputArray C, double beta, int flags)
{
    if (!C.isMat())
    {
        // With beta == 0 no addend is supplied (noArray()), otherwise C itself is
        // used as the addend; in both cases the result is written into C.
        cv::gemm(A, B, alpha, (beta == 0) ? noArray() : C, beta, C, flags);
        return;
    }

    gemmCPU(A.getMat(), B.getMat(), alpha, C.getMatRef(), beta, flags);
}
// Writes the logical dimensions of the 2-D matrix A into rows/cols.
// If isTrans is set the two dimensions are exchanged, i.e. the outputs describe
// the transposed matrix. The 2-D requirement is only checked in debug builds.
inline void SwapRowCols(const Mat &A, int &rows, int &cols, bool isTrans)
{
    CV_DbgAssert(A.dims == 2);

    if (isTrans)
    {
        rows = A.cols;
        cols = A.rows;
    }
    else
    {
        rows = A.rows;
        cols = A.cols;
    }
}
// CPU matrix multiply with accumulation into C (GEMM convention:
// C = alpha * op(A) * op(B) + beta * C, where op() optionally transposes).
//
// With HAVE_CBLAS the call goes to cblas_sgemm / cblas_dgemm in row-major mode;
// otherwise it falls back to cv::gemm() using C as both addend and destination.
//
// Requirements enforced on the CBLAS path:
//   - GEMM_3_T (transposed C) is not supported;
//   - op(A), op(B) and C must have compatible sizes;
//   - all three matrices are continuous, share one type and C does not alias A or B;
//   - the element type is CV_32F or CV_64F, anything else raises Error::BadDepth.
void gemmCPU(const Mat &A, const Mat &B, double alpha, Mat &C, double beta, int flags /*= 0*/)
{
#if HAVE_CBLAS
    const bool transA = (flags & GEMM_1_T) != 0;
    const bool transB = (flags & GEMM_2_T) != 0;
    const bool transC = (flags & GEMM_3_T) != 0;

    // Logical (post-transpose) dimensions of every operand.
    int Arows, Acols, Brows, Bcols, Crows, Ccols;
    SwapRowCols(A, Arows, Acols, transA);
    SwapRowCols(B, Brows, Bcols, transB);
    SwapRowCols(C, Crows, Ccols, transC);

    CV_Assert(!(flags & GEMM_3_T));
    CV_Assert(Acols == Brows && Arows == Crows && Bcols == Ccols);
    CV_Assert(A.isContinuous() && B.isContinuous() && C.isContinuous());
    CV_Assert(A.type() == B.type() && B.type() == C.type());
    CV_Assert(A.data != C.data && B.data != C.data);

    // GEMM problem size: (M x K) * (K x N) -> (M x N).
    const int M = Arows;
    const int N = Bcols;
    const int K = Acols;

    // Leading dimensions are the stored (untransposed) column counts; this relies
    // on the continuity asserted above.
    const int lda = A.cols;
    const int ldb = B.cols;
    const int ldc = C.cols;

    switch (C.type())
    {
    case CV_32F:
        cblas_sgemm(CblasRowMajor,
                    transA ? CblasTrans : CblasNoTrans,
                    transB ? CblasTrans : CblasNoTrans,
                    M, N, K,
                    (float)alpha, A.ptr<float>(), lda,
                    B.ptr<float>(), ldb,
                    (float)beta, C.ptr<float>(), ldc);
        break;

    case CV_64F:
        //TODO: Should be tested
        cblas_dgemm(CblasRowMajor,
                    transA ? CblasTrans : CblasNoTrans,
                    transB ? CblasTrans : CblasNoTrans,
                    M, N, K,
                    alpha, A.ptr<double>(), lda,
                    B.ptr<double>(), ldb,
                    beta, C.ptr<double>(), ldc);
        break;

    default:
        CV_Error(Error::BadDepth, "Only floating point types are supported");
    }
#else
    // No BLAS available: let OpenCV's own gemm do the work in place on C.
    cv::gemm(A, B, alpha, C, beta, C, flags);
#endif
}
// Returns the number of threads the BLAS backend reports.
// Only OpenBLAS is queried (detected through the OPENBLAS_VERSION macro);
// in every other configuration the function reports 1.
int getBlasThreads()
{
    int numThreads = 1;
#ifdef OPENBLAS_VERSION
    numThreads = openblas_get_num_threads();
#endif
    return numThreads;
}
// Requests that the BLAS backend use numThreads threads.
// Takes effect only when built against OpenBLAS (OPENBLAS_VERSION defined);
// otherwise the argument is ignored and the call is a no-op.
void setBlasThreads(int numThreads)
{
#ifndef OPENBLAS_VERSION
    (void)numThreads; //suppress compilers' warning
#else
    openblas_set_num_threads(numThreads);
    // NOTE(review): presumably the GotoBLAS-named equivalent of the call above,
    // kept for compatibility - confirm against the OpenBLAS headers.
    goto_set_num_threads(numThreads);
#endif
}
}
}
...@@ -39,47 +39,21 @@ ...@@ -39,47 +39,21 @@
// //
//M*/ //M*/
#ifndef __OPENCV_DNN_LAYERS_OP_BLAS_HPP__
#define __OPENCV_DNN_LAYERS_OP_BLAS_HPP__
#include "../precomp.hpp" #include "../precomp.hpp"
#include <opencv2/core/ocl.hpp>
#include "im2col.hpp"
#include "opencl_kernels_dnn.hpp"
namespace cv namespace cv
{ {
namespace dnn namespace dnn
{ {
int getBlasThreads();
#ifdef HAVE_OPENCL void setBlasThreads(int numThreads);
void im2col_ocl(UMat &img,
int channels, int height, int width,
int kernel_h, int kernel_w,
int pad_h, int pad_w,
int stride_h, int stride_w,
UMat &col)
{
int h_out = (height + 2 * pad_h - kernel_h) / stride_h + 1;
int w_out = (width + 2 * pad_w - kernel_w) / stride_w + 1;
CV_Assert(img.isContinuous() && col.isContinuous());
CV_Assert(img.total() == (size_t)channels * height * width);
CV_Assert(col.total() == (size_t)channels * kernel_h * kernel_w * h_out * w_out);
ocl::Kernel im2col_ker("im2col", ocl::dnn::im2col_oclsrc);
CV_Assert(!im2col_ker.empty());
im2col_ker.args(ocl::KernelArg::PtrReadOnly(img), (int)img.offset, void gemm(InputArray A, InputArray B, double alpha, InputOutputArray C, double beta, int flags = 0);
channels, height, width,
kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
h_out, w_out,
ocl::KernelArg::PtrWriteOnly(col), (int)col.offset
);
size_t localSize = ocl::Device::getDefault().maxWorkGroupSize();
size_t globalSize = (size_t)channels * h_out * w_out;
CV_Assert(im2col_ker.run(1, &globalSize, &localSize, true));
}
#endif // HAVE_OPENCL
void gemmCPU(const Mat &A, const Mat &B, double alpha, Mat &C, double beta, int flags = 0);
} }
} }
#endif
\ No newline at end of file
...@@ -39,88 +39,84 @@ ...@@ -39,88 +39,84 @@
// //
//M*/ //M*/
#ifndef __OPENCV_DNN_LAYERS_IM2COL_HPP__ #include "../precomp.hpp"
#define __OPENCV_DNN_LAYERS_IM2COL_HPP__ #include <opencv2/core/ocl.hpp>
#include "opencl_kernels_dnn.hpp"
#include "op_im2col.hpp"
namespace cv namespace cv
{ {
namespace dnn namespace dnn
{ {
template <typename Dtype> #ifdef HAVE_OPENCL
void im2col_cpu(const Dtype* data_im,
int channels, int height, int width, bool im2col_ocl(const UMat &img,
int kernel_h, int kernel_w, int channels, int height, int width,
int pad_h, int pad_w, int kernel_h, int kernel_w,
int stride_h, int stride_w, int pad_h, int pad_w,
Dtype* data_col) int stride_h, int stride_w,
UMat &col)
{ {
int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1; int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1; int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
int channels_col = channels * kernel_h * kernel_w; int channels_col = channels * kernel_h * kernel_w;
for (int c = 0; c < channels_col; ++c) { int esz = img.elemSize();
int w_offset = c % kernel_w;
int h_offset = (c / kernel_w) % kernel_h; CV_Assert(img.isContinuous() && col.isContinuous());
int c_im = c / kernel_h / kernel_w; CV_Assert(img.total() == (size_t)channels * height * width);
for (int h = 0; h < height_col; ++h) { CV_Assert(col.total() == (size_t)channels_col * height_col * width_col);
for (int w = 0; w < width_col; ++w) {
int h_pad = h * stride_h - pad_h + h_offset; ocl::Kernel ker("im2col", ocl::dnn::im2col_oclsrc, String("-DT=") + ocl::typeToStr(img.type()));
int w_pad = w * stride_w - pad_w + w_offset; if (ker.empty())
if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width) return false;
data_col[(c * height_col + h) * width_col + w] =
data_im[(c_im * height + h_pad) * width + w_pad]; ker.args(ocl::KernelArg::PtrReadOnly(img), (int)img.offset/esz,
else channels, height, width,
data_col[(c * height_col + h) * width_col + w] = 0; kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
} height_col, width_col,
} ocl::KernelArg::PtrWriteOnly(col), (int)col.offset/esz
} );
size_t localSize = ocl::Device::getDefault().maxWorkGroupSize();
size_t globalSize = (size_t)channels * height_col * width_col;
return ker.run(1, &globalSize, &localSize, true);
} }
template <typename Dtype> bool col2im_ocl(const UMat &col,
void col2im_cpu(const Dtype* data_col,
int channels, int height, int width, int channels, int height, int width,
int patch_h, int patch_w, int kernel_h, int kernel_w,
int pad_h, int pad_w, int pad_h, int pad_w,
int stride_h, int stride_w, int stride_h, int stride_w,
Dtype* data_im) UMat &img)
{ {
memset(data_im, 0, height * width * channels * sizeof(Dtype)); int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
int channels_col = channels * kernel_h * kernel_w;
int esz = img.elemSize();
int height_col = (height + 2 * pad_h - patch_h) / stride_h + 1; CV_Assert(img.isContinuous() && col.isContinuous());
int width_col = (width + 2 * pad_w - patch_w) / stride_w + 1; CV_Assert(img.total() == (size_t)channels * height * width);
int channels_col = channels * patch_h * patch_w; CV_Assert(col.total() == (size_t)channels_col * height_col * width_col);
for (int c = 0; c < channels_col; ++c) ocl::Kernel ker("col2im", ocl::dnn::col2im_oclsrc, String("-DT=") + ocl::typeToStr(col.type()));
{ if (ker.empty())
int w_offset = c % patch_w; return false;
int h_offset = (c / patch_w) % patch_h;
int c_im = c / patch_h / patch_w;
for (int h = 0; h < height_col; ++h) ker.args((int)img.total(),
{ ocl::KernelArg::PtrReadOnly(col), (int)col.offset/esz,
for (int w = 0; w < width_col; ++w) height, width, channels,
{ kernel_h, kernel_w,
int h_pad = h * stride_h - pad_h + h_offset; pad_h, pad_w,
int w_pad = w * stride_w - pad_w + w_offset; stride_h, stride_w,
height_col, width_col,
ocl::KernelArg::PtrWriteOnly(img), (int)img.offset/esz);
if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width) size_t localSize = ocl::Device::getDefault().maxWorkGroupSize();
data_im[(c_im * height + h_pad) * width + w_pad] += size_t globalSize = img.total();
data_col[(c * height_col + h) * width_col + w]; return ker.run(1, &globalSize, &localSize, true);
}
}
}
} }
#ifdef HAVE_OPENCL
void im2col_ocl(UMat &img,
int channels, int height, int width,
int kernel_h, int kernel_w,
int pad_h, int pad_w,
int stride_h, int stride_w,
UMat &col);
#endif #endif
} }
} }
#endif
This diff is collapsed.
This diff is collapsed.
/*M/////////////////////////////////////////////////////////////////////////////////////// /*M///////////////////////////////////////////////////////////////////////////////////////
// //
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
// //
...@@ -42,37 +42,39 @@ ...@@ -42,37 +42,39 @@
#ifndef __OPENCV_DNN_LAYERS_POOLING_LAYER_HPP__ #ifndef __OPENCV_DNN_LAYERS_POOLING_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_POOLING_LAYER_HPP__ #define __OPENCV_DNN_LAYERS_POOLING_LAYER_HPP__
#include "../precomp.hpp" #include "../precomp.hpp"
#include <opencv2/dnn/all_layers.hpp>
namespace cv namespace cv
{ {
namespace dnn namespace dnn
{ {
class PoolingLayer : public Layer
{
enum
{
MAX,
AVE,
STOCHASTIC
};
int type; class PoolingLayerImpl : public PoolingLayer
int padH, padW; {
int strideH, strideW; bool useOpenCL;
int kernelH, kernelW; Size inp, out;
void computeOutputShape(Size inpSz);
bool pooling_ocl(const char *kname, const Blob &src, Blob &dst, Blob *mask = NULL);
void maxPooling(Blob &src, Blob &dst);
void maxPooling_cpu(Blob &src, Blob &dst);
bool maxPooling_ocl(Blob &src, Blob &dst);
void avePooling(Blob &src, Blob &dst);
void avePooling_cpu(Blob &src, Blob &dst);
bool avePooling_ocl(Blob &src, Blob &dst);
public:
int inpH, inpW; PoolingLayerImpl();
int outH, outW; PoolingLayerImpl(int type, Size kernel, Size stride, Size pad);
void computeOutputShape(int inpH, int inpW); void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void maxPooling(Blob &input, Blob &output); void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void avePooling(Blob &input, Blob &output); };
public:
PoolingLayer(LayerParams &params);
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
};
} }
} }
#endif #endif
This diff is collapsed.
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_DNN_LAYERS_RECURRENT_LAYERS_HPP__
#define __OPENCV_DNN_LAYERS_RECURRENT_LAYERS_HPP__
#include "../precomp.hpp"
#include <opencv2/dnn/all_layers.hpp>
namespace cv
{
namespace dnn
{
}
}
#endif
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
...@@ -40,4 +40,5 @@ ...@@ -40,4 +40,5 @@
//M*/ //M*/
#include <opencv2/core.hpp> #include <opencv2/core.hpp>
#include "cvconfig.h"
#include <opencv2/dnn.hpp> #include <opencv2/dnn.hpp>
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment