Commit 9a342b51 authored by Vadim Pisarevsky's avatar Vadim Pisarevsky

Merge pull request #707 from ludv1x:dnn

parents e7b5c81b 942e9205
......@@ -17,15 +17,38 @@ ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4701)
# Resolve libprotobuf dependency
# ----------------------------------------------------------------------------
include(cmake/OpenCVFindLibProtobuf.cmake)
ocv_glob_module_sources(${PROTOBUF_SRCS} ${PROTOBUF_HDRS})
ocv_source_group("Src\\protobuf" FILES ${PROTOBUF_SRCS} ${PROTOBUF_HDRS})
ocv_module_include_directories(include ${PROTOBUF_INCLUDE_DIR})
# ----------------------------------------------------------------------------
# Try to find BLAS libraries
# ----------------------------------------------------------------------------
OCV_OPTION(${the_module}_WITH_BLAS "Use external BLAS library to speedup processing" OFF)
include(cmake/OpenCVFindCBLAS.cmake)
ocv_glob_module_sources(${PROTOBUF_SRCS} ${PROTOBUF_HDRS} ${CBLAS_H_PROXY_PATH})
ocv_create_module(${PROTOBUF_LIBRARIES})
ocv_add_samples()
ocv_add_accuracy_tests()
ocv_add_perf_tests()
# ----------------------------------------------------------------------------
# Link BLAS
# ----------------------------------------------------------------------------
if(${the_module}_WITH_BLAS AND HAVE_BLAS)
add_definitions(-DHAVE_CBLAS=1)
ocv_module_include_directories(${${the_module}_BLAS_INCLUDE_DIR})
ocv_add_dependencies(${the_module} ${${the_module}_BLAS_LIBRARIES})
target_link_libraries(${the_module} ${${the_module}_BLAS_LIBRARIES})
if(${the_module}_BLAS_BINARIES)
ocv_install_target(${the_module} EXPORT ${the_module}_BLAS_BINARIES
RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} COMPONENT libs)
endif()
else()
add_definitions(-DHAVE_CBLAS=0)
endif()
# ----------------------------------------------------------------------------
# Download pre-trained models for complex testing on GoogLeNet and AlexNet
# ----------------------------------------------------------------------------
......
#COPYRIGHT
#
#All contributions by the University of California:
#Copyright (c) 2014, 2015, The Regents of the University of California (Regents)
#All rights reserved.
#
#All other contributions:
#Copyright (c) 2014, 2015, the respective contributors
#All rights reserved.
#
#Caffe uses a shared copyright model: each contributor holds copyright over
#their contributions to Caffe. The project versioning records all such
#contribution and copyright details. If a contributor wants to further mark
#their specific copyright on a particular contribution, they should indicate
#their copyright solely in the commit message of the change when it is
#committed.
#
#LICENSE
#
#Redistribution and use in source and binary forms, with or without
#modification, are permitted provided that the following conditions are met:
#
#1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
#THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
#ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
#WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
#DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
#ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
#(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
#LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
#ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
#(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
#SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#CONTRIBUTION AGREEMENT
#
#By contributing to the BVLC/caffe repository through pull-request, comment,
#or otherwise, the contributor releases their content to the
#license and copyright terms herein.
# Find the Atlas (and Lapack) libraries
#
# The following variables are optionally searched for defaults
# Atlas_ROOT_DIR: Base directory where all Atlas components are found
#
# The following are set after configuration is done:
# Atlas_FOUND
# Atlas_INCLUDE_DIRS
# Atlas_LIBRARIES
# Atlas_LIBRARYRARY_DIRS
set(Atlas_INCLUDE_SEARCH_PATHS
/usr/include/atlas
/usr/include/atlas-base
$ENV{Atlas_ROOT_DIR}
$ENV{Atlas_ROOT_DIR}/include
)
set(Atlas_LIB_SEARCH_PATHS
/usr/lib/atlas
/usr/lib/atlas-base
$ENV{Atlas_ROOT_DIR}
$ENV{Atlas_ROOT_DIR}/lib
)
find_path(Atlas_CBLAS_INCLUDE_DIR NAMES cblas.h PATHS ${Atlas_INCLUDE_SEARCH_PATHS})
find_path(Atlas_CLAPACK_INCLUDE_DIR NAMES clapack.h PATHS ${Atlas_INCLUDE_SEARCH_PATHS})
find_library(Atlas_CBLAS_LIBRARY NAMES ptcblas_r ptcblas cblas_r cblas PATHS ${Atlas_LIB_SEARCH_PATHS})
find_library(Atlas_BLAS_LIBRARY NAMES atlas_r atlas PATHS ${Atlas_LIB_SEARCH_PATHS})
find_library(Atlas_LAPACK_LIBRARY NAMES alapack_r alapack lapack_atlas PATHS ${Atlas_LIB_SEARCH_PATHS})
set(LOOKED_FOR
Atlas_CBLAS_INCLUDE_DIR
Atlas_CLAPACK_INCLUDE_DIR
Atlas_CBLAS_LIBRARY
Atlas_BLAS_LIBRARY
Atlas_LAPACK_LIBRARY
)
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(Atlas DEFAULT_MSG ${LOOKED_FOR})
if(ATLAS_FOUND)
set(Atlas_INCLUDE_DIR ${Atlas_CBLAS_INCLUDE_DIR} ${Atlas_CLAPACK_INCLUDE_DIR})
set(Atlas_LIBRARIES ${Atlas_LAPACK_LIBRARY} ${Atlas_CBLAS_LIBRARY} ${Atlas_BLAS_LIBRARY})
mark_as_advanced(${LOOKED_FOR})
message(STATUS "Found Atlas (include: ${Atlas_CBLAS_INCLUDE_DIR}, library: ${Atlas_BLAS_LIBRARY})")
endif(ATLAS_FOUND)
\ No newline at end of file
#COPYRIGHT
#
#All contributions by the University of California:
#Copyright (c) 2014, 2015, The Regents of the University of California (Regents)
#All rights reserved.
#
#All other contributions:
#Copyright (c) 2014, 2015, the respective contributors
#All rights reserved.
#
#Caffe uses a shared copyright model: each contributor holds copyright over
#their contributions to Caffe. The project versioning records all such
#contribution and copyright details. If a contributor wants to further mark
#their specific copyright on a particular contribution, they should indicate
#their copyright solely in the commit message of the change when it is
#committed.
#
#LICENSE
#
#Redistribution and use in source and binary forms, with or without
#modification, are permitted provided that the following conditions are met:
#
#1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
#THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
#ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
#WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
#DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
#ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
#(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
#LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
#ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
#(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
#SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#CONTRIBUTION AGREEMENT
#
#By contributing to the BVLC/caffe repository through pull-request, comment,
#or otherwise, the contributor releases their content to the
#license and copyright terms herein.
SET(Open_BLAS_INCLUDE_SEARCH_PATHS
/usr/include
/usr/include/openblas
/usr/include/openblas-base
/usr/local/include
/usr/local/include/openblas
/usr/local/include/openblas-base
/opt/OpenBLAS/include
$ENV{OpenBLAS_HOME}
$ENV{OpenBLAS_HOME}/include
)
SET(Open_BLAS_LIB_SEARCH_PATHS
/lib/
/lib/openblas-base
/lib64/
/usr/lib
/usr/lib/openblas-base
/usr/lib64
/usr/local/lib
/usr/local/lib64
/opt/OpenBLAS/lib
$ENV{OpenBLAS}cd
$ENV{OpenBLAS}/lib
$ENV{OpenBLAS_HOME}
$ENV{OpenBLAS_HOME}/lib
)
FIND_PATH(OpenBLAS_INCLUDE_DIR NAMES cblas.h PATHS ${Open_BLAS_INCLUDE_SEARCH_PATHS})
FIND_LIBRARY(OpenBLAS_LIB NAMES openblas PATHS ${Open_BLAS_LIB_SEARCH_PATHS})
SET(OpenBLAS_FOUND ON)
# Check include files
IF(NOT OpenBLAS_INCLUDE_DIR)
SET(OpenBLAS_FOUND OFF)
MESSAGE(STATUS "Could not find OpenBLAS include. Turning OpenBLAS_FOUND off")
ENDIF()
# Check libraries
IF(NOT OpenBLAS_LIB)
SET(OpenBLAS_FOUND OFF)
MESSAGE(STATUS "Could not find OpenBLAS lib. Turning OpenBLAS_FOUND off")
ENDIF()
IF (OpenBLAS_FOUND)
IF (NOT OpenBLAS_FIND_QUIETLY)
MESSAGE(STATUS "Found OpenBLAS libraries: ${OpenBLAS_LIB}")
MESSAGE(STATUS "Found OpenBLAS include: ${OpenBLAS_INCLUDE_DIR}")
ENDIF (NOT OpenBLAS_FIND_QUIETLY)
ELSE (OpenBLAS_FOUND)
IF (OpenBLAS_FIND_REQUIRED)
MESSAGE(FATAL_ERROR "Could not find OpenBLAS")
ENDIF (OpenBLAS_FIND_REQUIRED)
ENDIF (OpenBLAS_FOUND)
MARK_AS_ADVANCED(
OpenBLAS_INCLUDE_DIR
OpenBLAS_LIB
OpenBLAS
)
\ No newline at end of file
macro(_find_file_in_dirs VAR NAME DIRS)
find_path(${VAR} ${NAME} ${DIRS} NO_DEFAULT_PATH)
set(${VAR} ${${VAR}}/${NAME})
unset(${VAR} CACHE)
endmacro()
if(${the_module}_WITH_BLAS)
set(_bp ${the_module}_BLAS) #prefix for blas variables
set(BLAS_CBLAS_H "cblas.h")
set(HAVE_BLAS "")
if(NOT HAVE_BLAS) #check custom BLAS from user input
if(${_bp}_INCLUDE_DIR AND ${_bp}_LIBRARIES AND ${_bp}_CBLAS_H)
set(HAVE_BLAS "Custom")
endif()
endif()
if(NOT HAVE_BLAS)
include(cmake/OpenCVFindMKL.cmake)
if(MKL_FOUND)
set(BLAS_INCLUDE_DIR ${MKL_INCLUDE_DIRS})
set(BLAS_LIBRARIES ${MKL_LIBRARIES} )
set(BLAS_CBLAS_H "mkl_cblas.h" )
set(HAVE_BLAS "MKL")
endif()
endif()
if(NOT HAVE_BLAS)
include(cmake/FindOpenBLAS.cmake)
if(OpenBLAS_FOUND)
set(BLAS_INCLUDE_DIR ${OpenBLAS_INCLUDE_DIR} )
set(BLAS_LIBRARIES ${OpenBLAS_LIB} )
set(HAVE_BLAS "OpenBLAS")
endif()
endif()
if(NOT HAVE_BLAS AND UNIX)
include(cmake/FindAtlas.cmake)
if(ATLAS_FOUND)
set(BLAS_INCLUDE_DIR ${Atlas_INCLUDE_DIR})
set(BLAS_LIBRARIES ${Atlas_LIBRARIES} )
set(HAVE_BLAS "Atlas")
endif()
endif()
if(NOT HAVE_BLAS OR NOT (HAVE_BLAS STREQUAL "Custom"))
set(${_bp}_INCLUDE_DIR ${BLAS_INCLUDE_DIR} CACHE PATH "Path to BLAS include dir" FORCE)
set(${_bp}_CBLAS_H ${BLAS_CBLAS_H} CACHE STRING "Alternative name of cblas.h" FORCE)
set(${_bp}_LIBRARIES ${BLAS_LIBRARIES} CACHE FILEPATH "Path to BLAS libraries that will be linked with ${the_module} module" FORCE)
set(${_bp}_BINARIES ${BLAS_BINARIES} CACHE FILEPATH "Path to BLAS binaries (.so, .dll) that will be installed with ${the_module} module" FORCE)
endif()
if(HAVE_BLAS) #adding proxy cblas.h header
_find_file_in_dirs(CBLAS_H_PATH ${${_bp}_CBLAS_H} ${${_bp}_INCLUDE_DIR})
if(NOT CBLAS_H_PATH)
message(WARNING "CBLAS header '${${_bp}_CBLAS_H}' not found into '${${_bp}_INCLUDE_DIR}'")
endif()
set(CBLAS_H_PROXY_PATH ${CMAKE_CURRENT_BINARY_DIR}/opencv_cblas.hpp)
set(_include_str "\#include \"${CBLAS_H_PATH}\"")
file(WRITE ${CBLAS_H_PROXY_PATH} ${_include_str})
endif()
endif()
\ No newline at end of file
#
# The script to detect Intel(R) Math Kernel Library (MKL)
# installation/package
#
# Parameters:
# MKL_WITH_TBB
#
# On return this will define:
#
# HAVE_MKL - True if Intel IPP found
# MKL_ROOT_DIR - root of IPP installation
# MKL_INCLUDE_DIRS - IPP include folder
# MKL_LIBRARIES - IPP libraries that are used by OpenCV
#
macro(mkl_fail)
set(HAVE_MKL OFF CACHE BOOL "True if MKL found")
set(MKL_ROOT_DIR ${MKL_ROOT_DIR} CACHE PATH "Path to MKL directory")
unset(MKL_INCLUDE_DIRS CACHE)
unset(MKL_LIBRARIES CACHE)
endmacro()
macro(get_mkl_version VERSION_FILE)
# read MKL version info from file
file(STRINGS ${VERSION_FILE} STR1 REGEX "__INTEL_MKL__")
file(STRINGS ${VERSION_FILE} STR2 REGEX "__INTEL_MKL_MINOR__")
file(STRINGS ${VERSION_FILE} STR3 REGEX "__INTEL_MKL_UPDATE__")
#file(STRINGS ${VERSION_FILE} STR4 REGEX "INTEL_MKL_VERSION")
# extract info and assign to variables
string(REGEX MATCHALL "[0-9]+" MKL_VERSION_MAJOR ${STR1})
string(REGEX MATCHALL "[0-9]+" MKL_VERSION_MINOR ${STR2})
string(REGEX MATCHALL "[0-9]+" MKL_VERSION_UPDATE ${STR3})
set(MKL_VERSION_STR "${MKL_VERSION_MAJOR}.${MKL_VERSION_MINOR}.${MKL_VERSION_UPDATE}" CACHE STRING "MKL version" FORCE)
endmacro()
if(NOT DEFINED MKL_USE_MULTITHREAD)
OCV_OPTION(MKL_WITH_TBB "Use MKL with TBB multithreading" OFF)#ON IF WITH_TBB)
OCV_OPTION(MKL_WITH_OPENMP "Use MKL with OpenMP multithreading" OFF)#ON IF WITH_OPENMP)
endif()
#check current MKL_ROOT_DIR
if(NOT MKL_ROOT_DIR OR NOT EXISTS ${MKL_ROOT_DIR}/include/mkl.h)
set(MKLROOT_PATHS ${MKL_ROOT_DIR})
if(DEFINED $ENV{MKLROOT})
list(APPEND MKLROOT_PATHS $ENV{MKLROOT})
endif()
if(WIN32)
set(ProgramFilesx86 "ProgramFiles(x86)")
list(APPEND MKLROOT_PATHS $ENV{${ProgramFilesx86}}/IntelSWTools/compilers_and_libraries/windows/mkl)
endif()
if(UNIX)
list(APPEND MKLROOT_PATHS "/opt/intel/mkl")
endif()
find_path(MKL_ROOT_DIR include/mkl.h PATHS ${MKLROOT_PATHS})
endif()
if(NOT MKL_ROOT_DIR)
mkl_fail()
return()
endif()
set(MKL_INCLUDE_DIRS ${MKL_ROOT_DIR}/include)
set(MKL_INCLUDE_HEADERS ${MKL_INCLUDE_DIRS}/mkl.h ${MKL_INCLUDE_DIRS}/mkl_version.h)
#determine arch
if(CMAKE_CXX_SIZEOF_DATA_PTR EQUAL 8)
set(MKL_X64 1)
set(MKL_ARCH "intel64")
include(CheckTypeSize)
CHECK_TYPE_SIZE(int _sizeof_int)
if (_sizeof_int EQUAL 4)
set(MKL_LP64 "lp64")
else()
set(MKL_LP64 "ilp64")
endif()
else()
set(MKL_ARCH "ia32")
endif()
if(MSVC)
set(MKL_EXT ".lib")
set(MKL_PRE "")
else()
set(MKL_EXT ".a")
set(MKL_PRE "lib")
endif()
set(MKL_LIB_DIR ${MKL_ROOT_DIR}/lib/${MKL_ARCH})
set(MKL_LIBRARIES ${MKL_LIB_DIR}/${MKL_PRE}mkl_core${MKL_EXT} ${MKL_LIB_DIR}/${MKL_PRE}mkl_intel_${MKL_LP64}${MKL_EXT})
if(MKL_WITH_TBB)
list(APPEND MKL_LIBRARIES ${MKL_LIB_DIR}/${MKL_PRE}mkl_tbb_thread${MKL_EXT})
list(APPEND MKL_LIBRARIES ${MKL_ROOT_DIR}/../tbb/lib/${MKL_ARCH}/tbb${MKL_EXT})
elseif(MKL_WITH_OPENMP)
message(FATAL_ERROR "Multithreaded MKL is not supported yet")
else()
list(APPEND MKL_LIBRARIES ${MKL_LIB_DIR}/${MKL_PRE}mkl_sequential${MKL_EXT})
endif()
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(MKL MKL_INCLUDE_HEADERS MKL_LIBRARIES)
if(MKL_FOUND)
get_mkl_version(${MKL_INCLUDE_DIRS}/mkl_version.h)
message(STATUS "Found MKL ${MKL_VERSION_STR} at: ${MKL_ROOT_DIR}")
set(HAVE_MKL ON CACHE BOOL "True if MKL found")
set(MKL_ROOT_DIR ${MKL_ROOT_DIR} CACHE PATH "Path to MKL directory")
set(MKL_INCLUDE_DIRS ${MKL_INCLUDE_DIRS} CACHE PATH "Path to MKL include directory")
if(NOT UNIX)
set(MKL_LIBRARIES ${MKL_LIBRARIES} CACHE FILEPATH "MKL libarries")
else()
#it's ugly but helps to avoid cyclic lib problem
set(MKL_LIBRARIES ${MKL_LIBRARIES} ${MKL_LIBRARIES} ${MKL_LIBRARIES} "-lpthread" "-lm" "-ldl")
set(MKL_LIBRARIES ${MKL_LIBRARIES} CACHE STRING "MKL libarries")
endif()
else()
endif()
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
......@@ -48,20 +48,50 @@ namespace cv
namespace dnn
{
inline BlobShape::BlobShape(int ndims, int fill) : sz( (size_t)std::max(ndims, 0) )
inline BlobShape::BlobShape()
{
sz.allocate(4);
for (size_t i = 0; i < sz.size(); i++)
sz[i] = 1;
}
inline BlobShape BlobShape::all(int ndims, int fill)
{
CV_Assert(ndims >= 0);
BlobShape res;
res.sz.allocate(ndims);
for (int i = 0; i < ndims; i++)
sz[i] = fill;
res.sz[i] = fill;
return res;
}
inline BlobShape::BlobShape(int ndims, const int *sizes) : sz( (size_t)std::max(ndims, 0) )
{
CV_Assert(ndims >= 0);
if (!sizes)
return;
for (int i = 0; i < ndims; i++)
sz[i] = sizes[i];
}
inline BlobShape::BlobShape(int s0) : sz(1)
{
sz[0] = s0;
}
inline BlobShape::BlobShape(int s0, int s1) : sz(2)
{
sz[0] = s0;
sz[1] = s1;
}
inline BlobShape::BlobShape(int s0, int s1, int s2) : sz(3)
{
sz[0] = s0;
sz[1] = s1;
sz[2] = s2;
}
inline BlobShape::BlobShape(int num, int cn, int rows, int cols) : sz(4)
{
sz[0] = num;
......@@ -120,7 +150,13 @@ inline int &BlobShape::operator[] (int axis)
return sz[(axis < 0) ? axis + dims() : axis];
}
inline ptrdiff_t BlobShape::total()
inline int BlobShape::canonicalAxis(int axis) const
{
CV_Assert(-dims() <= axis && axis < dims());
return (axis < 0) ? axis + dims() : axis;
}
inline ptrdiff_t BlobShape::total() const
{
if (dims() == 0)
return 0;
......@@ -131,11 +167,52 @@ inline ptrdiff_t BlobShape::total()
return res;
}
inline ptrdiff_t BlobShape::total(int startAxis, int endAxis) const
{
if (isEmpty())
return 0;
if (endAxis == INT_MAX)
endAxis = dims();
else if (endAxis < 0)
endAxis += dims();
startAxis = (startAxis < 0) ? startAxis + dims() : startAxis;
CV_Assert(0 <= startAxis && startAxis <= endAxis && endAxis <= dims());
ptrdiff_t res = 1;
for (int i = startAxis; i < endAxis; i++)
res *= sz[i];
return res;
}
inline BlobShape BlobShape::slice(int startAxis, int endAxis) const
{
if (isEmpty())
return BlobShape::empty();
if (endAxis == INT_MAX)
endAxis = dims();
else if (endAxis < 0)
endAxis += dims();
startAxis = (startAxis < 0) ? startAxis + dims() : startAxis;
CV_Assert(0 <= startAxis && startAxis <= endAxis && endAxis <= dims());
BlobShape res(endAxis - startAxis, (const int*)NULL);
for (int i = startAxis; i < endAxis; i++)
res[i - startAxis] = sz[i];
return res;
}
inline const int *BlobShape::ptr() const
{
return sz;
}
inline int *BlobShape::ptr()
{
return sz;
}
inline bool BlobShape::equal(const BlobShape &other) const
{
if (this->dims() != other.dims())
......@@ -155,19 +232,83 @@ inline bool BlobShape::operator==(const BlobShape &r) const
return this->equal(r);
}
inline BlobShape BlobShape::like(const Mat &m)
{
return BlobShape(m.dims, (const int*)m.size);
}
inline BlobShape BlobShape::like(const UMat &m)
{
return BlobShape(m.dims, (const int*)m.size);
}
inline BlobShape BlobShape::empty()
{
return BlobShape(0, (const int*)NULL);
}
inline bool BlobShape::isEmpty() const
{
return dims() == 0;
}
inline BlobShape BlobShape::operator+(const BlobShape &r) const
{
BlobShape newShape(this->dims() + r.dims(), (int*)NULL);
for (int i = 0; i < this->dims(); i++)
newShape[i] = (*this)[i];
for (int i = 0; i < r.dims(); i++)
newShape[this->dims() + i] = r[i];
return newShape;
}
CV_EXPORTS std::ostream &operator<< (std::ostream &stream, const BlobShape &shape);
/////////////////////////////////////////////////////////////////////
inline int Blob::canonicalAxis(int axis) const
#ifndef CV_DNN_UMAT
# define CV_DNN_SWITCH_MU(cpu_expr, gpu_expr) (cpu_expr)
#else
# define CV_DNN_SWITCH_MU(cpu_expr, gpu_expr) ((state == HEAD_AT_UMAT) ? (gpu_expr) : (cpu_expr))
#endif
inline int Blob::dims() const
{
CV_Assert(-dims() <= axis && axis < dims());
return (axis < 0) ? axis + dims() : axis;
return CV_DNN_SWITCH_MU(m.dims, um.dims);
}
inline int Blob::dims() const
inline const int * Blob::sizes() const
{
return m.dims;
return CV_DNN_SWITCH_MU((const int*)m.size, (const int*)um.size);
}
inline int Blob::type() const
{
return CV_DNN_SWITCH_MU(m.type(), um.type());
}
template<int n>
inline size_t Blob::offset(const Vec<int, n> &pos) const
{
const MatStep &step = CV_DNN_SWITCH_MU(m.step, um.step);
size_t ofs = 0;
int i;
for (i = 0; i < std::min(n, dims()); i++)
{
CV_DbgAssert(pos[i] >= 0 && pos[i] < size(i));
ofs += step[i] * pos[i];
}
for (; i < dims(); i++)
CV_DbgAssert(pos[i] == 0);
CV_DbgAssert(ofs % elemSize() == 0);
return ofs / elemSize();
}
inline int Blob::canonicalAxis(int axis) const
{
CV_Assert(-dims() <= axis && axis < dims());
return (axis < 0) ? axis + dims() : axis;
}
inline int Blob::xsize(int axis) const
......@@ -196,27 +337,11 @@ inline size_t Blob::total(int startAxis, int endAxis) const
CV_Assert(0 <= startAxis && startAxis <= endAxis && endAxis <= dims());
size_t size = 1; //fix: assume that slice isn't empty
size_t cnt = 1; //fix: assume that slice isn't empty
for (int i = startAxis; i < endAxis; i++)
size *= (size_t)sizes()[i];
cnt *= (size_t)sizes()[i];
return size;
}
template<int n>
inline size_t Blob::offset(const Vec<int, n> &pos) const
{
size_t ofs = 0;
int i;
for (i = 0; i < std::min(n, dims()); i++)
{
CV_DbgAssert(pos[i] >= 0 && pos[i] < size(i));
ofs = ofs * (size_t)size(i) + pos[i];
}
for (; i < dims(); i++)
ofs *= (size_t)size(i);
return ofs;
return cnt;
}
inline size_t Blob::offset(int n, int cn, int row, int col) const
......@@ -226,20 +351,20 @@ inline size_t Blob::offset(int n, int cn, int row, int col) const
inline float *Blob::ptrf(int n, int cn, int row, int col)
{
CV_Assert(type() == CV_32F);
return (float*)m.data + offset(n, cn, row, col);
return matRef(false).ptr<float>() + offset(n, cn, row, col);
}
inline uchar *Blob::ptr(int n, int cn, int row, int col)
{
return m.data + m.elemSize() * offset(n, cn, row, col);
Mat &mat = matRef(false);
return mat.ptr() + mat.elemSize() * offset(n, cn, row, col);
}
template<typename TFloat>
inline TFloat* Blob::ptr(int n, int cn, int row, int col)
template<typename Dtype>
inline Dtype* Blob::ptr(int n, int cn, int row, int col)
{
CV_Assert(type() == cv::DataDepth<TFloat>::value);
return (TFloat*) ptr(n, cn, row, col);
CV_Assert(type() == cv::DataDepth<Dtype>::value);
return (Dtype*) ptr(n, cn, row, col);
}
inline BlobShape Blob::shape() const
......@@ -260,26 +385,69 @@ inline bool Blob::equalShape(const Blob &other) const
return true;
}
inline Mat& Blob::matRef()
inline Mat& Blob::matRef(bool writeOnly)
{
#ifdef CV_DNN_UMAT
updateMat(!writeOnly);
state = HEAD_AT_MAT;
#else
(void)writeOnly;
#endif
return m;
}
inline const Mat& Blob::matRefConst() const
{
CV_DNN_UMAT_ONLY( updateMat() );
return m;
}
inline UMat &Blob::umatRef()
inline UMat &Blob::umatRef(bool writeOnly)
{
CV_Error(Error::StsNotImplemented, "");
#ifndef CV_DNN_UMAT
CV_Error(Error::GpuNotSupported, "");
(void)writeOnly;
return *(new UMat());
#else
updateUMat(!writeOnly);
state = HEAD_AT_UMAT;
return um;
#endif
}
inline const UMat &Blob::umatRefConst() const
{
CV_Error(Error::StsNotImplemented, "");
#ifndef CV_DNN_UMAT
CV_Error(Error::GpuNotSupported, "");
return *(new UMat());
#else
updateUMat();
return um;
#endif
}
template<>
inline Mat &Blob::getRef<Mat>(bool writeOnly)
{
return matRef(writeOnly);
}
template<>
inline UMat &Blob::getRef<UMat>(bool writeOnly)
{
return umatRef(writeOnly);
}
template<>
inline const Mat &Blob::getRefConst<Mat>() const
{
return matRefConst();
}
template<>
inline const UMat &Blob::getRefConst<UMat>() const
{
return umatRefConst();
}
inline Mat Blob::getPlane(int n, int cn)
......@@ -313,27 +481,44 @@ inline Size Blob::size2() const
return Size(cols(), rows());
}
inline int Blob::type() const
inline Blob &Blob::shareFrom(const Blob &blob)
{
return m.depth();
this->m = blob.m;
#ifdef CV_DNN_UMAT
this->um = blob.um;
this->state = blob.state;
#endif
return *this;
}
inline const int * Blob::sizes() const
inline Blob &Blob::reshape(const BlobShape &newShape)
{
return &m.size[0];
if (!m.empty()) m = m.reshape(1, newShape.dims(), newShape.ptr());
#ifdef CV_DNN_UMAT
if (!um.empty()) um = um.reshape(1, newShape.dims(), newShape.ptr());
#endif
return *this;
}
inline Blob Blob::reshaped(const BlobShape &newShape) const
{
Blob res(*this); //also, res.shareFrom(*this) could be used
res.reshape(newShape);
return res;
}
inline Blob &Blob::shareFrom(const Blob &blob)
inline int Blob::elemSize() const
{
this->m = blob.m;
return *this;
return CV_ELEM_SIZE(type());
}
inline Blob &Blob::reshape(const BlobShape &shape)
inline int Blob::getState() const
{
m = m.reshape(1, shape.dims(), shape.ptr());
return *this;
#ifdef CV_DNN_UMAT
return this->state;
#else
return m.empty() ? UNINITIALIZED : HEAD_AT_MAT;
#endif
}
}
......
......@@ -95,10 +95,10 @@ private:
AutoBuffer<int64, 1> *pi;
AutoBuffer<double, 1> *pd;
AutoBuffer<String, 1> *ps;
void *p;
void *pv;
};
DictValue(int _type, void *_p) : type(_type), p(_p) {}
DictValue(int _type, void *_p) : type(_type), pv(_p) {}
void release();
};
......
......@@ -59,15 +59,17 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
* This function automatically called on most of OpenCV builds,
* but you need to call it manually on some specific configurations (iOS for example).
*/
CV_EXPORTS void initModule();
CV_EXPORTS_W void initModule();
/** @brief This class provides all data needed to initialize layer.
*
* It includes dictionary with scalar params (which can be readed by using Dict interface),
* blob params #blobs and optional meta information: #name and #type of layer instance.
*/
struct CV_EXPORTS LayerParams : public Dict
class CV_EXPORTS LayerParams : public Dict
{
public:
//TODO: Add ability to name blob params
std::vector<Blob> blobs; //!< List of learned parameters stored as blobs.
String name; //!< Name of the layer instance (optional, can be used internal purposes).
......@@ -77,10 +79,12 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
/** @brief This interface class allows to build new Layers - are building blocks of networks.
*
* Each class, derived from Layer, must implement allocate() methods to declare own outputs and forward() to compute outputs.
* Also before using the new layer into networks you must register your layer by using one of @ref LayerFactoryModule "LayerFactory" macros.
* Also before using the new layer into networks you must register your layer by using one of @ref dnnLayerFactory "LayerFactory" macros.
*/
struct CV_EXPORTS Layer
class CV_EXPORTS_W Layer
{
public:
//! List of learned parameters must be stored here to allow read them by using Net::getParam().
std::vector<Blob> blobs;
......@@ -116,7 +120,8 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
String type; //!< Type name which was used for creating layer by layer factory.
Layer();
explicit Layer(const LayerParams &params); //!< Initialize only #name, #type and #blobs fields.
explicit Layer(const LayerParams &params); //!< Initializes only #name, #type and #blobs fields.
void setParamsFrom(const LayerParams &params); //!< Initializes only #name, #type and #blobs fields.
virtual ~Layer();
};
......@@ -130,7 +135,7 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
*
* This class supports reference counting of its instances, i. e. copies point to the same instance.
*/
class CV_EXPORTS Net
class CV_EXPORTS_W Net
{
public:
......@@ -174,6 +179,7 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
* @see setNetInputs(), Layer::inputNameToIndex(), Layer::outputNameToIndex()
*/
void connect(String outPin, String inpPin);
/** @brief Connects #@p outNum output of the first layer to #@p inNum input of the second layer.
* @param outLayerId identifier of the first layer
* @param inpLayerId identifier of the second layer
......@@ -181,6 +187,7 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
* @param inpNum number of the second layer input
*/
void connect(int outLayerId, int outNum, int inpLayerId, int inpNum);
/** @brief Sets ouputs names of the network input pseudo layer.
*
* Each net always has special own the network input pseudo layer with id=0.
......@@ -267,10 +274,10 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
* @param isBinary specifies whether the network was serialized in ascii mode or binary.
* @returns Pointer to the created importer, NULL in failure cases.
*
* @warning Torch7 importer is experimental now, you need explicitly set CMake opencv_dnn_BUILD_TORCH_IMPORTER flag to compile its.
* @warning Torch7 importer is experimental now, you need explicitly set CMake `opencv_dnn_BUILD_TORCH_IMPORTER` flag to compile its.
*
* @note Ascii mode of Torch serializer is more preferable, because binary mode extensively use long type of C language,
* which has different bit-length on different systems.
* @note Ascii mode of Torch serializer is more preferable, because binary mode extensively use `long` type of C language,
* which has various bit-length on different systems.
*
* The loading file must contain serialized <a href="https://github.com/torch/nn/blob/master/doc/module.md">nn.Module</a> object
* with importing network. Try to eliminate a custom objects from serialazing data to avoid importing errors.
......
......@@ -86,7 +86,7 @@ inline DictValue DictValue::get<DictValue>(int idx) const
template<>
inline int64 DictValue::get<int64>(int idx) const
{
CV_Assert(idx == -1 && size() == 1 || idx >= 0 && idx < size());
CV_Assert((idx == -1 && size() == 1) || (idx >= 0 && idx < size()));
idx = (idx == -1) ? 0 : idx;
if (type == Param::INT)
......@@ -131,7 +131,7 @@ inline bool DictValue::get<bool>(int idx) const
template<>
inline double DictValue::get<double>(int idx) const
{
CV_Assert(idx == -1 && size() == 1 || idx >= 0 && idx < size());
CV_Assert((idx == -1 && size() == 1) || (idx >= 0 && idx < size()));
idx = (idx == -1) ? 0 : idx;
if (type == Param::REAL)
......@@ -159,7 +159,7 @@ template<>
inline String DictValue::get<String>(int idx) const
{
CV_Assert(isString());
CV_Assert(idx == -1 && ps->size() == 1 || idx >= 0 && idx < (int)ps->size());
CV_Assert((idx == -1 && ps->size() == 1) || (idx >= 0 && idx < (int)ps->size()));
return (*ps)[(idx == -1) ? 0 : idx];
}
......
......@@ -50,7 +50,7 @@ namespace dnn
//! @addtogroup dnn
//! @{
//!
//! @defgroup LayerFactoryModule Utilities for new layers registration
//! @defgroup dnnLayerFactory Utilities for New Layers Registration
//! @{
/** @brief %Layer factory allows to create instances of registered layers. */
......@@ -86,7 +86,7 @@ private:
* @details This macros must be placed inside the function code.
*/
#define REG_RUNTIME_LAYER_FUNC(type, constuctorFunc) \
LayerFactory::registerLayer(#type, constuctorFunc);
cv::dnn::LayerFactory::registerLayer(#type, constuctorFunc);
/** @brief Registers layer class in runtime.
* @param type string, containing type name of the layer.
......@@ -94,7 +94,7 @@ private:
* @details This macros must be placed inside the function code.
*/
#define REG_RUNTIME_LAYER_CLASS(type, class) \
LayerFactory::registerLayer(#type, _layerDynamicRegisterer<class>);
cv::dnn::LayerFactory::registerLayer(#type, _layerDynamicRegisterer<class>);
/** @brief Registers layer constructor on module load time.
* @param type string, containing type name of the layer.
......@@ -102,7 +102,7 @@ private:
* @details This macros must be placed outside the function code.
*/
#define REG_STATIC_LAYER_FUNC(type, constuctorFunc) \
static _LayerStaticRegisterer __LayerStaticRegisterer_##type(#type, constuctorFunc);
static cv::dnn::_LayerStaticRegisterer __LayerStaticRegisterer_##type(#type, constuctorFunc);
/** @brief Registers layer class on module load time.
* @param type string, containing type name of the layer.
......@@ -126,14 +126,15 @@ Ptr<Layer> _layerDynamicRegisterer(LayerParams &params)
}
//allows automatically register created layer on module load time
struct _LayerStaticRegisterer
class _LayerStaticRegisterer
{
String type;
public:
_LayerStaticRegisterer(const String &type, LayerFactory::Constuctor constuctor)
_LayerStaticRegisterer(const String &layerType, LayerFactory::Constuctor layerConstuctor)
{
this->type = type;
LayerFactory::registerLayer(type, constuctor);
this->type = layerType;
LayerFactory::registerLayer(layerType, layerConstuctor);
}
~_LayerStaticRegisterer()
......
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_DNN_DNN_SHAPE_UTILS_HPP__
#define __OPENCV_DNN_DNN_SHAPE_UTILS_HPP__
#include <opencv2/core.hpp>
#include <ostream>
namespace cv {
namespace dnn {
//Useful shortcut
typedef BlobShape Shape;
inline std::ostream &operator<< (std::ostream &s, cv::Range &r)
{
return s << "[" << r.start << ", " << r.end << ")";
}
//Reshaping
//TODO: add -1 specifier for automatic size inferring
template<typename Mat>
void reshape(Mat &m, const BlobShape &shape)
{
m = m.reshape(1, shape.dims(), shape.ptr());
}
template<typename Mat>
Mat reshaped(const Mat &m, const BlobShape &shape)
{
return m.reshape(1, shape.dims(), shape.ptr());
}
//Slicing
struct _Range : public cv::Range
{
_Range(const Range &r) : cv::Range(r) {}
_Range(int start, int size = 1) : cv::Range(start, start + size) {}
};
template<typename Mat>
Mat slice(const Mat &m, const _Range &r0)
{
//CV_Assert(m.dims >= 1);
cv::AutoBuffer<cv::Range, 4> ranges(m.dims);
for (int i = 1; i < m.dims; i++)
ranges[i] = Range::all();
ranges[0] = r0;
return m(&ranges[0]);
}
template<typename Mat>
Mat slice(const Mat &m, const _Range &r0, const _Range &r1)
{
CV_Assert(m.dims >= 2);
cv::AutoBuffer<cv::Range, 4> ranges(m.dims);
for (int i = 2; i < m.dims; i++)
ranges[i] = Range::all();
ranges[0] = r0;
ranges[1] = r1;
return m(&ranges[0]);
}
template<typename Mat>
Mat slice(const Mat &m, const _Range &r0, const _Range &r1, const _Range &r2)
{
CV_Assert(m.dims <= 3);
cv::AutoBuffer<cv::Range, 4> ranges(m.dims);
for (int i = 3; i < m.dims; i++)
ranges[i] = Range::all();
ranges[0] = r0;
ranges[1] = r1;
ranges[2] = r2;
return m(&ranges[0]);
}
template<typename Mat>
Mat slice(const Mat &m, const _Range &r0, const _Range &r1, const _Range &r2, const _Range &r3)
{
CV_Assert(m.dims <= 4);
cv::AutoBuffer<cv::Range, 4> ranges(m.dims);
for (int i = 4; i < m.dims; i++)
ranges[i] = Range::all();
ranges[0] = r0;
ranges[1] = r1;
ranges[2] = r2;
ranges[3] = r3;
return m(&ranges[0]);
}
BlobShape computeShapeByReshapeMask(const BlobShape &srcShape, const BlobShape &maskShape, Range srcRange = Range::all());
}
}
#endif
#include "perf_precomp.hpp"
namespace cvtest
{
using std::tr1::tuple;
using std::tr1::get;
using std::tr1::make_tuple;
using std::make_pair;
using namespace perf;
using namespace testing;
using namespace cv;
using namespace cv::dnn;
enum {STRIDE_OFF = 1, STRIDE_ON = 2};
CV_ENUM(StrideSize, STRIDE_OFF, STRIDE_ON);
enum {GROUP_OFF = 1, GROUP_2 = 2};
CV_ENUM(GroupSize, GROUP_OFF, GROUP_2);
//Squared Size
#define SSZ(n) cv::Size(n, n)
typedef std::pair<BlobShape, int> InpShapeNumOut;
typedef tuple<Size, InpShapeNumOut, GroupSize, StrideSize> ConvParam; //kernel_size, inp shape, groups, stride
typedef TestBaseWithParam<ConvParam> ConvolutionPerfTest;
PERF_TEST_P( ConvolutionPerfTest, perf, Combine(
Values(Size(1, 1), Size(3, 3), Size(5, 5), Size(11, 11)),
Values(make_pair(BlobShape(1, 4, 224, 224), 64),
make_pair(BlobShape(1, 64, 112, 122), 128),
make_pair(BlobShape(1, 256, 28, 28), 512)),
GroupSize::all(),
StrideSize::all())
)
{
RNG rng(0);
ConvParam params = GetParam();
int ksz = get<0>(params).width;
BlobShape inpShape = get<1>(params).first;
int outCn = get<1>(params).second;
int groups = get<2>(params);
int stride = (ksz >= 11) ? 4 : (int)get<3>(params);
int inpCn = inpShape[1];
Blob wgtBlob(BlobShape(outCn, inpCn/groups, ksz, ksz)), biasBlob(BlobShape(outCn, 1, 1, 1));
Blob inpBlob(inpShape);
rng.fill(biasBlob.matRef(), RNG::UNIFORM, -1, +1);
rng.fill(wgtBlob.matRef(), RNG::UNIFORM, -1, +1);
rng.fill(inpBlob.matRef(), RNG::UNIFORM, -1, +1);
LayerParams lp;
lp.set("num_output", outCn);
lp.set("group", groups);
lp.set("stride", stride);
lp.set("kernel_size", ksz);
lp.blobs.reserve(2);
lp.blobs.push_back(wgtBlob);
lp.blobs.push_back(biasBlob);
std::vector<Blob*> inpBlobs(1, &inpBlob);
std::vector<Blob> outBlobs;
cv::setNumThreads(cv::getNumberOfCPUs());
Ptr<Layer> layer = cv::dnn::LayerFactory::createLayerInstance("Convolution", lp);
layer->allocate(inpBlobs, outBlobs);
declare.in(inpBlob.matRef(), wgtBlob.matRef(), WARMUP_RNG).out(outBlobs[0].matRef()).tbb_threads(cv::getNumThreads());
TEST_CYCLE_N(10)
{
layer->forward(inpBlobs, outBlobs);
}
SANITY_CHECK_NOTHING();
}
}
\ No newline at end of file
#include "perf_precomp.hpp"
CV_PERF_TEST_MAIN(dnn)
#ifdef __GNUC__
# pragma GCC diagnostic ignored "-Wmissing-declarations"
# if defined __clang__ || defined __APPLE__
# pragma GCC diagnostic ignored "-Wmissing-prototypes"
# pragma GCC diagnostic ignored "-Wextra"
# endif
#endif
#ifndef __OPENCV_PERF_PRECOMP_HPP__
#define __OPENCV_PERF_PRECOMP_HPP__
#include <opencv2/ts.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/dnn.hpp>
#endif
......@@ -125,7 +125,7 @@ int main(int argc, char **argv)
}
resize(img, img, Size(224, 224)); //GoogLeNet accepts only 224x224 RGB-images
dnn::Blob inputBlob = dnn::Blob(img); //Convert Mat to dnn::Blob image batch
dnn::Blob inputBlob = dnn::Blob::fromImages(img); //Convert Mat to dnn::Blob batch of images
//! [Prepare blob]
//! [Set input blob]
......
This diff is collapsed.
......@@ -191,7 +191,7 @@ namespace
else if (pbBlob.has_shape())
{
const caffe::BlobShape &_shape = pbBlob.shape();
BlobShape shape(_shape.dim_size());
BlobShape shape = BlobShape::all(_shape.dim_size());
for (int i = 0; i < _shape.dim_size(); i++)
shape[i] = (int)_shape.dim(i);
......@@ -201,7 +201,7 @@ namespace
else
{
CV_Error(Error::StsError, "Unknown shape of input blob");
return BlobShape(-1);
return BlobShape();
}
}
......
#include "../precomp.hpp"
#include "layer_loaders.hpp"
#include <opencv2/dnn/shape_utils.hpp>
#include <climits>
namespace cv
{
namespace dnn
{
//Utils
//Extracts params used into Conv, Deconv and Pooling layers
static void getCaffeConvParams(LayerParams &params, Size &kernel, Size &pad, Size &stride)
{
if (params.has("kernel_h") && params.has("kernel_w"))
{
kernel.height = params.get<int>("kernel_h");
kernel.width = params.get<int>("kernel_w");
}
else if (params.has("kernel_size"))
{
kernel.height = kernel.width = params.get<int>("kernel_size");
}
else
{
CV_Error(Error::StsBadArg, "kernel_size (or kernel_h and kernel_w) not specified");
}
CV_Assert(kernel.height > 0 && kernel.width > 0);
if (params.has("pad_h") && params.has("pad_w"))
{
pad.height = params.get<int>("pad_h");
pad.width = params.get<int>("pad_w");
}
else
{
pad.height = pad.width = params.get<int>("pad", 0);
}
CV_Assert(pad.height >= 0 && pad.width >= 0);
if (params.has("stride_h") && params.has("stride_w"))
{
stride.height = params.get<int>("stride_h");
stride.width = params.get<int>("stride_w");
}
else
{
stride.height = stride.width = params.get<int>("stride", 1);
}
CV_Assert(stride.height > 0 && stride.width > 0);
}
//Layers
//Convolution and Deconvolution
static void initConvDeconvLayerFromCaffe(Ptr<BaseConvolutionLayer> l, LayerParams &params)
{
l->setParamsFrom(params);
getCaffeConvParams(params, l->kernel, l->pad, l->stride);
bool bias = params.get<bool>("bias_term", true);
int numOutput = params.get<int>("num_output");
int group = params.get<int>("group", 1);
CV_Assert(numOutput % group == 0);
CV_Assert((bias && l->blobs.size() == 2) || (!bias && l->blobs.size() == 1));
}
template<>
Ptr<Layer> createLayerFromCaffe<ConvolutionLayer>(LayerParams &params)
{
Ptr<BaseConvolutionLayer> l = ConvolutionLayer::create();
initConvDeconvLayerFromCaffe(l, params);
return Ptr<Layer>(l);
}
template<>
Ptr<Layer> createLayerFromCaffe<DeconvolutionLayer>(LayerParams &params)
{
Ptr<BaseConvolutionLayer> l = DeconvolutionLayer::create();
initConvDeconvLayerFromCaffe(l, params);
return Ptr<Layer>(l);
}
template<>
Ptr<Layer> createLayerFromCaffe<PoolingLayer>(LayerParams &params)
{
int type;
Size kernel, stride, pad;
if (params.has("pool"))
{
String pool = params.get<String>("pool").toLowerCase();
if (pool == "max")
type = PoolingLayer::MAX;
else if (pool == "ave")
type = PoolingLayer::AVE;
else if (pool == "stochastic")
type = PoolingLayer::STOCHASTIC;
else
CV_Error(Error::StsBadArg, "Unknown pooling type \"" + pool + "\"");
}
else
{
type = PoolingLayer::MAX;
}
getCaffeConvParams(params, kernel, pad, stride);
return Ptr<Layer>(PoolingLayer::create(type, kernel, stride, pad));
}
template<>
Ptr<Layer> createLayerFromCaffe<SoftmaxLayer>(LayerParams &params)
{
int axis = params.get<int>("axis", 1);
return Ptr<Layer>(SoftmaxLayer::create(axis));
}
template<> //InnerProduct specialization
Ptr<Layer> createLayerFromCaffe<InnerProductLayer>(LayerParams &params)
{
const std::vector<Blob> &blobs = params.blobs;
CV_Assert(1 <= blobs.size() && blobs.size() <= 2);
int numOutputs = params.get<int>("num_output");
int innerSize = (int)blobs[0].total() / numOutputs;
bool bias = params.get<bool>("bias_term", true);
int axis = params.get<int>("axis", 1);
CV_Assert(blobs[0].dims() >= 2 && (size_t)(innerSize * numOutputs) == blobs[0].total());
CV_Assert(!bias || (blobs.size() == 2 && (size_t)numOutputs == blobs[1].total()));
Ptr<InnerProductLayer> l = InnerProductLayer::create(axis);
l->setParamsFrom(params);
l->blobs[0].reshape(Shape(numOutputs, innerSize));
if (bias)
l->blobs[1].reshape(Shape(1, numOutputs));
return Ptr<Layer>(l);
}
template<> //LRNLayer specialization
Ptr<Layer> createLayerFromCaffe<LRNLayer>(LayerParams& params)
{
int type;
String nrmType = params.get<String>("norm_region", "ACROSS_CHANNELS");
if (nrmType == "ACROSS_CHANNELS")
type = LRNLayer::CHANNEL_NRM;
else if (nrmType == "WITHIN_CHANNEL")
type = LRNLayer::SPATIAL_NRM;
else
CV_Error(Error::StsBadArg, "Unknown region type \"" + nrmType + "\"");
int size = params.get<int>("local_size", 5);
if (size % 2 != 1 || size <= 0)
CV_Error(Error::StsBadArg, "LRN layer supports only positive odd values for local_size");
double alpha = params.get<double>("alpha", 1);
double beta = params.get<double>("beta", 0.75);
return Ptr<Layer>(LRNLayer::create(type, size, alpha, beta));
}
template<>
Ptr<Layer> createLayerFromCaffe<MVNLayer>(LayerParams &params)
{
return Ptr<Layer>(MVNLayer::create(
params.get<bool>("normalize_variance", true),
params.get<bool>("across_channels", false),
params.get<double>("eps", 1e-9)
));
}
/* Reshape layers */
template<>
Ptr<Layer> createLayerFromCaffe<ReshapeLayer>(LayerParams &params)
{
int axis = params.get<int>("axis", 0);
int numAxes = params.get<int>("num_axes", -1);
CV_Assert(numAxes >= -1);
Range applyingRange = (numAxes == -1) ? Range(axis, INT_MAX) : Range(axis, axis + numAxes);
Shape newShape;
if (params.has("dim"))
{
const DictValue &paramShape = params.get("dim");
newShape = Shape::all(paramShape.size());
for (int i = 0; i < paramShape.size(); i++)
newShape[i] = paramShape.get<int>(i);
}
else
newShape = Shape::all(0);
return Ptr<Layer>(ReshapeLayer::create(newShape, applyingRange));
}
Ptr<Layer> createFlattenLayerFromCaffe(LayerParams&)
{
return Ptr<Layer>(ReshapeLayer::create(Shape(0, -1)));
}
template<>
Ptr<Layer> createLayerFromCaffe<ConcatLayer>(LayerParams& params)
{
return Ptr<Layer>(ConcatLayer::create(params.get<int>("axis", 1)));
}
template<>
Ptr<Layer> createLayerFromCaffe<SplitLayer>(LayerParams &params)
{
int outputsCount;
//TODO: maybe "top_count" param is useless because it can be determined by output connections number
if (params.has("top_count"))
{
outputsCount = params.get<int>("top_count");
CV_Assert(outputsCount >= 0);
}
else
{
outputsCount = -1;
}
return Ptr<Layer>(SplitLayer::create(outputsCount));
}
template<>
Ptr<Layer> createLayerFromCaffe<SliceLayer>(LayerParams& params)
{
int axis = params.get<int>("axis", 1);
if (!params.has("slice_point"))
{
return Ptr<Layer>(SliceLayer::create(axis));
}
else
{
const DictValue &indicesValue = params.get("slice_point");
std::vector<int> sliceIndices(indicesValue.size());
for (int i = 0; i < indicesValue.size(); i++)
sliceIndices[i] = indicesValue.get<int>(i);
return Ptr<Layer>(SliceLayer::create(axis, sliceIndices));
}
}
/* Activation layers */
template <typename ActivationLayer> //Intended for parameters-free activations
Ptr<Layer> createLayerFromCaffe(LayerParams&)
{
return Ptr<Layer>(ActivationLayer::create());
}
template<> //ReLU specialization
Ptr<Layer> createLayerFromCaffe<ReLULayer>(LayerParams& params)
{
float negative_slope = params.get<float>("negative_slope", 0.f);
return Ptr<Layer>(ReLULayer::create(negative_slope));
}
template<> //Power specialization
Ptr<Layer> createLayerFromCaffe<PowerLayer>(LayerParams& params)
{
float power = params.get<float>("power", 1.0f);
float scale = params.get<float>("scale", 1.0f);
float shift = params.get<float>("shift", 0.0f);
return Ptr<Layer>(PowerLayer::create(power, scale, shift));
}
//Explicit instantiation
template Ptr<Layer> createLayerFromCaffe<ConvolutionLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<DeconvolutionLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<SoftmaxLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<InnerProductLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<LRNLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<MVNLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<ConcatLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<SliceLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<SplitLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<ReLULayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<SigmoidLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<TanHLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<AbsLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<BNLLLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<PowerLayer>(LayerParams&);
}
}
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_DNN_CAFFE_LAYER_LOADERS_HPP__
#define __OPENCV_DNN_CAFFE_LAYER_LOADERS_HPP__
#include <opencv2/dnn/all_layers.hpp>
namespace cv
{
namespace dnn
{
//Common template for Caffe layer loaders
template <typename PublicLayer>
Ptr<Layer> createLayerFromCaffe(LayerParams&);
Ptr<Layer> createFlattenLayerFromCaffe(LayerParams&);
}
}
#endif
\ No newline at end of file
......@@ -543,6 +543,13 @@ Layer::Layer(const LayerParams &params)
}
void Layer::setParamsFrom(const LayerParams &params)
{
blobs = params.blobs;
name = params.name;
type = params.type;
}
int Layer::inputNameToIndex(String)
{
return -1;
......
......@@ -40,19 +40,8 @@
//M*/
#include "precomp.hpp"
#include "layers/concat_layer.hpp"
#include "layers/convolution_layer.hpp"
#include "caffe/layer_loaders.hpp"
#include "layers/blank_layer.hpp"
#include "layers/elementwise_layers.hpp"
#include "layers/fully_connected_layer.hpp"
#include "layers/lrn_layer.hpp"
#include "layers/mvn_layer.hpp"
#include "layers/pooling_layer.hpp"
#include "layers/reshape_layer.hpp"
#include "layers/slice_layer.hpp"
#include "layers/softmax_layer.hpp"
#include "layers/split_layer.hpp"
namespace cv
{
......@@ -76,27 +65,27 @@ void initModule()
if (init.status)
return;
REG_RUNTIME_LAYER_CLASS(Slice, SliceLayer)
REG_RUNTIME_LAYER_CLASS(Softmax, SoftMaxLayer)
REG_RUNTIME_LAYER_CLASS(Split, SplitLayer)
REG_RUNTIME_LAYER_CLASS(Reshape, ReshapeLayer)
REG_STATIC_LAYER_FUNC(Flatten, createFlattenLayer)
REG_RUNTIME_LAYER_CLASS(Pooling, PoolingLayer)
REG_RUNTIME_LAYER_CLASS(MVN, MVNLayer)
REG_RUNTIME_LAYER_CLASS(LRN, LRNLayer)
REG_RUNTIME_LAYER_CLASS(InnerProduct, FullyConnectedLayer)
REG_RUNTIME_LAYER_FUNC(Slice, createLayerFromCaffe<SliceLayer>);
REG_RUNTIME_LAYER_FUNC(Split, createLayerFromCaffe<SplitLayer>);
REG_RUNTIME_LAYER_FUNC(Concat, createLayerFromCaffe<ConcatLayer>);
REG_RUNTIME_LAYER_FUNC(Reshape, createLayerFromCaffe<ReshapeLayer>);
REG_RUNTIME_LAYER_FUNC(Flatten, createFlattenLayerFromCaffe);
REG_RUNTIME_LAYER_CLASS(ReLU, ElementWiseLayer<ReLUFunctor>)
REG_RUNTIME_LAYER_CLASS(TanH, ElementWiseLayer<TanHFunctor>)
REG_RUNTIME_LAYER_CLASS(BNLL, ElementWiseLayer<BNLLFunctor>)
REG_RUNTIME_LAYER_CLASS(Power, ElementWiseLayer<PowerFunctor>)
REG_RUNTIME_LAYER_CLASS(AbsVal, ElementWiseLayer<AbsValFunctor>)
REG_RUNTIME_LAYER_CLASS(Sigmoid, ElementWiseLayer<SigmoidFunctor>)
REG_RUNTIME_LAYER_CLASS(Dropout, BlankLayer)
REG_RUNTIME_LAYER_FUNC(Convolution, createLayerFromCaffe<ConvolutionLayer>);
REG_RUNTIME_LAYER_FUNC(Deconvolution, createLayerFromCaffe<DeconvolutionLayer>);
REG_RUNTIME_LAYER_FUNC(Pooling, createLayerFromCaffe<PoolingLayer>);
REG_RUNTIME_LAYER_FUNC(LRN, createLayerFromCaffe<LRNLayer>);
REG_RUNTIME_LAYER_FUNC(InnerProduct, createLayerFromCaffe<InnerProductLayer>);
REG_RUNTIME_LAYER_FUNC(Softmax, createLayerFromCaffe<SoftmaxLayer>);
REG_RUNTIME_LAYER_FUNC(MVN, createLayerFromCaffe<MVNLayer>);
REG_RUNTIME_LAYER_CLASS(Convolution, ConvolutionLayer)
REG_RUNTIME_LAYER_CLASS(Deconvolution, DeConvolutionLayer)
REG_RUNTIME_LAYER_CLASS(Concat, ConcatLayer)
REG_RUNTIME_LAYER_FUNC(ReLU, createLayerFromCaffe<ReLULayer>);
REG_RUNTIME_LAYER_FUNC(Sigmoid, createLayerFromCaffe<SigmoidLayer>);
REG_RUNTIME_LAYER_FUNC(TanH, createLayerFromCaffe<TanHLayer>);
REG_RUNTIME_LAYER_FUNC(BNLL, createLayerFromCaffe<BNLLLayer>);
REG_RUNTIME_LAYER_FUNC(AbsVal, createLayerFromCaffe<AbsLayer>);
REG_RUNTIME_LAYER_FUNC(Power, createLayerFromCaffe<PowerLayer>);
REG_RUNTIME_LAYER_CLASS(Dropout, BlankLayer)
init.status = true;
}
......
......@@ -42,60 +42,80 @@
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "concat_layer.hpp"
#include <opencv2/core/ocl.hpp>
namespace cv
{
namespace dnn
{
ConcatLayer::ConcatLayer(LayerParams &params) : Layer(params)
{
axis = params.get<int>("axis", 1);
CV_Assert(axis >= 0);
}
void ConcatLayer::allocate(const std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
{
ConcatLayerImpl::ConcatLayerImpl(int axis_ /*= 1*/)
{
axis = axis_;
}
void ConcatLayerImpl::allocate(const std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
{
CV_Assert(inputs.size() > 0);
int refType = inputs[0]->type();
BlobShape refShape = inputs[0]->shape();
CV_Assert(axis < refShape.dims());
axisIdx = inputs[0]->canonicalAxis(axis);
int axisSum = 0;
useOpenCL = false;
for (size_t i = 0; i < inputs.size(); i++)
{
BlobShape curShape = inputs[i]->shape();
CV_Assert(curShape.dims() == refShape.dims() && inputs[i]->type() == refType);
for (int axisId = 0; axisId < refShape.dims(); axisId++)
CV_Assert(curShape.dims() == refShape.dims() && inputs[i]->type() == inputs[0]->type());
for (int curAxis = 0; curAxis < refShape.dims(); curAxis++)
{
if (axisId != axis && refShape[axisId] != curShape[axisId])
if (curAxis != axisIdx && refShape[curAxis] != curShape[curAxis])
CV_Error(Error::StsBadSize, "Inconsitent shape for ConcatLayer");
}
axisSum += curShape[axis];
axisSum += curShape[axisIdx];
useOpenCL |= inputs[i]->getState() == Blob::HEAD_AT_MAT;
}
refShape[axis] = axisSum;
refShape[axisIdx] = axisSum;
useOpenCL &= ocl::useOpenCL();
int allocFlags = (useOpenCL) ? Blob::ALLOC_UMAT : Blob::ALLOC_MAT;
outputs.resize(1);
outputs[0].create(refShape);
}
outputs[0].create(refShape, inputs[0]->type(), allocFlags);
}
void ConcatLayer::forward(std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
{
const Mat& outMat = outputs[0].matRef();
void ConcatLayerImpl::forward(std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
{
#ifdef HAVE_OPENCL
if (useOpenCL)
forward_<UMat>(inputs, outputs);
else
#endif
forward_<Mat>(inputs, outputs);
}
template<typename XMat>
void ConcatLayerImpl::forward_(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
XMat& outMat = outputs[0].getRef<XMat>();
std::vector<Range> ranges(outputs[0].dims(), Range::all());
int sizeStart = 0;
ranges[axisIdx].start = 0;
for (size_t i = 0; i < inputs.size(); i++)
{
int sizeEnd = sizeStart + inputs[i]->size(axis);
ranges[axis] = Range(sizeStart, sizeEnd);
ranges[axisIdx].end = ranges[axisIdx].start + inputs[i]->size(axisIdx);
inputs[i]->getRefConst<XMat>().copyTo(outMat(&ranges[0]));
ranges[axisIdx].start = ranges[axisIdx].end;
}
}
Mat outSubMat = outMat(&ranges[0]);
inputs[i]->matRef().copyTo(outSubMat);
Ptr<ConcatLayer> ConcatLayer::create(int axis)
{
return Ptr<ConcatLayer>(new ConcatLayerImpl(axis));
}
sizeStart = sizeEnd;
}
}
}
}
......@@ -42,20 +42,29 @@
#ifndef __OPENCV_DNN_LAYERS_CONCAT_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_CONCAT_LAYER_HPP__
#include "../precomp.hpp"
#include <opencv2/dnn/all_layers.hpp>
namespace cv
{
namespace dnn
{
class ConcatLayer : public Layer
{
int axis;
public:
ConcatLayer(LayerParams& params);
class ConcatLayerImpl : public ConcatLayer
{
bool useOpenCL;
int axisIdx;
template<typename XMat>
void forward_(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
public:
ConcatLayerImpl(int axis_ = 1);
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
};
};
}
}
#endif
......@@ -42,51 +42,65 @@
#ifndef __OPENCV_DNN_LAYERS_CONVOLUTION_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_CONVOLUTION_LAYER_HPP__
#include "../precomp.hpp"
#include <opencv2/dnn/all_layers.hpp>
namespace cv
{
namespace dnn
{
//TODO: simultaneously convolution and bias addition for cache optimization
class ConvolutionLayer : public Layer
{
protected:
bool bias;
int numOutput, group;
int padH, padW;
int kerH, kerW;
int strideH, strideW;
//TODO: simultaneously convolution and bias addition for cache optimization
class ConvolutionLayerImpl : public ConvolutionLayer
{
public:
ConvolutionLayerImpl();
virtual void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
virtual void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
virtual void init();
protected:
int numOutput, group;
int inpH, inpW, inpCn;
int outH, outW, outCn;
int topH, topW, topCn; //switched between inp/out on deconv/conv
int inpGroupCn, outGroupCn;
int ksize;
bool useOpenCL;
Mat colMat, biasOnesMat;
bool bias;
bool tryUseOpenCL, useOpenCL;
Blob colBlob, biasOnesBlob;
bool is1x1() const;
virtual void computeInpOutShape(const Blob &inpBlob);
template<typename XMat>
void forward_(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void im2col(const Mat &srcImg, Mat &dstCol);
void im2col(const UMat &srcImg, UMat &dstCol);
};
class DeConvolutionLayerImpl : public ConvolutionLayerImpl
{
public:
DeConvolutionLayerImpl();
virtual void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
protected:
inline bool is1x1() const;
virtual void computeInpOutShape(const Blob &inpBlob);
void im2col(Blob &inpBlob, int imNum, int cnGroup);
public:
ConvolutionLayer() {}
ConvolutionLayer(LayerParams &params);
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
};
template<typename XMat>
void forward_(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void col2im(const Mat &colMat, Mat &dstImg);
void col2im(const UMat &colMat, UMat &dstImg);
};
class DeConvolutionLayer : public ConvolutionLayer
{
protected:
void computeInpOutShape(const Blob &inpBlob);
void col2im(Mat &dstMat);
//Importers
Ptr<Layer> createConvolutionLayerFromCaffe(LayerParams &params);
Ptr<Layer> createDeconvolutionLayerFromCaffe(LayerParams &params);
public:
DeConvolutionLayer(LayerParams &params);
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
};
}
}
#endif
#include "../precomp.hpp"
#include "elementwise_layers.hpp"
namespace cv
{
namespace dnn
{
#define ACTIVATION_CREATOR_FOR(_Layer, _Functor, ...) \
Ptr<_Layer> _Layer::create() { \
return return Ptr<_Layer>( new ElementWiseLayer<_Functor>(_Functor()) ); }
Ptr<ReLULayer> ReLULayer::create(double negativeSlope)
{
return Ptr<ReLULayer>(new ElementWiseLayer<ReLUFunctor>(ReLUFunctor(negativeSlope)));
}
Ptr<TanHLayer> TanHLayer::create()
{
return Ptr<TanHLayer>(new ElementWiseLayer<TanHFunctor>());
}
Ptr<SigmoidLayer> SigmoidLayer::create()
{
return Ptr<SigmoidLayer>(new ElementWiseLayer<SigmoidFunctor>());
}
Ptr<AbsLayer> AbsLayer::create()
{
return Ptr<AbsLayer>(new ElementWiseLayer<AbsValFunctor>());
}
Ptr<BNLLLayer> BNLLLayer::create()
{
return Ptr<BNLLLayer>(new ElementWiseLayer<BNLLFunctor>());
}
Ptr<PowerLayer> PowerLayer::create(double power /*= 1*/, double scale /*= 1*/, double shift /*= 0*/)
{
const PowerFunctor f(power, scale, shift);
return Ptr<PowerLayer>(new ElementWiseLayer<PowerFunctor>(f));
}
}
}
\ No newline at end of file
......@@ -44,6 +44,11 @@
#include "../precomp.hpp"
#include "layers_common.hpp"
#include <cmath>
#include <opencv2/dnn/all_layers.hpp>
#include <opencv2/core/ocl.hpp>
#ifdef HAVE_OPENCL
#include "modules/dnn/opencl_kernels_dnn.hpp"
#endif
namespace cv
{
......@@ -55,40 +60,101 @@ using std::exp;
using std::tanh;
using std::pow;
template<typename Func>
class ElementWiseLayer : public Layer
{
template<typename Func>
class ElementWiseLayer : public Func::Layer
{
bool useOpenCL;
Func func;
template<typename Dtype>
class PBody : public cv::ParallelLoopBody
{
Func &func;
Dtype *data;
public:
ElementWiseLayer(LayerParams &_params) : func(_params) {}
PBody(Mat &mat, Func &func_) :
func(func_), data(mat.ptr<Dtype>())
{}
void operator()(const Range &r) const
{
for (int i = r.start; i < r.end; i++)
data[i] = func(data[i]);
}
};
public:
ElementWiseLayer() {}
ElementWiseLayer(const Func &f) : func(f) {}
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
useOpenCL = ocl::useOpenCL();
outputs.resize(inputs.size());
for (size_t i = 0; i < inputs.size(); i++)
{
outputs[i].shareFrom(*inputs[i]); //no data copy
//hotfix: shareFrom doesn't provide properly Mat/UMat switching
if (useOpenCL)
outputs[i].umatRef() = inputs[i]->umatRefConst();
else
outputs[i].matRef() = inputs[i]->matRefConst();
}
}
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
#ifdef HAVE_OPENCL
if (useOpenCL)
forwardOCL(inputs, outputs);
else
#endif
forwardCPU(inputs, outputs);
}
#ifdef HAVE_OPENCL
void forwardOCL(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
size_t wgSize = ocl::Device::getDefault().maxWorkGroupSize();
for (size_t i = 0; i < inputs.size(); i++)
{
CV_Assert(inputs[i]->ptr() == outputs[i].ptr() && inputs[i]->type() == outputs[i].type());
const UMat &src = inputs[i]->umatRefConst();
UMat &dst = outputs[i].umatRef();
CV_Assert(src.isContinuous() && dst.isContinuous() && !src.offset && !dst.offset);
ocl::Kernel ker;
CV_Assert(func.initKernel(ker, src));
ker.set(0, (int)src.total());
ker.set(1, ocl::KernelArg::PtrReadOnly(src));
ker.set(2, ocl::KernelArg::PtrWriteOnly(dst));
size_t size = outputs[i].total();
size_t gSize = src.total();
CV_Assert(ker.run(1, &gSize, &wgSize, true));
}
}
#endif
void forwardCPU(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
for (size_t i = 0; i < inputs.size(); i++)
{
const Mat &src = inputs[i]->matRefConst();
Mat &dst = outputs[i].matRef();
CV_Assert(src.ptr() == dst.ptr() && src.isContinuous());
if (outputs[i].type() == CV_32F)
Range sizeRange = Range(0, dst.total());
if (dst.type() == CV_32F)
{
float *data = outputs[i].ptrf();
for (size_t j = 0; j < size; j++)
data[j] = func(data[j]);
cv::parallel_for_(sizeRange, PBody<float>(dst, func));
}
else if (outputs[i].type() == CV_64F)
else if (dst.type() == CV_64F)
{
double *data = outputs[i].ptr<double>();
for (size_t j = 0; j < size; j++)
data[j] = func(data[j]);
cv::parallel_for_(sizeRange, PBody<double>(dst, func));
}
else
{
......@@ -96,89 +162,157 @@ using std::pow;
}
}
}
};
};
#ifdef HAVE_OPENCL
static String oclGetTMacro(const UMat &m)
{
return String("-DT=") + ocl::typeToStr(m.type()) + String(" ");
}
#endif
struct ReLUFunctor
{
float negative_slope;
struct ReLUFunctor
{
typedef ReLULayer Layer;
ReLUFunctor(LayerParams &params)
{
if (params.has("negative_slope"))
negative_slope = params.get<float>("negative_slope");
else
negative_slope = 0.f;
}
double slope;
ReLUFunctor(double slope_)
: slope(slope_) {}
template<typename TFloat>
inline TFloat operator()(TFloat x)
inline TFloat operator()(TFloat x) const
{
return (x >= (TFloat)0) ? x : negative_slope * x;
return (x >= (TFloat)0) ? x : (TFloat)slope * x;
}
};
struct TanHFunctor
#ifdef HAVE_OPENCL
bool initKernel(ocl::Kernel &ker, const UMat &src) const
{
TanHFunctor(LayerParams&) {}
const char *buildoptSlope = (slope == 0) ? "-DRELU_NO_SLOPE" : "";
String buildopt = oclGetTMacro(src) + buildoptSlope;
if (!ker.create("ReLUForward", ocl::dnn::activations_oclsrc, buildopt))
return false;
if (slope != 0)
ker.set(3, (float)slope);
return true;
}
#endif
};
struct TanHFunctor
{
typedef TanHLayer Layer;
template<typename TFloat>
inline TFloat operator()(TFloat x)
inline TFloat operator()(TFloat x) const
{
return tanh(x);
}
};
struct SigmoidFunctor
#ifdef HAVE_OPENCL
bool initKernel(ocl::Kernel &ker, const UMat &src) const
{
SigmoidFunctor(LayerParams&) {}
if (!ker.create("TanHForward", ocl::dnn::activations_oclsrc, oclGetTMacro(src)))
return false;
return true;
}
#endif
};
struct SigmoidFunctor
{
typedef SigmoidLayer Layer;
template<typename TFloat>
inline TFloat operator()(TFloat x)
inline TFloat operator()(TFloat x) const
{
return (TFloat)1 / ((TFloat)1 + exp(-x));
}
};
struct AbsValFunctor
#ifdef HAVE_OPENCL
bool initKernel(ocl::Kernel &ker, const UMat &src) const
{
AbsValFunctor(LayerParams&) {}
if (!ker.create("SigmoidForward", ocl::dnn::activations_oclsrc, oclGetTMacro(src)))
return false;
return true;
}
#endif
};
struct AbsValFunctor
{
typedef AbsLayer Layer;
template<typename TFloat>
inline TFloat operator()(TFloat x)
inline TFloat operator()(TFloat x) const
{
return abs(x);
}
};
struct PowerFunctor
#ifdef HAVE_OPENCL
bool initKernel(ocl::Kernel &ker, const UMat &src) const
{
if (!ker.create("AbsValForward", ocl::dnn::activations_oclsrc, oclGetTMacro(src)))
return false;
return true;
}
#endif
};
struct BNLLFunctor
{
typedef BNLLLayer Layer;
template<typename TFloat>
inline TFloat operator()(TFloat x) const
{
float power, scale, shift;
return log((TFloat)1 + exp(-abs(x)));
}
PowerFunctor(LayerParams &params)
#ifdef HAVE_OPENCL
bool initKernel(ocl::Kernel &ker, const UMat &src) const
{
power = params.get<float>("power", 1.0f);
scale = params.get<float>("scale", 1.0f);
shift = params.get<float>("shift", 0.0f);
if (!ker.create("BNLLForward", ocl::dnn::activations_oclsrc, oclGetTMacro(src)))
return false;
return true;
}
#endif
};
struct PowerFunctor
{
typedef PowerLayer Layer;
double power, scale, shift;
PowerFunctor(double power_, double scale_ = 1, double shift_ = 0)
: power(power_), scale(scale_), shift(shift_) {}
template<typename TFloat>
inline TFloat operator()(TFloat x)
inline TFloat operator()(TFloat x) const
{
return pow((TFloat)shift + (TFloat)scale * x, (TFloat)power);
}
};
struct BNLLFunctor
#ifdef HAVE_OPENCL
bool initKernel(ocl::Kernel &ker, const UMat &src) const
{
BNLLFunctor(LayerParams&) {}
if (!ker.create("PowForward", ocl::dnn::activations_oclsrc, oclGetTMacro(src)))
return false;
template<typename TFloat>
inline TFloat operator()(TFloat x)
{
return log((TFloat)1 + exp(-abs(x)));
ker.set(3, (float)power);
ker.set(4, (float)scale);
ker.set(5, (float)shift);
return true;
}
};
#endif
};
}
}
#endif
......@@ -42,73 +42,88 @@
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "fully_connected_layer.hpp"
#include "op_blas.hpp"
#include <opencv2/dnn/shape_utils.hpp>
#include <opencv2/core/ocl.hpp>
namespace cv
{
namespace dnn
{
FullyConnectedLayer::FullyConnectedLayer(LayerParams &params) : Layer(params)
{
numOutputs = params.get<int>("num_output");
bias = params.get<bool>("bias_term", true);
axis_ = params.get<int>("axis", 1);
CV_Assert(blobs.size() == (bias ? 2U : 1U));
CV_Assert(blobs[0].dims() >= 2 && blobs[0].total() >= (size_t)numOutputs);
CV_Assert(!bias || blobs[1].total() == (size_t)numOutputs);
}
FullyConnectedLayerImpl::FullyConnectedLayerImpl(int axis_)
{
axis = axis_;
}
void FullyConnectedLayer::allocate(const std::vector<Blob*> &input, std::vector<Blob> &output)
{
void FullyConnectedLayerImpl::allocate(const std::vector<Blob*> &input, std::vector<Blob> &output)
{
CV_Assert(input.size() > 0);
CV_Assert(1 <= blobs.size() && blobs.size() <= 2);
CV_Assert(blobs[0].dims() == 2);
bias = (blobs.size() >= 1);
axisCan = input[0]->canonicalAxis(axis);
dtype = input[0]->type();
numOutput = blobs[0].size(0);
innerSize = blobs[0].size(1);
outerSize = input[0]->total(0, axisCan);
CV_Assert((size_t)innerSize == input[0]->total(axisCan));
CV_Assert(!bias || (size_t)numOutput == blobs[1].total());
axis = input[0]->canonicalAxis(axis_);
innerSize = (int)input[0]->total(axis);
useOpenCL = ocl::useOpenCL();
int allocFlags = useOpenCL ? Blob::ALLOC_UMAT : Blob::ALLOC_UMAT;
CV_Assert((size_t)innerSize * (size_t)numOutputs == blobs[0].total());
CV_Assert(blobs[0].size(-2) == numOutputs && blobs[0].size(-1) == innerSize);
biasOnesBlob.create(Shape(outerSize, 1), dtype, allocFlags);
biasOnesBlob.setTo(1);
output.resize(input.size());
for (size_t i = 0; i < input.size(); i++)
{
if (i != 0)
CV_Assert(input[i]->equalShape(*input[0]));
this->reshape(*input[i], output[i]);
}
CV_Assert(i == 0 || (input[i]->equalShape(*input[0]) && input[i]->type() == dtype));
Shape outShape = input[i]->shape().slice(0, axis) + Shape(numOutput);
output[i].create(outShape, dtype, allocFlags);
}
}
void FullyConnectedLayer::reshape(const Blob &inp, Blob &out)
{
BlobShape inpShape = inp.shape();
BlobShape outShape(axis+1, inpShape.ptr());
outShape[axis] = numOutputs;
void FullyConnectedLayerImpl::forward(std::vector<Blob*> &input, std::vector<Blob> &output)
{
#ifdef HAVE_OPENCL
if (useOpenCL)
forward_<UMat>(input, output);
else
#endif
forward_<Mat>(input, output);
}
out.create(outShape, inp.type());
template<typename XMat>
void FullyConnectedLayerImpl::forward_(std::vector<Blob *> &input, std::vector<Blob> &output)
{
const XMat &weight = blobs[0].getRefConst<XMat>();
const XMat *biasMat = NULL, *biasOnesMat = NULL;
if (bias)
{
biasOnesMat = &biasOnesBlob.getRefConst<XMat>();
biasMat = &blobs[1].getRefConst<XMat>();
}
void FullyConnectedLayer::forward(std::vector<Blob*> &input, std::vector<Blob> &output)
{
for (size_t i = 0; i < input.size(); i++)
{
int M = (int)input[i]->total(0, axis);
int N = numOutputs;
int K = innerSize;
Mat srcMat(M, K, input[i]->type(), input[i]->ptrf());
Mat weight(N, K, blobs[0].type(), blobs[0].ptrf());
Mat dstMat(M, N, output[i].type(), output[i].ptrf());
//important: Caffe stores weights as transposed array
cv::gemm(srcMat, weight, 1, noArray(), 0, dstMat, GEMM_2_T);
const XMat srcMat = reshaped(input[i]->getRefConst<XMat>(), Shape(outerSize, innerSize));
XMat dstMat = reshaped(output[i].getRef<XMat>(), Shape(outerSize, numOutput));
dnn::gemm(srcMat, weight, 1, dstMat, 0, GEMM_2_T);
if (bias)
{
Mat biasOnesMat = Mat::ones(M, 1, CV_32F);
Mat biasMat(1, N, CV_32F, blobs[1].ptrf());
cv::gemm(biasOnesMat, biasMat, 1, dstMat, 1, dstMat);
}
}
dnn::gemm(*biasOnesMat, *biasMat, 1, dstMat, 1);
}
}
Ptr<InnerProductLayer> InnerProductLayer::create(int axis)
{
return Ptr<InnerProductLayer>(new FullyConnectedLayerImpl(axis));
}
}
}
......@@ -42,26 +42,30 @@
#ifndef __OPENCV_DNN_LAYERS_FULLY_CONNECTED_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_FULLY_CONNECTED_LAYER_HPP__
#include "../precomp.hpp"
#include <opencv2/dnn/all_layers.hpp>
namespace cv
{
namespace dnn
{
class FullyConnectedLayer : public Layer
{
bool bias;
int numOutputs;
int axis_, axis;
int innerSize;
class FullyConnectedLayerImpl : public InnerProductLayer
{
int axisCan, dtype;
int numOutput, innerSize, outerSize;
bool bias, useOpenCL;
Blob biasOnesBlob;
template<typename XMat>
void forward_(std::vector<Blob*> &input, std::vector<Blob> &output);
void reshape(const Blob &inp, Blob &out);
public:
public:
FullyConnectedLayer(LayerParams &params);
FullyConnectedLayerImpl(int axisCan = 1);
void allocate(const std::vector<Blob*> &input, std::vector<Blob> &output);
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
};
};
}
}
#endif
......@@ -46,44 +46,5 @@ namespace cv
namespace dnn
{
void getKernelParams(LayerParams &params, int &kernelH, int &kernelW, int &padH, int &padW, int &strideH, int &strideW)
{
if (params.has("kernel_h") && params.has("kernel_w"))
{
kernelH = params.get<int>("kernel_h");
kernelW = params.get<int>("kernel_w");
}
else if (params.has("kernel_size"))
{
kernelH = kernelW = params.get<int>("kernel_size");
}
else
{
CV_Error(cv::Error::StsBadArg, "kernel_size (or kernel_h and kernel_w) not specified");
}
if (params.has("pad_h") && params.has("pad_w"))
{
padH = params.get<int>("pad_h");
padW = params.get<int>("pad_w");
}
else
{
padH = padW = params.get<int>("pad", 0);
}
if (params.has("stride_h") && params.has("stride_w"))
{
strideH = params.get<int>("stride_h");
strideW = params.get<int>("stride_w");
}
else
{
strideH = strideW = params.get<int>("stride", 1);
}
CV_Assert(kernelH > 0 && kernelW > 0 && padH >= 0 && padW >= 0 && strideH > 0 && strideW > 0);
}
}
}
......@@ -42,14 +42,14 @@
#ifndef __OPENCV_DNN_LAYERS_LAYERS_COMMON_HPP__
#define __OPENCV_DNN_LAYERS_LAYERS_COMMON_HPP__
#include <opencv2/dnn.hpp>
#include "op_blas.hpp"
#include "op_im2col.hpp"
namespace cv
{
namespace dnn
{
void getKernelParams(LayerParams &params, int &kernelH, int &kernelW, int &padH, int &padW, int &strideH, int &strideW);
}
}
......
......@@ -42,45 +42,42 @@
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "lrn_layer.hpp"
#include "modules/dnn/opencl_kernels_dnn.hpp"
#include <opencv2/imgproc.hpp>
#include <opencv2/core/ocl.hpp>
#include <opencv2/dnn/shape_utils.hpp>
#include <algorithm>
namespace cv
{
namespace dnn
{
LRNLayer::LRNLayer(LayerParams &params) : Layer(params)
{
String nrmType = params.get<String>("norm_region", "ACROSS_CHANNELS");
if (nrmType == "ACROSS_CHANNELS")
type = CHANNEL_NRM;
else if (nrmType == "WITHIN_CHANNEL")
type = SPATIAL_NRM;
else
CV_Error(Error::StsBadArg, "Unknown region type \"" + nrmType + "\"");
size = params.get<int>("local_size", 5);
if (size % 2 != 1 || size <= 0)
CV_Error(Error::StsBadArg, "LRN layer supports only positive odd values for local_size");
alpha = params.get<double>("alpha", 1);
beta = params.get<double>("beta", 0.75);
}
LRNLayerImpl::LRNLayerImpl(int type_, int size_, double alpha_, double beta_)
{
type = type_;
size = size_;
alpha = alpha_;
beta = beta_;
}
void LRNLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
CV_Assert(inputs.size() == 1);
outputs.resize(1);
void LRNLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
CV_Assert(inputs.size() == 1 && inputs[0]->dims() == 4);
CV_Assert(type == CHANNEL_NRM || type == SPATIAL_NRM);
useOpenCL = cv::ocl::useOpenCL();
Vec4i shape = inputs[0]->shape4();
outputs[0].create(shape);
if (type == SPATIAL_NRM && !useOpenCL)
buf.create(inputs[0]->shape().slice(2), inputs[0]->type(), Blob::ALLOC_MAT);
if (type == CHANNEL_NRM && useOpenCL)
buf.create(inputs[0]->shape().slice(2), inputs[0]->type(), Blob::ALLOC_UMAT);
shape[0] = 1; //maybe make shape[0] = 1 too
bufBlob.create(shape);
}
outputs.resize(1);
outputs[0].create(inputs[0]->shape(), inputs[0]->type());
}
void LRNLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
void LRNLayerImpl::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
Blob &src = *inputs[0];
Blob &dst = outputs[0];
......@@ -93,72 +90,165 @@ namespace dnn
spatialNormalization(src, dst);
break;
default:
CV_Error(cv::Error::StsNotImplemented, "Unimplemented mode of LRN layer");
CV_Error(Error::StsNotImplemented, "Unimplemented mode of LRN layer");
break;
}
}
}
void LRNLayer::channelNoramlization(Blob &srcBlob, Blob &dstBlob)
template<typename XMat>
static XMat getPlane(XMat &m, int n, int cn)
{
return reshaped(slice(m, n, cn), BlobShape::like(m).slice(2));
}
void LRNLayerImpl::channelNoramlization(Blob &src, Blob &dst)
{
if (!useOpenCL)
channelNoramlization_<Mat>(src, dst);
else
{
CV_DbgAssert(srcBlob.ptr() != dstBlob.ptr());
//channelNoramlization_ocl(src.getRefConst<UMat>(), dst.getRef<UMat>()); //consumes a lot of memory
channelNoramlization_<UMat>(src, dst);
}
}
template<typename XMat>
void LRNLayerImpl::channelNoramlization_(Blob &srcBlob, Blob &dstBlob)
{
int num = srcBlob.num();
int channels = srcBlob.channels();
int ksize = (size - 1) / 2;
XMat srcMat = srcBlob.getRefConst<XMat>();
XMat dstMat = dstBlob.getRef<XMat>();
for (int n = 0; n < num; n++)
{
Mat accum = dstBlob.getPlane(n, channels-1); //trick for memory saving
XMat accum = getPlane(dstMat, n, channels-1); //trick for memory saving
accum.setTo(0);
for (int cn = 0; cn < std::min(ksize, channels); cn++)
cv::accumulateSquare(srcBlob.getPlane(n, cn), accum);
cv::accumulateSquare(getPlane(srcMat, n, cn), accum);
for (int cn = 0; cn < channels; cn++)
{
if (cn + ksize < channels)
{
cv::accumulateSquare(srcBlob.getPlane(n, cn + ksize), accum);
cv::accumulateSquare(getPlane(srcMat, n, cn + ksize), accum);
}
if (cn - ksize - 1 >= 0)
{
Mat left = srcBlob.getPlane(n, cn - ksize - 1);
cv::subtract(accum, left.mul(left), accum); //subtractSquare
//subtractSquare
XMat left = getPlane(srcMat, n, cn - ksize - 1);
cv::pow(left, 2, left);
cv::subtract(accum, left, accum);
}
Mat dst = dstBlob.getPlane(n, cn);
XMat dst = getPlane(dstMat, n, cn);
accum.convertTo(dst, dst.type(), alpha/size, 1);
cv::pow(dst, beta, dst);
cv::divide(srcBlob.getPlane(n, cn), dst, dst);
}
cv::divide(getPlane(srcMat, n, cn), dst, dst);
}
}
}
void LRNLayer::spatialNormalization(Blob &srcBlob, Blob &dstBlob)
{
bool LRNLayerImpl::channelNoramlization_ocl(const UMat &src, UMat &dst)
{
#ifdef HAVE_OPENCL
if (src.offset != 0 || dst.offset != 0) //TODO: add offset
return false;
String buildOpts = String("-DT=") + ocl::typeToStr(src.type());
ocl::Kernel kerScale("LRNFillScale", ocl::dnn::lrn_oclsrc, buildOpts);
if (kerScale.empty())
return false;
ocl::Kernel kerOutput("LRNComputeOutput", ocl::dnn::lrn_oclsrc, buildOpts);
if (kerOutput.empty())
return false;
Shape shape = Shape::like(src);
int ksize = (size - 1) / 2;
size_t wgSize = ocl::Device::getDefault().maxWorkGroupSize();
UMat &scaleBuf = buf.umatRef();
size_t nthreads = (size_t)(shape.total() / shape[1]);
kerScale.args((int)nthreads,
ocl::KernelArg::PtrReadOnly(src), shape[0], shape[1], shape[2], shape[3],
size, (float)(alpha/size), (float)ksize, ocl::KernelArg::PtrWriteOnly(scaleBuf));
if (!kerScale.run(1, &nthreads, &wgSize, true))
return false;
nthreads = (size_t)shape.total();
kerOutput.args((int)nthreads,
ocl::KernelArg::PtrReadOnly(src), ocl::KernelArg::PtrReadOnly(scaleBuf),
-beta, ocl::KernelArg::PtrWriteOnly(dst) );
if (!kerOutput.run(1, &nthreads, &wgSize, true))
return false;
return true;
#else
(void)src;
(void)dst;
return false;
#endif
}
void LRNLayerImpl::spatialNormalization(Blob &src, Blob &dst)
{
if (!useOpenCL)
spatialNormalization_<Mat>(src, dst);
else
spatialNormalization_<UMat>(src, dst);
}
//TODO: fix cv::boxFilter with BORDER_ISOLATED flag in CPU mode
template<>
void LRNLayerImpl::sqrBoxFilter_<Mat>(const Mat &src, Mat &dst)
{
Mat bufMat = buf.getRef<Mat>();
src.copyTo(bufMat);
cv::sqrBoxFilter(bufMat, dst, dst.depth(), Size(size, size), Point(-1, -1), false, BORDER_CONSTANT);
}
template<>
void LRNLayerImpl::sqrBoxFilter_<UMat>(const UMat &src, UMat &dst)
{
cv::sqrBoxFilter(src, dst, dst.depth(), Size(size, size), Point(-1, -1), false, BORDER_CONSTANT | BORDER_ISOLATED);
}
template<typename XMat>
void LRNLayerImpl::spatialNormalization_(Blob &srcBlob, Blob &dstBlob)
{
int num = srcBlob.num();
int channels = srcBlob.channels();
XMat srcMat = srcBlob.getRefConst<XMat>();
XMat dstMat = dstBlob.getRef<XMat>();
for (int n = 0; n < num; n++)
{
for (int cn = 0; cn < channels; cn++)
{
Mat src = srcBlob.getPlane(n, cn);
Mat dst = dstBlob.getPlane(n, cn);
uchar *dataDst0 = dst.data;
XMat src = getPlane(srcMat, n, cn);
XMat dst = getPlane(dstMat, n, cn);
sqrBoxFilter_(src, dst);
cv::pow(srcBlob.getPlane(n, cn), 2, dst);
//TODO: check border type
cv::boxFilter(dst, dst, dst.depth(), cv::Size(size, size), cv::Point(-1, -1), false, cv::BORDER_CONSTANT);
dst.convertTo(dst, dst.type(), alpha/(size*size), 1);
cv::pow(dst, beta, dst);
cv::divide(src, dst, dst);
CV_Assert(dataDst0 == dst.data); //debug
}
}
}
}
Ptr<LRNLayer> LRNLayer::create(int type, int size, double alpha, double beta)
{
return Ptr<LRNLayer>(new LRNLayerImpl(type, size, alpha, beta));
}
}
}
......@@ -42,34 +42,36 @@
#ifndef __OPENCV_DNN_LAYERS_LRN_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_LRN_LAYER_HPP__
#include "../precomp.hpp"
#include <opencv2/dnn/all_layers.hpp>
namespace cv
{
namespace dnn
{
class LRNLayer : public Layer
{
enum
{
CHANNEL_NRM,
SPATIAL_NRM,
SPATIAL_CONTRAST_NRM //cuda-convnet feature
} type;
int size;
double alpha, beta;
Blob bufBlob;
class LRNLayerImpl : public LRNLayer
{
bool useOpenCL;
Blob buf;
void channelNoramlization(Blob &src, Blob &dst);
template<typename XMat>
void channelNoramlization_(Blob &src, Blob &dst);
bool channelNoramlization_ocl(const UMat &src, UMat &dst);
void spatialNormalization(Blob &src, Blob &dst);
template<typename XMat>
void spatialNormalization_(Blob &src, Blob &dst);
template<typename XMat>
void sqrBoxFilter_(const XMat &src, XMat &dst);
public:
public:
LRNLayer(LayerParams &params);
LRNLayerImpl(int type = CHANNEL_NRM, int size = 5, double alpha = 1, double beta = 0.75);
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
};
};
}
}
#endif
......@@ -42,20 +42,21 @@
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "mvn_layer.hpp"
#include <opencv2/dnn/shape_utils.hpp>
namespace cv
{
namespace dnn
{
MVNLayer::MVNLayer(LayerParams &params) : Layer(params)
MVNLayerImpl::MVNLayerImpl(bool normVariance_, bool acrossChannels_, double eps_)
{
eps = params.get<double>("eps", 1e-9);
acrossChannels = params.get<bool>("across_channels", false);
normalizeVariance = params.get<bool>("normalize_variance", true);
normVariance = normVariance_;
acrossChannels = acrossChannels_;
eps = eps_;
}
void MVNLayer::allocate(const std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
void MVNLayerImpl::allocate(const std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
{
outputs.resize(inputs.size());
for (size_t i = 0; i < inputs.size(); i++)
......@@ -65,20 +66,17 @@ void MVNLayer::allocate(const std::vector<Blob *> &inputs, std::vector<Blob> &ou
}
}
void MVNLayer::forward(std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
void MVNLayerImpl::forward(std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
{
for (size_t inpIdx = 0; inpIdx < inputs.size(); inpIdx++)
{
Blob &inpBlob = *inputs[inpIdx];
Blob &outBlob = outputs[inpIdx];
int workSize[2];
int splitDim = (acrossChannels) ? 1 : 2;
workSize[0] = (int)inpBlob.total(0, splitDim);
workSize[1] = (int)inpBlob.total(splitDim);
Mat inpMat = inpBlob.matRef().reshape(1, 2, workSize);
Mat outMat = outBlob.matRef().reshape(1, 2, workSize);
Shape workSize((int)inpBlob.total(0, splitDim), (int)inpBlob.total(splitDim));
Mat inpMat = reshaped(inpBlob.matRefConst(), workSize);
Mat outMat = reshaped(outBlob.matRef(), workSize);
Scalar mean, dev;
for (int i = 0; i < workSize[0]; i++)
......@@ -86,12 +84,18 @@ void MVNLayer::forward(std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
Mat inpRow = inpMat.row(i);
Mat outRow = outMat.row(i);
cv::meanStdDev(inpRow, mean, (normalizeVariance) ? dev : noArray());
double alpha = (normalizeVariance) ? 1/(eps + dev[0]) : 1;
cv::meanStdDev(inpRow, mean, (normVariance) ? dev : noArray());
double alpha = (normVariance) ? 1/(eps + dev[0]) : 1;
inpRow.convertTo(outRow, outRow.type(), alpha, -mean[0] * alpha);
}
}
}
Ptr<MVNLayer> MVNLayer::create(bool normVariance, bool acrossChannels, double eps)
{
return Ptr<MVNLayer>(new MVNLayerImpl(normVariance, acrossChannels, eps));
}
}
}
......@@ -42,20 +42,18 @@
#ifndef __OPENCV_DNN_LAYERS_MVN_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_MVN_LAYER_HPP__
#include "../precomp.hpp"
#include <opencv2/dnn/all_layers.hpp>
namespace cv
{
namespace dnn
{
class MVNLayer : public Layer
class MVNLayerImpl : public MVNLayer
{
double eps;
bool acrossChannels, normalizeVariance;
public:
MVNLayer(LayerParams &params);
MVNLayerImpl(bool normVariance_ = true, bool acrossChannels_ = false, double eps_ = 1e-9);
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
};
......
#include "op_blas.hpp"
#if HAVE_CBLAS
#include "opencv_cblas.hpp"
#endif
#include <iostream>
namespace cv
{
namespace dnn
{
void gemm(InputArray A, InputArray B, double alpha, InputOutputArray C, double beta, int flags)
{
if (C.isMat())
gemmCPU(A.getMat(), B.getMat(), alpha, C.getMatRef(), beta, flags);
else
{
cv::gemm(A, B, alpha, (beta == 0) ? noArray() : C, beta, C, flags);
}
}
inline void SwapRowCols(const Mat &A, int &rows, int &cols, bool isTrans)
{
CV_DbgAssert(A.dims == 2);
rows = (isTrans) ? A.cols : A.rows;
cols = (isTrans) ? A.rows : A.cols;
}
void gemmCPU(const Mat &A, const Mat &B, double alpha, Mat &C, double beta, int flags /*= 0*/)
{
#if HAVE_CBLAS
bool transA = static_cast<bool>(flags & GEMM_1_T);
bool transB = static_cast<bool>(flags & GEMM_2_T);
bool transC = static_cast<bool>(flags & GEMM_3_T);
int Arows, Acols, Brows, Bcols, Crows, Ccols;
SwapRowCols(A, Arows, Acols, transA);
SwapRowCols(B, Brows, Bcols, transB);
SwapRowCols(C, Crows, Ccols, transC);
CV_Assert(!(flags & GEMM_3_T));
CV_Assert(Acols == Brows && Arows == Crows && Bcols == Ccols);
CV_Assert(A.isContinuous() && B.isContinuous() && C.isContinuous());
CV_Assert(A.type() == B.type() && B.type() == C.type());
CV_Assert(A.data != C.data && B.data != C.data);
if (C.type() == CV_32F)
{
cblas_sgemm(CblasRowMajor, transA ? CblasTrans : CblasNoTrans, transB ? CblasTrans : CblasNoTrans,
Arows, Bcols, Acols,
(float)alpha, A.ptr<float>(), A.cols,
B.ptr<float>(), B.cols,
(float)beta, C.ptr<float>(), C.cols);
}
else if (C.type() == CV_64F)
{
//TODO: Should be tested
cblas_dgemm(CblasRowMajor, transA ? CblasTrans : CblasNoTrans, transB ? CblasTrans : CblasNoTrans,
Arows, Bcols, Acols,
alpha, A.ptr<double>(), A.cols,
B.ptr<double>(), B.cols,
beta, C.ptr<double>(), C.cols);
}
else
{
CV_Error(Error::BadDepth, "Only floating point types are supported");
}
#else
cv::gemm(A, B, alpha, C, beta, C, flags);
#endif
}
int getBlasThreads()
{
#ifdef OPENBLAS_VERSION
return openblas_get_num_threads();
#else
return 1;
#endif
}
void setBlasThreads(int numThreads)
{
#ifdef OPENBLAS_VERSION
openblas_set_num_threads(numThreads);
goto_set_num_threads(numThreads);
#else
(void)numThreads; //suppress compilers' warning
#endif
}
}
}
......@@ -39,47 +39,21 @@
//
//M*/
#ifndef __OPENCV_DNN_LAYERS_OP_BLAS_HPP__
#define __OPENCV_DNN_LAYERS_OP_BLAS_HPP__
#include "../precomp.hpp"
#include <opencv2/core/ocl.hpp>
#include "im2col.hpp"
#include "opencl_kernels_dnn.hpp"
namespace cv
{
namespace dnn
{
int getBlasThreads();
#ifdef HAVE_OPENCL
void im2col_ocl(UMat &img,
int channels, int height, int width,
int kernel_h, int kernel_w,
int pad_h, int pad_w,
int stride_h, int stride_w,
UMat &col)
{
int h_out = (height + 2 * pad_h - kernel_h) / stride_h + 1;
int w_out = (width + 2 * pad_w - kernel_w) / stride_w + 1;
CV_Assert(img.isContinuous() && col.isContinuous());
CV_Assert(img.total() == (size_t)channels * height * width);
CV_Assert(col.total() == (size_t)channels * kernel_h * kernel_w * h_out * w_out);
ocl::Kernel im2col_ker("im2col", ocl::dnn::im2col_oclsrc);
CV_Assert(!im2col_ker.empty());
void setBlasThreads(int numThreads);
im2col_ker.args(ocl::KernelArg::PtrReadOnly(img), (int)img.offset,
channels, height, width,
kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
h_out, w_out,
ocl::KernelArg::PtrWriteOnly(col), (int)col.offset
);
size_t localSize = ocl::Device::getDefault().maxWorkGroupSize();
size_t globalSize = (size_t)channels * h_out * w_out;
CV_Assert(im2col_ker.run(1, &globalSize, &localSize, true));
}
#endif // HAVE_OPENCL
void gemm(InputArray A, InputArray B, double alpha, InputOutputArray C, double beta, int flags = 0);
void gemmCPU(const Mat &A, const Mat &B, double alpha, Mat &C, double beta, int flags = 0);
}
}
#endif
\ No newline at end of file
......@@ -39,88 +39,84 @@
//
//M*/
#ifndef __OPENCV_DNN_LAYERS_IM2COL_HPP__
#define __OPENCV_DNN_LAYERS_IM2COL_HPP__
#include "../precomp.hpp"
#include <opencv2/core/ocl.hpp>
#include "opencl_kernels_dnn.hpp"
#include "op_im2col.hpp"
namespace cv
{
namespace dnn
{
template <typename Dtype>
void im2col_cpu(const Dtype* data_im,
#ifdef HAVE_OPENCL
bool im2col_ocl(const UMat &img,
int channels, int height, int width,
int kernel_h, int kernel_w,
int pad_h, int pad_w,
int stride_h, int stride_w,
Dtype* data_col)
UMat &col)
{
int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
int channels_col = channels * kernel_h * kernel_w;
for (int c = 0; c < channels_col; ++c) {
int w_offset = c % kernel_w;
int h_offset = (c / kernel_w) % kernel_h;
int c_im = c / kernel_h / kernel_w;
for (int h = 0; h < height_col; ++h) {
for (int w = 0; w < width_col; ++w) {
int h_pad = h * stride_h - pad_h + h_offset;
int w_pad = w * stride_w - pad_w + w_offset;
if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
data_col[(c * height_col + h) * width_col + w] =
data_im[(c_im * height + h_pad) * width + w_pad];
else
data_col[(c * height_col + h) * width_col + w] = 0;
}
}
}
int esz = img.elemSize();
CV_Assert(img.isContinuous() && col.isContinuous());
CV_Assert(img.total() == (size_t)channels * height * width);
CV_Assert(col.total() == (size_t)channels_col * height_col * width_col);
ocl::Kernel ker("im2col", ocl::dnn::im2col_oclsrc, String("-DT=") + ocl::typeToStr(img.type()));
if (ker.empty())
return false;
ker.args(ocl::KernelArg::PtrReadOnly(img), (int)img.offset/esz,
channels, height, width,
kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
height_col, width_col,
ocl::KernelArg::PtrWriteOnly(col), (int)col.offset/esz
);
size_t localSize = ocl::Device::getDefault().maxWorkGroupSize();
size_t globalSize = (size_t)channels * height_col * width_col;
return ker.run(1, &globalSize, &localSize, true);
}
template <typename Dtype>
void col2im_cpu(const Dtype* data_col,
bool col2im_ocl(const UMat &col,
int channels, int height, int width,
int patch_h, int patch_w,
int kernel_h, int kernel_w,
int pad_h, int pad_w,
int stride_h, int stride_w,
Dtype* data_im)
UMat &img)
{
memset(data_im, 0, height * width * channels * sizeof(Dtype));
int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
int channels_col = channels * kernel_h * kernel_w;
int esz = img.elemSize();
int height_col = (height + 2 * pad_h - patch_h) / stride_h + 1;
int width_col = (width + 2 * pad_w - patch_w) / stride_w + 1;
int channels_col = channels * patch_h * patch_w;
CV_Assert(img.isContinuous() && col.isContinuous());
CV_Assert(img.total() == (size_t)channels * height * width);
CV_Assert(col.total() == (size_t)channels_col * height_col * width_col);
for (int c = 0; c < channels_col; ++c)
{
int w_offset = c % patch_w;
int h_offset = (c / patch_w) % patch_h;
int c_im = c / patch_h / patch_w;
ocl::Kernel ker("col2im", ocl::dnn::col2im_oclsrc, String("-DT=") + ocl::typeToStr(col.type()));
if (ker.empty())
return false;
for (int h = 0; h < height_col; ++h)
{
for (int w = 0; w < width_col; ++w)
{
int h_pad = h * stride_h - pad_h + h_offset;
int w_pad = w * stride_w - pad_w + w_offset;
ker.args((int)img.total(),
ocl::KernelArg::PtrReadOnly(col), (int)col.offset/esz,
height, width, channels,
kernel_h, kernel_w,
pad_h, pad_w,
stride_h, stride_w,
height_col, width_col,
ocl::KernelArg::PtrWriteOnly(img), (int)img.offset/esz);
if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
data_im[(c_im * height + h_pad) * width + w_pad] +=
data_col[(c * height_col + h) * width_col + w];
}
}
}
size_t localSize = ocl::Device::getDefault().maxWorkGroupSize();
size_t globalSize = img.total();
return ker.run(1, &globalSize, &localSize, true);
}
#ifdef HAVE_OPENCL
void im2col_ocl(UMat &img,
int channels, int height, int width,
int kernel_h, int kernel_w,
int pad_h, int pad_w,
int stride_h, int stride_w,
UMat &col);
#endif
}
}
#endif
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_DNN_LAYERS_IM2COL_HPP__
#define __OPENCV_DNN_LAYERS_IM2COL_HPP__
#include "../precomp.hpp"
#include <iostream>
namespace cv
{
namespace dnn
{
template <typename Dtype>
class im2col_CpuPBody : public cv::ParallelLoopBody
{
const Dtype* data_im;
int channels, height, width;
int kernel_h, kernel_w;
int pad_h, pad_w;
int stride_h, stride_w;
Dtype* data_col;
int height_col, width_col, channels_col;
im2col_CpuPBody() {}
public:
static void run(const Dtype* data_im,
int channels, int height, int width,
int kernel_h, int kernel_w,
int pad_h, int pad_w,
int stride_h, int stride_w,
Dtype* data_col)
{
im2col_CpuPBody<Dtype> t;
t.data_im = data_im;
t.data_col = data_col;
t.channels = channels; t.height = height; t.width = width;
t.kernel_h = kernel_h; t.kernel_w = kernel_w;
t.pad_h = pad_h; t.pad_w = pad_w;
t.stride_h = stride_h; t.stride_w = stride_w;
t.height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
t.width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
t.channels_col = channels * kernel_h * kernel_w;
cv::parallel_for_(Range(0, t.channels_col), t);
}
virtual void operator ()(const Range &r) const
{
for (int c = r.start; c < r.end; ++c) {
int w_offset = c % kernel_w;
int h_offset = (c / kernel_w) % kernel_h;
int c_im = c / kernel_h / kernel_w;
for (int h = 0; h < height_col; ++h) {
for (int w = 0; w < width_col; ++w) {
int h_pad = h * stride_h - pad_h + h_offset;
int w_pad = w * stride_w - pad_w + w_offset;
if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
data_col[(c * height_col + h) * width_col + w] =
data_im[(c_im * height + h_pad) * width + w_pad];
else
data_col[(c * height_col + h) * width_col + w] = 0;
}
}
}
}
};
template <typename Dtype>
class col2im_CpuPBody : public cv::ParallelLoopBody
{
const Dtype* data_col;
int channels, height, width;
int kernel_h, kernel_w;
int pad_h, pad_w;
int stride_h, stride_w;
Dtype* data_im;
int height_col, width_col;
col2im_CpuPBody() {}
public:
static void run(const Dtype* data_col,
int channels, int height, int width,
int kernel_h, int kernel_w,
int pad_h, int pad_w,
int stride_h, int stride_w,
Dtype* data_im)
{
//TODO: single-threaded version switch
col2im_CpuPBody t;
t.data_col = data_col;
t.data_im = data_im;
t.channels = channels; t.height = height; t.width = width;
t.kernel_h = kernel_h; t.kernel_w = kernel_w;
t.pad_h = pad_h; t.pad_w = pad_w;
t.stride_h = stride_h; t.stride_w = stride_w;
t.height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
t.width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
int img_total = channels * height * width;
cv::parallel_for_(Range(0, img_total), t);
}
virtual void operator ()(const Range &r) const
{
for (int index = r.start; index < r.end; index++)
{
Dtype val = 0;
int w = index % width + pad_w;
int h = (index / width) % height + pad_h;
int c = index / (width * height);
// compute the start and end of the output
int w_col_start = (w < kernel_w) ? 0 : (w - kernel_w) / stride_w + 1;
int w_col_end = std::min(w / stride_w + 1, width_col);
int h_col_start = (h < kernel_h) ? 0 : (h - kernel_h) / stride_h + 1;
int h_col_end = std::min(h / stride_h + 1, height_col);
// equivalent implementation
int offset =
(c * kernel_h * kernel_w + h * kernel_w + w) * height_col * width_col;
int coeff_h_col = (1 - stride_h * kernel_w * height_col) * width_col;
int coeff_w_col = (1 - stride_w * height_col * width_col);
for (int h_col = h_col_start; h_col < h_col_end; ++h_col) {
for (int w_col = w_col_start; w_col < w_col_end; ++w_col) {
val += data_col[offset + h_col * coeff_h_col + w_col * coeff_w_col];
}
}
data_im[index] = val;
}
}
};
//single-threaded version
template <typename Dtype>
void col2im_cpu(const Dtype* data_col,
int channels, int height, int width,
int kernel_h, int kernel_w,
int pad_h, int pad_w,
int stride_h, int stride_w,
Dtype* data_im)
{
int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
int channels_col = channels * kernel_h * kernel_w;
std::memset(data_im, 0, height * width * channels * sizeof(Dtype));
for (int c = 0; c < channels_col; ++c)
{
int w_offset = c % kernel_w;
int h_offset = (c / kernel_w) % kernel_h;
int c_im = c / kernel_h / kernel_w;
for (int h = 0; h < height_col; ++h)
{
for (int w = 0; w < width_col; ++w)
{
int h_pad = h * stride_h - pad_h + h_offset;
int w_pad = w * stride_w - pad_w + w_offset;
if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
data_im[(c_im * height + h_pad) * width + w_pad] +=
data_col[(c * height_col + h) * width_col + w];
}
}
}
}
#ifdef HAVE_OPENCL
bool im2col_ocl(const UMat &img,
int channels, int height, int width,
int kernel_h, int kernel_w,
int pad_h, int pad_w,
int stride_h, int stride_w,
UMat &col);
bool col2im_ocl(const UMat &col,
int channels, int height, int width,
int kernel_h, int kernel_w,
int pad_h, int pad_w,
int stride_h, int stride_w,
UMat &img);
#endif
}
}
#endif
This diff is collapsed.
/*M///////////////////////////////////////////////////////////////////////////////////////
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
......@@ -42,37 +42,39 @@
#ifndef __OPENCV_DNN_LAYERS_POOLING_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_POOLING_LAYER_HPP__
#include "../precomp.hpp"
#include <opencv2/dnn/all_layers.hpp>
namespace cv
{
namespace dnn
{
class PoolingLayer : public Layer
{
enum
{
MAX,
AVE,
STOCHASTIC
};
int type;
int padH, padW;
int strideH, strideW;
int kernelH, kernelW;
class PoolingLayerImpl : public PoolingLayer
{
bool useOpenCL;
Size inp, out;
void computeOutputShape(Size inpSz);
bool pooling_ocl(const char *kname, const Blob &src, Blob &dst, Blob *mask = NULL);
int inpH, inpW;
int outH, outW;
void maxPooling(Blob &src, Blob &dst);
void maxPooling_cpu(Blob &src, Blob &dst);
bool maxPooling_ocl(Blob &src, Blob &dst);
void computeOutputShape(int inpH, int inpW);
void maxPooling(Blob &input, Blob &output);
void avePooling(Blob &input, Blob &output);
void avePooling(Blob &src, Blob &dst);
void avePooling_cpu(Blob &src, Blob &dst);
bool avePooling_ocl(Blob &src, Blob &dst);
public:
PoolingLayerImpl();
PoolingLayerImpl(int type, Size kernel, Size stride, Size pad);
public:
PoolingLayer(LayerParams &params);
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
};
};
}
}
#endif
This diff is collapsed.
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_DNN_LAYERS_RECURRENT_LAYERS_HPP__
#define __OPENCV_DNN_LAYERS_RECURRENT_LAYERS_HPP__
#include "../precomp.hpp"
#include <opencv2/dnn/all_layers.hpp>
namespace cv
{
namespace dnn
{
}
}
#endif
\ No newline at end of file
This diff is collapsed.
......@@ -42,26 +42,23 @@
#ifndef __OPENCV_DNN_LAYERS_RESHAPE_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_RESHAPE_LAYER_HPP__
#include "../precomp.hpp"
#include <opencv2/dnn/all_layers.hpp>
namespace cv
{
namespace dnn
{
class ReshapeLayer : public Layer
class ReshapeLayerImpl : public ReshapeLayer
{
std::vector<BlobShape> outShapes;
public:
ReshapeLayer(LayerParams &params);
ReshapeLayerImpl(const BlobShape &newShape_, Range applyingRange_);
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void forward(std::vector<Blob*>&, std::vector<Blob>&) {}
protected:
BlobShape shapeDesc;
int inAxis, inNumAxes, autoAxisIdx;
void computeOutputShape(int startAxis, int endAxis, BlobShape &inpShape, BlobShape &outShape);
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
};
Ptr<Layer> createFlattenLayer(LayerParams&);
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -40,4 +40,5 @@
//M*/
#include <opencv2/core.hpp>
#include "cvconfig.h"
#include <opencv2/dnn.hpp>
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment