Commit b39cd062 authored by Alexander Alekhin's avatar Alexander Alekhin

Merge remote-tracking branch 'upstream/3.4' into merge-3.4

parents 46def2fd ab8022f7
......@@ -1531,7 +1531,7 @@ class TegraCvtColor_##name##_Invoker : public cv::ParallelLoopBody \
public: \
TegraCvtColor_##name##_Invoker(const uchar * src_data_, size_t src_step_, uchar * dst_data_, size_t dst_step_, int width_, int height_) : \
cv::ParallelLoopBody(), src_data(src_data_), src_step(src_step_), dst_data(dst_data_), dst_step(dst_step_), width(width_), height(height_) {} \
virtual void operator()(const cv::Range& range) const \
virtual void operator()(const cv::Range& range) const CV_OVERRIDE \
{ \
CAROTENE_NS::func(CAROTENE_NS::Size2D(width, range.end-range.start), __VA_ARGS__); \
} \
......
add_definitions(-D__OPENCV_BUILD=1)
add_definitions(-D__OPENCV_APPS=1)
# Unified function for creating OpenCV applications:
# ocv_add_application(tgt [MODULES <m1> [<m2> ...]] SRCS <src1> [<src2> ...])
function(ocv_add_application the_target)
cmake_parse_arguments(APP "" "" "MODULES;SRCS" ${ARGN})
ocv_check_dependencies(${APP_MODULES})
if(NOT OCV_DEPENDENCIES_FOUND)
return()
endif()
project(${the_target})
ocv_target_include_modules_recurse(${the_target} ${APP_MODULES})
ocv_target_include_directories(${the_target} PRIVATE "${OpenCV_SOURCE_DIR}/include/opencv")
ocv_add_executable(${the_target} ${APP_SRCS})
ocv_target_link_libraries(${the_target} ${APP_MODULES})
set_target_properties(${the_target} PROPERTIES
DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}"
ARCHIVE_OUTPUT_DIRECTORY ${LIBRARY_OUTPUT_PATH}
RUNTIME_OUTPUT_DIRECTORY ${EXECUTABLE_OUTPUT_PATH}
OUTPUT_NAME "${the_target}")
if(ENABLE_SOLUTION_FOLDERS)
set_target_properties(${the_target} PROPERTIES FOLDER "applications")
endif()
if(INSTALL_CREATE_DISTRIB)
if(BUILD_SHARED_LIBS)
install(TARGETS ${the_target} RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} CONFIGURATIONS Release COMPONENT dev)
endif()
else()
install(TARGETS ${the_target} RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} COMPONENT dev)
endif()
endfunction()
link_libraries(${OPENCV_LINKER_LIBS})
macro(ocv_add_app directory)
......
SET(OPENCV_ANNOTATION_DEPS opencv_core opencv_highgui opencv_imgproc opencv_imgcodecs opencv_videoio)
ocv_check_dependencies(${OPENCV_ANNOTATION_DEPS})
if(NOT OCV_DEPENDENCIES_FOUND)
return()
endif()
project(annotation)
set(the_target opencv_annotation)
ocv_target_include_directories(${the_target} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}" "${OpenCV_SOURCE_DIR}/include/opencv")
ocv_target_include_modules_recurse(${the_target} ${OPENCV_ANNOTATION_DEPS})
file(GLOB SRCS *.cpp)
set(annotation_files ${SRCS})
ocv_add_executable(${the_target} ${annotation_files})
ocv_target_link_libraries(${the_target} ${OPENCV_ANNOTATION_DEPS})
set_target_properties(${the_target} PROPERTIES
DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}"
ARCHIVE_OUTPUT_DIRECTORY ${LIBRARY_OUTPUT_PATH}
RUNTIME_OUTPUT_DIRECTORY ${EXECUTABLE_OUTPUT_PATH}
OUTPUT_NAME "opencv_annotation")
if(ENABLE_SOLUTION_FOLDERS)
set_target_properties(${the_target} PROPERTIES FOLDER "applications")
endif()
if(INSTALL_CREATE_DISTRIB)
if(BUILD_SHARED_LIBS)
install(TARGETS ${the_target} RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} CONFIGURATIONS Release COMPONENT dev)
endif()
else()
install(TARGETS ${the_target} RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} COMPONENT dev)
endif()
ocv_add_application(opencv_annotation
MODULES opencv_core opencv_highgui opencv_imgproc opencv_imgcodecs opencv_videoio
SRCS opencv_annotation.cpp)
set(OPENCV_CREATESAMPLES_DEPS opencv_core opencv_imgproc opencv_objdetect opencv_imgcodecs opencv_highgui opencv_calib3d opencv_features2d opencv_videoio)
ocv_check_dependencies(${OPENCV_CREATESAMPLES_DEPS})
if(NOT OCV_DEPENDENCIES_FOUND)
return()
endif()
project(createsamples)
set(the_target opencv_createsamples)
ocv_target_include_directories(${the_target} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}" "${OpenCV_SOURCE_DIR}/include/opencv")
ocv_target_include_modules_recurse(${the_target} ${OPENCV_CREATESAMPLES_DEPS})
file(GLOB SRCS *.cpp)
file(GLOB HDRS *.h*)
set(createsamples_files ${SRCS} ${HDRS})
ocv_add_executable(${the_target} ${createsamples_files})
ocv_target_link_libraries(${the_target} ${OPENCV_CREATESAMPLES_DEPS})
set_target_properties(${the_target} PROPERTIES
DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}"
ARCHIVE_OUTPUT_DIRECTORY ${LIBRARY_OUTPUT_PATH}
RUNTIME_OUTPUT_DIRECTORY ${EXECUTABLE_OUTPUT_PATH}
OUTPUT_NAME "opencv_createsamples")
if(ENABLE_SOLUTION_FOLDERS)
set_target_properties(${the_target} PROPERTIES FOLDER "applications")
endif()
if(INSTALL_CREATE_DISTRIB)
if(BUILD_SHARED_LIBS)
install(TARGETS ${the_target} RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} CONFIGURATIONS Release COMPONENT dev)
endif()
else()
install(TARGETS ${the_target} OPTIONAL RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} COMPONENT dev)
endif()
ocv_add_application(opencv_createsamples
MODULES opencv_core opencv_imgproc opencv_objdetect opencv_imgcodecs opencv_highgui opencv_calib3d opencv_features2d opencv_videoio
SRCS ${SRCS})
set(OPENCV_INTERACTIVECALIBRATION_DEPS opencv_core opencv_imgproc opencv_features2d opencv_highgui opencv_calib3d opencv_videoio)
set(DEPS opencv_core opencv_imgproc opencv_features2d opencv_highgui opencv_calib3d opencv_videoio)
if(${BUILD_opencv_aruco})
list(APPEND OPENCV_INTERACTIVECALIBRATION_DEPS opencv_aruco)
list(APPEND DEPS opencv_aruco)
endif()
ocv_check_dependencies(${OPENCV_INTERACTIVECALIBRATION_DEPS})
if(NOT OCV_DEPENDENCIES_FOUND)
return()
endif()
project(interactive-calibration)
set(the_target opencv_interactive-calibration)
ocv_target_include_directories(${the_target} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}" "${OpenCV_SOURCE_DIR}/include/opencv")
ocv_target_include_modules_recurse(${the_target} ${OPENCV_INTERACTIVECALIBRATION_DEPS})
file(GLOB SRCS *.cpp)
file(GLOB HDRS *.h*)
set(interactive-calibration_files ${SRCS} ${HDRS})
ocv_add_executable(${the_target} ${interactive-calibration_files})
ocv_target_link_libraries(${the_target} ${OPENCV_INTERACTIVECALIBRATION_DEPS})
set_target_properties(${the_target} PROPERTIES
DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}"
ARCHIVE_OUTPUT_DIRECTORY ${LIBRARY_OUTPUT_PATH}
RUNTIME_OUTPUT_DIRECTORY ${EXECUTABLE_OUTPUT_PATH}
OUTPUT_NAME "opencv_interactive-calibration")
if(ENABLE_SOLUTION_FOLDERS)
set_target_properties(${the_target} PROPERTIES FOLDER "applications")
endif()
if(INSTALL_CREATE_DISTRIB)
if(BUILD_SHARED_LIBS)
install(TARGETS ${the_target} RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} CONFIGURATIONS Release COMPONENT dev)
endif()
else()
install(TARGETS ${the_target} OPTIONAL RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} COMPONENT dev)
endif()
ocv_add_application(opencv_interactive-calibration MODULES ${DEPS} SRCS ${SRCS})
set(OPENCV_TRAINCASCADE_DEPS opencv_core opencv_imgproc opencv_objdetect opencv_imgcodecs opencv_highgui opencv_calib3d opencv_features2d)
ocv_check_dependencies(${OPENCV_TRAINCASCADE_DEPS})
if(NOT OCV_DEPENDENCIES_FOUND)
return()
endif()
project(traincascade)
set(the_target opencv_traincascade)
ocv_warnings_disable(CMAKE_CXX_FLAGS -Woverloaded-virtual
-Winconsistent-missing-override -Wsuggest-override
)
ocv_target_include_directories(${the_target} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}" "${OpenCV_SOURCE_DIR}/include/opencv")
ocv_target_include_modules_recurse(${the_target} ${OPENCV_TRAINCASCADE_DEPS})
ocv_warnings_disable(CMAKE_CXX_FLAGS -Woverloaded-virtual -Winconsistent-missing-override -Wsuggest-override)
file(GLOB SRCS *.cpp)
file(GLOB HDRS *.h*)
set(traincascade_files ${SRCS} ${HDRS})
ocv_add_executable(${the_target} ${traincascade_files})
ocv_target_link_libraries(${the_target} ${OPENCV_TRAINCASCADE_DEPS})
set_target_properties(${the_target} PROPERTIES
DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}"
ARCHIVE_OUTPUT_DIRECTORY ${LIBRARY_OUTPUT_PATH}
RUNTIME_OUTPUT_DIRECTORY ${EXECUTABLE_OUTPUT_PATH}
OUTPUT_NAME "opencv_traincascade")
if(ENABLE_SOLUTION_FOLDERS)
set_target_properties(${the_target} PROPERTIES FOLDER "applications")
endif()
if(INSTALL_CREATE_DISTRIB)
if(BUILD_SHARED_LIBS)
install(TARGETS ${the_target} RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} CONFIGURATIONS Release COMPONENT dev)
endif()
else()
install(TARGETS ${the_target} OPTIONAL RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} COMPONENT dev)
endif()
ocv_add_application(opencv_traincascade
MODULES opencv_core opencv_imgproc opencv_objdetect opencv_imgcodecs opencv_highgui opencv_calib3d opencv_features2d
SRCS ${SRCS})
set(OPENCV_APPLICATION_DEPS opencv_core)
ocv_check_dependencies(${OPENCV_APPLICATION_DEPS})
if(NOT OCV_DEPENDENCIES_FOUND)
return()
endif()
project(opencv_version)
set(the_target opencv_version)
ocv_target_include_modules_recurse(${the_target} ${OPENCV_APPLICATION_DEPS})
ocv_add_executable(${the_target} opencv_version.cpp)
ocv_target_link_libraries(${the_target} ${OPENCV_APPLICATION_DEPS})
set_target_properties(${the_target} PROPERTIES
DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}"
RUNTIME_OUTPUT_DIRECTORY ${EXECUTABLE_OUTPUT_PATH}
OUTPUT_NAME "opencv_version")
set_target_properties(${the_target} PROPERTIES FOLDER "applications")
if(INSTALL_CREATE_DISTRIB)
if(BUILD_SHARED_LIBS)
install(TARGETS ${the_target} RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} CONFIGURATIONS Release COMPONENT libs)
endif()
else()
install(TARGETS ${the_target} RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} COMPONENT libs)
endif()
ocv_add_application(opencv_version MODULES opencv_core SRCS opencv_version.cpp)
if(WIN32)
project(opencv_version_win32)
set(the_target opencv_version_win32)
ocv_target_include_modules_recurse(${the_target} ${OPENCV_APPLICATION_DEPS})
ocv_add_executable(${the_target} opencv_version.cpp)
ocv_target_link_libraries(${the_target} ${OPENCV_APPLICATION_DEPS})
target_compile_definitions(${the_target} PRIVATE "OPENCV_WIN32_API=1")
set_target_properties(${the_target} PROPERTIES
DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}"
RUNTIME_OUTPUT_DIRECTORY ${EXECUTABLE_OUTPUT_PATH}
OUTPUT_NAME "opencv_version_win32")
set_target_properties(${the_target} PROPERTIES FOLDER "applications")
if(INSTALL_CREATE_DISTRIB)
if(BUILD_SHARED_LIBS)
install(TARGETS ${the_target} RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} CONFIGURATIONS Release COMPONENT libs)
endif()
else()
install(TARGETS ${the_target} RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} COMPONENT libs)
endif()
ocv_add_application(opencv_version_win32 MODULES opencv_core SRCS opencv_version.cpp)
target_compile_definitions(opencv_version_win32 PRIVATE "OPENCV_WIN32_API=1")
endif()
SET(OPENCV_VISUALISATION_DEPS opencv_core opencv_highgui opencv_imgproc opencv_videoio opencv_imgcodecs)
ocv_check_dependencies(${OPENCV_VISUALISATION_DEPS})
if(NOT OCV_DEPENDENCIES_FOUND)
return()
endif()
project(visualisation)
set(the_target opencv_visualisation)
ocv_target_include_directories(${the_target} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}" "${OpenCV_SOURCE_DIR}/include/opencv")
ocv_target_include_modules_recurse(${the_target} ${OPENCV_VISUALISATION_DEPS})
file(GLOB SRCS *.cpp)
set(visualisation_files ${SRCS})
ocv_add_executable(${the_target} ${visualisation_files})
ocv_target_link_libraries(${the_target} ${OPENCV_VISUALISATION_DEPS})
set_target_properties(${the_target} PROPERTIES
DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}"
ARCHIVE_OUTPUT_DIRECTORY ${LIBRARY_OUTPUT_PATH}
RUNTIME_OUTPUT_DIRECTORY ${EXECUTABLE_OUTPUT_PATH}
OUTPUT_NAME "opencv_visualisation")
if(ENABLE_SOLUTION_FOLDERS)
set_target_properties(${the_target} PROPERTIES FOLDER "applications")
endif()
if(INSTALL_CREATE_DISTRIB)
if(BUILD_SHARED_LIBS)
install(TARGETS ${the_target} RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} CONFIGURATIONS Release COMPONENT dev)
endif()
else()
install(TARGETS ${the_target} RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} COMPONENT dev)
endif()
ocv_add_application(opencv_visualisation
MODULES opencv_core opencv_highgui opencv_imgproc opencv_videoio opencv_imgcodecs
SRCS opencv_visualisation.cpp)
......@@ -361,6 +361,23 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
static Ptr<PermuteLayer> create(const LayerParams& params);
};
/**
* Permute channels of 4-dimensional input blob.
* @param group Number of groups to split input channels and pick in turns
* into output blob.
*
* \f[ groupSize = \frac{number\ of\ channels}{group} \f]
* \f[ output(n, c, h, w) = input(n, groupSize \times (c \% group) + \lfloor \frac{c}{group} \rfloor, h, w) \f]
* Read more at https://arxiv.org/pdf/1707.01083.pdf
*/
class CV_EXPORTS ShuffleChannelLayer : public Layer
{
public:
static Ptr<Layer> create(const LayerParams& params);
int group;
};
/**
* @brief Adds extra values for specific axes.
* @param paddings Vector of paddings in format
......@@ -575,6 +592,17 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
static Ptr<ResizeLayer> create(const LayerParams& params);
};
/**
* @brief Bilinear resize layer from https://github.com/cdmh/deeplab-public
*
* It differs from @ref ResizeLayer in output shape and resize scales computations.
*/
class CV_EXPORTS InterpLayer : public Layer
{
public:
static Ptr<Layer> create(const LayerParams& params);
};
class CV_EXPORTS ProposalLayer : public Layer
{
public:
......
......@@ -988,52 +988,26 @@ struct Net::Impl
ld.inputBlobsId[inNum] = from;
}
static void splitPin(const String &pinAlias, String &layerName, String &outName)
{
size_t delimPos = pinAlias.find('.');
layerName = pinAlias.substr(0, delimPos);
outName = (delimPos == String::npos) ? String() : pinAlias.substr(delimPos + 1);
}
int resolvePinOutputName(LayerData &ld, const String &outName)
{
if (outName.empty())
return 0;
if (std::isdigit(outName[0]))
{
char *lastChar;
long inum = std::strtol(outName.c_str(), &lastChar, 10);
if (*lastChar == 0)
{
CV_Assert(inum == (int)inum);
return (int)inum;
}
}
return ld.getLayerInstance()->outputNameToIndex(outName);
}
LayerPin getPinByAlias(const String &pinAlias)
LayerPin getPinByAlias(const String &layerName)
{
LayerPin pin;
String layerName, outName;
splitPin(pinAlias, layerName, outName);
pin.lid = (layerName.empty()) ? 0 : getLayerId(layerName);
if (pin.lid >= 0)
pin.oid = resolvePinOutputName(getLayerData(pin.lid), outName);
pin.oid = resolvePinOutputName(getLayerData(pin.lid), layerName);
return pin;
}
std::vector<LayerPin> getLayerOutPins(const String &pinAlias)
std::vector<LayerPin> getLayerOutPins(const String &layerName)
{
String layerName, outName;
splitPin(pinAlias, layerName, outName);
int lid = (layerName.empty()) ? 0 : getLayerId(layerName);
std::vector<LayerPin> pins;
......@@ -2044,12 +2018,6 @@ int Net::addLayer(const String &name, const String &type, LayerParams &params)
{
CV_TRACE_FUNCTION();
if (name.find('.') != String::npos)
{
CV_Error(Error::StsBadArg, "Added layer name \"" + name + "\" must not contain dot symbol");
return -1;
}
if (impl->getLayerId(name) >= 0)
{
CV_Error(Error::StsBadArg, "Layer \"" + name + "\" already into net");
......@@ -2689,7 +2657,7 @@ int Layer::inputNameToIndex(String)
int Layer::outputNameToIndex(const String&)
{
return -1;
return 0;
}
bool Layer::supportBackend(int backendId)
......
......@@ -84,6 +84,7 @@ void initializeLayerFactory()
CV_DNN_REGISTER_LAYER_CLASS(Reshape, ReshapeLayer);
CV_DNN_REGISTER_LAYER_CLASS(Flatten, FlattenLayer);
CV_DNN_REGISTER_LAYER_CLASS(Resize, ResizeLayer);
CV_DNN_REGISTER_LAYER_CLASS(Interp, InterpLayer);
CV_DNN_REGISTER_LAYER_CLASS(CropAndResize, CropAndResizeLayer);
CV_DNN_REGISTER_LAYER_CLASS(Convolution, ConvolutionLayer);
......@@ -115,6 +116,7 @@ void initializeLayerFactory()
CV_DNN_REGISTER_LAYER_CLASS(Crop, CropLayer);
CV_DNN_REGISTER_LAYER_CLASS(Eltwise, EltwiseLayer);
CV_DNN_REGISTER_LAYER_CLASS(Permute, PermuteLayer);
CV_DNN_REGISTER_LAYER_CLASS(ShuffleChannel, ShuffleChannelLayer);
CV_DNN_REGISTER_LAYER_CLASS(PriorBox, PriorBoxLayer);
CV_DNN_REGISTER_LAYER_CLASS(PriorBoxClustered, PriorBoxLayer);
CV_DNN_REGISTER_LAYER_CLASS(Reorg, ReorgLayer);
......
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2018, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
#include "../precomp.hpp"
#include "layers_common.hpp"
......
......@@ -310,7 +310,6 @@ public:
innerProductOp = Ptr<OCL4DNNInnerProduct<float> >(new OCL4DNNInnerProduct<float>(config));
}
UMat biasOnesMat = UMat::ones(outerSize, 1, umat_blobs[0].type());
for (size_t i = 0; i < inputs.size(); i++)
{
MatShape inshape, outshape;
......@@ -320,7 +319,6 @@ public:
UMat srcMat, dstMat;
srcMat = inputs[i].reshape(1, inshape.size(), &inshape[0]);
dstMat = outputs[i].reshape(1, outshape.size(), &outshape[0]);
dstMat.setTo(0.0f);
if (!innerProductOp->Forward(srcMat, (use_half) ? half_blobs[0] : umat_blobs[0],
(bias) ? (use_half ? half_blobs[1] : umat_blobs[1]) : UMat(),
......@@ -332,6 +330,7 @@ public:
if (!use_half && bias && (outerSize > 1))
{
UMat biasOnesMat = UMat::ones(outerSize, 1, umat_blobs[0].type());
UMat& biases = umat_blobs[1];
cv::gemm(biasOnesMat, biases, 1, dstMat, 1, dstMat, 0);
}
......@@ -354,6 +353,7 @@ public:
if (bias)
{
UMat biasOnesMat = UMat::ones(outerSize, 1, umat_blobs[0].type());
UMat& biases = umat_blobs[1];
cv::gemm(biasOnesMat, biases, 1, dstMat, 1, dstMat, 0);
}
......
......@@ -11,7 +11,7 @@
namespace cv { namespace dnn {
class ResizeLayerImpl CV_FINAL : public ResizeLayer
class ResizeLayerImpl : public ResizeLayer
{
public:
ResizeLayerImpl(const LayerParams& params)
......@@ -33,7 +33,7 @@ public:
interpolation = params.get<String>("interpolation");
CV_Assert(interpolation == "nearest" || interpolation == "bilinear");
alignCorners = params.get<bool>("align_corners", false);
bool alignCorners = params.get<bool>("align_corners", false);
if (alignCorners)
CV_Error(Error::StsNotImplemented, "Resize with align_corners=true is not implemented");
}
......@@ -66,6 +66,8 @@ public:
outHeight = outputs[0].size[2];
outWidth = outputs[0].size[3];
}
scaleHeight = static_cast<float>(inputs[0]->size[2]) / outHeight;
scaleWidth = static_cast<float>(inputs[0]->size[3]) / outWidth;
}
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
......@@ -103,8 +105,6 @@ public:
const int inpWidth = inp.size[3];
const int inpSpatialSize = inpHeight * inpWidth;
const int outSpatialSize = outHeight * outWidth;
const float heightScale = static_cast<float>(inpHeight) / (outHeight);
const float widthScale = static_cast<float>(inpWidth) / (outWidth);
const int numPlanes = inp.size[0] * inp.size[1];
CV_Assert(inp.isContinuous(), out.isContinuous());
......@@ -112,13 +112,13 @@ public:
Mat outPlanes = out.reshape(1, numPlanes * outHeight);
for (int y = 0; y < outHeight; ++y)
{
float input_y = y * heightScale;
float input_y = y * scaleHeight;
int y0 = static_cast<int>(input_y);
const float* inpData_row0 = inpPlanes.ptr<float>(y0);
const float* inpData_row1 = inpPlanes.ptr<float>(std::min(y0 + 1, inpHeight - 1));
for (int x = 0; x < outWidth; ++x)
{
float input_x = x * widthScale;
float input_x = x * scaleWidth;
int x0 = static_cast<int>(input_x);
int x1 = std::min(x0 + 1, inpWidth - 1);
......@@ -162,10 +162,10 @@ public:
return Ptr<BackendNode>();
}
private:
protected:
int outWidth, outHeight, zoomFactorWidth, zoomFactorHeight;
String interpolation;
bool alignCorners;
float scaleWidth, scaleHeight;
};
......@@ -174,5 +174,44 @@ Ptr<ResizeLayer> ResizeLayer::create(const LayerParams& params)
return Ptr<ResizeLayer>(new ResizeLayerImpl(params));
}
class InterpLayerImpl CV_FINAL : public ResizeLayerImpl
{
public:
InterpLayerImpl(const LayerParams& params) : ResizeLayerImpl(params) {}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
CV_Assert(inputs.size() == 1, inputs[0].size() == 4);
outputs.resize(1, inputs[0]);
outputs[0][2] = outHeight > 0 ? outHeight : (1 + zoomFactorHeight * (outputs[0][2] - 1));
outputs[0][3] = outWidth > 0 ? outWidth : (1 + zoomFactorWidth * (outputs[0][3] - 1));
// We can work in-place (do nothing) if input shape == output shape.
return (outputs[0][2] == inputs[0][2]) && (outputs[0][3] == inputs[0][3]);
}
virtual void finalize(const std::vector<Mat*>& inputs, std::vector<Mat> &outputs) CV_OVERRIDE
{
if (!outWidth && !outHeight)
{
outHeight = outputs[0].size[2];
outWidth = outputs[0].size[3];
}
int inpHeight = inputs[0]->size[2];
int inpWidth = inputs[0]->size[3];
scaleHeight = (outHeight > 1) ? (static_cast<float>(inpHeight - 1) / (outHeight - 1)) : 0.f;
scaleWidth = (outWidth > 1) ? (static_cast<float>(inpWidth - 1) / (outWidth - 1)) : 0.f;
}
};
Ptr<Layer> InterpLayer::create(const LayerParams& params)
{
LayerParams lp(params);
lp.set("interpolation", "bilinear");
return Ptr<Layer>(new InterpLayerImpl(lp));
}
} // namespace dnn
} // namespace cv
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2018, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
#include "../precomp.hpp"
namespace cv { namespace dnn {
class ShuffleChannelLayerImpl CV_FINAL : public ShuffleChannelLayer
{
public:
ShuffleChannelLayerImpl(const LayerParams& params)
{
group = params.get<int>("group", 1);
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
CV_Assert(inputs.size() == 1 && inputs[0].size() == 4);
CV_Assert(inputs[0][1] % group == 0);
Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
return group == 1;
}
virtual void finalize(const std::vector<Mat*>& inputs, std::vector<Mat> &outputs) CV_OVERRIDE
{
if (group != 1)
{
LayerParams lp;
float order[] = {0, 2, 1, 3};
lp.set("order", DictValue::arrayInt(&order[0], 4));
permute = PermuteLayer::create(lp);
Mat inp = *inputs[0];
Mat out = outputs[0];
permuteInpShape.resize(4);
permuteInpShape[0] = inp.size[0];
permuteInpShape[1] = group;
permuteInpShape[2] = inp.size[1] / group;
permuteInpShape[3] = inp.size[2]*inp.size[3];
permuteOutShape.resize(4);
permuteOutShape[0] = permuteInpShape[0];
permuteOutShape[1] = permuteInpShape[2];
permuteOutShape[2] = permuteInpShape[1];
permuteOutShape[3] = permuteInpShape[3];
inp = inp.reshape(1, permuteInpShape);
out = out.reshape(1, permuteOutShape);
std::vector<Mat*> permuteInputs(1, &inp);
std::vector<Mat> permuteOutputs(1, out);
permute->finalize(permuteInputs, permuteOutputs);
}
}
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) CV_OVERRIDE
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
Mat inp = *inputs[0];
Mat out = outputs[0];
if (inp.data != out.data)
{
if (!permute.empty())
{
inp = inp.reshape(1, permuteInpShape);
out = out.reshape(1, permuteOutShape);
std::vector<Mat*> permuteInputs(1, &inp);
std::vector<Mat> permuteOutputs(1, out);
permute->forward(permuteInputs, permuteOutputs, internals);
}
else
inp.copyTo(out);
}
}
private:
Ptr<PermuteLayer> permute;
std::vector<int> permuteInpShape, permuteOutShape;
};
Ptr<Layer> ShuffleChannelLayer::create(const LayerParams& params)
{
return Ptr<Layer>(new ShuffleChannelLayerImpl(params));
}
} // namespace dnn
} // namespace cv
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -306,6 +306,7 @@ class OCL4DNNConvSpatial
std::string kernel_name_;
std::string cache_path_;
bool use_cache_path_; // true if cache_path_ directory exists
bool run_auto_tuning_;
bool force_auto_tuning_;
int32_t kernel_index_;
std::vector< cv::Ptr<kernelConfig> > kernelQueue;
......
......@@ -55,6 +55,7 @@
#include "../include/math_functions.hpp"
#include "../include/default_kernel_config.hpp"
#include "opencv2/dnn/shape_utils.hpp"
#include "opencv2/core/utils/logger.hpp"
#if defined WIN32 || defined _WIN32
#include <windows.h>
......@@ -67,6 +68,69 @@ typedef std::map<std::string, std::string> kernel_hash_t;
static kernel_hash_t kernelConfigMap;
static bool defaultConfigLoaded = false;
static std::string sanitize(const std::string& s)
{
std::string s_ = s;
for (size_t i = 0; i < s_.size(); i++)
{
char c = s_[i];
if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'))
{
s_[i] = '_';
}
}
// TODO add hash?
// s_ = s_ + cv::format("_%08llx", crc64((uchar*)s.c_str(), s.size()));
return s_;
}
static void initializeGlobalBuiltinConfigurations(const std::string& cache_path)
{
CV_Assert(defaultConfigLoaded == false);
CV_Assert(kernelConfigMap.empty());
/* fp32 config */
size_t numConfigs = sizeof(default_kernel_config_intel_fp32) /
sizeof(default_kernel_config_intel_fp32[0]) / 2;
for (size_t i = 0; i < numConfigs; i++)
{
std::string key = std::string("Intel(R) Corporation_") + default_kernel_config_intel_fp32[2 * i];
if (!cache_path.empty())
{
std::string cacheFile = cache_path + sanitize(key);
std::ifstream cachedKernel(cacheFile.c_str());
if (cachedKernel)
continue; // external configuration found, skip builtin
}
std::pair<std::string, std::string> entry(
key,
default_kernel_config_intel_fp32[2 * i + 1]);
kernelConfigMap.insert(entry);
}
/* fp16 config */
numConfigs = sizeof(default_kernel_config_intel_fp16) /
sizeof(default_kernel_config_intel_fp16[0]) / 2;
for (size_t i = 0; i < numConfigs; i++)
{
std::string key = std::string("Intel(R) Corporation_") + default_kernel_config_intel_fp16[2 * i];
if (!cache_path.empty())
{
std::string cacheFile = cache_path + sanitize(key);
std::ifstream cachedKernel(cacheFile.c_str());
if (cachedKernel)
continue; // external configuration found, skip builtin
}
std::pair<std::string, std::string> entry(
key,
default_kernel_config_intel_fp16[2 * i + 1]);
kernelConfigMap.insert(entry);
}
defaultConfigLoaded = true;
}
template<typename Dtype>
OCL4DNNConvSpatial<Dtype>::OCL4DNNConvSpatial(OCL4DNNConvConfig config)
{
......@@ -139,9 +203,8 @@ OCL4DNNConvSpatial<Dtype>::OCL4DNNConvSpatial(OCL4DNNConvConfig config)
}
}
force_auto_tuning_ =
(use_cache_path_ && !utils::getConfigurationParameterBool("OPENCV_OCL4DNN_DISABLE_AUTO_TUNING", false))
|| utils::getConfigurationParameterBool("OPENCV_OCL4DNN_FORCE_AUTO_TUNING", false);
run_auto_tuning_ = use_cache_path_ && !utils::getConfigurationParameterBool("OPENCV_OCL4DNN_DISABLE_AUTO_TUNING", false);
force_auto_tuning_ = utils::getConfigurationParameterBool("OPENCV_OCL4DNN_FORCE_AUTO_TUNING", false);
}
template<typename Dtype>
......@@ -272,40 +335,38 @@ void OCL4DNNConvSpatial<Dtype>::setupKernelDetails(int32_t kernelType,
// options
options_ << " -cl-fast-relaxed-math -D KERNEL_IDLF -D convolve_simd=" << kernel_name_;
options_ << " -cl-mad-enable";
if (clOptionSupport("-cl-no-subgroup-ifp"))
options_ << " -cl-no-subgroup-ifp ";
// defs
int32_t output_width = output_w_;
int32_t output_height = output_h_;
int32_t output_block_width = blockM;
int32_t output_block_height = blockK;
const int32_t last_block_width = (output_width % output_block_width == 0) ?
output_block_width : output_width % output_block_width;
const int32_t last_block_height = (output_height % output_block_height == 0) ?
output_block_height : output_height % output_block_height;
int tile_x = alignSize((output_block_width - 1) * stride_w_ + kernel_w_ * dilation_w_, 4);
int tile_y = (output_block_height -1) * stride_h_ + kernel_h_ * dilation_h_;
int tile_y_stride = (4 * simd_size) / tile_x;
int invec_size = divUp(tile_y, tile_y_stride);
int tile_x = (output_block_width - 1) * stride_w_ + kernel_w_ * dilation_w_;
int tile_y = (output_block_height - 1) * stride_h_ + kernel_h_ * dilation_h_;
int invec_size = tile_y;
addDef("SIMD_SIZE", simd_size);
addDef("filter_qualifier", "__global");
addDef("OUT_BLOCK_WIDTH", output_block_width);
addDef("OUT_BLOCK_HEIGHT", output_block_height);
addDef("LAST_BLOCK_WIDTH", last_block_width);
addDef("LAST_BLOCK_HEIGHT", last_block_height);
addDef("INPUT_DEPTH", channels_ / group_);
addDef("TOTAL_INPUT_DEPTH_SIZE", channels_);
addDef("TOTAL_OUTPUT_DEPTH", num_output_);
addDef("NUM_FILTERS", M_);
addDef("TILE_X", tile_x);
addDef("TILE_Y", tile_y);
addDef("TILE_Y_STRIDE", tile_y_stride);
addDef("INVEC_SIZE", invec_size);
addDef("ALIGNED_NUM_FILTERS", (int)alignSize(M_, simd_size));
addDef("OUT_BLOCK_SIZE", (output_block_width*output_block_height));
addDef("APPLY_BIAS", bias_term_);
addDef("WEIGHT_PREF", ((kernel_w_ * kernel_h_) == 1) ? 1 : 8);
addDef("INPUT_PITCH", (width_ * height_));
addDef("OUTPUT_PITCH", (output_w_ * output_h_));
addDef("LEFT_FILTERS", ((int)alignSize(M_, simd_size) - M_));
addDef("INPUT_WIDTH", width_);
addDef("INPUT_HEIGHT", height_);
addDef("FILTERS_IN_GROUP", ((int)alignSize(M_, simd_size) / simd_size));
setFusionDefine(fused_activ_, fused_eltwise_);
src_ = cv::ocl::dnn::conv_layer_spatial_oclsrc;
......@@ -528,13 +589,6 @@ void OCL4DNNConvSpatial<Dtype>::calculateBenchmark(const UMat &bottom, UMat &ver
return;
}
#define dbg
#ifdef dbg
#define dbgPrint(x) (x)
#else
#define dbgPrint(x)
#endif
// For large enough input size, we do not need to tune kernels for different
// size. The reason is with large input size, there will be enough work items
// to feed al the EUs.
......@@ -545,6 +599,7 @@ void OCL4DNNConvSpatial<Dtype>::calculateBenchmark(const UMat &bottom, UMat &ver
template<typename Dtype>
void OCL4DNNConvSpatial<Dtype>::generateKey()
{
std::string precision = (use_half_) ? "FP16" : "FP32";
std::stringstream keyBuilder;
// FIXME: to support fuse?
keyBuilder << "k" << kernel_w_ << "x" << kernel_h_ << "_"
......@@ -558,21 +613,12 @@ void OCL4DNNConvSpatial<Dtype>::generateKey()
<< "num" << num_ << "_"
<< "M" << M_ << "_"
<< "activ" << fused_activ_ << "_"
<< "eltwise" << fused_eltwise_;
<< "eltwise" << fused_eltwise_ << "_"
<< precision;
key_ = ocl::Device::getDefault().vendorName() + "_EU" + cv::format("%d", ocl::Device::getDefault().maxComputeUnits()) + "_" + keyBuilder.str();
key_sanitized_ = key_;
for (size_t i = 0; i < key_sanitized_.size(); i++)
{
char c = key_sanitized_[i];
if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'))
{
key_sanitized_[i] = '_';
}
}
// TODO add hash?
// key_sanitized_ = key_sanitized_ + cv::format("_%08llx", crc64((uchar*)key_.c_str(), key_.size()));
key_sanitized_ = sanitize(key_);
short_key_ = keyBuilder.str();
}
......@@ -587,11 +633,6 @@ std::string OCL4DNNConvSpatial<Dtype>::generateSpecificKey(int32_t type, int32_t
<< "_" << blockHeight
<< "_" << blockDepth;
if (!use_half_)
keyBuilder << "_float";
else
keyBuilder << "_half";
return keyBuilder.str();
}
......@@ -1135,7 +1176,7 @@ float OCL4DNNConvSpatial<float>::timedConvolve(const UMat &bottom, UMat &top,
cv::ocl::Timer timer(queue);
timer.start();
bool res = true;;
dbgPrint(std::cout << "Benchmarking kernel: " << config->kernelName << std::endl);
CV_LOG_INFO(NULL, "Benchmarking kernel: " << config->kernelName);
tuned_ = true;
int loop_cnt = 4;
for (int i = 0; i < loop_cnt; i++) {
......@@ -1152,7 +1193,6 @@ float OCL4DNNConvSpatial<float>::timedConvolve(const UMat &bottom, UMat &top,
}
float elapsedTime = timer.durationNS() * 1e-6 / loop_cnt;
#ifdef dbg
double out_w = output_w_;
double out_h = output_h_;
double out_z = M_;
......@@ -1160,16 +1200,8 @@ float OCL4DNNConvSpatial<float>::timedConvolve(const UMat &bottom, UMat &top,
double k_h = kernel_h_;
double k_z = channels_;
double totalFlops = ((k_w*k_h*k_z -1)*2)*(out_w*out_h*out_z)*num_;
std::cout << "\tEstimated Gflops:" << (totalFlops * 1e-9)
<< std::endl;
std::cout << "\tEstimated GFLOPS/S: " << ((totalFlops * 1e-9)*(1000.0/elapsedTime))
<< std::endl;
#if 0
std::cout << "Estimated utilization: " <<
((((totalFlops/1000)/1000)/1000)*(1000.0/elapsedTime))/880.0
<< std::endl;
#endif
#endif
CV_LOG_INFO(NULL, "\tEstimated Gflops:" << (totalFlops * 1e-9));
CV_LOG_INFO(NULL, "\tEstimated GFLOPS/S: " << ((totalFlops * 1e-9)*(1000.0/elapsedTime)));
return elapsedTime;
}
......@@ -1225,18 +1257,18 @@ bool OCL4DNNConvSpatial<float>::verifyResult(const UMat &bottom,
if (use_half_ && error_factor > 0.1 * fabs(verify_data[offset]) &&
error_factor > 0.04 && !(fabs(verify_data[offset]) < 1.e-3 && error_factor < 1.e-4))
{
dbgPrint(printf("test verification failed @ image %d group %d"
"out_ch %d h %d w %d got %G expected %G\n",
n, g, out_ch, h, w, data[offset], verify_data[offset]));
CV_LOG_ERROR(NULL, "test verification failed @ image " << n << " group " << g
<< " out_ch " << out_ch << " h " << h << " w " << w
<< " got " << data[offset] << " expected " << verify_data[offset]);
verificationFail = 1;
goto out;
}
else if (!use_half_ && error_factor > 0.1 * fabs(verify_data[offset]) &&
!(fabs(verify_data[offset]) < 1.e-3 && error_factor < 1.e-4))
{
dbgPrint(printf("test verification failed @ image %d group %d"
"out_ch %d h %d w %d got %G expected %G\n",
n, g, out_ch, h, w, data[offset], verify_data[offset]));
CV_LOG_ERROR(NULL, "test verification failed @ image " << n << " group " << g
<< " out_ch " << out_ch << " h " << h << " w " << w
<< " got " << data[offset] << " expected " << verify_data[offset]);
verificationFail = 1;
goto out;
}
......@@ -1517,17 +1549,11 @@ void OCL4DNNConvSpatial<float>::generate_idlf_tuneritems(std::vector< cv::Ptr<tu
return;
int actual_tile_x = kernel_w_ * dilation_w_ + (blockM - 1) * stride_w_ ;
int tile_x = alignSize(actual_tile_x, 4);
int tile_y = kernel_h_ * dilation_h_ + (blockK - 1) * stride_h_;
if (tile_x > (4 * simd_size))
return;
if ((blockM * blockK + divUp(tile_x * tile_y, simd_size)) > block_size_max)
int tile_x = alignSize(actual_tile_x, simd_size);
if (tile_x > simd_size)
return;
int tile_y_stride = (4 * simd_size) / tile_x;
int invec_size = divUp(tile_y, tile_y_stride);
if (invec_size > 4)
if (blockM * blockK > block_size_max)
return;
tunerItems.push_back(makePtr<tunerParam>(KERNEL_TYPE_INTEL_IDLF, blockM, blockK, simd_size));
......@@ -1570,11 +1596,7 @@ void OCL4DNNConvSpatial<float>::generateTunerItems(std::vector< cv::Ptr<tunerPar
for (uint32_t height = height_max; height > 0; height--)
{
generate_idlf_tuneritems(tunerItems, width, height, simd_size);
if (tunerItems.size() >= 8 && height == 2)
break;
}
if (tunerItems.size() >= 12 && width == 2)
break;
}
}
}
......@@ -1661,35 +1683,31 @@ void OCL4DNNConvSpatial<float>::setupConvolution(const UMat &bottom,
if (kernelQueue[x]->tested == false) {
bool verified = verifyResult(bottom, top, weight, bias, numImages, kernelQueue[x], verifyTop);
if (verified == false) {
dbgPrint(std::cout << "Kernel "
<< kernelQueue[x]->kernelName
<< " failed verification" << std::endl);
dbgPrint(std::cout << "kernelQueue[x]->workItem_output[0]: "
<< kernelQueue[x]->workItem_output[0] << " "
<< "kernelQueue[x]->workItem_output[1]: "
<< kernelQueue[x]->workItem_output[1] << " "
<< "kernelQueue[x]->workItem_output[2]: "
<< kernelQueue[x]->workItem_output[2] << " "
<< "kernelQueue[x]->kernelType: "
<< kernelQueue[x]->kernelType << " "
<< "kernelQueue[x]->global_work_size[0]: "
<< kernelQueue[x]->global_work_size[0] << " "
<< "kernelQueue[x]->global_work_size[1]: "
<< kernelQueue[x]->global_work_size[1] << " "
<< "kernelQueue[x]->global_work_size[2]: "
<< kernelQueue[x]->global_work_size[2] << " "
<< "kernelQueue[x]->local_work_size[0]: "
<< kernelQueue[x]->local_work_size[0] << " "
<< "kernelQueue[x]->local_work_size[1]: "
<< kernelQueue[x]->local_work_size[1] << " "
<< "kernelQueue[x]->local_work_size[2]: "
<< kernelQueue[x]->local_work_size[2] << " "
<< kernelQueue[x]->swizzle_weights << " "
<< kernelQueue[x]->use_null_local << std::endl);
CV_LOG_ERROR(NULL, "Kernel " << kernelQueue[x]->kernelName << " failed verification");
CV_LOG_ERROR(NULL, "kernelQueue[x]->workItem_output[0]: "
<< kernelQueue[x]->workItem_output[0] << " "
<< "kernelQueue[x]->workItem_output[1]: "
<< kernelQueue[x]->workItem_output[1] << " "
<< "kernelQueue[x]->workItem_output[2]: "
<< kernelQueue[x]->workItem_output[2] << " "
<< "kernelQueue[x]->kernelType: "
<< kernelQueue[x]->kernelType << " "
<< "kernelQueue[x]->global_work_size[0]: "
<< kernelQueue[x]->global_work_size[0] << " "
<< "kernelQueue[x]->global_work_size[1]: "
<< kernelQueue[x]->global_work_size[1] << " "
<< "kernelQueue[x]->global_work_size[2]: "
<< kernelQueue[x]->global_work_size[2] << " "
<< "kernelQueue[x]->local_work_size[0]: "
<< kernelQueue[x]->local_work_size[0] << " "
<< "kernelQueue[x]->local_work_size[1]: "
<< kernelQueue[x]->local_work_size[1] << " "
<< "kernelQueue[x]->local_work_size[2]: "
<< kernelQueue[x]->local_work_size[2] << " "
<< kernelQueue[x]->swizzle_weights << " "
<< kernelQueue[x]->use_null_local);
} else {
dbgPrint(std::cout << "Kernel "
<< kernelQueue[x]->kernelName
<< " pass verification" << std::endl);
CV_LOG_INFO(NULL, "Kernel " << kernelQueue[x]->kernelName << " pass verification");
}
}
#endif
......@@ -1718,19 +1736,28 @@ void OCL4DNNConvSpatial<float>::setupConvolution(const UMat &bottom,
break;
} else {
kernelQueue[fastestKernel]->tested = true;
dbgPrint(std::cout << "Kernel " <<
kernelQueue[fastestKernel]->kernelName <<
" failed verification" << std::endl);
CV_LOG_ERROR(NULL, "Kernel " << kernelQueue[fastestKernel]->kernelName <<
" failed verification");
failures++;
}
}
}
if (verification) {
dbgPrint(std::cout << "Kernel <" << kernelQueue[kernel_index_]->kernelName <<
"> passed verification" << std::endl);
dbgPrint(std::cout << "Convolution Time:" << kernelQueue[kernel_index_]->executionTime << std::endl);
CV_LOG_INFO(NULL, "Kernel <" << kernelQueue[kernel_index_]->kernelName <<
"> passed verification");
CV_LOG_INFO(NULL, "Convolution Time:" << kernelQueue[kernel_index_]->executionTime);
double out_w = output_w_;
double out_h = output_h_;
double out_z = M_;
double k_w = kernel_w_;
double k_h = kernel_h_;
double k_z = channels_;
float elapsedTime = kernelQueue[kernel_index_]->executionTime;
double totalFlops = ((k_w*k_h*k_z -1)*2)*(out_w*out_h*out_z)*num_;
CV_LOG_INFO(NULL, "\tEstimated Gflops:" << (totalFlops * 1e-9));
CV_LOG_INFO(NULL, "\tEstimated GFLOPS/S: " << ((totalFlops * 1e-9)*(1000.0/elapsedTime)));
} else {
dbgPrint(std::cout << "fallback to basic kernel" << std::endl);
CV_LOG_INFO(NULL, "fallback to basic kernel");
options_.str(""); options_.clear(); // clear contents and state flags
createBasicKernel(1, 1, 1);
kernel_index_ = kernelQueue.size() - 1;
......@@ -1798,14 +1825,14 @@ void OCL4DNNConvSpatial<Dtype>::prepareKernel(const UMat &bottom, UMat &top,
if (loadCachedConfig()) // check in-memory cache
return;
if (loadTunedConfig()) // check external storage
if (loadTunedConfig()) // check external storage
return;
UMat benchData(1, numImages * top_dim_, (use_half_) ? CV_16SC1 : CV_32FC1);
calculateBenchmark(bottom, benchData, (use_half_) ? weights_half : weight, bias, numImages);
if (force_auto_tuning_)
if (run_auto_tuning_ || force_auto_tuning_)
{
setupConvolution(bottom, top, weight, bias, numImages, benchData);
}
......@@ -1820,18 +1847,8 @@ template<typename Dtype>
bool OCL4DNNConvSpatial<Dtype>::loadCachedConfig()
{
cv::AutoLock lock(kernelConfigMutex);
if (!defaultConfigLoaded)
{
const size_t numConfigs = sizeof(default_kernel_config_intel)/sizeof(default_kernel_config_intel[0])/2;
for (size_t i = 0; i < numConfigs; i++)
{
std::pair<std::string, std::string> entry(
std::string("Intel(R) Corporation_") + default_kernel_config_intel[2 * i],
default_kernel_config_intel[2 * i + 1]);
kernelConfigMap.insert(entry);
}
defaultConfigLoaded = true;
}
if (!defaultConfigLoaded && !force_auto_tuning_)
initializeGlobalBuiltinConfigurations((use_cache_path_ && !cache_path_.empty()) ? (cache_path_ + '/') : std::string());
kernel_hash_t::iterator it = kernelConfigMap.find(key_);
if (it != kernelConfigMap.end())
......@@ -1904,9 +1921,12 @@ bool OCL4DNNConvSpatial<Dtype>::setupKernelByConfig(int x, int y, int z, int typ
template<typename Dtype>
bool OCL4DNNConvSpatial<Dtype>::loadTunedConfig()
{
if (force_auto_tuning_)
return false; // don't load results from external storage
if (!use_cache_path_)
{
if (cache_path_.empty() && !force_auto_tuning_)
if (cache_path_.empty())
{
static int warn_ = 0;
if (!warn_)
......
......@@ -206,8 +206,6 @@ __kernel void ConvolveBasic(
#elif defined KERNEL_IDLF
#define VLOAD4(_v, _p) do { _v = vload4(0, _p); } while(0)
// Each work-item computes a OUT_BLOCK_WIDTH * OUT_BLOCK_HEIGHT region of one output map.
// Each work-group (which will be mapped to 1 SIMD16/SIMD8 EU thread) will compute 16/8 different feature maps, but each feature map is for the same region of the input image.
// NDRange: (output_width+pad)/ OUT_BLOCK_WIDTH, (output_height+pad)/OUT_BLOCK_HEIGHT, NUM_FILTERS/OUT_BLOCK_DEPTH
......@@ -219,190 +217,123 @@ __kernel void
convolve_simd(
ELTWISE_DATA_ARG
FUSED_ARG
__global Dtype* inputs_base,
filter_qualifier Dtype* weights_base,
__global Dtype* inputs,
__global Dtype* weights,
BIAS_KERNEL_ARG
__global Dtype* outputs_base,
__global Dtype* outputs,
const ushort input_width,
const ushort input_height,
const ushort output_width,
const ushort output_height)
{
__global Dtype* outputs = outputs_base;
__global Dtype* inputs = inputs_base;
filter_qualifier Dtype* weights = weights_base;
unsigned int oc = get_global_id(0) * OUT_BLOCK_WIDTH; // oc = Output Column
unsigned int or = get_global_id(1) * OUT_BLOCK_HEIGHT;// or = Output Row
unsigned int fm = get_global_id(2);// fm = Feature Map = od = Output Depth
unsigned int or = get_global_id(1) * OUT_BLOCK_HEIGHT; // or = Output Row
unsigned int fm = get_global_id(2); // fm = Feature Map = od = Output Depth
unsigned int fmg = get_group_id(2);
unsigned int lid = get_local_id(2);
Dtype out[OUT_BLOCK_WIDTH * OUT_BLOCK_HEIGHT];
int in_addr;
Dtype out[OUT_BLOCK_WIDTH * OUT_BLOCK_HEIGHT] = { 0.0f };
// find weights address of given neuron (lid is index)
unsigned int weight_addr = (fmg % (ALIGNED_NUM_FILTERS/SIMD_SIZE)) * INPUT_DEPTH * KERNEL_WIDTH * KERNEL_HEIGHT * SIMD_SIZE + lid;
unsigned int weight_addr = (fmg % FILTERS_IN_GROUP) *
INPUT_DEPTH * KERNEL_WIDTH * KERNEL_HEIGHT * SIMD_SIZE + lid;
for(int i=0;i<OUT_BLOCK_SIZE;i++) {
out[i]=0.0f;
}
unsigned int num_in_batch = fm / ALIGNED_NUM_FILTERS;
unsigned int num_in_batch = ( fm ) / ALIGNED_NUM_FILTERS;
unsigned int input_batch_offset = num_in_batch * INPUT_PITCH * TOTAL_INPUT_DEPTH_SIZE;
unsigned int input_batch_offset = num_in_batch * input_height * input_width * TOTAL_INPUT_DEPTH_SIZE;
int curr_local_y = ( lid / ( TILE_X / 4 ) );
int curr_local_x = ( lid % ( TILE_X / 4 ) ) * 4;
int curr_y = or * STRIDE_Y + curr_local_y;
int curr_x = oc * STRIDE_X + curr_local_x;
int curr_y = or * STRIDE_Y;
int curr_x = oc * STRIDE_X + lid;
#if INPUT_PAD_W != 0 || INPUT_PAD_H != 0 || INPUT_PAD_BOTTOM != 0 || INPUT_PAD_RIGHT != 0
int saved_y = curr_y;
#endif
in_addr = input_batch_offset
+ (curr_y - INPUT_PAD_H) * input_width // y tile offset
+ curr_x - INPUT_PAD_W; // x tile offset
union {
Dtype4 in_vec[INVEC_SIZE];
Dtype in_array[INVEC_SIZE * 4];
} in_buf;
int in_addr = input_batch_offset
+ (curr_y - INPUT_PAD_H) * INPUT_WIDTH // y tile offset
+ curr_x - INPUT_PAD_W; // x tile offset
Dtype in_buf[INVEC_SIZE];
for(int kd = 0; kd < INPUT_DEPTH; kd++)
{
int in_offset = in_addr;
int reg = 0;
LOOP(INVEC_SIZE, reg,
{
if (curr_local_y + reg * TILE_Y_STRIDE < TILE_Y || INVEC_SIZE * TILE_Y_STRIDE <= (TILE_Y + 2) || reg < INVEC_SIZE - 1) {
__attribute__((opencl_unroll_hint(INVEC_SIZE)))
for (int reg = 0; reg < INVEC_SIZE; reg++)
{
in_buf[reg] = inputs[in_offset];
#if INPUT_PAD_W != 0 || INPUT_PAD_H != 0 || INPUT_PAD_BOTTOM != 0 || INPUT_PAD_RIGHT != 0
if (curr_y >= INPUT_PAD_H && curr_y < input_height + INPUT_PAD_H && curr_x + 3 >= INPUT_PAD_W && curr_x < input_width + INPUT_PAD_W) {
if (curr_x < INPUT_PAD_W) {
in_buf.in_vec[reg].s0 = 0;
if (curr_x + 1 >= INPUT_PAD_W && curr_x + 1 < input_width + INPUT_PAD_W)
in_buf.in_vec[reg].s1 = *(inputs + in_offset + 1);
else
in_buf.in_vec[reg].s1 = 0;
if (curr_x + 2 >= INPUT_PAD_W && curr_x + 2 < input_width + INPUT_PAD_W)
in_buf.in_vec[reg].s2 = *(inputs + in_offset + 2);
else
in_buf.in_vec[reg].s2 = 0;
if (curr_x + 3 < input_width + INPUT_PAD_W)
in_buf.in_vec[reg].s3 = *(inputs + in_offset + 3);
else
in_buf.in_vec[reg].s3 = 0;
} else {
VLOAD4(in_buf.in_vec[reg], inputs + in_offset);
if (curr_x + 1 >= input_width + INPUT_PAD_W)
in_buf.in_vec[reg].s1 = 0;
if (curr_x + 2 >= input_width + INPUT_PAD_W)
in_buf.in_vec[reg].s2 = 0;
if (curr_x + 3 >= input_width + INPUT_PAD_W)
in_buf.in_vec[reg].s3 = 0;
}
} else {
in_buf.in_vec[reg] = 0;
if (!(curr_y >= INPUT_PAD_H && curr_y < INPUT_HEIGHT + INPUT_PAD_H &&
curr_x >= INPUT_PAD_W && curr_x < INPUT_WIDTH + INPUT_PAD_W))
{
in_buf[reg] = 0;
}
curr_y += TILE_Y_STRIDE;
#else
VLOAD4(in_buf.in_vec[reg], inputs + in_offset);
#endif
}
in_offset += input_width * TILE_Y_STRIDE;
});
in_addr += input_height * input_width;
curr_y += 1;
in_offset += INPUT_WIDTH;
}
in_addr += INPUT_PITCH;
#if INPUT_PAD_W != 0 || INPUT_PAD_H != 0 || INPUT_PAD_BOTTOM != 0 || INPUT_PAD_RIGHT != 0
curr_y = saved_y;
#endif
#if KERNEL_WIDTH * KERNEL_HEIGHT != 1
#define WEIGHT_PREF 8
#else
#define WEIGHT_PREF 1
#endif
union {
Dtype w[WEIGHT_PREF];
#if KERNEL_WIDTH * KERNEL_HEIGHT != 1
INT_TYPE8 ui8;
#endif
} weight_buf;
Dtype weight_buf[WEIGHT_PREF];
int w_idx=0;
unsigned int orig_weight_addr = weight_addr;
#if KERNEL_WIDTH * KERNEL_HEIGHT != 1
weight_buf.ui8 = SUB_GROUP_BLOCK_READ8((__global INT_TYPE *)&weights[weight_addr]);
weight_addr += SIMD_SIZE * WEIGHT_PREF;
#else
weight_buf.w[0] = as_Dtype(SUB_GROUP_BLOCK_READ((__global INT_TYPE *)&weights[weight_addr]));
weight_addr += SIMD_SIZE * 1;
#endif
for (int i = 0; i < WEIGHT_PREF; i++)
{
weight_buf[i] = weights[weight_addr];
weight_addr += SIMD_SIZE;
}
#define BLOCK_IN(n) sub_group_broadcast( in_buf.in_array[((n)%4) + ((n) / (TILE_Y_STRIDE * TILE_X)) * 4], (((n) % (TILE_Y_STRIDE * TILE_X))/4))
#define BLOCK_IN(n, c) intel_sub_group_shuffle(in_buf[n], (c))
int kr = 0; // kr = Kernel Row
LOOP(KERNEL_HEIGHT, kr,// LOOP is a macro that unrolls the loop.
{
int kc = 0; // kc = Kernel Column
LOOP(KERNEL_WIDTH, kc,
{
int kc = 0; // kc = Kernel Column
LOOP(KERNEL_WIDTH, kc,
{
for(int br=0; br < OUT_BLOCK_HEIGHT; br++) {
for(int bc=0; bc < OUT_BLOCK_WIDTH; bc++) {
Dtype input = BLOCK_IN((br * STRIDE_Y + kr * DILATION_Y) * TILE_X + bc * STRIDE_X + kc * DILATION_X);
out[br * OUT_BLOCK_WIDTH + bc] = mad(weight_buf.w[w_idx % WEIGHT_PREF], input, out[br * OUT_BLOCK_WIDTH + bc]);
}
}
#if KERNEL_WIDTH * KERNEL_HEIGHT > WEIGHT_PREF
// We assume KERNEL_W is equal to KERNEL_H here.
if ((w_idx + 1) % WEIGHT_PREF == 0
#if KERNEL_WIDTH * KERNEL_HEIGHT % 8 != 0
&& ((w_idx + 1) <= (KERNEL_WIDTH * KERNEL_HEIGHT - WEIGHT_PREF))
#endif
) {
weight_buf.ui8 = SUB_GROUP_BLOCK_READ8((__global INT_TYPE *)&weights[weight_addr]);
weight_addr += SIMD_SIZE * WEIGHT_PREF; // weights must be stored in just the right SIMD swizzled format for this to work, see host code for details.
for (int br=0; br < OUT_BLOCK_HEIGHT; br++)
{
for(int bc=0; bc < OUT_BLOCK_WIDTH; bc++)
{
Dtype input = BLOCK_IN((br * STRIDE_Y + kr * DILATION_Y), bc * STRIDE_X + kc * DILATION_X);
out[br * OUT_BLOCK_WIDTH + bc] = mad(weight_buf[w_idx % WEIGHT_PREF], input, out[br * OUT_BLOCK_WIDTH + bc]);
}
#if KERNEL_WIDTH*KERNEL_HEIGHT % 8 == 0
// need to do nothing
#else
else if ((w_idx + 1) % WEIGHT_PREF == 0 && ((w_idx + 1) > (KERNEL_WIDTH * KERNEL_HEIGHT - WEIGHT_PREF)))
#if KERNEL_WIDTH * KERNEL_HEIGHT % 8 == 1
weight_buf.w[0] = weights[weight_addr];
#elif KERNEL_WIDTH * KERNEL_HEIGHT % 8 == 2
weight_buf.ui8.s01 = SUB_GROUP_BLOCK_READ2((__global INT_TYPE *)&weights[weight_addr]);
#elif KERNEL_WIDTH * KERNEL_HEIGHT % 8 <= 4
weight_buf.ui8.s0123 = SUB_GROUP_BLOCK_READ4((__global INT_TYPE *)&weights[weight_addr]);
#else
weight_buf.ui8 = SUB_GROUP_BLOCK_READ8((__global INT_TYPE *)&weights[weight_addr]);
#endif
#endif
#endif
++w_idx;
});
}
weight_buf[w_idx % WEIGHT_PREF] = weights[weight_addr];
weight_addr += SIMD_SIZE;
++w_idx;
});
weight_addr = orig_weight_addr + KERNEL_WIDTH * KERNEL_HEIGHT * SIMD_SIZE;
}
// dead code to work around possible compiler bug.
if (ALIGNED_NUM_FILTERS != NUM_FILTERS && fm > 0xfffffffeul) {
outputs[0] = BLOCK_IN(fm % SIMD_SIZE);
});
weight_addr -= WEIGHT_PREF * SIMD_SIZE;
}
fm = fm % ALIGNED_NUM_FILTERS;
if ((ALIGNED_NUM_FILTERS == NUM_FILTERS || fm < NUM_FILTERS)) {
unsigned int out_addr = ( num_in_batch * TOTAL_OUTPUT_DEPTH + fm ) * output_width * output_height;
out_addr += or * output_width + oc;
// we need this address calculation for biases because we support views and batching
#if LEFT_FILTERS > 0
if (fm < NUM_FILTERS)
#endif
{
unsigned int out_addr = (num_in_batch * TOTAL_OUTPUT_DEPTH + fm) * OUTPUT_PITCH;
out_addr += or * output_width + oc;
// we need this address calculation for biases because we support views and batching
#if APPLY_BIAS
Dtype bias = biases_base[fm];
Dtype bias = biases_base[fm];
#else
Dtype bias = 0;
Dtype bias = 0;
#endif
for(unsigned int r = 0; r < OUT_BLOCK_HEIGHT; r++) {
for(unsigned int r = 0; r < OUT_BLOCK_HEIGHT; r++)
{
if (r + or >= output_height) break;
for(unsigned int c = 0; c < OUT_BLOCK_WIDTH; c++) {
for(unsigned int c = 0; c < OUT_BLOCK_WIDTH; c++)
{
if (c + oc >= output_width) break;
// this does a scattered write to SIMD_SIZE different feature maps, so that data within one map is contiguous, thus ready for input to next layer.
// this does a scattered write to SIMD_SIZE different feature maps,
// so that data within one map is contiguous, thus ready for input to next layer.
ACTIVATION_FUNCTION(outputs, out_addr + r * output_width + c, bias + out[r * OUT_BLOCK_WIDTH + c], fm);
}
}
}
......
......@@ -1137,11 +1137,95 @@ private:
int outWidth, outHeight, zoomFactor;
};
TEST(Layer_Test_Interp, Accuracy)
TEST(Layer_Test_Interp_custom, Accuracy)
{
    // Register a user-supplied InterpLayer implementation, run it against the
    // reference Caffe model, then unregister so the built-in layer is restored
    // for subsequent tests.
    CV_DNN_REGISTER_LAYER_CLASS(Interp, InterpLayer);
    testLayerUsingCaffeModels("layer_interp", DNN_TARGET_CPU, false, false);
    LayerFactory::unregisterLayer("Interp");
}
TEST(Layer_Test_Interp, Accuracy)
{
    // Same model as Layer_Test_Interp_custom, but exercised with the built-in
    // Interp layer (no custom registration).
    testLayerUsingCaffeModels("layer_interp", DNN_TARGET_CPU, false, false);
}
TEST(Layer_Test_PoolingIndices, Accuracy)
{
    // Single 2x2, stride-2, zero-pad max-pooling layer: the first output blob
    // holds the pooled maxima, the second the flat indices (row * 10 + col)
    // of the selected elements.
    Net net;

    LayerParams lp;
    lp.set("pool", "max");
    lp.set("kernel_w", 2);
    lp.set("kernel_h", 2);
    lp.set("stride_w", 2);
    lp.set("stride_h", 2);
    lp.set("pad_w", 0);
    lp.set("pad_h", 0);
    lp.name = "testLayer.name";  // This test also checks that OpenCV accepts layer names containing dots.
    lp.type = "Pooling";
    net.addLayerToPrev(lp.name, lp.type, lp);

    Mat inp(10, 10, CV_8U);
    randu(inp, 0, 255);

    // Compute the reference answer on the CPU: for each 2x2 window keep the
    // first strictly-greater value and remember where it came from.
    Mat maxValues(5, 5, CV_32F, Scalar(-1)), indices(5, 5, CV_32F, Scalar(-1));
    for (int y = 0; y < 10; ++y)
    {
        const int dstY = y / 2;
        for (int x = 0; x < 10; ++x)
        {
            const int dstX = x / 2;
            const float val = (float)inp.at<uint8_t>(y, x);
            if (val > maxValues.at<float>(dstY, dstX))
            {
                maxValues.at<float>(dstY, dstX) = val;
                indices.at<float>(dstY, dstX) = y * 10 + x;
            }
        }
    }

    net.setInput(blobFromImage(inp));
    std::vector<Mat> outputs;
    net.forward(outputs, lp.name);
    normAssert(maxValues, outputs[0].reshape(1, 5));
    normAssert(indices, outputs[1].reshape(1, 5));
}
typedef testing::TestWithParam<tuple<Vec4i, int> > Layer_Test_ShuffleChannel;
TEST_P(Layer_Test_ShuffleChannel, Accuracy)
{
    // Verify that every output channel of a ShuffleChannel layer is an exact
    // copy of input channel groupSize * (c % group) + c / group.
    const Vec4i inpShapeVec = get<0>(GetParam());
    const int group = get<1>(GetParam());
    ASSERT_EQ(inpShapeVec[1] % group, 0);
    const int groupSize = inpShapeVec[1] / group;

    Net net;
    LayerParams lp;
    lp.set("group", group);
    lp.type = "ShuffleChannel";
    lp.name = "testLayer";
    net.addLayerToPrev(lp.name, lp.type, lp);

    const int inpShape[] = {inpShapeVec[0], inpShapeVec[1], inpShapeVec[2], inpShapeVec[3]};
    Mat inp(4, inpShape, CV_32F);
    randu(inp, 0, 255);
    net.setInput(inp);
    Mat out = net.forward();

    for (int n = 0; n < inpShapeVec[0]; ++n)
        for (int c = 0; c < inpShapeVec[1]; ++c)
        {
            Mat outChannel = getPlane(out, n, c);
            Mat inpChannel = getPlane(inp, n, groupSize * (c % group) + c / group);
            normAssert(outChannel, inpChannel);
        }
}
// Parameterize over two 4-D input shapes and group counts 1, 2, 3, 6
// (each divides both tested channel counts, 6 and 12).
INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_ShuffleChannel, Combine(
/*input shape*/ Values(Vec4i(1, 6, 5, 7), Vec4i(3, 12, 1, 4)),
/*group*/ Values(1, 2, 3, 6)
));
}} // namespace
......@@ -87,7 +87,7 @@ static void runTorchNet(String prefix, int targetId = DNN_TARGET_CPU, String out
if (outLayerName.empty())
outLayerName = net.getLayerNames().back();
net.setInput(inp, "0");
net.setInput(inp);
std::vector<Mat> outBlobs;
net.forward(outBlobs, outLayerName);
normAssert(outRef, outBlobs[0]);
......
......@@ -70,6 +70,35 @@
#include "PS1080.h"
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
static cv::Mutex initOpenNI2Mutex;
// RAII wrapper around the global OpenNI2 runtime: constructed once on first
// use, shut down automatically when the static instance is destroyed.
struct OpenNI2Initializer
{
public:
    // Thread-safe, lazy one-time initialization. The mutex guards the first
    // construction of the function-local static instance; subsequent calls
    // are no-ops.
    static void init()
    {
        cv::AutoLock al(initOpenNI2Mutex);
        static OpenNI2Initializer initializer;
    }

private:
    OpenNI2Initializer()
    {
        // Initialize and configure the context.
        openni::Status status = openni::OpenNI::initialize();
        if (status != openni::STATUS_OK)
        {
            CV_Error(CV_StsError, std::string("Failed to initialize:") + openni::OpenNI::getExtendedError());
        }
    }

    ~OpenNI2Initializer()
    {
        // Releases the OpenNI2 runtime at static-destruction time.
        openni::OpenNI::shutdown();
    }
};
class CvCapture_OpenNI2 : public CvCapture
{
public:
......@@ -107,6 +136,8 @@ protected:
static openni::VideoMode defaultStreamOutputMode(int stream);
CvCapture_OpenNI2(int index, const char * filename);
IplImage* retrieveDepthMap();
IplImage* retrievePointCloudMap();
IplImage* retrieveDisparityMap();
......@@ -116,8 +147,8 @@ protected:
IplImage* retrieveGrayImage();
IplImage* retrieveIrImage();
openni::Status toggleStream(int stream, bool toggle);
bool readCamerasParams();
void toggleStream(int stream, bool toggle);
void readCamerasParams();
double getDepthGeneratorProperty(int propIdx) const;
bool setDepthGeneratorProperty(int propIdx, double propVal);
......@@ -131,12 +162,11 @@ protected:
// OpenNI context
openni::Device device;
bool isContextOpened;
openni::Recorder recorder;
// Data generators with its metadata
openni::VideoStream streams[CV_MAX_NUM_STREAMS];
openni::VideoFrameRef streamFrames[CV_MAX_NUM_STREAMS];
cv::Mat streamImages[CV_MAX_NUM_STREAMS];
std::vector<openni::VideoStream> streams;
std::vector<openni::VideoFrameRef> streamFrames;
std::vector<cv::Mat> streamImages;
int maxBufferSize, maxTimeDuration; // for approx sync
bool isCircleBuffer;
......@@ -191,80 +221,103 @@ openni::VideoMode CvCapture_OpenNI2::defaultStreamOutputMode(int stream)
return mode;
}
CvCapture_OpenNI2::CvCapture_OpenNI2( int index )
{
const char* deviceURI = openni::ANY_DEVICE;
openni::Status status;
int deviceType = DEVICE_DEFAULT;
noSampleValue = shadowValue = 0;
isContextOpened = false;
maxBufferSize = DEFAULT_MAX_BUFFER_SIZE;
isCircleBuffer = DEFAULT_IS_CIRCLE_BUFFER;
maxTimeDuration = DEFAULT_MAX_TIME_DURATION;
if( index >= 10 )
{
deviceType = index / 10;
index %= 10;
}
// Open a physical device selected by enumeration index; delegates to the
// main (index, filename) constructor with no filename.
CvCapture_OpenNI2::CvCapture_OpenNI2(int index) :
    CvCapture_OpenNI2(index, nullptr)
{ }
// Open a recorded .oni file; the negative index tells the main constructor
// to use the filename as the device URI instead of enumerating devices.
CvCapture_OpenNI2::CvCapture_OpenNI2(const char * filename) :
    CvCapture_OpenNI2(-1, filename)
{ }
CvCapture_OpenNI2::CvCapture_OpenNI2(int index, const char * filename) :
device(),
isContextOpened(false),
streams(CV_MAX_NUM_STREAMS),
streamFrames(CV_MAX_NUM_STREAMS),
streamImages(CV_MAX_NUM_STREAMS),
maxBufferSize(DEFAULT_MAX_BUFFER_SIZE),
maxTimeDuration(DEFAULT_MAX_TIME_DURATION),
isCircleBuffer(DEFAULT_IS_CIRCLE_BUFFER),
baseline(0),
depthFocalLength_VGA(0),
shadowValue(0),
noSampleValue(0),
outputMaps(outputMapsTypesCount)
{
// Initialize and configure the context.
status = openni::OpenNI::initialize();
OpenNI2Initializer::init();
if (status != openni::STATUS_OK)
const char* deviceURI = openni::ANY_DEVICE;
bool needColor = true;
bool needIR = true;
if (index >= 0)
{
CV_Error(CV_StsError, cv::format("Failed to initialize:", openni::OpenNI::getExtendedError()));
return;
}
int deviceType = DEVICE_DEFAULT;
if (index >= 10)
{
deviceType = index / 10;
index %= 10;
}
// Asus XTION and Occipital Structure Sensor do not have an image generator
needColor = (deviceType != DEVICE_ASUS_XTION);
// find appropriate device URI
openni::Array<openni::DeviceInfo> ldevs;
if (index > 0)
{
openni::OpenNI::enumerateDevices(&ldevs);
deviceURI = ldevs[index].getUri();
// find appropriate device URI
openni::Array<openni::DeviceInfo> ldevs;
if (index > 0)
{
openni::OpenNI::enumerateDevices(&ldevs);
if (index < ldevs.getSize())
deviceURI = ldevs[index].getUri();
else
{
CV_Error(CV_StsError, "OpenCVKinect2: Device index exceeds the number of available OpenNI devices");
}
}
}
status = device.open(deviceURI);
if( status != openni::STATUS_OK )
else
{
CV_Error(CV_StsError, cv::format("OpenCVKinect: Device open failed see: %s\n", openni::OpenNI::getExtendedError()));
openni::OpenNI::shutdown();
return;
deviceURI = filename;
}
status = toggleStream(CV_DEPTH_STREAM, true);
// Asus XTION and Occipital Structure Sensor do not have an image generator
if (deviceType != DEVICE_ASUS_XTION)
status = openni::Status(status | toggleStream(CV_COLOR_STREAM, true));
openni::Status status;
status = device.open(deviceURI);
if (status != openni::STATUS_OK)
{
openni::OpenNI::shutdown();
return;
CV_Error(CV_StsError, std::string("OpenCVKinect2: Failed to open device: ") + openni::OpenNI::getExtendedError());
}
if (!readCamerasParams())
{
CV_Error(CV_StsError, cv::format("CvCapture_OpenNI2::CvCapture_OpenNI2 : Could not read cameras parameters\n"));
return;
}
toggleStream(CV_DEPTH_STREAM, true);
if (needColor)
toggleStream(CV_COLOR_STREAM, true);
if (needIR)
toggleStream(CV_IR_STREAM, true);
setProperty(CV_CAP_PROP_OPENNI_REGISTRATION, 1.0);
outputMaps.resize( outputMapsTypesCount );
// default for Kinect2 camera
setProperty(CV_CAP_PROP_OPENNI2_MIRROR, 0.0);
isContextOpened = true;
}
setProperty(CV_CAP_PROP_OPENNI_REGISTRATION, 1.0);
CvCapture_OpenNI2::~CvCapture_OpenNI2()
{
for (size_t i = 0; i < streams.size(); ++i)
{
streamFrames[i].release();
streams[i].stop();
streams[i].destroy();
}
device.close();
}
openni::Status CvCapture_OpenNI2::toggleStream(int stream, bool toggle)
void CvCapture_OpenNI2::toggleStream(int stream, bool toggle)
{
openni::Status status;
// for logging
static const char* stream_names[CV_MAX_NUM_STREAMS] = {
static const std::string stream_names[CV_MAX_NUM_STREAMS] = {
"depth",
"color",
"IR"
......@@ -280,140 +333,92 @@ openni::Status CvCapture_OpenNI2::toggleStream(int stream, bool toggle)
{
// already opened
if (streams[stream].isValid())
return openni::STATUS_OK;
return;
// open stream
status = streams[stream].create(device, stream_sensor_types[stream]);
if (status == openni::STATUS_OK)
{
// set video mode
status = streams[stream].setVideoMode(defaultStreamOutputMode(stream)); // xn::DepthGenerator supports VGA only! (Jan 2011)
if (status != openni::STATUS_OK)
// try to set up default stream mode (if available)
const openni::Array<openni::VideoMode>& vm = streams[stream].getSensorInfo().getSupportedVideoModes();
openni::VideoMode dm = defaultStreamOutputMode(stream);
for (int i = 0; i < vm.getSize(); i++)
{
CV_Error(CV_StsError, cv::format("CvCapture_OpenNI2::CvCapture_OpenNI2 : Couldn't set %s stream output mode: %s\n",
stream_names[stream],
openni::OpenNI::getExtendedError()));
streams[stream].destroy();
return status;
if (vm[i].getPixelFormat() == dm.getPixelFormat() &&
vm[i].getResolutionX() == dm.getResolutionX() &&
vm[i].getResolutionY() == dm.getResolutionY() &&
vm[i].getFps() == dm.getFps())
{
status = streams[stream].setVideoMode(defaultStreamOutputMode(stream));
if (status != openni::STATUS_OK)
{
streams[stream].destroy();
CV_Error(CV_StsError, std::string("OpenCVKinect2 : Couldn't set ") +
stream_names[stream] + std::string(" stream output mode: ") +
std::string(openni::OpenNI::getExtendedError()));
}
}
}
// start stream
status = streams[stream].start();
if (status != openni::STATUS_OK)
{
CV_Error(CV_StsError, cv::format("CvCapture_OpenNI2::CvCapture_OpenNI2 : Couldn't start %s stream: %s\n",
stream_names[stream],
openni::OpenNI::getExtendedError()));
streams[stream].destroy();
return status;
CV_Error(CV_StsError, std::string("CvCapture_OpenNI2::CvCapture_OpenNI2 : Couldn't start ") +
stream_names[stream] + std::string(" stream: ") +
std::string(openni::OpenNI::getExtendedError()));
}
}
else
{
CV_Error(CV_StsError, cv::format("CvCapture_OpenNI2::CvCapture_OpenNI2 : Couldn't find %s stream:: %s\n",
stream_names[stream],
openni::OpenNI::getExtendedError()));
return status;
CV_Error(CV_StsError, std::string("CvCapture_OpenNI2::CvCapture_OpenNI2 : Couldn't find ") +
stream_names[stream] + " stream: " +
std::string(openni::OpenNI::getExtendedError()));
}
}
else if (streams[stream].isValid()) // want to close stream
{
streams[stream].stop();
streams[stream].destroy();
}
return openni::STATUS_OK;
}
CvCapture_OpenNI2::CvCapture_OpenNI2(const char * filename)
{
openni::Status status;
isContextOpened = false;
maxBufferSize = DEFAULT_MAX_BUFFER_SIZE;
isCircleBuffer = DEFAULT_IS_CIRCLE_BUFFER;
maxTimeDuration = DEFAULT_MAX_TIME_DURATION;
// Initialize and configure the context.
status = openni::OpenNI::initialize();
if (status != openni::STATUS_OK)
{
CV_Error(CV_StsError, cv::format("Failed to initialize:", openni::OpenNI::getExtendedError()));
return;
}
//FIX for libfreenect2
//which stops the whole device when stopping only one stream
// Open file
status = device.open(filename);
if( status != openni::STATUS_OK )
{
CV_Error(CV_StsError, cv::format("CvCapture_OpenNI2::CvCapture_OpenNI2 : Failed to open input file (%s): %s\n", filename, openni::OpenNI::getExtendedError()));
return;
//streams[stream].stop();
//streams[stream].destroy();
}
status = openni::Status(toggleStream(CV_DEPTH_STREAM, true) | toggleStream(CV_COLOR_STREAM, true));
if (status != openni::STATUS_OK)
{
openni::OpenNI::shutdown();
return;
}
if( !readCamerasParams() )
{
CV_Error(CV_StsError, cv::format("CvCapture_OpenNI2::CvCapture_OpenNI2 : Could not read cameras parameters\n"));
return;
}
outputMaps.resize( outputMapsTypesCount );
isContextOpened = true;
}
CvCapture_OpenNI2::~CvCapture_OpenNI2()
{
for (int i = 0; i < CV_MAX_NUM_STREAMS; ++i)
{
streamFrames[i].release();
streams[i].stop();
streams[i].destroy();
}
device.close();
openni::OpenNI::shutdown();
}
bool CvCapture_OpenNI2::readCamerasParams()
void CvCapture_OpenNI2::readCamerasParams()
{
double pixelSize = 0;
if (streams[CV_DEPTH_STREAM].getProperty<double>(XN_STREAM_PROPERTY_ZERO_PLANE_PIXEL_SIZE, &pixelSize) != openni::STATUS_OK)
{
CV_Error(CV_StsError, cv::format("CvCapture_OpenNI2::readCamerasParams : Could not read pixel size!\n"));
return false;
CV_Error(CV_StsError, "CvCapture_OpenNI2::readCamerasParams : Could not read pixel size!" +
std::string(openni::OpenNI::getExtendedError()));
}
// pixel size @ VGA = pixel size @ SXGA x 2
pixelSize *= 2.0; // in mm
// focal length of IR camera in pixels for VGA resolution
int zeroPlanDistance; // in mm
if (streams[CV_DEPTH_STREAM].getProperty(XN_STREAM_PROPERTY_ZERO_PLANE_DISTANCE, &zeroPlanDistance) != openni::STATUS_OK)
unsigned long long zeroPlaneDistance; // in mm
if (streams[CV_DEPTH_STREAM].getProperty(XN_STREAM_PROPERTY_ZERO_PLANE_DISTANCE, &zeroPlaneDistance) != openni::STATUS_OK)
{
CV_Error(CV_StsError, cv::format("CvCapture_OpenNI2::readCamerasParams : Could not read virtual plane distance!\n"));
return false;
CV_Error(CV_StsError, "CvCapture_OpenNI2::readCamerasParams : Could not read virtual plane distance!" +
std::string(openni::OpenNI::getExtendedError()));
}
if (streams[CV_DEPTH_STREAM].getProperty<double>(XN_STREAM_PROPERTY_EMITTER_DCMOS_DISTANCE, &baseline) != openni::STATUS_OK)
{
CV_Error(CV_StsError, cv::format("CvCapture_OpenNI2::readCamerasParams : Could not read base line!\n"));
return false;
CV_Error(CV_StsError, "CvCapture_OpenNI2::readCamerasParams : Could not read base line!" +
std::string(openni::OpenNI::getExtendedError()));
}
// baseline from cm -> mm
baseline *= 10;
// focal length from mm -> pixels (valid for 640x480)
depthFocalLength_VGA = (int)((double)zeroPlanDistance / (double)pixelSize);
return true;
depthFocalLength_VGA = (int)((double)zeroPlaneDistance / (double)pixelSize);
}
double CvCapture_OpenNI2::getProperty( int propIdx ) const
......@@ -500,7 +505,7 @@ double CvCapture_OpenNI2::getCommonProperty( int propIdx ) const
break;
}
default :
CV_Error( CV_StsBadArg, cv::format("Such parameter (propIdx=%d) isn't supported for getting.\n", propIdx) );
CV_Error( CV_StsBadArg, cv::format("Such parameter (propIdx=%d) isn't supported for getting.", propIdx) );
}
return propValue;
......@@ -525,14 +530,20 @@ bool CvCapture_OpenNI2::setCommonProperty( int propIdx, double propValue )
// There is a set of properties that correspond to depth generator by default
// (is they are pass without particular generator flag).
case CV_CAP_PROP_OPENNI_REGISTRATION:
isSet = setDepthGeneratorProperty( propIdx, propValue );
isSet = setDepthGeneratorProperty(propIdx, propValue);
break;
case CV_CAP_PROP_OPENNI2_SYNC:
isSet = device.setDepthColorSyncEnabled(propValue > 0.0) == openni::STATUS_OK;
break;
case CV_CAP_PROP_FRAME_WIDTH:
case CV_CAP_PROP_FRAME_HEIGHT:
case CV_CAP_PROP_AUTOFOCUS:
isSet = false;
break;
default:
CV_Error( CV_StsBadArg, cv::format("Such parameter (propIdx=%d) isn't supported for setting.\n", propIdx) );
CV_Error(CV_StsBadArg, cv::format("Such parameter (propIdx=%d) isn't supported for setting.", propIdx));
}
return isSet;
......@@ -565,9 +576,13 @@ double CvCapture_OpenNI2::getDepthGeneratorProperty( int propIdx ) const
propValue = streams[CV_DEPTH_STREAM].getMaxPixelValue();
break;
case CV_CAP_PROP_OPENNI_BASELINE :
if(baseline <= 0)
const_cast<CvCapture_OpenNI2*>(this)->readCamerasParams();
propValue = baseline;
break;
case CV_CAP_PROP_OPENNI_FOCAL_LENGTH :
if(depthFocalLength_VGA <= 0)
const_cast<CvCapture_OpenNI2*>(this)->readCamerasParams();
propValue = (double)depthFocalLength_VGA;
break;
case CV_CAP_PROP_OPENNI_REGISTRATION :
......@@ -580,7 +595,7 @@ double CvCapture_OpenNI2::getDepthGeneratorProperty( int propIdx ) const
propValue = streamFrames[CV_DEPTH_STREAM].getFrameIndex();
break;
default :
CV_Error( CV_StsBadArg, cv::format("Depth generator does not support such parameter (propIdx=%d) for getting.\n", propIdx) );
CV_Error( CV_StsBadArg, cv::format("Depth generator does not support such parameter (propIdx=%d) for getting.", propIdx) );
}
return propValue;
......@@ -594,7 +609,10 @@ bool CvCapture_OpenNI2::setDepthGeneratorProperty( int propIdx, double propValue
{
case CV_CAP_PROP_OPENNI_GENERATOR_PRESENT:
if (isContextOpened)
isSet = toggleStream(CV_DEPTH_STREAM, propValue > 0.0) == openni::STATUS_OK;
{
toggleStream(CV_DEPTH_STREAM, propValue > 0.0);
isSet = true;
}
break;
case CV_CAP_PROP_OPENNI_REGISTRATION:
{
......@@ -612,12 +630,13 @@ bool CvCapture_OpenNI2::setDepthGeneratorProperty( int propIdx, double propValue
{
openni::Status status = device.setImageRegistrationMode(mode);
if( status != openni::STATUS_OK )
CV_Error(CV_StsError, cv::format("CvCapture_OpenNI2::setDepthGeneratorProperty : %s\n", openni::OpenNI::getExtendedError()));
CV_Error(CV_StsError, std::string("CvCapture_OpenNI2::setDepthGeneratorProperty: ") +
std::string(openni::OpenNI::getExtendedError()));
else
isSet = true;
}
else
CV_Error(CV_StsError, cv::format("CvCapture_OpenNI2::setDepthGeneratorProperty : Unsupported viewpoint.\n"));
CV_Error(CV_StsError, "CvCapture_OpenNI2::setDepthGeneratorProperty: Unsupported viewpoint.");
}
else
isSet = true;
......@@ -627,14 +646,15 @@ bool CvCapture_OpenNI2::setDepthGeneratorProperty( int propIdx, double propValue
{
openni::Status status = device.setImageRegistrationMode(openni::IMAGE_REGISTRATION_OFF);
if( status != openni::STATUS_OK )
CV_Error(CV_StsError, cv::format("CvCapture_OpenNI2::setDepthGeneratorProperty : %s\n", openni::OpenNI::getExtendedError()));
CV_Error(CV_StsError, std::string("CvCapture_OpenNI2::setDepthGeneratorProperty: ") +
std::string(openni::OpenNI::getExtendedError()));
else
isSet = true;
}
}
break;
default:
CV_Error( CV_StsBadArg, cv::format("Depth generator does not support such parameter (propIdx=%d) for setting.\n", propIdx) );
CV_Error( CV_StsBadArg, cv::format("Depth generator does not support such parameter (propIdx=%d) for setting.", propIdx) );
}
return isSet;
......@@ -668,7 +688,7 @@ double CvCapture_OpenNI2::getImageGeneratorProperty( int propIdx ) const
propValue = (double)streamFrames[CV_COLOR_STREAM].getFrameIndex();
break;
default :
CV_Error( CV_StsBadArg, cv::format("Image generator does not support such parameter (propIdx=%d) for getting.\n", propIdx) );
CV_Error( CV_StsBadArg, cv::format("Image generator does not support such parameter (propIdx=%d) for getting.", propIdx) );
}
return propValue;
......@@ -682,7 +702,10 @@ bool CvCapture_OpenNI2::setImageGeneratorProperty(int propIdx, double propValue)
{
case CV_CAP_PROP_OPENNI_GENERATOR_PRESENT:
if (isContextOpened)
isSet = toggleStream(CV_COLOR_STREAM, propValue > 0.0) == openni::STATUS_OK;
{
toggleStream(CV_COLOR_STREAM, propValue > 0.0);
isSet = true;
}
break;
case CV_CAP_PROP_OPENNI_OUTPUT_MODE :
{
......@@ -713,18 +736,19 @@ bool CvCapture_OpenNI2::setImageGeneratorProperty(int propIdx, double propValue)
mode.setFps(60);
break;
default :
CV_Error( CV_StsBadArg, "Unsupported image generator output mode.\n");
CV_Error( CV_StsBadArg, "Unsupported image generator output mode.");
}
openni::Status status = streams[CV_COLOR_STREAM].setVideoMode( mode );
if( status != openni::STATUS_OK )
CV_Error(CV_StsError, cv::format("CvCapture_OpenNI2::setImageGeneratorProperty : %s\n", openni::OpenNI::getExtendedError()));
CV_Error(CV_StsError, std::string("CvCapture_OpenNI2::setImageGeneratorProperty: ") +
std::string(openni::OpenNI::getExtendedError()));
else
isSet = true;
break;
}
default:
CV_Error( CV_StsBadArg, cv::format("Image generator does not support such parameter (propIdx=%d) for setting.\n", propIdx) );
CV_Error( CV_StsBadArg, cv::format("Image generator does not support such parameter (propIdx=%d) for setting.", propIdx) );
}
return isSet;
......@@ -758,7 +782,7 @@ double CvCapture_OpenNI2::getIrGeneratorProperty(int propIdx) const
propValue = (double)streamFrames[CV_IR_STREAM].getFrameIndex();
break;
default:
CV_Error(CV_StsBadArg, cv::format("Image generator does not support such parameter (propIdx=%d) for getting.\n", propIdx));
CV_Error(CV_StsBadArg, cv::format("Image generator does not support such parameter (propIdx=%d) for getting.", propIdx));
}
return propValue;
......@@ -772,7 +796,10 @@ bool CvCapture_OpenNI2::setIrGeneratorProperty(int propIdx, double propValue)
{
case CV_CAP_PROP_OPENNI_GENERATOR_PRESENT:
if (isContextOpened)
isSet = toggleStream(CV_IR_STREAM, propValue > 0.0) == openni::STATUS_OK;
{
toggleStream(CV_IR_STREAM, propValue > 0.0);
isSet = true;
}
break;
case CV_CAP_PROP_OPENNI_OUTPUT_MODE:
{
......@@ -803,18 +830,19 @@ bool CvCapture_OpenNI2::setIrGeneratorProperty(int propIdx, double propValue)
mode.setFps(60);
break;
default:
CV_Error(CV_StsBadArg, "Unsupported image generator output mode.\n");
CV_Error(CV_StsBadArg, "Unsupported image generator output mode.");
}
openni::Status status = streams[CV_IR_STREAM].setVideoMode(mode);
if (status != openni::STATUS_OK)
CV_Error(CV_StsError, cv::format("CvCapture_OpenNI2::setImageGeneratorProperty : %s\n", openni::OpenNI::getExtendedError()));
CV_Error(CV_StsError, std::string("CvCapture_OpenNI2::setImageGeneratorProperty: ") +
std::string(openni::OpenNI::getExtendedError()));
else
isSet = true;
break;
}
default:
CV_Error(CV_StsBadArg, cv::format("Image generator does not support such parameter (propIdx=%d) for setting.\n", propIdx));
CV_Error(CV_StsBadArg, cv::format("Image generator does not support such parameter (propIdx=%d) for setting.", propIdx));
}
return isSet;
......@@ -931,10 +959,12 @@ IplImage* CvCapture_OpenNI2::retrieveDisparityMap()
if (!streamFrames[CV_DEPTH_STREAM].isValid())
return 0;
readCamerasParams();
cv::Mat disp32;
computeDisparity_32F(streamFrames[CV_DEPTH_STREAM], disp32, baseline, depthFocalLength_VGA, noSampleValue, shadowValue);
disp32.convertTo( outputMaps[CV_CAP_OPENNI_DISPARITY_MAP].mat, CV_8UC1 );
disp32.convertTo(outputMaps[CV_CAP_OPENNI_DISPARITY_MAP].mat, CV_8UC1);
return outputMaps[CV_CAP_OPENNI_DISPARITY_MAP].getIplImagePtr();
}
......@@ -944,6 +974,8 @@ IplImage* CvCapture_OpenNI2::retrieveDisparityMap_32F()
if (!streamFrames[CV_DEPTH_STREAM].isValid())
return 0;
readCamerasParams();
computeDisparity_32F(streamFrames[CV_DEPTH_STREAM], outputMaps[CV_CAP_OPENNI_DISPARITY_MAP_32F].mat, baseline, depthFocalLength_VGA, noSampleValue, shadowValue);
return outputMaps[CV_CAP_OPENNI_DISPARITY_MAP_32F].getIplImagePtr();
......@@ -966,7 +998,7 @@ inline void getBGRImageFromMetaData( const openni::VideoFrameRef& imageMetaData,
{
cv::Mat bufferImage;
if( imageMetaData.getVideoMode().getPixelFormat() != openni::PIXEL_FORMAT_RGB888 )
CV_Error( CV_StsUnsupportedFormat, "Unsupported format of grabbed image\n" );
CV_Error( CV_StsUnsupportedFormat, "Unsupported format of grabbed image." );
bgrImage.create(imageMetaData.getHeight(), imageMetaData.getWidth(), CV_8UC3);
bufferImage.create(imageMetaData.getHeight(), imageMetaData.getWidth(), CV_8UC3);
......@@ -989,7 +1021,7 @@ inline void getGrayImageFromMetaData(const openni::VideoFrameRef& imageMetaData,
}
else
{
CV_Error(CV_StsUnsupportedFormat, "Unsupported format of grabbed image\n");
CV_Error(CV_StsUnsupportedFormat, "Unsupported format of grabbed image.");
}
}
......
......@@ -20,7 +20,7 @@ int main(int, char**)
//--- INITIALIZE VIDEOCAPTURE
VideoCapture cap;
// open the default camera using default API
cap.open(0);
// cap.open(0);
// OR advance usage: select any API backend
int deviceID = 0; // 0 = open default camera
int apiID = cv::CAP_ANY; // 0 = autodetect default API
......
......@@ -9,7 +9,7 @@ truck
boat
traffic light
fire hydrant
street sign
stop sign
parking meter
bench
......@@ -23,11 +23,11 @@ elephant
bear
zebra
giraffe
hat
backpack
umbrella
shoe
eye glasses
handbag
tie
suitcase
......@@ -42,7 +42,7 @@ skateboard
surfboard
tennis racket
bottle
plate
wine glass
cup
fork
......@@ -63,12 +63,12 @@ chair
couch
potted plant
bed
mirror
dining table
window
desk
toilet
door
tv
laptop
mouse
......@@ -80,7 +80,7 @@ oven
toaster
sink
refrigerator
blender
book
clock
vase
......
person
bicycle
car
motorcycle
airplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
couch
potted plant
bed
dining table
toilet
tv
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
......@@ -14,6 +14,8 @@
#include <iomanip>
#include <stdexcept>
#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
#define CL_USE_DEPRECATED_OPENCL_2_0_APIS // eliminate build warning
#ifdef __APPLE__
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment