• WuZhiwen's avatar
    Merge pull request #12703 from wzw-intel:vkcom · 6e3ea8b4
    WuZhiwen authored
    * dnn: Add a Vulkan based backend
    
    This commit adds a new backend "DNN_BACKEND_VKCOM" and a
    new target "DNN_TARGET_VULKAN". VKCOM means vulkan based
    computation library.
    
    This backend uses Vulkan API and SPIR-V shaders to do
    the inference computation for layers. The layer types
    that are implemented in DNN_BACKEND_VKCOM include:
    Conv, Concat, ReLU, LRN, PriorBox, Softmax, MaxPooling,
    AvePooling, Permute
    
    This is just a beginning work for Vulkan in OpenCV DNN,
    more layer types will be supported and performance
    tuning is on the way.
    Signed-off-by: Wu Zhiwen <zhiwen.wu@intel.com>
    
    * dnn/vulkan: Add FindVulkan.cmake to detect Vulkan SDK
    
    In order to build dnn with Vulkan support, you need to install the
    Vulkan SDK, set the environment variable "VULKAN_SDK", and
    add "-DWITH_VULKAN=ON" to the cmake command.
    
    You can download Vulkan SDK from:
    https://vulkan.lunarg.com/sdk/home#linux
    
    For how to install, see
    https://vulkan.lunarg.com/doc/sdk/latest/linux/getting_started.html
    https://vulkan.lunarg.com/doc/sdk/latest/windows/getting_started.html
    https://vulkan.lunarg.com/doc/sdk/latest/mac/getting_started.html
    respectively for linux, windows and mac.
    
    To run the Vulkan backend, you also need to install the mesa driver.
    On Ubuntu, use this command 'sudo apt-get install mesa-vulkan-drivers'
    
    To test, use command '$BUILD_DIR/bin/opencv_test_dnn --gtest_filter=*VkCom*'
    Signed-off-by: Wu Zhiwen <zhiwen.wu@intel.com>
    
    * dnn/Vulkan: dynamically load Vulkan runtime
    
    No compile-time dependency on Vulkan library.
    If Vulkan runtime is unavailable, fallback to CPU path.
    
    Use environment "OPENCL_VULKAN_RUNTIME" to specify path to your
    own vulkan runtime library.
    Signed-off-by: Wu Zhiwen <zhiwen.wu@intel.com>
    
    * dnn/Vulkan: Add a python script to compile GLSL shaders to SPIR-V shaders
    
    The SPIR-V shaders are in format of text-based 32-bit hexadecimal
    numbers, and inserted into .cpp files as unsigned int32 array.
    
    * dnn/Vulkan: Put Vulkan headers into 3rdparty directory and some other fixes
    
    Vulkan header files are copied from
    https://github.com/KhronosGroup/Vulkan-Docs/tree/master/include/vulkan
    to 3rdparty/include
    
    Fix the Copyright declaration issue.
    
    Refine OpenCVDetectVulkan.cmake
    
    * dnn/Vulkan: Add vulkan backend tests into existing ones.
    
    Also fixed some test failures.
    
    - Don't use bool variable as uniform for shader
    - Fix the issue of the dispatched group number exceeding the maximum
    - Bypass "group > 1" convolution. This should be supported in the future.
    
    * dnn/Vulkan: Fix multiple initialization in one thread.
    6e3ea8b4
op_concat.cpp 3.47 KB
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// Copyright (C) 2018, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.

#include "../../precomp.hpp"
#include "common.hpp"
#include "internal.hpp"
#include "../include/op_concat.hpp"

namespace cv { namespace dnn { namespace vkcom {

#ifdef HAVE_VULKAN

// Value assigned to config_.local_size_x before the concat shader module and
// pipeline are created; computeGroupCount() divides the work by it.
// NOTE(review): presumably has to agree with the local size declared in the
// concat shader -- confirm against the shader source.
#define LOCAL_SZ_X 256

// Parameter block passed to recordCommandBuffer() once per input tensor.
// createPipeline() is given sizeof(ConcatParam).
// NOTE(review): the field order and types presumably have to match the
// parameter layout expected by the concat shader -- confirm before changing.
struct ConcatParam {
    int out_concat_axis;          // size of the output along the concat axis (sum over the inputs)
    int accumulated_concat_axis;  // sum of the concat-axis sizes of the inputs already processed
    int concat_size;              // out.count(axis_ + 1)
    int total_concat_size;        // ins[i].count(axis_) of the input being processed
    int thread_num;               // ins[i].count() of the input being processed
};

// Constructs a concat operation for the given axis: runs init(axis) and then
// sets the operation type string to "Concat".
OpConcat::OpConcat(const int axis)
{
    init(axis);
    type_ = "Concat";
}

// Stores the concat axis and initializes the Vulkan objects of the base
// operation through OpBase::initVulkanThing().
// Always returns true.
bool OpConcat::init(const int axis)
{
    // Value handed to OpBase::initVulkanThing(). forward() binds one input
    // tensor at binding 0 and the output tensor at binding 1, so this is
    // presumably the number of buffer bindings -- confirm against OpBase.
    // Kept as a scoped constant so that no macro leaks into the rest of the
    // translation unit.
    const int buffer_num = 2;

    axis_ = axis;
    OpBase::initVulkanThing(buffer_num);
    return true;
}

// Reshapes `out` to the shape of the concatenated result: the shape of in[0]
// with the size along axis_ replaced by the sum of the sizes of all inputs
// along that axis.
// Leaves `out` untouched when `in` is empty.
void OpConcat::reshapeOutTensor(std::vector<Tensor *>& in, Tensor& out)
{
    if (in.empty())
    {
        // There is no input to take a shape from; in[0] would be out of range.
        return;
    }

    int sum_axis = 0;
    // size_t index avoids the signed/unsigned comparison against in.size().
    for (size_t i = 0; i < in.size(); ++i)
    {
        sum_axis += in[i]->dimSize(axis_);
    }

    Shape shape = in[0]->getShape();
    shape[axis_] = sum_axis;
    out.reshape(NULL, shape);
}

bool OpConcat::forward(std::vector<Tensor>& ins,
                       std::vector<Tensor>& blobs,
                       std::vector<Tensor>& outs)
{
    return forward(ins, outs[0]);
}

bool OpConcat::forward(std::vector<Tensor>& ins, Tensor& out)
{
    int input_num = ins.size();
    Tensor& first_tensor = ins[0];
    int sum_axis = first_tensor.dimSize(axis_);
    int dim_num = first_tensor.dimNum();
    for (int i = 1; i < input_num; ++i)
    {
        Tensor& tensor = ins[i];
        assert(tensor.dimNum() == dim_num);
        for (int d = 0; d < dim_num; ++d)
        {
            if (d == axis_)
            {
                sum_axis += tensor.dimSize(axis_);;
            }
            else
            {
                assert(first_tensor.dimSize(d) == tensor.dimSize(d));
            }
        }
    }

    assert(out.dimSize(axis_) == sum_axis);
    for (int d = 0; d < dim_num; ++d)
    {
        if (d != axis_)
        {
            assert(out.dimSize(d) == first_tensor.dimSize(d));
        }
    }
    out_concat_axis_ = sum_axis;
    concat_size_ = out.count(axis_ + 1);

    if (pipeline_ == VK_NULL_HANDLE)
    {
        config_.local_size_x = LOCAL_SZ_X;
        config_.block_height = 1;
        config_.block_width  = 1;
        config_.block_depth  = 1;
        createShaderModule(concat_spv, sizeof(concat_spv));
        createPipeline(sizeof(ConcatParam));
    }

    accumulated_concat_axis_ = 0;
    for (int i = 0; i < input_num; i++)
    {
        bindTensor(device_, ins[i], 0, descriptor_set_);
        bindTensor(device_, out, 1, descriptor_set_);
        total_concat_size_ = ins[i].count(axis_);
        thread_num_ = ins[i].count();
        computeGroupCount();
        ConcatParam param = {out_concat_axis_,
                             accumulated_concat_axis_,
                             concat_size_,
                             total_concat_size_,
                             thread_num_};
        recordCommandBuffer((void *)&param, sizeof(ConcatParam));
        runCommandBuffer();
        accumulated_concat_axis_ += ins[i].dimSize(axis_);
    }

    return true;
}

// Computes the dispatch group counts for the current thread_num_: a single
// group along Y and Z, and along X the result of alignSize() divided by
// config_.local_size_x.
// Always returns true.
bool OpConcat::computeGroupCount()
{
    group_z_ = 1;
    group_y_ = 1;

    // alignSize() presumably rounds thread_num_ up to a multiple of the local
    // size, making this a ceiling division -- confirm against its definition.
    group_x_ = alignSize(thread_num_, config_.local_size_x) / config_.local_size_x;

    return true;
}

#endif // HAVE_VULKAN

}}} // namespace cv::dnn::vkcom