Commit 02cc1cd6 authored by WuZhiwen, committed by Alexander Alekhin

Merge pull request #13244 from wzw-intel:init_vulkan

* dnn/Vulkan: don't init Vulkan runtime if using other backend/target

There is no need to explicitly call an init API; the Vulkan environment is
initialized automatically the first time a VkCom object is used.
Signed-off-by: Wu Zhiwen <zhiwen.wu@intel.com>

* dnn/Vulkan: suppress compiler warning for "-Wsign-promo"
Signed-off-by: Wu Zhiwen <zhiwen.wu@intel.com>
parent 0e6cf419
......@@ -911,21 +911,8 @@ struct Net::Impl
typedef std::map<int, LayerShapes> LayersShapesMap;
typedef std::map<int, LayerData> MapIdToLayerData;
~Impl()
{
#ifdef HAVE_VULKAN
// Vulkan requires explicit releasing the child objects of
// VkDevice object prior to releasing VkDevice object itself.
layers.clear();
backendWrappers.clear();
vkcom::deinitPerThread();
#endif
}
Impl()
{
#ifdef HAVE_VULKAN
vkcom::initPerThread();
#endif
//allocate fake net input layer
netInputLayer = Ptr<DataLayer>(new DataLayer());
LayerData &inpl = layers.insert( make_pair(0, LayerData()) ).first->second;
......
......@@ -36,7 +36,6 @@ protected:
void recordCommandBuffer(void* push_constants = NULL, size_t push_constants_size = 0);
void runCommandBuffer();
const Context* ctx_;
VkPipeline pipeline_;
VkCommandBuffer cmd_buffer_;
VkDescriptorPool descriptor_pool_;
......
......@@ -39,9 +39,6 @@ enum PaddingMode { kPaddingModeSame, kPaddingModeValid, kPaddingModeCaffe, kPadd
enum FusedActivationType { kNone, kRelu, kRelu1, kRelu6, kActivationNum };
typedef std::vector<int> Shape;
/* context APIs */
bool initPerThread();
void deinitPerThread();
bool isAvailable();
#endif // HAVE_VULKAN
......
......@@ -18,7 +18,7 @@ static uint32_t findMemoryType(uint32_t memoryTypeBits, VkMemoryPropertyFlags pr
{
VkPhysicalDeviceMemoryProperties memoryProperties;
vkGetPhysicalDeviceMemoryProperties(getPhysicalDevice(), &memoryProperties);
vkGetPhysicalDeviceMemoryProperties(kPhysicalDevice, &memoryProperties);
for (uint32_t i = 0; i < memoryProperties.memoryTypeCount; ++i) {
if ((memoryTypeBits & (1 << i)) &&
......
......@@ -29,6 +29,10 @@
namespace cv { namespace dnn { namespace vkcom {
#ifdef HAVE_VULKAN
extern VkPhysicalDevice kPhysicalDevice;
extern VkDevice kDevice;
extern VkQueue kQueue;
extern VkCommandPool kCmdPool;
enum ShapeIdx
{
......@@ -42,7 +46,7 @@ enum ShapeIdx
{ \
if (f != VK_SUCCESS) \
{ \
CV_LOG_ERROR(NULL, "Vulkan check failed, result = " << f); \
CV_LOG_ERROR(NULL, "Vulkan check failed, result = " << (int)f); \
CV_Error(Error::StsError, "Vulkan check failed"); \
} \
}
......
......@@ -6,186 +6,124 @@
// Third party copyrights are property of their respective owners.
#include "../../precomp.hpp"
#include "common.hpp"
#include "internal.hpp"
#include "../include/op_conv.hpp"
#include "../include/op_pool.hpp"
#include "../include/op_lrn.hpp"
#include "../include/op_concat.hpp"
#include "../include/op_softmax.hpp"
#include "../vulkan/vk_loader.hpp"
#include "common.hpp"
#include "context.hpp"
namespace cv { namespace dnn { namespace vkcom {
#ifdef HAVE_VULKAN
static bool enableValidationLayers = false;
static VkInstance kInstance;
static VkPhysicalDevice kPhysicalDevice;
static VkDebugReportCallbackEXT kDebugReportCallback;
static uint32_t kQueueFamilyIndex;
std::shared_ptr<Context> kCtx;
bool enableValidationLayers = false;
VkInstance kInstance;
VkPhysicalDevice kPhysicalDevice;
VkDevice kDevice;
VkQueue kQueue;
VkCommandPool kCmdPool;
VkDebugReportCallbackEXT kDebugReportCallback;
uint32_t kQueueFamilyIndex;
std::vector<const char *> kEnabledLayers;
typedef std::map<std::thread::id, Context*> IdToContextMap;
IdToContextMap kThreadResources;
static std::map<std::string, std::vector<uint32_t>> kShaders;
static int init_count = 0;
static bool init();
static void release();
static uint32_t getComputeQueueFamilyIndex();
static bool checkExtensionAvailability(const char *extension_name,
const std::vector<VkExtensionProperties>
&available_extensions);
static VKAPI_ATTR VkBool32 VKAPI_CALL debugReportCallbackFn(
VkDebugReportFlagsEXT flags,
VkDebugReportObjectTypeEXT objectType,
uint64_t object,
size_t location,
int32_t messageCode,
const char* pLayerPrefix,
const char* pMessage,
void* pUserData);
std::map<std::string, std::vector<uint32_t>> kShaders;
static void setContext(Context* ctx)
static uint32_t getComputeQueueFamilyIndex()
{
cv::AutoLock lock(getInitializationMutex());
std::thread::id tid = std::this_thread::get_id();
if (kThreadResources.find(tid) != kThreadResources.end())
{
return;
}
kThreadResources.insert(std::pair<std::thread::id, Context*>(tid, ctx));
}
uint32_t queueFamilyCount;
Context* getContext()
{
Context* ctx = NULL;
vkGetPhysicalDeviceQueueFamilyProperties(kPhysicalDevice, &queueFamilyCount, NULL);
cv::AutoLock lock(getInitializationMutex());
std::thread::id tid = std::this_thread::get_id();
IdToContextMap::iterator it = kThreadResources.find(tid);
if (it != kThreadResources.end())
std::vector<VkQueueFamilyProperties> queueFamilies(queueFamilyCount);
vkGetPhysicalDeviceQueueFamilyProperties(kPhysicalDevice,
&queueFamilyCount,
queueFamilies.data());
uint32_t i = 0;
for (; i < queueFamilies.size(); ++i)
{
VkQueueFamilyProperties props = queueFamilies[i];
if (props.queueCount > 0 && (props.queueFlags & VK_QUEUE_COMPUTE_BIT))
{
ctx = it->second;
break;
}
}
return ctx;
}
static void removeContext()
{
cv::AutoLock lock(getInitializationMutex());
std::thread::id tid = std::this_thread::get_id();
IdToContextMap::iterator it = kThreadResources.find(tid);
if (it == kThreadResources.end())
if (i == queueFamilies.size())
{
return;
throw std::runtime_error("could not find a queue family that supports operations");
}
kThreadResources.erase(it);
return i;
}
bool initPerThread()
bool checkExtensionAvailability(const char *extension_name,
const std::vector<VkExtensionProperties> &available_extensions)
{
VkDevice device;
VkQueue queue;
VkCommandPool cmd_pool;
VKCOM_CHECK_BOOL_RET_VAL(init(), false);
Context* ctx = getContext();
if (ctx)
for( size_t i = 0; i < available_extensions.size(); ++i )
{
if( strcmp( available_extensions[i].extensionName, extension_name ) == 0 )
{
ctx->ref++;
return true;
}
}
return false;
}
// create device, queue, command pool
VkDeviceQueueCreateInfo queueCreateInfo = {};
queueCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
queueCreateInfo.queueFamilyIndex = kQueueFamilyIndex;
queueCreateInfo.queueCount = 1; // create one queue in this family. We don't need more.
float queuePriorities = 1.0; // we only have one queue, so this is not that imporant.
queueCreateInfo.pQueuePriorities = &queuePriorities;
VkDeviceCreateInfo deviceCreateInfo = {};
// Specify any desired device features here. We do not need any for this application, though.
VkPhysicalDeviceFeatures deviceFeatures = {};
deviceCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
deviceCreateInfo.enabledLayerCount = kEnabledLayers.size();
deviceCreateInfo.ppEnabledLayerNames = kEnabledLayers.data();
deviceCreateInfo.pQueueCreateInfos = &queueCreateInfo;
deviceCreateInfo.queueCreateInfoCount = 1;
deviceCreateInfo.pEnabledFeatures = &deviceFeatures;
VK_CHECK_RESULT(vkCreateDevice(kPhysicalDevice, &deviceCreateInfo, NULL, &device));
// Get a handle to the only member of the queue family.
vkGetDeviceQueue(device, kQueueFamilyIndex, 0, &queue);
// create command pool
VkCommandPoolCreateInfo commandPoolCreateInfo = {};
commandPoolCreateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
commandPoolCreateInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
// the queue family of this command pool. All command buffers allocated from this command pool,
// must be submitted to queues of this family ONLY.
commandPoolCreateInfo.queueFamilyIndex = kQueueFamilyIndex;
VK_CHECK_RESULT(vkCreateCommandPool(device, &commandPoolCreateInfo, NULL, &cmd_pool));
ctx = new Context();
ctx->device = device;
ctx->queue = queue;
ctx->cmd_pool = cmd_pool;
ctx->ref = 1;
setContext(ctx);
return true;
// Debug-report callback: writes the layer prefix and the message text to
// std::cout as "Debug Report: <prefix>:<message>" and always returns VK_FALSE.
// Only pLayerPrefix and pMessage are read; flags, objectType, object, location,
// messageCode and pUserData are accepted to match the callback signature but
// are not used.
VKAPI_ATTR VkBool32 VKAPI_CALL debugReportCallbackFn(
VkDebugReportFlagsEXT flags,
VkDebugReportObjectTypeEXT objectType,
uint64_t object,
size_t location,
int32_t messageCode,
const char* pLayerPrefix,
const char* pMessage,
void* pUserData)
{
// NOTE(review): both strings are streamed without a NULL check - presumably the
// caller always supplies valid C strings; confirm against the extension spec.
std::cout << "Debug Report: " << pLayerPrefix << ":" << pMessage << std::endl;
return VK_FALSE;
}
void deinitPerThread()
// internally used
void createContext()
{
Context* ctx = getContext();
if (ctx == NULL)
cv::AutoLock lock(getInitializationMutex());
if (!kCtx)
{
release();
return;
kCtx.reset(new Context());
}
}
if (ctx->ref > 1)
bool isAvailable()
{
try
{
ctx->ref--;
createContext();
}
else if (ctx->ref == 1)
catch (const cv::Exception& e)
{
for(auto &kv: ctx->shader_modules)
{
vkDestroyShaderModule(ctx->device, kv.second, NULL);
}
ctx->shader_modules.clear();
vkDestroyCommandPool(ctx->device, ctx->cmd_pool, NULL);
vkDestroyDevice(ctx->device, NULL);
removeContext();
delete ctx;
CV_LOG_ERROR(NULL, "Failed to init Vulkan environment. " << e.what());
return false;
}
else
CV_Assert(0);
release();
return true;
}
static bool init()
Context::Context()
{
cv::AutoLock lock(getInitializationMutex());
if (init_count == 0)
{
if(!loadVulkanLibrary())
{
return false;
CV_Error(Error::StsError, "loadVulkanLibrary failed");
return;
}
else if (!loadVulkanEntry())
{
return false;
CV_Error(Error::StsError, "loadVulkanEntry failed");
return;
}
else if (!loadVulkanGlobalFunctions())
{
return false;
CV_Error(Error::StsError, "loadVulkanGlobalFunctions failed");
return;
}
// create VkInstance, VkPhysicalDevice
......@@ -259,7 +197,8 @@ static bool init()
if (!loadVulkanFunctions(kInstance))
{
return false;
CV_Error(Error::StsError, "loadVulkanFunctions failed");
return;
}
if (enableValidationLayers && vkCreateDebugReportCallbackEXT)
......@@ -297,23 +236,47 @@ static bool init()
}
kQueueFamilyIndex = getComputeQueueFamilyIndex();
}
init_count++;
return true;
// create device, queue, command pool
VkDeviceQueueCreateInfo queueCreateInfo = {};
queueCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
queueCreateInfo.queueFamilyIndex = kQueueFamilyIndex;
queueCreateInfo.queueCount = 1; // create one queue in this family. We don't need more.
float queuePriorities = 1.0; // we only have one queue, so this is not that imporant.
queueCreateInfo.pQueuePriorities = &queuePriorities;
VkDeviceCreateInfo deviceCreateInfo = {};
// Specify any desired device features here. We do not need any for this application, though.
VkPhysicalDeviceFeatures deviceFeatures = {};
deviceCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
deviceCreateInfo.enabledLayerCount = kEnabledLayers.size();
deviceCreateInfo.ppEnabledLayerNames = kEnabledLayers.data();
deviceCreateInfo.pQueueCreateInfos = &queueCreateInfo;
deviceCreateInfo.queueCreateInfoCount = 1;
deviceCreateInfo.pEnabledFeatures = &deviceFeatures;
VK_CHECK_RESULT(vkCreateDevice(kPhysicalDevice, &deviceCreateInfo, NULL, &kDevice));
// Get a handle to the only member of the queue family.
vkGetDeviceQueue(kDevice, kQueueFamilyIndex, 0, &kQueue);
// create command pool
VkCommandPoolCreateInfo commandPoolCreateInfo = {};
commandPoolCreateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
commandPoolCreateInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
// the queue family of this command pool. All command buffers allocated from this command pool,
// must be submitted to queues of this family ONLY.
commandPoolCreateInfo.queueFamilyIndex = kQueueFamilyIndex;
VK_CHECK_RESULT(vkCreateCommandPool(kDevice, &commandPoolCreateInfo, NULL, &kCmdPool));
}
static void release()
Context::~Context()
{
cv::AutoLock lock(getInitializationMutex());
if (init_count == 0)
{
return;
}
vkDestroyCommandPool(kDevice, kCmdPool, NULL);
vkDestroyDevice(kDevice, NULL);
init_count--;
if (init_count == 0)
{
if (enableValidationLayers) {
auto func = (PFN_vkDestroyDebugReportCallbackEXT)
vkGetInstanceProcAddr(kInstance, "vkDestroyDebugReportCallbackEXT");
......@@ -324,80 +287,10 @@ static void release()
}
kShaders.clear();
vkDestroyInstance(kInstance, NULL);
}
return;
}
// Returns the index of a queue family that supports compute operations.
// The queue families of kPhysicalDevice are scanned in order and the index of
// the first family that has at least one queue and the VK_QUEUE_COMPUTE_BIT
// flag set is returned.
// Throws std::runtime_error when no family satisfies both conditions.
static uint32_t getComputeQueueFamilyIndex()
{
uint32_t queueFamilyCount;
// First query passes NULL for the output array, so only the count is written.
vkGetPhysicalDeviceQueueFamilyProperties(kPhysicalDevice, &queueFamilyCount, NULL);
// Second query fills a vector sized from the count obtained above.
std::vector<VkQueueFamilyProperties> queueFamilies(queueFamilyCount);
vkGetPhysicalDeviceQueueFamilyProperties(kPhysicalDevice,
&queueFamilyCount,
queueFamilies.data());
// i is declared outside the loop so it can be compared with the size afterwards.
uint32_t i = 0;
for (; i < queueFamilies.size(); ++i)
{
VkQueueFamilyProperties props = queueFamilies[i];
if (props.queueCount > 0 && (props.queueFlags & VK_QUEUE_COMPUTE_BIT))
{
// First matching family wins; i keeps its index.
break;
}
}
// Reaching the end of the vector means the loop never hit the break above.
if (i == queueFamilies.size())
{
throw std::runtime_error("could not find a queue family that supports operations");
}
return i;
}
// Tells whether extension_name appears in available_extensions.
// Each entry's extensionName is compared with strcmp, so the match is exact and
// case-sensitive.
// Returns true on the first match, false when no entry matches (including when
// the vector is empty).
bool checkExtensionAvailability(const char *extension_name,
const std::vector<VkExtensionProperties> &available_extensions)
{
for( size_t i = 0; i < available_extensions.size(); ++i )
{
if( strcmp( available_extensions[i].extensionName, extension_name ) == 0 )
{
return true;
}
}
return false;
}
// Debug-report callback: prints "Debug Report: <pLayerPrefix>:<pMessage>" to
// std::cout and always returns VK_FALSE.
// The remaining parameters (flags, objectType, object, location, messageCode,
// pUserData) are part of the callback signature but are not read here.
VKAPI_ATTR VkBool32 VKAPI_CALL debugReportCallbackFn(
VkDebugReportFlagsEXT flags,
VkDebugReportObjectTypeEXT objectType,
uint64_t object,
size_t location,
int32_t messageCode,
const char* pLayerPrefix,
const char* pMessage,
void* pUserData)
{
// std::endl also flushes the stream after every report.
std::cout << "Debug Report: " << pLayerPrefix << ":" << pMessage << std::endl;
return VK_FALSE;
}
// internally used functions
// Accessor for the file-level kPhysicalDevice handle; returns it by value.
VkPhysicalDevice getPhysicalDevice()
{
return kPhysicalDevice;
}
// Returns true when getContext() yields a non-NULL Context pointer, false
// otherwise. It performs no initialization itself.
bool isAvailable()
{
return getContext() != NULL;
}
#endif // HAVE_VULKAN
}}} // namespace cv::dnn::vkcom
......@@ -7,7 +7,6 @@
#ifndef OPENCV_DNN_VKCOM_CONTEXT_HPP
#define OPENCV_DNN_VKCOM_CONTEXT_HPP
#include "common.hpp"
namespace cv { namespace dnn { namespace vkcom {
......@@ -15,13 +14,12 @@ namespace cv { namespace dnn { namespace vkcom {
struct Context
{
VkDevice device;
VkQueue queue;
VkCommandPool cmd_pool;
std::map<std::string, VkShaderModule> shader_modules;
int ref;
Context();
~Context();
};
void createContext();
#endif // HAVE_VULKAN
}}} // namespace cv::dnn::vkcom
......
......@@ -16,8 +16,8 @@ namespace cv { namespace dnn { namespace vkcom {
OpBase::OpBase()
{
ctx_ = getContext();
device_ = ctx_->device;
createContext();
device_ = kDevice;
pipeline_ = VK_NULL_HANDLE;
cmd_buffer_ = VK_NULL_HANDLE;
descriptor_pool_ = VK_NULL_HANDLE;
......@@ -139,7 +139,7 @@ void OpBase::createCommandBuffer()
{
VkCommandBufferAllocateInfo info = {};
info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
info.commandPool = ctx_->cmd_pool;
info.commandPool = kCmdPool;
info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
info.commandBufferCount = 1;
VK_CHECK_RESULT(vkAllocateCommandBuffers(device_, &info, &cmd_buffer_));
......@@ -176,7 +176,7 @@ void OpBase::runCommandBuffer()
fence_create_info_.flags = 0;
VK_CHECK_RESULT(vkCreateFence(device_, &fence_create_info_, NULL, &fence));
VK_CHECK_RESULT(vkQueueSubmit(ctx_->queue, 1, &submit_info, fence));
VK_CHECK_RESULT(vkQueueSubmit(kQueue, 1, &submit_info, fence));
VK_CHECK_RESULT(vkWaitForFences(device_, 1, &fence, VK_TRUE, 100000000000));
vkDestroyFence(device_, fence, NULL);
}
......
......@@ -15,15 +15,15 @@ namespace cv { namespace dnn { namespace vkcom {
Tensor::Tensor(Format fmt) : size_in_byte_(0), format_(fmt)
{
Context *ctx = getContext();
device_ = ctx->device;
createContext();
device_ = kDevice;
}
Tensor::Tensor(const char* data, std::vector<int>& shape, Format fmt)
: size_in_byte_(0), format_(fmt)
{
Context *ctx = getContext();
device_ = ctx->device;
createContext();
device_ = kDevice;
reshape(data, shape);
}
......
......@@ -8,6 +8,10 @@
#ifndef OPENCV_DNN_VKCOM_VULKAN_VK_LOADER_HPP
#define OPENCV_DNN_VKCOM_VULKAN_VK_LOADER_HPP
#ifdef HAVE_VULKAN
#include <vulkan/vulkan.h>
#endif // HAVE_VULKAN
namespace cv { namespace dnn { namespace vkcom {
#ifdef HAVE_VULKAN
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment