Commit 997ad127 authored by Alexander Alekhin

Merge pull request #12985 from wzw-intel:vkcom_refine

parents 777eaa73 33c9d57c
...@@ -1411,9 +1411,44 @@ struct Net::Impl ...@@ -1411,9 +1411,44 @@ struct Net::Impl
continue; continue;
} }
if (ld.type == "Convolution")
{
std::vector<MatShape> in_shapes;
std::vector<MatShape> out_shapes;
CV_Assert(ld.inputBlobs.size() == ld.outputBlobs.size());
for (int i = 0; i < ld.inputBlobs.size(); i++)
{
in_shapes.push_back(shape(*ld.inputBlobs[i]));
out_shapes.push_back(shape(ld.outputBlobs[i]));
}
int64 flops = layer->getFLOPS(in_shapes, out_shapes);
// FIXME
//
// This is a workaround for GPU hang on heavy convolution workload ( > 10 GFLOPS).
// For the long time task, vkWaitForFences() return without error but next call on
// vkQueueSubmit() return -4, i.e. "VK_ERROR_DEVICE_LOST" and driver reports GPU hang.
//
// Need more investigation on root cause of GPU hang and need to optimize convolution shader
// to reduce process time.
if (flops > CV_BIG_INT(10) * 1000 * 1000 * 1000)
{
continue;
}
}
ld.skip = false; ld.skip = false;
ld.backendNodes[DNN_BACKEND_VKCOM] =
layer->initVkCom(ld.inputBlobsWrappers); try
{
ld.backendNodes[DNN_BACKEND_VKCOM] =
layer->initVkCom(ld.inputBlobsWrappers);
}
catch (const cv::Exception& e)
{
CV_LOG_ERROR(NULL, "initVkCom failed, fallback to CPU implementation. " << e.what());
ld.backendNodes[DNN_BACKEND_VKCOM] = Ptr<BackendNode>();
}
} }
#endif #endif
} }
...@@ -2318,7 +2353,16 @@ struct Net::Impl ...@@ -2318,7 +2353,16 @@ struct Net::Impl
} }
else if (preferableBackend == DNN_BACKEND_VKCOM) else if (preferableBackend == DNN_BACKEND_VKCOM)
{ {
forwardVkCom(ld.outputBlobsWrappers, node); try
{
forwardVkCom(ld.outputBlobsWrappers, node);
}
catch (const cv::Exception& e)
{
CV_LOG_ERROR(NULL, "forwardVkCom failed, fallback to CPU implementation. " << e.what());
it->second = Ptr<BackendNode>();
forwardLayer(ld);
}
} }
else else
{ {
......
...@@ -18,6 +18,7 @@ layout(push_constant) uniform pushBlock { ...@@ -18,6 +18,7 @@ layout(push_constant) uniform pushBlock {
int channel_size; int channel_size;
int outer_size; int outer_size;
int channels; int channels;
int logsoftmax;
} p; } p;
layout(local_size_x = LOCAL_SZ_X, local_size_y = 1, local_size_z = 1) in; layout(local_size_x = LOCAL_SZ_X, local_size_y = 1, local_size_z = 1) in;
...@@ -68,9 +69,8 @@ void main() ...@@ -68,9 +69,8 @@ void main()
for (int i = 0; i < p.channel_size; ++i) for (int i = 0; i < p.channel_size; ++i)
{ {
float v = output_buffer[index] / sum_buffer[reduced_buffer_off + i]; float v = output_buffer[index] / sum_buffer[reduced_buffer_off + i];
#ifdef LOG_SOFTMAX if (p.logsoftmax == 1)
v = log(v); v = log(v);
#endif
output_buffer[index] = v; output_buffer[index] = v;
index++; index++;
} }
......
...@@ -16,7 +16,7 @@ extern const unsigned int permute_spv[765]; ...@@ -16,7 +16,7 @@ extern const unsigned int permute_spv[765];
extern const unsigned int lrn_spv[1845]; extern const unsigned int lrn_spv[1845];
extern const unsigned int concat_spv[541]; extern const unsigned int concat_spv[541];
extern const unsigned int avg_pool_spv[1538]; extern const unsigned int avg_pool_spv[1538];
extern const unsigned int softmax_spv[1440]; extern const unsigned int softmax_spv[1496];
extern const unsigned int prior_box_spv[1480]; extern const unsigned int prior_box_spv[1480];
extern const unsigned int max_pool_spv[1449]; extern const unsigned int max_pool_spv[1449];
extern const unsigned int relu_spv[502]; extern const unsigned int relu_spv[502];
......
...@@ -42,7 +42,8 @@ enum ShapeIdx ...@@ -42,7 +42,8 @@ enum ShapeIdx
{ \ { \
if (f != VK_SUCCESS) \ if (f != VK_SUCCESS) \
{ \ { \
CV_LOG_WARNING(NULL, "Vulkan check failed"); \ CV_LOG_ERROR(NULL, "Vulkan check failed, result = " << f); \
CV_Error(Error::StsError, "Vulkan check failed"); \
} \ } \
} }
......
...@@ -22,6 +22,7 @@ struct SoftmaxParam { ...@@ -22,6 +22,7 @@ struct SoftmaxParam {
int channel_size; int channel_size;
int outer_size; int outer_size;
int channels; int channels;
int logsoftmax;
}; };
OpSoftmax::OpSoftmax(const int axis, const bool log_softmax) OpSoftmax::OpSoftmax(const int axis, const bool log_softmax)
...@@ -90,7 +91,7 @@ bool OpSoftmax::forward(Tensor& in, Tensor& out) ...@@ -90,7 +91,7 @@ bool OpSoftmax::forward(Tensor& in, Tensor& out)
bindTensor(device_, *max_tensor_, 1, descriptor_set_); bindTensor(device_, *max_tensor_, 1, descriptor_set_);
bindTensor(device_, *sum_tensor_, 2, descriptor_set_); bindTensor(device_, *sum_tensor_, 2, descriptor_set_);
bindTensor(device_, out, 3, descriptor_set_); bindTensor(device_, out, 3, descriptor_set_);
SoftmaxParam param = {channel_size_, outer_size_, channels_}; SoftmaxParam param = {channel_size_, outer_size_, channels_, log_softmax_ == true ? 1 : 0};
recordCommandBuffer((void *)&param, sizeof(SoftmaxParam)); recordCommandBuffer((void *)&param, sizeof(SoftmaxParam));
runCommandBuffer(); runCommandBuffer();
return true; return true;
......
...@@ -17,7 +17,7 @@ ...@@ -17,7 +17,7 @@
namespace cv { namespace dnn { namespace vkcom { namespace cv { namespace dnn { namespace vkcom {
#include "function_list.inl" #include "function_list.inl.hpp"
}}} // namespace cv::dnn::vkcom }}} // namespace cv::dnn::vkcom
#endif // HAVE_VULKAN #endif // HAVE_VULKAN
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
namespace cv { namespace dnn { namespace vkcom { namespace cv { namespace dnn { namespace vkcom {
#include "function_list.inl" #include "function_list.inl.hpp"
}}} // namespace cv::dnn::vkcom }}} // namespace cv::dnn::vkcom
#endif // HAVE_VULKAN #endif // HAVE_VULKAN
......
...@@ -57,7 +57,7 @@ bool loadVulkanFunctions(VkInstance& instance) ...@@ -57,7 +57,7 @@ bool loadVulkanFunctions(VkInstance& instance)
return false; \ return false; \
} }
#include "function_list.inl" #include "function_list.inl.hpp"
return true; return true;
} }
...@@ -74,7 +74,7 @@ bool loadVulkanGlobalFunctions() ...@@ -74,7 +74,7 @@ bool loadVulkanGlobalFunctions()
return false; \ return false; \
} }
#include "function_list.inl" #include "function_list.inl.hpp"
return true; return true;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment