Merge pull request #12985 from wzw-intel:vkcom_refine

997ad127 · Alexander Alekhin · 777eaa73 · 33c9d57c · 997ad127 · 997ad127
Commit 997ad127 authored Nov 08, 2018 by Alexander Alekhin
10 changed files
--- a/modules/dnn/src/dnn.cpp
+++ b/modules/dnn/src/dnn.cpp
@@ -1411,9 +1411,44 @@ struct Net::Impl
                continue;
            }
+            if (ld.type == "Convolution")
+            {
+                std::vector<MatShape> in_shapes;
+                std::vector<MatShape> out_shapes;
+                CV_Assert(ld.inputBlobs.size() == ld.outputBlobs.size());
+                for (int i = 0; i < ld.inputBlobs.size(); i++)
+                {
+                    in_shapes.push_back(shape(*ld.inputBlobs[i]));
+                    out_shapes.push_back(shape(ld.outputBlobs[i]));
+                }
+                int64 flops = layer->getFLOPS(in_shapes, out_shapes);
+                // FIXME
+                //
+                // This is a workaround for a GPU hang on heavy convolution workloads ( > 10 GFLOPS).
+                // For such long-running tasks, vkWaitForFences() returns without error, but the next call to
+                // vkQueueSubmit() returns -4, i.e. "VK_ERROR_DEVICE_LOST", and the driver reports a GPU hang.
+                //
+                // The root cause of the GPU hang needs more investigation, and the convolution shader
+                // needs to be optimized to reduce processing time.
+                if (flops > CV_BIG_INT(10) * 1000 * 1000 * 1000)
+                {
+                    continue;
+                }
+            }
            ld.skip = false;
-            ld.backendNodes[DNN_BACKEND_VKCOM] =
-                layer->initVkCom(ld.inputBlobsWrappers);
+            try
+            {
+                ld.backendNodes[DNN_BACKEND_VKCOM] =
+                    layer->initVkCom(ld.inputBlobsWrappers);
+            }
+            catch (const cv::Exception& e)
+            {
+                CV_LOG_ERROR(NULL, "initVkCom failed, fallback to CPU implementation. " << e.what());
+                ld.backendNodes[DNN_BACKEND_VKCOM] = Ptr<BackendNode>();
+            }
        }
 #endif
    }
@@ -2318,7 +2353,16 @@ struct Net::Impl
                }
                else if (preferableBackend == DNN_BACKEND_VKCOM)
                {
-                    forwardVkCom(ld.outputBlobsWrappers, node);
+                    try
+                    {
+                        forwardVkCom(ld.outputBlobsWrappers, node);
+                    }
+                    catch (const cv::Exception& e)
+                    {
+                        CV_LOG_ERROR(NULL, "forwardVkCom failed, fallback to CPU implementation. " << e.what());
+                        it->second = Ptr<BackendNode>();
+                        forwardLayer(ld);
+                    }
                }
                else
                {

--- a/modules/dnn/src/vkcom/shader/softmax.comp
+++ b/modules/dnn/src/vkcom/shader/softmax.comp
@@ -18,6 +18,7 @@ layout(push_constant) uniform pushBlock {
    int channel_size;
    int outer_size;
    int channels;
+    int logsoftmax;
 } p;
 layout(local_size_x = LOCAL_SZ_X, local_size_y = 1, local_size_z = 1) in;
@@ -68,9 +69,8 @@ void main()
        for (int i = 0; i < p.channel_size; ++i)
        {
            float v = output_buffer[index] / sum_buffer[reduced_buffer_off + i];
-#ifdef LOG_SOFTMAX
+            if (p.logsoftmax == 1)
-            v = log(v);
+                v = log(v);
-#endif
            output_buffer[index] = v;
            index++;
        }

--- a/modules/dnn/src/vkcom/shader/softmax_spv.cpp
+++ b/modules/dnn/src/vkcom/shader/softmax_spv.cpp
--- a/modules/dnn/src/vkcom/shader/spv_shader.hpp
+++ b/modules/dnn/src/vkcom/shader/spv_shader.hpp
@@ -16,7 +16,7 @@ extern const unsigned int permute_spv[765];
 extern const unsigned int lrn_spv[1845];
 extern const unsigned int concat_spv[541];
 extern const unsigned int avg_pool_spv[1538];
-extern const unsigned int softmax_spv[1440];
+extern const unsigned int softmax_spv[1496];
 extern const unsigned int prior_box_spv[1480];
 extern const unsigned int max_pool_spv[1449];
 extern const unsigned int relu_spv[502];

--- a/modules/dnn/src/vkcom/src/common.hpp
+++ b/modules/dnn/src/vkcom/src/common.hpp
@@ -42,7 +42,8 @@ enum ShapeIdx
 { \
        if (f != VK_SUCCESS) \
        { \
-            CV_LOG_WARNING(NULL, "Vulkan check failed"); \
+            CV_LOG_ERROR(NULL, "Vulkan check failed, result = " << f); \
+            CV_Error(Error::StsError, "Vulkan check failed"); \
        } \
 }

--- a/modules/dnn/src/vkcom/src/op_softmax.cpp
+++ b/modules/dnn/src/vkcom/src/op_softmax.cpp
@@ -22,6 +22,7 @@ struct SoftmaxParam {
    int channel_size;
    int outer_size;
    int channels;
+    int logsoftmax;
 };
 OpSoftmax::OpSoftmax(const int axis, const bool log_softmax)
@@ -90,7 +91,7 @@ bool OpSoftmax::forward(Tensor& in, Tensor& out)
    bindTensor(device_, *max_tensor_,  1, descriptor_set_);
    bindTensor(device_, *sum_tensor_,  2, descriptor_set_);
    bindTensor(device_, out, 3, descriptor_set_);
-    SoftmaxParam param = {channel_size_, outer_size_, channels_};
+    SoftmaxParam param = {channel_size_, outer_size_, channels_, log_softmax_ == true ? 1 : 0};
    recordCommandBuffer((void *)&param, sizeof(SoftmaxParam));
    runCommandBuffer();
    return true;

--- a/modules/dnn/src/vkcom/vulkan/function_list.inl
+++ b/modules/dnn/src/vkcom/vulkan/function_list.inl
--- a/modules/dnn/src/vkcom/vulkan/vk_functions.cpp
+++ b/modules/dnn/src/vkcom/vulkan/vk_functions.cpp
@@ -17,7 +17,7 @@
 namespace cv { namespace dnn { namespace vkcom {
-#include "function_list.inl"
+#include "function_list.inl.hpp"
 }}} // namespace cv::dnn::vkcom
 #endif // HAVE_VULKAN
--- a/modules/dnn/src/vkcom/vulkan/vk_functions.hpp
+++ b/modules/dnn/src/vkcom/vulkan/vk_functions.hpp
@@ -20,7 +20,7 @@
 namespace cv { namespace dnn { namespace vkcom {
-#include "function_list.inl"
+#include "function_list.inl.hpp"
 }}} // namespace cv::dnn::vkcom
 #endif // HAVE_VULKAN

--- a/modules/dnn/src/vkcom/vulkan/vk_loader.cpp
+++ b/modules/dnn/src/vkcom/vulkan/vk_loader.cpp
@@ -57,7 +57,7 @@ bool loadVulkanFunctions(VkInstance& instance)
      return false; \
    }
-#include "function_list.inl"
+#include "function_list.inl.hpp"
    return true;
 }
@@ -74,7 +74,7 @@ bool loadVulkanGlobalFunctions()
      return false; \
    }
-#include "function_list.inl"
+#include "function_list.inl.hpp"
    return true;
 }