Commit dfc20454 authored by Chris Sullivan's avatar Chris Sullivan Committed by Robert Kimball

Address potential bug in cudnnGetReductionWorkspaceSize (#1990)

* When CUDNN_DATA_TYPE == CUDNN_DATA_DOUBLE, the workspace size calculated by cuDNN appears to be incorrect.
Adding a temporary fix here until the underlying issue is found.

* Add softmax test illustrating bug in cudnn impl.

* disable new unit test in intel GPU
parent 32398641
...@@ -196,6 +196,11 @@ size_t runtime::gpu::CUDNNEmitter::build_reduce_forward(const cudnnReduceTensorO ...@@ -196,6 +196,11 @@ size_t runtime::gpu::CUDNNEmitter::build_reduce_forward(const cudnnReduceTensorO
size_t workspace_size = 0; size_t workspace_size = 0;
CUDNN_SAFE_CALL(cudnnGetReductionWorkspaceSize( CUDNN_SAFE_CALL(cudnnGetReductionWorkspaceSize(
*m_ctx->cudnn_handle, desc, input_desc, output_desc, &workspace_size)); *m_ctx->cudnn_handle, desc, input_desc, output_desc, &workspace_size));
size_t input_buffer_size = shape_size(input_shape) * input_type.size();
if (workspace_size < input_buffer_size)
{
workspace_size = input_buffer_size;
}
size_t workspace_idx = allocator.reserve_workspace(workspace_size); size_t workspace_idx = allocator.reserve_workspace(workspace_size);
void* alpha = m_host_parameters.allocate_by_datatype(data_type, 1.0); void* alpha = m_host_parameters.allocate_by_datatype(data_type, 1.0);
......
...@@ -61,6 +61,7 @@ reverse_sequence_n4d2c3h2w2 ...@@ -61,6 +61,7 @@ reverse_sequence_n4d2c3h2w2
select_and_scatter_3d_without_overlap select_and_scatter_3d_without_overlap
select_and_scatter_with_overlap select_and_scatter_with_overlap
select_and_scatter_without_overlap select_and_scatter_without_overlap
softmax_axis_3d_double
topk_1d_max_all topk_1d_max_all
topk_1d_max_one topk_1d_max_one
topk_1d_max_partial topk_1d_max_partial
......
...@@ -4034,6 +4034,42 @@ NGRAPH_TEST(${BACKEND_NAME}, softmax_axis_3d) ...@@ -4034,6 +4034,42 @@ NGRAPH_TEST(${BACKEND_NAME}, softmax_axis_3d)
EXPECT_TRUE(test::all_close(expected, read_vector<float>(result))); EXPECT_TRUE(test::all_close(expected, read_vector<float>(result)));
} }
// Regression test for the cuDNN reduction-workspace bug with CUDNN_DATA_DOUBLE:
// softmax over axis 0 of a 2x2x3 f64 tensor. Reference values are computed on
// the host and compared against the backend result with all_close.
NGRAPH_TEST(${BACKEND_NAME}, softmax_axis_3d_double)
{
    Shape shape{2, 2, 3};
    auto A = make_shared<op::Parameter>(element::f64, shape);
    auto f = make_shared<Function>(make_shared<op::Softmax>(A, AxisSet{0}), op::ParameterVector{A});

    auto backend = runtime::Backend::create("${BACKEND_NAME}");

    auto a = backend->create_tensor(element::f64, shape);
    copy_data(a, vector<double>{-10, -20, -30, -40, -50, -60, -1, -2, -3, -4, -5, -6});
    auto result = backend->create_tensor(element::f64, shape);

    // Use the double-precision exp (not expf) so the host-side reference
    // matches the f64 precision of the backend computation; expf would
    // introduce float rounding error into the expected values.
    // d0..d5 are the per-position denominators: the sum over axis 0 pairs
    // (element i in slice 0 with element i in slice 1).
    auto d0 = exp(-10.0) + exp(-1.0);
    auto d1 = exp(-20.0) + exp(-2.0);
    auto d2 = exp(-30.0) + exp(-3.0);
    auto d3 = exp(-40.0) + exp(-4.0);
    auto d4 = exp(-50.0) + exp(-5.0);
    auto d5 = exp(-60.0) + exp(-6.0);

    backend->call_with_validate(f, {result}, {a});
    vector<double> expected{exp(-10.0) / d0,
                            exp(-20.0) / d1,
                            exp(-30.0) / d2,
                            exp(-40.0) / d3,
                            exp(-50.0) / d4,
                            exp(-60.0) / d5,
                            exp(-1.0) / d0,
                            exp(-2.0) / d1,
                            exp(-3.0) / d2,
                            exp(-4.0) / d3,
                            exp(-5.0) / d4,
                            exp(-6.0) / d5};
    EXPECT_TRUE(test::all_close(expected, read_vector<double>(result)));
}
NGRAPH_TEST(${BACKEND_NAME}, softmax_axis) NGRAPH_TEST(${BACKEND_NAME}, softmax_axis)
{ {
Shape shape{2, 3}; Shape shape{2, 3};
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment