Unverified Commit 0f05495c authored by Scott Cyphers's avatar Scott Cyphers Committed by GitHub

Merge branch 'master' into master

parents 1b5340c4 244c9fcf
...@@ -22,7 +22,7 @@ include(ExternalProject) ...@@ -22,7 +22,7 @@ include(ExternalProject)
#------------------------------------------------------------------------------ #------------------------------------------------------------------------------
set(CLDNN_GIT_REPO_URL https://github.com/intel/clDNN.git) set(CLDNN_GIT_REPO_URL https://github.com/intel/clDNN.git)
set(CLDNN_GIT_LABEL df28d2861716cac7a6a9eff4e49e47162959a748) set(CLDNN_GIT_LABEL 02add7c4ce2baa81e2a32fa02d733dcc4f013108)
set(BOOST_VERSION 1.64.0) set(BOOST_VERSION 1.64.0)
set(OUT_DIR ${EXTERNAL_PROJECTS_ROOT}/cldnn/out) set(OUT_DIR ${EXTERNAL_PROJECTS_ROOT}/cldnn/out)
......
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include <CPP/crop.hpp> #include <CPP/crop.hpp>
#include <CPP/data.hpp> #include <CPP/data.hpp>
#include <CPP/eltwise.hpp> #include <CPP/eltwise.hpp>
#include <CPP/gemm.hpp>
#include <CPP/input_layout.hpp> #include <CPP/input_layout.hpp>
#include <CPP/layout.hpp> #include <CPP/layout.hpp>
#include <CPP/lrn.hpp> #include <CPP/lrn.hpp>
...@@ -675,14 +676,51 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func) ...@@ -675,14 +676,51 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
{ {
arguments_check(op, 2, 1); arguments_check(op, 2, 1);
do_dot_operation(topology, const shared_ptr<op::Dot> dot_inst = static_pointer_cast<op::Dot>(op);
get_input_name(op, 0), const size_t axes_count = dot_inst->get_reduction_axes_count();
get_input_shape(op, 0), const Shape& input0_shape = get_input_shape(op, 0);
get_input_name(op, 1), const Shape& input1_shape = get_input_shape(op, 1);
get_input_shape(op, 1), const size_t input0_elem_count = shape_size(input0_shape);
get_output_name(op), const size_t input1_elem_count = shape_size(input1_shape);
get_output_shape(op),
get_output_type(op)); if (get_input_type(op) == element::f32 && get_input_type(op, 1) == element::f32 &&
get_output_type(op) == element::f32 && input0_elem_count && input1_elem_count &&
(axes_count == 1) && (input0_shape.size() < 3) && (input1_shape.size() < 3) &&
!input0_shape.empty() && !input1_shape.empty())
{
string input1_name = get_input_name(op, 1);
// If we have A[5] and B[5] here, in cldnn we have A[1, 1, 1, 5] and B[1, 1, 1, 5]
// it needs to be reshaped into A[1, 1, 1, 5] and B[1, 1, 5, 1]
if (!input0_shape.empty() && (input1_shape.size() == 1))
{
const string new_name = input1_name + "_reshaped";
Shape new_shape = input1_shape;
new_shape.push_back(1);
const cldnn::tensor reshaped_tensor =
intelgpu_space::create_cldnn_tensor(new_shape);
const cldnn::reshape reshape_op(new_name, input1_name, reshaped_tensor);
topology.add(reshape_op);
input1_name = new_name;
}
const cldnn::gemm dot_op(get_output_name(op), get_input_name(op, 0), input1_name);
topology.add(dot_op);
}
else
{
do_dot_operation(topology,
get_input_name(op, 0),
get_input_shape(op, 0),
get_input_name(op, 1),
get_input_shape(op, 1),
get_output_name(op),
get_output_shape(op),
get_output_type(op),
axes_count);
}
break; break;
} }
case OP_TYPEID::MaxPool: case OP_TYPEID::MaxPool:
......
...@@ -72,7 +72,8 @@ namespace ngraph ...@@ -72,7 +72,8 @@ namespace ngraph
const Shape& inputB_shape, const Shape& inputB_shape,
const std::string& output_name, const std::string& output_name,
const Shape& output_shape, const Shape& output_shape,
const element::Type& output_type); const element::Type& output_type,
size_t reduction_axes_count);
void do_slice_operation(cldnn::topology& topology, void do_slice_operation(cldnn::topology& topology,
const std::string& input_name, const std::string& input_name,
......
avg_pool_2d_2channel_2image_padded_only_above_do_not_include_in_computation avg_pool_2d_2channel_2image_padded_only_above_do_not_include_in_computation
avg_pool_2d_2channel_2image_padded_only_above_include_in_computation avg_pool_2d_2channel_2image_padded_only_above_include_in_computation
avg_pool_3d_uneven_strided_padded avg_pool_3d_uneven_strided_padded
backwards_batch_norm_three_outputs
backwards_batch_norm_training backwards_batch_norm_training
backwards_dot_scalar_tensor backwards_dot_scalar_tensor
backwards_dot_tensor3_tensor3 backwards_dot_tensor3_tensor3
...@@ -9,7 +10,6 @@ backwards_dot_tensor_vector ...@@ -9,7 +10,6 @@ backwards_dot_tensor_vector
backwards_exp backwards_exp
backwards_maxpool_n2_c1_hw5_3x3_str2_max backwards_maxpool_n2_c1_hw5_3x3_str2_max
backwards_maxpool_n4_c1_hw4_2x2_max backwards_maxpool_n4_c1_hw4_2x2_max
backwards_relu
backwards_replace_slice backwards_replace_slice
backwards_reverse_sequence_n3_c2_h3 backwards_reverse_sequence_n3_c2_h3
backwards_reverse_sequence_n4d2c3h2w2 backwards_reverse_sequence_n4d2c3h2w2
...@@ -21,41 +21,37 @@ batch_norm_training_0eps_f64 ...@@ -21,41 +21,37 @@ batch_norm_training_0eps_f64
batch_norm_one_output batch_norm_one_output
batch_norm_three_outputs batch_norm_three_outputs
dequantize dequantize
dequantize_zero_offset
dequantize_axes dequantize_axes
dequantize_int8
dequantize_int8_zero_offset
dequantize_int32 dequantize_int32
dequantize_int32_zero_offset dequantize_int32_zero_offset
dequantize_int8
dequantize_int8_zero_offset
dequantize_zero_offset
divide_by_zero_int32 divide_by_zero_int32
dot_3d_multi_axis
dot_4d_5d_multi_axis
dot_4d_5d_multi_axis_more
generate_mask
function_call function_call
generate_mask
max_pool_3d max_pool_3d
maxpool_bprop_larger_than_cache
numeric_double_inf numeric_double_inf
numeric_double_nan numeric_double_nan
quantize quantize
quantize_zero_offset
quantize_axes quantize_axes
quantize_int8 quantize_clamp_int32
quantize_int8_zero_offset quantize_clamp_int8
quantize_clamp_uint8
quantize_int32 quantize_int32
quantize_int32_zero_offset quantize_int32_zero_offset
quantize_clamp_uint8 quantize_int8
quantize_clamp_int8 quantize_int8_zero_offset
quantize_clamp_int32 quantize_ROUND_DOWN
quantize_ROUND_NEAREST_TOWARD_ZERO
quantize_ROUND_NEAREST_TOWARD_INFINITY
quantize_ROUND_NEAREST_UPWARD
quantize_ROUND_NEAREST_DOWNWARD quantize_ROUND_NEAREST_DOWNWARD
quantize_ROUND_NEAREST_TOWARD_EVEN quantize_ROUND_NEAREST_TOWARD_EVEN
quantize_ROUND_NEAREST_TOWARD_INFINITY
quantize_ROUND_NEAREST_TOWARD_ZERO
quantize_ROUND_NEAREST_UPWARD
quantize_ROUND_TOWARD_INFINITY quantize_ROUND_TOWARD_INFINITY
quantize_ROUND_TOWARD_ZERO quantize_ROUND_TOWARD_ZERO
quantize_ROUND_UP quantize_ROUND_UP
quantize_ROUND_DOWN quantize_zero_offset
reduce_window_emulating_max_pool_1d_1channel_1image reduce_window_emulating_max_pool_1d_1channel_1image
reduce_window_emulating_max_pool_1d_1channel_2image reduce_window_emulating_max_pool_1d_1channel_2image
reduce_window_emulating_max_pool_1d_2channel_2image reduce_window_emulating_max_pool_1d_2channel_2image
...@@ -73,9 +69,14 @@ reverse_sequence_n2c3h4w2 ...@@ -73,9 +69,14 @@ reverse_sequence_n2c3h4w2
reverse_sequence_n4c3h2w2 reverse_sequence_n4c3h2w2
reverse_sequence_n4d2c3h2w2 reverse_sequence_n4d2c3h2w2
select_and_scatter_3d_without_overlap select_and_scatter_3d_without_overlap
select_and_scatter_with_overlap
select_and_scatter_without_overlap select_and_scatter_without_overlap
select_and_scatter_with_overlap
shape_of_5d
shape_of_matrix
shape_of_scalar
shape_of_vector
softmax_axis_3d_double softmax_axis_3d_double
sum_stable_acc
topk_1d_max_all topk_1d_max_all
topk_1d_max_one topk_1d_max_one
topk_1d_max_partial topk_1d_max_partial
...@@ -88,17 +89,17 @@ topk_2d_max_partial ...@@ -88,17 +89,17 @@ topk_2d_max_partial
topk_2d_min_all topk_2d_min_all
topk_2d_min_one topk_2d_min_one
topk_2d_min_partial topk_2d_min_partial
topk_3d_large_input_max
topk_3d_large_input_min
topk_3d_max_all topk_3d_max_all
topk_3d_max_one topk_3d_max_one
topk_3d_max_partial topk_3d_max_partial
topk_3d_min_all topk_3d_min_all
topk_3d_min_one topk_3d_min_one
topk_3d_min_partial topk_3d_min_partial
topk_3d_single_output
topk_5d_max_partial topk_5d_max_partial
topk_int64 topk_int64
topk_3d_large_input_max
topk_3d_large_input_min
topk_3d_single_output
zero_sized_abs zero_sized_abs
zero_sized_acos zero_sized_acos
zero_sized_add zero_sized_add
...@@ -145,3 +146,4 @@ max_3d_to_scalar_double ...@@ -145,3 +146,4 @@ max_3d_to_scalar_double
argmin_trivial_in_i32 argmin_trivial_in_i32
argmax_4D_axis_3_i64_in_i32 argmax_4D_axis_3_i64_in_i32
argmin_trivial_in_double argmin_trivial_in_double
maxpool_bprop_larger_than_cache
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment