Commit f9ded0b1 authored by shssf, committed by Robert Kimball

IntelGPU backend: Greater, Less, Equal operations (#1331)

parent c5889b2b
@@ -65,6 +65,16 @@ static void arguments_check(const shared_ptr<Node>& op, size_t input, size_t out
}
}
static void argument_type_check(const element::Type& type)
{
if (type != element::f32)
{
ostringstream os;
os << "Kernel data type " << type << " is not supported";
throw invalid_argument(os.str());
}
}
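A hypothetical call site (not part of this commit; the element types shown are illustrative) makes the contract explicit: only f32 tensors pass, anything else raises std::invalid_argument naming the offending type:

    // Illustrative sketch only -- not code from this diff.
    argument_type_check(element::f32); // accepted, returns normally
    argument_type_check(element::i32); // throws invalid_argument: "Kernel data type ... is not supported"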
static void do_eltwise_operation(cldnn::topology& topology,
const shared_ptr<Node>& op,
cldnn::eltwise_mode mode)
@@ -98,6 +108,33 @@ static void do_unary_operation(cldnn::topology& topology,
topology.add(cldnn_unary);
}
static void do_logical_operation(cldnn::topology& topology,
const shared_ptr<Node>& op,
const string& operation)
{
arguments_check(op, 2, 1);
const string& inputA_name = op->get_inputs().at(0).get_tensor().get_name();
const Shape& inputA_shape = op->get_inputs().at(0).get_shape();
argument_type_check(op->get_inputs().at(0).get_tensor().get_element_type());
const string& inputB_name = op->get_inputs().at(1).get_tensor().get_name();
const Shape& inputB_shape = op->get_inputs().at(1).get_shape();
argument_type_check(op->get_inputs().at(1).get_tensor().get_element_type());
const string& output_name = op->get_outputs().begin()->get_tensor().get_name();
const Shape& output_shape = op->get_outputs().begin()->get_shape();
const element::Type& output_type = op->get_outputs().begin()->get_tensor().get_element_type();
runtime::intelgpu::do_logic_kernel(topology,
inputA_name,
inputA_shape,
inputB_name,
inputB_shape,
output_name,
output_shape,
output_type,
operation);
}
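Note that the operation string is spliced verbatim between the two operand accesses in the generated OpenCL source (see do_logic_kernel below), which is why every caller pads the comparison operator with spaces:

    // Illustrative: " >= " lands in the kernel text as ... if (inputA[...] >= inputB[...]) ...
    do_logical_operation(topology, op, " >= ");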
// This function is needed only to change the name of the data in the topology;
// no real data copy is required
static void do_equal_propagation(cldnn::topology& topology,
@@ -487,6 +524,30 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
{
do_unary_operation(topology, op, activation_logistic);
}
else if ("Greater" == op->description())
{
do_logical_operation(topology, op, " > ");
}
else if ("GreaterEq" == op->description())
{
do_logical_operation(topology, op, " >= ");
}
else if ("Equal" == op->description())
{
do_logical_operation(topology, op, " == ");
}
else if ("NotEqual" == op->description())
{
do_logical_operation(topology, op, " != ");
}
else if ("Less" == op->description())
{
do_logical_operation(topology, op, " < ");
}
else if ("LessEq" == op->description())
{
do_logical_operation(topology, op, " <= ");
}
else if ("Subtract" == op->description())
{
do_eltwise_operation(topology, op, cldnn::eltwise_mode::sub);
......
@@ -592,3 +592,67 @@ void runtime::intelgpu::do_select_operation(cldnn::topology& topology,
{1});
topology.add(op_select);
}
void runtime::intelgpu::do_logic_kernel(cldnn::topology& topology,
const string& inputA_name,
const Shape& inputA_shape,
const string& inputB_name,
const Shape& inputB_shape,
const string& output_name,
const Shape& output_shape,
const element::Type& output_type,
const string& operation)
{
const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, output_shape);
const string entry_point_name = "logic_" + output_name;
codegen::CodeWriter writer;
writer << "__kernel void " << entry_point_name << "(const __global float inputA"
<< array_dims(inputA_shape) << ", const __global float inputB"
<< array_dims(inputB_shape) << ", __global char output" << array_dims(output_shape)
<< ")\n";
writer.block_begin();
{
size_t var_idx = 0;
// Main loops
for (auto const& i : output_shape)
{
writer << "for (uint i" << var_idx << " = 0; i" << var_idx << " < " << i << "; ++i"
<< var_idx << ")\n";
writer.block_begin();
++var_idx;
}
writer << "if (inputA" << access_dims(inputA_shape) << operation << "inputB"
<< access_dims(inputB_shape) << ")\n";
writer.block_begin();
{
writer << "output" << access_dims(output_shape) << " = 1;\n";
}
writer.block_end();
writer << "else\n";
writer.block_begin();
{
writer << "output" << access_dims(output_shape) << " = 0;\n";
}
writer.block_end();
// Closing braces for main loops
for (auto const& i : output_shape)
{
writer.block_end();
}
}
writer.block_end();
const cldnn::custom_gpu_primitive op_logical(output_name,
{inputA_name, inputB_name},
{writer.get_code()},
entry_point_name,
get_kernel_args(2, 1),
"",
layout,
{1});
topology.add(op_logical);
}
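For reference, here is a sketch of the OpenCL source the writer above would emit for a Greater comparison of two Shape{2, 3} tensors, assuming array_dims renders the shape as [2][3] and access_dims renders the matching index expression [i0][i1] (the output name, and hence the entry point logic_out, is made up for the example):

    __kernel void logic_out(const __global float inputA[2][3],
                            const __global float inputB[2][3],
                            __global char output[2][3])
    {
        for (uint i0 = 0; i0 < 2; ++i0)
        {
            for (uint i1 = 0; i1 < 3; ++i1)
            {
                if (inputA[i0][i1] > inputB[i0][i1])
                {
                    output[i0][i1] = 1;
                }
                else
                {
                    output[i0][i1] = 0;
                }
            }
        }
    }

The output buffer is hardcoded as char, which lines up with ngraph's boolean element type for comparison results, while the cldnn::layout passed to the primitive is built from the op's actual output_type.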
@@ -70,6 +70,16 @@ namespace ngraph
const Shape& output_shape,
const element::Type& output_type);
void do_logic_kernel(cldnn::topology& topology,
const std::string& inputA_name,
const Shape& inputA_shape,
const std::string& inputB_name,
const Shape& inputB_shape,
const std::string& output_name,
const Shape& output_shape,
const element::Type& output_type,
const std::string& operation);
// Helper functions used in cldnn::custom_gpu_primitive kernels
std::vector<cldnn_arg> get_kernel_args(size_t input, size_t output);
std::string array_dims(const Shape& dimensions);
......