Commit cae66197 authored by shssf, committed by Robert Kimball

IntelGPU backend: Operation Reduce implemented (#1736)

* IntelGPU backend: Operation Reduce implemented

* PR1736. Style fixed
parent 7ac35345
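
For context, the nGraph Reduce op that this commit lowers takes the tensor to
reduce, a scalar initial value, a one-node reduction Function, and the set of
axes to reduce over. A minimal construction sketch (the exact v0-era
constructor signature is assumed here, for illustration only):

    // Reduction function f(a, b) = a + b
    auto f_a = std::make_shared<op::Parameter>(element::f32, Shape{});
    auto f_b = std::make_shared<op::Parameter>(element::f32, Shape{});
    auto f = std::make_shared<Function>(std::make_shared<op::Add>(f_a, f_b),
                                        op::ParameterVector{f_a, f_b});

    // Sum a 2x3 tensor over axis 1, seeded with `init`, yielding Shape{2}
    auto arg = std::make_shared<op::Parameter>(element::f32, Shape{2, 3});
    auto init = std::make_shared<op::Parameter>(element::f32, Shape{});
    auto reduce = std::make_shared<op::Reduce>(arg, init, f, AxisSet{1});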
@@ -23,6 +23,7 @@ set(SRC
intelgpu_op_custom_kernels.cpp
intelgpu_op_convolution.cpp
intelgpu_op_softmax.cpp
intelgpu_op_custom_func_call.cpp
code_writer.cpp
)
......
@@ -45,6 +45,7 @@
#include "ngraph/runtime/intelgpu/intelgpu_op_batchnorm.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_broadcast.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_convolution.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_custom_func_call.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_softmax.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_tensor_view.hpp"
@@ -69,6 +70,7 @@
#include "ngraph/op/pad.hpp"
#include "ngraph/op/parameter_vector.hpp"
#include "ngraph/op/product.hpp"
#include "ngraph/op/reduce.hpp"
#include "ngraph/op/reshape.hpp"
#include "ngraph/op/reverse.hpp"
#include "ngraph/op/slice.hpp"
@@ -788,6 +790,27 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
topology.add(cldnn_activ_grad);
break;
}
case OP_TYPEID::Reduce:
{
arguments_check(op, 2, 1);
const shared_ptr<op::Reduce> red_op = static_pointer_cast<op::Reduce>(op);
const AxisSet& axis = red_op->get_reduction_axes();
vector<shared_ptr<Function>> func = red_op->get_functions();
// Input 0 is the tensor to reduce; input 1 holds the scalar initial value.
// Even an empty axis set is not a case for do_equal_propagation()
do_reduce_func_call(topology,
get_input_name(op, 0),
get_input_shape(op, 0),
get_input_name(op, 1),
get_input_shape(op, 1),
get_output_name(op),
get_output_shape(op),
get_output_type(op),
axis,
func);
break;
}
case OP_TYPEID::Abs:
{
do_unary_operation(topology, op, activation_abs);
@@ -1350,7 +1373,6 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
case OP_TYPEID::FunctionCall:
case OP_TYPEID::Dequantize:
case OP_TYPEID::Quantize:
case OP_TYPEID::Reduce:
case OP_TYPEID::ReduceWindow:
case OP_TYPEID::ReplaceSlice:
case OP_TYPEID::ReverseSequence:
......
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <CPP/custom_gpu_primitive.hpp>
#include "ngraph/runtime/intelgpu/code_writer.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_layout.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_custom_func_call.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.hpp"
using namespace std;
using namespace ngraph;
// Writes an OpenCL helper function that applies the single-node reduction
// function (Add, Multiply, Maximum, ...) to two scalar operands
static void get_custom_func_name(codegen::CodeWriter& writer,
vector<shared_ptr<Function>>& func,
const string& func_name,
const string& type_name)
{
if (func.size() != 1)
{
throw invalid_argument("IntelGPU Custom_Call operation: expected exactly 1 custom function but got " +
                       to_string(func.size()));
}
writer << type_name << " " << func_name << "(const " << type_name << " input0, const "
<< type_name << " input1)\n";
writer.block_begin();
{
for (const shared_ptr<Node>& op : func.at(0)->get_ordered_ops())
{
if ((op->description() != "Parameter") && (op->description() != "Result"))
{
if (op->description() == "Multiply")
{
writer << "return input0 * input1;\n";
}
else if (op->description() == "Add")
{
writer << "return input0 + input1;\n";
}
else if (op->description() == "Maximum")
{
writer << "return max(input0, input1);\n";
}
else if (op->description() == "Minimum")
{
writer << "return min(input0, input1);\n";
}
else if (op->description() == "And")
{
writer << "return input0 && input1;\n";
}
else if (op->description() == "Or")
{
writer << "return input0 || input1;\n";
}
else if (op->description() == "Equal")
{
writer << "return input0 == input1;\n";
}
else if (op->description() == "NotEqual")
{
writer << "return input0 != input1;\n";
}
else
{
// Emit invalid source so that unsupported ops fail at kernel compile time
writer << "UNIMPLEMENTED_FUNCTION_INTELGPU: " << op->description() << "\n";
}
}
}
} // End of the generated function body
writer.block_end();
}
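// Illustration only: for a reduction Function containing a single Add node and
// float data, the helper above emits OpenCL source along the lines of
// (assuming the output tensor is named "output"):
//
//   float aux_call_output(const float input0, const float input1)
//   {
//       return input0 + input1;
//   }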
void runtime::intelgpu::do_reduce_func_call(cldnn::topology& topology,
const string& input0_name,
const Shape& input0_shape,
const string& input1_name,
const Shape& input1_shape,
const string& output_name,
const Shape& output_shape,
const element::Type& output_type,
const AxisSet& axis,
vector<shared_ptr<Function>>& func)
{
const string entry_point_name = "reduce_func_call_" + output_name;
const string aux_point_name = "aux_call_" + output_name;
const string kernel_type_name = get_opencl_type_name(output_type);
const size_t input_size = shape_size<Shape>(input0_shape);
codegen::CodeWriter writer;
get_custom_func_name(writer, func, aux_point_name, kernel_type_name);
// The kernel name and parameters
gen_func_def(writer,
entry_point_name,
{2, kernel_type_name},
{input0_shape, {1}},
kernel_type_name,
output_shape);
writer.block_begin();
{
// Initialization loop: seed every output element with the initial value (input1)
size_t var_idx = 0;
for (auto const& i : output_shape)
{
writer << "for (uint i" << var_idx << " = 0; i" << var_idx << " < " << i << "; ++i"
<< var_idx << ")\n";
writer.block_begin();
++var_idx;
}
writer << "output" << access_dims(output_shape) << " = input1" << access_dims(input1_shape)
<< ";\n";
// Closing brackets for initialization loop
for (auto const& i : output_shape)
{
writer.block_end();
}
// An empty input tensor leaves the output holding just the initial value
if (input_size && !input0_shape.empty())
{
// Main operation loop: fold each input element into the output via the reduction function
var_idx = 0;
for (auto const& i : input0_shape)
{
writer << "for (uint i" << var_idx << " = 0; i" << var_idx << " < " << i << "; ++i"
<< var_idx << ")\n";
writer.block_begin();
++var_idx;
}
writer << "output" << access_dims(input0_shape, "i", axis) << " = " << aux_point_name
<< "(output" << access_dims(input0_shape, "i", axis) << ", input0"
<< access_dims(input0_shape) << ");\n";
// Closing brackets for loop
for (auto const& i : input0_shape)
{
writer.block_end();
}
}
} // End of the generated function body
writer.block_end();
const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, output_shape);
const cldnn::custom_gpu_primitive op_reduce(output_name,
                                            {input0_name, input1_name},
                                            {writer.get_code()},
                                            entry_point_name,
                                            get_kernel_args(2, 1),
                                            "",
                                            layout,
                                            {1});
topology.add(op_reduce);
}
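
For reference, a sketch of the whole kernel do_reduce_func_call() generates for
input0 of Shape{2, 3}, a scalar initial value, an Add reduction function, and
reduction over axis 1 (the exact parameter declarations come from gen_func_def()
in intelgpu_op_custom_kernels.cpp and are assumed here):

    float aux_call_output(const float input0, const float input1)
    {
        return input0 + input1;
    }

    __kernel void reduce_func_call_output(const __global float input0[2][3],
                                          const __global float input1[1],
                                          __global float output[2])
    {
        // Initialization loop: seed the output with the initial value
        for (uint i0 = 0; i0 < 2; ++i0)
        {
            output[i0] = input1[0];
        }

        // Main loop: fold every input element into its output slot
        for (uint i0 = 0; i0 < 2; ++i0)
        {
            for (uint i1 = 0; i1 < 3; ++i1)
            {
                output[i0] = aux_call_output(output[i0], input0[i0][i1]);
            }
        }
    }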
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <CPP/topology.hpp>
#include "ngraph/axis_set.hpp"
#include "ngraph/function.hpp"
#include "ngraph/shape.hpp"
namespace ngraph
{
namespace runtime
{
namespace intelgpu
{
void do_reduce_func_call(cldnn::topology& topology,
const std::string& input0_name,
const Shape& input0_shape,
const std::string& input1_name,
const Shape& input1_shape,
const std::string& output_name,
const Shape& output_shape,
const element::Type& output_type,
const AxisSet& axis,
std::vector<std::shared_ptr<Function>>& func);
}
}
}
@@ -26,15 +26,6 @@ quantize
quantize_axes
quantize_clamp
quantize_int8
reduce_3d_to_vector
reduce_matrix_cols_zero
reduce_matrix_columns
reduce_matrix_rows
reduce_matrix_rows_zero
reduce_matrix_to_scalar_zero_by_zero
reduce_to_scalar
reduce_trivial
reduce_vector_zero
reduce_window_emulating_max_pool_1d_1channel_1image
reduce_window_emulating_max_pool_1d_1channel_2image
reduce_window_emulating_max_pool_1d_2channel_2image
......