Commit 3e0f87bf authored by Anna Alberska's avatar Anna Alberska Committed by Robert Kimball

IntelGPU backend: Gemm operation (#2904)

* add gemm operation

* style apply

* erase if statement

* fix the test

* enable tests
parent c67892af
......@@ -805,6 +805,53 @@ shared_ptr<runtime::Executable>
}
break;
}
case OP_TYPEID::Gemm:
{
    arguments_check(op, 3, 1);

    // Gemm computes alpha * op(A) * op(B) + beta * C, where op() optionally
    // transposes its argument.
    const shared_ptr<op::Gemm> gemm_op = static_pointer_cast<op::Gemm>(op);
    const double alpha = gemm_op->get_alpha();
    const double beta = gemm_op->get_beta();
    const bool transA = gemm_op->get_transA();
    const bool transB = gemm_op->get_transB();

    if (op->get_input_element_type(0) == element::f32 &&
        op->get_input_element_type(1) == element::f32 &&
        op->get_input_element_type(2) == element::f32 &&
        op->get_output_element_type(0) == element::f32)
    {
        // Fast path: the clDNN gemm primitive handles f32 directly.
        // Named gemm_prim (not gemm_op) to avoid shadowing the ngraph op above.
        const cldnn::gemm gemm_prim(op->get_output_tensor_name(0),
                                    op->get_input_tensor_name(0),
                                    op->get_input_tensor_name(1),
                                    op->get_input_tensor_name(2),
                                    transA,
                                    transB,
                                    static_cast<float>(alpha),
                                    static_cast<float>(beta));
        topology.add(gemm_prim);
    }
    else if (alpha == 1.0 && beta == 0.0 && !transA && !transB)
    {
        // With default attributes Gemm degenerates to a plain dot product.
        do_dot_operation(topology,
                         op->get_input_tensor_name(0),
                         op->get_input_shape(0),
                         op->get_input_tensor_name(1),
                         op->get_input_shape(1),
                         op->get_output_tensor_name(0),
                         op->get_output_shape(0),
                         op->get_output_element_type(0),
                         0);
    }
    else
    {
        // General case for non-f32 types: emit a custom OpenCL kernel.
        kern.emit<op::Gemm>(gemm_op);
    }
    break;
}
case OP_TYPEID::MaxPool:
{
arguments_check(op, 1, 1);
......@@ -1990,7 +2037,6 @@ shared_ptr<runtime::Executable>
case OP_TYPEID::Erf:
case OP_TYPEID::Gather:
case OP_TYPEID::GatherND:
case OP_TYPEID::Gemm:
case OP_TYPEID::GenerateMask:
case OP_TYPEID::HardSigmoid:
case OP_TYPEID::MVN:
......
......@@ -31,6 +31,7 @@
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/equal.hpp"
#include "ngraph/op/fused/conv_fused.hpp"
#include "ngraph/op/fused/gemm.hpp"
#include "ngraph/op/fused/group_conv.hpp"
#include "ngraph/op/greater.hpp"
#include "ngraph/op/greater_eq.hpp"
......@@ -134,6 +135,7 @@ private:
krnl_info build_krnl(const std::shared_ptr<op::ConvolutionBiasAdd>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::ConvolutionBiasBackpropFiltersBias>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Equal>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Gemm>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Greater>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::GreaterEq>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Less>& op) const;
......
......@@ -1044,6 +1044,111 @@ void runtime::intelgpu::do_dot_operation(cldnn::topology& topology,
topology.add(op_dot);
}
CustomKernels::krnl_info CustomKernels::build_krnl(const shared_ptr<op::Gemm>& op) const
{
    // Generates an OpenCL kernel computing
    //   output = alpha * op(A) * op(B) + beta * C
    // where op() optionally transposes, per the Gemm attributes.
    // input0 = A, input1 = B, input2 = C (C may be scalar, 1-D, or 2-D and is
    // broadcast accordingly). One work item per output element (2-D NDRange).
    const string& input0_name = op->get_input_tensor_name(0);
    const Shape& input0_shape = op->get_input_shape(0);
    const string& input1_name = op->get_input_tensor_name(1);
    const Shape& input1_shape = op->get_input_shape(1);
    const string& input2_name = op->get_input_tensor_name(2);
    const Shape& input2_shape = op->get_input_shape(2);
    const string& output_name = op->get_output_tensor_name(0);
    const Shape& output_shape = op->get_output_shape(0);
    const element::Type& output_type = op->get_output_element_type(0);
    const double alpha = op->get_alpha();
    const double beta = op->get_beta();
    const bool transA = op->get_transA();
    const bool transB = op->get_transB();
    const string entry_point_name = "gemm_" + output_name;
    const string type_name = get_opencl_type_name(output_type);
    CodeWriter writer;
    vector<size_t> gws;

    gen_func_def(writer,
                 entry_point_name,
                 {3, type_name},
                 {input0_shape, input1_shape, input2_shape},
                 type_name,
                 output_shape);

    writer.block_begin();
    {
        // Stage 1: pre-compute beta * C into a scratch array, broadcasting C
        // over the output shape. Every work item executes this full loop nest;
        // the cost is accepted here for simplicity of the generated code.
        writer << type_name << " temp[" << output_shape.at(0) << "][" << output_shape.at(1)
               << "];\n";
        writer << "for(uint i0 = 0; i0 < " << output_shape.at(0) << "; ++i0)\n";
        writer.block_begin();
        {
            writer << "for(uint i1 = 0; i1 < " << output_shape.at(1) << "; ++i1)\n";
            writer.block_begin();
            {
                // Pick C's indexing by rank: scalar, row vector, or full matrix.
                string input2_coords;
                if (input2_shape.empty())
                {
                    input2_coords = "[0]";
                }
                else if (input2_shape.size() == 1)
                {
                    input2_coords = "[i1]";
                }
                else
                {
                    input2_coords = "[i0][i1]";
                }
                writer << "temp[i0][i1] = input2" << input2_coords << " * " << beta << ";\n";
            }
            writer.block_end();
        }
        writer.block_end();

        // Stage 2: each work item (i0, i1) computes one output element.
        writer << "const uint i0 = get_global_id(0);";
        gws.push_back(output_shape.at(0));
        writer << "/*trip count " << output_shape.at(0) << "*/\n";
        writer.block_begin();
        {
            writer << "const uint i1 = get_global_id(1);";
            gws.push_back(output_shape.at(1));
            writer << "/*trip count " << output_shape.at(1) << "*/\n";
            writer.block_begin();
            {
                // Zero literal must match the OpenCL scalar type.
                string acc;
                if (type_name == "float")
                {
                    acc = "0.0f";
                }
                else
                {
                    acc = "0.0";
                }
                writer << type_name << " acc = " << acc << ";\n";

                // Reduction length: rows of A if transposed, else columns of A.
                const size_t k_coord = transA ? input0_shape.at(0) : input0_shape.at(1);
                writer << "for (uint k=0; k < " << k_coord << "; ++k)\n";
                writer.block_begin();
                {
                    // Swap subscripts to read A/B transposed without copying.
                    const string input0_coord = transA ? "[k][i0]" : "[i0][k]";
                    const string input1_coord = transB ? "[i1][k]" : "[k][i1]";
                    writer << "acc += input0" << input0_coord << " * input1" << input1_coord
                           << ";\n";
                }
                writer.block_end();
                writer << "output[i0][i1] = acc * " << alpha << " + temp[i0][i1];\n";
            }
            writer.block_end();
        }
        writer.block_end();
    }
    writer.block_end();

    const CustomKernelInfo krn_ret(output_name,
                                   output_shape,
                                   output_type,
                                   {input0_name, input1_name, input2_name},
                                   {writer.get_code()},
                                   entry_point_name,
                                   gws);
    return {krn_ret};
}
CustomKernels::krnl_info CustomKernels::build_krnl(const shared_ptr<op::Slice>& op) const
{
const string& input_name = op->get_input_tensor_name(0);
......
......@@ -72,8 +72,6 @@ normalize_across_hw_w_scale
normalize_invalid_input_tensor_rank
normalize_invalid_scale_rank
normalize
gemm
gemm_broadcast_input_C
hardsigmoid
mvn_mean_normalization
mvn_mean_normalization_split_channels
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment