Commit 43b91a57 authored by Anna Alberska, committed by Robert Kimball

Add element type support to the dot operation: use the output type's OpenCL type name instead of hard-coded float (#1714)

parent 6367ad7d
...@@ -729,15 +729,17 @@ void runtime::intelgpu::do_avg_pool_backprop_operation(cldnn::topology& topology ...@@ -729,15 +729,17 @@ void runtime::intelgpu::do_avg_pool_backprop_operation(cldnn::topology& topology
static void do_1d_scalar_mul(codegen::CodeWriter& writer, static void do_1d_scalar_mul(codegen::CodeWriter& writer,
string& entry_point_name, string& entry_point_name,
const Shape& input0_shape, const Shape& input0_shape,
const Shape& input1_shape) const Shape& input1_shape,
const string& type_name)
{ {
const size_t input0_count = input0_shape.empty() ? 0 : shape_size<Shape>(input0_shape); const size_t input0_count = input0_shape.empty() ? 0 : shape_size<Shape>(input0_shape);
const size_t input1_count = input1_shape.empty() ? 0 : shape_size<Shape>(input1_shape); const size_t input1_count = input1_shape.empty() ? 0 : shape_size<Shape>(input1_shape);
const size_t output_count = max(input0_count, input1_count); const size_t output_count = max(input0_count, input1_count);
entry_point_name += "_do_1d_scalar_mul"; entry_point_name += "_do_1d_scalar_mul";
writer << "__kernel void " << entry_point_name << "(const __global float* input0" writer << "__kernel void " << entry_point_name << "(const __global " << type_name << "* input0"
<< ", const __global float* input1, __global float* output)\n"; << ", const __global " << type_name << "* input1, __global " << type_name
<< "* output)\n";
writer.block_begin(); writer.block_begin();
{ {
writer << "for (uint i1 = 0; i1 < " << output_count << "; ++i1)\n"; writer << "for (uint i1 = 0; i1 < " << output_count << "; ++i1)\n";
...@@ -755,16 +757,17 @@ static vector<size_t> do_2d_2d_mul(codegen::CodeWriter& writer, ...@@ -755,16 +757,17 @@ static vector<size_t> do_2d_2d_mul(codegen::CodeWriter& writer,
string& entry_point_name, string& entry_point_name,
const Shape& input0_shape, const Shape& input0_shape,
const Shape& input1_shape, const Shape& input1_shape,
const Shape& output_shape) const Shape& output_shape,
const string& type_name)
{ {
entry_point_name += "_do_2d_2d_mul"; entry_point_name += "_do_2d_2d_mul";
vector<size_t> gws; vector<size_t> gws;
runtime::intelgpu::gen_func_def(writer, runtime::intelgpu::gen_func_def(writer,
entry_point_name, entry_point_name,
{2, "float"}, {2, type_name},
{input0_shape, input1_shape}, {input0_shape, input1_shape},
"float", type_name,
output_shape); output_shape);
writer.block_begin(); writer.block_begin();
...@@ -773,7 +776,7 @@ static vector<size_t> do_2d_2d_mul(codegen::CodeWriter& writer, ...@@ -773,7 +776,7 @@ static vector<size_t> do_2d_2d_mul(codegen::CodeWriter& writer,
gws = runtime::intelgpu::generate_loops(writer, output_shape, true); gws = runtime::intelgpu::generate_loops(writer, output_shape, true);
// Inner loop // Inner loop
writer << "float sum = 0.0f;\n"; writer << type_name << " sum = 0;\n";
writer << "for (uint i2 = 0; i2 < " << input0_shape.at(1) << "; ++i2)\n"; writer << "for (uint i2 = 0; i2 < " << input0_shape.at(1) << "; ++i2)\n";
writer.block_begin(); writer.block_begin();
{ {
...@@ -794,16 +797,17 @@ static vector<size_t> do_3d_3d_mul(codegen::CodeWriter& writer, ...@@ -794,16 +797,17 @@ static vector<size_t> do_3d_3d_mul(codegen::CodeWriter& writer,
string& entry_point_name, string& entry_point_name,
const Shape& input0_shape, const Shape& input0_shape,
const Shape& input1_shape, const Shape& input1_shape,
const Shape& output_shape) const Shape& output_shape,
const string& type_name)
{ {
entry_point_name += "_do_3d_3d_mul"; entry_point_name += "_do_3d_3d_mul";
vector<size_t> gws; vector<size_t> gws;
runtime::intelgpu::gen_func_def(writer, runtime::intelgpu::gen_func_def(writer,
entry_point_name, entry_point_name,
{2, "float"}, {2, type_name},
{input0_shape, input1_shape}, {input0_shape, input1_shape},
"float", type_name,
output_shape); output_shape);
writer.block_begin(); writer.block_begin();
...@@ -812,7 +816,7 @@ static vector<size_t> do_3d_3d_mul(codegen::CodeWriter& writer, ...@@ -812,7 +816,7 @@ static vector<size_t> do_3d_3d_mul(codegen::CodeWriter& writer,
gws = runtime::intelgpu::generate_loops(writer, output_shape, true); gws = runtime::intelgpu::generate_loops(writer, output_shape, true);
// Inner loop // Inner loop
writer << "float sum = 0.0f;\n"; writer << type_name << " sum = 0;\n";
writer << "for (uint i4 = 0; i4 < " << input0_shape.back() << "; ++i4)\n"; writer << "for (uint i4 = 0; i4 < " << input0_shape.back() << "; ++i4)\n";
writer.block_begin(); writer.block_begin();
{ {
...@@ -833,16 +837,17 @@ static vector<size_t> do_3d_2d_mul(codegen::CodeWriter& writer, ...@@ -833,16 +837,17 @@ static vector<size_t> do_3d_2d_mul(codegen::CodeWriter& writer,
string& entry_point_name, string& entry_point_name,
const Shape& input0_shape, const Shape& input0_shape,
const Shape& input1_shape, const Shape& input1_shape,
const Shape& output_shape) const Shape& output_shape,
const string& type_name)
{ {
entry_point_name += "_do_3d_2d_mul"; entry_point_name += "_do_3d_2d_mul";
vector<size_t> gws; vector<size_t> gws;
runtime::intelgpu::gen_func_def(writer, runtime::intelgpu::gen_func_def(writer,
entry_point_name, entry_point_name,
{2, "float"}, {2, type_name},
{input0_shape, input1_shape}, {input0_shape, input1_shape},
"float", type_name,
output_shape); output_shape);
writer.block_begin(); writer.block_begin();
...@@ -851,7 +856,7 @@ static vector<size_t> do_3d_2d_mul(codegen::CodeWriter& writer, ...@@ -851,7 +856,7 @@ static vector<size_t> do_3d_2d_mul(codegen::CodeWriter& writer,
gws = runtime::intelgpu::generate_loops(writer, output_shape, true); gws = runtime::intelgpu::generate_loops(writer, output_shape, true);
// Inner loop // Inner loop
writer << "float sum = 0.0f;\n"; writer << type_name << " sum = 0;\n";
writer << "for (uint i3 = 0; i3 < " << input0_shape.back() << "; ++i3)\n"; writer << "for (uint i3 = 0; i3 < " << input0_shape.back() << "; ++i3)\n";
writer.block_begin(); writer.block_begin();
{ {
...@@ -872,16 +877,17 @@ static vector<size_t> do_2d_1d_mul(codegen::CodeWriter& writer, ...@@ -872,16 +877,17 @@ static vector<size_t> do_2d_1d_mul(codegen::CodeWriter& writer,
string& entry_point_name, string& entry_point_name,
const Shape& input0_shape, const Shape& input0_shape,
const Shape& input1_shape, const Shape& input1_shape,
const Shape& output_shape) const Shape& output_shape,
const string& type_name)
{ {
entry_point_name += "_do_2d_1d_mul"; entry_point_name += "_do_2d_1d_mul";
vector<size_t> gws; vector<size_t> gws;
runtime::intelgpu::gen_func_def(writer, runtime::intelgpu::gen_func_def(writer,
entry_point_name, entry_point_name,
{2, "float"}, {2, type_name},
{input0_shape, input1_shape}, {input0_shape, input1_shape},
"float", type_name,
output_shape); output_shape);
writer.block_begin(); writer.block_begin();
...@@ -889,7 +895,7 @@ static vector<size_t> do_2d_1d_mul(codegen::CodeWriter& writer, ...@@ -889,7 +895,7 @@ static vector<size_t> do_2d_1d_mul(codegen::CodeWriter& writer,
// Main loops // Main loops
gws = runtime::intelgpu::generate_loops(writer, output_shape, true); gws = runtime::intelgpu::generate_loops(writer, output_shape, true);
writer << "float sum = 0.0f;\n"; writer << type_name << " sum = 0;\n";
// Inner loop // Inner loop
writer << "for (uint i1 = 0; i1 < " << input0_shape.at(1) << "; ++i1)\n"; writer << "for (uint i1 = 0; i1 < " << input0_shape.at(1) << "; ++i1)\n";
writer.block_begin(); writer.block_begin();
...@@ -907,12 +913,14 @@ static vector<size_t> do_2d_1d_mul(codegen::CodeWriter& writer, ...@@ -907,12 +913,14 @@ static vector<size_t> do_2d_1d_mul(codegen::CodeWriter& writer,
return gws; return gws;
} }
static void do_scalar_scalar_mul(codegen::CodeWriter& writer, string& entry_point_name) static void do_scalar_scalar_mul(codegen::CodeWriter& writer,
string& entry_point_name,
const string& type_name)
{ {
entry_point_name += "_scalar_scalar_mul"; entry_point_name += "_scalar_scalar_mul";
runtime::intelgpu::gen_func_def( runtime::intelgpu::gen_func_def(
writer, entry_point_name, {2, "float"}, {{1}, {1}}, "float", {1}); writer, entry_point_name, {2, type_name}, {{1}, {1}}, type_name, {1});
writer.block_begin(); writer.block_begin();
{ {
...@@ -921,7 +929,10 @@ static void do_scalar_scalar_mul(codegen::CodeWriter& writer, string& entry_poin ...@@ -921,7 +929,10 @@ static void do_scalar_scalar_mul(codegen::CodeWriter& writer, string& entry_poin
writer.block_end(); writer.block_end();
} }
static void do_1d_1d_mul(codegen::CodeWriter& writer, string& entry_point_name, const Shape& shape) static void do_1d_1d_mul(codegen::CodeWriter& writer,
string& entry_point_name,
const Shape& shape,
const string& type_name)
{ {
if (shape.size() > 1) if (shape.size() > 1)
{ {
...@@ -932,11 +943,11 @@ static void do_1d_1d_mul(codegen::CodeWriter& writer, string& entry_point_name, ...@@ -932,11 +943,11 @@ static void do_1d_1d_mul(codegen::CodeWriter& writer, string& entry_point_name,
entry_point_name += "_do_1d_1d_mul"; entry_point_name += "_do_1d_1d_mul";
runtime::intelgpu::gen_func_def( runtime::intelgpu::gen_func_def(
writer, entry_point_name, {2, "float"}, {2, shape}, "float", {1}); writer, entry_point_name, {2, type_name}, {2, shape}, type_name, {1});
writer.block_begin(); writer.block_begin();
{ {
writer << "float sum = 0.0f;\n" writer << type_name << " sum = 0;\n"
<< "for (uint i = 0; i < " << shape.front() << "; ++i)\n"; << "for (uint i = 0; i < " << shape.front() << "; ++i)\n";
writer.block_begin(); writer.block_begin();
{ {
...@@ -959,6 +970,7 @@ void runtime::intelgpu::do_dot_operation(cldnn::topology& topology, ...@@ -959,6 +970,7 @@ void runtime::intelgpu::do_dot_operation(cldnn::topology& topology,
{ {
const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, output_shape); const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, output_shape);
string entry_point_name = "dot_" + output_name; string entry_point_name = "dot_" + output_name;
const string type_name = get_opencl_type_name(output_type);
codegen::CodeWriter writer; codegen::CodeWriter writer;
vector<size_t> gws = {1}; vector<size_t> gws = {1};
...@@ -968,34 +980,38 @@ void runtime::intelgpu::do_dot_operation(cldnn::topology& topology, ...@@ -968,34 +980,38 @@ void runtime::intelgpu::do_dot_operation(cldnn::topology& topology,
if (is_input0_scalar && is_input1_scalar && is_output_scalar) if (is_input0_scalar && is_input1_scalar && is_output_scalar)
{ {
do_scalar_scalar_mul(writer, entry_point_name); do_scalar_scalar_mul(writer, entry_point_name, type_name);
} }
else if (((is_input0_scalar && !is_input1_scalar) || (!is_input0_scalar && is_input1_scalar)) && else if (((is_input0_scalar && !is_input1_scalar) || (!is_input0_scalar && is_input1_scalar)) &&
!is_output_scalar) !is_output_scalar)
{ {
do_1d_scalar_mul(writer, entry_point_name, input0_shape, input1_shape); do_1d_scalar_mul(writer, entry_point_name, input0_shape, input1_shape, type_name);
} }
else if (!is_input0_scalar && !is_input1_scalar && is_output_scalar) else if (!is_input0_scalar && !is_input1_scalar && is_output_scalar)
{ {
do_1d_1d_mul(writer, entry_point_name, input1_shape); do_1d_1d_mul(writer, entry_point_name, input1_shape, type_name);
} }
else if (!is_input0_scalar && !is_input1_scalar && !is_output_scalar) else if (!is_input0_scalar && !is_input1_scalar && !is_output_scalar)
{ {
if (input0_shape.size() == 2 && input1_shape.size() == 1) if (input0_shape.size() == 2 && input1_shape.size() == 1)
{ {
gws = do_2d_1d_mul(writer, entry_point_name, input0_shape, input1_shape, output_shape); gws = do_2d_1d_mul(
writer, entry_point_name, input0_shape, input1_shape, output_shape, type_name);
} }
else if (input0_shape.size() == 2 && input1_shape.size() == 2) else if (input0_shape.size() == 2 && input1_shape.size() == 2)
{ {
gws = do_2d_2d_mul(writer, entry_point_name, input0_shape, input1_shape, output_shape); gws = do_2d_2d_mul(
writer, entry_point_name, input0_shape, input1_shape, output_shape, type_name);
} }
else if (input0_shape.size() == 3 && input1_shape.size() == 3) else if (input0_shape.size() == 3 && input1_shape.size() == 3)
{ {
gws = do_3d_3d_mul(writer, entry_point_name, input0_shape, input1_shape, output_shape); gws = do_3d_3d_mul(
writer, entry_point_name, input0_shape, input1_shape, output_shape, type_name);
} }
else if (input0_shape.size() == 3 && input1_shape.size() == 2) else if (input0_shape.size() == 3 && input1_shape.size() == 2)
{ {
gws = do_3d_2d_mul(writer, entry_point_name, input0_shape, input1_shape, output_shape); gws = do_3d_2d_mul(
writer, entry_point_name, input0_shape, input1_shape, output_shape, type_name);
} }
else else
{ {
......
...@@ -15,7 +15,6 @@ backwards_tanh ...@@ -15,7 +15,6 @@ backwards_tanh
batch_norm_one_output batch_norm_one_output
batch_norm_three_outputs batch_norm_three_outputs
divide_by_zero_int32 divide_by_zero_int32
dot_matrix_vector_int64
function_call function_call
max_pool_3d max_pool_3d
numeric_double_inf numeric_double_inf
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment