Unverified commit 0f05495c authored by Scott Cyphers, committed by GitHub

Merge branch 'master' into master

parents 1b5340c4 244c9fcf
@@ -22,7 +22,7 @@ include(ExternalProject)
#------------------------------------------------------------------------------
set(CLDNN_GIT_REPO_URL https://github.com/intel/clDNN.git)
set(CLDNN_GIT_LABEL df28d2861716cac7a6a9eff4e49e47162959a748)
set(CLDNN_GIT_LABEL 02add7c4ce2baa81e2a32fa02d733dcc4f013108)
set(BOOST_VERSION 1.64.0)
set(OUT_DIR ${EXTERNAL_PROJECTS_ROOT}/cldnn/out)
......
@@ -30,6 +30,7 @@
#include <CPP/crop.hpp>
#include <CPP/data.hpp>
#include <CPP/eltwise.hpp>
#include <CPP/gemm.hpp>
#include <CPP/input_layout.hpp>
#include <CPP/layout.hpp>
#include <CPP/lrn.hpp>
@@ -675,6 +676,41 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
{
arguments_check(op, 2, 1);
const shared_ptr<op::Dot> dot_inst = static_pointer_cast<op::Dot>(op);
const size_t axes_count = dot_inst->get_reduction_axes_count();
const Shape& input0_shape = get_input_shape(op, 0);
const Shape& input1_shape = get_input_shape(op, 1);
const size_t input0_elem_count = shape_size(input0_shape);
const size_t input1_elem_count = shape_size(input1_shape);
if (get_input_type(op) == element::f32 && get_input_type(op, 1) == element::f32 &&
get_output_type(op) == element::f32 && input0_elem_count && input1_elem_count &&
(axes_count == 1) && (input0_shape.size() < 3) && (input1_shape.size() < 3) &&
!input0_shape.empty() && !input1_shape.empty())
{
string input1_name = get_input_name(op, 1);
// If we have A[5] and B[5] here, clDNN sees them as A[1, 1, 1, 5] and B[1, 1, 1, 5];
// B must be reshaped to B[1, 1, 5, 1] before the gemm.
if (!input0_shape.empty() && (input1_shape.size() == 1))
{
const string new_name = input1_name + "_reshaped";
Shape new_shape = input1_shape;
new_shape.push_back(1);
const cldnn::tensor reshaped_tensor =
intelgpu_space::create_cldnn_tensor(new_shape);
const cldnn::reshape reshape_op(new_name, input1_name, reshaped_tensor);
topology.add(reshape_op);
input1_name = new_name;
}
const cldnn::gemm dot_op(get_output_name(op), get_input_name(op, 0), input1_name);
topology.add(dot_op);
}
else
{
do_dot_operation(topology,
get_input_name(op, 0),
get_input_shape(op, 0),
@@ -682,7 +718,9 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
get_input_shape(op, 1),
get_output_name(op),
get_output_shape(op),
get_output_type(op));
get_output_type(op),
axes_count);
}
break;
}
case OP_TYPEID::MaxPool:
......
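For readability, the fast-path test added above can be condensed into a single predicate. The helper below is only an illustrative sketch (the name dot_can_use_cldnn_gemm and the free-function form are not in the commit); it restates the condition from the hunk: cldnn::gemm is used only when both operands and the result are f32, both operand ranks are 1 or 2, there is exactly one reduction axis, and neither input is zero-sized. Everything else falls back to the generated OpenCL kernel in do_dot_operation().

// Illustrative only, not part of this commit; assumes ngraph's Shape,
// element::Type, and shape_size() are in scope.
static bool dot_can_use_cldnn_gemm(const Shape& input0_shape,
                                   const Shape& input1_shape,
                                   const element::Type& input0_type,
                                   const element::Type& input1_type,
                                   const element::Type& output_type,
                                   size_t reduction_axes_count)
{
    return input0_type == element::f32 && input1_type == element::f32 &&
           output_type == element::f32 && reduction_axes_count == 1 &&
           !input0_shape.empty() && !input1_shape.empty() &&
           input0_shape.size() < 3 && input1_shape.size() < 3 &&
           shape_size(input0_shape) != 0 && shape_size(input1_shape) != 0;
}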
@@ -306,16 +306,6 @@ static string access_dims_strided(const Shape& dimentions,
return buffer;
}
static void do_dot_operation_error(const Shape& input0_shape,
const Shape& input1_shape,
const Shape& output_shape)
{
throw invalid_argument("IntelGPU Dot operation. Conbination input0_shape" +
runtime::intelgpu::array_dims(input0_shape) + ", input1_shape" +
runtime::intelgpu::array_dims(input1_shape) + ", output_shape" +
runtime::intelgpu::array_dims(output_shape) + " is not supported.");
}
void runtime::intelgpu::do_pad_operation(cldnn::topology& topology,
const string& input_name,
const Shape& input_shape,
@@ -742,82 +732,30 @@ void runtime::intelgpu::do_avg_pool_backprop_operation(cldnn::topology& topology
topology.add(op_avg_pool_backprop);
}
static void do_1d_scalar_mul(codegen::CodeWriter& writer,
string& entry_point_name,
const Shape& input0_shape,
const Shape& input1_shape,
const string& type_name)
{
const size_t input0_count = input0_shape.empty() ? 0 : shape_size<Shape>(input0_shape);
const size_t input1_count = input1_shape.empty() ? 0 : shape_size<Shape>(input1_shape);
const size_t output_count = max(input0_count, input1_count);
entry_point_name += "_do_1d_scalar_mul";
writer << "__kernel void " << entry_point_name << "(const __global " << type_name << "* input0"
<< ", const __global " << type_name << "* input1, __global " << type_name
<< "* output)\n";
writer.block_begin();
{
writer << "for (uint i1 = 0; i1 < " << output_count << "; ++i1)\n";
writer.block_begin();
{
writer << "output[i1] = input0[" << (input0_count > 0 ? "i1" : "0") << "] * input1["
<< (input1_count > 0 ? "i1" : "0") << "];\n";
}
writer.block_end();
}
writer.block_end();
}
static vector<size_t> do_2d_2d_mul(codegen::CodeWriter& writer,
string& entry_point_name,
void runtime::intelgpu::do_dot_operation(cldnn::topology& topology,
const string& input0_name,
const Shape& input0_shape,
const string& input1_name,
const Shape& input1_shape,
const string& output_name,
const Shape& output_shape,
const string& type_name)
const element::Type& output_type,
size_t reduction_axes_count)
{
entry_point_name += "_do_2d_2d_mul";
const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, output_shape);
string entry_point_name = "dot_" + output_name;
const string type_name = get_opencl_type_name(output_type);
const size_t input0_axes = input0_shape.size() - reduction_axes_count;
size_t var_idx = reduction_axes_count;
Shape reduction_shape;
codegen::CodeWriter writer;
vector<size_t> gws;
runtime::intelgpu::gen_func_def(writer,
entry_point_name,
{2, type_name},
{input0_shape, input1_shape},
type_name,
output_shape);
writer.block_begin();
for (auto it = input1_shape.begin(); (it != input1_shape.end()) && (var_idx > 0); ++it)
{
// Main loops
gws = runtime::intelgpu::generate_loops(writer, output_shape, true);
// Inner loop
writer << type_name << " sum = 0;\n";
writer << "for (uint i2 = 0; i2 < " << input0_shape.at(1) << "; ++i2)\n";
writer.block_begin();
{
writer << "sum += input0[i0][i2] * input1[i2][i1];\n";
}
writer.block_end();
writer << "output[i0][i1] = sum;\n";
// Closing brackets for main loops
runtime::intelgpu::generate_loops(writer, output_shape, false);
reduction_shape.push_back(*it);
--var_idx;
}
writer.block_end();
return gws;
}
static vector<size_t> do_3d_3d_mul(codegen::CodeWriter& writer,
string& entry_point_name,
const Shape& input0_shape,
const Shape& input1_shape,
const Shape& output_shape,
const string& type_name)
{
entry_point_name += "_do_3d_3d_mul";
vector<size_t> gws;
runtime::intelgpu::gen_func_def(writer,
entry_point_name,
@@ -825,219 +763,84 @@ static vector<size_t> do_3d_3d_mul(codegen::CodeWriter& writer,
{input0_shape, input1_shape},
type_name,
output_shape);
writer.block_begin();
{
// Main loops
gws = runtime::intelgpu::generate_loops(writer, output_shape, true);
// Inner loop
writer << type_name << " sum = 0;\n";
writer << "for (uint i4 = 0; i4 < " << input0_shape.back() << "; ++i4)\n";
writer.block_begin();
{
writer << "sum += input0[i0][i1][i4] * input1[i4][i2][i3];\n";
}
writer.block_end();
writer << "output[i0][i1][i2][i3] = sum;\n";
// Closing brackets for main loops
runtime::intelgpu::generate_loops(writer, output_shape, false);
}
writer.block_end();
return gws;
}
static vector<size_t> do_3d_2d_mul(codegen::CodeWriter& writer,
string& entry_point_name,
const Shape& input0_shape,
const Shape& input1_shape,
const Shape& output_shape,
const string& type_name)
{
entry_point_name += "_do_3d_2d_mul";
vector<size_t> gws;
writer << "// reduction_axes_count:" << reduction_axes_count << "\n"
<< "// reduction_shape:" << reduction_shape << "\n";
runtime::intelgpu::gen_func_def(writer,
entry_point_name,
{2, type_name},
{input0_shape, input1_shape},
type_name,
output_shape);
writer.block_begin();
{
// Main loops
gws = runtime::intelgpu::generate_loops(writer, output_shape, true);
// Inner loop
writer << type_name << " sum = 0;\n";
writer << "for (uint i3 = 0; i3 < " << input0_shape.back() << "; ++i3)\n";
writer.block_begin();
{
writer << "sum += input0[i0][i1][i3] * input1[i3][i2];\n";
}
writer.block_end();
writer << "output[i0][i1][i2] = sum;\n";
// Closing brackets for main loops
runtime::intelgpu::generate_loops(writer, output_shape, false);
}
writer.block_end();
return gws;
}
static vector<size_t> do_2d_1d_mul(codegen::CodeWriter& writer,
string& entry_point_name,
const Shape& input0_shape,
const Shape& input1_shape,
const Shape& output_shape,
const string& type_name)
{
entry_point_name += "_do_2d_1d_mul";
vector<size_t> gws;
runtime::intelgpu::gen_func_def(writer,
entry_point_name,
{2, type_name},
{input0_shape, input1_shape},
type_name,
output_shape);
writer.block_begin();
// Reduction loops
var_idx = 0;
for (auto const& i : reduction_shape)
{
// Main loops
gws = runtime::intelgpu::generate_loops(writer, output_shape, true);
writer << type_name << " sum = 0;\n";
// Inner loop
writer << "for (uint i1 = 0; i1 < " << input0_shape.at(1) << "; ++i1)\n";
writer << "for (uint k" << var_idx << " = 0; k" << var_idx << " < " << i << "; ++k"
<< var_idx << ")\n";
writer.block_begin();
{
writer << "sum += input0[i0][i1] * input1[i1];\n";
}
writer.block_end();
writer << "output[i0] = sum;\n";
// Closing brackets for main loops
runtime::intelgpu::generate_loops(writer, output_shape, false);
++var_idx;
}
writer.block_end();
return gws;
}
static void do_scalar_scalar_mul(codegen::CodeWriter& writer,
string& entry_point_name,
const string& type_name)
{
entry_point_name += "_scalar_scalar_mul";
runtime::intelgpu::gen_func_def(
writer, entry_point_name, {2, type_name}, {{1}, {1}}, type_name, {1});
writer << "sum += input0";
writer.block_begin();
if (input0_shape.empty())
{
writer << "output[0] = input0[0] * input1[0];\n";
writer << "[0]";
}
writer.block_end();
}
static void do_1d_1d_mul(codegen::CodeWriter& writer,
string& entry_point_name,
const Shape& shape,
const string& type_name)
{
if (shape.size() > 1)
else
{
// main axes indexes
for (size_t i = 0; i < input0_axes; ++i)
{
throw invalid_argument("do_1d_1d_mul: Shape" + runtime::intelgpu::array_dims(shape) +
" must be 1D");
writer << "[i" << i << "]";
}
entry_point_name += "_do_1d_1d_mul";
runtime::intelgpu::gen_func_def(
writer, entry_point_name, {2, type_name}, {2, shape}, type_name, {1});
writer.block_begin();
{
writer << type_name << " sum = 0;\n"
<< "for (uint i = 0; i < " << shape.front() << "; ++i)\n";
writer.block_begin();
// reduction axes indexes
for (size_t i = 0; i < reduction_shape.size(); ++i)
{
writer << "sum += input0[i] * input1[i];\n";
writer << "[k" << i << "]";
}
writer.block_end();
writer << "output[0] = sum;\n";
}
writer.block_end();
}
void runtime::intelgpu::do_dot_operation(cldnn::topology& topology,
const string& input0_name,
const Shape& input0_shape,
const string& input1_name,
const Shape& input1_shape,
const string& output_name,
const Shape& output_shape,
const element::Type& output_type)
{
const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, output_shape);
string entry_point_name = "dot_" + output_name;
const string type_name = get_opencl_type_name(output_type);
codegen::CodeWriter writer;
vector<size_t> gws = {1};
const bool is_input0_scalar = input0_shape.empty();
const bool is_input1_scalar = input1_shape.empty();
const bool is_output_scalar = output_shape.empty();
// operation
writer << " * input1";
if (is_input0_scalar && is_input1_scalar && is_output_scalar)
{
do_scalar_scalar_mul(writer, entry_point_name, type_name);
}
else if (((is_input0_scalar && !is_input1_scalar) || (!is_input0_scalar && is_input1_scalar)) &&
!is_output_scalar)
if (input1_shape.empty())
{
do_1d_scalar_mul(writer, entry_point_name, input0_shape, input1_shape, type_name);
writer << "[0]";
}
else if (!is_input0_scalar && !is_input1_scalar && is_output_scalar)
{
do_1d_1d_mul(writer, entry_point_name, input1_shape, type_name);
}
else if (!is_input0_scalar && !is_input1_scalar && !is_output_scalar)
{
if (input0_shape.size() == 2 && input1_shape.size() == 1)
else
{
gws = do_2d_1d_mul(
writer, entry_point_name, input0_shape, input1_shape, output_shape, type_name);
}
else if (input0_shape.size() == 2 && input1_shape.size() == 2)
// reduction axes indexes
for (size_t i = 0; i < reduction_shape.size(); ++i)
{
gws = do_2d_2d_mul(
writer, entry_point_name, input0_shape, input1_shape, output_shape, type_name);
writer << "[k" << i << "]";
}
else if (input0_shape.size() == 3 && input1_shape.size() == 3)
// main axes indexes
for (size_t i = input0_axes; i < output_shape.size(); ++i)
{
gws = do_3d_3d_mul(
writer, entry_point_name, input0_shape, input1_shape, output_shape, type_name);
writer << "[i" << i << "]";
}
else if (input0_shape.size() == 3 && input1_shape.size() == 2)
{
gws = do_3d_2d_mul(
writer, entry_point_name, input0_shape, input1_shape, output_shape, type_name);
}
else
writer << ";\n";
// Closing brackets for reduction loops
for (auto const& i : reduction_shape)
{
do_dot_operation_error(input0_shape, input1_shape, output_shape);
}
writer.block_end();
}
else
{
do_dot_operation_error(input0_shape, input1_shape, output_shape);
writer << "output" << runtime::intelgpu::access_dims(output_shape) << " = sum;\n";
// Closing brackets for main loops
runtime::intelgpu::generate_loops(writer, output_shape, false);
}
writer.block_end();
const cldnn::custom_gpu_primitive op_dot(output_name,
{input0_name, input1_name},
......
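The rewritten do_dot_operation() above replaces the per-rank helpers (do_1d_1d_mul, do_2d_2d_mul, do_3d_3d_mul, and so on) with one kernel generator parameterized by reduction_axes_count: the output ("main") loops come from generate_loops(), one inner loop is emitted per entry of reduction_shape (the first reduction_axes_count axes of input1), and the body accumulates input0[i...][k...] * input1[k...][i...]. A minimal host-side sketch of the same semantics, specialized to two reduction axes and using hypothetical names (not part of the commit), might look like this:

#include <cstddef>
#include <vector>

// Reference for Dot with reduction_axes_count == 2 (illustrative only):
// a has shape [I, K0, K1], b has shape [K0, K1, J], the result has shape [I, J].
// The generated OpenCL kernel uses the same loop nest: output loops outside,
// one reduction loop per axis of reduction_shape inside.
static std::vector<float> dot_reference_r2(const std::vector<float>& a,
                                           const std::vector<float>& b,
                                           std::size_t I, std::size_t K0,
                                           std::size_t K1, std::size_t J)
{
    std::vector<float> out(I * J, 0.0f);
    for (std::size_t i = 0; i < I; ++i)
        for (std::size_t j = 0; j < J; ++j)
        {
            float sum = 0.0f;
            for (std::size_t k0 = 0; k0 < K0; ++k0)
                for (std::size_t k1 = 0; k1 < K1; ++k1)
                    sum += a[(i * K0 + k0) * K1 + k1] * b[(k0 * K1 + k1) * J + j];
            out[i * J + j] = sum;
        }
    return out;
}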
@@ -72,7 +72,8 @@ namespace ngraph
const Shape& inputB_shape,
const std::string& output_name,
const Shape& output_shape,
const element::Type& output_type);
const element::Type& output_type,
size_t reduction_axes_count);
void do_slice_operation(cldnn::topology& topology,
const std::string& input_name,
......
avg_pool_2d_2channel_2image_padded_only_above_do_not_include_in_computation
avg_pool_2d_2channel_2image_padded_only_above_include_in_computation
avg_pool_3d_uneven_strided_padded
backwards_batch_norm_three_outputs
backwards_batch_norm_training
backwards_dot_scalar_tensor
backwards_dot_tensor3_tensor3
@@ -9,7 +10,6 @@ backwards_dot_tensor_vector
backwards_exp
backwards_maxpool_n2_c1_hw5_3x3_str2_max
backwards_maxpool_n4_c1_hw4_2x2_max
backwards_relu
backwards_replace_slice
backwards_reverse_sequence_n3_c2_h3
backwards_reverse_sequence_n4d2c3h2w2
@@ -21,41 +21,37 @@ batch_norm_training_0eps_f64
batch_norm_one_output
batch_norm_three_outputs
dequantize
dequantize_zero_offset
dequantize_axes
dequantize_int8
dequantize_int8_zero_offset
dequantize_int32
dequantize_int32_zero_offset
dequantize_int8
dequantize_int8_zero_offset
dequantize_zero_offset
divide_by_zero_int32
dot_3d_multi_axis
dot_4d_5d_multi_axis
dot_4d_5d_multi_axis_more
generate_mask
function_call
generate_mask
max_pool_3d
maxpool_bprop_larger_than_cache
numeric_double_inf
numeric_double_nan
quantize
quantize_zero_offset
quantize_axes
quantize_int8
quantize_int8_zero_offset
quantize_clamp_int32
quantize_clamp_int8
quantize_clamp_uint8
quantize_int32
quantize_int32_zero_offset
quantize_clamp_uint8
quantize_clamp_int8
quantize_clamp_int32
quantize_ROUND_NEAREST_TOWARD_ZERO
quantize_ROUND_NEAREST_TOWARD_INFINITY
quantize_ROUND_NEAREST_UPWARD
quantize_int8
quantize_int8_zero_offset
quantize_ROUND_DOWN
quantize_ROUND_NEAREST_DOWNWARD
quantize_ROUND_NEAREST_TOWARD_EVEN
quantize_ROUND_NEAREST_TOWARD_INFINITY
quantize_ROUND_NEAREST_TOWARD_ZERO
quantize_ROUND_NEAREST_UPWARD
quantize_ROUND_TOWARD_INFINITY
quantize_ROUND_TOWARD_ZERO
quantize_ROUND_UP
quantize_ROUND_DOWN
quantize_zero_offset
reduce_window_emulating_max_pool_1d_1channel_1image
reduce_window_emulating_max_pool_1d_1channel_2image
reduce_window_emulating_max_pool_1d_2channel_2image
@@ -73,9 +69,14 @@ reverse_sequence_n2c3h4w2
reverse_sequence_n4c3h2w2
reverse_sequence_n4d2c3h2w2
select_and_scatter_3d_without_overlap
select_and_scatter_with_overlap
select_and_scatter_without_overlap
select_and_scatter_with_overlap
shape_of_5d
shape_of_matrix
shape_of_scalar
shape_of_vector
softmax_axis_3d_double
sum_stable_acc
topk_1d_max_all
topk_1d_max_one
topk_1d_max_partial
@@ -88,17 +89,17 @@ topk_2d_max_partial
topk_2d_min_all
topk_2d_min_one
topk_2d_min_partial
topk_3d_large_input_max
topk_3d_large_input_min
topk_3d_max_all
topk_3d_max_one
topk_3d_max_partial
topk_3d_min_all
topk_3d_min_one
topk_3d_min_partial
topk_3d_single_output
topk_5d_max_partial
topk_int64
topk_3d_large_input_max
topk_3d_large_input_min
topk_3d_single_output
zero_sized_abs
zero_sized_acos
zero_sized_add
@@ -145,3 +146,4 @@ max_3d_to_scalar_double
argmin_trivial_in_i32
argmax_4D_axis_3_i64_in_i32
argmin_trivial_in_double
maxpool_bprop_larger_than_cache