Unverified Commit 0f05495c authored by Scott Cyphers's avatar Scott Cyphers Committed by GitHub

Merge branch 'master' into master

parents 1b5340c4 244c9fcf
...@@ -22,7 +22,7 @@ include(ExternalProject) ...@@ -22,7 +22,7 @@ include(ExternalProject)
#------------------------------------------------------------------------------ #------------------------------------------------------------------------------
set(CLDNN_GIT_REPO_URL https://github.com/intel/clDNN.git) set(CLDNN_GIT_REPO_URL https://github.com/intel/clDNN.git)
set(CLDNN_GIT_LABEL df28d2861716cac7a6a9eff4e49e47162959a748) set(CLDNN_GIT_LABEL 02add7c4ce2baa81e2a32fa02d733dcc4f013108)
set(BOOST_VERSION 1.64.0) set(BOOST_VERSION 1.64.0)
set(OUT_DIR ${EXTERNAL_PROJECTS_ROOT}/cldnn/out) set(OUT_DIR ${EXTERNAL_PROJECTS_ROOT}/cldnn/out)
......
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include <CPP/crop.hpp> #include <CPP/crop.hpp>
#include <CPP/data.hpp> #include <CPP/data.hpp>
#include <CPP/eltwise.hpp> #include <CPP/eltwise.hpp>
#include <CPP/gemm.hpp>
#include <CPP/input_layout.hpp> #include <CPP/input_layout.hpp>
#include <CPP/layout.hpp> #include <CPP/layout.hpp>
#include <CPP/lrn.hpp> #include <CPP/lrn.hpp>
...@@ -675,14 +676,51 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func) ...@@ -675,14 +676,51 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
{ {
arguments_check(op, 2, 1); arguments_check(op, 2, 1);
do_dot_operation(topology, const shared_ptr<op::Dot> dot_inst = static_pointer_cast<op::Dot>(op);
get_input_name(op, 0), const size_t axes_count = dot_inst->get_reduction_axes_count();
get_input_shape(op, 0), const Shape& input0_shape = get_input_shape(op, 0);
get_input_name(op, 1), const Shape& input1_shape = get_input_shape(op, 1);
get_input_shape(op, 1), const size_t input0_elem_count = shape_size(input0_shape);
get_output_name(op), const size_t input1_elem_count = shape_size(input1_shape);
get_output_shape(op),
get_output_type(op)); if (get_input_type(op) == element::f32 && get_input_type(op, 1) == element::f32 &&
get_output_type(op) == element::f32 && input0_elem_count && input1_elem_count &&
(axes_count == 1) && (input0_shape.size() < 3) && (input1_shape.size() < 3) &&
!input0_shape.empty() && !input1_shape.empty())
{
string input1_name = get_input_name(op, 1);
// If we have A[5] and B[5] here, in cldnn we have A[1, 1, 1, 5] and B[1, 1, 1, 5]
// it needs to be reshaped into A[1, 1, 1, 5] and B[1, 1, 5, 1]
if (!input0_shape.empty() && (input1_shape.size() == 1))
{
const string new_name = input1_name + "_reshaped";
Shape new_shape = input1_shape;
new_shape.push_back(1);
const cldnn::tensor reshaped_tensor =
intelgpu_space::create_cldnn_tensor(new_shape);
const cldnn::reshape reshape_op(new_name, input1_name, reshaped_tensor);
topology.add(reshape_op);
input1_name = new_name;
}
const cldnn::gemm dot_op(get_output_name(op), get_input_name(op, 0), input1_name);
topology.add(dot_op);
}
else
{
do_dot_operation(topology,
get_input_name(op, 0),
get_input_shape(op, 0),
get_input_name(op, 1),
get_input_shape(op, 1),
get_output_name(op),
get_output_shape(op),
get_output_type(op),
axes_count);
}
break; break;
} }
case OP_TYPEID::MaxPool: case OP_TYPEID::MaxPool:
......
...@@ -306,16 +306,6 @@ static string access_dims_strided(const Shape& dimentions, ...@@ -306,16 +306,6 @@ static string access_dims_strided(const Shape& dimentions,
return buffer; return buffer;
} }
static void do_dot_operation_error(const Shape& input0_shape,
const Shape& input1_shape,
const Shape& output_shape)
{
throw invalid_argument("IntelGPU Dot operation. Conbination input0_shape" +
runtime::intelgpu::array_dims(input0_shape) + ", input1_shape" +
runtime::intelgpu::array_dims(input1_shape) + ", output_shape" +
runtime::intelgpu::array_dims(output_shape) + " is not supported.");
}
void runtime::intelgpu::do_pad_operation(cldnn::topology& topology, void runtime::intelgpu::do_pad_operation(cldnn::topology& topology,
const string& input_name, const string& input_name,
const Shape& input_shape, const Shape& input_shape,
...@@ -742,82 +732,30 @@ void runtime::intelgpu::do_avg_pool_backprop_operation(cldnn::topology& topology ...@@ -742,82 +732,30 @@ void runtime::intelgpu::do_avg_pool_backprop_operation(cldnn::topology& topology
topology.add(op_avg_pool_backprop); topology.add(op_avg_pool_backprop);
} }
static void do_1d_scalar_mul(codegen::CodeWriter& writer, void runtime::intelgpu::do_dot_operation(cldnn::topology& topology,
string& entry_point_name, const string& input0_name,
const Shape& input0_shape, const Shape& input0_shape,
const Shape& input1_shape, const string& input1_name,
const string& type_name) const Shape& input1_shape,
{ const string& output_name,
const size_t input0_count = input0_shape.empty() ? 0 : shape_size<Shape>(input0_shape); const Shape& output_shape,
const size_t input1_count = input1_shape.empty() ? 0 : shape_size<Shape>(input1_shape); const element::Type& output_type,
const size_t output_count = max(input0_count, input1_count); size_t reduction_axes_count)
entry_point_name += "_do_1d_scalar_mul";
writer << "__kernel void " << entry_point_name << "(const __global " << type_name << "* input0"
<< ", const __global " << type_name << "* input1, __global " << type_name
<< "* output)\n";
writer.block_begin();
{
writer << "for (uint i1 = 0; i1 < " << output_count << "; ++i1)\n";
writer.block_begin();
{
writer << "output[i1] = input0[" << (input0_count > 0 ? "i1" : "0") << "] * input1["
<< (input1_count > 0 ? "i1" : "0") << "];\n";
}
writer.block_end();
}
writer.block_end();
}
static vector<size_t> do_2d_2d_mul(codegen::CodeWriter& writer,
string& entry_point_name,
const Shape& input0_shape,
const Shape& input1_shape,
const Shape& output_shape,
const string& type_name)
{ {
entry_point_name += "_do_2d_2d_mul"; const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, output_shape);
string entry_point_name = "dot_" + output_name;
const string type_name = get_opencl_type_name(output_type);
const size_t input0_axes = input0_shape.size() - reduction_axes_count;
size_t var_idx = reduction_axes_count;
Shape reduction_shape;
codegen::CodeWriter writer;
vector<size_t> gws; vector<size_t> gws;
runtime::intelgpu::gen_func_def(writer, for (auto it = input1_shape.begin(); (it != input1_shape.end()) && (var_idx > 0); ++it)
entry_point_name,
{2, type_name},
{input0_shape, input1_shape},
type_name,
output_shape);
writer.block_begin();
{ {
// Main loops reduction_shape.push_back(*it);
gws = runtime::intelgpu::generate_loops(writer, output_shape, true); --var_idx;
// Inner loop
writer << type_name << " sum = 0;\n";
writer << "for (uint i2 = 0; i2 < " << input0_shape.at(1) << "; ++i2)\n";
writer.block_begin();
{
writer << "sum += input0[i0][i2] * input1[i2][i1];\n";
}
writer.block_end();
writer << "output[i0][i1] = sum;\n";
// Closing brackets for main loops
runtime::intelgpu::generate_loops(writer, output_shape, false);
} }
writer.block_end();
return gws;
}
static vector<size_t> do_3d_3d_mul(codegen::CodeWriter& writer,
string& entry_point_name,
const Shape& input0_shape,
const Shape& input1_shape,
const Shape& output_shape,
const string& type_name)
{
entry_point_name += "_do_3d_3d_mul";
vector<size_t> gws;
runtime::intelgpu::gen_func_def(writer, runtime::intelgpu::gen_func_def(writer,
entry_point_name, entry_point_name,
...@@ -825,219 +763,84 @@ static vector<size_t> do_3d_3d_mul(codegen::CodeWriter& writer, ...@@ -825,219 +763,84 @@ static vector<size_t> do_3d_3d_mul(codegen::CodeWriter& writer,
{input0_shape, input1_shape}, {input0_shape, input1_shape},
type_name, type_name,
output_shape); output_shape);
writer.block_begin(); writer.block_begin();
{ {
writer << "// reduction_axes_count:" << reduction_axes_count << "\n"
<< "// reduction_shape:" << reduction_shape << "\n";
// Main loops // Main loops
gws = runtime::intelgpu::generate_loops(writer, output_shape, true); gws = runtime::intelgpu::generate_loops(writer, output_shape, true);
// Inner loop
writer << type_name << " sum = 0;\n"; writer << type_name << " sum = 0;\n";
writer << "for (uint i4 = 0; i4 < " << input0_shape.back() << "; ++i4)\n";
writer.block_begin();
{
writer << "sum += input0[i0][i1][i4] * input1[i4][i2][i3];\n";
}
writer.block_end();
writer << "output[i0][i1][i2][i3] = sum;\n";
// Closing brackets for main loops
runtime::intelgpu::generate_loops(writer, output_shape, false);
}
writer.block_end();
return gws;
}
static vector<size_t> do_3d_2d_mul(codegen::CodeWriter& writer, // Reduction loops
string& entry_point_name, var_idx = 0;
const Shape& input0_shape, for (auto const& i : reduction_shape)
const Shape& input1_shape, {
const Shape& output_shape, writer << "for (uint k" << var_idx << " = 0; k" << var_idx << " < " << i << "; ++k"
const string& type_name) << var_idx << ")\n";
{ writer.block_begin();
entry_point_name += "_do_3d_2d_mul";
vector<size_t> gws;
runtime::intelgpu::gen_func_def(writer, ++var_idx;
entry_point_name, }
{2, type_name},
{input0_shape, input1_shape},
type_name,
output_shape);
writer.block_begin(); writer << "sum += input0";
{
// Main loops
gws = runtime::intelgpu::generate_loops(writer, output_shape, true);
// Inner loop if (input0_shape.empty())
writer << type_name << " sum = 0;\n";
writer << "for (uint i3 = 0; i3 < " << input0_shape.back() << "; ++i3)\n";
writer.block_begin();
{ {
writer << "sum += input0[i0][i1][i3] * input1[i3][i2];\n"; writer << "[0]";
} }
writer.block_end(); else
writer << "output[i0][i1][i2] = sum;\n"; {
// main axes indexes
// Closing brackets for main loops for (size_t i = 0; i < input0_axes; ++i)
runtime::intelgpu::generate_loops(writer, output_shape, false); {
} writer << "[i" << i << "]";
writer.block_end(); }
return gws;
}
static vector<size_t> do_2d_1d_mul(codegen::CodeWriter& writer,
string& entry_point_name,
const Shape& input0_shape,
const Shape& input1_shape,
const Shape& output_shape,
const string& type_name)
{
entry_point_name += "_do_2d_1d_mul";
vector<size_t> gws;
runtime::intelgpu::gen_func_def(writer, // reduction axes indexes
entry_point_name, for (size_t i = 0; i < reduction_shape.size(); ++i)
{2, type_name}, {
{input0_shape, input1_shape}, writer << "[k" << i << "]";
type_name, }
output_shape); }
writer.block_begin(); // operation
{ writer << " * input1";
// Main loops
gws = runtime::intelgpu::generate_loops(writer, output_shape, true);
writer << type_name << " sum = 0;\n"; if (input1_shape.empty())
// Inner loop
writer << "for (uint i1 = 0; i1 < " << input0_shape.at(1) << "; ++i1)\n";
writer.block_begin();
{ {
writer << "sum += input0[i0][i1] * input1[i1];\n"; writer << "[0]";
} }
writer.block_end(); else
writer << "output[i0] = sum;\n"; {
// reduction axes indexes
// Closing brackets for main loops for (size_t i = 0; i < reduction_shape.size(); ++i)
runtime::intelgpu::generate_loops(writer, output_shape, false); {
} writer << "[k" << i << "]";
writer.block_end(); }
return gws;
}
static void do_scalar_scalar_mul(codegen::CodeWriter& writer,
string& entry_point_name,
const string& type_name)
{
entry_point_name += "_scalar_scalar_mul";
runtime::intelgpu::gen_func_def(
writer, entry_point_name, {2, type_name}, {{1}, {1}}, type_name, {1});
writer.block_begin();
{
writer << "output[0] = input0[0] * input1[0];\n";
}
writer.block_end();
}
static void do_1d_1d_mul(codegen::CodeWriter& writer,
string& entry_point_name,
const Shape& shape,
const string& type_name)
{
if (shape.size() > 1)
{
throw invalid_argument("do_1d_1d_mul: Shape" + runtime::intelgpu::array_dims(shape) +
" must be 1D");
}
entry_point_name += "_do_1d_1d_mul"; // main axes indexes
for (size_t i = input0_axes; i < output_shape.size(); ++i)
{
writer << "[i" << i << "]";
}
}
runtime::intelgpu::gen_func_def( writer << ";\n";
writer, entry_point_name, {2, type_name}, {2, shape}, type_name, {1});
writer.block_begin(); // Closing brackets for reduction loops
{ for (auto const& i : reduction_shape)
writer << type_name << " sum = 0;\n"
<< "for (uint i = 0; i < " << shape.front() << "; ++i)\n";
writer.block_begin();
{ {
writer << "sum += input0[i] * input1[i];\n"; writer.block_end();
} }
writer.block_end();
writer << "output[0] = sum;\n";
}
writer.block_end();
}
void runtime::intelgpu::do_dot_operation(cldnn::topology& topology,
const string& input0_name,
const Shape& input0_shape,
const string& input1_name,
const Shape& input1_shape,
const string& output_name,
const Shape& output_shape,
const element::Type& output_type)
{
const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, output_shape);
string entry_point_name = "dot_" + output_name;
const string type_name = get_opencl_type_name(output_type);
codegen::CodeWriter writer;
vector<size_t> gws = {1};
const bool is_input0_scalar = input0_shape.empty(); writer << "output" << runtime::intelgpu::access_dims(output_shape) << " = sum;\n";
const bool is_input1_scalar = input1_shape.empty();
const bool is_output_scalar = output_shape.empty();
if (is_input0_scalar && is_input1_scalar && is_output_scalar) // Closing brackets for main loops
{ runtime::intelgpu::generate_loops(writer, output_shape, false);
do_scalar_scalar_mul(writer, entry_point_name, type_name);
}
else if (((is_input0_scalar && !is_input1_scalar) || (!is_input0_scalar && is_input1_scalar)) &&
!is_output_scalar)
{
do_1d_scalar_mul(writer, entry_point_name, input0_shape, input1_shape, type_name);
}
else if (!is_input0_scalar && !is_input1_scalar && is_output_scalar)
{
do_1d_1d_mul(writer, entry_point_name, input1_shape, type_name);
}
else if (!is_input0_scalar && !is_input1_scalar && !is_output_scalar)
{
if (input0_shape.size() == 2 && input1_shape.size() == 1)
{
gws = do_2d_1d_mul(
writer, entry_point_name, input0_shape, input1_shape, output_shape, type_name);
}
else if (input0_shape.size() == 2 && input1_shape.size() == 2)
{
gws = do_2d_2d_mul(
writer, entry_point_name, input0_shape, input1_shape, output_shape, type_name);
}
else if (input0_shape.size() == 3 && input1_shape.size() == 3)
{
gws = do_3d_3d_mul(
writer, entry_point_name, input0_shape, input1_shape, output_shape, type_name);
}
else if (input0_shape.size() == 3 && input1_shape.size() == 2)
{
gws = do_3d_2d_mul(
writer, entry_point_name, input0_shape, input1_shape, output_shape, type_name);
}
else
{
do_dot_operation_error(input0_shape, input1_shape, output_shape);
}
}
else
{
do_dot_operation_error(input0_shape, input1_shape, output_shape);
} }
writer.block_end();
const cldnn::custom_gpu_primitive op_dot(output_name, const cldnn::custom_gpu_primitive op_dot(output_name,
{input0_name, input1_name}, {input0_name, input1_name},
......
...@@ -72,7 +72,8 @@ namespace ngraph ...@@ -72,7 +72,8 @@ namespace ngraph
const Shape& inputB_shape, const Shape& inputB_shape,
const std::string& output_name, const std::string& output_name,
const Shape& output_shape, const Shape& output_shape,
const element::Type& output_type); const element::Type& output_type,
size_t reduction_axes_count);
void do_slice_operation(cldnn::topology& topology, void do_slice_operation(cldnn::topology& topology,
const std::string& input_name, const std::string& input_name,
......
avg_pool_2d_2channel_2image_padded_only_above_do_not_include_in_computation avg_pool_2d_2channel_2image_padded_only_above_do_not_include_in_computation
avg_pool_2d_2channel_2image_padded_only_above_include_in_computation avg_pool_2d_2channel_2image_padded_only_above_include_in_computation
avg_pool_3d_uneven_strided_padded avg_pool_3d_uneven_strided_padded
backwards_batch_norm_three_outputs
backwards_batch_norm_training backwards_batch_norm_training
backwards_dot_scalar_tensor backwards_dot_scalar_tensor
backwards_dot_tensor3_tensor3 backwards_dot_tensor3_tensor3
...@@ -9,7 +10,6 @@ backwards_dot_tensor_vector ...@@ -9,7 +10,6 @@ backwards_dot_tensor_vector
backwards_exp backwards_exp
backwards_maxpool_n2_c1_hw5_3x3_str2_max backwards_maxpool_n2_c1_hw5_3x3_str2_max
backwards_maxpool_n4_c1_hw4_2x2_max backwards_maxpool_n4_c1_hw4_2x2_max
backwards_relu
backwards_replace_slice backwards_replace_slice
backwards_reverse_sequence_n3_c2_h3 backwards_reverse_sequence_n3_c2_h3
backwards_reverse_sequence_n4d2c3h2w2 backwards_reverse_sequence_n4d2c3h2w2
...@@ -21,41 +21,37 @@ batch_norm_training_0eps_f64 ...@@ -21,41 +21,37 @@ batch_norm_training_0eps_f64
batch_norm_one_output batch_norm_one_output
batch_norm_three_outputs batch_norm_three_outputs
dequantize dequantize
dequantize_zero_offset
dequantize_axes dequantize_axes
dequantize_int8
dequantize_int8_zero_offset
dequantize_int32 dequantize_int32
dequantize_int32_zero_offset dequantize_int32_zero_offset
dequantize_int8
dequantize_int8_zero_offset
dequantize_zero_offset
divide_by_zero_int32 divide_by_zero_int32
dot_3d_multi_axis
dot_4d_5d_multi_axis
dot_4d_5d_multi_axis_more
generate_mask
function_call function_call
generate_mask
max_pool_3d max_pool_3d
maxpool_bprop_larger_than_cache
numeric_double_inf numeric_double_inf
numeric_double_nan numeric_double_nan
quantize quantize
quantize_zero_offset
quantize_axes quantize_axes
quantize_int8 quantize_clamp_int32
quantize_int8_zero_offset quantize_clamp_int8
quantize_clamp_uint8
quantize_int32 quantize_int32
quantize_int32_zero_offset quantize_int32_zero_offset
quantize_clamp_uint8 quantize_int8
quantize_clamp_int8 quantize_int8_zero_offset
quantize_clamp_int32 quantize_ROUND_DOWN
quantize_ROUND_NEAREST_TOWARD_ZERO
quantize_ROUND_NEAREST_TOWARD_INFINITY
quantize_ROUND_NEAREST_UPWARD
quantize_ROUND_NEAREST_DOWNWARD quantize_ROUND_NEAREST_DOWNWARD
quantize_ROUND_NEAREST_TOWARD_EVEN quantize_ROUND_NEAREST_TOWARD_EVEN
quantize_ROUND_NEAREST_TOWARD_INFINITY
quantize_ROUND_NEAREST_TOWARD_ZERO
quantize_ROUND_NEAREST_UPWARD
quantize_ROUND_TOWARD_INFINITY quantize_ROUND_TOWARD_INFINITY
quantize_ROUND_TOWARD_ZERO quantize_ROUND_TOWARD_ZERO
quantize_ROUND_UP quantize_ROUND_UP
quantize_ROUND_DOWN quantize_zero_offset
reduce_window_emulating_max_pool_1d_1channel_1image reduce_window_emulating_max_pool_1d_1channel_1image
reduce_window_emulating_max_pool_1d_1channel_2image reduce_window_emulating_max_pool_1d_1channel_2image
reduce_window_emulating_max_pool_1d_2channel_2image reduce_window_emulating_max_pool_1d_2channel_2image
...@@ -73,9 +69,14 @@ reverse_sequence_n2c3h4w2 ...@@ -73,9 +69,14 @@ reverse_sequence_n2c3h4w2
reverse_sequence_n4c3h2w2 reverse_sequence_n4c3h2w2
reverse_sequence_n4d2c3h2w2 reverse_sequence_n4d2c3h2w2
select_and_scatter_3d_without_overlap select_and_scatter_3d_without_overlap
select_and_scatter_with_overlap
select_and_scatter_without_overlap select_and_scatter_without_overlap
select_and_scatter_with_overlap
shape_of_5d
shape_of_matrix
shape_of_scalar
shape_of_vector
softmax_axis_3d_double softmax_axis_3d_double
sum_stable_acc
topk_1d_max_all topk_1d_max_all
topk_1d_max_one topk_1d_max_one
topk_1d_max_partial topk_1d_max_partial
...@@ -88,17 +89,17 @@ topk_2d_max_partial ...@@ -88,17 +89,17 @@ topk_2d_max_partial
topk_2d_min_all topk_2d_min_all
topk_2d_min_one topk_2d_min_one
topk_2d_min_partial topk_2d_min_partial
topk_3d_large_input_max
topk_3d_large_input_min
topk_3d_max_all topk_3d_max_all
topk_3d_max_one topk_3d_max_one
topk_3d_max_partial topk_3d_max_partial
topk_3d_min_all topk_3d_min_all
topk_3d_min_one topk_3d_min_one
topk_3d_min_partial topk_3d_min_partial
topk_3d_single_output
topk_5d_max_partial topk_5d_max_partial
topk_int64 topk_int64
topk_3d_large_input_max
topk_3d_large_input_min
topk_3d_single_output
zero_sized_abs zero_sized_abs
zero_sized_acos zero_sized_acos
zero_sized_add zero_sized_add
...@@ -145,3 +146,4 @@ max_3d_to_scalar_double ...@@ -145,3 +146,4 @@ max_3d_to_scalar_double
argmin_trivial_in_i32 argmin_trivial_in_i32
argmax_4D_axis_3_i64_in_i32 argmax_4D_axis_3_i64_in_i32
argmin_trivial_in_double argmin_trivial_in_double
maxpool_bprop_larger_than_cache
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment