Commit 74a3a354, authored by shssf and committed by Robert Kimball

IntelGPU backend. Use clDNN broadcast and fix compilation (#1648)

parent 2c03914e
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include <CPP/activation.hpp> #include <CPP/activation.hpp>
#include <CPP/activation_grad.hpp> #include <CPP/activation_grad.hpp>
#include <CPP/batch_norm.hpp> #include <CPP/batch_norm.hpp>
#include <CPP/broadcast.hpp>
#include <CPP/concatenation.hpp> #include <CPP/concatenation.hpp>
#include <CPP/convolution.hpp> #include <CPP/convolution.hpp>
#include <CPP/data.hpp> #include <CPP/data.hpp>
...@@ -592,6 +593,15 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func) ...@@ -592,6 +593,15 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
{ {
do_equal_propagation(topology, get_input_name(op), get_output_name(op)); do_equal_propagation(topology, get_input_name(op), get_output_name(op));
} }
else if (get_input_shape(op).empty() ||
(get_input_shape(op).size() == 1 && get_input_shape(op).at(0) == 1))
{
const cldnn::tensor output_tensor_size =
runtime::intelgpu::IntelGPULayout::create_cldnn_tensor(get_output_shape(op));
const cldnn::broadcast cldnn_broadcast(
get_output_name(op), get_input_name(op), output_tensor_size);
topology.add(cldnn_broadcast);
}
else else
{ {
do_bcast_sum_operation(topology, do_bcast_sum_operation(topology,
...@@ -1197,9 +1207,6 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func) ...@@ -1197,9 +1207,6 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
case OP_TYPEID::AllReduce: case OP_TYPEID::AllReduce:
case OP_TYPEID::ArgMax: case OP_TYPEID::ArgMax:
case OP_TYPEID::ArgMin: case OP_TYPEID::ArgMin:
case OP_TYPEID::Atan:
case OP_TYPEID::Ceiling:
case OP_TYPEID::Floor:
case OP_TYPEID::FunctionCall: case OP_TYPEID::FunctionCall:
case OP_TYPEID::LRN: case OP_TYPEID::LRN:
case OP_TYPEID::Reduce: case OP_TYPEID::Reduce:
...@@ -1207,9 +1214,7 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func) ...@@ -1207,9 +1214,7 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
case OP_TYPEID::ReplaceSlice: case OP_TYPEID::ReplaceSlice:
case OP_TYPEID::ReverseSequence: case OP_TYPEID::ReverseSequence:
case OP_TYPEID::SelectAndScatter: case OP_TYPEID::SelectAndScatter:
case OP_TYPEID::Sign:
case OP_TYPEID::StopGradient: case OP_TYPEID::StopGradient:
case OP_TYPEID::Tan:
case OP_TYPEID::TopK: case OP_TYPEID::TopK:
{ {
throw unsupported_op("Unsupported op '" + op->description() + throw unsupported_op("Unsupported op '" + op->description() +
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment