Commit e5757725 authored by nishant.b.patel

Merge branch 'master' into quantized_conv_temp

parents b13eacf6 d0f03eec
......@@ -19,7 +19,7 @@ import test
def pytest_addoption(parser):
parser.addoption('--backend', default='INTERPRETER',
choices=['INTERPRETER', 'CPU', 'GPU', 'NNP', 'PlaidML'],
choices=['INTERPRETER', 'CPU', 'GPU', 'NNP', 'PlaidML', 'INTELGPU'],
help='Select from available backends')
......@@ -31,20 +31,25 @@ def pytest_configure(config):
def pytest_collection_modifyitems(config, items):
backend_name = config.getvalue('backend')
gpu_skip = pytest.mark.skip(reason='Skipping test on the GPU backend.')
cpu_skip = pytest.mark.skip(reason='Skipping test on the CPU backend.')
nnp_skip = pytest.mark.skip(reason='Skipping test on the NNP backend.')
interpreter_skip = pytest.mark.skip(reason='Skipping test on the INTERPRETER backend.')
plaidml_skip = pytest.mark.skip(reason='Skipping test on the PlaidML backend.')
keywords = {
'GPU': 'skip_on_gpu',
'CPU': 'skip_on_cpu',
'NNP': 'skip_on_nnp',
'INTERPRETER': 'skip_on_interpreter',
'PlaidML': 'skip_on_plaidml',
'INTELGPU': 'skip_on_intelgpu',
}
skip_markers = {
'GPU': pytest.mark.skip(reason='Skipping test on the GPU backend.'),
'CPU': pytest.mark.skip(reason='Skipping test on the CPU backend.'),
'NNP': pytest.mark.skip(reason='Skipping test on the NNP backend.'),
'INTERPRETER': pytest.mark.skip(reason='Skipping test on the INTERPRETER backend.'),
'PlaidML': pytest.mark.skip(reason='Skipping test on the PlaidML backend.'),
'INTELGPU': pytest.mark.skip(reason='Skipping test on the INTELGPU backend.'),
}
for item in items:
if backend_name == 'GPU' and 'skip_on_gpu' in item.keywords:
item.add_marker(gpu_skip)
if backend_name == 'CPU' and 'skip_on_cpu' in item.keywords:
item.add_marker(cpu_skip)
if backend_name == 'NNP' and 'skip_on_nnp' in item.keywords:
item.add_marker(nnp_skip)
if backend_name == 'INTERPRETER' and 'skip_on_interpreter' in item.keywords:
item.add_marker(interpreter_skip)
if backend_name == 'PlaidML' and 'skip_on_plaidml' in item.keywords:
item.add_marker(plaidml_skip)
skip_this_backend = keywords[backend_name]
if skip_this_backend in item.keywords:
item.add_marker(skip_markers[backend_name])
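A usage note for context: with this table-driven form, a test opts into being skipped on the new backend the same way as for the existing ones, e.g. by decorating it with @pytest.mark.skip_on_intelgpu and running pytest with --backend=INTELGPU; the keywords/skip_markers maps above then attach the matching skip marker.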
......@@ -33,7 +33,6 @@ from test.ngraph.util import run_op_numeric_data, run_op_node
(ng.exp, np.exp, -100., 100.),
(ng.floor, np.floor, -100., 100.),
(ng.log, np.log, 0, 100.),
(ng.logical_not, np.logical_not, -10, 10),
(ng.relu, lambda x: np.maximum(0, x), -100., 100.),
(ng.sign, np.sign, -100., 100.),
(ng.sin, np.sin, -100., 100.),
......@@ -68,7 +67,6 @@ def test_unary_op_array(ng_api_fn, numpy_fn, range_start, range_end):
(ng.exp, np.exp, np.float32(1.5)),
(ng.floor, np.floor, np.float32(1.5)),
(ng.log, np.log, np.float32(1.5)),
(ng.logical_not, np.logical_not, np.int32(0)),
(ng.relu, lambda x: np.maximum(0, x), np.float32(-0.125)),
(ng.sign, np.sign, np.float32(0.)),
(ng.sin, np.sin, np.float32(np.pi / 4.0)),
......@@ -86,3 +84,19 @@ def test_unary_op_scalar(ng_api_fn, numpy_fn, input_data):
result = run_op_numeric_data(input_data, ng_api_fn)
assert np.allclose(result, expected)
@pytest.mark.parametrize('input_data', [
(np.array([True, False, True, False])),
(np.array(True)),
(np.array(False)),
])
@pytest.mark.skip_on_gpu
def test_logical_not(input_data):
expected = np.logical_not(input_data)
result = run_op_node([input_data], ng.logical_not)[0]
assert np.array_equal(result, expected)
result = run_op_numeric_data(input_data, ng.logical_not)[0]
assert np.array_equal(result, expected)
......@@ -818,6 +818,7 @@ def test_slice():
@pytest.mark.skip_on_gpu
@pytest.mark.skip_on_intelgpu
def test_replace_slice():
element_type = Type.f32
......
......@@ -482,6 +482,8 @@ set(SRC ${SRC}
if(NGRAPH_JSON_ENABLE)
list(APPEND SRC serializer.cpp serializer.hpp event_tracing.cpp event_tracing.hpp)
else()
list(APPEND SRC serializer_stub.cpp)
endif()
configure_file(version.in.hpp version.hpp)
......
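A note for context: serializer_stub.cpp is added by this merge so that the serializer entry points still get compiled when NGRAPH_JSON_ENABLE is off; it replaces the inline stubs that are removed from serializer.hpp further down in this diff.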
......@@ -22,6 +22,41 @@
using namespace ngraph;
NGRAPH_API const reduction::Type reduction::sum(reduction::Type_t::sum);
NGRAPH_API const reduction::Type reduction::prod(reduction::Type_t::prod);
NGRAPH_API const reduction::Type reduction::min(reduction::Type_t::min);
NGRAPH_API const reduction::Type reduction::max(reduction::Type_t::max);
std::ostream& reduction::operator<<(std::ostream& out, const reduction::Type& obj)
{
#if !(defined(__GNUC__) && (__GNUC__ == 4 && __GNUC_MINOR__ == 8))
#pragma GCC diagnostic push
#pragma GCC diagnostic error "-Wswitch"
#pragma GCC diagnostic error "-Wswitch-enum"
#endif
switch (obj.get_type())
{
case reduction::Type_t::sum: out << "sum"; break;
case reduction::Type_t::prod: out << "prod"; break;
case reduction::Type_t::min: out << "min"; break;
case reduction::Type_t::max: out << "max"; break;
}
#if !(defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ == 8)
#pragma GCC diagnostic pop
#endif
return out;
};
bool reduction::Type::operator==(const reduction::Type& other) const
{
return m_type == other.m_type;
}
reduction::Type_t reduction::Type::get_type() const
{
return m_type;
}
static std::unique_ptr<DistributedInterface> s_distributed_interface;
void ngraph::set_distributed_interface(std::unique_ptr<DistributedInterface> distributed_interface)
......
......@@ -24,6 +24,38 @@
namespace ngraph
{
namespace reduction
{
enum class Type_t
{
sum,
prod,
min,
max,
};
class Type
{
public:
Type(const Type_t t)
: m_type(t)
{
}
friend std::ostream& operator<<(std::ostream&, const Type&);
bool operator==(const Type& other) const;
bool operator!=(const Type& other) const { return !(*this == other); }
Type_t get_type() const;
private:
Type_t m_type;
};
std::ostream& operator<<(std::ostream& out, const Type& obj);
extern NGRAPH_API const Type sum;
extern NGRAPH_API const Type prod;
extern NGRAPH_API const Type min;
extern NGRAPH_API const Type max;
}
class DistributedInterface
{
public:
......@@ -33,8 +65,11 @@ namespace ngraph
virtual int get_rank() = 0;
virtual void log_print(const std::string& timestamp, const std::vector<char>& buf) = 0;
virtual void
all_reduce(void* in, void* out, element::Type_t element_type, size_t count) = 0;
virtual void all_reduce(void* in,
void* out,
element::Type_t element_type,
reduction::Type reduce_type,
size_t count) = 0;
virtual void
broadcast(void* in, element::Type_t element_type, size_t count, int root_id) = 0;
};
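A minimal usage sketch for the new reduction::Type wrapper (the function name and the ngraph/distributed.hpp include path are illustrative assumptions, not part of this diff); because the Type(Type_t) constructor is not explicit, a Type compares directly against Type_t enumerators as well as against the exported constants:
#include <cassert>
#include "ngraph/distributed.hpp" // assumed location of the declarations above
void reduction_type_example()
{
    ngraph::reduction::Type t = ngraph::reduction::Type_t::max; // implicit Type_t -> Type
    assert(t == ngraph::reduction::Type_t::max);                // rhs converts through the same ctor
    assert(t != ngraph::reduction::sum);                        // compare against an exported constant
    assert(t.get_type() == ngraph::reduction::Type_t::max);     // raw enumerator back out
}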
......
......@@ -65,8 +65,11 @@ namespace ngraph
std::printf("%s [MLSL RANK: %d]: %s\n", timestamp.c_str(), get_rank(), buf.data());
}
void
all_reduce(void* in, void* out, element::Type_t element_type, size_t count) override
void all_reduce(void* in,
void* out,
element::Type_t element_type,
reduction::Type reduce_type,
size_t count) override
{
auto data_type = MLSL::DT_FLOAT;
......@@ -83,10 +86,29 @@ namespace ngraph
throw std::runtime_error("AllReduce op supports only f32 and f64 types");
}
decltype(MLSL::RT_SUM) mlsl_reduce_type;
#if !(defined(__GNUC__) && (__GNUC__ == 4 && __GNUC_MINOR__ == 8))
#pragma GCC diagnostic push
#pragma GCC diagnostic error "-Wswitch"
#pragma GCC diagnostic error "-Wswitch-enum"
#endif
switch (reduce_type.get_type())
{
case reduction::Type_t::sum: mlsl_reduce_type = MLSL::RT_SUM; break;
case reduction::Type_t::prod:
throw std::runtime_error("MLSL doesn't support allreduce prod");
break;
case reduction::Type_t::min: mlsl_reduce_type = MLSL::RT_MIN; break;
case reduction::Type_t::max: mlsl_reduce_type = MLSL::RT_MAX; break;
}
#if !(defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ == 8)
#pragma GCC diagnostic pop
#endif
MLSL::Environment& env = MLSL::Environment::GetEnv();
MLSL::Distribution* distribution = env.CreateDistribution(env.GetProcessCount(), 1);
MLSL::CommReq* req =
distribution->AllReduce(in, out, count, data_type, MLSL::RT_SUM, MLSL::GT_DATA);
MLSL::CommReq* req = distribution->AllReduce(
in, out, count, data_type, mlsl_reduce_type, MLSL::GT_DATA);
env.Wait(req);
env.DeleteDistribution(distribution);
}
......
......@@ -35,8 +35,11 @@ namespace ngraph
{
std::printf("%s: %s\n", timestamp.c_str(), buf.data());
}
void
all_reduce(void* in, void* out, element::Type_t element_type, size_t count) override
void all_reduce(void* in,
void* out,
element::Type_t element_type,
reduction::Type reduce_type,
size_t count) override
{
throw ngraph_error("Distributed Library not supported/mentioned");
}
......
......@@ -77,8 +77,11 @@ namespace ngraph
"%s [OpenMPI RANK: %d]: %s\n", timestamp.c_str(), get_rank(), buf.data());
}
void
all_reduce(void* in, void* out, element::Type_t element_type, size_t count) override
void all_reduce(void* in,
void* out,
element::Type_t element_type,
reduction::Type reduce_type,
size_t count) override
{
auto data_type = MPI_FLOAT;
......@@ -95,7 +98,24 @@ namespace ngraph
throw std::runtime_error("AllReduce op supports only f32 and f64 types");
}
MPI_Allreduce(in, out, count, data_type, MPI_SUM, MPI_COMM_WORLD);
decltype(MPI_SUM) mpi_reduce_type;
#if !(defined(__GNUC__) && (__GNUC__ == 4 && __GNUC_MINOR__ == 8))
#pragma GCC diagnostic push
#pragma GCC diagnostic error "-Wswitch"
#pragma GCC diagnostic error "-Wswitch-enum"
#endif
switch (reduce_type.get_type())
{
case reduction::Type_t::sum: mpi_reduce_type = MPI_SUM; break;
case reduction::Type_t::prod: mpi_reduce_type = MPI_PROD; break;
case reduction::Type_t::min: mpi_reduce_type = MPI_MIN; break;
case reduction::Type_t::max: mpi_reduce_type = MPI_MAX; break;
}
#if !(defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ == 8)
#pragma GCC diagnostic pop
#endif
MPI_Allreduce(in, out, count, data_type, mpi_reduce_type, MPI_COMM_WORLD);
}
void broadcast(void* in,
......
......@@ -22,11 +22,13 @@ using namespace ngraph;
const string op::AllReduce::type_name{"AllReduce"};
op::AllReduce::AllReduce()
: m_reduce_type(reduction::sum)
{
}
op::AllReduce::AllReduce(const shared_ptr<Node>& arg)
op::AllReduce::AllReduce(const shared_ptr<Node>& arg, const reduction::Type reduce_type)
: Op(check_single_output_args({arg}))
, m_reduce_type(reduce_type)
{
constructor_validate_and_infer_types();
}
......@@ -47,5 +49,10 @@ void op::AllReduce::validate_and_infer_types()
shared_ptr<Node> op::AllReduce::copy_with_new_args(const NodeVector& new_args) const
{
check_new_args_count(this, new_args);
return make_shared<AllReduce>(new_args.at(0));
return make_shared<AllReduce>(new_args.at(0), get_reduce_type());
}
reduction::Type op::AllReduce::get_reduce_type() const
{
return m_reduce_type;
}
......@@ -30,11 +30,16 @@ namespace ngraph
static const std::string type_name;
const std::string& description() const override { return type_name; }
AllReduce();
AllReduce(const std::shared_ptr<Node>& arg);
AllReduce(const std::shared_ptr<Node>& arg,
const reduction::Type reduce_type = reduction::sum);
void validate_and_infer_types() override;
std::shared_ptr<Node> copy_with_new_args(const NodeVector& new_args) const override;
reduction::Type get_reduce_type() const;
private:
const reduction::Type m_reduce_type;
};
}
}
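A construction sketch for the extended AllReduce API, assuming the usual ngraph/ngraph.hpp umbrella header; the function name and graph scaffolding are illustrative only and mirror the distributed tests later in this diff:
#include <memory>
#include "ngraph/ngraph.hpp" // assumed umbrella header
using namespace ngraph;
std::shared_ptr<Function> make_allreduce_min_graph()
{
    auto arg = std::make_shared<op::Parameter>(element::f32, Shape{2, 2});
    // Omitting the second argument keeps the previous behaviour (reduction::sum).
    auto ar = std::make_shared<op::AllReduce>(arg, reduction::min);
    return std::make_shared<Function>(NodeVector{ar}, ParameterVector{arg});
}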
......@@ -27,8 +27,14 @@
using namespace std;
using namespace ngraph;
op::GroupConvolution::GroupConvolution(const shared_ptr<Node>& data_batch,
const shared_ptr<Node>& filters,
const string op::GroupConvolution::type_name{"GroupConvolution"};
op::GroupConvolution::GroupConvolution()
{
}
op::GroupConvolution::GroupConvolution(const Output<Node>& data_batch,
const Output<Node>& filters,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
......@@ -36,7 +42,7 @@ op::GroupConvolution::GroupConvolution(const shared_ptr<Node>& data_batch,
const Strides& data_dilation_strides,
const size_t groups,
const PadType& pad_type)
: FusedOp("GroupConvolution", check_single_output_args({data_batch, filters}))
: FusedOp({data_batch, filters})
, m_window_movement_strides(window_movement_strides)
, m_window_dilation_strides(window_dilation_strides)
, m_padding_below(padding_below)
......@@ -45,7 +51,6 @@ op::GroupConvolution::GroupConvolution(const shared_ptr<Node>& data_batch,
, m_groups(groups)
, m_pad_type(pad_type)
{
// TODO: Move this out of constructor to validate_and_infer_types()
constructor_validate_and_infer_types();
}
......@@ -129,35 +134,35 @@ shared_ptr<Node> op::GroupConvolution::copy_with_new_args(const NodeVector& new_
NodeVector op::GroupConvolution::decompose_op() const
{
auto data = get_argument(0);
auto filters = get_argument(1);
auto data = input(0);
auto filters = input(1);
// Split one convolution op to N ops where N is the number of groups
// and concat results after computation.
// reference: https://github.com/NervanaSystems/ngraph-mxnet/blob/fdd692/src/ngraph/ngraph_emitter.cc#L822-L856
std::size_t n_data_channels{data->get_shape().at(1)};
std::size_t n_filters_channels{filters->get_shape().at(0)};
std::size_t n_data_channels{data.get_shape().at(1)};
std::size_t n_filters_channels{filters.get_shape().at(0)};
std::size_t data_group_size{n_data_channels / m_groups};
std::size_t filters_group_size{n_filters_channels / m_groups};
NodeVector convolution_nodes;
// initial bounds for splice
std::vector<std::size_t> data_lower_bounds(data->get_shape().size());
std::vector<std::size_t> data_upper_bounds{data->get_shape()};
std::vector<std::size_t> filters_lower_bounds(filters->get_shape().size());
std::vector<std::size_t> filters_upper_bounds{filters->get_shape()};
std::vector<std::size_t> data_lower_bounds(data.get_shape().size());
std::vector<std::size_t> data_upper_bounds{data.get_shape()};
std::vector<std::size_t> filters_lower_bounds(filters.get_shape().size());
std::vector<std::size_t> filters_upper_bounds{filters.get_shape()};
for (std::size_t group{0}; group < m_groups; ++group)
{
// slice data
data_lower_bounds[1] = group * data_group_size;
data_upper_bounds[1] = (group + 1) * data_group_size;
auto sliced_data =
std::make_shared<ngraph::op::Slice>(data, data_lower_bounds, data_upper_bounds);
auto sliced_data = std::make_shared<ngraph::op::Slice>(
data.get_source_output(), data_lower_bounds, data_upper_bounds);
// slice filters
filters_lower_bounds[0] = group * filters_group_size;
filters_upper_bounds[0] = (group + 1) * filters_group_size;
auto sliced_filters = std::make_shared<ngraph::op::Slice>(
filters, filters_lower_bounds, filters_upper_bounds);
filters.get_source_output(), filters_lower_bounds, filters_upper_bounds);
convolution_nodes.push_back(
std::make_shared<ngraph::op::Convolution>(sliced_data,
......
......@@ -29,8 +29,12 @@ namespace ngraph
class GroupConvolution : public ngraph::op::util::FusedOp
{
public:
GroupConvolution(const std::shared_ptr<Node>& data_batch,
const std::shared_ptr<Node>& filters,
NGRAPH_API
static const std::string type_name;
const std::string& description() const override { return type_name; }
GroupConvolution();
GroupConvolution(const Output<Node>& data_batch,
const Output<Node>& filters,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
......
......@@ -19,11 +19,17 @@
using namespace std;
using namespace ngraph;
op::Slice::Slice(const shared_ptr<Node>& arg,
const string op::Slice::type_name{"Slice"};
op::Slice::Slice()
{
}
op::Slice::Slice(const Output<Node>& arg,
const Coordinate& lower_bounds,
const Coordinate& upper_bounds,
const Strides& strides)
: Op("Slice", check_single_output_args({arg}))
: Op({arg})
, m_lower_bounds(lower_bounds)
, m_upper_bounds(upper_bounds)
, m_strides(strides)
......@@ -31,10 +37,10 @@ op::Slice::Slice(const shared_ptr<Node>& arg,
constructor_validate_and_infer_types();
}
op::Slice::Slice(const shared_ptr<Node>& arg,
op::Slice::Slice(const Output<Node>& arg,
const Coordinate& lower_bounds,
const Coordinate& upper_bounds)
: Op("Slice", check_single_output_args({arg}))
: Op({arg})
, m_lower_bounds(lower_bounds)
, m_upper_bounds(upper_bounds)
, m_strides(Strides())
......
......@@ -28,6 +28,11 @@ namespace ngraph
class Slice : public Op
{
public:
NGRAPH_API
static const std::string type_name;
const std::string& description() const override { return type_name; }
/// \brief Constructs a tensor slice operation
Slice();
/// \brief Constructs a tensor slice operation.
///
/// \param arg The tensor to be sliced.
......@@ -35,17 +40,16 @@ namespace ngraph
/// \param upper_bounds The axiswise upper bounds of the slice (exclusive).
/// \param strides The slicing strides; for example, strides of `{n,m}` means to take
/// every nth row and every mth column of the input matrix.
Slice(const std::shared_ptr<Node>& arg,
Slice(const Output<Node>& arg,
const Coordinate& lower_bounds,
const Coordinate& upper_bounds,
const Strides& strides);
/// \brief Constructs a tensor slice operation with unit strides; i.e., every element inside the bounding box will be copied to the output slice.
///
/// \param arg The tensor to be sliced.
/// \param lower_bounds The axiswise lower bounds of the slice (inclusive).
/// \param upper_bounds The axiswise upper bounds of the slice (exclusive).
Slice(const std::shared_ptr<Node>& arg,
Slice(const Output<Node>& arg,
const Coordinate& lower_bounds,
const Coordinate& upper_bounds);
......
......@@ -30,6 +30,11 @@ op::util::FusedOp::FusedOp(const NodeVector& args)
{
}
op::util::FusedOp::FusedOp(const OutputVector& args)
: Op(args)
{
}
op::util::FusedOp::FusedOp(const std::string& node_type, const NodeVector& args)
: Op(node_type, args)
{
......
......@@ -51,6 +51,8 @@ namespace ngraph
/// \param args Nodes that produce the input tensors for the fused op
FusedOp(const NodeVector& args);
FusedOp(const OutputVector& args);
/// \brief Constructs a FusedOp
///
/// \param args Nodes that produce the input tensors for the fused op
......
......@@ -37,6 +37,9 @@ namespace ngraph
auto out_buffer_index = external_function->get_buffer_index(out[0].get_name());
auto count = static_cast<int>(out[0].get_size());
auto data_type = args[0].get_element_type().get_type_enum();
const ngraph::op::AllReduce* allreduce =
static_cast<const ngraph::op::AllReduce*>(node);
auto reduce_type = allreduce->get_reduce_type();
auto external_function_name = external_function->get_function_name();
NGRAPH_DEBUG_PRINT(
......@@ -48,11 +51,13 @@ namespace ngraph
node->get_friendly_name().c_str(),
count);
auto functor = [&, count, data_type, arg_buffer_index, out_buffer_index](
auto functor =
[&, count, reduce_type, data_type, arg_buffer_index, out_buffer_index](
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
get_distributed_interface()->all_reduce(ctx->buffer_data[arg_buffer_index],
ctx->buffer_data[out_buffer_index],
data_type,
reduce_type,
count);
};
functors.emplace_back(functor);
......
......@@ -53,9 +53,11 @@ namespace ngraph
if (is_int64)
{
if (args[0].get_element_type() == element::f32 ||
if ((args[0].get_element_type() == element::f32 ||
args[0].get_element_type() == element::f64 ||
args[0].get_element_type() == element::u8)
args[0].get_element_type() == element::u8 ||
args[0].get_element_type() == element::i8) &&
params_shape.size() <= 3 && out_shape.size() <= 3)
{
std::function<decltype(runtime::cpu::kernel::gather_i64<float, 2, 2>)>
kernel;
......@@ -111,9 +113,11 @@ namespace ngraph
else
{
if (args[0].get_element_type() == element::f32 ||
if ((args[0].get_element_type() == element::f32 ||
args[0].get_element_type() == element::f64 ||
args[0].get_element_type() == element::u8)
args[0].get_element_type() == element::u8 ||
args[0].get_element_type() == element::i8) &&
params_shape.size() <= 3 && out_shape.size() <= 3)
{
std::function<decltype(runtime::cpu::kernel::gather_i32<float, 2, 2>)>
kernel;
......
......@@ -46,7 +46,9 @@ namespace ngraph
}
if (args[0].get_element_type() != element::f64 &&
args[0].get_element_type() != element::f32)
args[0].get_element_type() != element::f32 &&
args[0].get_element_type() != element::u8 &&
args[0].get_element_type() != element::i8)
{
throw ngraph_error("Unsupported type in CPU Builder for ScatterAdd");
}
......@@ -59,6 +61,8 @@ namespace ngraph
auto element_type = args[0].get_element_type();
if (is_int64)
{
if (inputs_shape.size() <= 3 && updates_shape.size() <= 3)
{
std::function<decltype(runtime::cpu::kernel::scatter_add_i64<float, 2, 2>)>
kernel;
......@@ -91,6 +95,13 @@ namespace ngraph
functors.emplace_back(functor);
}
else
{
throw ngraph_error("Unsupported ranks in CPU Builder for ScatterAdd");
}
}
else
{
if (inputs_shape.size() <= 3 && updates_shape.size() <= 3)
{
std::function<decltype(runtime::cpu::kernel::scatter_add_i32<float, 2, 2>)>
kernel;
......@@ -122,6 +133,11 @@ namespace ngraph
};
functors.emplace_back(functor);
}
else
{
throw ngraph_error("Unsupported ranks in CPU Builder for ScatterAdd");
}
}
}
REGISTER_OP_BUILDER(ScatterAdd);
}
......
......@@ -211,14 +211,6 @@
KV = K<ET, 2, R2>; \
else if (R1 == 3) \
KV = K<ET, 3, R2>; \
else if (R1 == 4) \
KV = K<ET, 4, R2>; \
else if (R1 == 5) \
KV = K<ET, 5, R2>; \
else if (R1 == 6) \
KV = K<ET, 6, R2>; \
else if (R1 == 7) \
KV = K<ET, 7, R2>; \
else \
throw ngraph_error("Unsupported first rank " + std::to_string(R1) + " for kernel " #K);
......@@ -235,22 +227,6 @@
{ \
SELECT_RANK1(KV, ET, R1, 3, K); \
} \
else if (R2 == 4) \
{ \
SELECT_RANK1(KV, ET, R1, 4, K); \
} \
else if (R2 == 5) \
{ \
SELECT_RANK1(KV, ET, R1, 5, K); \
} \
else if (R2 == 6) \
{ \
SELECT_RANK1(KV, ET, R1, 6, K); \
} \
else if (R2 == 7) \
{ \
SELECT_RANK1(KV, ET, R1, 7, K); \
} \
else \
{ \
throw ngraph_error("Unsupported second rank " + std::to_string(R2) + " for kernel " #K); \
......@@ -270,6 +246,10 @@
{ \
SELECT_2RANKS(KV, uint8_t, R1, R2, K); \
} \
else if (ET == element::i8) \
{ \
SELECT_2RANKS(KV, int8_t, R1, R2, K); \
} \
else \
{ \
throw ngraph_error("Unsupported element type " + ET.c_type_string() + " for kernel " #K); \
......
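For context: these macro edits narrow the gather/scatter kernel instantiations from ranks up to 7 down to ranks up to 3 while adding an int8_t specialization, which is why the DEX builders earlier in this diff and the codegen emitter that follows guard their fast paths with an element::i8 check and shape-rank <= 3 checks.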
......@@ -271,10 +271,13 @@ namespace ngraph
template <>
void CPU_Emitter::EMITTER_DECL(ngraph::op::AllReduce)
{
const ngraph::op::AllReduce* allreduce =
static_cast<const ngraph::op::AllReduce*>(node);
writer << "ngraph::get_distributed_interface()->all_reduce(" << args[0].get_name()
<< ", " << out[0].get_name() << ", "
<< "ngraph::element::Type_t::" << args[0].get_element_type().get_type_name()
<< ", " << out[0].get_size() << ");\n";
<< ", " << out[0].get_size() << ", "
<< "ngraph::Reduce_t::" << allreduce->get_reduce_type() << ");\n";
}
template <>
......@@ -1842,8 +1845,9 @@ namespace ngraph
writer.block_begin();
if ((args[0].get_element_type() == element::f64 ||
args[0].get_element_type() == element::f32 ||
args[0].get_element_type() == element::u8) &&
gather->get_axis() == 0)
args[0].get_element_type() == element::u8 ||
args[0].get_element_type() == element::i8) &&
args[0].get_shape().size() <= 3 && out[0].get_shape().size() <= 3)
{
writer << "cpu::kernel::gather<" << args[0].get_type() << ", "
<< args[1].get_element_type().c_type_string() << ", "
......@@ -1903,8 +1907,11 @@ namespace ngraph
}
writer.block_begin();
if (args[0].get_element_type() == element::f64 ||
args[0].get_element_type() == element::f32)
if ((args[0].get_element_type() == element::f64 ||
args[0].get_element_type() == element::f32 ||
args[0].get_element_type() == element::u8 ||
args[0].get_element_type() == element::i8) &&
args[0].get_shape().size() <= 3 && args[2].get_shape().size() <= 3)
{
writer << "cpu::kernel::scatter_add<" << args[0].get_type() << ", "
<< args[1].get_element_type().c_type_string() << ", "
......
......@@ -31,7 +31,7 @@ namespace ngraph
{
namespace kernel
{
// Calculate the indices from position 0 to rank-1.
// Calculate the indices for positions 0 to rank-1.
static void
get_indices(const Shape& shape, int index, std::vector<int>& indices, int rank)
{
......@@ -93,8 +93,11 @@ namespace ngraph
if (indices_rank == 0)
{
//TODO Enable this if compiler issue with CODEGEN is fixed or DEX needs it.
#if 0
#ifdef _OPENMP
#pragma omp parallel for
#endif
#endif
for (int i = 0; i < outer_loop_num; i++)
{
......@@ -142,7 +145,11 @@ namespace ngraph
}
else
{
auto num_indices = shape_size(indices_shape);
size_t num_indices = 1;
for (auto d : indices_shape)
{
num_indices *= d;
}
#ifdef _OPENMP
#pragma omp parallel for
......
......@@ -2238,6 +2238,32 @@ void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qconvb_add()
std::dynamic_pointer_cast<ngraph::op::Add>(m.get_match_root()->get_argument(0));
auto dq_l_m = std::dynamic_pointer_cast<ngraph::op::Dequantize>(pattern_map[dq_l_label]);
auto dq_r_m = std::dynamic_pointer_cast<ngraph::op::Dequantize>(pattern_map[dq_r_label]);
// both left and right are QuantizedConvolutionBias
if (dq_r_m->get_argument(0)->description() == "QuantizedConvolutionBias")
{
for (auto user : m.get_match_root()->get_users())
{
auto q_m = std::dynamic_pointer_cast<ngraph::op::Quantize>(user);
if (q_m)
{
auto q_m_scale = q_m->get_argument(1);
auto dq_l_m_scale = dq_l_m->get_argument(1);
auto dq_r_m_scale = dq_r_m->get_argument(1);
if (!ngraph::compare_constants(q_m_scale, dq_l_m_scale) &&
ngraph::compare_constants(q_m_scale, dq_r_m_scale))
{
NGRAPH_DEBUG << "Scales of Q and DQ of right branch match";
// switch left and right branch
auto temp = dq_l_m;
dq_l_m = dq_r_m;
dq_r_m = temp;
}
break;
}
}
}
auto qconv =
std::static_pointer_cast<ngraph::op::QuantizedConvolutionBias>(dq_l_m->get_argument(0));
auto inplace_input = dq_r_m->get_argument(0);
......
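The intent of the inserted block, as far as the shown pattern goes: when both Add inputs come from QuantizedConvolutionBias and the downstream Quantize scale matches the right branch's Dequantize scale rather than the left one's, the two branches are swapped so that dq_l_m (and hence qconv below) refers to the branch whose scales line up with the requantization, leaving the other branch as the in-place summand.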
......@@ -24,6 +24,7 @@
#include <vector>
#include "ngraph/op/all.hpp"
#include "ngraph/op/allreduce.hpp"
#include "ngraph/op/any.hpp"
#include "ngraph/op/argmax.hpp"
#include "ngraph/op/argmin.hpp"
......@@ -255,9 +256,12 @@ private:
}
case OP_TYPEID::AllReduce:
{
const ngraph::op::AllReduce* allreduce =
static_cast<const ngraph::op::AllReduce*>(&node);
reference::allreduce<T>(args[0]->get_data_ptr<T>(),
out[0]->get_data_ptr<T>(),
node.get_input_element_type(0).get_type_enum(),
allreduce->get_reduce_type(),
static_cast<int>(shape_size(node.get_input_shape(0))));
break;
}
......
......@@ -76,11 +76,11 @@ namespace ngraph
// input count.
void check_inputs(std::size_t expected_input_count) const
{
if (op().get_input_size() != expected_input_count)
if (op().get_input_size() < expected_input_count)
{
std::ostringstream os;
os << "The PlaidML nGraph backend only supports " << op().description()
<< " operations with an input count == " << expected_input_count
<< " operations with an input count >= " << expected_input_count
<< " (got " << op().get_input_size() << " inputs)";
throw std::runtime_error{os.str()};
}
......
......@@ -261,92 +261,25 @@ batch_mat_mul_forward
dot_matrix_2x0_0x2
# dgkutnic ww24.5: these tests are to be triaged by the PlaidML team
convolution_3d_1item_large_5o3i_padded_uneven_filter_uneven_data_dilation_data_dilated
select
product_trivial
product_trivial_5d
product_to_scalar
product_matrix_columns
product_matrix_rows
product_3d_to_matrix_most_sig
product_3d_to_matrix_least_sig
product_3d_to_vector
product_3d_to_scalar
product_2d_to_scalar_int32
product_to_scalar_int32
product_to_scalar_int8
max_trivial
max_trivial_5d
max_to_scalar
max_to_scalar_int8
max_matrix_columns
max_matrix_rows
max_matrix_rows_int32
max_3d_to_matrix_most_sig
max_3d_to_matrix_least_sig
max_3d_to_vector
max_3d_to_scalar
max_3d_to_scalar_int32
min_trivial
min_trivial_5d
min_trivial_5d_int32
min_to_scalar
min_to_scalar_int8
min_matrix_columns
min_matrix_rows
min_matrix_rows_int32
min_3d_to_matrix_most_sig
min_3d_to_matrix_least_sig
min_3d_to_vector
min_3d_to_scalar
min_3d_to_scalar_int32
sum_to_scalar
# ww25.2: re-scrubbed this list of tests after fixing check_inputs
# initial debug points to some of these failing due to precision issues
sqrt
batch_norm_inference_0eps_f32
batch_norm_inference_f32
batch_norm_training_0eps_f32
argmin_trivial
argmax_trivial
argmin_trivial_in_i32
sum_large_1d_to_scalar
sum_matrix_columns
sum_matrix_6d
sum_matrix_rows
sum_3d_to_matrix_most_sig
sum_3d_to_matrix_least_sig
sum_3d_to_vector
sum_3d_to_scalar
sum_3d_to_scalar_int32
sum_5d_to_scalar
sum_5d_to_scalar_int32
sum_2d_to_scalar_int8
sum_stable_acc
sum_stable_simple_float
one_hot_scalar_2_in_3
one_hot_scalar_1_in_3
one_hot_scalar_0_in_3
lstm_cell_no_bias_no_peepholes
lstm_cell_bias_peepholes
lstm_cell_bias_peepholes_clip_input_forget
lstm_cell_activaction_functions
group_conv_transpose
group_conv_transpose_output_shape
divide_python_rounding_int32
any_2x2_to_scalar_true
any_2x2_to_scalar_false
any_2x3_eliminate_col_dim
any_2x3_eliminate_row_dim
any_2x2x3_eliminate_dim_1
any_2x2x3_eliminate_dim_2
any_2x2x3_eliminate_dims_0_1
any_2x2x3_eliminate_dims_0_2
any_2x2x3_eliminate_dims_1_2
any_2x2x3_eliminate_dims_0_1_2
all_trivial
all_2x2_to_scalar_false
all_2x2_to_scalar_true
all_2x3_eliminate_col_dim
all_2x3_eliminate_row_dim
all_2x2x3_eliminate_dim_0
all_2x2x3_eliminate_dim_1
all_2x2x3_eliminate_dim_2
all_2x2x3_eliminate_dims_0_1
all_2x2x3_eliminate_dims_0_2
all_2x2x3_eliminate_dims_1_2
all_2x2x3_eliminate_dims_0_1_2
all_dynamic_axis
all_change_axis
backwards_broadcast0
backwards_broadcast1
backwards_select
backwards_select_nested
backwards_sum_v2s
backwards_sum_m2s
backwards_sum_m2v_0
backwards_sum_m2v_1
backwards_batchmatmul_tensor2_tensor2
......@@ -25,9 +25,13 @@ namespace ngraph
namespace reference
{
template <typename T>
void allreduce(T* arg, T* out, const element::Type_t element_type, int count)
void allreduce(T* arg,
T* out,
const element::Type_t element_type,
const reduction::Type reduce_type,
int count)
{
get_distributed_interface()->all_reduce(arg, out, element_type, count);
get_distributed_interface()->all_reduce(arg, out, element_type, reduce_type, count);
}
}
}
......
......@@ -62,42 +62,3 @@ namespace ngraph
/// Option may be enabled by setting the environment variable NGRAPH_SERIALIZER_OUTPUT_SHAPES
void set_serialize_output_shapes(bool enable);
}
#ifdef NGRAPH_JSON_DISABLE
// Rather than making every reference to the serializer conditionally compile here we just
// provide some null stubs to resolve link issues
// The `inline` is so we don't get multiple definitions of the functions
std::string inline ngraph::serialize(std::shared_ptr<ngraph::Function> func, size_t indent)
{
return "";
}
void inline ngraph::serialize(const std::string& path,
std::shared_ptr<ngraph::Function> func,
size_t indent)
{
throw std::runtime_error("serializer disabled in build");
}
void inline ngraph::serialize(std::ostream& out,
std::shared_ptr<ngraph::Function> func,
size_t indent)
{
throw std::runtime_error("serializer disabled in build");
}
std::shared_ptr<ngraph::Function> inline ngraph::deserialize(std::istream& in)
{
throw std::runtime_error("serializer disabled in build");
}
std::shared_ptr<ngraph::Function> inline ngraph::deserialize(const std::string& str)
{
throw std::runtime_error("serializer disabled in build");
}
void inline ngraph::set_serialize_output_shapes(bool enable)
{
throw std::runtime_error("serializer disabled in build");
}
#endif
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/serializer.hpp"
std::string ngraph::serialize(std::shared_ptr<ngraph::Function> func, size_t indent)
{
throw std::runtime_error("serializer disabled in build");
}
void ngraph::serialize(const std::string& path,
std::shared_ptr<ngraph::Function> func,
size_t indent)
{
throw std::runtime_error("serializer disabled in build");
}
void ngraph::serialize(std::ostream& out, std::shared_ptr<ngraph::Function> func, size_t indent)
{
throw std::runtime_error("serializer disabled in build");
}
std::shared_ptr<ngraph::Function> ngraph::deserialize(std::istream& in)
{
throw std::runtime_error("serializer disabled in build");
}
std::shared_ptr<ngraph::Function> ngraph::deserialize(const std::string& str)
{
throw std::runtime_error("serializer disabled in build");
}
void ngraph::set_serialize_output_shapes(bool enable)
{
throw std::runtime_error("serializer disabled in build");
}
......@@ -35,6 +35,7 @@ using namespace ngraph;
static string s_manifest = "${MANIFEST}";
#if 0
NGRAPH_TEST(${BACKEND_NAME}, scatter_add_4d_indices)
{
Shape ref_shape{3, 3, 3};
......@@ -122,13 +123,14 @@ NGRAPH_TEST(${BACKEND_NAME}, scatter_add_3d_indices)
read_vector<float>(result),
MIN_FLOAT_TOLERANCE_BITS));
}
#endif
NGRAPH_TEST(${BACKEND_NAME}, scatter_add_2d_indices)
{
Shape ref_shape{2, 3, 3};
Shape ref_shape{3};
Shape indices_shape{2, 2};
Shape updates_shape{2, 2, 3, 3};
Shape out_shape{2, 3, 3};
Shape updates_shape{2, 2};
Shape out_shape{3};
auto R = make_shared<op::Parameter>(element::f32, ref_shape);
auto I = make_shared<op::Parameter>(element::i32, indices_shape);
auto U = make_shared<op::Parameter>(element::f32, updates_shape);
......@@ -140,20 +142,17 @@ NGRAPH_TEST(${BACKEND_NAME}, scatter_add_2d_indices)
// Create some tensors for input/output
auto r = backend->create_tensor(element::f32, ref_shape);
copy_data(r, vector<float>{0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 9});
copy_data(r, vector<float>{0, 1, 2});
auto i = backend->create_tensor(element::i32, indices_shape);
copy_data(i, vector<int32_t>{0, 1, 1, 0});
auto u = backend->create_tensor(element::f32, updates_shape);
copy_data(u, vector<float>{0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 9,
1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8});
copy_data(u, vector<float>{1, 2, 3, 4});
auto result = backend->create_tensor(element::f32, out_shape);
auto c = backend->compile(f);
c->call_with_validate({result}, {r, i, u});
EXPECT_TRUE(test::all_close_f(
(vector<float>{0, 3, 6, 9, 12, 15, 18, 21, 24, 3, 6, 9, 12, 15, 18, 21, 24, 27}),
read_vector<float>(result),
MIN_FLOAT_TOLERANCE_BITS));
(vector<float>{5, 6, 2}), read_vector<float>(result), MIN_FLOAT_TOLERANCE_BITS));
}
NGRAPH_TEST(${BACKEND_NAME}, scatter_add_1d_indices)
......
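The rewritten expectation can be checked by hand: with ref {0, 1, 2}, indices {0, 1, 1, 0} and updates {1, 2, 3, 4}, index 0 accumulates updates 1 and 4 (0 + 5 = 5), index 1 accumulates 2 and 3 (1 + 5 = 6), and index 2 is untouched, giving {5, 6, 2}.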
......@@ -150,3 +150,19 @@ TEST(build_graph, no_arg_construction)
validate_nodes_and_infer_types(ops);
ASSERT_EQ(add1->get_output_shape(0), Shape{7});
}
TEST(build_graph, multi_output_split)
{
const auto data = make_shared<op::Parameter>(element::f32, Shape{64, 8, 100, 150});
auto filters = make_shared<op::Parameter>(element::f32, Shape{128, 2, 10, 20});
const auto split = make_shared<op::Split>(data, 1, 2);
auto conv = make_shared<op::GroupConvolution>(split->output(1),
filters,
Strides{1, 1},
Strides{1, 1},
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1},
2);
EXPECT_EQ(conv->get_shape(), (Shape{64, 128, 91, 131}));
}
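The expected shape can be checked by hand: Split(data, 1, 2) yields inputs of shape {64, 4, 100, 150}; with groups = 2 each group convolves 2 of those channels with 64 of the 128 filters ({128, 2, 10, 20}), so the concatenated output has 128 channels, and the valid-convolution spatial dims are 100 - 10 + 1 = 91 and 150 - 20 + 1 = 131, i.e. Shape{64, 128, 91, 131}.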
......@@ -3691,6 +3691,120 @@ TEST(cpu_quant_fusion, qconvba)
EXPECT_TRUE(test::all_close(cpu1_results.at(0), cpu2_results.at(0)));
}
TEST(cpu_quant_fusion, qconvba_q)
{
auto make_function = []() {
Shape shape_input{1, 2, 2, 2};
Shape shape_weights{1, 2, 1, 1};
Shape shape_summand{1, 1, 2, 2};
auto input_l = std::make_shared<op::Parameter>(element::f32, shape_input);
auto weights_l = std::make_shared<op::Parameter>(element::f32, shape_weights);
auto bias_l = std::make_shared<op::Parameter>(element::f32, Shape{shape_weights[0]});
auto input_r = std::make_shared<op::Parameter>(element::f32, shape_input);
auto weights_r = std::make_shared<op::Parameter>(element::f32, shape_weights);
auto bias_r = std::make_shared<op::Parameter>(element::f32, Shape{shape_weights[0]});
auto input_scale_l = op::Constant::create(element::f32, Shape{}, {2.0f});
auto weights_scale_l = op::Constant::create(element::f32, Shape{}, {2.0f});
auto output_scale_l = op::Constant::create(element::f32, Shape{}, {4.0f});
auto input_scale_r = op::Constant::create(element::f32, Shape{}, {5.0f});
auto weights_scale_r = op::Constant::create(element::f32, Shape{}, {5.0f});
auto output_scale_r = op::Constant::create(element::f32, Shape{}, {20.0f});
auto int8_zero = op::Constant::create(element::i8, Shape{}, {0});
auto int32_zero = op::Constant::create(element::i32, Shape{}, {0});
auto uint8_zero = op::Constant::create(element::u8, Shape{}, {0});
op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN;
auto q_input_l = std::make_shared<op::Quantize>(
input_l, input_scale_l, uint8_zero, element::u8, AxisSet{}, round_mode);
auto q_weights_l = std::make_shared<op::Quantize>(
weights_l, weights_scale_l, int8_zero, element::i8, AxisSet{}, round_mode);
auto q_bias_l = std::make_shared<op::Quantize>(bias_l,
input_scale_l * weights_scale_l,
int32_zero,
element::i32,
AxisSet{},
round_mode);
auto q_input_r = std::make_shared<op::Quantize>(
input_r, input_scale_r, uint8_zero, element::u8, AxisSet{}, round_mode);
auto q_weights_r = std::make_shared<op::Quantize>(
weights_r, weights_scale_r, int8_zero, element::i8, AxisSet{}, round_mode);
auto q_bias_r = std::make_shared<op::Quantize>(bias_r,
input_scale_r * weights_scale_r,
int32_zero,
element::i32,
AxisSet{},
round_mode);
// Left Graph
auto requant_scale_l = (input_scale_l * weights_scale_l) / output_scale_l;
auto conv_l = std::make_shared<op::QuantizedConvolutionBias>(q_input_l,
q_weights_l,
q_bias_l,
Strides{1, 1},
Strides{1, 1},
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1},
requant_scale_l);
auto dq_l = std::make_shared<op::Dequantize>(
conv_l, output_scale_l, int8_zero, element::f32, AxisSet{});
auto r_l = std::make_shared<op::Reshape>(dq_l, AxisVector{0, 1, 2, 3}, Shape{1, 2, 2});
auto b_l = std::make_shared<op::Broadcast>(r_l, Shape{1, 1, 2, 2}, AxisSet{0});
// Right Graph
auto requant_scale_r = (input_scale_r * weights_scale_r) / output_scale_r;
auto conv_r = std::make_shared<op::QuantizedConvolutionBias>(q_input_r,
q_weights_r,
q_bias_r,
Strides{1, 1},
Strides{1, 1},
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1},
requant_scale_r);
auto dq_r = std::make_shared<op::Dequantize>(
conv_r, output_scale_r, int8_zero, element::f32, AxisSet{});
auto r_r = std::make_shared<op::Reshape>(dq_r, AxisVector{0, 1, 2, 3}, Shape{1, 2, 2});
auto b_r = std::make_shared<op::Broadcast>(r_r, Shape{1, 1, 2, 2}, AxisSet{0});
auto add = b_l + b_r;
auto relu = std::make_shared<op::Relu>(add);
auto q = std::make_shared<op::Quantize>(
relu, output_scale_r, uint8_zero, element::u8, AxisSet{}, round_mode);
auto dq = std::make_shared<op::Dequantize>(
q, output_scale_r, uint8_zero, element::f32, AxisSet{});
return make_shared<Function>(
NodeVector{dq},
ParameterVector{input_l, weights_l, bias_l, input_r, weights_r, bias_r});
};
auto cpu_f1 = make_function();
auto cpu_f2 = make_function();
test::Uniform<float> rng(2.0f, 2.0f);
vector<vector<float>> args;
for (shared_ptr<op::Parameter> param : cpu_f1->get_parameters())
{
vector<float> tensor_val(shape_size(param->get_shape()));
rng.initialize(tensor_val);
args.push_back(tensor_val);
}
// Disable CPUQuantFusion
set_environment("NGRAPH_PASS_ENABLES", "CPUQuantFusion:0", 1);
auto cpu1_results = execute(cpu_f1, args, "CPU");
// Enable CPUQuantFusion
set_environment("NGRAPH_PASS_ENABLES", "CPUQuantFusion:1", 1);
auto cpu2_results = execute(cpu_f2, args, "CPU");
EXPECT_TRUE(test::all_close(cpu1_results.at(0), cpu2_results.at(0)));
auto backend = runtime::Backend::create("CPU");
auto fuse = make_function();
backend->compile(fuse);
ASSERT_EQ(count_ops_of_type<op::Quantize>(fuse), 6);
}
#ifndef NGRAPH_JSON_DISABLE
// Tests that rely on deserializing json files
TEST(cpu_fusion, fuse_conv_bias)
......
......@@ -29,25 +29,61 @@
using namespace std;
using namespace ngraph;
TEST(distributed_${BACKEND_NAME}, allreduce)
static void test_allreduce_common(reduction::Type reduce_type)
{
auto comm_size = get_distributed_interface()->get_size();
if (comm_size > 1)
{
auto shape = Shape{2, 2};
auto A = make_shared<op::Parameter>(element::f32, shape);
auto f = make_shared<Function>(make_shared<op::AllReduce>(A), ParameterVector{A});
auto f =
make_shared<Function>(make_shared<op::AllReduce>(A, reduce_type), ParameterVector{A});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
auto v = vector<float>{1, 2, 3, 4};
auto a = backend->create_tensor(element::f32, shape);
copy_data(a, vector<float>{1, 2, 3, 4});
auto result = backend->create_tensor(element::f32, shape);
#if !(defined(__GNUC__) && (__GNUC__ == 4 && __GNUC_MINOR__ == 8))
#pragma GCC diagnostic push
#pragma GCC diagnostic error "-Wswitch"
#pragma GCC diagnostic error "-Wswitch-enum"
#endif
switch (reduce_type.get_type())
{
case reduction::Type_t::sum:
copy_data(a, v);
std::transform(
v.begin(), v.end(), v.begin(), std::bind1st(std::multiplies<float>(), comm_size));
break;
case reduction::Type_t::prod:
copy_data(a, v);
std::transform(v.begin(), v.end(), v.begin(), [&](float elm) -> float {
return pow(elm, comm_size);
});
break;
case reduction::Type_t::min:
case reduction::Type_t::max:
auto shift = get_distributed_interface()->get_rank();
std::rotate(v.begin(), v.begin() + shift % v.size(), v.end());
copy_data(a, v);
if (reduce_type == reduction::Type_t::min)
{
std::fill(v.begin(), v.end(), 1);
for (int i = 1; i < static_cast<int>(v.size()) - comm_size + 1; i++)
v[i] = i + 1;
}
else
{
std::fill(v.begin(), v.end(), v.size());
for (int i = 0; i < static_cast<int>(v.size()) - comm_size; i++)
v[i] = i + 2;
}
}
#if !(defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ == 8)
#pragma GCC diagnostic pop
#endif
auto handle = backend->compile(f);
handle->call_with_validate({result}, {a});
......@@ -55,6 +91,28 @@ TEST(distributed_${BACKEND_NAME}, allreduce)
}
}
TEST(distributed_${BACKEND_NAME}, allreduce_sum)
{
test_allreduce_common(reduction::sum);
}
TEST(distributed_${BACKEND_NAME}, allreduce_min)
{
test_allreduce_common(reduction::min);
}
TEST(distributed_${BACKEND_NAME}, allreduce_max)
{
test_allreduce_common(reduction::max);
}
#if !defined(NGRAPH_DISTRIBUTED_MLSL_ENABLE)
TEST(distributed_${BACKEND_NAME}, allreduce_prod)
{
test_allreduce_common(reduction::prod);
}
#endif
TEST(distributed_${BACKEND_NAME}, broadcastdistributed)
{
auto shape = Shape{2, 2};
......
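As a worked example of the min/max expectations in test_allreduce_common above, assume comm_size = 2: rank 0 contributes {1, 2, 3, 4} and rank 1 the rotated {2, 3, 4, 1}, so the elementwise min is {1, 2, 3, 1} and the elementwise max is {2, 3, 4, 4}; these are exactly the vectors the two fill-then-loop blocks construct (ones with v[i] = i + 1 for i from 1 to size - comm_size, and size-filled values with v[i] = i + 2 for i < size - comm_size).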
......@@ -324,3 +324,19 @@ TEST(serialize, constant_infinity_nan)
EXPECT_NE(str.find(R"(label="C)"), string::npos);
EXPECT_NE(str.find(R"(label="D)"), string::npos);
}
TEST(serialize, non_zero_node_output)
{
auto arg = make_shared<op::Parameter>(element::f32, Shape{10});
auto topk = make_shared<op::TopK>(arg, 0, element::i32, 5, true);
auto abs = make_shared<op::Abs>(Output<Node>(topk, 1));
auto result = make_shared<op::Result>(abs);
auto f = make_shared<Function>(ResultVector{result}, ParameterVector{arg});
string s = serialize(f);
shared_ptr<Function> g = deserialize(s);
auto g_result = g->get_results().at(0);
auto g_abs = g_result->input(0).get_source_output().get_node_shared_ptr();
auto topk_out = g_abs->input(0).get_source_output();
EXPECT_EQ(topk_out.get_index(), 1);
EXPECT_EQ(topk_out.get_node()->description(), "TopK");
}