Commit e5757725 authored by nishant.b.patel

Merge branch 'master' into quantized_conv_temp

parents b13eacf6 d0f03eec
@@ -19,7 +19,7 @@ import test

 def pytest_addoption(parser):
     parser.addoption('--backend', default='INTERPRETER',
-                     choices=['INTERPRETER', 'CPU', 'GPU', 'NNP', 'PlaidML'],
+                     choices=['INTERPRETER', 'CPU', 'GPU', 'NNP', 'PlaidML', 'INTELGPU'],
                      help='Select from available backends')
@@ -31,20 +31,25 @@ def pytest_configure(config):

 def pytest_collection_modifyitems(config, items):
     backend_name = config.getvalue('backend')
-    gpu_skip = pytest.mark.skip(reason='Skipping test on the GPU backend.')
-    cpu_skip = pytest.mark.skip(reason='Skipping test on the CPU backend.')
-    nnp_skip = pytest.mark.skip(reason='Skipping test on the NNP backend.')
-    interpreter_skip = pytest.mark.skip(reason='Skipping test on the INTERPRETER backend.')
-    plaidml_skip = pytest.mark.skip(reason='Skipping test on the PlaidML backend.')
+    keywords = {
+        'GPU': 'skip_on_gpu',
+        'CPU': 'skip_on_cpu',
+        'NNP': 'skip_on_nnp',
+        'INTERPRETER': 'skip_on_interpreter',
+        'PlaidML': 'skip_on_plaidml',
+        'INTELGPU': 'skip_on_intelgpu',
+    }
+    skip_markers = {
+        'GPU': pytest.mark.skip(reason='Skipping test on the GPU backend.'),
+        'CPU': pytest.mark.skip(reason='Skipping test on the CPU backend.'),
+        'NNP': pytest.mark.skip(reason='Skipping test on the NNP backend.'),
+        'INTERPRETER': pytest.mark.skip(reason='Skipping test on the INTERPRETER backend.'),
+        'PlaidML': pytest.mark.skip(reason='Skipping test on the PlaidML backend.'),
+        'INTELGPU': pytest.mark.skip(reason='Skipping test on the INTELGPU backend.'),
+    }
     for item in items:
-        if backend_name == 'GPU' and 'skip_on_gpu' in item.keywords:
-            item.add_marker(gpu_skip)
-        if backend_name == 'CPU' and 'skip_on_cpu' in item.keywords:
-            item.add_marker(cpu_skip)
-        if backend_name == 'NNP' and 'skip_on_nnp' in item.keywords:
-            item.add_marker(nnp_skip)
-        if backend_name == 'INTERPRETER' and 'skip_on_interpreter' in item.keywords:
-            item.add_marker(interpreter_skip)
-        if backend_name == 'PlaidML' and 'skip_on_plaidml' in item.keywords:
-            item.add_marker(plaidml_skip)
+        skip_this_backend = keywords[backend_name]
+        if skip_this_backend in item.keywords:
+            item.add_marker(skip_markers[backend_name])
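The table-driven rewrite above means adding a backend takes one entry per dict instead of a new if branch. A small usage sketch (the test module and test body are hypothetical; the marker and option names come from the conftest above):

    # test_example.py -- run with: pytest --backend=INTELGPU
    import pytest

    @pytest.mark.skip_on_intelgpu  # consumed by pytest_collection_modifyitems above
    def test_something_unsupported_on_intelgpu():
        assert True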
@@ -33,7 +33,6 @@ from test.ngraph.util import run_op_numeric_data, run_op_node
     (ng.exp, np.exp, -100., 100.),
     (ng.floor, np.floor, -100., 100.),
     (ng.log, np.log, 0, 100.),
-    (ng.logical_not, np.logical_not, -10, 10),
     (ng.relu, lambda x: np.maximum(0, x), -100., 100.),
     (ng.sign, np.sign, -100., 100.),
     (ng.sin, np.sin, -100., 100.),
@@ -68,7 +67,6 @@ def test_unary_op_array(ng_api_fn, numpy_fn, range_start, range_end):
     (ng.exp, np.exp, np.float32(1.5)),
     (ng.floor, np.floor, np.float32(1.5)),
     (ng.log, np.log, np.float32(1.5)),
-    (ng.logical_not, np.logical_not, np.int32(0)),
     (ng.relu, lambda x: np.maximum(0, x), np.float32(-0.125)),
     (ng.sign, np.sign, np.float32(0.)),
     (ng.sin, np.sin, np.float32(np.pi / 4.0)),
@@ -86,3 +84,19 @@ def test_unary_op_scalar(ng_api_fn, numpy_fn, input_data):
     result = run_op_numeric_data(input_data, ng_api_fn)
     assert np.allclose(result, expected)
+
+
+@pytest.mark.parametrize('input_data', [
+    (np.array([True, False, True, False])),
+    (np.array(True)),
+    (np.array(False)),
+])
+@pytest.mark.skip_on_gpu
+def test_logical_not(input_data):
+    expected = np.logical_not(input_data)
+    result = run_op_node([input_data], ng.logical_not)[0]
+    assert np.array_equal(result, expected)
+
+    result = run_op_numeric_data(input_data, ng.logical_not)[0]
+    assert np.array_equal(result, expected)
@@ -818,6 +818,7 @@ def test_slice():

 @pytest.mark.skip_on_gpu
+@pytest.mark.skip_on_intelgpu
 def test_replace_slice():
     element_type = Type.f32
...
@@ -482,6 +482,8 @@ set(SRC ${SRC}

 if(NGRAPH_JSON_ENABLE)
     list(APPEND SRC serializer.cpp serializer.hpp event_tracing.cpp event_tracing.hpp)
+else()
+    list(APPEND SRC serializer_stub.cpp)
 endif()

 configure_file(version.in.hpp version.hpp)
...
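With the new else branch, builds that turn JSON support off still link: a stub translation unit (serializer_stub.cpp, shown near the end of this diff) provides the serializer entry points. Assuming NGRAPH_JSON_ENABLE is an ordinary user-settable CMake option, a configure line such as the following would exercise the new path:

    cmake -DNGRAPH_JSON_ENABLE=OFF ..   # SRC picks up serializer_stub.cpp instead of serializer.cpp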
@@ -22,6 +22,41 @@

 using namespace ngraph;

+NGRAPH_API const reduction::Type reduction::sum(reduction::Type_t::sum);
+NGRAPH_API const reduction::Type reduction::prod(reduction::Type_t::prod);
+NGRAPH_API const reduction::Type reduction::min(reduction::Type_t::min);
+NGRAPH_API const reduction::Type reduction::max(reduction::Type_t::max);
+
+std::ostream& reduction::operator<<(std::ostream& out, const reduction::Type& obj)
+{
+#if !(defined(__GNUC__) && (__GNUC__ == 4 && __GNUC_MINOR__ == 8))
+#pragma GCC diagnostic push
+#pragma GCC diagnostic error "-Wswitch"
+#pragma GCC diagnostic error "-Wswitch-enum"
+#endif
+    switch (obj.get_type())
+    {
+    case reduction::Type_t::sum: out << "sum"; break;
+    case reduction::Type_t::prod: out << "prod"; break;
+    case reduction::Type_t::min: out << "min"; break;
+    case reduction::Type_t::max: out << "max"; break;
+    }
+#if !(defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ == 8)
+#pragma GCC diagnostic pop
+#endif
+    return out;
+}
+
+bool reduction::Type::operator==(const reduction::Type& other) const
+{
+    return m_type == other.m_type;
+}
+
+reduction::Type_t reduction::Type::get_type() const
+{
+    return m_type;
+}
+
 static std::unique_ptr<DistributedInterface> s_distributed_interface;

 void ngraph::set_distributed_interface(std::unique_ptr<DistributedInterface> distributed_interface)
...
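A short sketch of how the new reduction tokens behave, using only the declarations from this diff (the standalone main() and the include path are illustrative assumptions):

    #include <iostream>
    #include "ngraph/distributed.hpp" // assumed header for the reduction namespace

    int main()
    {
        ngraph::reduction::Type t = ngraph::reduction::max;
        if (t != ngraph::reduction::sum) // operator!= is defined in terms of operator==
        {
            std::cout << t << '\n'; // operator<< prints "max"
        }
        return 0;
    }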
@@ -24,6 +24,38 @@

 namespace ngraph
 {
+    namespace reduction
+    {
+        enum class Type_t
+        {
+            sum,
+            prod,
+            min,
+            max,
+        };
+
+        class Type
+        {
+        public:
+            Type(const Type_t t)
+                : m_type(t)
+            {
+            }
+
+            friend std::ostream& operator<<(std::ostream&, const Type&);
+            bool operator==(const Type& other) const;
+            bool operator!=(const Type& other) const { return !(*this == other); }
+            Type_t get_type() const;
+
+        private:
+            Type_t m_type;
+        };
+
+        std::ostream& operator<<(std::ostream& out, const Type& obj);
+
+        extern NGRAPH_API const Type sum;
+        extern NGRAPH_API const Type prod;
+        extern NGRAPH_API const Type min;
+        extern NGRAPH_API const Type max;
+    }
+
     class DistributedInterface
     {
     public:
@@ -33,8 +65,11 @@ namespace ngraph
         virtual int get_rank() = 0;
         virtual void log_print(const std::string& timestamp, const std::vector<char>& buf) = 0;

-        virtual void
-            all_reduce(void* in, void* out, element::Type_t element_type, size_t count) = 0;
+        virtual void all_reduce(void* in,
+                                void* out,
+                                element::Type_t element_type,
+                                reduction::Type reduce_type,
+                                size_t count) = 0;
         virtual void
             broadcast(void* in, element::Type_t element_type, size_t count, int root_id) = 0;
     };
...
@@ -65,8 +65,11 @@ namespace ngraph
             std::printf("%s [MLSL RANK: %d]: %s\n", timestamp.c_str(), get_rank(), buf.data());
         }

-        void
-            all_reduce(void* in, void* out, element::Type_t element_type, size_t count) override
+        void all_reduce(void* in,
+                        void* out,
+                        element::Type_t element_type,
+                        reduction::Type reduce_type,
+                        size_t count) override
         {
             auto data_type = MLSL::DT_FLOAT;
@@ -83,10 +86,29 @@ namespace ngraph
                 throw std::runtime_error("AllReduce op supports only f32 and f64 types");
             }

+            decltype(MLSL::RT_SUM) mlsl_reduce_type;
+#if !(defined(__GNUC__) && (__GNUC__ == 4 && __GNUC_MINOR__ == 8))
+#pragma GCC diagnostic push
+#pragma GCC diagnostic error "-Wswitch"
+#pragma GCC diagnostic error "-Wswitch-enum"
+#endif
+            switch (reduce_type.get_type())
+            {
+            case reduction::Type_t::sum: mlsl_reduce_type = MLSL::RT_SUM; break;
+            case reduction::Type_t::prod:
+                throw std::runtime_error("MLSL doesn't support allreduce prod");
+                break;
+            case reduction::Type_t::min: mlsl_reduce_type = MLSL::RT_MIN; break;
+            case reduction::Type_t::max: mlsl_reduce_type = MLSL::RT_MAX; break;
+            }
+#if !(defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ == 8)
+#pragma GCC diagnostic pop
+#endif
+
             MLSL::Environment& env = MLSL::Environment::GetEnv();
             MLSL::Distribution* distribution = env.CreateDistribution(env.GetProcessCount(), 1);
-            MLSL::CommReq* req =
-                distribution->AllReduce(in, out, count, data_type, MLSL::RT_SUM, MLSL::GT_DATA);
+            MLSL::CommReq* req = distribution->AllReduce(
+                in, out, count, data_type, mlsl_reduce_type, MLSL::GT_DATA);
             env.Wait(req);
             env.DeleteDistribution(distribution);
         }
...
@@ -35,8 +35,11 @@ namespace ngraph
         {
             std::printf("%s: %s\n", timestamp.c_str(), buf.data());
         }

-        void
-            all_reduce(void* in, void* out, element::Type_t element_type, size_t count) override
+        void all_reduce(void* in,
+                        void* out,
+                        element::Type_t element_type,
+                        reduction::Type reduce_type,
+                        size_t count) override
         {
             throw ngraph_error("Distributed Library not supported/mentioned");
         }
...
@@ -77,8 +77,11 @@ namespace ngraph
                 "%s [OpenMPI RANK: %d]: %s\n", timestamp.c_str(), get_rank(), buf.data());
         }

-        void
-            all_reduce(void* in, void* out, element::Type_t element_type, size_t count) override
+        void all_reduce(void* in,
+                        void* out,
+                        element::Type_t element_type,
+                        reduction::Type reduce_type,
+                        size_t count) override
         {
             auto data_type = MPI_FLOAT;
@@ -95,7 +98,24 @@ namespace ngraph
                 throw std::runtime_error("AllReduce op supports only f32 and f64 types");
             }

-            MPI_Allreduce(in, out, count, data_type, MPI_SUM, MPI_COMM_WORLD);
+            decltype(MPI_SUM) mpi_reduce_type;
+#if !(defined(__GNUC__) && (__GNUC__ == 4 && __GNUC_MINOR__ == 8))
+#pragma GCC diagnostic push
+#pragma GCC diagnostic error "-Wswitch"
+#pragma GCC diagnostic error "-Wswitch-enum"
+#endif
+            switch (reduce_type.get_type())
+            {
+            case reduction::Type_t::sum: mpi_reduce_type = MPI_SUM; break;
+            case reduction::Type_t::prod: mpi_reduce_type = MPI_PROD; break;
+            case reduction::Type_t::min: mpi_reduce_type = MPI_MIN; break;
+            case reduction::Type_t::max: mpi_reduce_type = MPI_MAX; break;
+            }
+#if !(defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ == 8)
+#pragma GCC diagnostic pop
+#endif
+
+            MPI_Allreduce(in, out, count, data_type, mpi_reduce_type, MPI_COMM_WORLD);
         }

         void broadcast(void* in,
...
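For reference, a minimal standalone program making the same MPI call the backend now issues, here with the min reduction (illustrative harness, not part of the commit; build with an MPI wrapper such as mpicxx and launch under mpirun):

    #include <cstdio>
    #include <mpi.h>

    int main(int argc, char** argv)
    {
        MPI_Init(&argc, &argv);
        int rank = 0;
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        float in = static_cast<float>(rank + 1); // each rank contributes rank + 1
        float out = 0.f;
        MPI_Allreduce(&in, &out, 1, MPI_FLOAT, MPI_MIN, MPI_COMM_WORLD);
        std::printf("rank %d sees min = %g\n", rank, out); // prints 1 on every rank
        MPI_Finalize();
        return 0;
    }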
@@ -22,11 +22,13 @@ using namespace ngraph;

 const string op::AllReduce::type_name{"AllReduce"};

 op::AllReduce::AllReduce()
+    : m_reduce_type(reduction::sum)
 {
 }

-op::AllReduce::AllReduce(const shared_ptr<Node>& arg)
+op::AllReduce::AllReduce(const shared_ptr<Node>& arg, const reduction::Type reduce_type)
     : Op(check_single_output_args({arg}))
+    , m_reduce_type(reduce_type)
 {
     constructor_validate_and_infer_types();
 }
@@ -47,5 +49,10 @@ void op::AllReduce::validate_and_infer_types()

 shared_ptr<Node> op::AllReduce::copy_with_new_args(const NodeVector& new_args) const
 {
     check_new_args_count(this, new_args);
-    return make_shared<AllReduce>(new_args.at(0));
+    return make_shared<AllReduce>(new_args.at(0), get_reduce_type());
 }
+
+reduction::Type op::AllReduce::get_reduce_type() const
+{
+    return m_reduce_type;
+}
@@ -30,11 +30,16 @@ namespace ngraph
             static const std::string type_name;
             const std::string& description() const override { return type_name; }
             AllReduce();
-            AllReduce(const std::shared_ptr<Node>& arg);
+            AllReduce(const std::shared_ptr<Node>& arg,
+                      const reduction::Type reduce_type = reduction::sum);

             void validate_and_infer_types() override;
             std::shared_ptr<Node> copy_with_new_args(const NodeVector& new_args) const override;
+            reduction::Type get_reduce_type() const;
+
+        private:
+            const reduction::Type m_reduce_type;
         };
     }
 }
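Putting the op and the new reduction types together, a graph using the extended constructor might be built as in the sketch below; it mirrors the distributed test later in this diff, and omitting the second argument keeps the old sum behavior for existing callers:

    // Sketch only; assumes the ngraph headers touched by this diff.
    auto A = std::make_shared<ngraph::op::Parameter>(ngraph::element::f32, ngraph::Shape{2, 2});
    auto all_reduce = std::make_shared<ngraph::op::AllReduce>(A, ngraph::reduction::min);
    auto f = std::make_shared<ngraph::Function>(all_reduce, ngraph::ParameterVector{A});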
@@ -27,8 +27,14 @@
 using namespace std;
 using namespace ngraph;

-op::GroupConvolution::GroupConvolution(const shared_ptr<Node>& data_batch,
-                                       const shared_ptr<Node>& filters,
+const string op::GroupConvolution::type_name{"GroupConvolution"};
+
+op::GroupConvolution::GroupConvolution()
+{
+}
+
+op::GroupConvolution::GroupConvolution(const Output<Node>& data_batch,
+                                       const Output<Node>& filters,
                                        const Strides& window_movement_strides,
                                        const Strides& window_dilation_strides,
                                        const CoordinateDiff& padding_below,
@@ -36,7 +42,7 @@ op::GroupConvolution::GroupConvolution(const shared_ptr<Node>& data_batch,
                                        const Strides& data_dilation_strides,
                                        const size_t groups,
                                        const PadType& pad_type)
-    : FusedOp("GroupConvolution", check_single_output_args({data_batch, filters}))
+    : FusedOp({data_batch, filters})
     , m_window_movement_strides(window_movement_strides)
     , m_window_dilation_strides(window_dilation_strides)
     , m_padding_below(padding_below)
@@ -45,7 +51,6 @@ op::GroupConvolution::GroupConvolution(const shared_ptr<Node>& data_batch,
     , m_groups(groups)
     , m_pad_type(pad_type)
 {
-    // TODO: Move this out of constructor to validate_and_infer_types()
     constructor_validate_and_infer_types();
 }
@@ -129,35 +134,35 @@ shared_ptr<Node> op::GroupConvolution::copy_with_new_args(const NodeVector& new_

 NodeVector op::GroupConvolution::decompose_op() const
 {
-    auto data = get_argument(0);
-    auto filters = get_argument(1);
+    auto data = input(0);
+    auto filters = input(1);

     // Split one convolution op to N ops where N is the number of groups
     // and concat results after computation.
     // reference: https://github.com/NervanaSystems/ngraph-mxnet/blob/fdd692/src/ngraph/ngraph_emitter.cc#L822-L856
-    std::size_t n_data_channels{data->get_shape().at(1)};
-    std::size_t n_filters_channels{filters->get_shape().at(0)};
+    std::size_t n_data_channels{data.get_shape().at(1)};
+    std::size_t n_filters_channels{filters.get_shape().at(0)};
     std::size_t data_group_size{n_data_channels / m_groups};
     std::size_t filters_group_size{n_filters_channels / m_groups};
     NodeVector convolution_nodes;

     // initial bounds for splice
-    std::vector<std::size_t> data_lower_bounds(data->get_shape().size());
-    std::vector<std::size_t> data_upper_bounds{data->get_shape()};
-    std::vector<std::size_t> filters_lower_bounds(filters->get_shape().size());
-    std::vector<std::size_t> filters_upper_bounds{filters->get_shape()};
+    std::vector<std::size_t> data_lower_bounds(data.get_shape().size());
+    std::vector<std::size_t> data_upper_bounds{data.get_shape()};
+    std::vector<std::size_t> filters_lower_bounds(filters.get_shape().size());
+    std::vector<std::size_t> filters_upper_bounds{filters.get_shape()};

     for (std::size_t group{0}; group < m_groups; ++group)
     {
         // slice data
         data_lower_bounds[1] = group * data_group_size;
         data_upper_bounds[1] = (group + 1) * data_group_size;
-        auto sliced_data =
-            std::make_shared<ngraph::op::Slice>(data, data_lower_bounds, data_upper_bounds);
+        auto sliced_data = std::make_shared<ngraph::op::Slice>(
+            data.get_source_output(), data_lower_bounds, data_upper_bounds);
         // slice filters
         filters_lower_bounds[0] = group * filters_group_size;
         filters_upper_bounds[0] = (group + 1) * filters_group_size;
         auto sliced_filters = std::make_shared<ngraph::op::Slice>(
-            filters, filters_lower_bounds, filters_upper_bounds);
+            filters.get_source_output(), filters_lower_bounds, filters_upper_bounds);

         convolution_nodes.push_back(
             std::make_shared<ngraph::op::Convolution>(sliced_data,
...
@@ -29,8 +29,12 @@ namespace ngraph
         class GroupConvolution : public ngraph::op::util::FusedOp
         {
         public:
-            GroupConvolution(const std::shared_ptr<Node>& data_batch,
-                             const std::shared_ptr<Node>& filters,
+            NGRAPH_API
+            static const std::string type_name;
+            const std::string& description() const override { return type_name; }
+            GroupConvolution();
+            GroupConvolution(const Output<Node>& data_batch,
+                             const Output<Node>& filters,
                              const Strides& window_movement_strides,
                              const Strides& window_dilation_strides,
                              const CoordinateDiff& padding_below,
...
@@ -19,11 +19,17 @@
 using namespace std;
 using namespace ngraph;

-op::Slice::Slice(const shared_ptr<Node>& arg,
+const string op::Slice::type_name{"Slice"};
+
+op::Slice::Slice()
+{
+}
+
+op::Slice::Slice(const Output<Node>& arg,
                  const Coordinate& lower_bounds,
                  const Coordinate& upper_bounds,
                  const Strides& strides)
-    : Op("Slice", check_single_output_args({arg}))
+    : Op({arg})
     , m_lower_bounds(lower_bounds)
     , m_upper_bounds(upper_bounds)
     , m_strides(strides)
@@ -31,10 +37,10 @@ op::Slice::Slice(const shared_ptr<Node>& arg,
     constructor_validate_and_infer_types();
 }

-op::Slice::Slice(const shared_ptr<Node>& arg,
+op::Slice::Slice(const Output<Node>& arg,
                  const Coordinate& lower_bounds,
                  const Coordinate& upper_bounds)
-    : Op("Slice", check_single_output_args({arg}))
+    : Op({arg})
     , m_lower_bounds(lower_bounds)
     , m_upper_bounds(upper_bounds)
     , m_strides(Strides())
...
@@ -28,6 +28,11 @@ namespace ngraph
         class Slice : public Op
         {
         public:
+            NGRAPH_API
+            static const std::string type_name;
+            const std::string& description() const override { return type_name; }
+            /// \brief Constructs a tensor slice operation
+            Slice();
             /// \brief Constructs a tensor slice operation.
             ///
             /// \param arg The tensor to be sliced.
@@ -35,17 +40,16 @@ namespace ngraph
             /// \param upper_bounds The axiswise upper bounds of the slice (exclusive).
             /// \param strides The slicing strides; for example, strides of `{n,m}` means to take
             ///                every nth row and every mth column of the input matrix.
-            Slice(const std::shared_ptr<Node>& arg,
+            Slice(const Output<Node>& arg,
                   const Coordinate& lower_bounds,
                   const Coordinate& upper_bounds,
                   const Strides& strides);
             /// \brief Constructs a tensor slice operation with unit strides; i.e., every element inside the bounding box will be copied to the output slice.
             ///
             /// \param arg The tensor to be sliced.
             /// \param lower_bounds The axiswise lower bounds of the slice (inclusive).
             /// \param upper_bounds The axiswise upper bounds of the slice (exclusive).
-            Slice(const std::shared_ptr<Node>& arg,
+            Slice(const Output<Node>& arg,
                   const Coordinate& lower_bounds,
                   const Coordinate& upper_bounds);
...
@@ -30,6 +30,11 @@ op::util::FusedOp::FusedOp(const NodeVector& args)
 {
 }

+op::util::FusedOp::FusedOp(const OutputVector& args)
+    : Op(args)
+{
+}
+
 op::util::FusedOp::FusedOp(const std::string& node_type, const NodeVector& args)
     : Op(node_type, args)
 {
...
@@ -51,6 +51,8 @@ namespace ngraph
                 /// \param args Nodes that produce the input tensors for the fused op
                 FusedOp(const NodeVector& args);

+                FusedOp(const OutputVector& args);
+
                 /// \brief Constructs a FusedOp
                 ///
                 /// \param args Nodes that produce the input tensors for the fused op
...
@@ -37,6 +37,9 @@ namespace ngraph
                 auto out_buffer_index = external_function->get_buffer_index(out[0].get_name());
                 auto count = static_cast<int>(out[0].get_size());
                 auto data_type = args[0].get_element_type().get_type_enum();
+                const ngraph::op::AllReduce* allreduce =
+                    static_cast<const ngraph::op::AllReduce*>(node);
+                auto reduce_type = allreduce->get_reduce_type();

                 auto external_function_name = external_function->get_function_name();
                 NGRAPH_DEBUG_PRINT(
@@ -48,11 +51,13 @@ namespace ngraph
                     node->get_friendly_name().c_str(),
                     count);

-                auto functor = [&, count, data_type, arg_buffer_index, out_buffer_index](
+                auto functor =
+                    [&, count, reduce_type, data_type, arg_buffer_index, out_buffer_index](
                     CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
                     get_distributed_interface()->all_reduce(ctx->buffer_data[arg_buffer_index],
                                                             ctx->buffer_data[out_buffer_index],
                                                             data_type,
+                                                            reduce_type,
                                                             count);
                 };
                 functors.emplace_back(functor);
...
@@ -53,9 +53,11 @@ namespace ngraph
                 if (is_int64)
                 {
-                    if (args[0].get_element_type() == element::f32 ||
-                        args[0].get_element_type() == element::f64 ||
-                        args[0].get_element_type() == element::u8)
+                    if ((args[0].get_element_type() == element::f32 ||
+                         args[0].get_element_type() == element::f64 ||
+                         args[0].get_element_type() == element::u8 ||
+                         args[0].get_element_type() == element::i8) &&
+                        params_shape.size() <= 3 && out_shape.size() <= 3)
                     {
                         std::function<decltype(runtime::cpu::kernel::gather_i64<float, 2, 2>)>
                             kernel;
@@ -111,9 +113,11 @@ namespace ngraph
                 else
                 {
-                    if (args[0].get_element_type() == element::f32 ||
-                        args[0].get_element_type() == element::f64 ||
-                        args[0].get_element_type() == element::u8)
+                    if ((args[0].get_element_type() == element::f32 ||
+                         args[0].get_element_type() == element::f64 ||
+                         args[0].get_element_type() == element::u8 ||
+                         args[0].get_element_type() == element::i8) &&
+                        params_shape.size() <= 3 && out_shape.size() <= 3)
                     {
                         std::function<decltype(runtime::cpu::kernel::gather_i32<float, 2, 2>)>
                             kernel;
...
@@ -46,7 +46,9 @@ namespace ngraph
                 }

                 if (args[0].get_element_type() != element::f64 &&
-                    args[0].get_element_type() != element::f32)
+                    args[0].get_element_type() != element::f32 &&
+                    args[0].get_element_type() != element::u8 &&
+                    args[0].get_element_type() != element::i8)
                 {
                     throw ngraph_error("Unsupported type in CPU Builder for ScatterAdd");
                 }
@@ -59,6 +61,8 @@ namespace ngraph
                 auto element_type = args[0].get_element_type();
                 if (is_int64)
                 {
+                    if (inputs_shape.size() <= 3 && updates_shape.size() <= 3)
                     {
                         std::function<decltype(runtime::cpu::kernel::scatter_add_i64<float, 2, 2>)>
                             kernel;
@@ -91,6 +95,13 @@ namespace ngraph
                         functors.emplace_back(functor);
                     }
                     else
+                    {
+                        throw ngraph_error("Unsupported ranks in CPU Builder for ScatterAdd");
+                    }
+                }
+                else
+                {
+                    if (inputs_shape.size() <= 3 && updates_shape.size() <= 3)
                     {
                         std::function<decltype(runtime::cpu::kernel::scatter_add_i32<float, 2, 2>)>
                             kernel;
@@ -122,6 +133,11 @@ namespace ngraph
                         };
                         functors.emplace_back(functor);
                     }
+                    else
+                    {
+                        throw ngraph_error("Unsupported ranks in CPU Builder for ScatterAdd");
+                    }
+                }
             }

             REGISTER_OP_BUILDER(ScatterAdd);
 }
...
@@ -211,14 +211,6 @@
         KV = K<ET, 2, R2>; \
     else if (R1 == 3) \
         KV = K<ET, 3, R2>; \
-    else if (R1 == 4) \
-        KV = K<ET, 4, R2>; \
-    else if (R1 == 5) \
-        KV = K<ET, 5, R2>; \
-    else if (R1 == 6) \
-        KV = K<ET, 6, R2>; \
-    else if (R1 == 7) \
-        KV = K<ET, 7, R2>; \
     else \
         throw ngraph_error("Unsupported first rank " + std::to_string(R1) + " for kernel " #K);
@@ -235,22 +227,6 @@
     { \
         SELECT_RANK1(KV, ET, R1, 3, K); \
     } \
-    else if (R2 == 4) \
-    { \
-        SELECT_RANK1(KV, ET, R1, 4, K); \
-    } \
-    else if (R2 == 5) \
-    { \
-        SELECT_RANK1(KV, ET, R1, 5, K); \
-    } \
-    else if (R2 == 6) \
-    { \
-        SELECT_RANK1(KV, ET, R1, 6, K); \
-    } \
-    else if (R2 == 7) \
-    { \
-        SELECT_RANK1(KV, ET, R1, 7, K); \
-    } \
     else \
     { \
         throw ngraph_error("Unsupported second rank " + std::to_string(R2) + " for kernel " #K); \
@@ -270,6 +246,10 @@
     { \
         SELECT_2RANKS(KV, uint8_t, R1, R2, K); \
     } \
+    else if (ET == element::i8) \
+    { \
+        SELECT_2RANKS(KV, int8_t, R1, R2, K); \
+    } \
     else \
     { \
         throw ngraph_error("Unsupported element type " + ET.c_type_string() + " for kernel " #K); \
...
@@ -271,10 +271,13 @@ namespace ngraph
             template <>
             void CPU_Emitter::EMITTER_DECL(ngraph::op::AllReduce)
             {
+                const ngraph::op::AllReduce* allreduce =
+                    static_cast<const ngraph::op::AllReduce*>(node);
                 writer << "ngraph::get_distributed_interface()->all_reduce(" << args[0].get_name()
                        << ", " << out[0].get_name() << ", "
                        << "ngraph::element::Type_t::" << args[0].get_element_type().get_type_name()
-                       << ", " << out[0].get_size() << ");\n";
+                       << ", ngraph::reduction::Type_t::" << allreduce->get_reduce_type()
+                       << ", " << out[0].get_size() << ");\n";
             }

             template <>
@@ -1842,8 +1845,9 @@ namespace ngraph
                 writer.block_begin();
                 if ((args[0].get_element_type() == element::f64 ||
                      args[0].get_element_type() == element::f32 ||
-                     args[0].get_element_type() == element::u8) &&
-                    gather->get_axis() == 0)
+                     args[0].get_element_type() == element::u8 ||
+                     args[0].get_element_type() == element::i8) &&
+                    args[0].get_shape().size() <= 3 && out[0].get_shape().size() <= 3)
                 {
                     writer << "cpu::kernel::gather<" << args[0].get_type() << ", "
                            << args[1].get_element_type().c_type_string() << ", "
@@ -1903,8 +1907,11 @@ namespace ngraph
                 }

                 writer.block_begin();
-                if (args[0].get_element_type() == element::f64 ||
-                    args[0].get_element_type() == element::f32)
+                if ((args[0].get_element_type() == element::f64 ||
+                     args[0].get_element_type() == element::f32 ||
+                     args[0].get_element_type() == element::u8 ||
+                     args[0].get_element_type() == element::i8) &&
+                    args[0].get_shape().size() <= 3 && args[2].get_shape().size() <= 3)
                 {
                     writer << "cpu::kernel::scatter_add<" << args[0].get_type() << ", "
                            << args[1].get_element_type().c_type_string() << ", "
...
@@ -31,7 +31,7 @@ namespace ngraph
         {
             namespace kernel
             {
-                // Calculate the indices from position 0 to rank-1.
+                // Calculate the indices for positions 0 to rank-1.
                 static void
                     get_indices(const Shape& shape, int index, std::vector<int>& indices, int rank)
                 {
@@ -93,8 +93,11 @@ namespace ngraph
                     if (indices_rank == 0)
                     {
+//TODO Enable this if compiler issue with CODEGEN is fixed or DEX needs it.
+#if 0
 #ifdef _OPENMP
 #pragma omp parallel for
+#endif
 #endif
                         for (int i = 0; i < outer_loop_num; i++)
                         {
@@ -142,7 +145,11 @@ namespace ngraph
                     }
                     else
                     {
-                        auto num_indices = shape_size(indices_shape);
+                        size_t num_indices = 1;
+                        for (auto d : indices_shape)
+                        {
+                            num_indices *= d;
+                        }
 #ifdef _OPENMP
 #pragma omp parallel for
...
@@ -2238,6 +2238,32 @@ void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qconvb_add()
             std::dynamic_pointer_cast<ngraph::op::Add>(m.get_match_root()->get_argument(0));
         auto dq_l_m = std::dynamic_pointer_cast<ngraph::op::Dequantize>(pattern_map[dq_l_label]);
         auto dq_r_m = std::dynamic_pointer_cast<ngraph::op::Dequantize>(pattern_map[dq_r_label]);

+        // both left and right are QuantizedConvolutionBias
+        if (dq_r_m->get_argument(0)->description() == "QuantizedConvolutionBias")
+        {
+            for (auto user : m.get_match_root()->get_users())
+            {
+                auto q_m = std::dynamic_pointer_cast<ngraph::op::Quantize>(user);
+                if (q_m)
+                {
+                    auto q_m_scale = q_m->get_argument(1);
+                    auto dq_l_m_scale = dq_l_m->get_argument(1);
+                    auto dq_r_m_scale = dq_r_m->get_argument(1);
+                    if (!ngraph::compare_constants(q_m_scale, dq_l_m_scale) &&
+                        ngraph::compare_constants(q_m_scale, dq_r_m_scale))
+                    {
+                        NGRAPH_DEBUG << "Scales of Q and DQ of right branch match";
+                        // switch left and right branch
+                        auto temp = dq_l_m;
+                        dq_l_m = dq_r_m;
+                        dq_r_m = temp;
+                    }
+                    break;
+                }
+            }
+        }
+
         auto qconv =
             std::static_pointer_cast<ngraph::op::QuantizedConvolutionBias>(dq_l_m->get_argument(0));
         auto inplace_input = dq_r_m->get_argument(0);
...
@@ -24,6 +24,7 @@
 #include <vector>

 #include "ngraph/op/all.hpp"
+#include "ngraph/op/allreduce.hpp"
 #include "ngraph/op/any.hpp"
 #include "ngraph/op/argmax.hpp"
 #include "ngraph/op/argmin.hpp"
@@ -255,9 +256,12 @@ private:
     }
     case OP_TYPEID::AllReduce:
     {
+        const ngraph::op::AllReduce* allreduce =
+            static_cast<const ngraph::op::AllReduce*>(&node);
         reference::allreduce<T>(args[0]->get_data_ptr<T>(),
                                 out[0]->get_data_ptr<T>(),
                                 node.get_input_element_type(0).get_type_enum(),
+                                allreduce->get_reduce_type(),
                                 static_cast<int>(shape_size(node.get_input_shape(0))));
         break;
     }
...
@@ -76,11 +76,11 @@ namespace ngraph
                 // input count.
                 void check_inputs(std::size_t expected_input_count) const
                 {
-                    if (op().get_input_size() != expected_input_count)
+                    if (op().get_input_size() < expected_input_count)
                     {
                         std::ostringstream os;
                         os << "The PlaidML nGraph backend only supports " << op().description()
-                           << " operations with an input count == " << expected_input_count
+                           << " operations with an input count >= " << expected_input_count
                            << " (got " << op().get_input_size() << " inputs)";
                         throw std::runtime_error{os.str()};
                     }
...
@@ -261,92 +261,25 @@
 dot_matrix_2x0_0x2
 # dgkutnic ww24.5: these tests are to be triaged by the PlaidML team
+# ww25.2: re-scrubbed this list of tests after fixing check_inputs
+# initial debug points to some of these failing due to precision issues
-convolution_3d_1item_large_5o3i_padded_uneven_filter_uneven_data_dilation_data_dilated
-select
-product_trivial
-product_trivial_5d
-product_to_scalar
-product_matrix_columns
-product_matrix_rows
-product_3d_to_matrix_most_sig
-product_3d_to_matrix_least_sig
-product_3d_to_vector
-product_3d_to_scalar
-product_2d_to_scalar_int32
-product_to_scalar_int32
-product_to_scalar_int8
-max_trivial
-max_trivial_5d
-max_to_scalar
-max_to_scalar_int8
-max_matrix_columns
-max_matrix_rows
-max_matrix_rows_int32
-max_3d_to_matrix_most_sig
-max_3d_to_matrix_least_sig
-max_3d_to_vector
-max_3d_to_scalar
-max_3d_to_scalar_int32
-min_trivial
-min_trivial_5d
-min_trivial_5d_int32
-min_to_scalar
-min_to_scalar_int8
-min_matrix_columns
-min_matrix_rows
-min_matrix_rows_int32
-min_3d_to_matrix_most_sig
-min_3d_to_matrix_least_sig
-min_3d_to_vector
-min_3d_to_scalar
-min_3d_to_scalar_int32
-sum_to_scalar
+sqrt
+batch_norm_inference_0eps_f32
+batch_norm_inference_f32
+batch_norm_training_0eps_f32
+argmin_trivial
+argmax_trivial
+argmin_trivial_in_i32
 sum_large_1d_to_scalar
-sum_matrix_columns
-sum_matrix_6d
-sum_matrix_rows
-sum_3d_to_matrix_most_sig
-sum_3d_to_matrix_least_sig
-sum_3d_to_vector
-sum_3d_to_scalar
-sum_3d_to_scalar_int32
-sum_5d_to_scalar
-sum_5d_to_scalar_int32
-sum_2d_to_scalar_int8
 sum_stable_acc
-sum_stable_simple_float
-lstm_cell_no_bias_no_peepholes
-lstm_cell_bias_peepholes
-lstm_cell_bias_peepholes_clip_input_forget
-lstm_cell_activaction_functions
-group_conv_transpose
-group_conv_transpose_output_shape
+one_hot_scalar_2_in_3
+one_hot_scalar_1_in_3
+one_hot_scalar_0_in_3
 divide_python_rounding_int32
-any_2x2_to_scalar_true
-any_2x2_to_scalar_false
-any_2x3_eliminate_col_dim
-any_2x3_eliminate_row_dim
-any_2x2x3_eliminate_dim_1
-any_2x2x3_eliminate_dim_2
-any_2x2x3_eliminate_dims_0_1
-any_2x2x3_eliminate_dims_0_2
-any_2x2x3_eliminate_dims_1_2
-any_2x2x3_eliminate_dims_0_1_2
-all_trivial
-all_2x2_to_scalar_false
-all_2x2_to_scalar_true
-all_2x3_eliminate_col_dim
-all_2x3_eliminate_row_dim
-all_2x2x3_eliminate_dim_0
-all_2x2x3_eliminate_dim_1
-all_2x2x3_eliminate_dim_2
-all_2x2x3_eliminate_dims_0_1
-all_2x2x3_eliminate_dims_0_2
-all_2x2x3_eliminate_dims_1_2
-all_2x2x3_eliminate_dims_0_1_2
-all_dynamic_axis
-all_change_axis
-backwards_broadcast0
-backwards_broadcast1
-backwards_select
-backwards_select_nested
-backwards_sum_v2s
-backwards_sum_m2s
-backwards_sum_m2v_0
-backwards_sum_m2v_1
 backwards_batchmatmul_tensor2_tensor2
@@ -25,9 +25,13 @@ namespace ngraph
     namespace reference
     {
         template <typename T>
-        void allreduce(T* arg, T* out, const element::Type_t element_type, int count)
+        void allreduce(T* arg,
+                       T* out,
+                       const element::Type_t element_type,
+                       const reduction::Type reduce_type,
+                       int count)
         {
-            get_distributed_interface()->all_reduce(arg, out, element_type, count);
+            get_distributed_interface()->all_reduce(arg, out, element_type, reduce_type, count);
         }
     }
 }
...
@@ -62,42 +62,3 @@ namespace ngraph
     /// Option may be enabled by setting the environment variable NGRAPH_SERIALIZER_OUTPUT_SHAPES
     void set_serialize_output_shapes(bool enable);
 }
-
-#ifdef NGRAPH_JSON_DISABLE
-// Rather than making every reference to the serializer conditionally compile here we just
-// provide some null stubs to resolve link issues
-// The `inline` is so we don't get multiple definitions of function
-std::string inline ngraph::serialize(std::shared_ptr<ngraph::Function> func, size_t indent)
-{
-    return "";
-}
-
-void inline ngraph::serialize(const std::string& path,
-                              std::shared_ptr<ngraph::Function> func,
-                              size_t indent)
-{
-    throw std::runtime_error("serializer disabled in build");
-}
-
-void inline ngraph::serialize(std::ostream& out,
-                              std::shared_ptr<ngraph::Function> func,
-                              size_t indent)
-{
-    throw std::runtime_error("serializer disabled in build");
-}
-
-std::shared_ptr<ngraph::Function> inline ngraph::deserialize(std::istream& in)
-{
-    throw std::runtime_error("serializer disabled in build");
-}
-
-std::shared_ptr<ngraph::Function> inline ngraph::deserialize(const std::string& str)
-{
-    throw std::runtime_error("serializer disabled in build");
-}
-
-void inline ngraph::set_serialize_output_shapes(bool enable)
-{
-    throw std::runtime_error("serializer disabled in build");
-}
-#endif
serializer_stub.cpp (new file, referenced by the CMake change above):

//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/serializer.hpp"
std::string ngraph::serialize(std::shared_ptr<ngraph::Function> func, size_t indent)
{
throw std::runtime_error("serializer disabled in build");
}
void ngraph::serialize(const std::string& path,
std::shared_ptr<ngraph::Function> func,
size_t indent)
{
throw std::runtime_error("serializer disabled in build");
}
void ngraph::serialize(std::ostream& out, std::shared_ptr<ngraph::Function> func, size_t indent)
{
throw std::runtime_error("serializer disabled in build");
}
std::shared_ptr<ngraph::Function> ngraph::deserialize(std::istream& in)
{
throw std::runtime_error("serializer disabled in build");
}
std::shared_ptr<ngraph::Function> ngraph::deserialize(const std::string& str)
{
throw std::runtime_error("serializer disabled in build");
}
void ngraph::set_serialize_output_shapes(bool enable)
{
throw std::runtime_error("serializer disabled in build");
}
@@ -35,6 +35,7 @@ using namespace ngraph;

 static string s_manifest = "${MANIFEST}";

+#if 0
 NGRAPH_TEST(${BACKEND_NAME}, scatter_add_4d_indices)
 {
     Shape ref_shape{3, 3, 3};
@@ -122,13 +123,14 @@ NGRAPH_TEST(${BACKEND_NAME}, scatter_add_3d_indices)
                                   read_vector<float>(result),
                                   MIN_FLOAT_TOLERANCE_BITS));
 }
+#endif

 NGRAPH_TEST(${BACKEND_NAME}, scatter_add_2d_indices)
 {
-    Shape ref_shape{2, 3, 3};
+    Shape ref_shape{3};
     Shape indices_shape{2, 2};
-    Shape updates_shape{2, 2, 3, 3};
-    Shape out_shape{2, 3, 3};
+    Shape updates_shape{2, 2};
+    Shape out_shape{3};
     auto R = make_shared<op::Parameter>(element::f32, ref_shape);
     auto I = make_shared<op::Parameter>(element::i32, indices_shape);
     auto U = make_shared<op::Parameter>(element::f32, updates_shape);
@@ -140,20 +142,17 @@ NGRAPH_TEST(${BACKEND_NAME}, scatter_add_2d_indices)

     // Create some tensors for input/output
     auto r = backend->create_tensor(element::f32, ref_shape);
-    copy_data(r, vector<float>{0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 9});
+    copy_data(r, vector<float>{0, 1, 2});
     auto i = backend->create_tensor(element::i32, indices_shape);
     copy_data(i, vector<int32_t>{0, 1, 1, 0});
     auto u = backend->create_tensor(element::f32, updates_shape);
-    copy_data(u, vector<float>{0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 9,
-                               1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8});
+    copy_data(u, vector<float>{1, 2, 3, 4});
     auto result = backend->create_tensor(element::f32, out_shape);

     auto c = backend->compile(f);
     c->call_with_validate({result}, {r, i, u});
-    EXPECT_TRUE(test::all_close_f(
-        (vector<float>{0, 3, 6, 9, 12, 15, 18, 21, 24, 3, 6, 9, 12, 15, 18, 21, 24, 27}),
-        read_vector<float>(result),
-        MIN_FLOAT_TOLERANCE_BITS));
+    EXPECT_TRUE(test::all_close_f(
+        (vector<float>{5, 6, 2}), read_vector<float>(result), MIN_FLOAT_TOLERANCE_BITS));
 }

 NGRAPH_TEST(${BACKEND_NAME}, scatter_add_1d_indices)
...
@@ -150,3 +150,19 @@ TEST(build_graph, no_arg_construction)
     validate_nodes_and_infer_types(ops);
     ASSERT_EQ(add1->get_output_shape(0), Shape{7});
 }
TEST(build_graph, multi_output_split)
{
const auto data = make_shared<op::Parameter>(element::f32, Shape{64, 8, 100, 150});
auto filters = make_shared<op::Parameter>(element::f32, Shape{128, 2, 10, 20});
const auto split = make_shared<op::Split>(data, 1, 2);
auto conv = make_shared<op::GroupConvolution>(split->output(1),
filters,
Strides{1, 1},
Strides{1, 1},
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1},
2);
EXPECT_EQ(conv->get_shape(), (Shape{64, 128, 91, 131}));
}
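A quick sanity check of the expected shape in the new test (plain arithmetic, not code): with zero padding and unit strides/dilations, each output spatial dim is input - filter + 1, and the output channel count is the number of filters:

    batch    = 64                  (unchanged)
    channels = 128                 (first dim of the filter shape)
    height   = 100 - 10 + 1 = 91
    width    = 150 - 20 + 1 = 131  -> Shape{64, 128, 91, 131}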
@@ -3691,6 +3691,120 @@ TEST(cpu_quant_fusion, qconvba)
     EXPECT_TRUE(test::all_close(cpu1_results.at(0), cpu2_results.at(0)));
 }
TEST(cpu_quant_fusion, qconvba_q)
{
auto make_function = []() {
Shape shape_input{1, 2, 2, 2};
Shape shape_weights{1, 2, 1, 1};
Shape shape_summand{1, 1, 2, 2};
auto input_l = std::make_shared<op::Parameter>(element::f32, shape_input);
auto weights_l = std::make_shared<op::Parameter>(element::f32, shape_weights);
auto bias_l = std::make_shared<op::Parameter>(element::f32, Shape{shape_weights[0]});
auto input_r = std::make_shared<op::Parameter>(element::f32, shape_input);
auto weights_r = std::make_shared<op::Parameter>(element::f32, shape_weights);
auto bias_r = std::make_shared<op::Parameter>(element::f32, Shape{shape_weights[0]});
auto input_scale_l = op::Constant::create(element::f32, Shape{}, {2.0f});
auto weights_scale_l = op::Constant::create(element::f32, Shape{}, {2.0f});
auto output_scale_l = op::Constant::create(element::f32, Shape{}, {4.0f});
auto input_scale_r = op::Constant::create(element::f32, Shape{}, {5.0f});
auto weights_scale_r = op::Constant::create(element::f32, Shape{}, {5.0f});
auto output_scale_r = op::Constant::create(element::f32, Shape{}, {20.0f});
auto int8_zero = op::Constant::create(element::i8, Shape{}, {0});
auto int32_zero = op::Constant::create(element::i32, Shape{}, {0});
auto uint8_zero = op::Constant::create(element::u8, Shape{}, {0});
op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN;
auto q_input_l = std::make_shared<op::Quantize>(
input_l, input_scale_l, uint8_zero, element::u8, AxisSet{}, round_mode);
auto q_weights_l = std::make_shared<op::Quantize>(
weights_l, weights_scale_l, int8_zero, element::i8, AxisSet{}, round_mode);
auto q_bias_l = std::make_shared<op::Quantize>(bias_l,
input_scale_l * weights_scale_l,
int32_zero,
element::i32,
AxisSet{},
round_mode);
auto q_input_r = std::make_shared<op::Quantize>(
input_r, input_scale_r, uint8_zero, element::u8, AxisSet{}, round_mode);
auto q_weights_r = std::make_shared<op::Quantize>(
weights_r, weights_scale_r, int8_zero, element::i8, AxisSet{}, round_mode);
auto q_bias_r = std::make_shared<op::Quantize>(bias_r,
input_scale_r * weights_scale_r,
int32_zero,
element::i32,
AxisSet{},
round_mode);
// Left Graph
auto requant_scale_l = (input_scale_l * weights_scale_l) / output_scale_l;
auto conv_l = std::make_shared<op::QuantizedConvolutionBias>(q_input_l,
q_weights_l,
q_bias_l,
Strides{1, 1},
Strides{1, 1},
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1},
requant_scale_l);
auto dq_l = std::make_shared<op::Dequantize>(
conv_l, output_scale_l, int8_zero, element::f32, AxisSet{});
auto r_l = std::make_shared<op::Reshape>(dq_l, AxisVector{0, 1, 2, 3}, Shape{1, 2, 2});
auto b_l = std::make_shared<op::Broadcast>(r_l, Shape{1, 1, 2, 2}, AxisSet{0});
// Right Graph
auto requant_scale_r = (input_scale_r * weights_scale_r) / output_scale_r;
auto conv_r = std::make_shared<op::QuantizedConvolutionBias>(q_input_r,
q_weights_r,
q_bias_r,
Strides{1, 1},
Strides{1, 1},
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1},
requant_scale_r);
auto dq_r = std::make_shared<op::Dequantize>(
conv_r, output_scale_r, int8_zero, element::f32, AxisSet{});
auto r_r = std::make_shared<op::Reshape>(dq_r, AxisVector{0, 1, 2, 3}, Shape{1, 2, 2});
auto b_r = std::make_shared<op::Broadcast>(r_r, Shape{1, 1, 2, 2}, AxisSet{0});
auto add = b_l + b_r;
auto relu = std::make_shared<op::Relu>(add);
auto q = std::make_shared<op::Quantize>(
relu, output_scale_r, uint8_zero, element::u8, AxisSet{}, round_mode);
auto dq = std::make_shared<op::Dequantize>(
q, output_scale_r, uint8_zero, element::f32, AxisSet{});
return make_shared<Function>(
NodeVector{dq},
ParameterVector{input_l, weights_l, bias_l, input_r, weights_r, bias_r});
};
auto cpu_f1 = make_function();
auto cpu_f2 = make_function();
test::Uniform<float> rng(2.0f, 2.0f);
vector<vector<float>> args;
for (shared_ptr<op::Parameter> param : cpu_f1->get_parameters())
{
vector<float> tensor_val(shape_size(param->get_shape()));
rng.initialize(tensor_val);
args.push_back(tensor_val);
}
// Disable CPUQuantFusion
set_environment("NGRAPH_PASS_ENABLES", "CPUQuantFusion:0", 1);
auto cpu1_results = execute(cpu_f1, args, "CPU");
// Enable CPUQuantFusion
set_environment("NGRAPH_PASS_ENABLES", "CPUQuantFusion:1", 1);
auto cpu2_results = execute(cpu_f2, args, "CPU");
EXPECT_TRUE(test::all_close(cpu1_results.at(0), cpu2_results.at(0)));
auto backend = runtime::Backend::create("CPU");
auto fuse = make_function();
backend->compile(fuse);
ASSERT_EQ(count_ops_of_type<op::Quantize>(fuse), 6);
}
#ifndef NGRAPH_JSON_DISABLE
// Tests that rely on deserializing json files
TEST(cpu_fusion, fuse_conv_bias)
...
@@ -29,25 +29,61 @@
 using namespace std;
 using namespace ngraph;

-TEST(distributed_${BACKEND_NAME}, allreduce)
+static void test_allreduce_common(reduction::Type reduce_type)
 {
     auto comm_size = get_distributed_interface()->get_size();
     if (comm_size > 1)
     {
         auto shape = Shape{2, 2};
         auto A = make_shared<op::Parameter>(element::f32, shape);
-        auto f = make_shared<Function>(make_shared<op::AllReduce>(A), ParameterVector{A});
+        auto f =
+            make_shared<Function>(make_shared<op::AllReduce>(A, reduce_type), ParameterVector{A});

         auto backend = runtime::Backend::create("${BACKEND_NAME}");

         auto v = vector<float>{1, 2, 3, 4};
         auto a = backend->create_tensor(element::f32, shape);
-        copy_data(a, vector<float>{1, 2, 3, 4});
         auto result = backend->create_tensor(element::f32, shape);

+#if !(defined(__GNUC__) && (__GNUC__ == 4 && __GNUC_MINOR__ == 8))
+#pragma GCC diagnostic push
+#pragma GCC diagnostic error "-Wswitch"
+#pragma GCC diagnostic error "-Wswitch-enum"
+#endif
+        switch (reduce_type.get_type())
+        {
+        case reduction::Type_t::sum:
+            copy_data(a, v);
             std::transform(
                 v.begin(), v.end(), v.begin(), std::bind1st(std::multiplies<float>(), comm_size));
+            break;
+        case reduction::Type_t::prod:
+            copy_data(a, v);
+            std::transform(v.begin(), v.end(), v.begin(), [&](float elm) -> float {
+                return pow(elm, comm_size);
+            });
+            break;
+        case reduction::Type_t::min:
+        case reduction::Type_t::max:
+            auto shift = get_distributed_interface()->get_rank();
+            std::rotate(v.begin(), v.begin() + shift % v.size(), v.end());
+            copy_data(a, v);
+            if (reduce_type == reduction::Type_t::min)
+            {
+                std::fill(v.begin(), v.end(), 1);
+                for (int i = 1; i < static_cast<int>(v.size()) - comm_size + 1; i++)
+                    v[i] = i + 1;
+            }
+            else
+            {
+                std::fill(v.begin(), v.end(), v.size());
+                for (int i = 0; i < static_cast<int>(v.size()) - comm_size; i++)
+                    v[i] = i + 2;
+            }
+        }
+#if !(defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ == 8)
+#pragma GCC diagnostic pop
+#endif
+
         auto handle = backend->compile(f);
         handle->call_with_validate({result}, {a});
@@ -55,6 +91,28 @@ TEST(distributed_${BACKEND_NAME}, allreduce)
     }
 }
TEST(distributed_${BACKEND_NAME}, allreduce_sum)
{
test_allreduce_common(reduction::sum);
}
TEST(distributed_${BACKEND_NAME}, allreduce_min)
{
test_allreduce_common(reduction::min);
}
TEST(distributed_${BACKEND_NAME}, allreduce_max)
{
test_allreduce_common(reduction::max);
}
#if !defined(NGRAPH_DISTRIBUTED_MLSL_ENABLE)
TEST(distributed_${BACKEND_NAME}, allreduce_prod)
{
test_allreduce_common(reduction::prod);
}
#endif
TEST(distributed_${BACKEND_NAME}, broadcastdistributed)
{
    auto shape = Shape{2, 2};
...
@@ -324,3 +324,19 @@ TEST(serialize, constant_infinity_nan)
     EXPECT_NE(str.find(R"(label="C)"), string::npos);
     EXPECT_NE(str.find(R"(label="D)"), string::npos);
 }
TEST(serialize, non_zero_node_output)
{
auto arg = make_shared<op::Parameter>(element::f32, Shape{10});
auto topk = make_shared<op::TopK>(arg, 0, element::i32, 5, true);
auto abs = make_shared<op::Abs>(Output<Node>(topk, 1));
auto result = make_shared<op::Result>(abs);
auto f = make_shared<Function>(ResultVector{result}, ParameterVector{arg});
string s = serialize(f);
shared_ptr<Function> g = deserialize(s);
auto g_result = g->get_results().at(0);
auto g_abs = g_result->input(0).get_source_output().get_node_shared_ptr();
auto topk_out = g_abs->input(0).get_source_output();
EXPECT_EQ(topk_out.get_index(), 1);
EXPECT_EQ(topk_out.get_node()->description(), "TopK");
}