Commit 745c4001 authored by Sang Ik Lee's avatar Sang Ik Lee Committed by Scott Cyphers

Change behavior of elementwise divide for integral type to match Python. (#3034)

* Change behavior of elementwise divide for integral type to match Python.

* Fix CPU codegen.

* Temp fix: Disable failing UT for IntelGPU

* Divide: Add constructor option to specify rounding mode for Integral types.

* Update serializer to support legacy Divide dump.

* Restore modified UT.
parent 397740fe
......@@ -25,6 +25,17 @@ op::Divide::Divide(const shared_ptr<Node>& arg0,
const shared_ptr<Node>& arg1,
const AutoBroadcastSpec& autob)
: BinaryElementwiseArithmetic("Divide", arg0, arg1, autob)
, m_pythondiv(true)
{
constructor_validate_and_infer_types();
}
// Constructs a division operation with an explicit rounding mode.
// pythondiv == true selects Python-style floor division for integral
// element types; false selects C++-style truncation toward zero.
// Floating-point types are unaffected by the flag.
op::Divide::Divide(const shared_ptr<Node>& arg0,
const shared_ptr<Node>& arg1,
bool pythondiv,
const AutoBroadcastSpec& autob)
: BinaryElementwiseArithmetic("Divide", arg0, arg1, autob)
, m_pythondiv(pythondiv)
{
// Validates element types/shapes and infers the output type per the
// binary-elementwise-arithmetic rules.
constructor_validate_and_infer_types();
}
......@@ -32,7 +43,8 @@ op::Divide::Divide(const shared_ptr<Node>& arg0,
/// \brief Clones this Divide onto new input nodes.
///
/// The pythondiv rounding mode and autobroadcast spec are propagated so the
/// copy divides exactly the same way as the original node.
shared_ptr<Node> op::Divide::copy_with_new_args(const NodeVector& new_args) const
{
    check_new_args_count(this, new_args);
    // NOTE: the pre-change two-argument construction (which silently dropped
    // the pythondiv flag) was removed; only the flag-preserving call remains.
    return make_shared<Divide>(
        new_args.at(0), new_args.at(1), this->is_pythondiv(), this->get_autob());
}
void op::Divide::generate_adjoints(autodiff::Adjoints& adjoints, const NodeVector& deltas)
......
......@@ -30,16 +30,31 @@ namespace ngraph
///
/// \param arg0 Node that produces the first input tensor.
/// \param arg1 Node that produces the second input tensor.
/// \param pythondiv Use Python style rounding for integral type
/// \param autob Auto broadcast specification
Divide(const std::shared_ptr<Node>& arg0,
const std::shared_ptr<Node>& arg1,
bool pythondiv,
const AutoBroadcastSpec& autob = AutoBroadcastSpec());
/// \brief Constructs a division operation.
///
/// \param arg0 Node that produces the first input tensor.
/// \param arg1 Node that produces the second input tensor.
/// \param autob Auto broadcast specification
Divide(const std::shared_ptr<Node>& arg0,
const std::shared_ptr<Node>& arg1,
const AutoBroadcastSpec& autob = AutoBroadcastSpec());
bool is_pythondiv() const { return m_pythondiv; }
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
virtual void generate_adjoints(autodiff::Adjoints& adjoints,
const NodeVector& deltas) override;
protected:
bool m_pythondiv;
};
}
......
......@@ -387,8 +387,13 @@ shared_ptr<op::Constant> fold_constant_binary(shared_ptr<op::Constant> a,
}
else if (std::dynamic_pointer_cast<op::Divide>(binary))
{
runtime::reference::divide<T>(
a->get_data_ptr<T>(), b->get_data_ptr<T>(), out_vec.data(), shape_size(out_shape));
shared_ptr<op::Divide> divop = std::dynamic_pointer_cast<op::Divide>(binary);
bool pythondiv = divop->is_pythondiv();
runtime::reference::divide<T>(a->get_data_ptr<T>(),
b->get_data_ptr<T>(),
out_vec.data(),
shape_size(out_shape),
pythondiv);
}
else if (std::dynamic_pointer_cast<op::Minimum>(binary))
{
......
......@@ -132,7 +132,30 @@ namespace ngraph
template <>
void Builder::BUILDER_DECL(ngraph::op::Divide)
{
    // Build a CPU functor for Divide that forwards the node's rounding mode
    // to the kernel. (The old BUILD_BINARY_ELEMWISE_FUNCTOR one-liner was
    // removed: that macro cannot pass the extra pythondiv argument.)
    auto& functors = external_function->get_functors();
    const ngraph::op::Divide* divop = static_cast<const ngraph::op::Divide*>(node);

    // Kernel signature: (in0, in1, out, element_count, pythondiv, arena).
    std::function<void(void*, void*, void*, size_t, bool, int)> kernel;
    SELECT_KERNEL(kernel, args[0].get_element_type(), runtime::cpu::kernel::divide);

    auto element_count = out[0].get_size();
    auto arg0_buffer_index = external_function->get_buffer_index(args[0].get_name());
    auto arg1_buffer_index = external_function->get_buffer_index(args[1].get_name());
    auto out0_buffer_index = external_function->get_buffer_index(out[0].get_name());
    // Capture the flag by value: the functor may outlive the graph node.
    bool pythondiv = divop->is_pythondiv();
    auto functor = [&,
                    kernel,
                    element_count,
                    arg0_buffer_index,
                    arg1_buffer_index,
                    out0_buffer_index,
                    pythondiv](CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
        kernel(ctx->buffer_data[arg0_buffer_index],
               ctx->buffer_data[arg1_buffer_index],
               ctx->buffer_data[out0_buffer_index],
               element_count,
               pythondiv,
               ectx->arena);
    };
    functors.emplace_back(functor);
}
template <>
......@@ -394,7 +417,17 @@ namespace ngraph
template <>
NodeExecutorTy Builder::BUILDER_CF_DECL(ngraph::op::Divide)
{
    // Constant-folding executor for Divide; forwards the node's rounding
    // mode. (The old BUILD_BINARY_ELEMWISE_CF_FUNCTOR one-liner was removed:
    // that macro cannot pass the extra pythondiv argument.)
    const ngraph::op::Divide* divop = static_cast<const ngraph::op::Divide*>(node);

    // Kernel signature: (in0, in1, out, element_count, pythondiv, arena).
    std::function<void(void*, void*, void*, size_t, bool, int)> kernel;
    SELECT_KERNEL(
        kernel, node->get_input_element_type(0), runtime::cpu::kernel::divide);

    auto element_count = shape_size(node->get_shape());
    // Capture by value: the executor may outlive the graph node.
    bool pythondiv = divop->is_pythondiv();
    auto functor = [&, kernel, element_count, pythondiv](
                       const std::vector<void*>& inputs, std::vector<void*>& outputs) {
        // Constant folding runs synchronously; arena 0 is the default arena.
        kernel(inputs[0], inputs[1], outputs[0], element_count, pythondiv, 0);
    };
    return functor;
}
template <>
......
......@@ -1086,7 +1086,8 @@ namespace ngraph
void CPU_Emitter::EMITTER_DECL(ngraph::op::Divide)
{
writer.block_begin();
if (node->get_element_type().is_real() == false)
bool integral_type = !node->get_element_type().is_real();
if (integral_type)
{
// Check for divide by zero for integer types only
size_t element_count = args[1].get_size();
......@@ -1096,11 +1097,25 @@ namespace ngraph
<< "[i] == 0) throw std::runtime_error(\"integer divide by zero\");\n";
writer.block_end();
}
auto divop = static_cast<const ngraph::op::Divide*>(node);
bool pythondiv = divop->is_pythondiv();
writer << "#pragma omp parallel for\n";
writer << "for (size_t i = 0; i < " << out[0].get_size() << "; i++)\n";
writer.block_begin();
writer << out[0].get_name() << "[i] = " << args[0].get_name() << "[i] / "
<< args[1].get_name() << "[i];\n";
if (integral_type && pythondiv)
{
writer << out[0].get_name() << "[i] = ((" << args[0].get_name() << "[i] % "
<< args[1].get_name() << "[i] != 0) && (" << args[0].get_name()
<< "[i] < 0 != " << args[1].get_name() << "[i] < 0)) ?"
<< args[0].get_name() << "[i] / " << args[1].get_name()
<< "[i] - 1 :" << args[0].get_name() << "[i] / " << args[1].get_name()
<< "[i];\n";
}
else
{
writer << out[0].get_name() << "[i] = " << args[0].get_name() << "[i] / "
<< args[1].get_name() << "[i];\n";
}
writer.block_end();
writer.block_end();
}
......
......@@ -30,8 +30,15 @@ namespace ngraph
namespace kernel
{
template <typename ElementType>
void divide(void* input0, void* input1, void* output, size_t count, int arena)
typename std::enable_if<std::is_floating_point<ElementType>::value>::type
divide(void* input0,
void* input1,
void* output,
size_t count,
bool pythondiv,
int arena)
{
(void)pythondiv;
Eigen::array<Eigen::Index, 1> out_dims, in_dims;
out_dims[0] = in_dims[0] = count;
......@@ -46,6 +53,45 @@ namespace ngraph
out.device(ngraph::runtime::cpu::executor::GetCPUExecutor().get_device(arena)) =
in0 / in1;
}
// Element-wise division kernel, SFINAE-selected for integral ElementType.
// When pythondiv is true the quotient is floored (Python // semantics);
// otherwise Eigen's native '/' truncates toward zero (C++ semantics).
// NOTE(review): unlike the scalar reference kernel, this path does not
// raise on division by zero — presumably relying on the emitter's separate
// zero check; confirm before reuse.
template <typename ElementType>
typename std::enable_if<std::is_integral<ElementType>::value>::type
divide(void* input0,
void* input1,
void* output,
size_t count,
bool pythondiv,
int arena)
{
// Map the raw buffers as 1-D tensors of length `count`.
Eigen::array<Eigen::Index, 1> out_dims, in_dims;
out_dims[0] = in_dims[0] = count;
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> out(
static_cast<ElementType*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in0(
static_cast<ElementType*>(input0), in_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in1(
static_cast<ElementType*>(input1), in_dims);
if (pythondiv)
{
// Constant tensors used by the elementwise select below.
Eigen::Tensor<ElementType, 1, Eigen::RowMajor> zero(count);
zero.setZero();
Eigen::Tensor<ElementType, 1, Eigen::RowMajor> one(count);
one.setConstant(1);
// Materialize the truncated quotient and remainder once each
// (assigning to a concrete Tensor forces evaluation of the lazy
// expression so they can be reused).
Eigen::Tensor<ElementType, 1, Eigen::RowMajor> quot = in0 / in1;
Eigen::Tensor<ElementType, 1, Eigen::RowMajor> rem = in0 - quot * in1;
// Truncation differs from flooring exactly when the remainder is
// nonzero and the operands have opposite signs; subtract 1 there.
Eigen::Tensor<bool, 1, Eigen::RowMajor> if_cond =
((rem != zero) && ((in0 < zero) != (in1 < zero)));
out.device(ngraph::runtime::cpu::executor::GetCPUExecutor().get_device(
arena)) = if_cond.select(quot - one, quot);
}
else
{
// C++ semantics: plain truncating division.
out.device(ngraph::runtime::cpu::executor::GetCPUExecutor().get_device(
arena)) = in0 / in1;
}
}
}
}
}
......
......@@ -678,11 +678,13 @@ private:
}
case OP_TYPEID::Divide:
{
const op::Divide* divop = static_cast<const op::Divide*>(&node);
size_t element_count = shape_size(node.get_output_shape(0));
reference::divide<T>(static_cast<const T*>(args[0]),
static_cast<const T*>(args[1]),
static_cast<T*>(out[0]),
element_count);
element_count,
divop->is_pythondiv());
break;
}
case OP_TYPEID::Dot:
......
......@@ -95,6 +95,8 @@ all_2x2x3_eliminate_dims_0_1_2
# GPU backend uses floats to implement these ops for int32
floor_int32
divide_int32
divide_python_rounding_int32
divide_cpp_rounding_int32
one_hot_scalar_oob_in_3
# Unsupported extra pading modes
......
......@@ -113,3 +113,6 @@ model_matmul_integer
model_matmul_integer_no_zero_point
model_matmul_integer_zero_point_zero
model_matmul_integer_scalar
# Need to update implementation
divide_python_rounding_int32
......@@ -34,6 +34,7 @@
#include "ngraph/op/constant.hpp"
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/dequantize.hpp"
#include "ngraph/op/divide.hpp"
#include "ngraph/op/dot.hpp"
#include "ngraph/op/embedding_lookup.hpp"
#include "ngraph/op/experimental/batch_mat_mul.hpp"
......@@ -705,11 +706,13 @@ private:
}
case OP_TYPEID::Divide:
{
const op::Divide* divop = static_cast<const op::Divide*>(&node);
size_t element_count = shape_size(node.get_output_shape(0));
reference::divide<T>(args[0]->get_data_ptr<const T>(),
args[1]->get_data_ptr<const T>(),
out[0]->get_data_ptr<T>(),
element_count);
element_count,
divop->is_pythondiv());
break;
}
case OP_TYPEID::Dot:
......
......@@ -32,23 +32,47 @@ namespace ngraph
// In English: return type is void and T must be an integral type.
//
// Element-wise integral division of arg0 by arg1 into out.
// pythondiv == true: Python-style floor division (quotient rounded toward
// negative infinity). pythondiv == false: C++-style truncation toward zero.
// Throws std::domain_error on division by zero in either mode.
template <typename T>
typename std::enable_if<std::is_integral<T>::value>::type
    divide(const T* arg0, const T* arg1, T* out, size_t count, bool pythondiv)
{
    // NOTE: a stray post-loop `out[i] = arg0[i] / arg1[i];` (leftover from
    // the pre-change code, with `i` out of scope) was removed.
    if (pythondiv)
    {
        for (size_t i = 0; i < count; i++)
        {
            if (arg1[i] == 0)
            {
                throw std::domain_error("integer division by zero");
            }
            T quot = arg0[i] / arg1[i];
            T rem = arg0[i] % arg1[i];
            // C++ '/' truncates toward zero; flooring differs exactly when
            // the remainder is nonzero and the operands' signs differ.
            if ((rem != 0) && ((arg0[i] < 0) != (arg1[i] < 0)))
            {
                out[i] = quot - 1;
            }
            else
            {
                out[i] = quot;
            }
        }
    }
    else
    {
        for (size_t i = 0; i < count; i++)
        {
            if (arg1[i] == 0)
            {
                throw std::domain_error("integer division by zero");
            }
            out[i] = arg0[i] / arg1[i];
        }
    }
}
// In English: return type is void and T must be a floating point type.
template <typename T>
typename std::enable_if<std::is_floating_point<T>::value>::type
divide(const T* arg0, const T* arg1, T* out, size_t count)
divide(const T* arg0, const T* arg1, T* out, size_t count, bool pythondiv)
{
(void)pythondiv;
for (size_t i = 0; i < count; i++)
{
// TODO: Here we do not check for div by zero, so we'll get +-inf here
......
......@@ -902,8 +902,13 @@ static shared_ptr<ngraph::Function>
}
case OP_TYPEID::Divide:
{
bool pythondiv = true;
if (node_js["pythondiv"].is_object())
{
pythondiv = node_js.at("pythondiv").get<bool>();
}
node = make_shared<op::Divide>(
args[0], args[1], read_auto_broadcast(node_js["autob"]));
args[0], args[1], pythondiv, read_auto_broadcast(node_js["autob"]));
break;
}
case OP_TYPEID::Dot:
......@@ -1959,6 +1964,7 @@ static json write(const Node& n, bool binary_constant_data)
case OP_TYPEID::Divide:
{
auto tmp = dynamic_cast<const op::Divide*>(&n);
node["pythondiv"] = tmp->is_pythondiv();
if (tmp->get_autob().m_type != op::AutoBroadcastType::NONE)
{
node["autob"] = write_auto_broadcast(tmp->get_autob());
......
......@@ -170,6 +170,50 @@ NGRAPH_TEST(${BACKEND_NAME}, divide_int32)
EXPECT_EQ((vector<int32_t>{536871072, 214748365, 2, 2}), read_vector<int32_t>(result));
}
NGRAPH_TEST(${BACKEND_NAME}, divide_cpp_rounding_int32)
{
    // With pythondiv = false, integer division truncates toward zero
    // (C++ semantics): -10/3 == -3 and 10/-3 == -3.
    Shape shape{2, 2};
    auto param0 = make_shared<op::Parameter>(element::i32, shape);
    auto param1 = make_shared<op::Parameter>(element::i32, shape);
    auto div = make_shared<op::Divide>(param0, param1, false);
    auto f = make_shared<Function>(div, ParameterVector{param0, param1});

    auto backend = runtime::Backend::create("${BACKEND_NAME}");

    // Create some tensors for input/output
    auto dividend = backend->create_tensor(element::i32, shape);
    auto divisor = backend->create_tensor(element::i32, shape);
    auto result = backend->create_tensor(element::i32, shape);
    copy_data(dividend, vector<int32_t>{-10, -10, 10, 10});
    copy_data(divisor, vector<int32_t>{-3, 3, -3, 3});

    auto handle = backend->compile(f);
    handle->call_with_validate({result}, {dividend, divisor});
    EXPECT_EQ((vector<int32_t>{3, -3, -3, 3}), read_vector<int32_t>(result));
}
NGRAPH_TEST(${BACKEND_NAME}, divide_python_rounding_int32)
{
    // The default Divide constructor enables pythondiv, so integer division
    // floors (Python semantics): -10//3 == -4 and 10//-3 == -4.
    Shape shape{2, 2};
    auto param0 = make_shared<op::Parameter>(element::i32, shape);
    auto param1 = make_shared<op::Parameter>(element::i32, shape);
    auto div = make_shared<op::Divide>(param0, param1);
    auto f = make_shared<Function>(div, ParameterVector{param0, param1});

    auto backend = runtime::Backend::create("${BACKEND_NAME}");

    // Create some tensors for input/output
    auto dividend = backend->create_tensor(element::i32, shape);
    auto divisor = backend->create_tensor(element::i32, shape);
    auto result = backend->create_tensor(element::i32, shape);
    copy_data(dividend, vector<int32_t>{-10, -10, 10, 10});
    copy_data(divisor, vector<int32_t>{-3, 3, -3, 3});

    auto handle = backend->compile(f);
    handle->call_with_validate({result}, {dividend, divisor});
    EXPECT_EQ((vector<int32_t>{3, -4, -4, 3}), read_vector<int32_t>(result));
}
NGRAPH_TEST(${BACKEND_NAME}, divide_overload)
{
Shape shape{2, 2};
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment