Commit 4ec19b95 authored by nishant.b.patel

Add support for int32 output in reference kernel

parent b65e32e2
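
The hunks below add <uint8_t, uint8_t, int32_t, int32_t> and <uint8_t, int8_t, int32_t, int32_t> instantiations of the convolution kernel to the CPU builder and the reference-kernel dispatch, plus tests that request element::i32 output. As a minimal sketch of why the output type is widened to int32 (an illustrative helper only, not ngraph's actual kernel signature; the function name and zero-point parameters here are assumptions):

#include <cstddef>
#include <cstdint>

// Quantized ops multiply zero-point-adjusted 8-bit values; the products and
// their running sum easily exceed the 8-bit range, so accumulation happens in
// int32. An i32 output type stores that accumulator directly instead of
// requantizing it back down to u8/i8.
int32_t quantized_dot(const uint8_t* input,
                      const int8_t* filter,
                      std::size_t n,
                      int32_t input_zero_point,
                      int32_t filter_zero_point)
{
    int32_t acc = 0;
    for (std::size_t i = 0; i < n; ++i)
    {
        acc += (static_cast<int32_t>(input[i]) - input_zero_point) *
               (static_cast<int32_t>(filter[i]) - filter_zero_point);
    }
    return acc;
}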
@@ -112,7 +112,9 @@ namespace ngraph
};
functors.emplace_back(functor);
}
else
else if (args[0].get_element_type() == element::u8 &&
args[1].get_element_type() == element::u8 &&
out[0].get_element_type() == element::u8)
{
std::function<decltype(
runtime::cpu::kernel::convolution<uint8_t, uint8_t, uint8_t, int32_t>)>
@@ -132,6 +134,140 @@ namespace ngraph
auto padding_above = qconvolution->get_padding_above();
auto data_dilation_strides = qconvolution->get_data_dilation_strides();
auto functor = [&,
kernel,
arg0_shape,
arg1_shape,
arg0_buffer_index,
arg1_buffer_index,
arg2_buffer_index,
arg3_buffer_index,
arg4_buffer_index,
arg5_buffer_index,
arg6_buffer_index,
arg7_buffer_index,
out0_buffer_index,
result_shape,
window_movement_strides,
window_dilation_strides,
padding_below,
padding_above,
data_dilation_strides,
scales_size](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
vector<float> dyn_scales;
dyn_scales.assign(static_cast<float*>(ctx->buffer_data[arg2_buffer_index]),
static_cast<float*>(ctx->buffer_data[arg2_buffer_index]) +
scales_size);
kernel(ctx->buffer_data[arg0_buffer_index],
ctx->buffer_data[arg1_buffer_index],
ctx->buffer_data[out0_buffer_index],
arg0_shape,
arg1_shape,
result_shape,
window_movement_strides,
window_dilation_strides,
padding_below,
padding_above,
data_dilation_strides,
ctx->buffer_data[arg2_buffer_index],
ctx->buffer_data[arg3_buffer_index],
ctx->buffer_data[arg4_buffer_index],
ctx->buffer_data[arg5_buffer_index],
ctx->buffer_data[arg6_buffer_index],
ctx->buffer_data[arg7_buffer_index]);
};
functors.emplace_back(functor);
}
else if (args[0].get_element_type() == element::u8 &&
args[1].get_element_type() == element::u8 &&
out[0].get_element_type() == element::i32)
{
std::function<decltype(
runtime::cpu::kernel::convolution<uint8_t, uint8_t, int32_t, int32_t>)>
kernel;
kernel = runtime::cpu::kernel::convolution<uint8_t, uint8_t, int32_t, int32_t>;
auto arg3_buffer_index =
external_function->get_buffer_index(args[3].get_name()); // input scale
auto arg5_buffer_index =
external_function->get_buffer_index(args[5].get_name()); // filter scale
auto arg7_buffer_index =
external_function->get_buffer_index(args[7].get_name()); // output scale
auto window_movement_strides = qconvolution->get_window_movement_strides();
auto window_dilation_strides = qconvolution->get_window_dilation_strides();
auto padding_below = qconvolution->get_padding_below();
auto padding_above = qconvolution->get_padding_above();
auto data_dilation_strides = qconvolution->get_data_dilation_strides();
auto functor = [&,
kernel,
arg0_shape,
arg1_shape,
arg0_buffer_index,
arg1_buffer_index,
arg2_buffer_index,
arg3_buffer_index,
arg4_buffer_index,
arg5_buffer_index,
arg6_buffer_index,
arg7_buffer_index,
out0_buffer_index,
result_shape,
window_movement_strides,
window_dilation_strides,
padding_below,
padding_above,
data_dilation_strides,
scales_size](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
vector<float> dyn_scales;
dyn_scales.assign(static_cast<float*>(ctx->buffer_data[arg2_buffer_index]),
static_cast<float*>(ctx->buffer_data[arg2_buffer_index]) +
scales_size);
kernel(ctx->buffer_data[arg0_buffer_index],
ctx->buffer_data[arg1_buffer_index],
ctx->buffer_data[out0_buffer_index],
arg0_shape,
arg1_shape,
result_shape,
window_movement_strides,
window_dilation_strides,
padding_below,
padding_above,
data_dilation_strides,
ctx->buffer_data[arg2_buffer_index],
ctx->buffer_data[arg3_buffer_index],
ctx->buffer_data[arg4_buffer_index],
ctx->buffer_data[arg5_buffer_index],
ctx->buffer_data[arg6_buffer_index],
ctx->buffer_data[arg7_buffer_index]);
};
functors.emplace_back(functor);
}
else if (args[0].get_element_type() == element::u8 &&
args[1].get_element_type() == element::i8 &&
out[0].get_element_type() == element::i32)
{
std::function<decltype(
runtime::cpu::kernel::convolution<uint8_t, int8_t, int32_t, int32_t>)>
kernel;
kernel = runtime::cpu::kernel::convolution<uint8_t, int8_t, int32_t, int32_t>;
auto arg3_buffer_index =
external_function->get_buffer_index(args[3].get_name()); // input scale
auto arg5_buffer_index =
external_function->get_buffer_index(args[5].get_name()); // filter scale
auto arg7_buffer_index =
external_function->get_buffer_index(args[7].get_name()); // output scale
auto window_movement_strides = qconvolution->get_window_movement_strides();
auto window_dilation_strides = qconvolution->get_window_dilation_strides();
auto padding_below = qconvolution->get_padding_below();
auto padding_above = qconvolution->get_padding_above();
auto data_dilation_strides = qconvolution->get_data_dilation_strides();
auto functor = [&,
kernel,
arg0_shape,
......
@@ -1216,6 +1216,50 @@ private:
args[6]->get_data_ptr<const float>(),
args[7]->get_data_ptr<const uint8_t>());
}
else if (input_element_type == element::u8 && filter_element_type == element::i8 &&
output_element_type == element::i32)
{
reference::convolution<uint8_t, int8_t, int32_t, int32_t>(
args[0]->get_data_ptr<const uint8_t>(),
args[1]->get_data_ptr<const int8_t>(),
out[0]->get_data_ptr<int32_t>(),
node.get_input_shape(0),
node.get_input_shape(1),
node.get_output_shape(0),
qc->get_window_movement_strides(),
qc->get_window_dilation_strides(),
qc->get_padding_below(),
qc->get_padding_above(),
qc->get_data_dilation_strides(),
args[2]->get_data_ptr<const float>(),
args[3]->get_data_ptr<const uint8_t>(),
args[4]->get_data_ptr<const float>(),
args[5]->get_data_ptr<const int8_t>(),
args[6]->get_data_ptr<const float>(),
args[7]->get_data_ptr<const int32_t>());
}
else if (input_element_type == element::u8 && filter_element_type == element::u8 &&
output_element_type == element::i32)
{
reference::convolution<uint8_t, uint8_t, int32_t, int32_t>(
args[0]->get_data_ptr<const uint8_t>(),
args[1]->get_data_ptr<const uint8_t>(),
out[0]->get_data_ptr<int32_t>(),
node.get_input_shape(0),
node.get_input_shape(1),
node.get_output_shape(0),
qc->get_window_movement_strides(),
qc->get_window_dilation_strides(),
qc->get_padding_below(),
qc->get_padding_above(),
qc->get_data_dilation_strides(),
args[2]->get_data_ptr<const float>(),
args[3]->get_data_ptr<const uint8_t>(),
args[4]->get_data_ptr<const float>(),
args[5]->get_data_ptr<const uint8_t>(),
args[6]->get_data_ptr<const float>(),
args[7]->get_data_ptr<const int32_t>());
}
else
{
std::stringstream ss;
......
@@ -7712,3 +7712,56 @@ NGRAPH_TEST(${BACKEND_NAME}, quantized_conv_non_zero_zero_point)
<< "Vectors x and y differ at index " << i;
}
}
NGRAPH_TEST(${BACKEND_NAME}, quantized_conv_int32_output)
{
Shape shape_a{1, 1, 3, 4};
Shape shape_b{1, 1, 3, 3};
Shape shape_r{1, 1, 3, 4};
vector<uint8_t> a_data = {1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4};
vector<uint8_t> b_data = {1, 2, 3, 4, 5, 0, 0, 1, 2};
auto A = make_shared<op::Parameter>(element::u8, shape_a);
auto B = make_shared<op::Parameter>(element::u8, shape_b);
auto C = make_shared<op::Parameter>(element::f32, Shape{});
auto D = op::Constant::create(element::u8, Shape{}, {0});
auto E = make_shared<op::Parameter>(element::f32, Shape{});
auto F = op::Constant::create(element::u8, Shape{}, {0});
auto G = make_shared<op::Parameter>(element::f32, Shape{});
auto H = op::Constant::create(element::i32, Shape{}, {0});
auto CV = make_shared<op::QuantizedConvolution>(A,
B,
Strides{1, 1},
Strides{1, 1},
CoordinateDiff{1, 1},
CoordinateDiff{1, 1},
Strides{1, 1},
C,
D,
E,
F,
G,
H,
element::i32,
AxisSet{},
AxisSet{},
AxisSet{});
auto f = make_shared<Function>(NodeVector{CV}, ParameterVector{A, B, C, E, G});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// Create some tensors for input/output
auto a = backend->create_tensor(element::u8, shape_a);
copy_data(a, a_data);
auto b = backend->create_tensor(element::u8, shape_b);
copy_data(b, b_data);
auto c = backend->create_tensor(element::f32, Shape{});
copy_data(c, vector<float>{1.0f});
auto d = backend->create_tensor(element::f32, Shape{});
copy_data(d, vector<float>{1.0f});
auto e = backend->create_tensor(element::f32, Shape{});
copy_data(e, vector<float>{1.0f});
auto result = backend->create_tensor(element::i32, shape_r);
auto handle = backend->compile(f);
handle->call_with_validate({result}, {a, b, c, d, e});
EXPECT_EQ((vector<int32_t>{22, 34, 30, 32, 38, 72, 90, 43, 33, 52, 43, 39}),
read_vector<int32_t>(result));
}
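
As a sanity check on the expected values (all scales are 1 and all zero points are 0): with one row and column of zero padding, output element (0, 0) covers the padded window {0,0,0; 0,1,2; 0,5,6}; correlating it with the filter {1,2,3; 4,5,0; 0,1,2} gives 5*1 + 1*5 + 2*6 = 22, the first entry of the expected vector.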
@@ -2057,59 +2057,3 @@ TEST(cpu_test, tensor_copy_from_different_layout)
EXPECT_EQ((vector<uint8_t>{1, 4, 2, 5, 3, 6}), read_vector<uint8_t>(b));
}
// Adding this test case in cpu_test because the reference
// kernel doesn't support intermediate output types as of now
TEST(cpu_test, quantized_conv_int32_output)
{
Shape shape_a{1, 1, 3, 4};
Shape shape_b{1, 1, 3, 3};
Shape shape_r{1, 1, 3, 4};
vector<uint8_t> a_data = {1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4};
vector<int8_t> b_data = {1, 2, 3, 4, 5, 0, 0, 1, 2};
auto A = make_shared<op::Parameter>(element::u8, shape_a);
auto B = make_shared<op::Parameter>(element::i8, shape_b);
auto C = make_shared<op::Parameter>(element::f32, Shape{});
auto D = op::Constant::create(element::u8, Shape{}, {0});
auto E = make_shared<op::Parameter>(element::f32, Shape{});
auto F = op::Constant::create(element::i8, Shape{}, {0});
auto G = make_shared<op::Parameter>(element::f32, Shape{});
auto H = op::Constant::create(element::i32, Shape{}, {0});
auto CV = make_shared<op::QuantizedConvolution>(A,
B,
Strides{1, 1},
Strides{1, 1},
CoordinateDiff{1, 1},
CoordinateDiff{1, 1},
Strides{1, 1},
C,
D,
E,
F,
G,
H,
element::i32,
AxisSet{},
AxisSet{},
AxisSet{});
auto f = make_shared<Function>(NodeVector{CV}, ParameterVector{A, B, C, E, G});
auto backend = runtime::Backend::create("CPU");
// Create some tensors for input/output
auto a = backend->create_tensor(element::u8, shape_a);
copy_data(a, a_data);
auto b = backend->create_tensor(element::i8, shape_b);
copy_data(b, b_data);
auto c = backend->create_tensor(element::f32, Shape{});
copy_data(c, vector<float>{1.0f});
auto d = backend->create_tensor(element::f32, Shape{});
copy_data(d, vector<float>{1.0f});
auto e = backend->create_tensor(element::f32, Shape{});
copy_data(e, vector<float>{1.0f});
auto result = backend->create_tensor(element::i32, shape_r);
auto handle = backend->compile(f);
handle->call_with_validate({result}, {a, b, c, d, e});
EXPECT_EQ((vector<int32_t>{22, 34, 30, 32, 38, 72, 90, 43, 33, 52, 43, 39}),
read_vector<int32_t>(result));
}