Commit baf1cb00 authored by nishant.b.patel

Changed the onnx rt conv test case to use ngraph ops

parent aba06374
......@@ -1489,40 +1489,6 @@ TEST(builder, scaled_QDot_u8u8)
EXPECT_EQ((vector<uint8_t>{3, 13, 23}), read_vector<uint8_t>(result));
}
/// Finds the smallest and largest element of `vec`.
///
/// @param vec  values to scan
/// @param min  out: smallest element of `vec`, or 0 when `vec` is empty
/// @param max  out: largest element of `vec`, or 0 when `vec` is empty
void FindMinMax(const std::vector<float>& vec, float* min, float* max)
{
    *min = *max = 0;
    if (vec.empty())
    {
        // min_element/max_element return end() for an empty range and
        // dereferencing end() is undefined behaviour; keep the 0 defaults.
        return;
    }
    *min = *std::min_element(vec.begin(), vec.end());
    *max = *std::max_element(vec.begin(), vec.end());
}
/// Derives a scale and zero point for the quantization range 0-255.
///
/// The [min, max] interval is first extended to include 0. The scale is the
/// width of that interval divided by 255, and the zero point is
/// `0 - min / scale`, clamped to [0, 255] and rounded.
///
/// @param min         lower bound of the real-valued range
/// @param max         upper bound of the real-valued range
/// @param scale       out: real-valued step per quantized unit
/// @param zero_point  out: zero point, clamped to [0, 255]
void FindScaleAndZeroPoint(float min, float max, float* scale, uint8_t* zero_point)
{
    // Extend the range so that it always contains 0.
    min = std::min(min, 0.f);
    max = std::max(max, 0.f);

    const float qmin = 0;
    const float qmax = 255;

    // After the extension above, min == max can only mean both are 0. The
    // plain formula would then give scale == 0 and a 0/0 (NaN) zero point,
    // so use a scale of 1 for this degenerate range instead.
    *scale = (max == min) ? 1.f : (max - min) / (qmax - qmin);

    const float initial_zero_point = qmin - min / *scale;
    const float clamped_zero_point = std::max(0.f, std::min(255.f, initial_zero_point));
    *zero_point = static_cast<uint8_t>(std::round(clamped_zero_point));
}
/// Quantizes each element of `input` to uint8 using `scale` and `zero_point`.
///
/// Each value is computed as `round(input[i] / scale) + zero_point`, clamped
/// to [0, 255] and stored at the same index of `*input_quantized`.
///
/// @param scale            divisor applied to every input value
/// @param zero_point       offset added after rounding
/// @param input            values to quantize
/// @param input_quantized  out: receives one uint8 per input element; grown to
///                         `input.size()` if it is smaller, never shrunk
void Quantize(float scale,
              uint8_t zero_point,
              const std::vector<float>& input,
              std::vector<uint8_t>* input_quantized)
{
    // Guard against out-of-bounds writes when the caller did not pre-size
    // the output. A larger output is left as is so its trailing elements
    // stay untouched, as before.
    if (input_quantized->size() < input.size())
    {
        input_quantized->resize(input.size());
    }

    for (size_t i = 0; i < input.size(); i++)
    {
        const float shifted = std::round(input[i] / scale) + zero_point;
        const float clamped_val = std::max(0.f, std::min(255.f, shifted));
        (*input_quantized)[i] = static_cast<uint8_t>(clamped_val);
    }
}
TEST(builder, scaled_QC_non_zero_zero_point)
{
Shape shape_a{1, 1, 7, 7}; // input shape
......@@ -1546,7 +1512,8 @@ TEST(builder, scaled_QC_non_zero_zero_point)
0.034442365169525146f, -0.33322954177856445f, 0.06049239635467529f,
0.42619407176971436f};
vector<float> W = {-0.4406261742115021f};
auto expected_vals = {-0.19936637580394745f, -0.06828942894935608f, -0.04934731498360634f,
vector<float> expected_vals = {
-0.19936637580394745f, -0.06828942894935608f, -0.04934731498360634f,
0.17369966208934784f, -0.11574628204107285f, -0.05910799279808998f,
0.1197819635272026f, 0.18959586322307587f, 0.1182001456618309f,
-0.17154212296009064f, 0.06006614491343498f, 0.0042258151806890965f,
......@@ -1564,32 +1531,56 @@ TEST(builder, scaled_QC_non_zero_zero_point)
-0.015176207758486271f, 0.14682966470718384f, -0.02665453404188156f,
-0.18779225647449493f};
float lhs_min, lhs_max, rhs_min, rhs_max, result_min, result_max;
FindMinMax(X, &lhs_min, &lhs_max);
FindMinMax(W, &rhs_min, &rhs_max);
FindMinMax(expected_vals, &result_min, &result_max);
auto lhs = make_shared<op::Parameter>(element::f32, shape_a);
auto rhs = make_shared<op::Parameter>(element::f32, shape_b);
auto result = make_shared<op::Parameter>(element::f32, shape_r);
AxisSet quantization_axes;
op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN;
auto lhs_scale = op::Constant::create(element::f32, Shape{}, {0.00369205});
auto lhs_zero_point = op::Constant::create(element::u8, Shape{}, {132});
auto rhs_scale = op::Constant::create(element::f32, Shape{}, {0.00172795});
auto rhs_zero_point = op::Constant::create(element::u8, Shape{}, {255});
auto result_scale = op::Constant::create(element::f32, Shape{}, {0.00162681});
auto result_zero_point = op::Constant::create(element::u8, Shape{}, {123});
float lhs_scale, rhs_scale, result_scale;
uint8_t lhs_zero_point, rhs_zero_point, result_zero_point;
FindScaleAndZeroPoint(lhs_min, lhs_max, &lhs_scale, &lhs_zero_point);
FindScaleAndZeroPoint(rhs_min, rhs_max, &rhs_scale, &rhs_zero_point);
FindScaleAndZeroPoint(result_min, result_max, &result_scale, &result_zero_point);
auto quantize_lhs = make_shared<op::Quantize>(
lhs, lhs_scale, lhs_zero_point, element::u8, quantization_axes, round_mode);
auto quantize_rhs = make_shared<op::Quantize>(
rhs, rhs_scale, rhs_zero_point, element::u8, quantization_axes, round_mode);
auto quantize_result = make_shared<op::Quantize>(
result, result_scale, result_zero_point, element::u8, quantization_axes, round_mode);
vector<uint8_t> x_quantized(X.size()), w_quantized(W.size()),
result_quantized(expected_vals.size());
Quantize(lhs_scale, lhs_zero_point, X, &x_quantized);
Quantize(rhs_scale, rhs_zero_point, W, &w_quantized);
Quantize(result_scale, result_zero_point, expected_vals, &result_quantized);
auto lhs_f = make_shared<Function>(quantize_lhs, ParameterVector{lhs});
auto rhs_f = make_shared<Function>(quantize_rhs, ParameterVector{rhs});
auto result_f = make_shared<Function>(quantize_result, ParameterVector{result});
auto backend = runtime::Backend::create("CPU");
auto lhs_data = backend->create_tensor(element::f32, shape_a);
auto rhs_data = backend->create_tensor(element::f32, shape_b);
auto result_data = backend->create_tensor(element::f32, shape_r);
auto lhs_output = backend->create_tensor(element::u8, shape_a);
auto rhs_output = backend->create_tensor(element::u8, shape_b);
auto result_output = backend->create_tensor(element::u8, shape_r);
copy_data(lhs_data, X);
copy_data(rhs_data, W);
copy_data(result_data, expected_vals);
auto lhs_handle = backend->compile(lhs_f);
auto rhs_handle = backend->compile(rhs_f);
auto result_handle = backend->compile(result_f);
lhs_handle->call_with_validate({lhs_output}, {lhs_data});
rhs_handle->call_with_validate({rhs_output}, {rhs_data});
result_handle->call_with_validate({result_output}, {result_data});
auto A = make_shared<op::Parameter>(element::u8, shape_a);
auto B = make_shared<op::Parameter>(element::u8, shape_b);
auto input_scale = op::Constant::create(element::f32, Shape{}, {lhs_scale});
auto filter_scale = op::Constant::create(element::f32, Shape{}, {rhs_scale});
auto output_scale = op::Constant::create(element::f32, Shape{}, {result_scale});
auto input_zero_point = op::Constant::create(element::u8, Shape{}, {lhs_zero_point});
auto filter_zero_point = op::Constant::create(element::u8, Shape{}, {rhs_zero_point});
auto output_zero_point = op::Constant::create(element::u8, Shape{}, {result_zero_point});
auto CV = make_shared<ngraph::op::QuantizedConvolution>(A,
B,
Strides{1, 1}, // move_strides
......@@ -1597,30 +1588,29 @@ TEST(builder, scaled_QC_non_zero_zero_point)
CoordinateDiff{0, 0}, // below_pads
CoordinateDiff{0, 0}, // above_pads
Strides{1, 1}, // data_dilation
input_scale,
input_zero_point,
filter_scale,
filter_zero_point,
output_scale,
output_zero_point,
lhs_scale,
lhs_zero_point,
rhs_scale,
rhs_zero_point,
result_scale,
result_zero_point,
element::u8,
AxisSet{});
auto f = make_shared<Function>(NodeVector{CV}, ParameterVector{A, B});
constant_fold(f);
auto backend = runtime::Backend::create("CPU");
// Create some tensors for input/output
auto a = backend->create_tensor(element::u8, shape_a);
copy_data(a, x_quantized);
copy_data(a, read_vector<uint8_t>(lhs_output));
auto b = backend->create_tensor(element::u8, shape_b);
copy_data(b, w_quantized);
auto result = backend->create_tensor(element::u8, shape_r);
copy_data(b, read_vector<uint8_t>(rhs_output));
auto final_result = backend->create_tensor(element::u8, shape_r);
auto handle = backend->compile(f);
handle->call_with_validate({result}, {a, b});
handle->call_with_validate({final_result}, {a, b});
for (int i = 0; i < 49; ++i)
{
EXPECT_EQ(result_quantized[i], (read_vector<uint8_t>(result))[i])
EXPECT_EQ((read_vector<uint8_t>(result_output))[i], (read_vector<uint8_t>(final_result))[i])
<< "Vectors x and y differ at index " << i;
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment