Commit 37dc586c authored by Ayan Moitra, committed by Robert Kimball

TopK additional tests for nvGPU backend (#1946)

* added tests for malloc mode and graph transform

* Comment incorporation

* changed comparing backend to INTERPRETER

* Comments resolved + clang

* Addressed all comments

* IntelGPU does not support topk
parent d9f615b7
...@@ -129,12 +129,15 @@ namespace ngraph ...@@ -129,12 +129,15 @@ namespace ngraph
NodeVector new_goes; NodeVector new_goes;
for (auto& goe : goes) for (auto& goe : goes)
{ {
auto out_idx = auto goe_ptr = std::dynamic_pointer_cast<op::GetOutputElement>(goe);
std::dynamic_pointer_cast<op::GetOutputElement>(goe)->get_n(); if (goe_ptr)
auto new_goe = {
std::make_shared<op::GetOutputElement>(new_topk, out_idx); auto out_idx = goe_ptr->get_n();
ngraph::replace_node(goe, new_goe); auto new_goe =
new_goes.push_back(new_goe); std::make_shared<op::GetOutputElement>(new_topk, out_idx);
ngraph::replace_node(goe, new_goe);
new_goes.push_back(new_goe);
}
} }
Shape reordered_out_shape; Shape reordered_out_shape;
for (size_t j = 0; j < ndim; j++) for (size_t j = 0; j < ndim; j++)
......
...@@ -81,6 +81,9 @@ topk_3d_min_one ...@@ -81,6 +81,9 @@ topk_3d_min_one
topk_3d_min_partial topk_3d_min_partial
topk_5d_max_partial topk_5d_max_partial
topk_int64 topk_int64
topk_3d_large_input_max
topk_3d_large_input_min
topk_3d_single_output
zero_sized_abs zero_sized_abs
zero_sized_acos zero_sized_acos
zero_sized_add zero_sized_add
......
...@@ -9,3 +9,5 @@ batchnorm_fprop_bprop_2step ...@@ -9,3 +9,5 @@ batchnorm_fprop_bprop_2step
computation_reuse computation_reuse
generate_mask generate_mask
topk_int64 topk_int64
topk_3d_large_input_max
topk_3d_large_input_min
...@@ -571,3 +571,99 @@ NGRAPH_TEST(${BACKEND_NAME}, topk_2d_min_one) ...@@ -571,3 +571,99 @@ NGRAPH_TEST(${BACKEND_NAME}, topk_2d_min_one)
backend->call_with_validate(f1, {result1}, {a}); backend->call_with_validate(f1, {result1}, {a});
EXPECT_EQ((vector<float>{3, 1, 4}), read_vector<float>(result1)); EXPECT_EQ((vector<float>{3, 1, 4}), read_vector<float>(result1));
} }
// Cross-checks TopK (axis 1, k = 10, max selected by the trailing `true`) on a large
// {4, 8192, 5} f32 input against the INTERPRETER backend.
//  - Output 0 is read back as int32_t and must match the reference exactly.
//  - Output 1 is read back with the default execute() element type and compared with
//    test::all_close_f using the arguments (24, 0).
// Each output is wrapped in its own Function; the backend-under-test runs clones of the
// reference functions so that the two executions do not share graph nodes.
NGRAPH_TEST(${BACKEND_NAME}, topk_3d_large_input_max)
{
    Shape shape{4, 8192, 5};
    auto A = make_shared<op::Parameter>(element::f32, shape);
    auto B = make_shared<op::TopK>(A, 1, element::i32, 10, true);

    auto interp_f_0 =
        make_shared<Function>(make_shared<op::GetOutputElement>(B, 0), op::ParameterVector{A});
    auto interp_f_1 =
        make_shared<Function>(make_shared<op::GetOutputElement>(B, 1), op::ParameterVector{A});
    auto gpu_f_0 = ngraph::clone_function(*interp_f_0);
    auto gpu_f_1 = ngraph::clone_function(*interp_f_1);

    // One input vector per parameter, filled with 0, 1, 2, ... so every element is distinct.
    // The vector is built in place to avoid copying the large buffer into `args`.
    vector<vector<float>> args;
    for (const auto& param : interp_f_0->get_parameters())
    {
        args.emplace_back(shape_size(param->get_shape()));
        iota(args.back().begin(), args.back().end(), 0.0f);
    }

    auto interp_results_0 = execute<float, int32_t>(interp_f_0, args, "INTERPRETER");
    auto gpu_results_0 = execute<float, int32_t>(gpu_f_0, args, "${BACKEND_NAME}");
    // Without this check a backend that returns fewer (or no) results would pass vacuously,
    // because the loop below is bounded by the size of the backend's own result list.
    ASSERT_EQ(gpu_results_0.size(), interp_results_0.size());
    for (size_t i = 0; i < gpu_results_0.size(); i++)
    {
        EXPECT_EQ(gpu_results_0.at(i), interp_results_0.at(i));
    }

    auto interp_results_1 = execute(interp_f_1, args, "INTERPRETER");
    auto gpu_results_1 = execute(gpu_f_1, args, "${BACKEND_NAME}");
    ASSERT_EQ(gpu_results_1.size(), interp_results_1.size());
    for (size_t i = 0; i < gpu_results_1.size(); i++)
    {
        EXPECT_TRUE(test::all_close_f(gpu_results_1.at(i), interp_results_1.at(i), 24, 0));
    }
}
// Cross-checks TopK (axis 1, k = 10, min selected by the trailing `false`) on a large
// {4, 8192, 5} f32 input against the INTERPRETER backend.
//  - Output 0 is read back as int32_t and must match the reference exactly.
//  - Output 1 is read back with the default execute() element type and compared with
//    test::all_close_f using the arguments (24, 0).
// Each output is wrapped in its own Function; the backend-under-test runs clones of the
// reference functions so that the two executions do not share graph nodes.
NGRAPH_TEST(${BACKEND_NAME}, topk_3d_large_input_min)
{
    Shape shape{4, 8192, 5};
    auto A = make_shared<op::Parameter>(element::f32, shape);
    auto B = make_shared<op::TopK>(A, 1, element::i32, 10, false);

    auto interp_f_0 =
        make_shared<Function>(make_shared<op::GetOutputElement>(B, 0), op::ParameterVector{A});
    auto interp_f_1 =
        make_shared<Function>(make_shared<op::GetOutputElement>(B, 1), op::ParameterVector{A});
    auto gpu_f_0 = ngraph::clone_function(*interp_f_0);
    auto gpu_f_1 = ngraph::clone_function(*interp_f_1);

    // One input vector per parameter, filled with 0, 1, 2, ... so every element is distinct.
    // The vector is built in place to avoid copying the large buffer into `args`.
    vector<vector<float>> args;
    for (const auto& param : interp_f_0->get_parameters())
    {
        args.emplace_back(shape_size(param->get_shape()));
        iota(args.back().begin(), args.back().end(), 0.0f);
    }

    auto interp_results_0 = execute<float, int32_t>(interp_f_0, args, "INTERPRETER");
    auto gpu_results_0 = execute<float, int32_t>(gpu_f_0, args, "${BACKEND_NAME}");
    // Without this check a backend that returns fewer (or no) results would pass vacuously,
    // because the loop below is bounded by the size of the backend's own result list.
    ASSERT_EQ(gpu_results_0.size(), interp_results_0.size());
    for (size_t i = 0; i < gpu_results_0.size(); i++)
    {
        EXPECT_EQ(gpu_results_0.at(i), interp_results_0.at(i));
    }

    auto interp_results_1 = execute(interp_f_1, args, "INTERPRETER");
    auto gpu_results_1 = execute(gpu_f_1, args, "${BACKEND_NAME}");
    ASSERT_EQ(gpu_results_1.size(), interp_results_1.size());
    for (size_t i = 0; i < gpu_results_1.size(); i++)
    {
        EXPECT_TRUE(test::all_close_f(gpu_results_1.at(i), interp_results_1.at(i), 24, 0));
    }
}
// Runs TopK (axis 1, k = 2, min) on a {2, 3, 2} f32 tensor through a Function that exposes
// only output 0 of the TopK node, then compares the i32 result with hand-computed values.
NGRAPH_TEST(${BACKEND_NAME}, topk_3d_single_output)
{
    Shape input_shape{2, 3, 2};
    Shape result_shape{2, 2, 2};

    // Graph: Parameter -> TopK -> GetOutputElement(0)
    auto input_param = make_shared<op::Parameter>(element::f32, input_shape);
    auto topk = make_shared<op::TopK>(input_param, 1, element::i32, 2, false);
    auto first_output = make_shared<op::GetOutputElement>(topk, 0);
    auto func = make_shared<Function>(first_output, op::ParameterVector{input_param});

    auto backend = runtime::Backend::create("${BACKEND_NAME}");

    // Input and output tensors on the backend under test
    auto input_tensor = backend->create_tensor(element::f32, input_shape);
    vector<float> input_values{12, 2, 10, 9, 8, 4, 6, 1, 5, 3, 11, 7};
    copy_data(input_tensor, input_values);
    auto output_tensor = backend->create_tensor(element::i32, result_shape);

    backend->call_with_validate(func, {output_tensor}, {input_tensor});

    vector<int32_t> expected{2, 0, 1, 2, 1, 0, 0, 1};
    EXPECT_EQ(expected, read_vector<int32_t>(output_tensor));
}
...@@ -22,7 +22,11 @@ ...@@ -22,7 +22,11 @@
#include "ngraph/runtime/gpu/gpu_primitive_emitter.hpp" #include "ngraph/runtime/gpu/gpu_primitive_emitter.hpp"
#include "ngraph/runtime/gpu/gpu_util.hpp" #include "ngraph/runtime/gpu/gpu_util.hpp"
#include "ngraph/runtime/gpu/nvshape.hpp" #include "ngraph/runtime/gpu/nvshape.hpp"
#include "ngraph/util.hpp"
#include "util/all_close.hpp"
#include "util/all_close_f.hpp"
using namespace std;
using namespace ngraph; using namespace ngraph;
TEST(gpu_test, gpu_shape_from_64bit_shape) TEST(gpu_test, gpu_shape_from_64bit_shape)
...@@ -160,3 +164,56 @@ TEST(gpu_test, memory_manager_seperate_workspaces_allocsize) ...@@ -160,3 +164,56 @@ TEST(gpu_test, memory_manager_seperate_workspaces_allocsize)
emitter.allocate_primitive_memory(); emitter.allocate_primitive_memory();
EXPECT_EQ(emitter.sizeof_device_allocation(), total_size); EXPECT_EQ(emitter.sizeof_device_allocation(), total_size);
} }
// Builds a graph in which each of the two TopK outputs (axis 1, k = 2, max) feeds two
// separate Add nodes, runs it on the "GPU" backend with all-zero addends, and checks:
//  - both Adds on output 0 yield the expected i32 values,
//  - both Adds on output 1 yield the expected f32 values (via test::all_close_f),
//  - the function contains 10 Reshape ops once the call has completed.
TEST(gpu_test, topk_fanout_graph_transform)
{
    Shape in_shape{2, 3, 2};
    Shape topk_shape{2, 2, 2};

    // Parameters: the TopK input plus one zero-addend per Add node.
    auto data = make_shared<op::Parameter>(element::f32, in_shape);
    auto i32_addend_0 = make_shared<op::Parameter>(element::i32, topk_shape);
    auto i32_addend_1 = make_shared<op::Parameter>(element::i32, topk_shape);
    auto f32_addend_0 = make_shared<op::Parameter>(element::f32, topk_shape);
    auto f32_addend_1 = make_shared<op::Parameter>(element::f32, topk_shape);

    // TopK with each output consumed twice.
    auto topk = make_shared<op::TopK>(data, 1, element::i32, 2, true);
    auto topk_out_0 = make_shared<op::GetOutputElement>(topk, 0);
    auto topk_out_1 = make_shared<op::GetOutputElement>(topk, 1);
    auto sum_0 = make_shared<op::Add>(i32_addend_0, topk_out_0);
    auto sum_1 = make_shared<op::Add>(i32_addend_1, topk_out_0);
    auto sum_2 = make_shared<op::Add>(f32_addend_0, topk_out_1);
    auto sum_3 = make_shared<op::Add>(f32_addend_1, topk_out_1);

    NodeVector outputs{sum_0, sum_1, sum_2, sum_3};
    op::ParameterVector params{data, i32_addend_0, i32_addend_1, f32_addend_0, f32_addend_1};
    auto func = make_shared<Function>(outputs, params);

    auto backend = runtime::Backend::create("GPU");

    // Input tensors: the data tensor followed by the four zero-filled addends.
    vector<float> data_values{
        1.0f, 2.0f, 3.0f, 4.0f, 4.0f, 3.0f, 2.0f, 1.0f, 3.0f, 3.0f, 1.0f, 4.0f};
    vector<int32_t> zeros_i32(8, 0);
    vector<float> zeros_f32(8, 0);

    auto data_t = backend->create_tensor(element::f32, in_shape);
    copy_data(data_t, data_values);
    auto i32_zero_t0 = backend->create_tensor(element::i32, topk_shape);
    copy_data(i32_zero_t0, zeros_i32);
    auto i32_zero_t1 = backend->create_tensor(element::i32, topk_shape);
    copy_data(i32_zero_t1, zeros_i32);
    auto f32_zero_t0 = backend->create_tensor(element::f32, topk_shape);
    copy_data(f32_zero_t0, zeros_f32);
    auto f32_zero_t1 = backend->create_tensor(element::f32, topk_shape);
    copy_data(f32_zero_t1, zeros_f32);

    // Result tensors, one per Add node.
    auto out_0 = backend->create_tensor(element::i32, topk_shape);
    auto out_1 = backend->create_tensor(element::i32, topk_shape);
    auto out_2 = backend->create_tensor(element::f32, topk_shape);
    auto out_3 = backend->create_tensor(element::f32, topk_shape);

    backend->call_with_validate(func,
                                {out_0, out_1, out_2, out_3},
                                {data_t, i32_zero_t0, i32_zero_t1, f32_zero_t0, f32_zero_t1});

    // Adding zero leaves the TopK outputs unchanged, so both consumers of an output
    // must see the same values.
    vector<int32_t> expected_out_0{2, 1, 1, 2, 1, 2, 0, 1};
    vector<float> expected_out_1{4, 4, 3, 3, 3, 4, 2, 3};
    EXPECT_EQ(expected_out_0, read_vector<int32_t>(out_0));
    EXPECT_EQ(expected_out_0, read_vector<int32_t>(out_1));
    EXPECT_TRUE(test::all_close_f(expected_out_1, read_vector<float>(out_2), 24, 0));
    EXPECT_TRUE(test::all_close_f(expected_out_1, read_vector<float>(out_3), 24, 0));

    EXPECT_EQ(count_ops_of_type<ngraph::op::Reshape>(func), 10);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment