Unverified Commit 40dda4eb authored by Robert Kimball, committed by GitHub

move GPU specific test to GPU only (#2191)

* move GPU specific test to GPU only

* fix unit test invocation

* fix compile error

* fix compile error

* style

* fix runtime error
parent 453a6a3c
......@@ -60,8 +60,6 @@ quantize_ROUND_TOWARD_INFINITY
quantize_ROUND_TOWARD_ZERO
quantize_ROUND_UP
quantize_ROUND_DOWN
# maxpool bug which requires maxpool backprop op change to fix
maxpool_bprop_larger_than_cache
shape_of_scalar
shape_of_vector
shape_of_matrix
......
......@@ -148,4 +148,3 @@ max_3d_to_scalar_double
argmin_trivial_in_i32
argmax_4D_axis_3_i64_in_i32
argmin_trivial_in_double
maxpool_bprop_larger_than_cache
......@@ -60,7 +60,6 @@ reduce_3d_to_vector # To debug: possible broadcasting error?
replace_slice_matrix_inplace
max_pool_2d_1channel_1image_overpadded
max_pool_3d
maxpool_bprop_larger_than_cache
reduce_window_emulating_max_pool_1d_1channel_1image
reduce_window_emulating_max_pool_1d_1channel_2image
reduce_window_emulating_max_pool_1d_2channel_2image
......
......@@ -191,8 +191,6 @@ TEST_P(all_close_f_param_test, test_boundaries)
}
// Avoid warning with how gtest defines INSTANTIATE_TEST_CASE_P
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmissing-variable-declarations"
INSTANTIATE_TEST_CASE_P(
test_simple_floats_with_range_of_precisions,
all_close_f_param_test,
......@@ -213,7 +211,6 @@ INSTANTIATE_TEST_CASE_P(
testing::Values(8,
24), // For broader range of testing use testing::Range(8, 25)
testing::Range(0, 5)), );
#pragma GCC diagnostic pop
class all_close_f_double_param_test : public testing::TestWithParam<::std::tuple<double, int>>
{
......@@ -317,8 +314,6 @@ TEST_P(all_close_f_double_param_test, test_boundaries)
}
// Avoid warning with how gtest defines INSTANTIATE_TEST_CASE_P
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmissing-variable-declarations"
INSTANTIATE_TEST_CASE_P(
test_simple_doubles_with_range_of_precisions,
all_close_f_double_param_test,
......@@ -326,7 +321,6 @@ INSTANTIATE_TEST_CASE_P(
testing::Values(
0., -0., 1., -1., 10., -10., 0.75, -0.75, 0.5, -0.5, 0.25, -0.25, 0.125, -0.125),
testing::Range(0, 17)), );
#pragma GCC diagnostic pop
// Test the exact bounds near +0.f
//
......
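An aside on the parameterized-test machinery in the hunk above: the following minimal sketch (my own illustration, not part of this diff; the fixture name, parameter values, and assertions are made up) shows how a TEST_P body pairs with INSTANTIATE_TEST_CASE_P and a testing::Combine generator, which is presumably how all_close_f_param_test is instantiated from the Values/Range pair visible above.

// Minimal, self-contained sketch of the TEST_P / INSTANTIATE_TEST_CASE_P
// pattern used by all_close_f_param_test. Names and values are illustrative
// only, not taken from the ngraph sources.
#include <cmath>
#include <tuple>
#include "gtest/gtest.h"

class example_param_test : public ::testing::TestWithParam<::std::tuple<float, int>>
{
};

TEST_P(example_param_test, params_are_well_formed)
{
    float value = ::std::get<0>(GetParam());
    int mantissa_bits = ::std::get<1>(GetParam());
    // Each (value, mantissa_bits) pair produced by the generator below
    // runs this body as one independent test instance.
    EXPECT_TRUE(::std::isfinite(value));
    EXPECT_GE(mantissa_bits, 8);
    EXPECT_LE(mantissa_bits, 24);
}

// Combine takes the cartesian product of the two generators; the trailing
// empty macro argument mirrors the style used in the diff above.
INSTANTIATE_TEST_CASE_P(example_instantiation,
                        example_param_test,
                        ::testing::Combine(::testing::Values(0.f, 1.f, -1.f),
                                           ::testing::Values(8, 24)), );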
......@@ -1227,84 +1227,3 @@ NGRAPH_TEST_P(${BACKEND_NAME}, avg_pool_3d_params, avg_pool_3d_uneven_strided_pa
// avg_pool_3d case generation
NGRAPH_INSTANTIATE_TEST_CASE_P(${BACKEND_NAME}, include_pad, avg_pool_3d_params, testing::Bool());
//
// This test primarily checks that maxpool backprop functions
// correctly when the input tensor is larger than most cache sizes.
// Here the to-be-pooled tensor has a single non-trivial
// dimension:
//
// x : [[0, 1, 0, 1, 0, 1, ... , 0, 1]] <--- input data
// ---- ---- ---- ... ---- <--- pooling windows
// y : [[ 1 , 1 , 1 , ... , 1]] <--- max pooled output
//
// The pooling window is size 2 and stride 2, so the windows
// do not overlap. Thus, each window will effectively see [0, 1]
// as its input data for max pooling. The resulting output tensor
// of pooling will be half the size of x, with all elements equal to 1 as
// seen above.
// Therefore, for the backward pooling operation with the same window shape
// and strides, the value of dy will only propagate to the positions in
// dx that correspond to a value of 1 in the corresponding input tensor x:
//
// dy : [[2, 3, ... , 4]]
// x : [[0, 1, 0, 1, ... , 0, 1]]
// dx : [[0, 2, 0, 3, ... , 0, 4]]
//
NGRAPH_TEST(${BACKEND_NAME}, maxpool_bprop_larger_than_cache)
{
Shape window_shape{1, 2};
Strides move_strides{1, 2};
Shape padding_below{0, 0};
Shape padding_above{0, 0};
// 200 MB tensor to exceed cache
const size_t num_elements = 50 * 1024 * 1024;
auto ceil_div = [](size_t x, size_t y) { return 1 + ((x - 1) / y); };
const size_t num_pooled_elements = ceil_div(num_elements + padding_below.back() +
padding_above.back() - window_shape.back() + 1,
move_strides.back());
Shape shape_x{1, 1, 1, num_elements};
Shape shape_y{1, 1, 1, num_pooled_elements};
auto x = make_shared<op::Parameter>(element::f32, shape_x);
auto dy = make_shared<op::Parameter>(element::f32, shape_y);
auto bprop =
make_shared<Function>(make_shared<op::MaxPoolBackprop>(
x, dy, window_shape, move_strides, padding_below, padding_above),
ParameterVector{x, dy});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// initialize x to array of alternating 0s and 1s as described above
std::vector<float> x_data(num_elements, 0);
for (auto i = 0u; i < num_elements; i++)
{
x_data[i] = (i % 2);
}
auto x_t = backend->create_tensor(element::f32, shape_x);
copy_data(x_t, x_data);
// use random data for deltas dy
std::vector<float> dy_data(num_pooled_elements);
test::Uniform<float> rng(0.0f, 1.0f);
rng.initialize(dy_data);
auto dy_t = backend->create_tensor(element::f32, shape_y);
copy_data(dy_t, dy_data);
// create result deltas tensor and run the backward max pooling operation
auto dx_t = backend->create_tensor(element::f32, shape_x);
backend->call_with_validate(backend->compile(bprop), {dx_t}, {x_t, dy_t});
// expected values should be dy with 0s left inserted
// for each delta, see test description above for details
std::vector<float> expected_dx(num_elements, 0);
for (auto i = 0u, j = 0u; i < num_elements; i++)
{
if (x_data[i])
{
expected_dx[i] = x_data[i] * dy_data[j++];
}
}
EXPECT_EQ(expected_dx, read_vector<float>(dx_t));
}
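As a quick check on the output-shape arithmetic in the test removed above (and re-added for GPU below), this standalone sketch (my own illustration, not part of the ngraph sources) verifies that the ceil_div formula yields exactly N / 2 pooled elements for an even-length input with window 2, stride 2, and no padding.

// Standalone sketch verifying the pooled-output-length formula used in the
// test: for window 2, stride 2, and no padding, an input of N elements
// produces N / 2 pooled elements.
#include <cassert>
#include <cstddef>
#include <initializer_list>

int main()
{
    auto ceil_div = [](std::size_t x, std::size_t y) { return 1 + ((x - 1) / y); };
    const std::size_t window = 2;
    const std::size_t stride = 2;
    const std::size_t pad_below = 0;
    const std::size_t pad_above = 0;
    for (std::size_t n : {4u, 6u, 8u, 52428800u}) // 52428800 == 50 * 1024 * 1024
    {
        std::size_t pooled = ceil_div(n + pad_below + pad_above - window + 1, stride);
        assert(pooled == n / 2);
    }
    return 0;
}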
......@@ -25,6 +25,7 @@
#include "ngraph/util.hpp"
#include "util/all_close.hpp"
#include "util/all_close_f.hpp"
#include "util/random.hpp"
using namespace std;
using namespace ngraph;
......@@ -217,3 +218,85 @@ TEST(gpu_test, topk_fanout_graph_transform)
auto reshape_count = count_ops_of_type<ngraph::op::Reshape>(gpu_f);
EXPECT_EQ(reshape_count, 10);
}
//
// This test primarily checks that maxpool backprop functions
// correctly when the input tensor is larger than most cache sizes.
// Here the to-be-pooled tensor has a single non-trivial
// dimension:
//
// x : [[0, 1, 0, 1, 0, 1, ... , 0, 1]] <--- input data
// ---- ---- ---- ... ---- <--- pooling windows
// y : [[ 1 , 1 , 1 , ... , 1]] <--- max pooled output
//
// The pooling window is size 2 and stride 2, so the windows
// do not overlap. Thus, each window will effectively see [0, 1]
// as its input data for max pooling. The resulting output tensor
// of pooling will be half the size of x, with all elements equal to 1 as
// seen above.
// Therefore, for the backward pooling operation with the same window shape
// and strides, the value of dy will only propagate to the positions in
// dx that correspond to a value of 1 in the corresponding input tensor x:
//
// dy : [[2, 3, ... , 4]]
// x : [[0, 1, 0, 1, ... , 0, 1]]
// dx : [[0, 2, 0, 3, ... , 0, 4]]
//
TEST(gpu_test, maxpool_bprop_larger_than_cache)
{
Shape window_shape{1, 2};
Strides move_strides{1, 2};
Shape padding_below{0, 0};
Shape padding_above{0, 0};
// 200 MB tensor to exceed cache
const size_t num_elements = 50 * 1024 * 1024;
auto ceil_div = [](size_t x, size_t y) { return 1 + ((x - 1) / y); };
const size_t num_pooled_elements = ceil_div(num_elements + padding_below.back() +
padding_above.back() - window_shape.back() + 1,
move_strides.back());
Shape shape_x{1, 1, 1, num_elements};
Shape shape_y{1, 1, 1, num_pooled_elements};
auto x = make_shared<op::Parameter>(element::f32, shape_x);
auto dy = make_shared<op::Parameter>(element::f32, shape_y);
auto bprop =
make_shared<Function>(make_shared<op::MaxPoolBackprop>(
x, dy, window_shape, move_strides, padding_below, padding_above),
ParameterVector{x, dy});
auto backend = runtime::Backend::create("GPU");
// initialize x to array of alternating 0s and 1s as described above
std::vector<float> x_data(num_elements, 0);
for (auto i = 0u; i < num_elements; i++)
{
x_data[i] = (i % 2);
}
auto x_t = backend->create_tensor(element::f32, shape_x);
copy_data(x_t, x_data);
// use random data for deltas dy
std::vector<float> dy_data(num_pooled_elements);
test::Uniform<float> rng(0.0f, 1.0f);
rng.initialize(dy_data);
auto dy_t = backend->create_tensor(element::f32, shape_y);
copy_data(dy_t, dy_data);
// create result deltas tensor and run the backward max pooling operation
auto dx_t = backend->create_tensor(element::f32, shape_x);
auto handle = backend->compile(bprop);
backend->call_with_validate(handle, {dx_t}, {x_t, dy_t});
// expected values should be dy with 0s left inserted
// for each delta, see test description above for details
std::vector<float> expected_dx(num_elements, 0);
for (auto i = 0u, j = 0u; i < num_elements; i++)
{
if (x_data[i])
{
expected_dx[i] = x_data[i] * dy_data[j++];
}
}
EXPECT_EQ(expected_dx, read_vector<float>(dx_t));
}
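To make the dy-to-dx scatter described in the comment above concrete, here is a small standalone sketch (illustrative values only, not part of the test) that reproduces the expected_dx construction on the tiny example from the comment: x = [0, 1, 0, 1, 0, 1] and dy = [2, 3, 4] give dx = [0, 2, 0, 3, 0, 4].

// Standalone sketch of the expected-gradient construction used in the test:
// each dy value lands on the position of its window's maximum (the 1 in x),
// and every other position of dx stays 0.
#include <cassert>
#include <cstddef>
#include <vector>

int main()
{
    std::vector<float> x{0, 1, 0, 1, 0, 1};
    std::vector<float> dy{2, 3, 4}; // one delta per non-overlapping window of size 2
    std::vector<float> dx(x.size(), 0.0f);
    for (std::size_t i = 0, j = 0; i < x.size(); i++)
    {
        if (x[i] != 0.0f)
        {
            dx[i] = x[i] * dy[j++]; // the max element receives the whole delta
        }
    }
    assert((dx == std::vector<float>{0, 2, 0, 3, 0, 4}));
    return 0;
}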