Unverified Commit 40dda4eb authored by Robert Kimball, committed by GitHub

move GPU specific test to GPU only (#2191)

* move GPU specific test to GPU only

* fix unit test invocation

* fix compile error

* fix compile error

* style

* fix runtime error
parent 453a6a3c
......@@ -60,8 +60,6 @@ quantize_ROUND_TOWARD_INFINITY
quantize_ROUND_TOWARD_ZERO
quantize_ROUND_UP
quantize_ROUND_DOWN
# maxpool bug which requires maxpool backprop op change to fix
maxpool_bprop_larger_than_cache
shape_of_scalar
shape_of_vector
shape_of_matrix
......
......@@ -148,4 +148,3 @@ max_3d_to_scalar_double
argmin_trivial_in_i32
argmax_4D_axis_3_i64_in_i32
argmin_trivial_in_double
maxpool_bprop_larger_than_cache
......@@ -60,7 +60,6 @@ reduce_3d_to_vector # To debug: possible broadcasting error?
replace_slice_matrix_inplace
max_pool_2d_1channel_1image_overpadded
max_pool_3d
maxpool_bprop_larger_than_cache
reduce_window_emulating_max_pool_1d_1channel_1image
reduce_window_emulating_max_pool_1d_1channel_2image
reduce_window_emulating_max_pool_1d_2channel_2image
......
......@@ -191,8 +191,6 @@ TEST_P(all_close_f_param_test, test_boundaries)
}
// Avoid warning with how gtest defines INSTANTIATE_TEST_CASE_P
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmissing-variable-declarations"
INSTANTIATE_TEST_CASE_P(
test_simple_floats_with_range_of_precisions,
all_close_f_param_test,
......@@ -213,7 +211,6 @@ INSTANTIATE_TEST_CASE_P(
testing::Values(8,
24), // For broader range of testing use testing::Range(8, 25)
testing::Range(0, 5)), );
#pragma GCC diagnostic pop
class all_close_f_double_param_test : public testing::TestWithParam<::std::tuple<double, int>>
{
......@@ -317,8 +314,6 @@ TEST_P(all_close_f_double_param_test, test_boundaries)
}
// Avoid warning with how gtest defines INSTANTIATE_TEST_CASE_P
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmissing-variable-declarations"
INSTANTIATE_TEST_CASE_P(
test_simple_doubles_with_range_of_precisions,
all_close_f_double_param_test,
......@@ -326,7 +321,6 @@ INSTANTIATE_TEST_CASE_P(
testing::Values(
0., -0., 1., -1., 10., -10., 0.75, -0.75, 0.5, -0.5, 0.25, -0.25, 0.125, -0.125),
testing::Range(0, 17)), );
#pragma GCC diagnostic pop
// Test the exact bounds near +0.f
//
......
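An aside on the parameterized-test machinery in the hunk above: the following minimal sketch (my own illustration, not part of this diff; the fixture name, parameter values, and assertions are made up) shows how a TEST_P body pairs with INSTANTIATE_TEST_CASE_P and a testing::Combine generator, which is presumably how all_close_f_param_test is instantiated from the Values/Range pair visible above.

// Minimal, self-contained sketch of the TEST_P / INSTANTIATE_TEST_CASE_P
// pattern used by all_close_f_param_test. Names and values are illustrative
// only, not taken from the ngraph sources.
#include <cmath>
#include <tuple>
#include "gtest/gtest.h"

class example_param_test : public ::testing::TestWithParam<::std::tuple<float, int>>
{
};

TEST_P(example_param_test, params_are_well_formed)
{
    float value = ::std::get<0>(GetParam());
    int mantissa_bits = ::std::get<1>(GetParam());
    // Each (value, mantissa_bits) pair produced by the generator below
    // runs this body as one independent test instance.
    EXPECT_TRUE(::std::isfinite(value));
    EXPECT_GE(mantissa_bits, 8);
    EXPECT_LE(mantissa_bits, 24);
}

// Combine takes the cartesian product of the two generators; the trailing
// empty macro argument mirrors the style used in the diff above.
INSTANTIATE_TEST_CASE_P(example_instantiation,
                        example_param_test,
                        ::testing::Combine(::testing::Values(0.f, 1.f, -1.f),
                                           ::testing::Values(8, 24)), );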
......@@ -1227,84 +1227,3 @@ NGRAPH_TEST_P(${BACKEND_NAME}, avg_pool_3d_params, avg_pool_3d_uneven_strided_pa
// avg_pool_3d case generation
NGRAPH_INSTANTIATE_TEST_CASE_P(${BACKEND_NAME}, include_pad, avg_pool_3d_params, testing::Bool());
//
// This test primarily checks that maxpool backprop functions
// correctly when the input tensor is larger than most cache sizes.
// Here the to-be-pooled tensor has a single non-trivial
// dimension:
//
// x : [[0, 1, 0, 1, 0, 1, ... , 0, 1]] <--- input data
// ---- ---- ---- ... ---- <--- pooling windows
// y : [[ 1 , 1 , 1 , ... , 1]] <--- max pooled output
//
// The pooling window is size 2 and stride 2, so the windows
// do not overlap. Thus, each window will effectively see [0, 1]
// as its input data for max pooling. The resulting output tensor
// of pooling will be half the size of x, with all elements equal to 1 as
// seen above.
// Therefore, for the backward pooling operation with the same window shape
// and strides, the value of dy will only propagate to the positions in
// dx that correspond to a value of 1 in the corresponding input tensor x:
//
// dy : [[2, 3, ... , 4]]
// x : [[0, 1, 0, 1, ... , 0, 1]]
// dx : [[0, 2, 0, 3, ... , 0, 4]]
//
NGRAPH_TEST(${BACKEND_NAME}, maxpool_bprop_larger_than_cache)
{
Shape window_shape{1, 2};
Strides move_strides{1, 2};
Shape padding_below{0, 0};
Shape padding_above{0, 0};
// 200 MB tensor to exceed cache
const size_t num_elements = 50 * 1024 * 1024;
auto ceil_div = [](size_t x, size_t y) { return 1 + ((x - 1) / y); };
const size_t num_pooled_elements = ceil_div(num_elements + padding_below.back() +
padding_above.back() - window_shape.back() + 1,
move_strides.back());
Shape shape_x{1, 1, 1, num_elements};
Shape shape_y{1, 1, 1, num_pooled_elements};
auto x = make_shared<op::Parameter>(element::f32, shape_x);
auto dy = make_shared<op::Parameter>(element::f32, shape_y);
auto bprop =
make_shared<Function>(make_shared<op::MaxPoolBackprop>(
x, dy, window_shape, move_strides, padding_below, padding_above),
ParameterVector{x, dy});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
// initialize x to array of alternating 0s and 1s as described above
std::vector<float> x_data(num_elements, 0);
for (auto i = 0u; i < num_elements; i++)
{
x_data[i] = (i % 2);
}
auto x_t = backend->create_tensor(element::f32, shape_x);
copy_data(x_t, x_data);
// use random data for deltas dy
std::vector<float> dy_data(num_pooled_elements);
test::Uniform<float> rng(0.0f, 1.0f);
rng.initialize(dy_data);
auto dy_t = backend->create_tensor(element::f32, shape_y);
copy_data(dy_t, dy_data);
// create result deltas tensor and run the backward max pooling operation
auto dx_t = backend->create_tensor(element::f32, shape_x);
backend->call_with_validate(backend->compile(bprop), {dx_t}, {x_t, dy_t});
// expected values should be dy with 0s left inserted
// for each delta, see test description above for details
std::vector<float> expected_dx(num_elements, 0);
for (auto i = 0u, j = 0u; i < num_elements; i++)
{
if (x_data[i])
{
expected_dx[i] = x_data[i] * dy_data[j++];
}
}
EXPECT_EQ(expected_dx, read_vector<float>(dx_t));
}
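As a quick check on the output-shape arithmetic in the test removed above (and re-added for GPU below), this standalone sketch (my own illustration, not part of the ngraph sources) verifies that the ceil_div formula yields exactly N / 2 pooled elements for an even-length input with window 2, stride 2, and no padding.

// Standalone sketch verifying the pooled-output-length formula used in the
// test: for window 2, stride 2, and no padding, an input of N elements
// produces N / 2 pooled elements.
#include <cassert>
#include <cstddef>
#include <initializer_list>

int main()
{
    auto ceil_div = [](std::size_t x, std::size_t y) { return 1 + ((x - 1) / y); };
    const std::size_t window = 2;
    const std::size_t stride = 2;
    const std::size_t pad_below = 0;
    const std::size_t pad_above = 0;
    for (std::size_t n : {4u, 6u, 8u, 52428800u}) // 52428800 == 50 * 1024 * 1024
    {
        std::size_t pooled = ceil_div(n + pad_below + pad_above - window + 1, stride);
        assert(pooled == n / 2);
    }
    return 0;
}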
......@@ -25,6 +25,7 @@
#include "ngraph/util.hpp"
#include "util/all_close.hpp"
#include "util/all_close_f.hpp"
#include "util/random.hpp"
using namespace std;
using namespace ngraph;
......@@ -217,3 +218,85 @@ TEST(gpu_test, topk_fanout_graph_transform)
auto reshape_count = count_ops_of_type<ngraph::op::Reshape>(gpu_f);
EXPECT_EQ(reshape_count, 10);
}
//
// This test primarily checks that maxpool backprop functions
// correctly when the input tensor is larger than most cache sizes.
// Here the to-be-pooled tensor has a single non-trivial
// dimension:
//
// x : [[0, 1, 0, 1, 0, 1, ... , 0, 1]] <--- input data
// ---- ---- ---- ... ---- <--- pooling windows
// y : [[ 1 , 1 , 1 , ... , 1]] <--- max pooled output
//
// The pooling window is size 2 and stride 2, so the windows
// do not overlap. Thus, each window will effectively see [0, 1]
// as its input data for max pooling. The resulting output tensor
// of pooling will be half the size of x, with all elements equal to 1 as
// seen above.
// Therefore, for the backward pooling operation with the same window shape
// and strides, the value of dy will only propagate to the positions in
// dx that correspond to a value of 1 in the corresponding input tensor x:
//
// dy : [[2, 3, ... , 4]]
// x : [[0, 1, 0, 1, ... , 0, 1]]
// dx : [[0, 2, 0, 3, ... , 0, 4]]
//
TEST(gpu_test, maxpool_bprop_larger_than_cache)
{
Shape window_shape{1, 2};
Strides move_strides{1, 2};
Shape padding_below{0, 0};
Shape padding_above{0, 0};
// 200 MB tensor to exceed cache
const size_t num_elements = 50 * 1024 * 1024;
auto ceil_div = [](size_t x, size_t y) { return 1 + ((x - 1) / y); };
const size_t num_pooled_elements = ceil_div(num_elements + padding_below.back() +
padding_above.back() - window_shape.back() + 1,
move_strides.back());
Shape shape_x{1, 1, 1, num_elements};
Shape shape_y{1, 1, 1, num_pooled_elements};
auto x = make_shared<op::Parameter>(element::f32, shape_x);
auto dy = make_shared<op::Parameter>(element::f32, shape_y);
auto bprop =
make_shared<Function>(make_shared<op::MaxPoolBackprop>(
x, dy, window_shape, move_strides, padding_below, padding_above),
ParameterVector{x, dy});
auto backend = runtime::Backend::create("GPU");
// initialize x to array of alternating 0s and 1s as described above
std::vector<float> x_data(num_elements, 0);
for (auto i = 0u; i < num_elements; i++)
{
x_data[i] = (i % 2);
}
auto x_t = backend->create_tensor(element::f32, shape_x);
copy_data(x_t, x_data);
// use random data for deltas dy
std::vector<float> dy_data(num_pooled_elements);
test::Uniform<float> rng(0.0f, 1.0f);
rng.initialize(dy_data);
auto dy_t = backend->create_tensor(element::f32, shape_y);
copy_data(dy_t, dy_data);
// create result deltas tensor and run the backward max pooling operation
auto dx_t = backend->create_tensor(element::f32, shape_x);
auto handle = backend->compile(bprop);
backend->call_with_validate(handle, {dx_t}, {x_t, dy_t});
// expected values should be dy with 0s left inserted
// for each delta, see test description above for details
std::vector<float> expected_dx(num_elements, 0);
for (auto i = 0u, j = 0u; i < num_elements; i++)
{
if (x_data[i])
{
expected_dx[i] = x_data[i] * dy_data[j++];
}
}
EXPECT_EQ(expected_dx, read_vector<float>(dx_t));
}
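To make the dy-to-dx scatter described in the comment above concrete, here is a small standalone sketch (illustrative values only, not part of the test) that reproduces the expected_dx construction on the tiny example from the comment: x = [0, 1, 0, 1, 0, 1] and dy = [2, 3, 4] give dx = [0, 2, 0, 3, 0, 4].

// Standalone sketch of the expected-gradient construction used in the test:
// each dy value lands on the position of its window's maximum (the 1 in x),
// and every other position of dx stays 0.
#include <cassert>
#include <cstddef>
#include <vector>

int main()
{
    std::vector<float> x{0, 1, 0, 1, 0, 1};
    std::vector<float> dy{2, 3, 4}; // one delta per non-overlapping window of size 2
    std::vector<float> dx(x.size(), 0.0f);
    for (std::size_t i = 0, j = 0; i < x.size(); i++)
    {
        if (x[i] != 0.0f)
        {
            dx[i] = x[i] * dy[j++]; // the max element receives the whole delta
        }
    }
    assert((dx == std::vector<float>{0, 2, 0, 3, 0, 4}));
    return 0;
}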