Add support for ArgMax.

4ef010fc · nmostafa · 6e672209 · 4ef010fc · 4ef010fc · 4ef010fc
Commit 4ef010fc authored Jun 13, 2019 by nmostafa
9 changed files
--- a/src/contrib/mlir/compiler.cpp
+++ b/src/contrib/mlir/compiler.cpp
 //*****************************************************************************
-// Copyright 2017-2019 Intel Corporation
+// Copyright 2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -25,6 +25,7 @@
 #include "ngraph/node.hpp"
 #include "ngraph/op/add.hpp"
 #include "ngraph/op/argmin.hpp"
+#include "ngraph/op/argmax.hpp"
 #include "ngraph/op/dot.hpp"
 #include "ngraph/op/experimental/compiled_kernel.hpp"
 #include "ngraph/op/util/index_reduction.hpp"
@@ -289,20 +290,13 @@ mlir::Value* MLIRCompiler::COMPILE_OP_DECL(ngraph::op::Add)
 template<> // Builds the MLIR op for an nGraph ArgMin node.
 mlir::Value* MLIRCompiler::COMPILE_OP_DECL(ngraph::op::ArgMin)
 {
-    auto* idx_red = static_cast<const ngraph::op::util::IndexReduction*>(ng_node);
-
-    auto arg = idx_red->get_argument(0);
-    size_t red_axis = idx_red->get_reduction_axis();
-
-    mlir::Value* arg_val = compiler.get_tensor_value(arg->get_output_tensor_ptr().get()).m_value;
-    mlir::ArrayAttr red_axes_attr = compiler.m_builder->getI64ArrayAttr({(int64_t)red_axis});
+    return compiler.create_index_reduction<mlir::NGArgMinRedOp>(ng_node); // shared helper, instantiated for NGArgMinRedOp
+}

-    return compiler.m_builder
-        ->create<mlir::NGArgMinRedOp>(mlir::UnknownLoc::get(&compiler.m_context),
-                                      compiler.get_mlir_type(ng_node),
-                                      arg_val,
-                                      red_axes_attr)
-        .getResult();
+template<> // Builds the MLIR op for an nGraph ArgMax node.
+mlir::Value* MLIRCompiler::COMPILE_OP_DECL(ngraph::op::ArgMax)
+{
+    return compiler.create_index_reduction<mlir::NGArgMaxRedOp>(ng_node); // same helper, instantiated for NGArgMaxRedOp
 }

 template <>
@@ -338,6 +332,24 @@ void MLIRCompiler::create_return()
    m_builder->create<mlir::NGReturnOp>(mlir::UnknownLoc::get(&m_context), value_list);
 }

+/// Creates the MLIR index-reduction op `RedOp` for an nGraph index-reduction node.
+///
+/// \param ng_node Node to translate; it is accessed through the
+///                ngraph::op::util::IndexReduction interface without a runtime type check.
+/// \return Result value of the newly created `RedOp`.
+template<typename RedOp>
+mlir::Value* MLIRCompiler::create_index_reduction(const ngraph::Node* ng_node)
+{
+    const auto* index_reduction = static_cast<const ngraph::op::util::IndexReduction*>(ng_node);
+
+    const auto input_node = index_reduction->get_argument(0);
+    const size_t axis = index_reduction->get_reduction_axis();
+
+    // MLIR value associated with the input node's output tensor.
+    mlir::Value* input_value = get_tensor_value(input_node->get_output_tensor_ptr().get()).m_value;
+
+    // The op takes its axes as an i64 array attribute holding the single reduction axis.
+    mlir::ArrayAttr axes = m_builder->getI64ArrayAttr({static_cast<int64_t>(axis)});
+
+    auto location = mlir::UnknownLoc::get(&m_context);
+    auto result_type = get_mlir_type(ng_node);
+    auto reduction_op = m_builder->create<RedOp>(location, result_type, input_value, axes);
+    return reduction_op.getResult();
+}
 // Binds MLIR function arguments to the proper values. This includes externally allocated tensors
 // helpers to be used inside the function.
 void MLIRCompiler::bind_arguments()
@@ -393,6 +405,12 @@ void MLIRCompiler::execute()
    llvm::InitializeNativeTarget();
    llvm::InitializeNativeTargetAsmPrinter();

+    unsigned opt_level = 3;
+    if (char* opt_level_str = std::getenv("NGRAPH_MLIR_OPT_LEVEL"))
+    {
+        opt_level = std::stoi(opt_level_str);
+        NGRAPH_CHECK(opt_level >=0 && opt_level <= 3 , "Invalid optimization level");
+    }
    // Create an MLIR execution engine. We use a null MLIR pass manager for now to make sure we
    // don't run MLIR passes that were already run. We also pass a default transformer to run
    // LLVM optimizations at level 3.

--- a/src/contrib/mlir/compiler.hpp
+++ b/src/contrib/mlir/compiler.hpp
@@ -108,6 +108,9 @@ namespace ngraph
                template <typename BinOp>
                mlir::Value* create_binary_op(const ngraph::Node* ng_node);

+                template<typename RedOp>
+                mlir::Value* create_index_reduction(const ngraph::Node* ng_node);
+
                void create_return();

                /// Helper to create memref arguments for MLIR function signature

--- a/src/contrib/mlir/dialect/type.hpp
+++ b/src/contrib/mlir/dialect/type.hpp
@@ -51,7 +51,6 @@ namespace mlir

    // reuse std float types as-is
    using NGFloatType = mlir::FloatType;
-    using NGIndexType = mlir::IndexType;

    /// Integer type. It represents an integer of width 8,16,32,64. Signed or not.
    class NGIntegerType : public mlir::Type::TypeBase<NGIntegerType, mlir::Type>
@@ -234,8 +233,6 @@ namespace mlir
                return intType.getWidth();
            if (NGFloatType floatType = type.dyn_cast<NGFloatType>())
                return floatType.getIntOrFloatBitWidth();
-            if (NGIndexType indexType = type.dyn_cast<NGIndexType>())
-                return sizeof(intptr_t);
            if (NGBoolType boolType = type.dyn_cast<NGBoolType>())
                return boolType.getWidth();
            NGRAPH_FAIL() << "Unknown type";

--- a/src/contrib/mlir/helpers.cpp
+++ b/src/contrib/mlir/helpers.cpp
@@ -21,7 +21,7 @@
 /// Call back to copy Index tensor to Int tensor
 /// Can handle int tensors of bitwidth 8, 16, 32 and 64
 /// Index width is always intptr_t
-extern "C" NGRAPH_API void* __mlir_convert_index_to_int(mlir::StaticFloatMemRef dst, mlir::StaticFloatMemRef src, size_t numElements, size_t intWidth)
+extern "C" NGRAPH_API void __mlir_convert_index_to_int(mlir::StaticFloatMemRef dst, mlir::StaticFloatMemRef src, size_t numElements, size_t intWidth)
 {
    size_t indexSize = sizeof(intptr_t);
    auto pSrc = reinterpret_cast<intptr_t*>(src.data);

--- a/src/contrib/mlir/lowerer.cpp
+++ b/src/contrib/mlir/lowerer.cpp
@@ -45,6 +45,10 @@ namespace

 #include "op_lowerers.inc"

+    // Helpers
+    template<typename RedOp>
+    void lowerIndexReduction(Operation* op, ArrayRef<Value*> operands, PatternRewriter& rewriter, DialectLoweringPass& m_pass, bool isMin);
+
    /// Use Dialect Conversion Framework
    class DialectLowerer : public DialectConversion
    {
@@ -63,6 +67,7 @@ namespace
        {
            RewriteListBuilder<NGAddOpConversion,
                               NGArgMinRedOpConversion,
+                               NGArgMaxRedOpConversion,
                               NGDotOpConversion,
                               NGReturnOpConversion>::build(patterns, mlirContext, m_pass);
        }
@@ -308,7 +313,6 @@ namespace

    // ADD
    REWRITER(NGAddOp)
-
    {
        auto add = cast<NGAddOp>(op);
        auto loc = add.getLoc();
@@ -409,15 +413,30 @@ namespace

    REWRITER(NGArgMinRedOp) // Lowers NGArgMinRedOp via the shared index-reduction lowering.
    {
-        auto argmin = cast<NGArgMinRedOp>(op);
+        lowerIndexReduction<mlir::NGArgMinRedOp>(op, operands, rewriter, m_pass, true); // last argument is isMin
+    }
+
+    REWRITER(NGArgMaxRedOp) // Lowers NGArgMaxRedOp via the shared index-reduction lowering.
+    {
+        lowerIndexReduction<mlir::NGArgMaxRedOp>(op, operands, rewriter, m_pass, false); // isMin = false: keep the index of the larger element
+    }
+
+    REWRITER(NGReturnOp) { rewriter.replaceOpWithNewOp<ReturnOp>(op); } // Replaces the NGReturnOp with a ReturnOp.
+#undef REWRITER
+
+template<typename T>
+void lowerIndexReduction(Operation* op, ArrayRef<Value*> operands, PatternRewriter& rewriter, DialectLoweringPass& m_pass, bool isMin)
+{
+    T argmin = cast<T>(op);
    auto loc = argmin.getLoc();
    auto axesAttr = argmin.axes();

-        NGRAPH_ASSERT(axesAttr.size() == 1) << "ArgMin should have one reduction axis";
-        unsigned axis = axesAttr.begin()->dyn_cast<IntegerAttr>().getInt();
+    NGRAPH_CHECK(axesAttr.size() == 1 , "Index Reduction op should have one reduction axis");
+    Attribute axisAttr = *axesAttr.begin();
+    unsigned axis = axisAttr.dyn_cast<IntegerAttr>().getInt();

-        NGRAPH_ASSERT(operands.size() == 1 && operands[0] != nullptr)
-            << "Expected one non-null operand in ArgMin op";
+    NGRAPH_CHECK(operands.size() == 1 && operands[0] != nullptr,
+                 "Expected one non-null operand in Index Reduction op");

    // Retrieve/generate Values for operands and result.
    ScopedContext scope(rewriter, loc);
@@ -425,13 +444,14 @@ namespace
    auto arg_type = arg->getType().cast<MemRefType>();

    Value* finalResult = m_pass.buildOutputDefs(op, rewriter)[0];
-        auto resultTy = argmin.getResult()->getType().cast<NGTensorType>();
+    Type type = argmin.getResult()->getType();
+    NGTensorType resultTy = type.cast<NGTensorType>();
    // MLIR doesn't support Index to/from Integer type-conversion
    // We have to store our result in an IndexType tensor and call-back to a type-conversion routine in nGraph
    // TODO: Fix this once MLIR provides explicit cast operations.
    Value* result = m_pass.createTempTensor(
                                           rewriter.getMemRefType(resultTy.getShape(),rewriter.getIndexType()),
-                                                resultTy.getSizeInBytes(),
+                                           resultTy.getNumElements() * sizeof(intptr_t), /* hacky way to get target-dependent size of IndexType */
                                           rewriter
                                           );

@@ -466,7 +486,6 @@ namespace
        auto pAllIVs = IndexHandle::makeIndexHandlePointers(allIVs);
        SmallVector<IndexHandle,8> nonRedIVs;

-
        auto steps = vArg.getSteps();

        // iterate over all argument dimensions
@@ -483,7 +502,8 @@ namespace
                auto tempIVs = allIVs;
                // build list of IVs including current min index
                tempIVs[axis] = currMinIndx;
-                    iRes(nonRedIVs) = edsc::intrinsics::select(iArg(allIVs) < iArg(tempIVs), allIVs[axis], currMinIndx);
+                iRes(nonRedIVs) = isMin ? edsc::intrinsics::select(iArg(allIVs) < iArg(tempIVs), allIVs[axis], currMinIndx) :
+                                          edsc::intrinsics::select(iArg(tempIVs) < iArg(allIVs), allIVs[axis], currMinIndx);
            }
        );
    }
@@ -501,7 +521,7 @@ namespace
                                            rewriter.getUnknownLoc(),
                                            resultTy.getNumElements()
                                         ),
-                                             /* Integer size used */
+                                         /* Integer size used in final result*/
                                         rewriter.create<mlir::ConstantIndexOp>(
                                            rewriter.getUnknownLoc(),
                                            resultTy.getElementType().cast<NGIntegerType>().getWidth()
@@ -510,41 +530,7 @@ namespace
    rewriter.create<mlir::CallOp>(rewriter.getUnknownLoc(), callBackFunc, args);

    rewriter.replaceOp(op, {finalResult});
-#if 0
-
-        MemRefView v_res(result), v_arg(arg);
-        unsigned n_dim = v_arg.fastestVarying() - 1;
-        unsigned m_dim = v_arg.fastestVarying();
-
-        // Constants, indexed values and other vars to be used inside the loop nest.
-        IndexedValue i_res(result), i_arg(arg);
-
-        // Initialize result to zero.
-        IndexHandle m_init;
-        IndexHandle m_lb_init(v_arg.lb(m_dim));
-        IndexHandle m_ub_init(v_arg.ub(m_dim));
-        int64_t m_step = v_arg.step(m_dim);
-        LoopBuilder(&m_init, m_lb_init, m_ub_init, m_step)([&] { i_res(m_init) = m_lb_init; });
-
-        // Main loop nest for argmin
-        IndexHandle n, m;
-        IndexHandle n_lb(v_arg.lb(n_dim)), m_lb(v_arg.lb(m_dim));
-        IndexHandle n_ub(v_arg.ub(n_dim)), m_ub(v_arg.ub(m_dim));
-        ValueHandle curr_res(res_elem_ty);
-        int64_t n_step = v_arg.step(n_dim);
-
-        LoopBuilder(&n, n_lb, n_ub, n_step)([&] {
-            LoopBuilder(&m, m_lb, m_ub, m_step)([&] {
-                curr_res = i_res(m);
-                i_res(m) = edsc::intrinsics::select(i_arg(n, m) < i_arg(curr_res, m), n, curr_res);
-            });
-        });
-#endif
-
-    }
-
-    REWRITER(NGReturnOp) { rewriter.replaceOpWithNewOp<ReturnOp>(op); }
-#undef REWRITER
+}
 }

 namespace mlir

--- a/src/contrib/mlir/op_lowerers.inc
+++ b/src/contrib/mlir/op_lowerers.inc
@@ -31,6 +31,7 @@ public:\

 DECL_OP_CONV(NGAddOp)
 DECL_OP_CONV(NGArgMinRedOp)
+DECL_OP_CONV(NGArgMaxRedOp)
 DECL_OP_CONV(NGDotOp)
 DECL_OP_CONV(NGReturnOp)


--- a/src/contrib/mlir/ops_supported.inc
+++ b/src/contrib/mlir/ops_supported.inc
@@ -5,6 +5,7 @@

 MLIR_OP(Add)
 MLIR_OP(ArgMin)
+MLIR_OP(ArgMax)
 MLIR_OP(Dot)
 // Add new supported ops here


--- a/src/contrib/mlir/pass/mlir_subgraph_extraction.cpp
+++ b/src/contrib/mlir/pass/mlir_subgraph_extraction.cpp
@@ -20,6 +20,7 @@
 #include "ngraph/graph_util.hpp"
 #include "ngraph/op/add.hpp"
 #include "ngraph/op/argmin.hpp"
+#include "ngraph/op/argmax.hpp"
 #include "ngraph/op/dot.hpp"
 #include "ngraph/op/experimental/compiled_kernel.hpp"
 #include "ngraph/op/get_output_element.hpp"
@@ -107,6 +108,13 @@ bool MLIRSubgraphExtractionPass::is_supported_mlir_op(std::shared_ptr<Node> node
            return false;
        }
    }
+
+    if (TI(ngraph::op::ArgMin) == TI(*node) || TI(ngraph::op::ArgMax) == TI(*node))
+    {
+        // TODO: Remove this when MLIR has floating-point cmp support
+        if (!node->input(0).get_element_type().is_integral())
+            return false;
+    }
    return true;
 }


--- a/test/backend_arg_reduce.in.cpp
+++ b/test/backend_arg_reduce.in.cpp
@@ -55,7 +55,7 @@ NGRAPH_TEST(${BACKEND_NAME}, argmin_trivial)
    EXPECT_EQ((vector<int>{3, 2, 1}), read_vector<int>(result));
 }

-NGRAPH_TEST(${BACKEND_NAME}, argmin_trivial_i32)
+NGRAPH_TEST(${BACKEND_NAME}, argmin_2D_i32)
 {
    Shape shape{4, 3};
    Shape rshape{3};
@@ -74,6 +74,91 @@ NGRAPH_TEST(${BACKEND_NAME}, argmin_trivial_i32)
    EXPECT_EQ((vector<int>{3, 2, 1}), read_vector<int>(result));
 }

+NGRAPH_TEST(${BACKEND_NAME}, argmin_3D_i32)
+{
+    // ArgMin along axis 1 of a 3x3x4 i32 tensor; indices come back as i32 in a 3x4 tensor.
+    const Shape input_shape{3, 3, 4};
+    const Shape output_shape{3, 4};
+    auto param = make_shared<op::Parameter>(element::i32, input_shape);
+    auto arg_min = make_shared<op::ArgMin>(param, 1, element::i32);
+    auto func = make_shared<Function>(arg_min, ParameterVector{param});
+
+    auto backend = runtime::Backend::create("${BACKEND_NAME}");
+
+    // Input tensor, filled from three 3x4 slices.
+    auto input = backend->create_tensor(element::i32, input_shape);
+    test::NDArray<int, 3> input_values({{{12, 2, 10, 9}, {3, 5, 0, 8}, {7, 9, 1, 5}},
+                                        {{7, 2, 4, 10}, {6, 10, 2, 2}, {12, 1, 1, 1}},
+                                        {{10, 2, 2, 4}, {1, 5, 5, 1}, {7, 12, 2, 2}}});
+    copy_data(input, input_values.get_vector());
+    auto output = backend->create_tensor(element::i32, output_shape);
+
+    auto executable = backend->compile(func);
+    executable->call_with_validate({output}, {input});
+
+    // Where the minimum occurs more than once, the expected index is the first occurrence.
+    const vector<int> expected{1, 0, 1, 2, 1, 2, 2, 2, 1, 0, 0, 1};
+    EXPECT_EQ(expected, read_vector<int>(output));
+}
+
+NGRAPH_TEST(${BACKEND_NAME}, argmin_3D_i64)
+{
+    // ArgMin along axis 1 of a 3x3x4 i32 tensor; indices come back as i64 in a 3x4 tensor.
+    const Shape input_shape{3, 3, 4};
+    const Shape output_shape{3, 4};
+    auto param = make_shared<op::Parameter>(element::i32, input_shape);
+    auto arg_min = make_shared<op::ArgMin>(param, 1, element::i64);
+    auto func = make_shared<Function>(arg_min, ParameterVector{param});
+
+    auto backend = runtime::Backend::create("${BACKEND_NAME}");
+
+    // Input tensor, filled from three 3x4 slices.
+    auto input = backend->create_tensor(element::i32, input_shape);
+    test::NDArray<int, 3> input_values({{{12, 2, 10, 9}, {3, 5, 0, 8}, {7, 9, 1, 5}},
+                                        {{7, 2, 4, 10}, {6, 10, 2, 2}, {12, 1, 1, 1}},
+                                        {{10, 2, 2, 4}, {1, 5, 5, 1}, {7, 12, 2, 2}}});
+    copy_data(input, input_values.get_vector());
+    auto output = backend->create_tensor(element::i64, output_shape);
+
+    auto executable = backend->compile(func);
+    executable->call_with_validate({output}, {input});
+
+    const vector<int64_t> expected{1, 0, 1, 2, 1, 2, 2, 2, 1, 0, 0, 1};
+    EXPECT_EQ(expected, read_vector<int64_t>(output));
+}
+
+
+NGRAPH_TEST(${BACKEND_NAME}, argmin_4D_i64)
+{
+    // ArgMin along the innermost axis (3) of a 2x2x5x5 tensor, producing i64 indices of
+    // shape 2x2x5. The input element type is i32 so that it agrees with the
+    // NDArray<int, 4> data copied into the tensor below; it was previously declared f32
+    // while being filled with int values.
+    Shape shape{2, 2, 5, 5}; // NCHW ->(0,1,2,3)
+    Shape rshape{2, 2, 5};
+    auto A = make_shared<op::Parameter>(element::i32, shape);
+    auto f = make_shared<Function>(make_shared<op::ArgMin>(A, 3, element::i64), ParameterVector{A});
+    auto backend = runtime::Backend::create("${BACKEND_NAME}");
+    // Create some tensors for input/output
+    auto a = backend->create_tensor(element::i32, shape);
+    copy_data(a,
+              test::NDArray<int, 4>({{{{3, 1, 1, 2, 105},
+                                       {0, 3, 2, 1, 2},
+                                       {2, 4, 2, 0, 1},
+                                       {2, 5, 1, 1, 22},
+                                       {5, 2, 1, 7, 5}},
+                                      {{3, 1, 2, 2, 1},
+                                       {1, 7, 3, 8, 1},
+                                       {2, 10, 1, 3, 2},
+                                       {3, 1, 0, 0, 6},
+                                       {2, 0, 0, 0, 0}}},
+                                     {{{0, 2, 1, 1, 0},
+                                       {0, 0, 0, 0, 1},
+                                       {0, 0, 1, 0, 3},
+                                       {2, 0, 0, 3, 0},
+                                       {0, 0, 0, 0, 1}},
+                                      {{2, 1, 0, 0, 1},
+                                       {0, 2, 0, 0, 0},
+                                       {1, 1, 2, 0, 2},
+                                       {1, 1, 1, 0, 1},
+                                       {1, 0, 0, 0, 2}}}})
+                  .get_vector());
+    auto result = backend->create_tensor(element::i64, rshape);
+    auto handle = backend->compile(f);
+    handle->call_with_validate({result}, {a});
+    // One index per 5-element row; repeated minima resolve to the first occurrence.
+    EXPECT_EQ((vector<int64_t>{1, 0, 3, 2, 2, 1, 0, 2, 2, 1, 0, 0, 0, 1, 0, 2, 0, 3, 3, 1}),
+              read_vector<int64_t>(result));
+}
+
 NGRAPH_TEST(${BACKEND_NAME}, argmin_4D_axis_3_i64)
 {
    Shape shape{2, 2, 5, 5}; // NCHW ->(0,1,2,3)
@@ -177,6 +262,111 @@ NGRAPH_TEST(${BACKEND_NAME}, argmax_trivial)
    EXPECT_EQ((vector<int>{1, 3, 0}), read_vector<int>(result));
 }

+NGRAPH_TEST(${BACKEND_NAME}, argmax_2D_i32)
+{
+    // ArgMax along axis 0 of a 4x3 i32 tensor; indices come back as i32 in a 3-element tensor.
+    const Shape input_shape{4, 3};
+    const Shape output_shape{3};
+    auto param = make_shared<op::Parameter>(element::i32, input_shape);
+    auto arg_max = make_shared<op::ArgMax>(param, 0, element::i32);
+    auto func = make_shared<Function>(arg_max, ParameterVector{param});
+
+    auto backend = runtime::Backend::create("${BACKEND_NAME}");
+
+    // Input and output tensors.
+    auto input = backend->create_tensor(element::i32, input_shape);
+    const vector<int> input_values{12, 2, 10, 9, 8, 4, 6, 1, 5, 3, 11, 7};
+    copy_data(input, input_values);
+    auto output = backend->create_tensor(element::i32, output_shape);
+
+    auto executable = backend->compile(func);
+    executable->call_with_validate({output}, {input});
+
+    const vector<int> expected{0, 3, 0};
+    EXPECT_EQ(expected, read_vector<int>(output));
+}
+
+NGRAPH_TEST(${BACKEND_NAME}, argmax_3D_i32)
+{
+    // ArgMax along axis 1 of a 3x3x4 i32 tensor; indices come back as i32 in a 3x4 tensor.
+    const Shape input_shape{3, 3, 4};
+    const Shape output_shape{3, 4};
+    auto param = make_shared<op::Parameter>(element::i32, input_shape);
+    auto arg_max = make_shared<op::ArgMax>(param, 1, element::i32);
+    auto func = make_shared<Function>(arg_max, ParameterVector{param});
+
+    auto backend = runtime::Backend::create("${BACKEND_NAME}");
+
+    // Input tensor, filled from three 3x4 slices.
+    auto input = backend->create_tensor(element::i32, input_shape);
+    test::NDArray<int, 3> input_values({{{12, 2, 10, 9}, {3, 5, 0, 8}, {7, 9, 1, 5}},
+                                        {{7, 2, 4, 10}, {6, 10, 2, 2}, {12, 1, 1, 1}},
+                                        {{10, 2, 2, 4}, {1, 5, 5, 1}, {7, 12, 2, 2}}});
+    copy_data(input, input_values.get_vector());
+    auto output = backend->create_tensor(element::i32, output_shape);
+
+    auto executable = backend->compile(func);
+    executable->call_with_validate({output}, {input});
+
+    // Where the maximum occurs more than once, the expected index is the first occurrence.
+    const vector<int> expected{0, 2, 0, 0, 2, 1, 0, 0, 0, 2, 1, 0};
+    EXPECT_EQ(expected, read_vector<int>(output));
+}
+
+NGRAPH_TEST(${BACKEND_NAME}, argmax_3D_i64)
+{
+    // ArgMax along axis 1 of a 3x3x4 i32 tensor; indices come back as i64 in a 3x4 tensor.
+    const Shape input_shape{3, 3, 4};
+    const Shape output_shape{3, 4};
+    auto param = make_shared<op::Parameter>(element::i32, input_shape);
+    auto arg_max = make_shared<op::ArgMax>(param, 1, element::i64);
+    auto func = make_shared<Function>(arg_max, ParameterVector{param});
+
+    auto backend = runtime::Backend::create("${BACKEND_NAME}");
+
+    // Input tensor, filled from three 3x4 slices.
+    auto input = backend->create_tensor(element::i32, input_shape);
+    test::NDArray<int, 3> input_values({{{12, 2, 10, 9}, {3, 5, 0, 8}, {7, 9, 1, 5}},
+                                        {{7, 2, 4, 10}, {6, 10, 2, 2}, {12, 1, 1, 1}},
+                                        {{10, 2, 2, 4}, {1, 5, 5, 1}, {7, 12, 2, 2}}});
+    copy_data(input, input_values.get_vector());
+    auto output = backend->create_tensor(element::i64, output_shape);
+
+    auto executable = backend->compile(func);
+    executable->call_with_validate({output}, {input});
+
+    const vector<int64_t> expected{0, 2, 0, 0, 2, 1, 0, 0, 0, 2, 1, 0};
+    EXPECT_EQ(expected, read_vector<int64_t>(output));
+}
+
+
+NGRAPH_TEST(${BACKEND_NAME}, argmax_4D_i64)
+{
+    // ArgMax along the innermost axis (3) of a 2x2x5x5 tensor, producing i64 indices of
+    // shape 2x2x5. The input element type is i32 so that it agrees with the
+    // NDArray<int, 4> data copied into the tensor below; it was previously declared f32
+    // while being filled with int values.
+    Shape shape{2, 2, 5, 5}; // NCHW ->(0,1,2,3)
+    Shape rshape{2, 2, 5};
+    auto A = make_shared<op::Parameter>(element::i32, shape);
+    auto f = make_shared<Function>(make_shared<op::ArgMax>(A, 3, element::i64), ParameterVector{A});
+    auto backend = runtime::Backend::create("${BACKEND_NAME}");
+    // Create some tensors for input/output
+    auto a = backend->create_tensor(element::i32, shape);
+    copy_data(a,
+              test::NDArray<int, 4>({{{{3, 1, 1, 2, 105},
+                                       {0, 3, 2, 1, 2},
+                                       {2, 4, 2, 0, 1},
+                                       {2, 5, 1, 1, 22},
+                                       {5, 2, 1, 7, 5}},
+                                      {{3, 1, 2, 2, 1},
+                                       {1, 7, 3, 8, 1},
+                                       {2, 10, 1, 3, 2},
+                                       {3, 1, 0, 0, 6},
+                                       {2, 0, 0, 0, 0}}},
+                                     {{{0, 2, 1, 1, 0},
+                                       {0, 0, 0, 0, 1},
+                                       {0, 0, 1, 0, 3},
+                                       {2, 0, 0, 3, 0},
+                                       {0, 0, 0, 0, 1}},
+                                      {{2, 1, 0, 0, 1},
+                                       {0, 2, 0, 0, 0},
+                                       {1, 1, 2, 0, 2},
+                                       {1, 1, 1, 0, 1},
+                                       {1, 0, 0, 0, 2}}}})
+                  .get_vector());
+    auto result = backend->create_tensor(element::i64, rshape);
+    auto handle = backend->compile(f);
+    handle->call_with_validate({result}, {a});
+    // One index per 5-element row; repeated maxima resolve to the first occurrence.
+    EXPECT_EQ((vector<int64_t>{4, 1, 1, 4, 3, 0, 3, 1, 4, 0, 1, 4, 4, 3, 4, 0, 1, 2, 0, 4}),
+              read_vector<int64_t>(result));
+}
+
+
 NGRAPH_TEST(${BACKEND_NAME}, argmax_3D_axis_0) // Along Channels
 {
    Shape shape{3, 4, 2}; // CHW ->(0,1,2)