Commit 6f30b32b authored by Ayan Moitra, committed by Scott Cyphers

Support ArgMin and ArgMax for NVGPU Backend (#1737)

* Project initialization commit

* Added unit tests for 3D tensors for argmax

* Refactored reduce to be shared by argmax/argmin; argmax/argmin still has some issues. WIP

* [WIP] First working version of ArgMax/ArgMin

* added reduce buffer for the cudnn api calls

* added reduce buffer for the cudnn api calls

* Further modifications. Using rvalues to pass enums to build reduce method

* more unit tests added

* Incorporate Fenglei's comments

* Incorporating Chris's first set of comments

* small change to test file

* Resolving clang issue that was causing argmin test to fail

* Incorporate Chris's comments

* Fix clang-format issue
parent 2f49032f
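For context before the diff: the new ArgReduce path leans on cuDNN's reduction API, which can return the indices of max/min elements rather than only the reduced values; the "reduce buffer" bullets above presumably refer to the workspace and indices buffers this API requires. Below is a minimal standalone sketch of that mechanism, assuming cuDNN 7+. It is a hedged illustration, not this commit's code; error checking is omitted and buffer names are ours, not nGraph's.

// Sketch: cuDNN index-returning reduction (the mechanism behind ReductionMode::ArgReduce).
#include <cudnn.h>
#include <cuda_runtime.h>
#include <cstdio>
#include <vector>

int main()
{
    cudnnHandle_t handle;
    cudnnCreate(&handle);

    // Reduce a 1x1x4x3 tensor over H (the rows), i.e. an argmax per column.
    cudnnTensorDescriptor_t in_desc, out_desc;
    cudnnCreateTensorDescriptor(&in_desc);
    cudnnCreateTensorDescriptor(&out_desc);
    cudnnSetTensor4dDescriptor(in_desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, 1, 4, 3);
    cudnnSetTensor4dDescriptor(out_desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, 1, 1, 3);

    // Requesting FLATTENED_INDICES is what turns a plain reduction into an
    // arg-reduction; cuDNN computes indices only for MAX/MIN, only as 32-bit values.
    cudnnReduceTensorDescriptor_t reduce_desc;
    cudnnCreateReduceTensorDescriptor(&reduce_desc);
    cudnnSetReduceTensorDescriptor(reduce_desc,
                                   CUDNN_REDUCE_TENSOR_MAX,
                                   CUDNN_DATA_FLOAT,
                                   CUDNN_NOT_PROPAGATE_NAN,
                                   CUDNN_REDUCE_TENSOR_FLATTENED_INDICES,
                                   CUDNN_32BIT_INDICES);

    size_t workspace_size = 0, indices_size = 0;
    cudnnGetReductionWorkspaceSize(handle, reduce_desc, in_desc, out_desc, &workspace_size);
    cudnnGetReductionIndicesSize(handle, reduce_desc, in_desc, out_desc, &indices_size);

    std::vector<float> host_in = {8, 4, 1, 12, 10, 2, 2, 9, 5, 1, 5, 11};
    float *d_in, *d_out;
    void *d_workspace, *d_indices;
    cudaMalloc(&d_in, host_in.size() * sizeof(float));
    cudaMalloc(&d_out, 3 * sizeof(float));
    cudaMalloc(&d_workspace, workspace_size);
    cudaMalloc(&d_indices, indices_size);
    cudaMemcpy(d_in, host_in.data(), host_in.size() * sizeof(float), cudaMemcpyHostToDevice);

    const float alpha = 1.0f, beta = 0.0f;
    cudnnReduceTensor(handle, reduce_desc, d_indices, indices_size,
                      d_workspace, workspace_size,
                      &alpha, in_desc, d_in, &beta, out_desc, d_out);

    std::vector<int> host_idx(3);
    cudaMemcpy(host_idx.data(), d_indices, host_idx.size() * sizeof(int),
               cudaMemcpyDeviceToHost);
    for (int i = 0; i < 3; ++i)
        printf("col %d: index %d\n", i, host_idx[i]); // indices as cuDNN encodes them

    cudaFree(d_in); cudaFree(d_out); cudaFree(d_workspace); cudaFree(d_indices);
    cudnnDestroyReduceTensorDescriptor(reduce_desc);
    cudnnDestroyTensorDescriptor(in_desc);
    cudnnDestroyTensorDescriptor(out_desc);
    cudnnDestroy(handle);
}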
@@ -1783,8 +1783,9 @@ size_t runtime::gpu::CUDAEmitter::build_primitive(const op::Softmax* node)
     auto output_type = out[0].get_element_type().c_type_string();
     auto exp_index = build_elementwise<ngraph::op::Exp>({input_type, output_type}, input_shape);
+    std::vector<element::Type> dtypes{args[0].get_element_type(), out[0].get_element_type()};
     auto reduce_index = cudnn_emitter->build_reduce_forward(
-        CUDNN_REDUCE_TENSOR_ADD, output_type, input_shape, axes);
+        CUDNN_REDUCE_TENSOR_ADD, dtypes, input_shape, axes, CUDNNEmitter::ReductionMode::Reduce);
     size_t divide_index = build_softmax_divide(
         std::vector<std::string>(3, output_type), input_shape, reduced_shape, axes_flag);
......
This diff is collapsed.
@@ -72,6 +72,12 @@
                 Backward
             };

+            enum class ReductionMode
+            {
+                Reduce,
+                ArgReduce
+            };
+
             enum class algo_search
             {
                 HEURISTIC,
@@ -109,9 +115,10 @@
                                      const algo_search find_algo = algo_search::NONE);

             size_t build_reduce_forward(const cudnnReduceTensorOp_t& reduce_op,
-                                        const std::string& dtype,
+                                        const std::vector<element::Type>& dtypes,
                                         const Shape& input_shape,
-                                        const AxisSet& reduction_axes);
+                                        const AxisSet& reduction_axes,
+                                        const ReductionMode& reduction_mode);

             size_t build_tensor_op(const cudnnOpTensorOp_t& tensor_op,
                                    const std::string& dtype,
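The new reduction_mode parameter plausibly selects whether cuDNN is asked to compute indices. A hedged sketch of how such a switch can map onto the reduce descriptor follows; configure_reduce_descriptor and its parameters are our illustrative names, not necessarily the commit's internals.

#include <cudnn.h>

enum class ReductionMode { Reduce, ArgReduce };

// Configure a cuDNN reduce descriptor for either a plain reduction (values only)
// or an arg-reduction (indices requested).
void configure_reduce_descriptor(cudnnReduceTensorDescriptor_t desc,
                                 cudnnReduceTensorOp_t reduce_op,
                                 cudnnDataType_t comp_type,
                                 ReductionMode mode)
{
    const cudnnReduceTensorIndices_t indices =
        (mode == ReductionMode::ArgReduce) ? CUDNN_REDUCE_TENSOR_FLATTENED_INDICES
                                           : CUDNN_REDUCE_TENSOR_NO_INDICES;
    // Indices are only defined for MAX/MIN reductions and only as 32-bit values,
    // which is why ArgMax/ArgMin map to CUDNN_REDUCE_TENSOR_MAX/MIN below.
    cudnnSetReduceTensorDescriptor(
        desc, reduce_op, comp_type, CUDNN_NOT_PROPAGATE_NAN, indices, CUDNN_32BIT_INDICES);
}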
@@ -163,6 +170,7 @@
             void* get_data_by_type(cudnnDataType_t data_type, double value);

             cudnnDataType_t get_cudnn_datatype(std::string dtype);
+            cudnnDataType_t get_cudnn_datatype(const element::Type& dtype);

             cudnnTensorDescriptor_t&
                 tensor_descriptor_from_shape(const Shape& shape,
......
@@ -164,12 +164,45 @@ void runtime::gpu::GPU_Emitter::emit_And(EMIT_ARGS)

 void runtime::gpu::GPU_Emitter::emit_ArgMax(EMIT_ARGS)
 {
-    throw unsupported_op("Unsupported op '" + node->description() + "'");
+    cudnnReduceTensorOp_t reduce_op = CUDNN_REDUCE_TENSOR_MAX;
+    runtime::gpu::GPU_Emitter::emit_ArgReduce(
+        external_function, writer, node, args, out, reduce_op);
 }

 void runtime::gpu::GPU_Emitter::emit_ArgMin(EMIT_ARGS)
 {
-    throw unsupported_op("Unsupported op '" + node->description() + "'");
+    cudnnReduceTensorOp_t reduce_op = CUDNN_REDUCE_TENSOR_MIN;
+    runtime::gpu::GPU_Emitter::emit_ArgReduce(
+        external_function, writer, node, args, out, reduce_op);
+}
+
+void runtime::gpu::GPU_Emitter::emit_ArgReduce(EMIT_ARGS, cudnnReduceTensorOp_t reduce_op)
+{
+    if (out[0].get_size() == 0)
+    {
+        return;
+    }
+
+    // ArgMax and ArgMin both derive from op::util::IndexReduction; casting to the
+    // common base keeps this helper valid for either op.
+    auto arg_reduce = static_cast<const ngraph::op::util::IndexReduction*>(node);
+    auto axis_set = AxisSet{arg_reduce->get_reduction_axis()};
+    std::vector<element::Type> dtypes{args[0].get_element_type(), out[0].get_element_type()};
+
+    writer.block_begin();
+    {
+        auto& cudnn_emitter = external_function->get_primitive_emitter()->get_cudnn_emitter();
+        auto index = cudnn_emitter->build_reduce_forward(reduce_op,
+                                                         dtypes,
+                                                         args[0].get_shape(),
+                                                         axis_set,
+                                                         CUDNNEmitter::ReductionMode::ArgReduce);
+
+        writer << "void* input[] = {" << node_names(args) << "};\n";
+        writer << "void* output[] = {" << node_names(out) << "};\n";
+        writer << "gpu::invoke_primitive(ctx, " << index << ", input, output);\n";
+    }
+    writer.block_end();
 }

 void runtime::gpu::GPU_Emitter::emit_Asin(EMIT_ARGS)
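For readers new to this backend: build_reduce_forward registers a primitive (a callable wrapping the cuDNN invocation) and returns its index, and the emitted source carries only that index, calling gpu::invoke_primitive at runtime. A toy sketch of that pattern, with illustrative names rather than nGraph's actual types:

#include <cstddef>
#include <functional>
#include <iostream>
#include <vector>

using Primitive = std::function<void(void** inputs, void** outputs)>;

struct PrimitiveTable
{
    std::vector<Primitive> primitives;

    size_t register_primitive(Primitive p)
    {
        primitives.push_back(std::move(p));
        return primitives.size() - 1; // index baked into the generated source
    }

    void invoke(size_t index, void** inputs, void** outputs)
    {
        primitives[index](inputs, outputs);
    }
};

int main()
{
    PrimitiveTable table;
    // Stand-in for build_reduce_forward(..., ReductionMode::ArgReduce):
    size_t idx = table.register_primitive([](void** in, void** out) {
        auto src = static_cast<const float*>(in[0]);
        auto dst = static_cast<int*>(out[0]);
        // Argmax over a fixed 4-element vector, as a placeholder for the cuDNN call.
        int best = 0;
        for (int i = 1; i < 4; ++i)
            if (src[i] > src[best]) best = i;
        dst[0] = best;
    });

    float data[] = {1.0f, 5.0f, 3.0f, 2.0f};
    int result = -1;
    void* inputs[] = {data};
    void* outputs[] = {&result};
    table.invoke(idx, inputs, outputs); // mirrors gpu::invoke_primitive(ctx, idx, ...)
    std::cout << "argmax index: " << result << "\n"; // prints 1
}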
@@ -856,6 +889,7 @@ void runtime::gpu::GPU_Emitter::emit_Power(EMIT_ARGS)
 void runtime::gpu::GPU_Emitter::emit_Product(EMIT_ARGS)
 {
     const ngraph::op::Product* product = static_cast<const ngraph::op::Product*>(node);
+
     writer.block_begin();
     {
         if (out[0].get_size() != 0)
@@ -877,12 +911,16 @@ void runtime::gpu::GPU_Emitter::emit_Product(EMIT_ARGS)
                 // descriptors for tensors with <= 4 dimensions
                 else
                 {
+                    std::vector<element::Type> dtypes{args[0].get_element_type(),
+                                                      out[0].get_element_type()};
                     auto& cudnn_emitter =
                         external_function->get_primitive_emitter()->get_cudnn_emitter();
-                    auto index = cudnn_emitter->build_reduce_forward(CUDNN_REDUCE_TENSOR_MUL,
-                                                                     out[0].get_type(),
-                                                                     args[0].get_shape(),
-                                                                     product->get_reduction_axes());
+                    auto index =
+                        cudnn_emitter->build_reduce_forward(CUDNN_REDUCE_TENSOR_MUL,
+                                                            dtypes,
+                                                            args[0].get_shape(),
+                                                            product->get_reduction_axes(),
+                                                            CUDNNEmitter::ReductionMode::Reduce);

                     writer << "void* input[] = {" << node_names(args) << "};\n";
                     writer << "void* output[] = {" << node_names(out) << "};\n";
@@ -971,14 +1009,16 @@ void runtime::gpu::GPU_Emitter::emit_Reduce(EMIT_ARGS)
                         reduce_tensor_op = f_ptr->second;
                     }
                 }
+                std::vector<element::Type> dtypes{args[0].get_element_type(),
+                                                  out[0].get_element_type()};
                 auto& cudnn_emitter =
                     external_function->get_primitive_emitter()->get_cudnn_emitter();
                 auto reduce_index =
                     cudnn_emitter->build_reduce_forward(reduce_tensor_op,
-                                                        out[0].get_type(),
+                                                        dtypes,
                                                         args[0].get_shape(),
-                                                        reduce_op->get_reduction_axes());
+                                                        reduce_op->get_reduction_axes(),
+                                                        CUDNNEmitter::ReductionMode::Reduce);

                 writer << "void* input[] = {" << node_names(args) << "};\n";
                 writer << "void* output[] = {" << node_names(out) << "};\n";
......
@@ -75,6 +75,8 @@
                     writer.block_end();
                 }

+                static void emit_ArgReduce(EMIT_ARGS, cudnnReduceTensorOp_t);
+
             private:
                 /// \brief Create a list of node names for each arg in args
                 /// \param args list of tensor arguments
......
@@ -31,8 +31,6 @@ backwards_avgpool_n1_c1_hw4x4
 backwards_avgpool_n2_c2_hw4x4
 max_pool_3d
 avg_pool_3d
-argmin_trivial
-argmax_trivial
 topk_1d_max_all
 topk_1d_max_partial
 topk_1d_max_one
......
@@ -9483,9 +9483,51 @@ NGRAPH_TEST(${BACKEND_NAME}, argmin_trivial)
     EXPECT_EQ((vector<int>{3, 2, 1}), read_vector<int>(result));
 }

+NGRAPH_TEST(${BACKEND_NAME}, argmin_4D_axis_3)
+{
+    Shape shape{2, 2, 5, 5}; // NCHW -> (0,1,2,3)
+    Shape rshape{2, 2, 5};
+    auto A = make_shared<op::Parameter>(element::f32, shape);
+    auto f =
+        make_shared<Function>(make_shared<op::ArgMin>(A, 3, element::i32), op::ParameterVector{A});
+    auto backend = runtime::Backend::create("${BACKEND_NAME}");
+
+    // Create some tensors for input/output
+    auto a = backend->create_tensor(element::f32, shape);
+    copy_data(a,
+              test::NDArray<float, 4>({{{{0.5f, 1.5f, 0.8f, 2.9f, 1.05f}, // img 0 ch 0
+                                         {0.5f, 3.5f, 2.0f, 1.0f, 0.2f},
+                                         {2.0f, 0.0f, 2.2f, 0.2f, 1.4f},
+                                         {2.9f, 0.0f, 1.52f, 1.2f, 2.22f},
+                                         {5.0f, 2.0f, 1.0f, 0.5f, 0.85f}},
+                                        {{0.25f, 0.02f, 0.02f, 2.2f, 0.001f}, // img 0 ch 1
+                                         {1.0f, 0.2f, 3.0f, 0.25f, 1.14f},
+                                         {2.25f, 10.1f, 1.0f, 0.02f, 2.22f},
+                                         {3.2f, 1.002f, 0.001f, 0.2f, 6.0f},
+                                         {2.0f, 0.0f, 0.0f, 0.0f, 0.0f}}},
+                                       {{{0.0f, 2.2f, 1.2f, 1.6f, 0.2f}, // img 1 ch 0
+                                         {0.01f, 0.0f, 0.22f, 0.02f, 1.1f},
+                                         {0.01f, 0.5f, 1.6f, 0.2f, 3.2f},
+                                         {2.4f, 0.5f, 0.0f, 3.0f, 0.1f},
+                                         {0.0f, 0.5f, 0.4f, 0.8f, 1.0f}},
+                                        {{2.0f, 1.0f, 0.0f, 0.0f, 1.0f}, // img 1 ch 1
+                                         {0.0f, 2.0f, 0.0f, 0.0f, 0.0f},
+                                         {1.0f, 1.0f, 2.0f, 0.0f, 2.0f},
+                                         {1.0f, 1.0f, 1.0f, 0.0f, 1.0f},
+                                         {1.0f, 0.0f, 0.0f, 0.0f, 2.0f}}}})
+                  .get_vector());
+    auto result = backend->create_tensor(element::i32, rshape);
+
+    backend->call_with_validate(f, {result}, {a});
+    EXPECT_EQ((test::NDArray<int, 3>({{{0, 4, 1, 1, 3}, // img 0 ch 0
+                                       {4, 1, 3, 2, 1}}, // img 0 ch 1
+                                      {{0, 1, 0, 2, 0}, // img 1 ch 0
+                                       {2, 0, 3, 3, 1}}}) // img 1 ch 1
+                  .get_vector()),
+              read_vector<int>(result));
+}
+
 NGRAPH_TEST(${BACKEND_NAME}, argmax_trivial)
 {
-    Shape shape{4, 3};
+    Shape shape{4, 3}; // HW -> (0,1)
     Shape rshape{3};
     auto A = make_shared<op::Parameter>(element::f32, shape);
     auto f =
@@ -9502,6 +9544,168 @@ NGRAPH_TEST(${BACKEND_NAME}, argmax_trivial)
     EXPECT_EQ((vector<int>{1, 3, 0}), read_vector<int>(result));
 }

+NGRAPH_TEST(${BACKEND_NAME}, argmax_3D_axis_0) // Along Channels
+{
+    Shape shape{3, 4, 2}; // CHW -> (0,1,2)
+    Shape rshape{4, 2};
+    auto A = make_shared<op::Parameter>(element::f32, shape);
+    auto f =
+        make_shared<Function>(make_shared<op::ArgMax>(A, 0, element::i32), op::ParameterVector{A});
+    auto backend = runtime::Backend::create("${BACKEND_NAME}");
+
+    // Create some tensors for input/output
+    auto a = backend->create_tensor(element::f32, shape);
+    copy_data(a,
+              test::NDArray<float, 3>({{{8, 4}, // ch0
+                                        {12, 10},
+                                        {2, 9},
+                                        {1, 5}},
+                                       {{6, 7}, // ch1
+                                        {11, 3},
+                                        {9, 2},
+                                        {10, 12}},
+                                       {{8, 4}, // ch2
+                                        {6, 1},
+                                        {5, 3},
+                                        {11, 7}}})
+                  .get_vector());
+    auto result = backend->create_tensor(element::i32, rshape);
+
+    backend->call_with_validate(f, {result}, {a});
+    EXPECT_EQ((test::NDArray<int, 2>({{0, 1}, // r0
+                                      {0, 0}, // r1
+                                      {1, 0}, // r2
+                                      {2, 1}}) // r3
+                  .get_vector()),
+              read_vector<int>(result));
+}
+
+NGRAPH_TEST(${BACKEND_NAME}, argmax_3D_axis_1) // Along Height
+{
+    Shape shape{3, 4, 2}; // CHW -> (0,1,2)
+    Shape rshape{3, 2};
+    auto A = make_shared<op::Parameter>(element::f32, shape);
+    auto f =
+        make_shared<Function>(make_shared<op::ArgMax>(A, 1, element::i32), op::ParameterVector{A});
+    auto backend = runtime::Backend::create("${BACKEND_NAME}");
+
+    // Create some tensors for input/output
+    auto a = backend->create_tensor(element::f32, shape);
+    copy_data(a,
+              test::NDArray<float, 3>({{{8, 4}, // ch0
+                                        {12, 10},
+                                        {2, 9},
+                                        {1, 5}},
+                                       {{6, 7}, // ch1
+                                        {11, 3},
+                                        {9, 2},
+                                        {10, 12}},
+                                       {{8, 4}, // ch2
+                                        {6, 1},
+                                        {5, 3},
+                                        {11, 7}}})
+                  .get_vector());
+    auto result = backend->create_tensor(element::i32, rshape);
+
+    backend->call_with_validate(f, {result}, {a});
+    EXPECT_EQ((test::NDArray<int, 2>({{1, 1}, // ch0
+                                      {1, 3}, // ch1
+                                      {3, 3}}) // ch2
+                  .get_vector()),
+              read_vector<int>(result));
+}
+
+NGRAPH_TEST(${BACKEND_NAME}, argmax_3D_axis_2) // Along Width
+{
+    Shape shape{3, 4, 2}; // CHW -> (0,1,2)
+    Shape rshape{3, 4};
+    auto A = make_shared<op::Parameter>(element::f32, shape);
+    auto f =
+        make_shared<Function>(make_shared<op::ArgMax>(A, 2, element::i32), op::ParameterVector{A});
+    auto backend = runtime::Backend::create("${BACKEND_NAME}");
+
+    // Create some tensors for input/output
+    auto a = backend->create_tensor(element::f32, shape);
+    copy_data(a,
+              test::NDArray<float, 3>({{{8, 4}, // ch0
+                                        {12, 10},
+                                        {2, 9},
+                                        {1, 5}},
+                                       {{6, 7}, // ch1
+                                        {11, 3},
+                                        {9, 2},
+                                        {10, 12}},
+                                       {{8, 4}, // ch2
+                                        {6, 1},
+                                        {5, 3},
+                                        {11, 7}}})
+                  .get_vector());
+    auto result = backend->create_tensor(element::i32, rshape);
+
+    backend->call_with_validate(f, {result}, {a});
+    EXPECT_EQ((test::NDArray<int, 2>({{0, 0, 1, 1}, // ch0
+                                      {1, 0, 0, 1}, // ch1
+                                      {0, 0, 0, 0}}) // ch2
+                  .get_vector()),
+              read_vector<int>(result));
+}
+
+NGRAPH_TEST(${BACKEND_NAME}, argmax_4D_axis_3)
+{
+    Shape shape{2, 2, 5, 5}; // NCHW -> (0,1,2,3)
+    Shape rshape{2, 2, 5};
+    auto A = make_shared<op::Parameter>(element::f32, shape);
+    auto f =
+        make_shared<Function>(make_shared<op::ArgMax>(A, 3, element::i32), op::ParameterVector{A});
+    auto backend = runtime::Backend::create("${BACKEND_NAME}");
+
+    // Create some tensors for input/output
+    auto a = backend->create_tensor(element::f32, shape);
+    copy_data(a,
+              test::NDArray<float, 4>({{{{0, 1, 0, 2, 1}, // img 0 ch 0
+                                         {0, 3, 2, 0, 0},
+                                         {2, 0, 0, 0, 1},
+                                         {2, 0, 1, 1, 2},
+                                         {0, 2, 1, 0, 0}},
+                                        {{0, 0, 0, 2, 0}, // img 0 ch 1
+                                         {0, 2, 3, 0, 1},
+                                         {2, 0, 1, 0, 2},
+                                         {3, 1, 0, 0, 0},
+                                         {2, 0, 0, 0, 0}}},
+                                       {{{0, 2, 1, 1, 0}, // img 1 ch 0
+                                         {0, 0, 2, 0, 1},
+                                         {0, 0, 1, 2, 3},
+                                         {2, 0, 0, 3, 0},
+                                         {0, 0, 0, 0, 0}},
+                                        {{2, 1, 0, 0, 1}, // img 1 ch 1
+                                         {0, 2, 0, 0, 0},
+                                         {1, 1, 2, 0, 2},
+                                         {1, 1, 1, 0, 1},
+                                         {1, 0, 0, 0, 2}}}})
+                  .get_vector());
+    auto result = backend->create_tensor(element::i32, rshape);
+
+    backend->call_with_validate(f, {result}, {a});
+    EXPECT_EQ((test::NDArray<int, 3>({{{3, 1, 0, 0, 1}, {3, 2, 0, 0, 0}}, // img 0
+                                      {{1, 2, 4, 3, 0}, {0, 1, 2, 0, 4}}}) // img 1
+                  .get_vector()),
+              read_vector<int>(result));
+}
+
 NGRAPH_TEST(${BACKEND_NAME}, topk_1d_max_all)
 {
     Shape shape{6};
......