Commit 30d24597 authored by Fenglei's avatar Fenglei Committed by Robert Kimball

gpu select (#919)

* add select op, pass data type for each operand

* fix bugs and apply clang format

* fix index bug
parent 2d02b23f
......@@ -20,19 +20,18 @@
using namespace ngraph;
void runtime::gpu::CudaKernelBuilder::get_elementwise_op(
codegen::CodeWriter& writer,
const std::string& name,
const std::string& op,
const std::array<std::string, 2>& data_types,
const size_t& num_inputs)
void runtime::gpu::CudaKernelBuilder::get_elementwise_op(codegen::CodeWriter& writer,
const std::string& name,
const std::string& op,
const std::vector<std::string>& data_types,
const size_t& num_inputs)
{
writer << "extern \"C\" __global__ void cuda_" << name << "(";
for (size_t i = 0; i < num_inputs; i++)
{
writer << data_types[0] << "* in" << i << ", ";
writer << data_types[i] << "* in" << i << ", ";
}
writer << data_types[1] << "* out,"
writer << data_types[num_inputs] << "* out, "
<< "size_t n)\n";
writer << "{\n";
writer.indent++;
......@@ -167,21 +166,20 @@ void runtime::gpu::CudaKernelBuilder::get_slice_op(codegen::CodeWriter& writer,
writer.block_end();
}
void runtime::gpu::CudaKernelBuilder::get_device_helper(
codegen::CodeWriter& writer,
const std::string& name,
const std::string& math_kernel,
const std::array<std::string, 2>& data_types,
const size_t& num_inputs)
void runtime::gpu::CudaKernelBuilder::get_device_helper(codegen::CodeWriter& writer,
const std::string& name,
const std::string& math_kernel,
const std::vector<std::string>& data_types,
const size_t& num_inputs)
{
if (math_kernel.size())
{
writer << "__device__ " << data_types[1] << " " << name << "(";
writer << "__device__ " << data_types[num_inputs] << " " << name << "(";
for (size_t i = 0; i < num_inputs - 1; i++)
{
writer << data_types[0] << " x" << i << ", ";
writer << data_types[i] << " x" << i << ", ";
}
writer << data_types[0] << " x" << num_inputs - 1;
writer << data_types[num_inputs - 1] << " x" << num_inputs - 1;
writer << ")\n";
writer << "{\n";
writer.indent++;
......
......@@ -36,7 +36,7 @@ namespace ngraph
static void get_elementwise_op(codegen::CodeWriter& writer,
const std::string& name,
const std::string& op,
const std::array<std::string, 2>& data_types,
const std::vector<std::string>& data_types,
const size_t& num_inputs);
static void get_broadcast_op(codegen::CodeWriter& writer,
......@@ -58,8 +58,9 @@ namespace ngraph
static void get_device_helper(codegen::CodeWriter& writer,
const std::string& name,
const std::string& math_kernel,
const std::array<std::string, 2>& data_types,
const std::vector<std::string>& data_types,
const size_t& num_inputs);
static void add_pod_typedefs(codegen::CodeWriter& writer);
};
}
......
......@@ -25,6 +25,7 @@
#include "ngraph/runtime/gpu/gpu_cuda_kernel_builder.hpp"
#include "ngraph/runtime/gpu/gpu_runtime_context.hpp"
#include "ngraph/strides.hpp"
#include "ngraph/util.hpp"
namespace ngraph
{
......@@ -77,13 +78,13 @@ namespace ngraph
template <typename T, typename... Inputs>
void emit_elementwise_op(const std::string& name,
const std::array<std::string, 2>& data_types,
const std::vector<std::string>& data_types,
GPURuntimeContext* ctx,
size_t count,
CUdeviceptr out,
Inputs&&... inputs)
{
std::string type_signature = "_" + data_types[0] + "_" + data_types[1];
std::string type_signature = "_" + join(data_types, "_");
std::replace(type_signature.begin(), type_signature.end(), ' ', '_');
auto compiled_kernel = ctx->compiled_kernel_pool->get(name + type_signature);
if (compiled_kernel == nullptr)
......
......@@ -53,6 +53,7 @@ namespace ngraph
class Negative;
class Not;
class Sqrt;
class Select;
}
namespace runtime
{
......@@ -268,6 +269,13 @@ namespace ngraph
static constexpr const char* math_kernel = "!x0";
};
template <>
struct CudaOpMap<ngraph::op::Select>
{
static constexpr const char* op = "select";
static constexpr const char* math_kernel = "(x0 == 0) ? x2 : x1";
};
template <>
struct CudaOpMap<ngraph::op::ReluBackprop>
{
......
......@@ -123,7 +123,12 @@ namespace ngraph
writer << "if(count == 0) return;\n";
writer << "ngraph::runtime::gpu::emit_elementwise_op<ngraph::op::"
<< node->description() << ">(\"" << node->description() << "\""
<< ", {\"" << args[0].get_type() << "\", \"" << out[0].get_type() << "\"}"
<< ", std::vector<std::string>{";
for (size_t i = 0; i < args.size(); i++)
{
writer << "\"" << args[i].get_type() << "\", ";
}
writer << "\"" << out[0].get_type() << "\"}"
<< ", ctx"
<< ", count"
<< ", CUdeviceptr(" << out[0].get_name() << ")";
......
......@@ -186,7 +186,7 @@ static const runtime::gpu::OpMap dispatcher{
{TI(ngraph::op::Negative), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Negative>},
{TI(ngraph::op::NotEqual), &runtime::gpu::GPU_Emitter::emit_elementwise},
{TI(ngraph::op::Power), &runtime::gpu::GPU_Emitter::emit_elementwise},
{TI(ngraph::op::Select), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Select>},
{TI(ngraph::op::Select), &runtime::gpu::GPU_Emitter::emit_elementwise},
{TI(ngraph::op::Subtract), &runtime::gpu::GPU_Emitter::emit_elementwise},
{TI(ngraph::op::Broadcast), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Broadcast>},
{TI(ngraph::op::Convert), &runtime::gpu::GPU_Emitter::emit_elementwise},
......
......@@ -1353,7 +1353,6 @@ TEST(${BACKEND_NAME}, notequal)
TEST(${BACKEND_NAME}, select)
{
SKIP_TEST_FOR("IE", "${BACKEND_NAME}");
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
Shape shape{2, 2, 2};
auto A = make_shared<op::Parameter>(element::boolean, shape);
auto B = make_shared<op::Parameter>(element::f32, shape);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment