Commit 30d24597 authored by Fenglei's avatar Fenglei Committed by Robert Kimball

gpu select (#919)

* add select op, pass data type for each operand

* fix bugs and apply clang format

* fix index bug
parent 2d02b23f
...@@ -20,19 +20,18 @@ ...@@ -20,19 +20,18 @@
using namespace ngraph; using namespace ngraph;
void runtime::gpu::CudaKernelBuilder::get_elementwise_op( void runtime::gpu::CudaKernelBuilder::get_elementwise_op(codegen::CodeWriter& writer,
codegen::CodeWriter& writer,
const std::string& name, const std::string& name,
const std::string& op, const std::string& op,
const std::array<std::string, 2>& data_types, const std::vector<std::string>& data_types,
const size_t& num_inputs) const size_t& num_inputs)
{ {
writer << "extern \"C\" __global__ void cuda_" << name << "("; writer << "extern \"C\" __global__ void cuda_" << name << "(";
for (size_t i = 0; i < num_inputs; i++) for (size_t i = 0; i < num_inputs; i++)
{ {
writer << data_types[0] << "* in" << i << ", "; writer << data_types[i] << "* in" << i << ", ";
} }
writer << data_types[1] << "* out," writer << data_types[num_inputs] << "* out, "
<< "size_t n)\n"; << "size_t n)\n";
writer << "{\n"; writer << "{\n";
writer.indent++; writer.indent++;
...@@ -167,21 +166,20 @@ void runtime::gpu::CudaKernelBuilder::get_slice_op(codegen::CodeWriter& writer, ...@@ -167,21 +166,20 @@ void runtime::gpu::CudaKernelBuilder::get_slice_op(codegen::CodeWriter& writer,
writer.block_end(); writer.block_end();
} }
void runtime::gpu::CudaKernelBuilder::get_device_helper( void runtime::gpu::CudaKernelBuilder::get_device_helper(codegen::CodeWriter& writer,
codegen::CodeWriter& writer,
const std::string& name, const std::string& name,
const std::string& math_kernel, const std::string& math_kernel,
const std::array<std::string, 2>& data_types, const std::vector<std::string>& data_types,
const size_t& num_inputs) const size_t& num_inputs)
{ {
if (math_kernel.size()) if (math_kernel.size())
{ {
writer << "__device__ " << data_types[1] << " " << name << "("; writer << "__device__ " << data_types[num_inputs] << " " << name << "(";
for (size_t i = 0; i < num_inputs - 1; i++) for (size_t i = 0; i < num_inputs - 1; i++)
{ {
writer << data_types[0] << " x" << i << ", "; writer << data_types[i] << " x" << i << ", ";
} }
writer << data_types[0] << " x" << num_inputs - 1; writer << data_types[num_inputs - 1] << " x" << num_inputs - 1;
writer << ")\n"; writer << ")\n";
writer << "{\n"; writer << "{\n";
writer.indent++; writer.indent++;
......
...@@ -36,7 +36,7 @@ namespace ngraph ...@@ -36,7 +36,7 @@ namespace ngraph
static void get_elementwise_op(codegen::CodeWriter& writer, static void get_elementwise_op(codegen::CodeWriter& writer,
const std::string& name, const std::string& name,
const std::string& op, const std::string& op,
const std::array<std::string, 2>& data_types, const std::vector<std::string>& data_types,
const size_t& num_inputs); const size_t& num_inputs);
static void get_broadcast_op(codegen::CodeWriter& writer, static void get_broadcast_op(codegen::CodeWriter& writer,
...@@ -58,8 +58,9 @@ namespace ngraph ...@@ -58,8 +58,9 @@ namespace ngraph
static void get_device_helper(codegen::CodeWriter& writer, static void get_device_helper(codegen::CodeWriter& writer,
const std::string& name, const std::string& name,
const std::string& math_kernel, const std::string& math_kernel,
const std::array<std::string, 2>& data_types, const std::vector<std::string>& data_types,
const size_t& num_inputs); const size_t& num_inputs);
static void add_pod_typedefs(codegen::CodeWriter& writer); static void add_pod_typedefs(codegen::CodeWriter& writer);
}; };
} }
......
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
#include "ngraph/runtime/gpu/gpu_cuda_kernel_builder.hpp" #include "ngraph/runtime/gpu/gpu_cuda_kernel_builder.hpp"
#include "ngraph/runtime/gpu/gpu_runtime_context.hpp" #include "ngraph/runtime/gpu/gpu_runtime_context.hpp"
#include "ngraph/strides.hpp" #include "ngraph/strides.hpp"
#include "ngraph/util.hpp"
namespace ngraph namespace ngraph
{ {
...@@ -77,13 +78,13 @@ namespace ngraph ...@@ -77,13 +78,13 @@ namespace ngraph
template <typename T, typename... Inputs> template <typename T, typename... Inputs>
void emit_elementwise_op(const std::string& name, void emit_elementwise_op(const std::string& name,
const std::array<std::string, 2>& data_types, const std::vector<std::string>& data_types,
GPURuntimeContext* ctx, GPURuntimeContext* ctx,
size_t count, size_t count,
CUdeviceptr out, CUdeviceptr out,
Inputs&&... inputs) Inputs&&... inputs)
{ {
std::string type_signature = "_" + data_types[0] + "_" + data_types[1]; std::string type_signature = "_" + join(data_types, "_");
std::replace(type_signature.begin(), type_signature.end(), ' ', '_'); std::replace(type_signature.begin(), type_signature.end(), ' ', '_');
auto compiled_kernel = ctx->compiled_kernel_pool->get(name + type_signature); auto compiled_kernel = ctx->compiled_kernel_pool->get(name + type_signature);
if (compiled_kernel == nullptr) if (compiled_kernel == nullptr)
......
...@@ -53,6 +53,7 @@ namespace ngraph ...@@ -53,6 +53,7 @@ namespace ngraph
class Negative; class Negative;
class Not; class Not;
class Sqrt; class Sqrt;
class Select;
} }
namespace runtime namespace runtime
{ {
...@@ -268,6 +269,13 @@ namespace ngraph ...@@ -268,6 +269,13 @@ namespace ngraph
static constexpr const char* math_kernel = "!x0"; static constexpr const char* math_kernel = "!x0";
}; };
template <>
struct CudaOpMap<ngraph::op::Select>
{
static constexpr const char* op = "select";
static constexpr const char* math_kernel = "(x0 == 0) ? x2 : x1";
};
template <> template <>
struct CudaOpMap<ngraph::op::ReluBackprop> struct CudaOpMap<ngraph::op::ReluBackprop>
{ {
......
...@@ -123,7 +123,12 @@ namespace ngraph ...@@ -123,7 +123,12 @@ namespace ngraph
writer << "if(count == 0) return;\n"; writer << "if(count == 0) return;\n";
writer << "ngraph::runtime::gpu::emit_elementwise_op<ngraph::op::" writer << "ngraph::runtime::gpu::emit_elementwise_op<ngraph::op::"
<< node->description() << ">(\"" << node->description() << "\"" << node->description() << ">(\"" << node->description() << "\""
<< ", {\"" << args[0].get_type() << "\", \"" << out[0].get_type() << "\"}" << ", std::vector<std::string>{";
for (size_t i = 0; i < args.size(); i++)
{
writer << "\"" << args[i].get_type() << "\", ";
}
writer << "\"" << out[0].get_type() << "\"}"
<< ", ctx" << ", ctx"
<< ", count" << ", count"
<< ", CUdeviceptr(" << out[0].get_name() << ")"; << ", CUdeviceptr(" << out[0].get_name() << ")";
......
...@@ -186,7 +186,7 @@ static const runtime::gpu::OpMap dispatcher{ ...@@ -186,7 +186,7 @@ static const runtime::gpu::OpMap dispatcher{
{TI(ngraph::op::Negative), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Negative>}, {TI(ngraph::op::Negative), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Negative>},
{TI(ngraph::op::NotEqual), &runtime::gpu::GPU_Emitter::emit_elementwise}, {TI(ngraph::op::NotEqual), &runtime::gpu::GPU_Emitter::emit_elementwise},
{TI(ngraph::op::Power), &runtime::gpu::GPU_Emitter::emit_elementwise}, {TI(ngraph::op::Power), &runtime::gpu::GPU_Emitter::emit_elementwise},
{TI(ngraph::op::Select), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Select>}, {TI(ngraph::op::Select), &runtime::gpu::GPU_Emitter::emit_elementwise},
{TI(ngraph::op::Subtract), &runtime::gpu::GPU_Emitter::emit_elementwise}, {TI(ngraph::op::Subtract), &runtime::gpu::GPU_Emitter::emit_elementwise},
{TI(ngraph::op::Broadcast), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Broadcast>}, {TI(ngraph::op::Broadcast), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Broadcast>},
{TI(ngraph::op::Convert), &runtime::gpu::GPU_Emitter::emit_elementwise}, {TI(ngraph::op::Convert), &runtime::gpu::GPU_Emitter::emit_elementwise},
......
...@@ -1353,7 +1353,6 @@ TEST(${BACKEND_NAME}, notequal) ...@@ -1353,7 +1353,6 @@ TEST(${BACKEND_NAME}, notequal)
TEST(${BACKEND_NAME}, select) TEST(${BACKEND_NAME}, select)
{ {
SKIP_TEST_FOR("IE", "${BACKEND_NAME}"); SKIP_TEST_FOR("IE", "${BACKEND_NAME}");
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
Shape shape{2, 2, 2}; Shape shape{2, 2, 2};
auto A = make_shared<op::Parameter>(element::boolean, shape); auto A = make_shared<op::Parameter>(element::boolean, shape);
auto B = make_shared<op::Parameter>(element::f32, shape); auto B = make_shared<op::Parameter>(element::f32, shape);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment