Unverified commit d3ea93e2, authored by Chris Sullivan, committed by GitHub

GPU elementwise emitters now respect input and output tensor types. (#633)

* GPU elementwise emitters now respect input and output tensor types.
This enables the use of binary comparison ops and op::Convert.

* Removed comments.

* All kernels now carry a type signature in their name,
even when the input and output tensors have the same type,
so that kernels compiled for specific tensor types are unique.

NGMX-391 #close 
parent 429eae9a
@@ -18,18 +18,19 @@
 using namespace ngraph;
 
-void runtime::gpu::CudaKernelBuilder::get_elementwise_op(codegen::CodeWriter& writer,
-                                                         const std::string& name,
-                                                         const std::string& data_type,
-                                                         const std::string& op,
-                                                         const size_t& num_inputs)
+void runtime::gpu::CudaKernelBuilder::get_elementwise_op(
+    codegen::CodeWriter& writer,
+    const std::string& name,
+    const std::string& op,
+    const std::array<std::string, 2>& data_types,
+    const size_t& num_inputs)
 {
     writer << "extern \"C\" __global__ void cuda_" << name << "(";
     for (size_t i = 0; i < num_inputs; i++)
     {
-        writer << data_type << "* in" << i << ", ";
+        writer << data_types[0] << "* in" << i << ", ";
     }
-    writer << data_type << "* out,"
+    writer << data_types[1] << "* out,"
            << "size_t n)\n";
     writer << "{\n";
     writer.indent++;
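As a concrete illustration of what this emitter now produces: for a hypothetical Greater node with float inputs and a char (boolean) output, the generated entry point would be named and typed roughly as below. The kernel body sits outside this hunk, so the indexing and guard shown here are an assumed sketch, not verbatim generated code.

    // Sketch of generated source; "Greater"/float/char are assumed example values.
    extern "C" __global__ void cuda_Greater_float_char(float* in0, float* in1, char* out, size_t n)
    {
        // one element per thread; reduces to blockIdx.x under the one-thread-block launch used below
        size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
        if (tid < n)
        {
            out[tid] = greater_float_char(in0[tid], in1[tid]); // device helper emitted separately
        }
    }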
@@ -55,20 +56,21 @@ void runtime::gpu::CudaKernelBuilder::get_elementwise_op(...)
     return;
 }
 
-void runtime::gpu::CudaKernelBuilder::get_device_helper(codegen::CodeWriter& writer,
-                                                        const std::string& name,
-                                                        const std::string& data_type,
-                                                        const std::string& math_kernel,
-                                                        const size_t& num_inputs)
+void runtime::gpu::CudaKernelBuilder::get_device_helper(
+    codegen::CodeWriter& writer,
+    const std::string& name,
+    const std::string& math_kernel,
+    const std::array<std::string, 2>& data_types,
+    const size_t& num_inputs)
 {
     if (math_kernel.size())
     {
-        writer << "__device__ " << data_type << " " << name << "(";
+        writer << "__device__ " << data_types[1] << " " << name << "(";
         for (size_t i = 0; i < num_inputs - 1; i++)
         {
-            writer << data_type << " x" << i << ", ";
+            writer << data_types[0] << " x" << i << ", ";
         }
-        writer << data_type << " x" << num_inputs - 1;
+        writer << data_types[0] << " x" << num_inputs - 1;
         writer << ")\n";
         writer << "{\n";
         writer.indent++;
@@ -80,3 +82,16 @@ void runtime::gpu::CudaKernelBuilder::get_device_helper(...)
     }
     return;
 }
+
+void runtime::gpu::CudaKernelBuilder::add_pod_typedefs(codegen::CodeWriter& writer)
+{
+    writer << "typedef signed char int8_t;\n";
+    writer << "typedef signed short int16_t;\n";
+    writer << "typedef signed int int32_t;\n";
+    writer << "typedef signed long int int64_t;\n";
+    writer << "typedef unsigned char uint8_t;\n";
+    writer << "typedef unsigned short uint16_t;\n";
+    writer << "typedef unsigned int uint32_t;\n";
+    writer << "typedef unsigned long int uint64_t;\n";
+    writer << "\n";
+}
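These typedefs are needed because the kernel source is compiled at runtime with NVRTC, which does not see host headers such as <cstdint>, so the fixed-width names used in kernel signatures must be defined inside the generated source itself. Assembled from the three emitters above, a generated program for an int32_t-to-float Convert would plausibly begin like this (a sketch, not captured output; the helper body is assumed from Convert's math_kernel, "x0"):

    typedef signed int int32_t;
    // ...remaining POD typedefs...

    // Return type is data_types[1], so the conversion happens implicitly on return.
    __device__ float convert_int32_t_float(int32_t x0)
    {
        return x0;
    }

    extern "C" __global__ void cuda_Convert_int32_t_float(int32_t* in0, float* out, size_t n)
    { /* elementwise loop as emitted by get_elementwise_op */ }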
@@ -16,6 +16,7 @@
 #pragma once
 
+#include <array>
 #include <string>
 #include <vector>
@@ -34,15 +35,16 @@ namespace ngraph
         public:
             static void get_elementwise_op(codegen::CodeWriter& writer,
                                            const std::string& name,
-                                           const std::string& data_type,
                                            const std::string& op,
+                                           const std::array<std::string, 2>& data_types,
                                            const size_t& num_inputs);
 
             static void get_device_helper(codegen::CodeWriter& writer,
                                           const std::string& name,
-                                          const std::string& data_type,
                                           const std::string& math_kernel,
+                                          const std::array<std::string, 2>& data_types,
                                           const size_t& num_inputs);
+
+            static void add_pod_typedefs(codegen::CodeWriter& writer);
         };
     }
 }
...
@@ -21,6 +21,7 @@
 #include "ngraph/runtime/gpu/gpu_cuda_kernel_ops.hpp"
 
 using namespace ngraph;
 
 void runtime::gpu::emit_broadcast(
     void* in, void* out, size_t repeat_size, size_t repeat_times, size_t count)
 {
...
@@ -16,6 +16,9 @@
 #pragma once
 
+#include <array>
+#include <string>
+
 #include "ngraph/codegen/code_writer.hpp"
 #include "ngraph/coordinate.hpp"
 #include "ngraph/runtime/gpu/gpu_cuda_function_pool.hpp"
@@ -36,43 +39,49 @@ namespace ngraph
             template <typename T, typename... Inputs>
             void emit_elementwise_op(std::string name,
+                                     std::array<std::string, 2> data_types,
                                      size_t count,
                                      CUdeviceptr out,
                                      Inputs&&... inputs)
             {
-                // Create an instance of nvrtcProgram with the code string.
-                if (CudaFunctionPool::instance().get(name) == nullptr)
+                std::string type_signature = "_" + data_types[0] + "_" + data_types[1];
+                if (CudaFunctionPool::instance().get(name + type_signature) == nullptr)
                 {
                     codegen::CodeWriter writer;
+                    CudaKernelBuilder::add_pod_typedefs(writer);
+                    std::string op_name = CudaOpMap<T>::op;
                     if (CudaOpMap<T>::math_kernel)
                     {
+                        op_name += type_signature;
                         CudaKernelBuilder::get_device_helper(writer,
-                                                             CudaOpMap<T>::op,
-                                                             CudaOpMap<T>::type,
+                                                             op_name,
                                                              CudaOpMap<T>::math_kernel,
+                                                             data_types,
                                                              sizeof...(inputs));
                     }
                     CudaKernelBuilder::get_elementwise_op(
-                        writer, name, CudaOpMap<T>::type, CudaOpMap<T>::op, sizeof...(inputs));
+                        writer, name + type_signature, op_name, data_types, sizeof...(inputs));
                     std::string kernel = writer.get_code();
-                    CudaFunctionPool::instance().set(name, kernel);
+                    CudaFunctionPool::instance().set(name + type_signature, kernel);
                 }
                 //convert runtime ptr to driver api ptr
                 void* args_list[] = {&inputs..., &out, &count};
-                CUDA_SAFE_CALL(cuLaunchKernel(*CudaFunctionPool::instance().get(name).get(),
-                                              count,
-                                              1,
-                                              1, // grid dim
-                                              1,
-                                              1,
-                                              1, // block dim
-                                              0,
-                                              NULL, // shared mem and stream
-                                              args_list,
-                                              0)); // arguments
+                CUDA_SAFE_CALL(
+                    cuLaunchKernel(*CudaFunctionPool::instance().get(name + type_signature).get(),
+                                   count,
+                                   1,
+                                   1, // grid dim
+                                   1,
+                                   1,
+                                   1, // block dim
+                                   0,
+                                   NULL, // shared mem and stream
+                                   args_list,
+                                   0)); // arguments
                 CUDA_SAFE_CALL(cuCtxSynchronize()); // Retrieve and print output.
             }
         }
...
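The net effect of type_signature is that the function pool is keyed by op name plus input/output types, so the same op instantiated for different tensor types compiles to distinct kernels instead of colliding on a single cache entry. A minimal standalone sketch of the keying scheme (kernel_key is a hypothetical helper mirroring the concatenation above, not part of the patch):

    #include <array>
    #include <iostream>
    #include <string>

    // Mirrors name + "_" + data_types[0] + "_" + data_types[1] from emit_elementwise_op.
    static std::string kernel_key(const std::string& name,
                                  const std::array<std::string, 2>& data_types)
    {
        return name + "_" + data_types[0] + "_" + data_types[1];
    }

    int main()
    {
        // Two Greater nodes with different input types map to different kernels.
        std::cout << kernel_key("Greater", {"float", "char"}) << "\n";   // Greater_float_char
        std::cout << kernel_key("Greater", {"int32_t", "char"}) << "\n"; // Greater_int32_t_char
        return 0;
    }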
@@ -38,8 +38,6 @@ namespace ngraph
             class Subtract;
             class Divide;
             class Sign;
-
-            // requires different input and output types
             class Convert;
             class Equal;
             class NotEqual;
@@ -47,8 +45,6 @@ namespace ngraph
             class GreaterEq;
             class Less;
             class LessEq;
-
-            // Unimplemented or unused in favor of cuDNN impl.
             class Max;
             class Min;
             class Negative;
@@ -63,7 +59,6 @@ namespace ngraph
             struct CudaOpMap<ngraph::op::Abs>
             {
                 static constexpr const char* op = "fabsf";
-                static constexpr const char* type = "float";
                 static constexpr const char* math_kernel = nullptr;
             };
@@ -71,7 +66,6 @@ namespace ngraph
             struct CudaOpMap<ngraph::op::Acos>
             {
                 static constexpr const char* op = "acosf";
-                static constexpr const char* type = "float";
                 static constexpr const char* math_kernel = nullptr;
             };
@@ -79,7 +73,6 @@ namespace ngraph
             struct CudaOpMap<ngraph::op::Asin>
             {
                 static constexpr const char* op = "asinf";
-                static constexpr const char* type = "float";
                 static constexpr const char* math_kernel = nullptr;
             };
@@ -87,7 +80,6 @@ namespace ngraph
             struct CudaOpMap<ngraph::op::Atan>
             {
                 static constexpr const char* op = "atanf";
-                static constexpr const char* type = "float";
                 static constexpr const char* math_kernel = nullptr;
             };
@@ -95,7 +87,6 @@ namespace ngraph
             struct CudaOpMap<ngraph::op::Ceiling>
             {
                 static constexpr const char* op = "ceilf";
-                static constexpr const char* type = "float";
                 static constexpr const char* math_kernel = nullptr;
             };
@@ -103,7 +94,6 @@ namespace ngraph
             struct CudaOpMap<ngraph::op::Cos>
             {
                 static constexpr const char* op = "cosf";
-                static constexpr const char* type = "float";
                 static constexpr const char* math_kernel = nullptr;
             };
@@ -111,7 +101,6 @@ namespace ngraph
             struct CudaOpMap<ngraph::op::Cosh>
             {
                 static constexpr const char* op = "coshf";
-                static constexpr const char* type = "float";
                 static constexpr const char* math_kernel = nullptr;
             };
@@ -119,7 +108,6 @@ namespace ngraph
             struct CudaOpMap<ngraph::op::Exp>
             {
                 static constexpr const char* op = "expf";
-                static constexpr const char* type = "float";
                 static constexpr const char* math_kernel = nullptr;
             };
@@ -127,7 +115,6 @@ namespace ngraph
             struct CudaOpMap<ngraph::op::Floor>
             {
                 static constexpr const char* op = "floorf";
-                static constexpr const char* type = "float";
                 static constexpr const char* math_kernel = nullptr;
             };
@@ -135,7 +122,6 @@ namespace ngraph
             struct CudaOpMap<ngraph::op::Log>
             {
                 static constexpr const char* op = "logf";
-                static constexpr const char* type = "float";
                 static constexpr const char* math_kernel = nullptr;
             };
@@ -143,7 +129,6 @@ namespace ngraph
             struct CudaOpMap<ngraph::op::Max>
             {
                 static constexpr const char* op = "fmaxf";
-                static constexpr const char* type = "float";
                 static constexpr const char* math_kernel = nullptr;
             };
@@ -151,7 +136,6 @@ namespace ngraph
             struct CudaOpMap<ngraph::op::Min>
             {
                 static constexpr const char* op = "fminf";
-                static constexpr const char* type = "float";
                 static constexpr const char* math_kernel = nullptr;
             };
@@ -159,7 +143,6 @@ namespace ngraph
             struct CudaOpMap<ngraph::op::Sin>
             {
                 static constexpr const char* op = "sinf";
-                static constexpr const char* type = "float";
                 static constexpr const char* math_kernel = nullptr;
             };
@@ -167,7 +150,6 @@ namespace ngraph
             struct CudaOpMap<ngraph::op::Sinh>
             {
                 static constexpr const char* op = "sinhf";
-                static constexpr const char* type = "float";
                 static constexpr const char* math_kernel = nullptr;
             };
@@ -175,7 +157,6 @@ namespace ngraph
             struct CudaOpMap<ngraph::op::Sqrt>
             {
                 static constexpr const char* op = "sqrtf";
-                static constexpr const char* type = "float";
                 static constexpr const char* math_kernel = nullptr;
             };
@@ -183,7 +164,6 @@ namespace ngraph
             struct CudaOpMap<ngraph::op::Tan>
             {
                 static constexpr const char* op = "tanf";
-                static constexpr const char* type = "float";
                 static constexpr const char* math_kernel = nullptr;
             };
@@ -191,7 +171,6 @@ namespace ngraph
             struct CudaOpMap<ngraph::op::Tanh>
             {
                 static constexpr const char* op = "tanhf";
-                static constexpr const char* type = "float";
                 static constexpr const char* math_kernel = nullptr;
             };
@@ -199,7 +178,6 @@ namespace ngraph
             struct CudaOpMap<ngraph::op::Power>
             {
                 static constexpr const char* op = "powf";
-                static constexpr const char* type = "float";
                 static constexpr const char* math_kernel = nullptr;
             };
@@ -207,7 +185,6 @@ namespace ngraph
            struct CudaOpMap<ngraph::op::Subtract>
             {
                 static constexpr const char* op = "subtractf";
-                static constexpr const char* type = "float";
                 static constexpr const char* math_kernel = "x0-x1";
             };
@@ -215,7 +192,6 @@ namespace ngraph
             struct CudaOpMap<ngraph::op::Divide>
             {
                 static constexpr const char* op = "fdividef";
-                static constexpr const char* type = "float";
                 static constexpr const char* math_kernel = nullptr;
             };
@@ -223,9 +199,57 @@ namespace ngraph
             struct CudaOpMap<ngraph::op::Sign>
             {
                 static constexpr const char* op = "sign";
-                static constexpr const char* type = "float";
                 static constexpr const char* math_kernel = "(x0 > 0) - (x0 < 0)";
             };
+
+            template <>
+            struct CudaOpMap<ngraph::op::Convert>
+            {
+                static constexpr const char* op = "convert";
+                static constexpr const char* math_kernel = "x0";
+            };
+
+            template <>
+            struct CudaOpMap<ngraph::op::Equal>
+            {
+                static constexpr const char* op = "equal";
+                static constexpr const char* math_kernel = "x0 == x1";
+            };
+
+            template <>
+            struct CudaOpMap<ngraph::op::NotEqual>
+            {
+                static constexpr const char* op = "not_equal";
+                static constexpr const char* math_kernel = "x0 != x1";
+            };
+
+            template <>
+            struct CudaOpMap<ngraph::op::Greater>
+            {
+                static constexpr const char* op = "greater";
+                static constexpr const char* math_kernel = "x0 > x1";
+            };
+
+            template <>
+            struct CudaOpMap<ngraph::op::GreaterEq>
+            {
+                static constexpr const char* op = "greater_equal";
+                static constexpr const char* math_kernel = "x0 >= x1";
+            };
+
+            template <>
+            struct CudaOpMap<ngraph::op::Less>
+            {
+                static constexpr const char* op = "less";
+                static constexpr const char* math_kernel = "x0 < x1";
+            };
+
+            template <>
+            struct CudaOpMap<ngraph::op::LessEq>
+            {
+                static constexpr const char* op = "less_equal";
+                static constexpr const char* math_kernel = "x0 <= x1";
+            };
         }
     }
 }
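Each specialization now supplies only the op name and a scalar expression; the input/output types come from the tensors at emission time, and get_device_helper wraps the expression in a typed __device__ function. For CudaOpMap<ngraph::op::Greater> with assumed float inputs and a char output, the emitted helper would be along these lines (a sketch; the body emission itself sits outside these hunks):

    __device__ char greater_float_char(float x0, float x1)
    {
        return x0 > x1; // math_kernel expression; implicit cast to the output type on return
    }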
@@ -97,15 +97,13 @@
 #include "ngraph/util.hpp"
 
 using namespace std;
-using namespace ngraph;
 
 namespace ngraph
 {
     namespace runtime
     {
         namespace gpu
         {
-            void runtime::gpu::GPU_Emitter::EmitElementwise(
+            void GPU_Emitter::EmitElementwise(
                 GPU_ExternalFunction* external_function,
                 codegen::CodeWriter& writer,
                 const ngraph::Node* n,
@@ -116,12 +114,14 @@ namespace ngraph
                 {
                     return;
                 }
+
                 writer << "{ // " << n->get_name() << "\n";
                 writer.indent++;
                 writer << "int count = " << out[0].get_size() << ";\n";
                 writer << "if(count == 0) return;\n";
                 writer << "ngraph::runtime::gpu::emit_elementwise_op<ngraph::op::"
                        << n->description() << ">(\"" << n->description() << "\""
+                       << ", {\"" << args[0].get_type() << "\", \"" << out[0].get_type() << "\"}"
                        << ", count"
                        << ", (CUdeviceptr) " << out[0].get_name();
                 for (size_t i = 0; i < args.size(); i++)
...
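At the call site, the new argument surfaces in the generated C++ as a brace-initialized pair of type strings taken from the first input and the output tensor. For a hypothetical node Greater_3 with two float arguments named arg0/arg1 and output out0 (all illustrative names), the emitted block would look approximately like:

    { // Greater_3
    int count = 8;
    if(count == 0) return;
    ngraph::runtime::gpu::emit_elementwise_op<ngraph::op::Greater>("Greater",
        {"float", "char"}, count, (CUdeviceptr) out0, (CUdeviceptr) arg0, (CUdeviceptr) arg1);
    }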
@@ -168,23 +168,23 @@ static const runtime::gpu::OpMap dispatcher{
     {TI(ngraph::op::Abs), &runtime::gpu::GPU_Emitter::EmitElementwise},
     {TI(ngraph::op::Concat), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Concat>},
     {TI(ngraph::op::Divide), &runtime::gpu::GPU_Emitter::EmitElementwise},
-    {TI(ngraph::op::Equal), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Equal>},
+    {TI(ngraph::op::Equal), &runtime::gpu::GPU_Emitter::EmitElementwise},
     {TI(ngraph::op::GetOutputElement),
      &runtime::gpu::GPU_Emitter::emit<ngraph::op::GetOutputElement>},
-    {TI(ngraph::op::Greater), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Greater>},
-    {TI(ngraph::op::GreaterEq), &runtime::gpu::GPU_Emitter::emit<ngraph::op::GreaterEq>},
-    {TI(ngraph::op::Less), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Less>},
-    {TI(ngraph::op::LessEq), &runtime::gpu::GPU_Emitter::emit<ngraph::op::LessEq>},
+    {TI(ngraph::op::Greater), &runtime::gpu::GPU_Emitter::EmitElementwise},
+    {TI(ngraph::op::GreaterEq), &runtime::gpu::GPU_Emitter::EmitElementwise},
+    {TI(ngraph::op::Less), &runtime::gpu::GPU_Emitter::EmitElementwise},
+    {TI(ngraph::op::LessEq), &runtime::gpu::GPU_Emitter::EmitElementwise},
     {TI(ngraph::op::Log), &runtime::gpu::GPU_Emitter::EmitElementwise},
     {TI(ngraph::op::Maximum), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Maximum>},
     {TI(ngraph::op::Minimum), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Minimum>},
     {TI(ngraph::op::Negative), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Negative>},
-    {TI(ngraph::op::NotEqual), &runtime::gpu::GPU_Emitter::emit<ngraph::op::NotEqual>},
+    {TI(ngraph::op::NotEqual), &runtime::gpu::GPU_Emitter::EmitElementwise},
     {TI(ngraph::op::Power), &runtime::gpu::GPU_Emitter::EmitElementwise},
     {TI(ngraph::op::Select), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Select>},
     {TI(ngraph::op::Subtract), &runtime::gpu::GPU_Emitter::EmitElementwise},
     {TI(ngraph::op::Broadcast), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Broadcast>},
-    {TI(ngraph::op::Convert), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Convert>},
+    {TI(ngraph::op::Convert), &runtime::gpu::GPU_Emitter::EmitElementwise},
     {TI(ngraph::op::Constant), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Constant>},
     {TI(ngraph::op::Reshape), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Reshape>},
     {TI(ngraph::op::FunctionCall), &runtime::gpu::GPU_Emitter::emit<ngraph::op::FunctionCall>},
...
@@ -751,7 +751,6 @@ TEST(${BACKEND_NAME}, divide_by_zero_int32)
 TEST(${BACKEND_NAME}, equal)
 {
-    SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
     Shape shape{2, 2, 2};
     auto A = make_shared<op::Parameter>(element::f32, shape);
     auto B = make_shared<op::Parameter>(element::f32, shape);
@@ -1245,7 +1244,6 @@ TEST(${BACKEND_NAME}, dot_matrix_vector_int64)
 TEST(${BACKEND_NAME}, greater)
 {
-    SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
     Shape shape{2, 2, 2};
     auto A = make_shared<op::Parameter>(element::f32, shape);
     auto B = make_shared<op::Parameter>(element::f32, shape);
@@ -1269,7 +1267,6 @@ TEST(${BACKEND_NAME}, greater)
 TEST(${BACKEND_NAME}, greatereq)
 {
-    SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
     Shape shape{2, 2, 2};
     auto A = make_shared<op::Parameter>(element::f32, shape);
     auto B = make_shared<op::Parameter>(element::f32, shape);
@@ -1293,7 +1290,6 @@ TEST(${BACKEND_NAME}, greatereq)
 TEST(${BACKEND_NAME}, less)
 {
-    SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
     Shape shape{2, 2, 2};
     auto A = make_shared<op::Parameter>(element::f32, shape);
     auto B = make_shared<op::Parameter>(element::f32, shape);
@@ -1317,7 +1313,6 @@ TEST(${BACKEND_NAME}, less)
 TEST(${BACKEND_NAME}, lesseq)
 {
-    SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
     Shape shape{2, 2, 2};
     auto A = make_shared<op::Parameter>(element::f32, shape);
     auto B = make_shared<op::Parameter>(element::f32, shape);
@@ -1341,7 +1336,6 @@ TEST(${BACKEND_NAME}, lesseq)
 TEST(${BACKEND_NAME}, lesseq_bool)
 {
-    SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
     Shape shape{2, 2, 2};
     auto A = make_shared<op::Parameter>(element::boolean, shape);
     auto B = make_shared<op::Parameter>(element::boolean, shape);
@@ -1461,7 +1455,6 @@ TEST(${BACKEND_NAME}, negative)
 TEST(${BACKEND_NAME}, notequal)
 {
-    SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
     Shape shape{2, 2, 2};
     auto A = make_shared<op::Parameter>(element::f32, shape);
     auto B = make_shared<op::Parameter>(element::f32, shape);
@@ -1943,7 +1936,6 @@ TEST(${BACKEND_NAME}, broadcast_matrix_2)
 TEST(${BACKEND_NAME}, convert_int32_float32)
 {
-    SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
     Shape shape{2, 2};
     auto A = make_shared<op::Parameter>(element::i32, shape);
     auto f =
@@ -1965,7 +1957,6 @@ TEST(${BACKEND_NAME}, convert_int32_float32)
 TEST(${BACKEND_NAME}, convert_int32_bool)
 {
-    SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
     Shape shape{2, 2};
     auto A = make_shared<op::Parameter>(element::i32, shape);
     auto f = make_shared<Function>(make_shared<op::Convert>(A, element::boolean),
@@ -1987,7 +1978,6 @@ TEST(${BACKEND_NAME}, convert_int32_bool)
 TEST(${BACKEND_NAME}, convert_float32_bool)
 {
-    SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
     Shape shape{2, 2};
     auto A = make_shared<op::Parameter>(element::f32, shape);
     auto f = make_shared<Function>(make_shared<op::Convert>(A, element::boolean),
...