Commit dd5a6769 authored by fenglei.tian's avatar fenglei.tian

add sign op, fix constant bug

parent b5414ba5
...@@ -74,6 +74,53 @@ void cuda_)" + name + "(" + data_type + ...@@ -74,6 +74,53 @@ void cuda_)" + name + "(" + data_type +
0)); // arguments 0)); // arguments
CUDA_SAFE_CALL(cuCtxSynchronize()); // Retrieve and print output. CUDA_SAFE_CALL(cuCtxSynchronize()); // Retrieve and print output.
} }
// Elementwise sign on the GPU: out[i] = -1, 0, or +1 according to the sign
// of in[i] (float data). Compiles the kernel once with NVRTC, caches it in
// CudaFunctionPool, then launches one thread per element and synchronizes.
//
// in    - device pointer to `count` floats (runtime-API pointer)
// out   - device pointer to `count` floats (runtime-API pointer)
// count - number of elements to process
void emit_sign(void* in, void* out, size_t count)
{
    // cuLaunchKernel rejects a zero-sized grid, so emit nothing for empty tensors.
    if (count == 0)
    {
        return;
    }
    std::string name = "sign";
    // Compile and cache the kernel the first time it is requested.
    if (CudaFunctionPool::instance().get(name) == nullptr)
    {
        const char* opts[] = {"--gpu-architecture=compute_35",
                              "--relocatable-device-code=true"};
        std::string kernel;
        std::string data_type("float");
        kernel = R"(
extern "C" __global__
void cuda_)" + name + "(" + data_type + "* in, " + data_type + "* out, size_t n)\n" + R"(
{
    size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
    if (tid < n)
    {
        out[tid] = (in[tid] > 0) - (in[tid] < 0);
    }
})";
        CudaFunctionPool::instance().set(
            name, CudaFunctionBuilder::get("cuda_" + name, kernel, 2, opts));
    }
    // Convert runtime-API pointers to driver-API device pointers.
    CUdeviceptr d_ptr_in = (CUdeviceptr)in;
    CUdeviceptr d_ptr_out = (CUdeviceptr)out;
    void* args_list[] = {&d_ptr_in, &d_ptr_out, &count};
    // One thread per element: 256-thread blocks with a ceil-div grid, rather
    // than launching `count` single-thread blocks (terrible SM occupancy).
    // The in-kernel `tid < n` guard handles the ragged final block.
    const unsigned int block_size = 256;
    unsigned int grid_size =
        static_cast<unsigned int>((count + block_size - 1) / block_size);
    CUDA_SAFE_CALL(cuLaunchKernel(*CudaFunctionPool::instance().get(name).get(),
                                  grid_size,
                                  1,
                                  1, // grid dim
                                  block_size,
                                  1,
                                  1, // block dim
                                  0,
                                  NULL, // shared mem and stream
                                  args_list,
                                  0)); // arguments
    CUDA_SAFE_CALL(cuCtxSynchronize()); // surface async execution errors here
}
} }
} }
} }
...@@ -35,6 +35,8 @@ namespace ngraph ...@@ -35,6 +35,8 @@ namespace ngraph
void emit_broadcast( void emit_broadcast(
void* in, void* out, size_t repeat_size, size_t repeat_times, size_t count); void* in, void* out, size_t repeat_size, size_t repeat_times, size_t count);
// Launch the cached elementwise sign kernel: out[i] = (in[i] > 0) - (in[i] < 0)
// over `count` float elements; `in`/`out` are device pointers.
void emit_sign(void* in, void* out, size_t count);
template <typename T> template <typename T>
void emit_unary_elementwise_op(void* in, void* out, size_t count, std::string name) void emit_unary_elementwise_op(void* in, void* out, size_t count, std::string name)
{ {
......
...@@ -134,7 +134,6 @@ namespace ngraph ...@@ -134,7 +134,6 @@ namespace ngraph
writer << "{ // " << node->get_name() << "\n"; writer << "{ // " << node->get_name() << "\n";
writer.indent++; writer.indent++;
writer << "int count = " << out[0].get_size() << ";\n"; writer << "int count = " << out[0].get_size() << ";\n";
writer << "if(count == 0) return;\n";
writer << "ngraph::runtime::gpu::emit_unary_elementwise_op<ngraph::op::" writer << "ngraph::runtime::gpu::emit_unary_elementwise_op<ngraph::op::"
<< node->description() << ">((void*) " << args[0].get_name() << ", (void*) " << node->description() << ">((void*) " << args[0].get_name() << ", (void*) "
<< out[0].get_name() << ", count, \"" << node->description() << "\");\n"; << out[0].get_name() << ", count, \"" << node->description() << "\");\n";
...@@ -618,6 +617,22 @@ cudnnSetOpTensorDescriptor(opTensorDesc, ...@@ -618,6 +617,22 @@ cudnnSetOpTensorDescriptor(opTensorDesc,
writer << "}\n"; writer << "}\n";
} }
// Code-generation for ngraph::op::Sign: writes a scoped call to the
// runtime helper emit_sign for this node's input/output tensors.
// A zero-element output emits no code at all.
template <>
void GPU_Emitter::EMITTER_DECL(ngraph::op::Sign)
{
    const auto element_count = out[0].get_size();
    if (0 == element_count)
    {
        return;
    }
    const auto input_name = args[0].get_name();
    const auto output_name = out[0].get_name();
    writer << "{ // " << node->get_name() << "\n";
    writer.indent++;
    writer << "int count = " << element_count << ";\n";
    writer << "ngraph::runtime::gpu::emit_sign((void*) " << input_name
           << ", (void*) " << output_name << ", count);\n";
    writer.indent--;
    writer << "}\n";
}
template <> template <>
void GPU_Emitter::EMITTER_DECL(ngraph::op::Sqrt) void GPU_Emitter::EMITTER_DECL(ngraph::op::Sqrt)
{ {
......
...@@ -194,7 +194,7 @@ namespace ngraph ...@@ -194,7 +194,7 @@ namespace ngraph
{TI(ngraph::op::Reshape), &GPU_Emitter::emit<ngraph::op::Reshape>}, {TI(ngraph::op::Reshape), &GPU_Emitter::emit<ngraph::op::Reshape>},
{TI(ngraph::op::FunctionCall), &GPU_Emitter::emit<ngraph::op::FunctionCall>}, {TI(ngraph::op::FunctionCall), &GPU_Emitter::emit<ngraph::op::FunctionCall>},
{TI(ngraph::op::Reduce), &GPU_Emitter::emit<ngraph::op::Reduce>}, {TI(ngraph::op::Reduce), &GPU_Emitter::emit<ngraph::op::Reduce>},
{TI(ngraph::op::Sign), &GPU_Emitter::EmitUnaryElementwise}, {TI(ngraph::op::Sign), &GPU_Emitter::emit<ngraph::op::Sign>},
{TI(ngraph::op::Slice), &GPU_Emitter::emit<ngraph::op::Slice>}, {TI(ngraph::op::Slice), &GPU_Emitter::emit<ngraph::op::Slice>},
{TI(ngraph::op::Sum), &GPU_Emitter::emit<ngraph::op::Sum>}, {TI(ngraph::op::Sum), &GPU_Emitter::emit<ngraph::op::Sum>},
{TI(ngraph::op::Exp), &GPU_Emitter::EmitUnaryElementwise}, {TI(ngraph::op::Exp), &GPU_Emitter::EmitUnaryElementwise},
...@@ -558,6 +558,10 @@ using namespace std; ...@@ -558,6 +558,10 @@ using namespace std;
writer << "if(" << tv->get_tensor().get_name() << " == NULL)\n"; writer << "if(" << tv->get_tensor().get_name() << " == NULL)\n";
writer << "{\n"; writer << "{\n";
writer.indent++; writer.indent++;
writer << tv->get_tensor().get_name() << " = ("
<< tv->get_tensor().get_element_type().c_type_string()
<< " *) ngraph::runtime::gpu::create_gpu_buffer("
<< tv->get_tensor().size() << ");\n";
writer << "runtime::gpu::cuda_memcpyHtD(" writer << "runtime::gpu::cuda_memcpyHtD("
<< tv->get_tensor().get_name() << ", " << tv->get_tensor().get_name() << ", "
<< tv->get_tensor().get_name() << "_cpu, " << tv->get_tensor().get_name() << "_cpu, "
...@@ -853,4 +857,4 @@ using namespace std; ...@@ -853,4 +857,4 @@ using namespace std;
} }
} }
} }
} }
\ No newline at end of file
...@@ -388,7 +388,6 @@ TEST(${BACKEND_NAME}, backwards_avgpool_n2_c2_hw2x2_win_2x2_str_1x1_padding_nume ...@@ -388,7 +388,6 @@ TEST(${BACKEND_NAME}, backwards_avgpool_n2_c2_hw2x2_win_2x2_str_1x1_padding_nume
TEST(${BACKEND_NAME}, backwards_abs) TEST(${BACKEND_NAME}, backwards_abs)
{ {
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
auto manager = runtime::Manager::get("${BACKEND_NAME}"); auto manager = runtime::Manager::get("${BACKEND_NAME}");
auto backend = manager->allocate_backend(); auto backend = manager->allocate_backend();
...@@ -439,7 +438,6 @@ TEST(${BACKEND_NAME}, backwards_add) ...@@ -439,7 +438,6 @@ TEST(${BACKEND_NAME}, backwards_add)
TEST(${BACKEND_NAME}, backwards_add_nested) TEST(${BACKEND_NAME}, backwards_add_nested)
{ {
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
auto manager = runtime::Manager::get("${BACKEND_NAME}"); auto manager = runtime::Manager::get("${BACKEND_NAME}");
auto backend = manager->allocate_backend(); auto backend = manager->allocate_backend();
...@@ -577,7 +575,6 @@ TEST(${BACKEND_NAME}, backwards_concat_axis_1) ...@@ -577,7 +575,6 @@ TEST(${BACKEND_NAME}, backwards_concat_axis_1)
TEST(${BACKEND_NAME}, backwards_ceiling) TEST(${BACKEND_NAME}, backwards_ceiling)
{ {
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
SKIP_TEST_FOR("ARGON", "${BACKEND_NAME}"); SKIP_TEST_FOR("ARGON", "${BACKEND_NAME}");
auto manager = runtime::Manager::get("${BACKEND_NAME}"); auto manager = runtime::Manager::get("${BACKEND_NAME}");
...@@ -617,7 +614,6 @@ TEST(${BACKEND_NAME}, backwards_ceiling) ...@@ -617,7 +614,6 @@ TEST(${BACKEND_NAME}, backwards_ceiling)
TEST(${BACKEND_NAME}, backwards_cos) TEST(${BACKEND_NAME}, backwards_cos)
{ {
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
auto manager = runtime::Manager::get("${BACKEND_NAME}"); auto manager = runtime::Manager::get("${BACKEND_NAME}");
auto backend = manager->allocate_backend(); auto backend = manager->allocate_backend();
...@@ -639,7 +635,6 @@ TEST(${BACKEND_NAME}, backwards_cos) ...@@ -639,7 +635,6 @@ TEST(${BACKEND_NAME}, backwards_cos)
TEST(${BACKEND_NAME}, backwards_cosh) TEST(${BACKEND_NAME}, backwards_cosh)
{ {
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
SKIP_TEST_FOR("ARGON", "${BACKEND_NAME}"); SKIP_TEST_FOR("ARGON", "${BACKEND_NAME}");
auto manager = runtime::Manager::get("${BACKEND_NAME}"); auto manager = runtime::Manager::get("${BACKEND_NAME}");
...@@ -663,7 +658,6 @@ TEST(${BACKEND_NAME}, backwards_cosh) ...@@ -663,7 +658,6 @@ TEST(${BACKEND_NAME}, backwards_cosh)
TEST(${BACKEND_NAME}, backwards_divide) TEST(${BACKEND_NAME}, backwards_divide)
{ {
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
SKIP_TEST_FOR("ARGON", "${BACKEND_NAME}"); SKIP_TEST_FOR("ARGON", "${BACKEND_NAME}");
auto manager = runtime::Manager::get("${BACKEND_NAME}"); auto manager = runtime::Manager::get("${BACKEND_NAME}");
...@@ -690,7 +684,6 @@ TEST(${BACKEND_NAME}, backwards_divide) ...@@ -690,7 +684,6 @@ TEST(${BACKEND_NAME}, backwards_divide)
TEST(${BACKEND_NAME}, backwards_dot_scalar_scalar) TEST(${BACKEND_NAME}, backwards_dot_scalar_scalar)
{ {
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
auto manager = runtime::Manager::get("${BACKEND_NAME}"); auto manager = runtime::Manager::get("${BACKEND_NAME}");
auto backend = manager->allocate_backend(); auto backend = manager->allocate_backend();
...@@ -712,7 +705,6 @@ TEST(${BACKEND_NAME}, backwards_dot_scalar_scalar) ...@@ -712,7 +705,6 @@ TEST(${BACKEND_NAME}, backwards_dot_scalar_scalar)
TEST(${BACKEND_NAME}, backwards_dot_scalar_tensor) TEST(${BACKEND_NAME}, backwards_dot_scalar_tensor)
{ {
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
auto manager = runtime::Manager::get("${BACKEND_NAME}"); auto manager = runtime::Manager::get("${BACKEND_NAME}");
auto backend = manager->allocate_backend(); auto backend = manager->allocate_backend();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment