Commit dd5a6769 authored by fenglei.tian

add sign op, fix constant bug

parent b5414ba5
......@@ -74,6 +74,53 @@ void cuda_)" + name + "(" + data_type +
0)); // arguments
CUDA_SAFE_CALL(cuCtxSynchronize()); // Wait for the kernel to finish.
}
void emit_sign(void* in, void* out, size_t count)
{
std::string name = "sign";
// Compile the kernel with NVRTC on first use and cache it in the function pool.
if (CudaFunctionPool::instance().get(name) == nullptr)
{
const char* opts[] = {"--gpu-architecture=compute_35",
"--relocatable-device-code=true"};
std::string kernel;
std::string data_type("float");
kernel = R"(
extern "C" __global__
void cuda_)" + name + "(" + data_type +
"* in, " + data_type + "* out, size_t n)\n" + R"(
{
size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
if(tid < n)
{
        out[tid] = (in[tid] > 0) - (in[tid] < 0); // sign: -1, 0, or +1
}
})";
CudaFunctionPool::instance().set(
name, CudaFunctionBuilder::get("cuda_" + name, kernel, 2, opts));
}
// convert runtime pointers to driver API pointers
CUdeviceptr d_ptr_in, d_ptr_out;
d_ptr_in = (CUdeviceptr)in;
d_ptr_out = (CUdeviceptr)out;
void* args_list[] = {&d_ptr_in, &d_ptr_out, &count};
CUDA_SAFE_CALL(cuLaunchKernel(*CudaFunctionPool::instance().get(name).get(),
count,
1,
1, // grid dim
1,
1,
1, // block dim
0,
NULL, // shared mem and stream
args_list,
0)); // arguments
    CUDA_SAFE_CALL(cuCtxSynchronize()); // Wait for the kernel to finish.
}
}
}
}
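For orientation, here is a minimal host-side sketch of driving the new emit_sign entry point directly. The buffer names, sizes, and header paths are assumptions; a device-to-host copy helper mirroring the cuda_memcpyHtD used elsewhere in this backend is presumed to exist, and a CUDA context set up by the backend is assumed to be live.

#include "ngraph/runtime/gpu/gpu_cuda_kernel_emitters.hpp" // declares emit_sign (assumed path)
#include "ngraph/runtime/gpu/gpu_util.hpp"                 // buffer/copy helpers (assumed path)

int main()
{
    float h_in[4] = {-2.0f, -0.0f, 0.5f, 3.0f};
    float h_out[4];
    size_t bytes = 4 * sizeof(float);

    // Stage the input on the device, much as the generated code does for tensors.
    void* d_in = ngraph::runtime::gpu::create_gpu_buffer(bytes);
    void* d_out = ngraph::runtime::gpu::create_gpu_buffer(bytes);
    ngraph::runtime::gpu::cuda_memcpyHtD(d_in, h_in, bytes);

    // The first call compiles the kernel with NVRTC and caches it; expected output: -1, 0, 1, 1.
    ngraph::runtime::gpu::emit_sign(d_in, d_out, 4);

    // cuda_memcpyDtH is an assumed counterpart to cuda_memcpyHtD.
    ngraph::runtime::gpu::cuda_memcpyDtH(h_out, d_out, bytes);
    return 0;
}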
......@@ -35,6 +35,8 @@ namespace ngraph
void emit_broadcast(
void* in, void* out, size_t repeat_size, size_t repeat_times, size_t count);
void emit_sign(void* in, void* out, size_t count);
template <typename T>
void emit_unary_elementwise_op(void* in, void* out, size_t count, std::string name)
{
......
......@@ -134,7 +134,6 @@ namespace ngraph
writer << "{ // " << node->get_name() << "\n";
writer.indent++;
writer << "int count = " << out[0].get_size() << ";\n";
writer << "if(count == 0) return;\n";
writer << "ngraph::runtime::gpu::emit_unary_elementwise_op<ngraph::op::"
<< node->description() << ">((void*) " << args[0].get_name() << ", (void*) "
<< out[0].get_name() << ", count, \"" << node->description() << "\");\n";
......@@ -618,6 +617,22 @@ cudnnSetOpTensorDescriptor(opTensorDesc,
writer << "}\n";
}
template <>
void GPU_Emitter::EMITTER_DECL(ngraph::op::Sign)
{
if (out[0].get_size() == 0)
{
return;
}
writer << "{ // " << node->get_name() << "\n";
writer.indent++;
writer << "int count = " << out[0].get_size() << ";\n";
writer << "ngraph::runtime::gpu::emit_sign((void*) " << args[0].get_name()
<< ", (void*) " << out[0].get_name() << ", count);\n";
writer.indent--;
writer << "}\n";
}
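For reference, with a hypothetical node named Sign_0, a four-element output, and tensor names arg0/out0 (all illustrative, not taken from this commit), the specialization above writes roughly this fragment into the generated source:

{ // Sign_0
    int count = 4;
    ngraph::runtime::gpu::emit_sign((void*) arg0, (void*) out0, count);
}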
template <>
void GPU_Emitter::EMITTER_DECL(ngraph::op::Sqrt)
{
......
......@@ -194,7 +194,7 @@ namespace ngraph
{TI(ngraph::op::Reshape), &GPU_Emitter::emit<ngraph::op::Reshape>},
{TI(ngraph::op::FunctionCall), &GPU_Emitter::emit<ngraph::op::FunctionCall>},
{TI(ngraph::op::Reduce), &GPU_Emitter::emit<ngraph::op::Reduce>},
{TI(ngraph::op::Sign), &GPU_Emitter::EmitUnaryElementwise},
{TI(ngraph::op::Sign), &GPU_Emitter::emit<ngraph::op::Sign>},
{TI(ngraph::op::Slice), &GPU_Emitter::emit<ngraph::op::Slice>},
{TI(ngraph::op::Sum), &GPU_Emitter::emit<ngraph::op::Sum>},
{TI(ngraph::op::Exp), &GPU_Emitter::EmitUnaryElementwise},
......@@ -558,6 +558,10 @@ using namespace std;
writer << "if(" << tv->get_tensor().get_name() << " == NULL)\n";
writer << "{\n";
writer.indent++;
writer << tv->get_tensor().get_name() << " = ("
<< tv->get_tensor().get_element_type().c_type_string()
<< " *) ngraph::runtime::gpu::create_gpu_buffer("
<< tv->get_tensor().size() << ");\n";
writer << "runtime::gpu::cuda_memcpyHtD("
<< tv->get_tensor().get_name() << ", "
<< tv->get_tensor().get_name() << "_cpu, "
......@@ -853,4 +857,4 @@ using namespace std;
}
}
}
}
\ No newline at end of file
}
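The buffer-allocation hunk above appears to be the "fix constant bug" half of the commit: a tensor's device buffer is now allocated lazily and filled from its host-side *_cpu copy before first use. With a hypothetical constant named Constant_0 of 16 bytes (the copy's size argument is truncated in the hunk and assumed here to match the tensor size), the generated code would read approximately:

if (Constant_0 == NULL)
{
    Constant_0 = (float *) ngraph::runtime::gpu::create_gpu_buffer(16);
    runtime::gpu::cuda_memcpyHtD(Constant_0, Constant_0_cpu, 16);
}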
......@@ -388,7 +388,6 @@ TEST(${BACKEND_NAME}, backwards_avgpool_n2_c2_hw2x2_win_2x2_str_1x1_padding_nume
TEST(${BACKEND_NAME}, backwards_abs)
{
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
auto manager = runtime::Manager::get("${BACKEND_NAME}");
auto backend = manager->allocate_backend();
......@@ -439,7 +438,6 @@ TEST(${BACKEND_NAME}, backwards_add)
TEST(${BACKEND_NAME}, backwards_add_nested)
{
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
auto manager = runtime::Manager::get("${BACKEND_NAME}");
auto backend = manager->allocate_backend();
......@@ -577,7 +575,6 @@ TEST(${BACKEND_NAME}, backwards_concat_axis_1)
TEST(${BACKEND_NAME}, backwards_ceiling)
{
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
SKIP_TEST_FOR("ARGON", "${BACKEND_NAME}");
auto manager = runtime::Manager::get("${BACKEND_NAME}");
......@@ -617,7 +614,6 @@ TEST(${BACKEND_NAME}, backwards_ceiling)
TEST(${BACKEND_NAME}, backwards_cos)
{
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
auto manager = runtime::Manager::get("${BACKEND_NAME}");
auto backend = manager->allocate_backend();
......@@ -639,7 +635,6 @@ TEST(${BACKEND_NAME}, backwards_cos)
TEST(${BACKEND_NAME}, backwards_cosh)
{
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
SKIP_TEST_FOR("ARGON", "${BACKEND_NAME}");
auto manager = runtime::Manager::get("${BACKEND_NAME}");
......@@ -663,7 +658,6 @@ TEST(${BACKEND_NAME}, backwards_cosh)
TEST(${BACKEND_NAME}, backwards_divide)
{
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
SKIP_TEST_FOR("ARGON", "${BACKEND_NAME}");
auto manager = runtime::Manager::get("${BACKEND_NAME}");
......@@ -690,7 +684,6 @@ TEST(${BACKEND_NAME}, backwards_divide)
TEST(${BACKEND_NAME}, backwards_dot_scalar_scalar)
{
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
auto manager = runtime::Manager::get("${BACKEND_NAME}");
auto backend = manager->allocate_backend();
......@@ -712,7 +705,6 @@ TEST(${BACKEND_NAME}, backwards_dot_scalar_scalar)
TEST(${BACKEND_NAME}, backwards_dot_scalar_tensor)
{
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
auto manager = runtime::Manager::get("${BACKEND_NAME}");
auto backend = manager->allocate_backend();
......
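The removed SKIP_TEST_FOR("GPU", ...) lines enable these backwards (autodiff) tests on the GPU backend. Only the manager/backend setup is visible above; the remainder of each test follows the usual autodiff pattern, reconstructed below as a sketch. The graph construction, the test::Uniform helper, and the autodiff_numeric_compare signature are assumptions, not copied from this file.

TEST(${BACKEND_NAME}, backwards_abs)
{
    auto manager = runtime::Manager::get("${BACKEND_NAME}");
    auto backend = manager->allocate_backend();

    // Random input away from the non-differentiable point at zero (helper assumed).
    test::Uniform<float> rng(0.25f, 1.0f);
    auto shape = Shape{2, 3};
    auto x = rng.initialize(backend->make_primary_tensor_view(element::f32, shape));

    auto make_graph = [shape]() {
        auto X = make_shared<op::Parameter>(element::f32, shape);
        return make_shared<Function>(make_shared<op::Abs>(X),
                                     op::Parameters{X}); // parameter-list type name assumed
    };

    // Compare symbolic and numeric derivatives (helper signature assumed).
    EXPECT_TRUE(autodiff_numeric_compare<float>(manager, backend, make_graph, {x}, .01f, .01f));
}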