Commit ca07b7f0 authored by fenglei.tian

fix compiling bugs

parent 1a91c924
......@@ -25,39 +25,41 @@
using namespace std;
using namespace ngraph;
#define NVRTC_SAFE_CALL(x) \
do { \
nvrtcResult result = x; \
if (result != NVRTC_SUCCESS) { \
std::cerr << "\nerror: " #x " failed with error " \
<< nvrtcGetErrorString(result) << '\n'; \
exit(1); \
} \
} while(0)
#define NVRTC_SAFE_CALL(x) \
do \
{ \
nvrtcResult result = x; \
if (result != NVRTC_SUCCESS) \
{ \
std::cerr << "\nerror: " #x " failed with error " << nvrtcGetErrorString(result) \
<< '\n'; \
exit(1); \
} \
} while (0)
#define CUDA_SAFE_CALL(x) \
do { \
CUresult result = x; \
if (result != CUDA_SUCCESS) { \
const char *msg; \
cuGetErrorName(result, &msg); \
std::cerr << "\nerror: " #x " failed with error " \
<< msg << '\n'; \
exit(1); \
} \
} while(0)
#define CUDA_SAFE_CALL(x) \
do \
{ \
CUresult result = x; \
if (result != CUDA_SUCCESS) \
{ \
const char* msg; \
cuGetErrorName(result, &msg); \
std::cerr << "\nerror: " #x " failed with error " << msg << '\n'; \
exit(1); \
} \
} while (0)
runtime::gpu::GPU_CallFrame::GPU_CallFrame(std::shared_ptr<GPU_ExternalFunction> external_function,
EntryPoint compiled_function)
: m_external_function(external_function)
, m_compiled_function(compiled_function)
{
CUdevice cuDevice;
CUcontext context;
CUmodule module;
CUfunction cuda_op_abs_kernel;
CUDA_SAFE_CALL( cuInit(0));
CUDA_SAFE_CALL(cuInit(0));
CUDA_SAFE_CALL(cuDeviceGet(&cuDevice, 0));
CUDA_SAFE_CALL(cuCtxCreate(&context, 0, cuDevice));
cublasStatus_t cublasStatus = cublasCreate(&m_cublas_handle);
......
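Both error-checking macros wrap their bodies in do { ... } while (0) so that each invocation expands to a single statement. A minimal sketch of why that matters, assuming CUDA_SAFE_CALL as defined above is in scope; the init_driver function and its flag are illustrative only, not part of this commit:

#include <cuda.h>
#include <iostream>

void init_driver(bool initialize)
{
    // Because CUDA_SAFE_CALL expands to exactly one statement, it composes
    // safely with an unbraced if/else; a bare { ... } block followed by ';'
    // would break the else branch.
    if (initialize)
        CUDA_SAFE_CALL(cuInit(0));
    else
        std::cerr << "skipping CUDA driver initialization\n";
}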
......@@ -17,6 +17,7 @@
#pragma once
#include <string>
#include "ngraph/runtime/gpu/gpu_util.hpp"
namespace ngraph
{
......@@ -26,24 +27,20 @@ namespace ngraph
{
class Cuda_context_manager
{
public:
public:
static Cuda_context_manager& Instance()
{
static Cuda_context_manager manager;
return pool;
return manager;
}
Cuda_context_manager(Cuda_context_manager const&) = delete;
Cuda_context_manager(Cuda_context_manager&&) = delete;
Cuda_context_manager& operator=(Cuda_context_manager const&) = delete;
Cuda_context_manager& operator=(Cuda_context_manager &&) = delete;
Cuda_context_manager& operator=(Cuda_context_manager&&) = delete;
std::shared_ptr<CUcontext> GetContext()
{
return context_ptr;
}
protected:
std::shared_ptr<CUcontext> GetContext() { return context_ptr; }
protected:
Cuda_context_manager()
{
CUDA_SAFE_CALL(cuInit(0));
......@@ -51,11 +48,11 @@ namespace ngraph
CUDA_SAFE_CALL(cuCtxCreate(&context, 0, cuDevice));
context_ptr = std::make_shared<CUcontext>(context);
}
~Cuda_context_manager(){}
~Cuda_context_manager() {}
CUdevice cuDevice;
CUcontext context;
std::shared_ptr<CUcontext> context_ptr;
}
};
}
}
}
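A minimal usage sketch of the singleton; the fully qualified namespace is an assumption (the enclosing namespaces are truncated by the hunk), and the caller is assumed to include this header so CUDA_SAFE_CALL is available:

#include <memory>
#include "ngraph/runtime/gpu/gpu_cuda_context_manager.hpp"

void bind_shared_context()
{
    // Instance() lazily creates one CUcontext on first use; every later call
    // returns the same shared_ptr, so all NVRTC-compiled kernels run in it.
    auto& manager = ngraph::runtime::gpu::Cuda_context_manager::Instance(); // namespace assumed
    std::shared_ptr<CUcontext> ctx = manager.GetContext();
    CUDA_SAFE_CALL(cuCtxSetCurrent(*ctx));
}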
......@@ -18,8 +18,8 @@
#include <string>
#include "ngraph/runtime/gpu/gpu_util.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_context_manager.hpp"
#include "ngraph/runtime/gpu/gpu_util.hpp"
namespace ngraph
{
......@@ -29,44 +29,49 @@ namespace ngraph
{
class Cuda_function_builder
{
public:
static std::shared_ptr<CUfuction> get(std::string& kernel, std::string& name, int number_of_options, std::string options)
public:
static std::shared_ptr<CUfunction> Get(std::string& kernel,
std::string& name,
int number_of_options,
const char** options)
{
nvrtcProgram prog;
NVRTC_SAFE_CALL(nvrtcCreateProgram(&prog,
kernel.c_str(),
"op.cu",
0, // numHeaders
NULL, // headers
NULL)); // includeNames
nvrtcProgram prog;
NVRTC_SAFE_CALL(nvrtcCreateProgram(&prog,
kernel.c_str(),
"op.cu",
0, // numHeaders
NULL, // headers
NULL)); // includeNames
nvrtcResult compileResult =
nvrtcCompileProgram(prog, number_of_options, options);
nvrtcResult compileResult = nvrtcCompileProgram(prog,
number_of_options,
options);
if (compileResult != NVRTC_SUCCESS)
{
// size_t logSize;
// NVRTC_SAFE_CALL(nvrtcGetProgramLogSize(prog, &logSize));
// char *log = new char[logSize];
// NVRTC_SAFE_CALL(nvrtcGetProgramLog(prog, log));
// std::cout << log << '\n';
// delete[] log;
throw std::runtime_error("compile error: \n" + kernel + "\n options");
}
if (compileResult != NVRTC_SUCCESS) {
// size_t logSize;
// NVRTC_SAFE_CALL(nvrtcGetProgramLogSize(prog, &logSize));
// char *log = new char[logSize];
// NVRTC_SAFE_CALL(nvrtcGetProgramLog(prog, log));
// std::cout << log << '\n';
// delete[] log;
throw std::runtime_error("compile error: \n" + kernel + "\n options" + options);
}
size_t ptxSize;
NVRTC_SAFE_CALL(nvrtcGetPTXSize(prog, &ptxSize));
char* ptx = new char[ptxSize];
NVRTC_SAFE_CALL(nvrtcGetPTX(
prog,
ptx)); // Load the generated PTX and get a handle to the parent kernel.
NVRTC_SAFE_CALL(nvrtcDestroyProgram(&prog)); // Destroy the program.
size_t ptxSize;
NVRTC_SAFE_CALL(nvrtcGetPTXSize(prog, &ptxSize));
char *ptx = new char[ptxSize];
NVRTC_SAFE_CALL(nvrtcGetPTX(prog, ptx)); // Load the generated PTX and get a handle to the parent kernel.
NVRTC_SAFE_CALL(nvrtcDestroyProgram(&prog)); // Destroy the program.
CUmodule module;
CUfunction function;
CUDA_SAFE_CALL(cuModuleLoadDataEx(&module, ptx, 0, 0, 0));
CUDA_SAFE_CALL(cuModuleGetFunction(&function, module, name));
return std::make_shared<CUfunction>(function);
CUmodule module;
CUfunction function;
CUDA_SAFE_CALL(cuModuleLoadDataEx(&module, ptx, 0, 0, 0));
CUDA_SAFE_CALL(cuModuleGetFunction(&function, module, name.c_str()));
return std::make_shared<CUfunction>(function);
}
}
};
}
}
}
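Get() runs the NVRTC pipeline end to end: compile the CUDA C string to PTX, load the PTX with cuModuleLoadDataEx, and resolve the kernel with cuModuleGetFunction. A sketch of a direct call; the kernel source and option strings are illustrative, and the name passed in must match the __global__ symbol because it is forwarded to cuModuleGetFunction:

std::shared_ptr<CUfunction> build_noop_kernel()
{
    std::string kernel = R"(
extern "C" __global__ void cuda_op_noop(float* in, float* out, size_t n) {}
)";
    std::string name = "cuda_op_noop";
    const char* opts[] = {"--gpu-architecture=compute_35",
                          "--relocatable-device-code=true"};
    // Throws std::runtime_error if NVRTC fails to compile the source.
    return Cuda_function_builder::Get(kernel, name, 2, opts);
}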
......@@ -17,6 +17,9 @@
#pragma once
#include <string>
#include <unordered_map>
#include "ngraph/runtime/gpu/gpu_util.hpp"
namespace ngraph
{
......@@ -26,7 +29,7 @@ namespace ngraph
{
class Cuda_function_pool
{
public:
public:
static Cuda_function_pool& Instance()
{
static Cuda_function_pool pool;
......@@ -36,29 +39,28 @@ namespace ngraph
Cuda_function_pool(Cuda_function_pool const&) = delete;
Cuda_function_pool(Cuda_function_pool&&) = delete;
Cuda_function_pool& operator=(Cuda_function_pool const&) = delete;
Cuda_function_pool& operator=(Cuda_function_pool &&) = delete;
Cuda_function_pool& operator=(Cuda_function_pool&&) = delete;
void Set(std::string& name, std::shared_ptr<CUfunction> function)
{
CUfunction_map.insert({name,function});
CUfunction_map.insert({name, function});
}
std::shared_ptr<CUfunction> Get(std::string& name)
{
auto it = CUfunction_map.find(name);
if(it != CUfunction_map.end())
if (it != CUfunction_map.end())
{
return (*it).second;
}
return nullptr;
}
protected:
Cuda_function_pool(){}
~Cuda_function_pool(){}
protected:
Cuda_function_pool() {}
~Cuda_function_pool() {}
std::unordered_map<std::string, std::shared_ptr<CUfunction>> CUfunction_map;
}
};
}
}
}
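Together with the builder, the pool gives a compile-once cache keyed by kernel name. A sketch of the intended pattern, assuming both headers are included; the helper name is illustrative:

std::shared_ptr<CUfunction> get_or_build(std::string& name,
                                         std::string& kernel_source,
                                         const char** opts,
                                         int num_opts)
{
    // Look in the cache first; fall back to NVRTC compilation only on a miss.
    std::shared_ptr<CUfunction> func = Cuda_function_pool::Instance().Get(name);
    if (func == nullptr)
    {
        func = Cuda_function_builder::Get(kernel_source, name, num_opts, opts);
        Cuda_function_pool::Instance().Set(name, func);
    }
    return func;
}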
......@@ -26,12 +26,16 @@ namespace ngraph
{
class Cuda_kernel_builder
{
static std::string get_1_element_op(std::string& name, std::string& data_type, std::string& op, std::string& kernel)
public:
static void Get_1_element_op(const std::string& name,
const std::string& data_type,
const std::string& op,
std::string& kernel)
{
kernel = R"(
extern "C" __global__
void cuda_op_)" + name + "(" + data_type + "* in, " + data_type + "* out, size_t n)\n"
+R"({
void cuda_op_)" + name + "(" +
data_type + "* in, " + data_type + "* out, size_t n)\n" + R"({
size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
if(tid < n)
{
......@@ -41,27 +45,34 @@ out[tid] = " + op + "(in[tid]);\n"
return;
}
static std::string get_2_element_op(std::string& name, std::string& data_type, std::string op, std::string& kernel)
static void Get_2_element_op(const std::string& name,
const std::string& data_type,
const std::string op,
std::string& kernel)
{
kernel = R"(
extern "C" __global__
void cuda_op_)" + name + "(" + data_type + "* in1, " + data_type + "* in2, " + data_type + "* out, size_t n)\n"
+R"({
void cuda_op_)" + name + "(" +
data_type + "* in1, " + data_type + "* in2, " + data_type +
"* out, size_t n)\n" + R"({
size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
if(tid < n)
{
out[tid] = in1[tid] )" + op + "in2[tid]\n"
+R"(}
out[tid] = in1[tid] )" + op + "in2[tid]\n" +
R"(}
})";
return;
}
static std::string get_n_element_op(std::string& name, std::string& data_type, std::vector<std::string> ops, std::string& kernel)
static void Get_n_element_op(const std::string& name,
const std::string& data_type,
const std::vector<std::string> ops,
std::string& kernel)
{
kernel = "";
return;
}
}
};
}
}
}
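For reference, Get_1_element_op("abs", "float", "fabsf", kernel) assembles a source string roughly like the following, reconstructed from the template above (whose middle is truncated by the hunk):

extern "C" __global__
void cuda_op_abs(float* in, float* out, size_t n)
{
    size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
    if (tid < n)
    {
        out[tid] = fabsf(in[tid]);
    }
}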
......@@ -17,18 +17,16 @@
#include <algorithm>
#include <map>
#include <nvrtc.h>
#include <cublas_v2.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <cudnn_v7.h>
#include <nvrtc.h>
#include "ngraph/runtime/gpu/gpu_cuda_kernel_emitters.hpp"
#include "ngraph/runtime/gpu/gpu_cude_kernel_builder.hpp"
#include "ngraph/runtime/gpu/gpu_cude_function_builder.hpp"
#include "ngraph/runtime/gpu/gpu_cude_function_pool.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_function_builder.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_function_pool.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_kernel_builder.hpp"
namespace ngraph
{
......@@ -40,42 +38,48 @@ namespace ngraph
{
namespace kernel
{
void emit_abs(void* in, void* out, size_t count)
{
std::string name = "abs";
// Create an instance of nvrtcProgram with the code string.
if(Cuda_function_pool::Instance().get(name) == nullptr)
// Create an instance of nvrtcProgram with the code string.
if (Cuda_function_pool::Instance().Get(name) == nullptr)
{
const char *opts[] = {"--gpu-architecture=compute_35",
"--relocatable-device-code=true"};
const char* opts[] = {"--gpu-architecture=compute_35",
"--relocatable-device-code=true"};
std::string kernel;
Cuda_kernel_builder::get_1_element_op(name, "float", "fabsf",kernel);
Cuda_function_pool::Instance().set(name, Cuda_function_builder(name, kernel, 2, opts));
Cuda_kernel_builder::Get_1_element_op(name, "float", "fabsf", kernel);
Cuda_function_pool::Instance().Set(
name, Cuda_function_builder::Get(name, kernel, 2, opts));
}
//convert runtime ptr to driver api ptr
CUdeviceptr dPtrIn, dPtrOut;
dPtrIn = (CUdeviceptr)in;
dPtrOut = (CUdeviceptr)out;
void *argsList[] = {&dPtrIn, &dPtrOut, &count};
void* argsList[] = {&dPtrIn, &dPtrOut, &count};
CUDA_SAFE_CALL(
cuLaunchKernel(cudCuda_function_pool::Instance().get(name).get(),
count ,1, 1, // grid dim
1, 1, 1, // block dim
0, NULL, // shared mem and stream
argsList, 0)); // arguments
CUDA_SAFE_CALL(cuCtxSynchronize()); // Retrieve and print output.
cuLaunchKernel(*Cuda_function_pool::Instance().Get(name).get(),
count,
1,
1, // grid dim
1,
1,
1, // block dim
0,
NULL, // shared mem and stream
argsList,
0)); // arguments
CUDA_SAFE_CALL(cuCtxSynchronize()); // Retrieve and print output.
}
void emit_broadcast(codegen::CodeWriter& writer,
const std::string& element_type,
const std::string& arg0, // replacement context
const std::string& out,
const Shape& arg0_shape,
const Shape& out_shape,
const AxisSet& broadcast_axes)
const std::string& element_type,
const std::string& arg0, // replacement context
const std::string& out,
const Shape& arg0_shape,
const Shape& out_shape,
const AxisSet& broadcast_axes)
{
}
......@@ -83,61 +87,59 @@ namespace ngraph
// For the reference kernel this is gpud on, see ngraph/runtime/kernel/concat.hpp.
//
void emit_concat(codegen::CodeWriter& writer,
const std::string& element_type,
const std::vector<std::string>& args,
const std::string& out,
const std::vector<Shape>& in_shapes,
const Shape& out_shape,
size_t concatenation_axis)
const std::string& element_type,
const std::vector<std::string>& args,
const std::string& out,
const std::vector<Shape>& in_shapes,
const Shape& out_shape,
size_t concatenation_axis)
{
}
void emit_replace_slice(
codegen::CodeWriter& writer,
const std::string& element_type,
const std::string& arg0, // replacement context
const std::string& arg1, // replacement value
const std::string& out,
const Shape& arg1_shape,
const Shape& out_shape,
const Coordinate& lower_bounds,
const Coordinate& upper_bounds,
const Strides& strides)
void emit_replace_slice(codegen::CodeWriter& writer,
const std::string& element_type,
const std::string& arg0, // replacement context
const std::string& arg1, // replacement value
const std::string& out,
const Shape& arg1_shape,
const Shape& out_shape,
const Coordinate& lower_bounds,
const Coordinate& upper_bounds,
const Strides& strides)
{
}
void emit_slice(codegen::CodeWriter& writer,
const std::string& element_type,
const std::string& arg0, // replacement context
const std::string& out,
const Shape& arg0_shape,
const Shape& out_shape,
const Coordinate& lower_bounds,
const Coordinate& upper_bounds,
const Strides& strides)
const std::string& element_type,
const std::string& arg0, // replacement context
const std::string& out,
const Shape& arg0_shape,
const Shape& out_shape,
const Coordinate& lower_bounds,
const Coordinate& upper_bounds,
const Strides& strides)
{
}
void emit_reshape(codegen::CodeWriter& writer,
const std::string& element_type,
const std::string& arg0, // replacement context
const std::string& out,
const Shape& arg0_shape,
const Shape& out_shape,
const AxisVector& arg0_axis_order)
const std::string& element_type,
const std::string& arg0, // replacement context
const std::string& out,
const Shape& arg0_shape,
const Shape& out_shape,
const AxisVector& arg0_axis_order)
{
}
void emit_sum(codegen::CodeWriter& writer,
const std::string& element_type,
const std::string& arg0, // replacement context
const std::string& out,
const Shape& arg0_shape,
const Shape& out_shape,
const AxisSet& reduction_axes)
const std::string& element_type,
const std::string& arg0, // replacement context
const std::string& out,
const Shape& arg0_shape,
const Shape& out_shape,
const AxisSet& reduction_axes)
{
}
}
}
}
......
......@@ -37,12 +37,12 @@ namespace ngraph
const Shape& out_shape,
const AxisSet& broadcast_axes);
void emit_concat(codegen::CodeWriter& writer,
const std::string& element_type,
const std::vector<std::string>& args,
const std::string& out,
const std::vector<Shape>& in_shapes,
const Shape& out_shape,
const size_t concatenation_axis);
const std::string& element_type,
const std::vector<std::string>& args,
const std::string& out,
const std::vector<Shape>& in_shapes,
const Shape& out_shape,
const size_t concatenation_axis);
void emit_replace_slice(codegen::CodeWriter& writer,
const std::string& element_type,
......@@ -64,19 +64,19 @@ namespace ngraph
const Coordinate& upper_bounds,
const Strides& strides);
void emit_reshape(codegen::CodeWriter& writer,
const std::string& element_type,
const std::string& arg0, // replacement context
const std::string& out,
const Shape& arg0_shape,
const Shape& out_shape,
const AxisVector& arg0_axis_order);
const std::string& element_type,
const std::string& arg0, // replacement context
const std::string& out,
const Shape& arg0_shape,
const Shape& out_shape,
const AxisVector& arg0_axis_order);
void emit_sum(codegen::CodeWriter& writer,
const std::string& element_type,
const std::string& arg0, // replacement context
const std::string& out,
const Shape& arg0_shape,
const Shape& out_shape,
const AxisSet& reduction_axes);
const std::string& element_type,
const std::string& arg0, // replacement context
const std::string& out,
const Shape& arg0_shape,
const Shape& out_shape,
const AxisSet& reduction_axes);
}
}
}
......
......@@ -17,20 +17,20 @@
#include <algorithm>
#include <cmath>
#include <cublas_v2.h>
#include <cublas_v2.h>
#include <cuda.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <cuda_runtime.h>
#include <cudnn_v7.h>
#include <cudnn_v7.h>
#include <iostream>
#include <nvrtc.h>
#include <nvrtc.h>
#include <string>
#include <typeindex>
#include <unordered_map>
#include <vector>
#include <nvrtc.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <cudnn_v7.h>
#include "ngraph/node.hpp"
#include "ngraph/ops/broadcast.hpp"
......@@ -48,9 +48,9 @@
#include "ngraph/ops/reverse.hpp"
#include "ngraph/ops/slice.hpp"
#include "ngraph/ops/sum.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_kernel_emitters.hpp"
#include "ngraph/runtime/gpu/gpu_emitter.hpp"
#include "ngraph/runtime/gpu/gpu_kernel_emitters.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_kernel_emitters.hpp"
#include "ngraph/util.hpp"
using namespace std;
......@@ -80,30 +80,31 @@ using namespace ngraph;
} while (0)
void runtime::gpu::GPU_Emitter::EmitNop(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
}
void runtime::gpu::GPU_Emitter::EmitAbs(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
writer << "{ // " << n->get_name() << "\n";
writer.indent++;
writer << "int count = " << out[0].get_size() << ";\n";
writer << "if(count == 0) return;\n";
writer << "ngraph::runtime::gpu::cuda::kernel::emit_abs((void*) " << args[0].get_name() << ", (void*) " << out[0].get_name() << ", count);\n";
writer << "ngraph::runtime::gpu::cuda::kernel::emit_abs((void*) " << args[0].get_name()
<< ", (void*) " << out[0].get_name() << ", count);\n";
writer.indent--;
writer << "}\n";
}
void runtime::gpu::GPU_Emitter::EmitAdd(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
writer << "{ // " << n->get_name() << "\n";
writer.indent++;
......@@ -142,9 +143,9 @@ cudnnSetOpTensorDescriptor(opTensorDesc,
}
void runtime::gpu::GPU_Emitter::EmitConcat(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
}
......@@ -205,12 +206,12 @@ void runtime::gpu::GPU_Emitter::EmitDot(codegen::CodeWriter& writer,
else if ((arg0_shape.size() == 2) && (arg1_shape.size() == 2))
{
// GEMM Call
if(arg0_shape[0] != out[0].get_shape()[0] || // m
arg1_shape[1] != out[0].get_shape()[1] || // n
arg0_shape[1] != arg1_shape[0]) // k
{
throw std::runtime_error("input and output shape is not correct for dot;");
}
if (arg0_shape[0] != out[0].get_shape()[0] || // m
arg1_shape[1] != out[0].get_shape()[1] || // n
arg0_shape[1] != arg1_shape[0]) // k
{
throw std::runtime_error("input and output shape is not correct for dot;");
}
writer << "{ // " << n->get_name() << "\n";
writer.indent++;
writer << "static const float alpha = 1.0;\n";
......@@ -243,66 +244,66 @@ void runtime::gpu::GPU_Emitter::EmitDot(codegen::CodeWriter& writer,
}
void runtime::gpu::GPU_Emitter::EmitDivide(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitEqual(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitGreater(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitGreaterEq(
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitLess(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitLessEq(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitLog(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitMaximum(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
writer << "{ // " << n->get_name() << "\n";
writer.indent++;
......@@ -341,9 +342,9 @@ cudnnSetOpTensorDescriptor(opTensorDesc,
}
void runtime::gpu::GPU_Emitter::EmitMinimum(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
writer << "{ // " << n->get_name() << "\n";
writer.indent++;
......@@ -382,10 +383,10 @@ cudnnSetOpTensorDescriptor(opTensorDesc,
}
void runtime::gpu::GPU_Emitter::EmitNegative(
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
writer << "{ // " << n->get_name() << "\n";
writer.indent++;
......@@ -424,60 +425,60 @@ cudnnSetOpTensorDescriptor(opTensorDesc,
}
void runtime::gpu::GPU_Emitter::EmitNotEqual(
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitSelect(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitSubtract(
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitBroadcast(
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitConvert(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitConstant(
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitReshape(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
auto reshape = static_cast<const op::Reshape*>(n);
writer << "{ // " << n->get_name() << "\n";
......@@ -505,7 +506,7 @@ void runtime::gpu::GPU_Emitter::EmitReshape(codegen::CodeWriter& writer,
writer << "{ // " << n->get_name() << " 1\n";
writer.indent++;
writer << "runtime::gpu::cuda_memcpyDtD(" << out[0].get_name() << ", " << args[0].get_name()
<< ", " << out[0].get_size() << "," << out[0].get_element_type().size() << ");\n";
<< ", " << out[0].get_size() << "," << out[0].get_element_type().size() << ");\n";
writer.indent--;
writer << "}\n";
}
......@@ -542,50 +543,50 @@ void runtime::gpu::GPU_Emitter::EmitReshape(codegen::CodeWriter& writer,
}
void runtime::gpu::GPU_Emitter::EmitFunctionCall(
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
}
void runtime::gpu::GPU_Emitter::EmitReduce(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitSign(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitSlice(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitSum(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitMultiply(
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
writer << "{ // " << n->get_name() << "\n";
writer.indent++;
......@@ -624,130 +625,130 @@ cudnnSetOpTensorDescriptor(opTensorDesc,
}
void runtime::gpu::GPU_Emitter::EmitExp(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitSin(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitSinh(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitCos(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitCosh(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitTan(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitTanh(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitAsin(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitAcos(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitAtan(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitPower(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitReplaceSlice(
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitOneHot(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitCeiling(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitFloor(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitSqrt(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
writer << "{ // " << n->get_name() << "\n";
writer.indent++;
......@@ -786,52 +787,52 @@ cudnnSetOpTensorDescriptor(opTensorDesc,
}
void runtime::gpu::GPU_Emitter::EmitConvolution(
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitNot(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitMaxPool(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitReverse(codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitReduceWindow(
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
void runtime::gpu::GPU_Emitter::EmitSelectAndScatter(
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
codegen::CodeWriter& writer,
const ngraph::Node* n,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
throw std::runtime_error(n->get_name() + " is not implemented.");
}
......@@ -94,10 +94,10 @@
#include "ngraph/pass/memory_layout.hpp"
#include "ngraph/runtime/gpu/gpu_backend.hpp"
#include "ngraph/runtime/gpu/gpu_call_frame.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_kernel_emitters.hpp"
#include "ngraph/runtime/gpu/gpu_emitter.hpp"
#include "ngraph/runtime/gpu/gpu_external_function.hpp"
#include "ngraph/runtime/gpu/gpu_kernel_emitters.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_kernel_emitters.hpp"
using namespace std;
using namespace ngraph;
......@@ -249,17 +249,17 @@ void runtime::gpu::GPU_ExternalFunction::compile()
#include "ngraph/pass/manager.hpp"
#include "ngraph/pass/memory_layout.hpp"
#include "ngraph/runtime/aligned_buffer.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_kernel_emitters.hpp"
#include "ngraph/runtime/gpu/gpu_util.hpp"
#include "ngraph/util.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_kernel_emitters.hpp"
)";
string pch_header_source = writer.get_code();
// writer += R"(
// using namespace ngraph;
// using namespace std;
//)";
// writer += R"(
// using namespace ngraph;
// using namespace std;
//)";
if (m_emit_timing)
{
......
......@@ -16,27 +16,28 @@
#pragma once
#define NVRTC_SAFE_CALL(x) \
do { \
nvrtcResult result = x; \
if (result != NVRTC_SUCCESS) { \
std::cerr << "\nerror: " #x " failed with error " \
<< nvrtcGetErrorString(result) << '\n'; \
exit(1); \
} \
} while(0)
#define NVRTC_SAFE_CALL(x) \
do \
{ \
nvrtcResult result = x; \
if (result != NVRTC_SUCCESS) \
{ \
throw std::runtime_error("\nerror: " #x " failed with error " + \
std::string(nvrtcGetErrorString(result))); \
} \
} while (0)
#define CUDA_SAFE_CALL(x) \
do { \
CUresult result = x; \
if (result != CUDA_SUCCESS) { \
const char *msg; \
cuGetErrorName(result, &msg); \
std::cerr << "\nerror: " #x " failed with error " \
<< msg << '\n'; \
exit(1); \
} \
} while(0)
#define CUDA_SAFE_CALL(x) \
do \
{ \
CUresult result = x; \
if (result != CUDA_SUCCESS) \
{ \
const char* msg; \
cuGetErrorName(result, &msg); \
throw std::runtime_error("\nerror: " #x " failed with error " + std::string(msg)); \
} \
} while (0)
namespace ngraph
{
......
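Unlike the copies in gpu_call_frame.cpp, which call exit(1), these header macros surface failures as std::runtime_error, so callers can handle or report them. A minimal sketch, assuming this header is included:

#include <cuda.h>
#include <iostream>
#include <stdexcept>

void synchronize_or_report()
{
    try
    {
        CUDA_SAFE_CALL(cuCtxSynchronize());
    }
    catch (const std::runtime_error& e)
    {
        // The macro folds the failing expression and the cuGetErrorName text
        // into the exception message.
        std::cerr << e.what() << '\n';
    }
}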