Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
N
ngraph
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ngraph
Commits
b5467550
Unverified
Commit
b5467550
authored
Mar 13, 2018
by
Chris Sullivan
Committed by
GitHub
Mar 13, 2018
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Updated gpu cpp files with consistent use of namespaces (cosmetic) (#629)
* Updated namespace use in cpp files.
parent
a32fdab5
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
169 additions
and
240 deletions
+169
-240
gpu_cuda_context_manager.cpp
src/ngraph/runtime/gpu/gpu_cuda_context_manager.cpp
+6
-13
gpu_cuda_function_builder.cpp
src/ngraph/runtime/gpu/gpu_cuda_function_builder.cpp
+6
-13
gpu_cuda_function_pool.cpp
src/ngraph/runtime/gpu/gpu_cuda_function_pool.cpp
+11
-20
gpu_cuda_kernel_builder.cpp
src/ngraph/runtime/gpu/gpu_cuda_kernel_builder.cpp
+7
-14
gpu_cuda_kernel_emitters.cpp
src/ngraph/runtime/gpu/gpu_cuda_kernel_emitters.cpp
+6
-13
gpu_external_function.cpp
src/ngraph/runtime/gpu/gpu_external_function.cpp
+133
-167
No files found.
src/ngraph/runtime/gpu/gpu_cuda_context_manager.cpp
View file @
b5467550
...
@@ -19,25 +19,18 @@
...
@@ -19,25 +19,18 @@
#include "ngraph/runtime/gpu/gpu_cuda_context_manager.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_context_manager.hpp"
namespace
ngraph
using
namespace
ngraph
;
runtime
::
gpu
::
CudaContextManager
&
runtime
::
gpu
::
CudaContextManager
::
instance
()
{
{
namespace
runtime
{
namespace
gpu
{
CudaContextManager
&
CudaContextManager
::
instance
()
{
static
CudaContextManager
manager
;
static
CudaContextManager
manager
;
return
manager
;
return
manager
;
}
}
CudaContextManager
::
CudaContextManager
()
runtime
::
gpu
::
CudaContextManager
::
CudaContextManager
()
{
{
CUDA_SAFE_CALL
(
cuInit
(
0
));
CUDA_SAFE_CALL
(
cuInit
(
0
));
CUDA_SAFE_CALL
(
cuDeviceGet
(
&
m_device
,
0
));
CUDA_SAFE_CALL
(
cuDeviceGet
(
&
m_device
,
0
));
CUDA_SAFE_CALL
(
cuCtxCreate
(
&
m_context
,
0
,
m_device
));
CUDA_SAFE_CALL
(
cuCtxCreate
(
&
m_context
,
0
,
m_device
));
m_context_ptr
=
std
::
make_shared
<
CUcontext
>
(
m_context
);
m_context_ptr
=
std
::
make_shared
<
CUcontext
>
(
m_context
);
}
}
}
}
}
src/ngraph/runtime/gpu/gpu_cuda_function_builder.cpp
View file @
b5467550
...
@@ -20,17 +20,13 @@
...
@@ -20,17 +20,13 @@
#include "ngraph/runtime/gpu/gpu_cuda_function_builder.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_function_builder.hpp"
#include "ngraph/runtime/gpu/gpu_util.hpp"
#include "ngraph/runtime/gpu/gpu_util.hpp"
namespace
ngraph
using
namespace
ngraph
;
{
namespace
runtime
std
::
shared_ptr
<
CUfunction
>
runtime
::
gpu
::
CudaFunctionBuilder
::
get
(
const
std
::
string
&
name
,
{
namespace
gpu
{
std
::
shared_ptr
<
CUfunction
>
CudaFunctionBuilder
::
get
(
const
std
::
string
&
name
,
const
std
::
string
&
kernel
,
const
std
::
string
&
kernel
,
int
number_of_options
,
int
number_of_options
,
const
char
**
options
)
const
char
**
options
)
{
{
nvrtcProgram
prog
;
nvrtcProgram
prog
;
NVRTC_SAFE_CALL
(
nvrtcCreateProgram
(
&
prog
,
NVRTC_SAFE_CALL
(
nvrtcCreateProgram
(
&
prog
,
kernel
.
c_str
(),
kernel
.
c_str
(),
...
@@ -49,8 +45,8 @@ namespace ngraph
...
@@ -49,8 +45,8 @@ namespace ngraph
size_t
ptx_size
;
size_t
ptx_size
;
NVRTC_SAFE_CALL
(
nvrtcGetPTXSize
(
prog
,
&
ptx_size
));
NVRTC_SAFE_CALL
(
nvrtcGetPTXSize
(
prog
,
&
ptx_size
));
char
*
ptx
=
new
char
[
ptx_size
];
char
*
ptx
=
new
char
[
ptx_size
];
NVRTC_SAFE_CALL
(
nvrtcGetPTX
(
NVRTC_SAFE_CALL
(
prog
,
nvrtcGetPTX
(
prog
,
ptx
));
// Load the generated PTX and get a handle to the parent kernel.
ptx
));
// Load the generated PTX and get a handle to the parent kernel.
NVRTC_SAFE_CALL
(
nvrtcDestroyProgram
(
&
prog
));
// Destroy the program.
NVRTC_SAFE_CALL
(
nvrtcDestroyProgram
(
&
prog
));
// Destroy the program.
...
@@ -59,7 +55,4 @@ namespace ngraph
...
@@ -59,7 +55,4 @@ namespace ngraph
CUDA_SAFE_CALL
(
cuModuleLoadDataEx
(
&
module
,
ptx
,
0
,
0
,
0
));
CUDA_SAFE_CALL
(
cuModuleLoadDataEx
(
&
module
,
ptx
,
0
,
0
,
0
));
CUDA_SAFE_CALL
(
cuModuleGetFunction
(
&
function
,
module
,
name
.
c_str
()));
CUDA_SAFE_CALL
(
cuModuleGetFunction
(
&
function
,
module
,
name
.
c_str
()));
return
std
::
make_shared
<
CUfunction
>
(
function
);
return
std
::
make_shared
<
CUfunction
>
(
function
);
}
}
}
}
}
src/ngraph/runtime/gpu/gpu_cuda_function_pool.cpp
View file @
b5467550
...
@@ -26,40 +26,31 @@
...
@@ -26,40 +26,31 @@
static
const
std
::
string
s_output_dir
=
"gpu_codegen"
;
static
const
std
::
string
s_output_dir
=
"gpu_codegen"
;
namespace
ngraph
using
namespace
ngraph
;
runtime
::
gpu
::
CudaFunctionPool
&
runtime
::
gpu
::
CudaFunctionPool
::
instance
()
{
{
namespace
runtime
{
namespace
gpu
{
CudaFunctionPool
&
CudaFunctionPool
::
instance
()
{
static
CudaFunctionPool
pool
;
static
CudaFunctionPool
pool
;
return
pool
;
return
pool
;
}
}
void
CudaFunctionPool
::
set
(
const
std
::
string
&
name
,
const
std
::
string
&
kernel
)
void
runtime
::
gpu
::
CudaFunctionPool
::
set
(
const
std
::
string
&
name
,
const
std
::
string
&
kernel
)
{
{
const
char
*
opts
[]
=
{
"--gpu-architecture=compute_35"
,
const
char
*
opts
[]
=
{
"--gpu-architecture=compute_35"
,
"--relocatable-device-code=true"
};
"--relocatable-device-code=true"
};
std
::
string
filename
=
std
::
string
filename
=
file_util
::
path_join
(
s_output_dir
,
"cuda_kernel_"
+
name
+
"_codegen.cu"
);
file_util
::
path_join
(
s_output_dir
,
"cuda_kernel_"
+
name
+
"_codegen.cu"
);
std
::
ofstream
out
(
filename
);
std
::
ofstream
out
(
filename
);
out
<<
kernel
;
out
<<
kernel
;
out
.
close
();
out
.
close
();
m_function_map
.
insert
(
m_function_map
.
insert
({
name
,
CudaFunctionBuilder
::
get
(
"cuda_"
+
name
,
kernel
,
2
,
opts
)});
{
name
,
CudaFunctionBuilder
::
get
(
"cuda_"
+
name
,
kernel
,
2
,
opts
)});
}
}
std
::
shared_ptr
<
CUfunction
>
CudaFunctionPool
::
get
(
const
std
::
string
&
name
)
std
::
shared_ptr
<
CUfunction
>
runtime
::
gpu
::
CudaFunctionPool
::
get
(
const
std
::
string
&
name
)
{
{
auto
it
=
m_function_map
.
find
(
name
);
auto
it
=
m_function_map
.
find
(
name
);
if
(
it
!=
m_function_map
.
end
())
if
(
it
!=
m_function_map
.
end
())
{
{
return
(
*
it
).
second
;
return
(
*
it
).
second
;
}
}
return
nullptr
;
return
nullptr
;
}
}
}
}
}
src/ngraph/runtime/gpu/gpu_cuda_kernel_builder.cpp
View file @
b5467550
...
@@ -16,18 +16,14 @@
...
@@ -16,18 +16,14 @@
#include "ngraph/runtime/gpu/gpu_cuda_kernel_builder.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_kernel_builder.hpp"
#include "ngraph/codegen/code_writer.hpp"
#include "ngraph/codegen/code_writer.hpp"
namespace
ngraph
using
namespace
ngraph
;
{
namespace
runtime
void
runtime
::
gpu
::
CudaKernelBuilder
::
get_elementwise_op
(
codegen
::
CodeWriter
&
writer
,
{
namespace
gpu
{
void
CudaKernelBuilder
::
get_elementwise_op
(
codegen
::
CodeWriter
&
writer
,
const
std
::
string
&
name
,
const
std
::
string
&
name
,
const
std
::
string
&
data_type
,
const
std
::
string
&
data_type
,
const
std
::
string
&
op
,
const
std
::
string
&
op
,
const
size_t
&
num_inputs
)
const
size_t
&
num_inputs
)
{
{
writer
<<
"extern
\"
C
\"
__global__ void cuda_"
<<
name
<<
"("
;
writer
<<
"extern
\"
C
\"
__global__ void cuda_"
<<
name
<<
"("
;
for
(
size_t
i
=
0
;
i
<
num_inputs
;
i
++
)
for
(
size_t
i
=
0
;
i
<
num_inputs
;
i
++
)
{
{
...
@@ -57,14 +53,14 @@ namespace ngraph
...
@@ -57,14 +53,14 @@ namespace ngraph
writer
<<
"}
\n
"
;
writer
<<
"}
\n
"
;
return
;
return
;
}
}
void
CudaKernelBuilder
::
get_device_helper
(
codegen
::
CodeWriter
&
writer
,
void
runtime
::
gpu
::
CudaKernelBuilder
::
get_device_helper
(
codegen
::
CodeWriter
&
writer
,
const
std
::
string
&
name
,
const
std
::
string
&
name
,
const
std
::
string
&
data_type
,
const
std
::
string
&
data_type
,
const
std
::
string
&
math_kernel
,
const
std
::
string
&
math_kernel
,
const
size_t
&
num_inputs
)
const
size_t
&
num_inputs
)
{
{
if
(
math_kernel
.
size
())
if
(
math_kernel
.
size
())
{
{
writer
<<
"__device__ "
<<
data_type
<<
" "
<<
name
<<
"("
;
writer
<<
"__device__ "
<<
data_type
<<
" "
<<
name
<<
"("
;
...
@@ -83,7 +79,4 @@ namespace ngraph
...
@@ -83,7 +79,4 @@ namespace ngraph
writer
<<
"}
\n
"
;
writer
<<
"}
\n
"
;
}
}
return
;
return
;
}
}
}
}
}
src/ngraph/runtime/gpu/gpu_cuda_kernel_emitters.cpp
View file @
b5467550
...
@@ -20,15 +20,10 @@
...
@@ -20,15 +20,10 @@
#include "ngraph/runtime/gpu/gpu_cuda_kernel_emitters.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_kernel_emitters.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_kernel_ops.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_kernel_ops.hpp"
namespace
ngraph
using
namespace
ngraph
;
{
void
runtime
::
gpu
::
emit_broadcast
(
namespace
runtime
{
namespace
gpu
{
void
emit_broadcast
(
void
*
in
,
void
*
out
,
size_t
repeat_size
,
size_t
repeat_times
,
size_t
count
)
void
*
in
,
void
*
out
,
size_t
repeat_size
,
size_t
repeat_times
,
size_t
count
)
{
{
std
::
string
name
=
"broadcast"
;
std
::
string
name
=
"broadcast"
;
// Create an instance of nvrtcProgram with the code string.
// Create an instance of nvrtcProgram with the code string.
if
(
CudaFunctionPool
::
instance
().
get
(
name
)
==
nullptr
)
if
(
CudaFunctionPool
::
instance
().
get
(
name
)
==
nullptr
)
...
@@ -38,8 +33,9 @@ namespace ngraph
...
@@ -38,8 +33,9 @@ namespace ngraph
kernel
=
R"(
kernel
=
R"(
extern "C" __global__
extern "C" __global__
void cuda_)"
+
name
+
"("
+
data_type
+
void cuda_)"
+
name
+
"* in, "
+
data_type
+
"* out, size_t m, size_t k, size_t n)
\n
"
+
R"(
"("
+
data_type
+
"* in, "
+
data_type
+
"* out, size_t m, size_t k, size_t n)
\n
"
+
R"(
{
{
size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
if(tid < n)
if(tid < n)
...
@@ -69,7 +65,4 @@ void cuda_)" + name + "(" + data_type +
...
@@ -69,7 +65,4 @@ void cuda_)" + name + "(" + data_type +
args_list
,
args_list
,
0
));
// arguments
0
));
// arguments
CUDA_SAFE_CALL
(
cuCtxSynchronize
());
// Retrieve and print output.
CUDA_SAFE_CALL
(
cuCtxSynchronize
());
// Retrieve and print output.
}
}
}
}
}
src/ngraph/runtime/gpu/gpu_external_function.cpp
View file @
b5467550
...
@@ -114,6 +114,7 @@
...
@@ -114,6 +114,7 @@
#include "ngraph/runtime/gpu/gpu_kernel_emitters.hpp"
#include "ngraph/runtime/gpu/gpu_kernel_emitters.hpp"
using
namespace
std
;
using
namespace
std
;
using
namespace
ngraph
;
static
const
string
s_output_dir
=
"gpu_codegen"
;
static
const
string
s_output_dir
=
"gpu_codegen"
;
...
@@ -159,110 +160,104 @@ static StaticInitializers s_static_initializers;
...
@@ -159,110 +160,104 @@ static StaticInitializers s_static_initializers;
#define TI(x) type_index(typeid(x))
#define TI(x) type_index(typeid(x))
namespace
ngraph
static
const
runtime
::
gpu
::
OpMap
dispatcher
{
{
{
TI
(
ngraph
::
op
::
Add
),
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
Add
>
},
namespace
runtime
{
TI
(
ngraph
::
op
::
Dot
),
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
Dot
>
},
{
{
TI
(
ngraph
::
op
::
Multiply
),
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
Multiply
>
},
namespace
gpu
{
TI
(
ngraph
::
op
::
Parameter
),
&
runtime
::
gpu
::
GPU_Emitter
::
nop
},
{
{
TI
(
ngraph
::
op
::
Abs
),
&
runtime
::
gpu
::
GPU_Emitter
::
EmitElementwise
},
static
const
OpMap
dispatcher
{
{
TI
(
ngraph
::
op
::
Concat
),
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
Concat
>
},
{
TI
(
ngraph
::
op
::
Add
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
Add
>
},
{
TI
(
ngraph
::
op
::
Divide
),
&
runtime
::
gpu
::
GPU_Emitter
::
EmitElementwise
},
{
TI
(
ngraph
::
op
::
Dot
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
Dot
>
},
{
TI
(
ngraph
::
op
::
Equal
),
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
Equal
>
},
{
TI
(
ngraph
::
op
::
Multiply
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
Multiply
>
},
{
TI
(
ngraph
::
op
::
Parameter
),
&
GPU_Emitter
::
nop
},
{
TI
(
ngraph
::
op
::
Abs
),
&
GPU_Emitter
::
EmitElementwise
},
{
TI
(
ngraph
::
op
::
Concat
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
Concat
>
},
{
TI
(
ngraph
::
op
::
Divide
),
&
GPU_Emitter
::
EmitElementwise
},
{
TI
(
ngraph
::
op
::
Equal
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
Equal
>
},
{
TI
(
ngraph
::
op
::
GetOutputElement
),
{
TI
(
ngraph
::
op
::
GetOutputElement
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
GetOutputElement
>
},
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
GetOutputElement
>
},
{
TI
(
ngraph
::
op
::
Greater
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
Greater
>
},
{
TI
(
ngraph
::
op
::
Greater
),
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
Greater
>
},
{
TI
(
ngraph
::
op
::
GreaterEq
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
GreaterEq
>
},
{
TI
(
ngraph
::
op
::
GreaterEq
),
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
GreaterEq
>
},
{
TI
(
ngraph
::
op
::
Less
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
Less
>
},
{
TI
(
ngraph
::
op
::
Less
),
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
Less
>
},
{
TI
(
ngraph
::
op
::
LessEq
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
LessEq
>
},
{
TI
(
ngraph
::
op
::
LessEq
),
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
LessEq
>
},
{
TI
(
ngraph
::
op
::
Log
),
&
GPU_Emitter
::
EmitElementwise
},
{
TI
(
ngraph
::
op
::
Log
),
&
runtime
::
gpu
::
GPU_Emitter
::
EmitElementwise
},
{
TI
(
ngraph
::
op
::
Maximum
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
Maximum
>
},
{
TI
(
ngraph
::
op
::
Maximum
),
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
Maximum
>
},
{
TI
(
ngraph
::
op
::
Minimum
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
Minimum
>
},
{
TI
(
ngraph
::
op
::
Minimum
),
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
Minimum
>
},
{
TI
(
ngraph
::
op
::
Negative
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
Negative
>
},
{
TI
(
ngraph
::
op
::
Negative
),
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
Negative
>
},
{
TI
(
ngraph
::
op
::
NotEqual
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
NotEqual
>
},
{
TI
(
ngraph
::
op
::
NotEqual
),
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
NotEqual
>
},
{
TI
(
ngraph
::
op
::
Power
),
&
GPU_Emitter
::
EmitElementwise
},
{
TI
(
ngraph
::
op
::
Power
),
&
runtime
::
gpu
::
GPU_Emitter
::
EmitElementwise
},
{
TI
(
ngraph
::
op
::
Select
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
Select
>
},
{
TI
(
ngraph
::
op
::
Select
),
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
Select
>
},
{
TI
(
ngraph
::
op
::
Subtract
),
&
GPU_Emitter
::
EmitElementwise
},
{
TI
(
ngraph
::
op
::
Subtract
),
&
runtime
::
gpu
::
GPU_Emitter
::
EmitElementwise
},
{
TI
(
ngraph
::
op
::
Broadcast
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
Broadcast
>
},
{
TI
(
ngraph
::
op
::
Broadcast
),
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
Broadcast
>
},
{
TI
(
ngraph
::
op
::
Convert
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
Convert
>
},
{
TI
(
ngraph
::
op
::
Convert
),
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
Convert
>
},
{
TI
(
ngraph
::
op
::
Constant
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
Constant
>
},
{
TI
(
ngraph
::
op
::
Constant
),
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
Constant
>
},
{
TI
(
ngraph
::
op
::
Reshape
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
Reshape
>
},
{
TI
(
ngraph
::
op
::
Reshape
),
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
Reshape
>
},
{
TI
(
ngraph
::
op
::
FunctionCall
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
FunctionCall
>
},
{
TI
(
ngraph
::
op
::
FunctionCall
),
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
FunctionCall
>
},
{
TI
(
ngraph
::
op
::
Reduce
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
Reduce
>
},
{
TI
(
ngraph
::
op
::
Reduce
),
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
Reduce
>
},
{
TI
(
ngraph
::
op
::
Sign
),
&
GPU_Emitter
::
EmitElementwise
},
{
TI
(
ngraph
::
op
::
Sign
),
&
runtime
::
gpu
::
GPU_Emitter
::
EmitElementwise
},
{
TI
(
ngraph
::
op
::
Slice
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
Slice
>
},
{
TI
(
ngraph
::
op
::
Slice
),
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
Slice
>
},
{
TI
(
ngraph
::
op
::
Sum
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
Sum
>
},
{
TI
(
ngraph
::
op
::
Sum
),
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
Sum
>
},
{
TI
(
ngraph
::
op
::
Exp
),
&
GPU_Emitter
::
EmitElementwise
},
{
TI
(
ngraph
::
op
::
Exp
),
&
runtime
::
gpu
::
GPU_Emitter
::
EmitElementwise
},
{
TI
(
ngraph
::
op
::
Sin
),
&
GPU_Emitter
::
EmitElementwise
},
{
TI
(
ngraph
::
op
::
Sin
),
&
runtime
::
gpu
::
GPU_Emitter
::
EmitElementwise
},
{
TI
(
ngraph
::
op
::
Sinh
),
&
GPU_Emitter
::
EmitElementwise
},
{
TI
(
ngraph
::
op
::
Sinh
),
&
runtime
::
gpu
::
GPU_Emitter
::
EmitElementwise
},
{
TI
(
ngraph
::
op
::
Cos
),
&
GPU_Emitter
::
EmitElementwise
},
{
TI
(
ngraph
::
op
::
Cos
),
&
runtime
::
gpu
::
GPU_Emitter
::
EmitElementwise
},
{
TI
(
ngraph
::
op
::
Cosh
),
&
GPU_Emitter
::
EmitElementwise
},
{
TI
(
ngraph
::
op
::
Cosh
),
&
runtime
::
gpu
::
GPU_Emitter
::
EmitElementwise
},
{
TI
(
ngraph
::
op
::
Tan
),
&
GPU_Emitter
::
EmitElementwise
},
{
TI
(
ngraph
::
op
::
Tan
),
&
runtime
::
gpu
::
GPU_Emitter
::
EmitElementwise
},
{
TI
(
ngraph
::
op
::
Tanh
),
&
GPU_Emitter
::
EmitElementwise
},
{
TI
(
ngraph
::
op
::
Tanh
),
&
runtime
::
gpu
::
GPU_Emitter
::
EmitElementwise
},
{
TI
(
ngraph
::
op
::
Asin
),
&
GPU_Emitter
::
EmitElementwise
},
{
TI
(
ngraph
::
op
::
Asin
),
&
runtime
::
gpu
::
GPU_Emitter
::
EmitElementwise
},
{
TI
(
ngraph
::
op
::
Acos
),
&
GPU_Emitter
::
EmitElementwise
},
{
TI
(
ngraph
::
op
::
Acos
),
&
runtime
::
gpu
::
GPU_Emitter
::
EmitElementwise
},
{
TI
(
ngraph
::
op
::
Atan
),
&
GPU_Emitter
::
EmitElementwise
},
{
TI
(
ngraph
::
op
::
Atan
),
&
runtime
::
gpu
::
GPU_Emitter
::
EmitElementwise
},
{
TI
(
ngraph
::
op
::
ReplaceSlice
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
ReplaceSlice
>
},
{
TI
(
ngraph
::
op
::
ReplaceSlice
),
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
ReplaceSlice
>
},
{
TI
(
ngraph
::
op
::
OneHot
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
OneHot
>
},
{
TI
(
ngraph
::
op
::
OneHot
),
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
OneHot
>
},
{
TI
(
ngraph
::
op
::
Floor
),
&
GPU_Emitter
::
EmitElementwise
},
{
TI
(
ngraph
::
op
::
Floor
),
&
runtime
::
gpu
::
GPU_Emitter
::
EmitElementwise
},
{
TI
(
ngraph
::
op
::
Ceiling
),
&
GPU_Emitter
::
EmitElementwise
},
{
TI
(
ngraph
::
op
::
Ceiling
),
&
runtime
::
gpu
::
GPU_Emitter
::
EmitElementwise
},
{
TI
(
ngraph
::
op
::
Sqrt
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
Sqrt
>
},
{
TI
(
ngraph
::
op
::
Sqrt
),
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
Sqrt
>
},
{
TI
(
ngraph
::
op
::
Convolution
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
Convolution
>
},
{
TI
(
ngraph
::
op
::
Convolution
),
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
Convolution
>
},
{
TI
(
ngraph
::
op
::
ConvolutionBackpropFilters
),
{
TI
(
ngraph
::
op
::
ConvolutionBackpropFilters
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
ConvolutionBackpropFilters
>
},
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
ConvolutionBackpropFilters
>
},
{
TI
(
ngraph
::
op
::
ConvolutionBackpropData
),
{
TI
(
ngraph
::
op
::
ConvolutionBackpropData
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
ConvolutionBackpropData
>
},
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
ConvolutionBackpropData
>
},
{
TI
(
ngraph
::
op
::
Not
),
&
GPU_Emitter
::
EmitElementwise
},
{
TI
(
ngraph
::
op
::
Not
),
&
runtime
::
gpu
::
GPU_Emitter
::
EmitElementwise
},
{
TI
(
ngraph
::
op
::
MaxPool
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
MaxPool
>
},
{
TI
(
ngraph
::
op
::
MaxPool
),
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
MaxPool
>
},
{
TI
(
ngraph
::
op
::
Reverse
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
Reverse
>
},
{
TI
(
ngraph
::
op
::
Reverse
),
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
Reverse
>
},
{
TI
(
ngraph
::
op
::
Result
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
Result
>
},
{
TI
(
ngraph
::
op
::
Result
),
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
Result
>
},
{
TI
(
ngraph
::
op
::
ReduceWindow
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
ReduceWindow
>
},
{
TI
(
ngraph
::
op
::
ReduceWindow
),
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
ReduceWindow
>
},
{
TI
(
ngraph
::
op
::
SelectAndScatter
),
{
TI
(
ngraph
::
op
::
SelectAndScatter
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
SelectAndScatter
>
},
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
SelectAndScatter
>
},
{
TI
(
ngraph
::
op
::
AvgPool
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
AvgPool
>
},
{
TI
(
ngraph
::
op
::
AvgPool
),
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
AvgPool
>
},
{
TI
(
ngraph
::
op
::
AvgPoolBackprop
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
AvgPoolBackprop
>
},
{
TI
(
ngraph
::
op
::
AvgPoolBackprop
),
{
TI
(
ngraph
::
op
::
Pad
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
Pad
>
},
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
AvgPoolBackprop
>
},
{
TI
(
ngraph
::
op
::
BatchNorm
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
BatchNorm
>
},
{
TI
(
ngraph
::
op
::
Pad
),
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
Pad
>
},
{
TI
(
ngraph
::
op
::
BatchNorm
),
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
BatchNorm
>
},
{
TI
(
ngraph
::
op
::
BatchNormBackprop
),
{
TI
(
ngraph
::
op
::
BatchNormBackprop
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
BatchNormBackprop
>
},
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
BatchNormBackprop
>
},
{
TI
(
ngraph
::
op
::
MaxPoolBackprop
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
MaxPoolBackprop
>
},
{
TI
(
ngraph
::
op
::
MaxPoolBackprop
),
{
TI
(
ngraph
::
op
::
Product
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
Product
>
},
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
MaxPoolBackprop
>
},
{
TI
(
ngraph
::
op
::
Max
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
Max
>
},
{
TI
(
ngraph
::
op
::
Product
),
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
Product
>
},
{
TI
(
ngraph
::
op
::
Min
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
Min
>
},
{
TI
(
ngraph
::
op
::
Max
),
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
Max
>
},
{
TI
(
ngraph
::
op
::
Relu
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
Relu
>
},
{
TI
(
ngraph
::
op
::
Min
),
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
Min
>
},
{
TI
(
ngraph
::
op
::
ReluBackprop
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
ReluBackprop
>
},
{
TI
(
ngraph
::
op
::
Relu
),
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
Relu
>
},
{
TI
(
ngraph
::
op
::
Softmax
),
&
GPU_Emitter
::
emit
<
ngraph
::
op
::
Softmax
>
},
{
TI
(
ngraph
::
op
::
ReluBackprop
),
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
ReluBackprop
>
},
};
{
TI
(
ngraph
::
op
::
Softmax
),
&
runtime
::
gpu
::
GPU_Emitter
::
emit
<
ngraph
::
op
::
Softmax
>
},
};
GPU_ExternalFunction
::
GPU_ExternalFunction
(
const
shared_ptr
<
ngraph
::
Function
>&
function
,
bool
release_function
)
runtime
::
gpu
::
GPU_ExternalFunction
::
GPU_ExternalFunction
(
const
shared_ptr
<
ngraph
::
Function
>&
function
,
bool
release_function
)
:
ngraph
::
runtime
::
ExternalFunction
(
function
,
release_function
)
:
ngraph
::
runtime
::
ExternalFunction
(
function
,
release_function
)
,
m_compiled_function
(
nullptr
)
,
m_compiled_function
(
nullptr
)
,
m_emit_timing
(
std
::
getenv
(
"NGRAPH_GPU_EMIT_TIMING"
)
!=
nullptr
)
,
m_emit_timing
(
std
::
getenv
(
"NGRAPH_GPU_EMIT_TIMING"
)
!=
nullptr
)
{
{
}
}
void
GPU_ExternalFunction
::
compile
()
void
runtime
::
gpu
::
GPU_ExternalFunction
::
compile
()
{
{
if
(
m_is_compiled
)
if
(
m_is_compiled
)
{
{
return
;
return
;
}
}
string
function_name
=
m_function
->
get_name
();
string
function_name
=
m_function
->
get_name
();
string
dump_filename
=
string
dump_filename
=
file_util
::
path_join
(
s_output_dir
,
function_name
+
"_ops.txt"
);
file_util
::
path_join
(
s_output_dir
,
function_name
+
"_ops.txt"
);
pass
::
Manager
pass_manager
;
pass
::
Manager
pass_manager
;
// pass_manager.register_pass<pass::TopologicalSort>();
// pass_manager.register_pass<pass::TopologicalSort>();
// For now, just make everyone row-major.
// For now, just make everyone row-major.
pass_manager
pass_manager
.
register_pass
<
pass
::
AssignLayout
<
descriptor
::
layout
::
DenseTensorViewLayout
>>
();
.
register_pass
<
pass
::
AssignLayout
<
descriptor
::
layout
::
DenseTensorViewLayout
>>
();
pass_manager
.
register_pass
<
pass
::
Liveness
>
();
pass_manager
.
register_pass
<
pass
::
Liveness
>
();
pass_manager
.
register_pass
<
pass
::
MemoryLayout
>
(
64
);
pass_manager
.
register_pass
<
pass
::
MemoryLayout
>
(
64
);
pass_manager
.
register_pass
<
pass
::
DumpSorted
>
(
dump_filename
);
pass_manager
.
register_pass
<
pass
::
DumpSorted
>
(
dump_filename
);
...
@@ -308,8 +303,7 @@ using namespace std;
...
@@ -308,8 +303,7 @@ using namespace std;
{
{
writer
<<
"// Declare debug timers
\n
"
;
writer
<<
"// Declare debug timers
\n
"
;
vector
<
string
>
names
;
vector
<
string
>
names
;
for
(
shared_ptr
<
Function
>
current_function
:
for
(
shared_ptr
<
Function
>
current_function
:
pass_manager
.
get_state
().
get_functions
())
pass_manager
.
get_state
().
get_functions
())
{
{
for
(
shared_ptr
<
Node
>
node
:
current_function
->
get_ordered_ops
())
for
(
shared_ptr
<
Node
>
node
:
current_function
->
get_ordered_ops
())
{
{
...
@@ -323,8 +317,8 @@ using namespace std;
...
@@ -323,8 +317,8 @@ using namespace std;
{
{
writer
<<
"ngraph::stopwatch timer_"
<<
s
<<
";
\n
"
;
writer
<<
"ngraph::stopwatch timer_"
<<
s
<<
";
\n
"
;
}
}
writer
<<
"extern
\"
C
\"
size_t get_debug_timer_count() { return "
writer
<<
"extern
\"
C
\"
size_t get_debug_timer_count() { return "
<<
names
.
size
()
<<
names
.
size
()
<<
"; }
\n
"
;
<<
"; }
\n
"
;
writer
<<
"extern
\"
C
\"
const char* get_debug_timer_name(size_t index)
\n
"
;
writer
<<
"extern
\"
C
\"
const char* get_debug_timer_name(size_t index)
\n
"
;
writer
<<
"{
\n
"
;
writer
<<
"{
\n
"
;
writer
.
indent
++
;
writer
.
indent
++
;
...
@@ -340,8 +334,7 @@ using namespace std;
...
@@ -340,8 +334,7 @@ using namespace std;
writer
<<
"return rc;
\n
"
;
writer
<<
"return rc;
\n
"
;
writer
.
indent
--
;
writer
.
indent
--
;
writer
<<
"}
\n
"
;
writer
<<
"}
\n
"
;
writer
writer
<<
"extern
\"
C
\"
const size_t get_debug_timer_microseconds(size_t index)
\n
"
;
<<
"extern
\"
C
\"
const size_t get_debug_timer_microseconds(size_t index)
\n
"
;
writer
<<
"{
\n
"
;
writer
<<
"{
\n
"
;
writer
.
indent
++
;
writer
.
indent
++
;
writer
<<
"size_t rc;
\n
"
;
writer
<<
"size_t rc;
\n
"
;
...
@@ -357,8 +350,7 @@ using namespace std;
...
@@ -357,8 +350,7 @@ using namespace std;
writer
<<
"return rc;
\n
"
;
writer
<<
"return rc;
\n
"
;
writer
.
indent
--
;
writer
.
indent
--
;
writer
<<
"}
\n
"
;
writer
<<
"}
\n
"
;
writer
writer
<<
"extern
\"
C
\"
const size_t get_debug_timer_call_count(size_t index)
\n
"
;
<<
"extern
\"
C
\"
const size_t get_debug_timer_call_count(size_t index)
\n
"
;
writer
<<
"{
\n
"
;
writer
<<
"{
\n
"
;
writer
.
indent
++
;
writer
.
indent
++
;
writer
<<
"size_t rc;
\n
"
;
writer
<<
"size_t rc;
\n
"
;
...
@@ -366,8 +358,7 @@ using namespace std;
...
@@ -366,8 +358,7 @@ using namespace std;
writer
<<
"{
\n
"
;
writer
<<
"{
\n
"
;
for
(
size_t
i
=
0
;
i
<
names
.
size
();
i
++
)
for
(
size_t
i
=
0
;
i
<
names
.
size
();
i
++
)
{
{
writer
<<
"case "
<<
i
<<
": rc = timer_"
<<
names
[
i
]
writer
<<
"case "
<<
i
<<
": rc = timer_"
<<
names
[
i
]
<<
".get_call_count(); break;
\n
"
;
<<
".get_call_count(); break;
\n
"
;
}
}
writer
<<
"default: rc = 0;
\n
"
;
writer
<<
"default: rc = 0;
\n
"
;
writer
<<
"}
\n
"
;
writer
<<
"}
\n
"
;
...
@@ -383,31 +374,26 @@ using namespace std;
...
@@ -383,31 +374,26 @@ using namespace std;
writer
<<
"void *__dso_handle = 0;
\n\n
"
;
writer
<<
"void *__dso_handle = 0;
\n\n
"
;
writer
<<
"// Declare all constants
\n
"
;
writer
<<
"// Declare all constants
\n
"
;
for
(
shared_ptr
<
Function
>
current_function
:
for
(
shared_ptr
<
Function
>
current_function
:
pass_manager
.
get_state
().
get_functions
())
pass_manager
.
get_state
().
get_functions
())
{
{
for
(
shared_ptr
<
Node
>
node
:
current_function
->
get_ordered_ops
())
for
(
shared_ptr
<
Node
>
node
:
current_function
->
get_ordered_ops
())
{
{
const
op
::
Constant
*
c
=
dynamic_cast
<
ngraph
::
op
::
Constant
*>
(
node
.
get
());
const
op
::
Constant
*
c
=
dynamic_cast
<
ngraph
::
op
::
Constant
*>
(
node
.
get
());
if
(
c
)
if
(
c
)
{
{
shared_ptr
<
descriptor
::
TensorView
>
tv
=
shared_ptr
<
descriptor
::
TensorView
>
tv
=
node
->
get_outputs
()[
0
].
get_tensor_view
();
node
->
get_outputs
()[
0
].
get_tensor_view
();
auto
c_value_strings
=
c
->
get_value_strings
();
auto
c_value_strings
=
c
->
get_value_strings
();
writer
<<
"static "
writer
<<
"static "
<<
tv
->
get_tensor
().
get_element_type
().
c_type_string
()
<<
" "
<<
tv
->
get_tensor
().
get_element_type
().
c_type_string
()
<<
" "
<<
tv
->
get_tensor
().
get_name
()
<<
"_cpu["
<<
c_value_strings
.
size
()
<<
tv
->
get_tensor
().
get_name
()
<<
"_cpu["
<<
"] =
\n
"
;
<<
c_value_strings
.
size
()
<<
"] =
\n
"
;
writer
<<
"{
\n
"
;
writer
<<
"{
\n
"
;
writer
.
indent
++
;
writer
.
indent
++
;
writer
<<
emit_string_array
(
c_value_strings
,
100
-
writer
.
indent
*
4
);
writer
<<
emit_string_array
(
c_value_strings
,
100
-
writer
.
indent
*
4
);
writer
.
indent
--
;
writer
.
indent
--
;
writer
<<
"
\n
};
\n\n
"
;
writer
<<
"
\n
};
\n\n
"
;
writer
<<
"static "
writer
<<
"static "
<<
tv
->
get_tensor
().
get_element_type
().
c_type_string
()
<<
" *"
<<
tv
->
get_tensor
().
get_element_type
().
c_type_string
()
<<
" *"
<<
tv
->
get_tensor
().
get_name
()
<<
";
\n
"
;
<<
tv
->
get_tensor
().
get_name
()
<<
";
\n
"
;
m_variable_name_map
[
tv
->
get_tensor
().
get_name
()]
=
m_variable_name_map
[
tv
->
get_tensor
().
get_name
()]
=
tv
->
get_tensor
().
get_name
();
tv
->
get_tensor
().
get_name
();
}
}
}
}
}
}
...
@@ -415,8 +401,7 @@ using namespace std;
...
@@ -415,8 +401,7 @@ using namespace std;
writer
<<
"// Declare all functions
\n
"
;
writer
<<
"// Declare all functions
\n
"
;
for
(
shared_ptr
<
Function
>
f
:
pass_manager
.
get_state
().
get_functions
())
for
(
shared_ptr
<
Function
>
f
:
pass_manager
.
get_state
().
get_functions
())
{
{
writer
<<
"extern
\"
C
\"
void "
<<
f
->
get_name
()
writer
<<
"extern
\"
C
\"
void "
<<
f
->
get_name
()
<<
"(void** inputs, void** outputs, "
<<
"(void** inputs, void** outputs, "
"cublasHandle_t& cublas_handle, "
"cublasHandle_t& cublas_handle, "
"cudnnHandle_t& cudnn_handle);
\n
"
;
"cudnnHandle_t& cudnn_handle);
\n
"
;
}
}
...
@@ -424,8 +409,7 @@ using namespace std;
...
@@ -424,8 +409,7 @@ using namespace std;
writer
<<
"
\n
"
;
writer
<<
"
\n
"
;
unordered_map
<
Node
*
,
string
>
match_functions
;
unordered_map
<
Node
*
,
string
>
match_functions
;
for
(
shared_ptr
<
Function
>
current_function
:
for
(
shared_ptr
<
Function
>
current_function
:
pass_manager
.
get_state
().
get_functions
())
pass_manager
.
get_state
().
get_functions
())
{
{
set
<
string
>
output_names
;
set
<
string
>
output_names
;
for
(
shared_ptr
<
Node
>
op
:
current_function
->
get_results
())
for
(
shared_ptr
<
Node
>
op
:
current_function
->
get_results
())
...
@@ -503,8 +487,7 @@ using namespace std;
...
@@ -503,8 +487,7 @@ using namespace std;
}
}
}
}
for
(
shared_ptr
<
Function
>
current_function
:
for
(
shared_ptr
<
Function
>
current_function
:
pass_manager
.
get_state
().
get_functions
())
pass_manager
.
get_state
().
get_functions
())
{
{
set
<
string
>
output_names
;
set
<
string
>
output_names
;
for
(
shared_ptr
<
Node
>
op
:
current_function
->
get_results
())
for
(
shared_ptr
<
Node
>
op
:
current_function
->
get_results
())
...
@@ -517,8 +500,7 @@ using namespace std;
...
@@ -517,8 +500,7 @@ using namespace std;
{
{
if
(
dynamic_cast
<
ngraph
::
op
::
Constant
*>
(
node
.
get
()))
if
(
dynamic_cast
<
ngraph
::
op
::
Constant
*>
(
node
.
get
()))
{
{
shared_ptr
<
descriptor
::
TensorView
>
tv
=
shared_ptr
<
descriptor
::
TensorView
>
tv
=
node
->
get_outputs
()[
0
].
get_tensor_view
();
node
->
get_outputs
()[
0
].
get_tensor_view
();
constants
.
insert
(
tv
.
get
());
constants
.
insert
(
tv
.
get
());
}
}
}
}
...
@@ -535,14 +517,13 @@ using namespace std;
...
@@ -535,14 +517,13 @@ using namespace std;
const
op
::
Constant
*
c
=
dynamic_cast
<
op
::
Constant
*>
(
node
.
get
());
const
op
::
Constant
*
c
=
dynamic_cast
<
op
::
Constant
*>
(
node
.
get
());
if
(
c
)
if
(
c
)
{
{
shared_ptr
<
descriptor
::
TensorView
>
tv
=
shared_ptr
<
descriptor
::
TensorView
>
tv
=
node
->
get_outputs
()[
0
].
get_tensor_view
();
node
->
get_outputs
()[
0
].
get_tensor_view
();
writer
<<
"if("
<<
tv
->
get_tensor
().
get_name
()
<<
" == NULL)
\n
"
;
writer
<<
"if("
<<
tv
->
get_tensor
().
get_name
()
<<
" == NULL)
\n
"
;
writer
<<
"{
\n
"
;
writer
<<
"{
\n
"
;
writer
.
indent
++
;
writer
.
indent
++
;
writer
<<
"runtime::gpu::cuda_memcpyHtD("
<<
tv
->
get_tensor
().
get_name
()
writer
<<
"runtime::gpu::cuda_memcpyHtD("
<<
tv
->
get_tensor
().
get_name
()
<<
", "
<<
", "
<<
tv
->
get_tensor
().
get_name
()
<<
"_cpu, "
<<
tv
->
get_tensor
().
get_name
()
<<
"_cpu, "
<<
tv
->
get_tensor
().
size
()
<<
tv
->
get_tensor
().
size
()
<<
");
\n
"
;
<<
");
\n
"
;
writer
.
indent
--
;
writer
.
indent
--
;
writer
<<
"}
\n
"
;
writer
<<
"}
\n
"
;
}
}
...
@@ -576,8 +557,7 @@ using namespace std;
...
@@ -576,8 +557,7 @@ using namespace std;
{
{
stringstream
ss
;
stringstream
ss
;
ss
<<
"(("
<<
tensor
->
get_element_type
().
c_type_string
()
ss
<<
"(("
<<
tensor
->
get_element_type
().
c_type_string
()
<<
"*)((char *)pool_base_ptr + "
<<
tensor
->
get_pool_offset
()
<<
"*)((char *)pool_base_ptr + "
<<
tensor
->
get_pool_offset
()
<<
"))"
;
<<
"))"
;
m_variable_name_map
[
tensor
->
get_name
()]
=
ss
.
str
();
m_variable_name_map
[
tensor
->
get_name
()]
=
ss
.
str
();
}
}
}
}
...
@@ -585,15 +565,12 @@ using namespace std;
...
@@ -585,15 +565,12 @@ using namespace std;
// Add inputs to the variable name map
// Add inputs to the variable name map
size_t
arg_index
=
0
;
size_t
arg_index
=
0
;
for
(
shared_ptr
<
ngraph
::
op
::
Parameter
>
param
:
for
(
shared_ptr
<
ngraph
::
op
::
Parameter
>
param
:
current_function
->
get_parameters
())
current_function
->
get_parameters
())
{
{
for
(
size_t
i
=
0
;
i
<
param
->
get_output_size
();
++
i
)
for
(
size_t
i
=
0
;
i
<
param
->
get_output_size
();
++
i
)
{
{
shared_ptr
<
descriptor
::
TensorView
>
tv
=
shared_ptr
<
descriptor
::
TensorView
>
tv
=
param
->
get_output_tensor_view
(
i
);
param
->
get_output_tensor_view
(
i
);
const
element
::
Type
&
et
=
tv
->
get_tensor_view_type
()
->
get_element_type
();
const
element
::
Type
&
et
=
tv
->
get_tensor_view_type
()
->
get_element_type
();
string
type
=
et
.
c_type_string
();
string
type
=
et
.
c_type_string
();
stringstream
ss
;
stringstream
ss
;
ss
<<
"(("
<<
type
<<
"*)(inputs["
<<
arg_index
<<
"]))"
;
ss
<<
"(("
<<
type
<<
"*)(inputs["
<<
arg_index
<<
"]))"
;
...
@@ -627,8 +604,7 @@ using namespace std;
...
@@ -627,8 +604,7 @@ using namespace std;
shared_ptr
<
descriptor
::
TensorView
>
tv
=
op
->
get_output_tensor_view
();
shared_ptr
<
descriptor
::
TensorView
>
tv
=
op
->
get_output_tensor_view
();
const
element
::
Type
&
et
=
tv
->
get_tensor_view_type
()
->
get_element_type
();
const
element
::
Type
&
et
=
tv
->
get_tensor_view_type
()
->
get_element_type
();
bool
parameter_as_output
=
false
;
bool
parameter_as_output
=
false
;
for
(
shared_ptr
<
ngraph
::
op
::
Parameter
>
param
:
for
(
shared_ptr
<
ngraph
::
op
::
Parameter
>
param
:
current_function
->
get_parameters
())
current_function
->
get_parameters
())
{
{
for
(
const
descriptor
::
Output
&
pout
:
param
->
get_outputs
())
for
(
const
descriptor
::
Output
&
pout
:
param
->
get_outputs
())
{
{
...
@@ -636,10 +612,8 @@ using namespace std;
...
@@ -636,10 +612,8 @@ using namespace std;
if
(
tv
==
ptv
)
if
(
tv
==
ptv
)
{
{
parameter_as_output
=
true
;
parameter_as_output
=
true
;
writer
writer
<<
"ngraph::runtime::gpu::cuda_memcpyDtD(reinterpret_cast<"
<<
"ngraph::runtime::gpu::cuda_memcpyDtD(reinterpret_cast<"
<<
et
.
c_type_string
()
<<
"*>(outputs["
<<
output_index
<<
"]), "
<<
et
.
c_type_string
()
<<
"*>(outputs["
<<
output_index
<<
"]), "
<<
m_variable_name_map
[
ptv
->
get_tensor
().
get_name
()]
<<
", "
<<
m_variable_name_map
[
ptv
->
get_tensor
().
get_name
()]
<<
", "
<<
ptv
->
get_tensor
().
size
()
<<
");
\n
"
;
<<
ptv
->
get_tensor
().
size
()
<<
");
\n
"
;
break
;
break
;
...
@@ -650,9 +624,9 @@ using namespace std;
...
@@ -650,9 +624,9 @@ using namespace std;
{
{
if
(
contains
(
constants
,
tv
.
get
()))
if
(
contains
(
constants
,
tv
.
get
()))
{
{
writer
<<
"ngraph::runtime::gpu::cuda_memcpyHtD(outputs["
writer
<<
"ngraph::runtime::gpu::cuda_memcpyHtD(outputs["
<<
output_index
<<
output_index
<<
"], "
<<
tv
->
get_tensor
().
get_name
()
<<
"], "
<<
tv
->
get_tensor
().
get_name
()
<<
", "
<<
", "
<<
tv
->
get_tensor
().
size
()
<<
");
\n
"
;
<<
tv
->
get_tensor
().
size
()
<<
");
\n
"
;
}
}
else
else
{
{
...
@@ -667,29 +641,27 @@ using namespace std;
...
@@ -667,29 +641,27 @@ using namespace std;
for
(
shared_ptr
<
Node
>
node
:
current_function
->
get_ordered_ops
())
for
(
shared_ptr
<
Node
>
node
:
current_function
->
get_ordered_ops
())
{
{
auto
&
n
=
auto
&
n
=
*
node
;
// Work around a compiler warning (*node inside typeid may have effects
*
node
;
// Work around a compiler warning (*node inside typeid may have effects
// with shared pointers, which is fine here but clang doesn't like it.)
// with shared pointers, which is fine here but clang doesn't like it.)
auto
handler
=
dispatcher
.
find
(
type_index
(
typeid
(
n
)));
auto
handler
=
dispatcher
.
find
(
type_index
(
typeid
(
n
)));
if
(
handler
==
dispatcher
.
end
())
if
(
handler
==
dispatcher
.
end
())
{
{
throw
ngraph_error
(
"Unhandled op during code generation : "
+
throw
ngraph_error
(
"Unhandled op during code generation : "
+
node
->
description
());
node
->
description
());
}
}
vector
<
GPU_TensorViewWrapper
>
in
;
vector
<
GPU_TensorViewWrapper
>
in
;
for
(
const
descriptor
::
Input
&
input
:
node
->
get_inputs
())
for
(
const
descriptor
::
Input
&
input
:
node
->
get_inputs
())
{
{
const
descriptor
::
Output
&
output
=
input
.
get_output
();
const
descriptor
::
Output
&
output
=
input
.
get_output
();
shared_ptr
<
descriptor
::
TensorView
>
tv
=
output
.
get_tensor_view
();
shared_ptr
<
descriptor
::
TensorView
>
tv
=
output
.
get_tensor_view
();
in
.
push_back
(
GPU_TensorViewWrapper
(
in
.
push_back
(
tv
,
m_variable_name_map
[
tv
->
get_tensor
().
get_name
()]));
GPU_TensorViewWrapper
(
tv
,
m_variable_name_map
[
tv
->
get_tensor
().
get_name
()]));
}
}
vector
<
GPU_TensorViewWrapper
>
out
;
vector
<
GPU_TensorViewWrapper
>
out
;
for
(
const
descriptor
::
Output
&
output
:
node
->
get_outputs
())
for
(
const
descriptor
::
Output
&
output
:
node
->
get_outputs
())
{
{
shared_ptr
<
descriptor
::
TensorView
>
tv
=
output
.
get_tensor_view
();
shared_ptr
<
descriptor
::
TensorView
>
tv
=
output
.
get_tensor_view
();
out
.
push_back
(
GPU_TensorViewWrapper
(
out
.
push_back
(
tv
,
m_variable_name_map
[
tv
->
get_tensor
().
get_name
()]));
GPU_TensorViewWrapper
(
tv
,
m_variable_name_map
[
tv
->
get_tensor
().
get_name
()]));
}
}
// Emit operation prologue
// Emit operation prologue
...
@@ -743,8 +715,7 @@ using namespace std;
...
@@ -743,8 +715,7 @@ using namespace std;
// TODO: Cleanup and make this a utility function
// TODO: Cleanup and make this a utility function
file_util
::
make_directory
(
s_output_dir
);
file_util
::
make_directory
(
s_output_dir
);
string
filename
=
string
filename
=
file_util
::
path_join
(
s_output_dir
,
function_name
+
"_codegen.cpp"
);
file_util
::
path_join
(
s_output_dir
,
function_name
+
"_codegen.cpp"
);
ofstream
out
(
filename
);
ofstream
out
(
filename
);
string
code
=
writer
.
get_code
();
string
code
=
writer
.
get_code
();
out
<<
code
;
out
<<
code
;
...
@@ -763,8 +734,7 @@ using namespace std;
...
@@ -763,8 +734,7 @@ using namespace std;
}
}
m_execution_engine
->
add_module
(
codegen_module
);
m_execution_engine
->
add_module
(
codegen_module
);
m_execution_engine
->
finalize
();
m_execution_engine
->
finalize
();
m_compiled_function
=
m_compiled_function
=
m_execution_engine
->
find_function
<
EntryPoint_t
>
(
function_name
);
m_execution_engine
->
find_function
<
EntryPoint_t
>
(
function_name
);
assert
(
m_compiled_function
);
assert
(
m_compiled_function
);
m_is_compiled
=
true
;
m_is_compiled
=
true
;
...
@@ -772,13 +742,13 @@ using namespace std;
...
@@ -772,13 +742,13 @@ using namespace std;
{
{
release_function
();
release_function
();
}
}
}
}
void
GPU_ExternalFunction
::
handle_output_alias
(
void
runtime
::
gpu
::
GPU_ExternalFunction
::
handle_output_alias
(
codegen
::
CodeWriter
&
writer
,
codegen
::
CodeWriter
&
writer
,
const
Node
&
node
,
const
Node
&
node
,
const
unordered_map
<
descriptor
::
TensorView
*
,
vector
<
size_t
>>&
output_alias_map
)
const
unordered_map
<
descriptor
::
TensorView
*
,
vector
<
size_t
>>&
output_alias_map
)
{
{
for
(
const
descriptor
::
Output
&
output
:
node
.
get_outputs
())
for
(
const
descriptor
::
Output
&
output
:
node
.
get_outputs
())
{
{
shared_ptr
<
descriptor
::
TensorView
>
otv
=
output
.
get_tensor_view
();
shared_ptr
<
descriptor
::
TensorView
>
otv
=
output
.
get_tensor_view
();
...
@@ -794,44 +764,40 @@ using namespace std;
...
@@ -794,44 +764,40 @@ using namespace std;
{
{
writer
<<
"ngraph::runtime::gpu::cuda_memcpyDtD(static_cast<void*>("
writer
<<
"ngraph::runtime::gpu::cuda_memcpyDtD(static_cast<void*>("
"outputs["
"outputs["
<<
outputs
[
i
]
<<
"]), static_cast<void*>(outputs["
<<
outputs
[
i
]
<<
"]), static_cast<void*>(outputs["
<<
outputs
[
0
]
<<
outputs
[
0
]
<<
"]), "
<<
otv
->
get_tensor
().
size
()
<<
"]), "
<<
otv
->
get_tensor
().
size
()
<<
");
\n
"
;
<<
");
\n
"
;
}
}
writer
.
indent
--
;
writer
.
indent
--
;
writer
<<
"}
\n
"
;
writer
<<
"}
\n
"
;
}
}
}
}
}
}
}
}
shared_ptr
<
ngraph
::
runtime
::
CallFrame
>
GPU_ExternalFunction
::
make_call_frame
()
shared_ptr
<
ngraph
::
runtime
::
CallFrame
>
runtime
::
gpu
::
GPU_ExternalFunction
::
make_call_frame
()
{
{
if
(
!
m_is_compiled
)
if
(
!
m_is_compiled
)
{
{
compile
();
compile
();
}
}
return
make_shared
<
GPU_CallFrame
>
(
shared_from_this
(),
m_compiled_function
);
return
make_shared
<
GPU_CallFrame
>
(
shared_from_this
(),
m_compiled_function
);
}
}
void
GPU_ExternalFunction
::
emit_debug_function_entry
(
void
runtime
::
gpu
::
GPU_ExternalFunction
::
emit_debug_function_entry
(
codegen
::
CodeWriter
&
writer
,
codegen
::
CodeWriter
&
writer
,
Node
*
node
,
Node
*
node
,
const
std
::
vector
<
GPU_TensorViewWrapper
>&
in
,
const
std
::
vector
<
GPU_TensorViewWrapper
>&
in
,
const
std
::
vector
<
GPU_TensorViewWrapper
>&
out
)
const
std
::
vector
<
GPU_TensorViewWrapper
>&
out
)
{
{
writer
<<
"timer_"
<<
node
->
get_name
()
<<
".start();
\n
"
;
writer
<<
"timer_"
<<
node
->
get_name
()
<<
".start();
\n
"
;
}
}
void
GPU_ExternalFunction
::
emit_debug_function_exit
(
void
runtime
::
gpu
::
GPU_ExternalFunction
::
emit_debug_function_exit
(
codegen
::
CodeWriter
&
writer
,
codegen
::
CodeWriter
&
writer
,
Node
*
node
,
Node
*
node
,
const
std
::
vector
<
GPU_TensorViewWrapper
>&
in
,
const
std
::
vector
<
GPU_TensorViewWrapper
>&
in
,
const
std
::
vector
<
GPU_TensorViewWrapper
>&
out
)
const
std
::
vector
<
GPU_TensorViewWrapper
>&
out
)
{
{
writer
<<
"timer_"
<<
node
->
get_name
()
<<
".stop();
\n
"
;
writer
<<
"timer_"
<<
node
->
get_name
()
<<
".stop();
\n
"
;
}
}
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment