submodule / ngraph · Commits

Commit 94429375 authored Feb 07, 2018 by fenglei.tian
refactor and fix bug
Parent: 28de1557
Showing 8 changed files with 39 additions and 127 deletions (+39, -127)
scripts/build.sh                                    +1    -1
scripts/run.sh                                      +2    -1
src/ngraph/runtime/gpu/gpu_call_frame.cpp          +10    -4
src/ngraph/runtime/gpu/gpu_call_frame.hpp           +3    -3
src/ngraph/runtime/gpu/gpu_emitter.cpp             +13   -45
src/ngraph/runtime/gpu/gpu_external_function.cpp    +8   -63
src/ngraph/runtime/gpu/gpu_tensor_view.cpp          +1    -9
src/ngraph/runtime/gpu/gpu_tensor_view.hpp          +1    -1
scripts/build.sh

 mkdir build
 cd build
 cmake .. -DNGRAPH_GPU_ENABLE=TRUE -DNGRAPH_CPU_ENABLE=TRUE -DCUDNN_ROOT_DIR=/usr/lib/x86_64-linux-gnu/ -DCUDNN_INCLUDE_DIR=/usr/include -DZLIB_LIBRARY=/usr/lib/x86_64-linux/gpu/libz.so -DZLIB_INCLUDE_DIR=/usr/include/ -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
-make
+make -j24 all
scripts/run.sh

-./build/test/unit-test --gtest_filter=GPU.*
+./build/test/unit-test --gtest_filter=GPU.ab
+#./build/test/unit-test --gtest_filter=GPU.dot*
src/ngraph/runtime/gpu/gpu_call_frame.cpp

@@ -17,7 +17,7 @@
 #include <stdio.h>
 #include <cuda_runtime.h>
-#include "cublas_v2.h"
+#include "cublas.h"
 #include "ngraph/runtime/gpu/gpu_call_frame.hpp"
 #include "ngraph/runtime/gpu/gpu_external_function.hpp"

@@ -32,21 +32,27 @@ runtime::gpu::GPU_CallFrame::GPU_CallFrame(std::shared_ptr<GPU_ExternalFunction>
     , m_compiled_function(compiled_function)
 {
     cublasStatus_t stat = cublasCreate(&m_cublas_handle);
-    if (stat != cudaSuccess)
+    if (stat != CUBLAS_STATUS_SUCCESS)
     {
         throw runtime_error("cuBLAS create failed");
     }
-    cublasSetPointerMode(m_cublas_handle, CUBLAS_POINTER_MODE_HOST);
+    // Pass scalars as reference on the device
+    cublasSetPointerMode(m_cublas_handle, CUBLAS_POINTER_MODE_DEVICE);
 }

+runtime::gpu::GPU_CallFrame::~GPU_CallFrame()
+{
+    cublasDestroy(m_cublas_handle);
+}
+
 void runtime::gpu::GPU_CallFrame::tensor_call(
     const std::vector<std::shared_ptr<ngraph::runtime::TensorView>>& input_tvs,
     const std::vector<std::shared_ptr<ngraph::runtime::TensorView>>& output_tvs)
 {
     // Host tensors
-    vector<void**> inputs;
-    vector<void**> outputs;
+    vector<void*> inputs;
+    vector<void*> outputs;
     for (size_t i = 0; i < input_tvs.size(); i++)
     {
 ...
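Note on the hunk above: cublasCreate returns a cublasStatus_t, so the old comparison against cudaSuccess (a cudaError_t constant) mixed enum types; both happen to be zero, but CUBLAS_STATUS_SUCCESS is the correct constant. The new destructor also releases the handle instead of leaking it. A minimal standalone sketch of the same create/configure/destroy lifecycle (hypothetical class, not part of the commit):

    #include <stdexcept>
    #include <cublas_v2.h>

    // Sketch of the handle lifecycle GPU_CallFrame now implements.
    class CublasHandleGuard
    {
    public:
        CublasHandleGuard()
        {
            if (cublasCreate(&m_handle) != CUBLAS_STATUS_SUCCESS)
            {
                throw std::runtime_error("cuBLAS create failed");
            }
            // Device pointer mode: cuBLAS reads scalars such as alpha/beta
            // from device memory unless a call site switches back to host mode.
            cublasSetPointerMode(m_handle, CUBLAS_POINTER_MODE_DEVICE);
        }
        ~CublasHandleGuard() { cublasDestroy(m_handle); }
        cublasHandle_t& get() { return m_handle; }

    private:
        cublasHandle_t m_handle{};
    };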
src/ngraph/runtime/gpu/gpu_call_frame.hpp

@@ -35,8 +35,8 @@ namespace ngraph
             class GPU_CallFrame;
             class GPU_ExternalFunction;

-            using EntryPoint_t = void(void*** inputs,
-                                      void*** outputs,
+            using EntryPoint_t = void(void** inputs,
+                                      void** outputs,
                                       cublasHandle_t& cublas_handle);
             using EntryPoint = std::function<EntryPoint_t>;

@@ -48,7 +48,7 @@ namespace ngraph
                 GPU_CallFrame(std::shared_ptr<GPU_ExternalFunction> external_function,
                               EntryPoint compiled_function);
-                ~GPU_CallFrame() override = default;
+                ~GPU_CallFrame() override;

                 /// @brief Invoke the function with values matching the signature of the function.
                 ///
 ...
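Together with the vector<void*> change in gpu_call_frame.cpp, one level of indirection is gone: the compiled function now receives flat arrays of raw device pointers. A minimal sketch of a call site under that assumption (the helper and its names are hypothetical):

    #include <functional>
    #include <vector>
    #include <cublas_v2.h>

    using EntryPoint_t = void(void** inputs, void** outputs,
                              cublasHandle_t& cublas_handle);
    using EntryPoint = std::function<EntryPoint_t>;

    // Hypothetical forwarding helper: tensor_call gathers one device pointer
    // per tensor view, then hands the flat arrays to the compiled function.
    void invoke(const EntryPoint& compiled,
                std::vector<void*>& inputs,
                std::vector<void*>& outputs,
                cublasHandle_t& handle)
    {
        compiled(inputs.data(), outputs.data(), handle);
    }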
src/ngraph/runtime/gpu/gpu_emitter.cpp

@@ -63,55 +63,28 @@ void runtime::gpu::GPU_Emitter::EmitAdd(codegen::CodeWriter& writer,
                                         const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
                                         const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
 {
     const Shape& arg0_shape = args[0].get_shape();
     const Shape& arg1_shape = args[1].get_shape();
     if (arg0_shape.empty() || arg1_shape.empty())
     {
         auto& first = (arg0_shape.empty() ? args[0] : args[1]);
         auto& second = (arg0_shape.empty() ? args[1] : args[0]);
     }
     // clang-format off
     else if ((arg0_shape.size() <= 2) && (arg1_shape.size() <= 2))
     {
         // TODO Assert arg0_shape[0] == arg1_shape[0]?
         writer << "{ // " << n->get_name() << "\n";
         writer.indent++;
-        writer << "static const float alpha = 1.0;\n";
-        writer << "cublasSetPointerMode(cublas_handle, CUBLAS_POINTER_MODE_HOST);\n";;
-        writer << "cublasScopy("
-               << "cublas_handle," << out[0].get_size() << ","
-        writer << "cublasSaxpy("
-               << "cublas_handle," << out[0].get_size() << ","
-               << "&alpha," //alpha
-               << args[1].get_name() << ","
-               // Todo handle striding?
-               << "1," << out[0].get_name() << "," << "1);\n";
-               << out[0].get_size() << ");\n";
+        writer << "const float alpha = 1.0;\n";
+        writer << "const float beta = 1.0;\n";
+        writer << "cublasSetPointerMode(cublas_handle, CUBLAS_POINTER_MODE_HOST);\n";
+        writer << "cublasSgeam("
+               << "cublas_handle, CUBLAS_OP_N, CUBLAS_OP_N,\n"
+               << out[0].get_size() << "," << " 1,\n"
+               << " &alpha, " << args[0].get_name() << ","
+               // Todo handle striding?
+               << "1," << args[0].get_size() << ",\n"
+               << " &beta, " << args[1].get_name() << ","
+               << args[1].get_size() << ",\n"
+               << out[0].get_name() << "," << "1);\n";
         writer.indent--;
         writer << "cublasSetPointerMode(cublas_handle, CUBLAS_POINTER_MODE_DEVICE);\n";;
         writer << "}\n";
     }
     // clang-format on
     else if ((arg0_shape.size() == 2) && (arg1_shape.size() == 1))
     {
     }
     else if ((arg0_shape.size() == 2) && (arg1_shape.size() == 2))
     {
         // GEMM Call
     }
     else
     {
         // General ND Call?
     }
 }

 void runtime::gpu::GPU_Emitter::EmitConcat(codegen::CodeWriter& writer,
 ...
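The rewritten EmitAdd leans on cublasSgeam, which computes C = alpha*op(A) + beta*op(B); with alpha = beta = 1 and every buffer viewed as an n-by-1 column, that is exactly elementwise addition, produced in a single call instead of the old copy-then-axpy pair. A host-side sketch of the same call the generated code makes (buffer names are assumed; d_a, d_b, d_c are device pointers to n floats):

    #include <cublas_v2.h>

    // Elementwise c = a + b via cublasSgeam, treating the buffers as n x 1
    // matrices: C = alpha * A + beta * B with alpha = beta = 1.
    void add_with_sgeam(cublasHandle_t handle,
                        const float* d_a, const float* d_b, float* d_c, int n)
    {
        const float alpha = 1.0f;
        const float beta = 1.0f;
        // alpha/beta live in host memory, so switch pointer modes around the
        // call, mirroring the generated code.
        cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_HOST);
        cublasSgeam(handle, CUBLAS_OP_N, CUBLAS_OP_N,
                    n, 1,           // view each buffer as n rows x 1 column
                    &alpha, d_a, n, // leading dimension >= number of rows
                    &beta, d_b, n,
                    d_c, n);
        cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_DEVICE);
    }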
@@ -188,7 +161,6 @@ void runtime::gpu::GPU_Emitter::EmitDot(codegen::CodeWriter& writer,
            << out[0].get_name() << "," << "1);\n";
     // clang-format on
     writer << "cublasSetPointerMode(cublas_handle, CUBLAS_POINTER_MODE_DEVICE);\n";
-    ;
     writer.indent--;
     writer << "}\n";
 ...

@@ -224,7 +196,6 @@ void runtime::gpu::GPU_Emitter::EmitDot(codegen::CodeWriter& writer,
            << out[0].get_name() << "," << "n);\n";
     // clang-format on
     writer << "cublasSetPointerMode(cublas_handle, CUBLAS_POINTER_MODE_DEVICE);\n";
     writer.indent--;
     writer << "}\n";
 }
 ...

@@ -333,7 +304,6 @@ void runtime::gpu::GPU_Emitter::EmitMaximum(codegen::CodeWriter& writer,
     writer += R"(
cudnnDestroy(cudnnHandle);
)";
     writer << "cublasSetPointerMode(cublas_handle, CUBLAS_POINTER_MODE_DEVICE);\n";;
     writer.indent--;
     writer << "}\n";
     // clang-format on
 ...

@@ -458,7 +428,6 @@ void runtime::gpu::GPU_Emitter::EmitReshape(codegen::CodeWriter& writer,
            << arg_shape[1] << ","
            << out[0].get_name() << "," << out[0].get_shape()[1] << ");\n";
     writer << "cublasSetPointerMode(cublas_handle, CUBLAS_POINTER_MODE_DEVICE);\n";;
     writer.indent--;
     writer << "}\n";
     //clang-format on
 ...

@@ -541,7 +510,6 @@ void runtime::gpu::GPU_Emitter::EmitMultiply(
            << "1" // Stride y
            << ");\n";
     writer.indent--;
     writer << "cublasSetPointerMode(cublas_handle, CUBLAS_POINTER_MODE_DEVICE);\n";;
     writer << "}\n";
     // clang-format on
 }
 ...
src/ngraph/runtime/gpu/gpu_external_function.cpp

@@ -21,6 +21,11 @@
 #include <typeinfo>
 #include <unordered_map>
+#include <cuda_runtime.h>
+#include <cudnn_v7.h>
+#include "cublas_v2.h"
+#include "cuda.h"
 #include "ngraph/codegen/code_writer.hpp"
 #include "ngraph/codegen/compiler.hpp"
 #include "ngraph/codegen/execution_engine.hpp"
 ...

@@ -223,19 +228,6 @@ void runtime::gpu::GPU_ExternalFunction::compile()
     writer += R"(// Generated by the NGraph GPU backend
-#include <cassert>
-#include <cmath>
-#include <cstdlib>
-#include <fstream>
-#include <fstream>
-#include <iostream>
-#include <memory>
-#include <string>
-#include <tuple>
-#include <typeindex>
-#include <typeinfo>
-#include <unordered_map>
 #include <cuda_runtime.h>
 #include <cudnn_v7.h>
 #include "cublas_v2.h"
 ...

@@ -249,53 +241,6 @@ void runtime::gpu::GPU_ExternalFunction::compile()
 #include "ngraph/function.hpp"
 #include "ngraph/graph_util.hpp"
 #include "ngraph/node.hpp"
-#include "ngraph/ops/abs.hpp"
-#include "ngraph/ops/acos.hpp"
-#include "ngraph/ops/add.hpp"
-#include "ngraph/ops/asin.hpp"
-#include "ngraph/ops/atan.hpp"
-#include "ngraph/ops/broadcast.hpp"
-#include "ngraph/ops/ceiling.hpp"
-#include "ngraph/ops/concatenate.hpp"
-#include "ngraph/ops/constant.hpp"
-#include "ngraph/ops/convert.hpp"
-#include "ngraph/ops/convolution.hpp"
-#include "ngraph/ops/cos.hpp"
-#include "ngraph/ops/cosh.hpp"
-#include "ngraph/ops/divide.hpp"
-#include "ngraph/ops/dot.hpp"
-#include "ngraph/ops/equal.hpp"
-#include "ngraph/ops/exp.hpp"
-#include "ngraph/ops/floor.hpp"
-#include "ngraph/ops/function_call.hpp"
-#include "ngraph/ops/greater.hpp"
-#include "ngraph/ops/greater_eq.hpp"
-#include "ngraph/ops/less.hpp"
-#include "ngraph/ops/less_eq.hpp"
-#include "ngraph/ops/log.hpp"
-#include "ngraph/ops/max_pool.hpp"
-#include "ngraph/ops/maximum.hpp"
-#include "ngraph/ops/minimum.hpp"
-#include "ngraph/ops/multiply.hpp"
-#include "ngraph/ops/negative.hpp"
-#include "ngraph/ops/not.hpp"
-#include "ngraph/ops/not_equal.hpp"
-#include "ngraph/ops/one_hot.hpp"
-#include "ngraph/ops/power.hpp"
-#include "ngraph/ops/reduce.hpp"
-#include "ngraph/ops/replace_slice.hpp"
-#include "ngraph/ops/reshape.hpp"
-#include "ngraph/ops/reverse.hpp"
-#include "ngraph/ops/select.hpp"
-#include "ngraph/ops/sign.hpp"
-#include "ngraph/ops/sin.hpp"
-#include "ngraph/ops/sinh.hpp"
-#include "ngraph/ops/slice.hpp"
-#include "ngraph/ops/sqrt.hpp"
-#include "ngraph/ops/subtract.hpp"
-#include "ngraph/ops/sum.hpp"
-#include "ngraph/ops/tan.hpp"
-#include "ngraph/ops/tanh.hpp"
 #include "ngraph/pass/assign_layout.hpp"
 #include "ngraph/pass/dump_sorted.hpp"
 #include "ngraph/pass/liveness.hpp"
 ...

@@ -413,7 +358,7 @@ void runtime::gpu::GPU_ExternalFunction::compile()
     for (shared_ptr<Function> f : pass_manager.get_state().get_functions())
     {
         writer << "extern \"C\" void " << f->get_name()
-               << "(void*** inputs, void*** outputs, cublasHandle_t& cublas_handle);\n";
+               << "(void** inputs, void** outputs, cublasHandle_t& cublas_handle);\n";
     }
     writer << "\n";
 ...

@@ -450,7 +395,7 @@ void runtime::gpu::GPU_ExternalFunction::compile()
         string match_function_name;
         for (size_t j = i + 1; j < op_list.size(); j++)
         {
-            if (op_list[i]->is_functionally_identical(*op_list[j]))
+            if (0) // op_list[i]->is_functionally_identical(*op_list[j]))
             {
                 if (match_function_name.empty())
                 {
 ...

@@ -531,7 +476,7 @@ void runtime::gpu::GPU_ExternalFunction::compile()
     }
     writer << "extern \"C\" void " << current_function->get_name();
-    writer << "(void*** inputs, void*** outputs, cublasHandle_t& cublas_handle)\n";
+    writer << "(void** inputs, void** outputs, cublasHandle_t& cublas_handle)\n";
     writer << "{\n";
     writer.indent++;
 ...
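Two effects worth noting: the generated entry points now take flat void** argument arrays, matching the EntryPoint_t change in gpu_call_frame.hpp, and the `if (0)` temporarily disables reuse of functionally identical op functions. Illustratively, a generated declaration now has this shape (function_0 is a placeholder; real names come from Function::get_name()):

    // Illustrative only -- the emitter substitutes the actual function name.
    extern "C" void function_0(void** inputs, void** outputs,
                               cublasHandle_t& cublas_handle);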
src/ngraph/runtime/gpu/gpu_tensor_view.cpp

@@ -39,7 +39,7 @@ runtime::gpu::GPU_TensorView::GPU_TensorView(const ngraph::element::Type& elemen
     m_buffer_size = m_descriptor->get_tensor_view_layout()->get_size() * element_type.size();
     if (m_buffer_size > 0)
     {
-        cudaMalloc(&m_allocated_buffer_pool, m_buffer_size);
+        cudaMalloc((void**)&m_allocated_buffer_pool, m_buffer_size);
     }
 }

@@ -50,18 +50,10 @@ runtime::gpu::GPU_TensorView::~GPU_TensorView()
 void runtime::gpu::GPU_TensorView::write(const void* source, size_t tensor_offset, size_t n)
 {
-    if (tensor_offset + n > m_buffer_size)
-    {
-        throw out_of_range("write access past end of tensor");
-    }
     cudaMemcpy(m_allocated_buffer_pool, source, n, cudaMemcpyHostToDevice);
 }

 void runtime::gpu::GPU_TensorView::read(void* target, size_t tensor_offset, size_t n) const
 {
-    if (tensor_offset + n > m_buffer_size)
-    {
-        throw out_of_range("read access past end of tensor");
-    }
     cudaMemcpy(target, m_allocated_buffer_pool, n, cudaMemcpyDeviceToHost);
 }
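Both copies now ignore tensor_offset and the bounds checks are gone. If the guards were wanted back, a checked variant could look like this (hypothetical helper, not part of the commit):

    #include <cstddef>
    #include <stdexcept>
    #include <cuda_runtime.h>

    // Hypothetical checked host-to-device write: honors the byte offset and
    // restores the range check removed from GPU_TensorView::write.
    void checked_write(void* device_base, std::size_t buffer_size,
                       const void* source, std::size_t tensor_offset, std::size_t n)
    {
        if (tensor_offset + n > buffer_size)
        {
            throw std::out_of_range("write access past end of tensor");
        }
        cudaError_t rc = cudaMemcpy(static_cast<char*>(device_base) + tensor_offset,
                                    source, n, cudaMemcpyHostToDevice);
        if (rc != cudaSuccess)
        {
            throw std::runtime_error(cudaGetErrorString(rc));
        }
    }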
src/ngraph/runtime/gpu/gpu_tensor_view.hpp

@@ -49,6 +49,6 @@ public:
     /// @param n Number of bytes to read, must be integral number of elements.
     void read(void* p, size_t tensor_offset, size_t n) const override;

-    void** m_allocated_buffer_pool;
+    void* m_allocated_buffer_pool;
     size_t m_buffer_size;
 };