Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
N
ngraph
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ngraph
Commits
ee220ffb
Commit
ee220ffb
authored
Feb 26, 2018
by
fenglei.tian
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix bugs and apply clang-format
parent
a574bdaf
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
73 additions
and
115 deletions
+73
-115
gpu_cuda_context_manager.cpp
src/ngraph/runtime/gpu/gpu_cuda_context_manager.cpp
+1
-3
gpu_cuda_function_builder.cpp
src/ngraph/runtime/gpu/gpu_cuda_function_builder.cpp
+11
-13
gpu_cuda_function_builder.hpp
src/ngraph/runtime/gpu/gpu_cuda_function_builder.hpp
+1
-32
gpu_cuda_function_pool.cpp
src/ngraph/runtime/gpu/gpu_cuda_function_pool.cpp
+1
-3
gpu_cuda_function_pool.hpp
src/ngraph/runtime/gpu/gpu_cuda_function_pool.hpp
+1
-0
gpu_cuda_kernel_builder.cpp
src/ngraph/runtime/gpu/gpu_cuda_kernel_builder.cpp
+20
-21
gpu_cuda_kernel_builder.hpp
src/ngraph/runtime/gpu/gpu_cuda_kernel_builder.hpp
+2
-0
gpu_cuda_kernel_emitters.cpp
src/ngraph/runtime/gpu/gpu_cuda_kernel_emitters.cpp
+28
-34
gpu_cuda_kernel_emitters.hpp
src/ngraph/runtime/gpu/gpu_cuda_kernel_emitters.hpp
+1
-7
gpu_emitter.cpp
src/ngraph/runtime/gpu/gpu_emitter.cpp
+2
-2
gpu_util.hpp
src/ngraph/runtime/gpu/gpu_util.hpp
+5
-0
No files found.
src/ngraph/runtime/gpu/gpu_cuda_context_manager.cpp
View file @
ee220ffb
...
...
@@ -14,8 +14,6 @@
* limitations under the License.
*******************************************************************************/
#pragma once
#include <memory>
#include <string>
...
...
@@ -27,7 +25,7 @@ namespace ngraph
{
namespace
gpu
{
static
CudaContextManager
::
CudaContextManager
&
instance
()
CudaContextManager
&
CudaContextManager
::
instance
()
{
static
CudaContextManager
manager
;
return
manager
;
...
...
src/ngraph/runtime/gpu/gpu_cuda_function_builder.cpp
View file @
ee220ffb
...
...
@@ -14,11 +14,10 @@
* limitations under the License.
*******************************************************************************/
#pragma once
#include <string>
#include "ngraph/runtime/gpu/gpu_cuda_context_manager.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_function_builder.hpp"
#include "ngraph/runtime/gpu/gpu_util.hpp"
namespace
ngraph
...
...
@@ -27,21 +26,20 @@ namespace ngraph
{
namespace
gpu
{
st
atic
st
d
::
shared_ptr
<
CUfunction
>
CudaFunctionBuilder
::
get
(
const
std
::
string
&
name
,
const
std
::
string
&
kernel
,
int
number_of_options
,
const
char
**
options
)
std
::
shared_ptr
<
CUfunction
>
CudaFunctionBuilder
::
get
(
const
std
::
string
&
name
,
const
std
::
string
&
kernel
,
int
number_of_options
,
const
char
**
options
)
{
nvrtcProgram
prog
;
NVRTC_SAFE_CALL
(
nvrtcCreateProgram
(
&
prog
,
kernel
.
c_str
(),
"op.cu"
,
0
,
// numHeaders
NULL
,
// headers
NULL
));
// includeNames
kernel
.
c_str
(),
"op.cu"
,
0
,
// numHeaders
NULL
,
// headers
NULL
));
// includeNames
nvrtcResult
compile_result
=
nvrtcCompileProgram
(
prog
,
number_of_options
,
options
);
nvrtcResult
compile_result
=
nvrtcCompileProgram
(
prog
,
number_of_options
,
options
);
if
(
compile_result
!=
NVRTC_SUCCESS
)
{
...
...
src/ngraph/runtime/gpu/gpu_cuda_function_builder.hpp
View file @
ee220ffb
...
...
@@ -33,38 +33,7 @@ namespace ngraph
static
std
::
shared_ptr
<
CUfunction
>
get
(
const
std
::
string
&
name
,
const
std
::
string
&
kernel
,
int
number_of_options
,
const
char
**
options
)
{
nvrtcProgram
prog
;
NVRTC_SAFE_CALL
(
nvrtcCreateProgram
(
&
prog
,
kernel
.
c_str
(),
"op.cu"
,
0
,
// numHeaders
NULL
,
// headers
NULL
));
// includeNames
nvrtcResult
compile_result
=
nvrtcCompileProgram
(
prog
,
number_of_options
,
options
);
if
(
compile_result
!=
NVRTC_SUCCESS
)
{
throw
std
::
runtime_error
(
"compile error:
\n
"
+
kernel
+
"
\n
options"
);
}
size_t
ptx_size
;
NVRTC_SAFE_CALL
(
nvrtcGetPTXSize
(
prog
,
&
ptx_size
));
char
*
ptx
=
new
char
[
ptx_size
];
NVRTC_SAFE_CALL
(
nvrtcGetPTX
(
prog
,
ptx
));
// Load the generated PTX and get a handle to the parent kernel.
NVRTC_SAFE_CALL
(
nvrtcDestroyProgram
(
&
prog
));
// Destroy the program.
CUmodule
module
;
CUfunction
function
;
CUDA_SAFE_CALL
(
cuModuleLoadDataEx
(
&
module
,
ptx
,
0
,
0
,
0
));
CUDA_SAFE_CALL
(
cuModuleGetFunction
(
&
function
,
module
,
name
.
c_str
()));
return
std
::
make_shared
<
CUfunction
>
(
function
);
}
const
char
**
options
);
};
}
}
...
...
src/ngraph/runtime/gpu/gpu_cuda_function_pool.cpp
View file @
ee220ffb
...
...
@@ -14,8 +14,6 @@
* limitations under the License.
*******************************************************************************/
#pragma once
#include <string>
#include <unordered_map>
...
...
@@ -27,7 +25,7 @@ namespace ngraph
{
namespace
gpu
{
static
CudaFunctionPool
::
CudaFunctionPool
&
instance
()
CudaFunctionPool
&
CudaFunctionPool
::
instance
()
{
static
CudaFunctionPool
pool
;
return
pool
;
...
...
src/ngraph/runtime/gpu/gpu_cuda_function_pool.hpp
View file @
ee220ffb
...
...
@@ -38,6 +38,7 @@ namespace ngraph
void
set
(
std
::
string
&
name
,
std
::
shared_ptr
<
CUfunction
>
function
);
std
::
shared_ptr
<
CUfunction
>
get
(
std
::
string
&
name
);
protected
:
CudaFunctionPool
()
{}
~
CudaFunctionPool
()
{}
...
...
src/ngraph/runtime/gpu/gpu_cuda_kernel_builder.cpp
View file @
ee220ffb
...
...
@@ -14,8 +14,6 @@
* limitations under the License.
*******************************************************************************/
#pragma once
#include "ngraph/runtime/gpu/gpu_cuda_kernel_builder.hpp"
namespace
ngraph
...
...
@@ -24,47 +22,48 @@ namespace ngraph
{
namespace
gpu
{
void CudaKernelBuilder::get_1_element_op(const std::string& name,
                                         const std::string& data_type,
                                         const std::string& op,
                                         std::string& kernel)
{
    // Emit the source of a CUDA kernel "cuda_<name>" that applies the unary
    // function <op> element-wise: out[i] = <op>(in[i]) for i in [0, n).
    // The generated kernel guards with (tid < n), so any launch geometry
    // covering at least n threads is valid.
    const std::string header = R"(
extern "C" __global__
void cuda_)";
    const std::string params =
        "(" + data_type + "* in, " + data_type + "* out, size_t n)\n";
    const std::string body_open = R"({
size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
if(tid < n)
{
out[tid] =)";
    const std::string body_close = "(in[tid]);\n" + std::string(R"(}
})");

    kernel = header + name + params + body_open + op + body_close;
}
void CudaKernelBuilder::get_2_element_op(const std::string& name,
                                         const std::string& data_type,
                                         const std::string& op,
                                         std::string& kernel)
{
    // Emit the source of a CUDA kernel "<name>" that applies the binary
    // operator <op> element-wise: out[i] = in1[i] <op> in2[i] for i in [0, n).
    // NOTE(review): get_1_element_op names its kernel "cuda_<name>" while this
    // one emits a plain "<name>" symbol — confirm callers pass the matching
    // (unprefixed) name to CudaFunctionBuilder::get.
    kernel = R"(
extern "C" __global__
void )" + name + "(" + data_type + "* in1, " + data_type + "* in2, " +
             data_type + "* out, size_t n)\n" + R"({
size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
if(tid < n)
{
out[tid] = in1[tid] )" +
             op +
             // Bug fix: terminate the generated statement. The emitted line
             // previously read "out[tid] = in1[tid] <op>in2[tid]" with no
             // trailing ';', which cannot compile under NVRTC.
             "in2[tid];\n" + R"(}
})";
    return;
}
void CudaKernelBuilder::get_n_element_op(const std::string& name,
                                         const std::string& data_type,
                                         const std::vector<std::string>& ops,
                                         std::string& kernel)
{
    // TODO: fusing a chain of element-wise ops is not implemented yet;
    // callers currently receive an empty kernel string.
    kernel = "";
}
...
...
src/ngraph/runtime/gpu/gpu_cuda_kernel_builder.hpp
View file @
ee220ffb
...
...
@@ -17,6 +17,7 @@
#pragma once
#include <string>
#include <vector>
namespace
ngraph
{
...
...
@@ -41,6 +42,7 @@ namespace ngraph
const
std
::
string
&
data_type
,
const
std
::
vector
<
std
::
string
>&
ops
,
std
::
string
&
kernel
);
};
}
}
}
src/ngraph/runtime/gpu/gpu_cuda_kernel_emitters.cpp
View file @
ee220ffb
...
...
@@ -28,44 +28,38 @@ namespace ngraph
{
namespace
gpu
{
namespace
cuda
void
emit_abs
(
void
*
in
,
void
*
out
,
size_t
count
)
{
namespace
kernel
std
::
string
name
=
"abs"
;
// Create an instance of nvrtcProgram with the code string.
if
(
CudaFunctionPool
::
instance
().
get
(
name
)
==
nullptr
)
{
void
emit_abs
(
void
*
in
,
void
*
out
,
size_t
count
)
{
std
::
string
name
=
"abs"
;
// Create an instance of nvrtcProgram with the code string.
if
(
CudaFunctionPool
::
instance
().
get
(
name
)
==
nullptr
)
{
const
char
*
opts
[]
=
{
"--gpu-architecture=compute_35"
,
"--relocatable-device-code=true"
};
std
::
string
kernel
;
CudaKernelBuilder
::
get_1_element_op
(
name
,
"float"
,
"fabsf"
,
kernel
);
CudaFunctionPool
::
instance
().
set
(
name
,
CudaFunctionBuilder
::
get
(
"cuda_"
+
name
,
kernel
,
2
,
opts
));
}
const
char
*
opts
[]
=
{
"--gpu-architecture=compute_35"
,
"--relocatable-device-code=true"
};
std
::
string
kernel
;
CudaKernelBuilder
::
get_1_element_op
(
name
,
"float"
,
"fabsf"
,
kernel
);
CudaFunctionPool
::
instance
().
set
(
name
,
CudaFunctionBuilder
::
get
(
"cuda_"
+
name
,
kernel
,
2
,
opts
));
}
//convert runtime ptr to driver api ptr
CUdeviceptr
d_ptr_in
,
d_ptr_out
;
d_ptr_in
=
(
CUdeviceptr
)
in
;
d_ptr_out
=
(
CUdeviceptr
)
out
;
//convert runtime ptr to driver api ptr
CUdeviceptr
d_ptr_in
,
d_ptr_out
;
d_ptr_in
=
(
CUdeviceptr
)
in
;
d_ptr_out
=
(
CUdeviceptr
)
out
;
void
*
args_list
[]
=
{
&
d_ptr_in
,
&
d_ptr_out
,
&
count
};
CUDA_SAFE_CALL
(
cuLaunchKernel
(
*
CudaFunctionPool
::
instance
().
get
(
name
).
get
(),
count
,
1
,
1
,
// grid dim
1
,
1
,
1
,
// block dim
0
,
NULL
,
// shared mem and stream
args_list
,
0
));
// arguments
CUDA_SAFE_CALL
(
cuCtxSynchronize
());
// Retrieve and print output.
}
}
void
*
args_list
[]
=
{
&
d_ptr_in
,
&
d_ptr_out
,
&
count
};
CUDA_SAFE_CALL
(
cuLaunchKernel
(
*
CudaFunctionPool
::
instance
().
get
(
name
).
get
(),
count
,
1
,
1
,
// grid dim
1
,
1
,
1
,
// block dim
0
,
NULL
,
// shared mem and stream
args_list
,
0
));
// arguments
CUDA_SAFE_CALL
(
cuCtxSynchronize
());
// Retrieve and print output.
}
}
}
...
...
src/ngraph/runtime/gpu/gpu_cuda_kernel_emitters.hpp
View file @
ee220ffb
...
...
@@ -25,13 +25,7 @@ namespace ngraph
{
namespace
gpu
{
namespace
cuda
{
namespace
kernel
{
void
emit_abs
(
void
*
in
,
void
*
out
,
size_t
count
);
}
}
void
emit_abs
(
void
*
in
,
void
*
out
,
size_t
count
);
}
}
}
src/ngraph/runtime/gpu/gpu_emitter.cpp
View file @
ee220ffb
...
...
@@ -90,8 +90,8 @@ void runtime::gpu::GPU_Emitter::EmitAbs(codegen::CodeWriter& writer,
writer
.
indent
++
;
writer
<<
"int count = "
<<
out
[
0
].
get_size
()
<<
";
\n
"
;
writer
<<
"if(count == 0) return;
\n
"
;
writer
<<
"ngraph::runtime::gpu::
cuda::kernel::emit_abs((void*) "
<<
args
[
0
].
get_name
()
<<
", (void*) "
<<
out
[
0
].
get_name
()
<<
", count);
\n
"
;
writer
<<
"ngraph::runtime::gpu::
emit_abs((void*) "
<<
args
[
0
].
get_name
()
<<
", (void*) "
<<
out
[
0
].
get_name
()
<<
", count);
\n
"
;
writer
.
indent
--
;
writer
<<
"}
\n
"
;
}
...
...
src/ngraph/runtime/gpu/gpu_util.hpp
View file @
ee220ffb
...
...
@@ -16,6 +16,11 @@
#pragma once
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>
#include <cublas_v2.h>
#include <cuda.h>
#include <cuda_runtime.h>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment