Unverified commit c5549682, authored by Tristan Webb and committed by GitHub

Drwebb/gpu external function (#367)

* Initial GPU_ExternalFunction implementation

Other changes:

Add GPU runtime to the same CMake block as GPU, and include CUDA headers if GPU is enabled

Initial passing (a+b)*c test

Properly link CUDA libraries

Simple GPUTensorView implementation

Initial GPU emitter

Initial GPU codegen function generation; no kernels yet

Rename GPU emitter and tensor_view_wrapper to match naming convention

* GPU external function based on BASE

* Fix stray base -> gpu

* TensorViewWrapper -> GPU_TensorViewWrapper

* Copy over emitter from base transformer

* Fix for naming dense layout

* Copy kernel emitters from base -> gpu and strip out kernel_utils

* Add aliases to GPU_TensorViewWrappers

* More fixes for naming descriptor::TensorViews

* Move in call_frame implementation from base -> gpu

* apply code format

* GPU codegen running A+B*C

GPU emitters
GPU context setup and cuda_module kernels
Remove GPU_CF perf counters
Use GPU kernels in the external function
Add a GPU 1d Dot test

Review changes:
* Remove CPU-specific kernel-emitting method bodies

* Use copy_data from test/util.cpp, uncomment compileTest

* Use test_utils copy_data function

* Grab the function name from the pass manager for the definition; clean up indentation
parent e433e55a
......@@ -190,9 +190,12 @@ endif()
set(SRC ${SRC}
runtime/gpu/gpu_call_frame.cpp
runtime/gpu/gpu_backend.cpp
runtime/gpu/gpu_manager.cpp
runtime/gpu/gpu_emitter.cpp
runtime/gpu/gpu_external_function.cpp
runtime/gpu/gpu_kernel_emitters.cpp
runtime/gpu/gpu_manager.cpp
runtime/gpu/gpu_tensor_view.cpp
runtime/gpu/gpu_tensor_view_wrapper.cpp
)
set_property(SOURCE codegen/compiler.cpp APPEND_STRING PROPERTY COMPILE_DEFINITIONS
"CUDA_HEADER_PATHS=\"${CUDA_INCLUDE_DIRS}\";")
......@@ -265,7 +268,7 @@ if(NGRAPH_CPU_ENABLE)
endif()
if(NGRAPH_GPU_ENABLE AND CUDA_LIBRARIES)
target_link_libraries(ngraph PRIVATE ${CUDA_LIBRARIES} ${CUDNN_LIBRARIES})
target_link_libraries(ngraph PRIVATE cuda)
endif()
# Argon
......
......@@ -13,6 +13,7 @@
// ----------------------------------------------------------------------------
#include "ngraph/runtime/gpu/gpu_backend.hpp"
#include "ngraph/runtime/cpu/cpu_tensor_view.hpp"
#include "ngraph/runtime/external_function.hpp"
#include "ngraph/runtime/gpu/gpu_tensor_view.hpp"
......@@ -29,6 +30,6 @@ std::shared_ptr<ngraph::runtime::TensorView>
runtime::gpu::GPU_Backend::make_primary_tensor_view(const ngraph::element::Type& element_type,
const Shape& shape)
{
auto rc = make_shared<runtime::gpu::GPU_TensorView>(element_type, shape);
auto rc = make_shared<runtime::cpu::CPU_TensorView>(element_type, shape);
return dynamic_pointer_cast<runtime::TensorView>(rc);
}
......@@ -12,25 +12,66 @@
// see the license for the specific language governing permissions and
// ----------------------------------------------------------------------------
#include <cstdlib>
#include <fstream>
#include "ngraph/runtime/cpu/cpu_tensor_view.hpp"
#include "ngraph/runtime/gpu/gpu_call_frame.hpp"
#include "ngraph/runtime/gpu/gpu_external_function.hpp"
#include "ngraph/runtime/gpu/gpu_tensor_view.hpp"
using namespace std;
using namespace ngraph::runtime::gpu;
using namespace ngraph;
GPU_CallFrame::GPU_CallFrame(shared_ptr<GPU_ExternalFunction> external_function,
shared_ptr<Function> func)
runtime::gpu::GPU_CallFrame::GPU_CallFrame(std::shared_ptr<GPU_ExternalFunction> external_function,
EntryPoint compiled_function)
: m_external_function(external_function)
, m_function(func)
, m_compiled_function(compiled_function)
{
}
void GPU_CallFrame::call(const vector<shared_ptr<Value>>& input_tvs,
const vector<shared_ptr<Value>>& output_tvs)
void runtime::gpu::GPU_CallFrame::tensor_call(
const std::vector<std::shared_ptr<ngraph::runtime::TensorView>>& input_tvs,
const std::vector<std::shared_ptr<ngraph::runtime::TensorView>>& output_tvs)
{
// Host tensors
vector<void*> inputs;
vector<void*> outputs;
for (size_t i = 0; i < input_tvs.size(); i++)
{
shared_ptr<runtime::cpu::CPU_TensorView> tv =
static_pointer_cast<runtime::cpu::CPU_TensorView>(input_tvs[i]);
inputs.push_back(tv->get_data_ptr());
}
for (size_t i = 0; i < output_tvs.size(); i++)
{
shared_ptr<runtime::cpu::CPU_TensorView> tv =
static_pointer_cast<runtime::cpu::CPU_TensorView>(output_tvs[i]);
outputs.push_back(tv->get_data_ptr());
}
// Invoke compiled computation
m_compiled_function(inputs.data(), outputs.data());
}
void GPU_CallFrame::tensor_call(const std::vector<std::shared_ptr<TensorView>>& inputs,
const std::vector<std::shared_ptr<TensorView>>& outputs)
void runtime::gpu::GPU_CallFrame::call(
const std::vector<std::shared_ptr<runtime::TensorView>>& arguments,
const std::vector<std::shared_ptr<runtime::TensorView>>& results)
{
// TODO: Check types of args and result
vector<shared_ptr<runtime::TensorView>> inputs;
for (shared_ptr<runtime::TensorView> argument : arguments)
{
argument->collect_tensor_views(inputs, argument);
}
vector<shared_ptr<runtime::TensorView>> outputs;
for (shared_ptr<runtime::TensorView> result : results)
{
result->collect_tensor_views(outputs, result);
}
tensor_call(inputs, outputs);
}
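Here call() expands each argument into its constituent tensor views, and tensor_call() then hands raw host data pointers to the compiled entry point. A minimal standalone sketch of that calling convention, assuming EntryPoint is effectively void(void**, void**) as in the CPU backend; fake_compiled_abc merely stands in for the generated code:

#include <cstddef>
#include <functional>
#include <iostream>
#include <vector>

// Assumed shape of the compiled entry point: arrays of raw input/output pointers.
using EntryPoint = std::function<void(void**, void**)>;

// Stand-in for the generated code: result = (a + b) * c over four floats.
void fake_compiled_abc(void** inputs, void** outputs)
{
    const float* a = static_cast<const float*>(inputs[0]);
    const float* b = static_cast<const float*>(inputs[1]);
    const float* c = static_cast<const float*>(inputs[2]);
    float* r = static_cast<float*>(outputs[0]);
    for (size_t i = 0; i < 4; i++)
    {
        r[i] = (a[i] + b[i]) * c[i];
    }
}

int main()
{
    std::vector<float> a{1, 2, 3, 4}, b{5, 6, 7, 8}, c{9, 10, 11, 12}, r(4);

    // The call frame gathers data pointers like this before invoking
    // m_compiled_function(inputs.data(), outputs.data()).
    std::vector<void*> inputs{a.data(), b.data(), c.data()};
    std::vector<void*> outputs{r.data()};

    EntryPoint compiled = fake_compiled_abc;
    compiled(inputs.data(), outputs.data());

    for (float v : r)
    {
        std::cout << v << " "; // 54 80 110 144
    }
    std::cout << "\n";
}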
......@@ -26,6 +26,8 @@ namespace ngraph
{
namespace runtime
{
class PrimaryTensorView;
namespace gpu
{
class GPU_CallFrame;
......@@ -40,22 +42,23 @@ namespace ngraph
{
public:
GPU_CallFrame(std::shared_ptr<GPU_ExternalFunction> external_function,
std::shared_ptr<Function> func);
EntryPoint compiled_function);
/// @brief Invoke the function with values matching the signature of the function.
///
/// Tuples will be expanded into their tensor views to build the call frame.
void call(const std::vector<std::shared_ptr<ngraph::runtime::Value>>& inputs,
const std::vector<std::shared_ptr<ngraph::runtime::Value>>& outputs);
void
call(const std::vector<std::shared_ptr<runtime::TensorView>>& inputs,
const std::vector<std::shared_ptr<runtime::TensorView>>& outputs) override;
/// @brief Invoke the function with tuples pre-expanded to their underlying
/// tensor views.
void tensor_call(const std::vector<std::shared_ptr<TensorView>>& inputs,
const std::vector<std::shared_ptr<TensorView>>& outputs);
const std::vector<std::shared_ptr<TensorView>>& outputs) override;
protected:
std::shared_ptr<GPU_ExternalFunction> m_external_function;
std::shared_ptr<Function> m_function;
EntryPoint m_compiled_function;
};
}
}
......
This diff is collapsed.
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// ----------------------------------------------------------------------------
#pragma once
#include <string>
#include <vector>
#include "ngraph/codegen/code_writer.hpp"
#include "ngraph/node.hpp"
#include "ngraph/runtime/gpu/gpu_external_function.hpp"
#include "ngraph/runtime/gpu/gpu_tensor_view_wrapper.hpp"
#define EMITTER_DECL(E) \
E(const ngraph::Node* n, \
const std::vector<ngraph::runtime::gpu::GPU_TensorViewWrapper>& args, \
const std::vector<ngraph::runtime::gpu::GPU_TensorViewWrapper>& out)
namespace ngraph
{
namespace runtime
{
namespace gpu
{
class GPU_Emitter
{
protected:
codegen::CodeWriter m_out;
bool m_use_ref_kernels;
public:
GPU_Emitter()
: m_out()
, m_use_ref_kernels(std::getenv("NGRAPH_GPU_USE_REF_KERNELS") != nullptr)
{
}
std::string get_code() { return m_out.get_code(); }
codegen::CodeWriter& get_code_writer() { return m_out; }
void EMITTER_DECL(EmitNop);
void EMITTER_DECL(EmitAdd);
void EMITTER_DECL(EmitDot);
void EMITTER_DECL(EmitMultiply);
void EMITTER_DECL(EmitGetOutputElement);
void EMITTER_DECL(EmitXLAGetTupleElement);
void EMITTER_DECL(EmitTuple);
void EMITTER_DECL(EmitAbs);
void EMITTER_DECL(EmitConcat);
void EMITTER_DECL(EmitDivide);
void EMITTER_DECL(EmitEqual);
void EMITTER_DECL(EmitGreater);
void EMITTER_DECL(EmitGreaterEq);
void EMITTER_DECL(EmitLess);
void EMITTER_DECL(EmitLessEq);
void EMITTER_DECL(EmitLog);
void EMITTER_DECL(EmitMaximum);
void EMITTER_DECL(EmitMinimum);
void EMITTER_DECL(EmitNegative);
void EMITTER_DECL(EmitNotEqual);
void EMITTER_DECL(EmitSelect);
void EMITTER_DECL(EmitSubtract);
void EMITTER_DECL(EmitBroadcast);
void EMITTER_DECL(EmitConvert);
void EMITTER_DECL(EmitConstant);
void EMITTER_DECL(EmitReshape);
void EMITTER_DECL(EmitFunctionCall);
void EMITTER_DECL(EmitReduce);
void EMITTER_DECL(EmitSign);
void EMITTER_DECL(EmitSlice);
void EMITTER_DECL(EmitSum);
void EMITTER_DECL(EmitExp);
void EMITTER_DECL(EmitSin);
void EMITTER_DECL(EmitSinh);
void EMITTER_DECL(EmitCos);
void EMITTER_DECL(EmitCosh);
void EMITTER_DECL(EmitTan);
void EMITTER_DECL(EmitTanh);
void EMITTER_DECL(EmitAsin);
void EMITTER_DECL(EmitAcos);
void EMITTER_DECL(EmitAtan);
void EMITTER_DECL(EmitPower);
void EMITTER_DECL(EmitReplaceSlice);
void EMITTER_DECL(EmitOneHot);
void EMITTER_DECL(EmitFloor);
void EMITTER_DECL(EmitCeiling);
void EMITTER_DECL(EmitSqrt);
void EMITTER_DECL(EmitConvolution);
void EMITTER_DECL(EmitNot);
void EMITTER_DECL(EmitMaxPool);
void EMITTER_DECL(EmitReverse);
private:
void generate_call(const std::vector<GPU_TensorViewWrapper>& args,
const std::vector<GPU_TensorViewWrapper>& out,
std::shared_ptr<Function> function);
std::string emit_vector(const GPU_TensorViewWrapper&, const std::string& name = "");
std::string emit_array1d(const GPU_TensorViewWrapper&,
const std::string& name = "");
std::string emit_matrix(const GPU_TensorViewWrapper&, const std::string& name = "");
};
}
}
}
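Each EMITTER_DECL(EmitX) line declares a member with the same three-parameter signature, and an emitter's job is to append generated source text to the CodeWriter rather than compute anything. The self-contained analogue below shows the same macro trick and emission style; MiniWriter and MiniEmitter are invented names for illustration only:

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Toy stand-in for codegen::CodeWriter: collects emitted source as text.
struct MiniWriter
{
    std::ostringstream out;
    std::string get_code() const { return out.str(); }
};

// Same trick as EMITTER_DECL: keep the shared parameter list in one macro.
#define MINI_EMITTER_DECL(E)                                                                       \
    E(const std::string& op_name, const std::vector<std::string>& args,                            \
      const std::vector<std::string>& out)

struct MiniEmitter
{
    MiniWriter writer;
    void MINI_EMITTER_DECL(EmitAdd);
    void MINI_EMITTER_DECL(EmitMultiply);
};

void MiniEmitter::EmitAdd(const std::string&,
                          const std::vector<std::string>& args,
                          const std::vector<std::string>& out)
{
    // Emits source text; nothing is computed here.
    writer.out << out[0] << " = " << args[0] << " + " << args[1] << ";\n";
}

void MiniEmitter::EmitMultiply(const std::string&,
                               const std::vector<std::string>& args,
                               const std::vector<std::string>& out)
{
    writer.out << out[0] << " = " << args[0] << " * " << args[1] << ";\n";
}

int main()
{
    MiniEmitter e;
    e.EmitAdd("Add", {"a", "b"}, {"t0"});
    e.EmitMultiply("Multiply", {"t0", "c"}, {"result"});
    std::cout << e.writer.get_code(); // t0 = a + b;  result = t0 * c;
}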
......@@ -20,9 +20,13 @@
#include <typeinfo>
#include <unordered_map>
#include "ngraph/codegen/code_writer.hpp"
#include "ngraph/codegen/compiler.hpp"
#include "ngraph/codegen/execution_engine.hpp"
#include "ngraph/function.hpp"
#include "ngraph/runtime/external_function.hpp"
#include "ngraph/runtime/gpu/gpu_call_frame.hpp"
#include "ngraph/runtime/gpu/gpu_tensor_view_wrapper.hpp"
namespace ngraph
{
......@@ -30,9 +34,23 @@ namespace ngraph
{
namespace gpu
{
class GPU_ExternalFunction;
class GPU_Emitter;
class GPU_CallFrame;
using OpFunction =
std::function<void(GPU_Emitter*,
const ngraph::Node*,
const std::vector<GPU_TensorViewWrapper>& inputs,
const std::vector<GPU_TensorViewWrapper>& outputs)>;
using OpMap = std::unordered_map<std::type_index, OpFunction>;
class GPU_ExternalFunction : public ngraph::runtime::ExternalFunction,
public std::enable_shared_from_this<GPU_ExternalFunction>
{
friend class GPU_CallFrame;
public:
GPU_ExternalFunction(const std::shared_ptr<ngraph::Function>& function,
bool release_function = true);
......@@ -41,7 +59,27 @@ namespace ngraph
protected:
void compile();
std::shared_ptr<ngraph::Function> m_function;
EntryPoint m_compiled_function;
private:
void emit_debug_function_entry(codegen::CodeWriter& writer,
Node* node,
const std::vector<GPU_TensorViewWrapper>& in,
const std::vector<GPU_TensorViewWrapper>& out);
void emit_debug_function_exit(codegen::CodeWriter& writer,
Node* node,
const std::vector<GPU_TensorViewWrapper>& in,
const std::vector<GPU_TensorViewWrapper>& out);
void handle_output_alias(
codegen::CodeWriter& writer,
const Node&,
const std::unordered_map<descriptor::TensorView*, std::vector<size_t>>&);
std::unique_ptr<codegen::Compiler> m_compiler;
std::unique_ptr<codegen::ExecutionEngine> m_execution_engine;
bool m_emit_timing;
bool m_use_tbb;
std::unordered_map<std::string, std::string> m_variable_name_map;
};
}
}
......
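OpMap keys each op's emitter function by std::type_index, so compile() can look up the right GPU_Emitter member for every node it visits. The standalone sketch below mirrors that dispatch pattern with dummy node and emitter types (all names are invented for the example):

#include <functional>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <typeindex>
#include <unordered_map>
#include <vector>

// Dummy node hierarchy standing in for ngraph::Node and its op subclasses.
struct Node { virtual ~Node() = default; };
struct Add : Node {};
struct Multiply : Node {};

struct Emitter
{
    void emit_add(const Node*) { std::cout << "emit Add kernel call\n"; }
    void emit_multiply(const Node*) { std::cout << "emit Multiply kernel call\n"; }
};

using OpFunction = std::function<void(Emitter*, const Node*)>;
using OpMap = std::unordered_map<std::type_index, OpFunction>;

int main()
{
    // Analogous to the GPU_ExternalFunction dispatcher: typeid -> emitter member.
    OpMap dispatcher{
        {std::type_index(typeid(Add)), &Emitter::emit_add},
        {std::type_index(typeid(Multiply)), &Emitter::emit_multiply},
    };

    std::vector<std::unique_ptr<Node>> graph;
    graph.emplace_back(new Add);
    graph.emplace_back(new Multiply);

    Emitter emitter;
    for (const auto& node : graph)
    {
        auto it = dispatcher.find(std::type_index(typeid(*node)));
        if (it == dispatcher.end())
        {
            throw std::runtime_error("unhandled op");
        }
        it->second(&emitter, node.get());
    }
}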
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// ----------------------------------------------------------------------------
#include <algorithm>
#include <map>
#include "ngraph/codegen/code_writer.hpp"
#include "ngraph/runtime/gpu/gpu_kernel_emitters.hpp"
using namespace ngraph;
using namespace ngraph::runtime::gpu::kernel;
void ngraph::runtime::gpu::kernel::emit_broadcast(codegen::CodeWriter& writer,
const std::string& element_type,
const std::string& arg0, // replacement context
const std::string& out,
const Shape& arg0_shape,
const Shape& out_shape,
const AxisSet& broadcast_axes)
{
}
//
// For the reference kernel this is based on, see ngraph/runtime/kernel/concat.hpp.
//
void ngraph::runtime::gpu::kernel::emit_concat(codegen::CodeWriter& writer,
const std::string& element_type,
const std::vector<std::string>& args,
const std::string& out,
const std::vector<Shape>& in_shapes,
const Shape& out_shape,
size_t concatenation_axis)
{
}
void ngraph::runtime::gpu::kernel::emit_replace_slice(
codegen::CodeWriter& writer,
const std::string& element_type,
const std::string& arg0, // replacement context
const std::string& arg1, // replacement value
const std::string& out,
const Shape& arg1_shape,
const Shape& out_shape,
const Coordinate& lower_bounds,
const Coordinate& upper_bounds,
const Strides& strides)
{
}
void ngraph::runtime::gpu::kernel::emit_slice(codegen::CodeWriter& writer,
const std::string& element_type,
const std::string& arg0, // replacement context
const std::string& out,
const Shape& arg0_shape,
const Shape& out_shape,
const Coordinate& lower_bounds,
const Coordinate& upper_bounds,
const Strides& strides)
{
}
void ngraph::runtime::gpu::kernel::emit_reshape(codegen::CodeWriter& writer,
const std::string& element_type,
const std::string& arg0, // replacement context
const std::string& out,
const Shape& arg0_shape,
const Shape& out_shape,
const AxisVector& arg0_axis_order)
{
}
void ngraph::runtime::gpu::kernel::emit_sum(codegen::CodeWriter& writer,
const std::string& element_type,
const std::string& arg0, // replacement context
const std::string& out,
const Shape& arg0_shape,
const Shape& out_shape,
const AxisSet& reduction_axes)
{
}
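The kernel emitters above are still empty stubs. To show the kind of text one of them might eventually write through the CodeWriter, here is a standalone sketch that emits a naive scalar-broadcast loop as a string; emit_broadcast_sketch and the emitted code are hypothetical, not what the real GPU emitter will produce:

#include <cstddef>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Hypothetical sketch: write a flat loop that broadcasts a scalar argument
// across the output, the way a kernel emitter would write into CodeWriter.
std::string emit_broadcast_sketch(const std::string& element_type,
                                  const std::string& arg0,
                                  const std::string& out,
                                  const std::vector<size_t>& out_shape)
{
    size_t count = 1;
    for (size_t d : out_shape)
    {
        count *= d;
    }
    std::ostringstream writer;
    writer << "// broadcast a " << element_type << " scalar across the output\n";
    writer << "for (size_t i = 0; i < " << count << "; i++)\n";
    writer << "{\n";
    writer << "    " << out << "[i] = " << arg0 << "[0];\n";
    writer << "}\n";
    return writer.str();
}

int main()
{
    // Broadcast a scalar argument across a 2x2 output tensor.
    std::cout << emit_broadcast_sketch("float", "arg0", "out0", {2, 2});
}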
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// ----------------------------------------------------------------------------
#pragma once
#include "ngraph/codegen/code_writer.hpp"
#include "ngraph/common.hpp"
namespace ngraph
{
namespace runtime
{
namespace gpu
{
namespace kernel
{
void emit_broadcast(codegen::CodeWriter& writer,
const std::string& element_type,
const std::string& arg0, // replacement context
const std::string& out,
const Shape& arg0_shape,
const Shape& out_shape,
const AxisSet& broadcast_axes);
void emit_concat(codegen::CodeWriter& writer,
const std::string& element_type,
const std::vector<std::string>& args,
const std::string& out,
const std::vector<Shape>& in_shapes,
const Shape& out_shape,
const size_t concatenation_axis);
void emit_replace_slice(codegen::CodeWriter& writer,
const std::string& element_type,
const std::string& arg0, // replacement context
const std::string& arg1, // replacement value
const std::string& out,
const Shape& arg1_shape,
const Shape& out_shape,
const Coordinate& lower_bounds,
const Coordinate& upper_bounds,
const Strides& strides);
void emit_slice(codegen::CodeWriter& writer,
const std::string& element_type,
const std::string& arg0, // replacement context
const std::string& out,
const Shape& arg0_shape,
const Shape& out_shape,
const Coordinate& lower_bounds,
const Coordinate& upper_bounds,
const Strides& strides);
void emit_reshape(codegen::CodeWriter& writer,
const std::string& element_type,
const std::string& arg0, // replacement context
const std::string& out,
const Shape& arg0_shape,
const Shape& out_shape,
const AxisVector& arg0_axis_order);
void emit_sum(codegen::CodeWriter& writer,
const std::string& element_type,
const std::string& arg0, // replacement context
const std::string& out,
const Shape& arg0_shape,
const Shape& out_shape,
const AxisSet& reduction_axes);
}
}
}
}
......@@ -14,6 +14,8 @@
#include <memory>
#include <cuda.h>
#include "ngraph/descriptor/layout/dense_tensor_view_layout.hpp"
#include "ngraph/descriptor/primary_tensor_view.hpp"
#include "ngraph/runtime/gpu/gpu_backend.hpp"
......@@ -30,59 +32,26 @@ runtime::gpu::GPU_TensorView::GPU_TensorView(const ngraph::element::Type& elemen
true,
true,
false))
, m_allocated_buffer_pool(nullptr)
, m_aligned_buffer_pool(nullptr)
{
// Need to check type and have host/device tensors
m_descriptor->set_tensor_view_layout(
std::make_shared<ngraph::descriptor::layout::DenseTensorViewLayout>(*m_descriptor));
m_buffer_size = m_descriptor->get_tensor_view_layout()->get_size() * element_type.size();
if (m_buffer_size > 0)
{
size_t allocation_size = m_buffer_size + runtime::gpu::alignment;
m_allocated_buffer_pool = static_cast<char*>(malloc(allocation_size));
m_aligned_buffer_pool = m_allocated_buffer_pool;
size_t mod = size_t(m_aligned_buffer_pool) % alignment;
if (mod != 0)
{
m_aligned_buffer_pool += (alignment - mod);
}
}
}
runtime::gpu::GPU_TensorView::~GPU_TensorView()
{
if (m_allocated_buffer_pool != nullptr)
{
free(m_allocated_buffer_pool);
}
}
char* runtime::gpu::GPU_TensorView::get_data_ptr()
{
return m_aligned_buffer_pool;
// cuMemAlloc(&dev_buffer, m_buffer_size);
}
const char* runtime::gpu::GPU_TensorView::get_data_ptr() const
runtime::gpu::GPU_TensorView::~GPU_TensorView()
{
return m_aligned_buffer_pool;
// cuMemFree(dev_buffer);
}
void runtime::gpu::GPU_TensorView::write(const void* source, size_t tensor_offset, size_t n)
{
if (tensor_offset + n > m_buffer_size)
{
throw out_of_range("write access past end of tensor");
}
char* target = get_data_ptr();
// cuMemcpyHtoD(dev_buffer, source, n);
}
void runtime::gpu::GPU_TensorView::read(void* target, size_t tensor_offset, size_t n) const
{
if (tensor_offset + n > m_buffer_size)
{
throw out_of_range("read access past end of tensor");
}
const char* source = get_data_ptr();
// cuMemcpyDtoH(target, dev_buffer, n);
}
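With the cuMemAlloc/cuMemcpy calls still commented out, read() and write() do not yet touch the device. A minimal standalone sketch of the intended CUDA driver API flow, allocating a device buffer and copying host memory to the device and back, with error handling reduced to an assert:

#include <cassert>
#include <cstring>
#include <iostream>
#include <vector>

#include <cuda.h>

static void check(CUresult err)
{
    assert(err == CUDA_SUCCESS);
}

int main()
{
    check(cuInit(0));
    CUdevice device;
    check(cuDeviceGet(&device, 0));
    CUcontext context;
    check(cuCtxCreate(&context, 0, device));

    // What GPU_TensorView's constructor is expected to do: allocate
    // m_buffer_size bytes of device memory and keep the CUdeviceptr handle.
    const size_t buffer_size = 4 * sizeof(float);
    CUdeviceptr dev_buffer;
    check(cuMemAlloc(&dev_buffer, buffer_size));

    // write(): host -> device, read(): device -> host.
    std::vector<float> host_in{1.0f, 2.0f, 3.0f, 4.0f};
    std::vector<float> host_out(4, 0.0f);
    check(cuMemcpyHtoD(dev_buffer, host_in.data(), buffer_size));
    check(cuMemcpyDtoH(host_out.data(), dev_buffer, buffer_size));

    std::cout << "round trip ok: " << std::boolalpha
              << (std::memcmp(host_in.data(), host_out.data(), buffer_size) == 0) << "\n";

    // ~GPU_TensorView() counterpart.
    check(cuMemFree(dev_buffer));
    check(cuCtxDestroy(context));
}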
......@@ -14,6 +14,7 @@
#pragma once
#include <cuda.h>
#include <memory>
#include "ngraph/runtime/tensor_view.hpp"
......@@ -36,9 +37,6 @@ public:
GPU_TensorView(const ngraph::element::Type& element_type, const Shape& shape);
virtual ~GPU_TensorView();
char* get_data_ptr();
const char* get_data_ptr() const;
/// @brief Write bytes directly into the tensor
/// @param p Pointer to source of data
/// @param tensor_offset Offset into tensor storage to begin writing. Must be element-aligned.
......@@ -51,8 +49,12 @@ public:
/// @param n Number of bytes to read, must be integral number of elements.
void read(void* p, size_t tensor_offset, size_t n) const override;
// const char* get_data_ptr();
// const char* get_data_ptr() const;
private:
char* m_allocated_buffer_pool;
char* m_aligned_buffer_pool;
CUdeviceptr dev_buffer;
// At some point need to deal with alignment
size_t m_buffer_size;
};
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// ----------------------------------------------------------------------------
#include "ngraph/runtime/gpu/gpu_tensor_view_wrapper.hpp"
#include "ngraph/descriptor/layout/tensor_view_layout.hpp"
#include "ngraph/descriptor/tensor.hpp"
using namespace std;
using namespace ngraph;
runtime::gpu::GPU_TensorViewWrapper::GPU_TensorViewWrapper(
const shared_ptr<descriptor::TensorView>& tv, const string& alias)
: m_tensor_view(tv)
, m_alias(alias)
{
}
size_t runtime::gpu::GPU_TensorViewWrapper::get_size() const
{
return m_tensor_view->get_tensor_view_layout()->get_size();
}
const vector<size_t>& runtime::gpu::GPU_TensorViewWrapper::get_shape() const
{
return m_tensor_view->get_tensor_view_layout()->get_shape();
}
const vector<size_t>& runtime::gpu::GPU_TensorViewWrapper::get_strides() const
{
return m_tensor_view->get_tensor_view_layout()->get_strides();
}
const element::Type& runtime::gpu::GPU_TensorViewWrapper::get_element_type() const
{
return m_tensor_view->get_tensor_view_layout()->get_element_type();
}
const std::string& runtime::gpu::GPU_TensorViewWrapper::get_name() const
{
if (m_alias.empty())
{
return m_tensor_view->get_tensor().get_name();
}
else
{
return m_alias;
}
}
const std::string& runtime::gpu::GPU_TensorViewWrapper::get_type() const
{
return get_element_type().c_type_string();
}
bool runtime::gpu::GPU_TensorViewWrapper::is_output() const
{
return m_tensor_view->get_tensor().is_output();
}
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// ----------------------------------------------------------------------------
#pragma once
#include <memory>
#include "ngraph/descriptor/tensor_view.hpp"
#include "ngraph/types/element_type.hpp"
namespace ngraph
{
namespace runtime
{
namespace gpu
{
class GPU_TensorViewWrapper;
}
}
}
class ngraph::runtime::gpu::GPU_TensorViewWrapper
{
public:
GPU_TensorViewWrapper(const std::shared_ptr<descriptor::TensorView>&,
const std::string& alias = "");
size_t get_size() const;
const std::vector<size_t>& get_shape() const;
const std::vector<size_t>& get_strides() const;
const element::Type& get_element_type() const;
const std::string& get_name() const;
const std::string& get_type() const;
bool is_output() const;
private:
std::shared_ptr<descriptor::TensorView> m_tensor_view;
std::string m_alias;
};
......@@ -22,7 +22,11 @@
#include <cudnn.h>
#include "ngraph/codegen/compiler.hpp"
#include "ngraph/runtime/gpu/gpu_external_function.hpp"
#include "ngraph/ngraph.hpp"
#include "util/ndarray.hpp"
#include "util/test_tools.hpp"
using namespace ngraph;
using namespace std;
......@@ -41,7 +45,6 @@ TEST(cudnn, compileTest)
#include <iostream>
#include "cuda.h"
void check_cuda_errors(CUresult err) {
assert(err == CUDA_SUCCESS);
}
......@@ -202,7 +205,6 @@ const auto str = R"(
check_cuda_errors(cuMemcpyHtoD(dev_bufferA, &host_A[0], sizeof(float)*16));
check_cuda_errors(cuMemcpyHtoD(dev_bufferB, &host_B[0], sizeof(float)*16));
unsigned block_size_X = 16;
unsigned block_size_Y = 1;
unsigned block_size_Z = 1;
......@@ -223,13 +225,11 @@ const auto str = R"(
// Retrieve device data
check_cuda_errors(cuMemcpyDtoH(&host_C[0], dev_bufferC, sizeof(float)*16));
std::cout << "Results:\n";
for (unsigned i = 0; i != 16; ++i) {
std::cout << host_A[i] << " + " << host_B[i] << " = " << host_C[i] << "\n";
}
// Clean up after ourselves
delete [] host_A;
delete [] host_B;
......@@ -261,4 +261,50 @@ TEST(cudnn, abc)
auto external = manager->compile(f);
auto backend = manager->allocate_backend();
auto cf = backend->make_call_frame(external);
// Create some tensors for input/output
shared_ptr<runtime::TensorView> a = backend->make_primary_tensor_view(element::f32, shape);
shared_ptr<runtime::TensorView> b = backend->make_primary_tensor_view(element::f32, shape);
shared_ptr<runtime::TensorView> c = backend->make_primary_tensor_view(element::f32, shape);
shared_ptr<runtime::TensorView> result = backend->make_primary_tensor_view(element::f32, shape);
copy_data(a, test::NDArray<float, 2>({{1, 2}, {3, 4}}).get_vector());
copy_data(b, test::NDArray<float, 2>({{5, 6}, {7, 8}}).get_vector());
copy_data(c, test::NDArray<float, 2>({{9, 10}, {11, 12}}).get_vector());
cf->call({a, b, c}, {result});
EXPECT_EQ(result->get_vector<float>(),
(test::NDArray<float, 2>({{54, 80}, {110, 144}})).get_vector());
cf->call({b, a, c}, {result});
EXPECT_EQ(result->get_vector<float>(),
(test::NDArray<float, 2>({{54, 80}, {110, 144}})).get_vector());
cf->call({a, c, b}, {result});
EXPECT_EQ(result->get_vector<float>(),
(test::NDArray<float, 2>({{50, 72}, {98, 128}})).get_vector());
}
TEST(cudnn, dot1d)
{
auto shape = Shape{4};
auto A = make_shared<op::Parameter>(element::f32, shape);
auto B = make_shared<op::Parameter>(element::f32, shape);
auto shape_r = Shape{};
auto f = make_shared<Function>(make_shared<op::Dot>(A, B), op::Parameters{A, B});
auto manager = runtime::Manager::get("GPU");
auto external = manager->compile(f);
auto backend = manager->allocate_backend();
auto cf = backend->make_call_frame(external);
// Create some tensors for input/output
auto a = backend->make_primary_tensor_view(element::f32, shape);
copy_data(a, vector<float>{2, 4, 8, 16});
auto b = backend->make_primary_tensor_view(element::f32, shape);
copy_data(b, vector<float>{1, 2, 4, 8});
auto result = backend->make_primary_tensor_view(element::f32, shape_r);
cf->call({a, b}, {result});
EXPECT_EQ((vector<float>{170}), result->get_vector<float>());
}