Unverified commit e576a6c6 authored by Robert Kimball, committed by GitHub

Merge pull request #3558 from NervanaSystems/bob/no_igpu

Remove Intel GPU backend
parents 2d51c383 c264fa34
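This change drops the clDNN-based Intel GPU backend together with its NGRAPH_INTELGPU_ENABLE build option. As a rough sketch only (Backend::create is the standard nGraph factory used for every backend; the exact post-removal failure mode is an assumption, not something this diff states), code that previously selected the backend by name will no longer find it:

// Hypothetical caller snippet, not part of this change.
#include "ngraph/runtime/backend.hpp"
int main()
{
    // "INTELGPU" matches the backend name used in test/CMakeLists.txt (ACTIVE_BACKEND_LIST);
    // after this change no intelgpu_backend library is built, so creation is expected to fail.
    auto backend = ngraph::runtime::Backend::create("INTELGPU");
    return backend ? 0 : 1;
}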
@@ -174,7 +174,6 @@ option(NGRAPH_TOOLS_ENABLE "Control the building of tool" TRUE)
option(NGRAPH_CPU_ENABLE "Control the building of the CPU backend" TRUE)
option(NGRAPH_USE_LEGACY_MKLDNN "Use legacy MKLDNN" TRUE)
option(NGRAPH_MLIR_ENABLE "Control the building of MLIR backend" FALSE)
option(NGRAPH_INTELGPU_ENABLE "Control the building of the Intel GPU backend with clDNN" FALSE)
option(NGRAPH_INTERPRETER_ENABLE "Control the building of the INTERPRETER backend" TRUE)
option(NGRAPH_NOP_ENABLE "Control the building of the NOP backend" TRUE)
option(NGRAPH_GENERIC_CPU_ENABLE "Enable build nGraph for generic CPU backend" FALSE)
@@ -202,7 +201,7 @@ endif()
if (NGRAPH_CPU_ENABLE
AND
((NOT NGRAPH_GENERIC_CPU_ENABLE) AND (NOT NGRAPH_INTELGPU_ENABLE))
(NOT NGRAPH_GENERIC_CPU_ENABLE)
)
set(NGRAPH_INTEL_CPU_ONLY_ENABLE ON)
endif()
@@ -254,7 +253,6 @@ NORMALIZE_BOOL(NGRAPH_TOOLS_ENABLE)
NORMALIZE_BOOL(NGRAPH_CPU_ENABLE)
NORMALIZE_BOOL(NGRAPH_USE_LEGACY_MKLDNN)
NORMALIZE_BOOL(NGRAPH_MLIR_ENABLE)
NORMALIZE_BOOL(NGRAPH_INTELGPU_ENABLE)
NORMALIZE_BOOL(NGRAPH_INTERPRETER_ENABLE)
NORMALIZE_BOOL(NGRAPH_NOP_ENABLE)
NORMALIZE_BOOL(NGRAPH_GENERIC_CPU_ENABLE)
@@ -286,7 +284,6 @@ message(STATUS "NGRAPH_TOOLS_ENABLE: ${NGRAPH_TOOLS_ENABLE}")
message(STATUS "NGRAPH_CPU_ENABLE: ${NGRAPH_CPU_ENABLE}")
message(STATUS "NGRAPH_USE_LEGACY_MKLDNN: ${NGRAPH_USE_LEGACY_MKLDNN}")
message(STATUS "NGRAPH_MLIR_ENABLE: ${NGRAPH_MLIR_ENABLE}")
message(STATUS "NGRAPH_INTELGPU_ENABLE: ${NGRAPH_INTELGPU_ENABLE}")
message(STATUS "NGRAPH_INTERPRETER_ENABLE: ${NGRAPH_INTERPRETER_ENABLE}")
message(STATUS "NGRAPH_NOP_ENABLE: ${NGRAPH_NOP_ENABLE}")
message(STATUS "NGRAPH_GENERIC_CPU_ENABLE: ${NGRAPH_GENERIC_CPU_ENABLE}")
@@ -582,9 +579,6 @@ endif()
if (NGRAPH_MLIR_ENABLE)
include(cmake/external_mlir.cmake)
endif()
if(NGRAPH_INTELGPU_ENABLE)
include(cmake/external_cldnn.cmake)
endif()
if (NGRAPH_CPU_ENABLE AND NOT NGRAPH_DEX_ONLY)
set(NGRAPH_CODEGEN_ENABLE TRUE)
# ******************************************************************************
# Copyright 2017-2019 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
# Enable ExternalProject CMake module
include(ExternalProject)
#------------------------------------------------------------------------------
# Download and build clDNN ...
#------------------------------------------------------------------------------
set(CLDNN_GIT_REPO_URL https://github.com/intel/clDNN.git)
set(CLDNN_GIT_LABEL v0.1.0)
set(OUT_DIR ${EXTERNAL_PROJECTS_ROOT}/cldnn/out)
ExternalProject_Add(
ext_cldnn
PREFIX cldnn
GIT_REPOSITORY ${CLDNN_GIT_REPO_URL}
GIT_TAG ${CLDNN_GIT_LABEL}
# Disable install step
INSTALL_COMMAND ""
UPDATE_COMMAND ""
CMAKE_GENERATOR ${CMAKE_GENERATOR}
CMAKE_GENERATOR_PLATFORM ${CMAKE_GENERATOR_PLATFORM}
CMAKE_GENERATOR_TOOLSET ${CMAKE_GENERATOR_TOOLSET}
CMAKE_ARGS
${NGRAPH_FORWARD_CMAKE_ARGS}
-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
# -DCLDNN__OUTPUT_DIR=out/Debug
-DCLDNN__INCLUDE_TESTS=OFF
-DCLDNN__INCLUDE_CORE_INTERNAL_TESTS=OFF
-DCLDNN__INCLUDE_TUTORIAL=OFF
EXCLUDE_FROM_ALL TRUE
)
#------------------------------------------------------------------------------
add_library(libcldnn INTERFACE)
if (CLDNN_ROOT_DIR)
find_package(CLDNN REQUIRED)
target_include_directories(libcldnn SYSTEM INTERFACE ${CLDNN_INCLUDE_DIRS})
target_link_libraries(libcldnn INTERFACE ${CLDNN_LIBRARIES})
install(
FILES
${CLDNN_LIBRARIES}
DESTINATION
${NGRAPH_INSTALL_LIB}
OPTIONAL
)
else()
ExternalProject_Get_Property(ext_cldnn SOURCE_DIR BINARY_DIR)
set(CLDNN_LIB ${CMAKE_SHARED_LIBRARY_PREFIX}clDNN64${CMAKE_SHARED_LIBRARY_SUFFIX})
ExternalProject_Add_Step(
ext_cldnn
CopyCLDNN
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${SOURCE_DIR}/build/out/Linux64/${CMAKE_BUILD_TYPE}/${CLDNN_LIB} ${NGRAPH_LIBRARY_OUTPUT_DIRECTORY}/${CLDNN_LIB}
COMMENT "Copy cldnn runtime libraries to ngraph build directory."
DEPENDEES install
)
add_dependencies(libcldnn ext_cldnn)
target_include_directories(libcldnn SYSTEM INTERFACE ${SOURCE_DIR}/api)
target_link_libraries(libcldnn INTERFACE ${NGRAPH_LIBRARY_OUTPUT_DIRECTORY}/${CLDNN_LIB})
install(
FILES
${NGRAPH_LIBRARY_OUTPUT_DIRECTORY}/${CLDNN_LIB}
DESTINATION
${NGRAPH_INSTALL_LIB}
OPTIONAL
)
endif()
@@ -23,10 +23,6 @@ if (NGRAPH_CPU_ENABLE)
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS TRUE)
endif()
if (NGRAPH_INTELGPU_ENABLE)
add_subdirectory(intelgpu)
endif()
if (NGRAPH_NOP_ENABLE)
add_subdirectory(nop)
endif()
# ******************************************************************************
# Copyright 2017-2019 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
set(SRC
intelgpu_backend.cpp
intelgpu_executable.cpp
intelgpu_tensor_view.cpp
intelgpu_layout.cpp
intelgpu_kernels.cpp
intelgpu_op_batchnorm.cpp
intelgpu_op_broadcast.cpp
intelgpu_op_custom_kernels.cpp
intelgpu_op_convolution.cpp
intelgpu_op_softmax.cpp
intelgpu_op_custom_func_call.cpp
visualize_tree.cpp
)
if (NGRAPH_INTELGPU_ENABLE)
add_library(intelgpu_backend SHARED ${SRC})
target_link_libraries(intelgpu_backend PUBLIC ngraph libcldnn)
target_compile_definitions(intelgpu_backend PRIVATE INTELGPU_BACKEND_DLL_EXPORTS)
if (NGRAPH_LIB_VERSIONING_ENABLE)
set_target_properties(intelgpu_backend
PROPERTIES
VERSION ${NGRAPH_VERSION}
SOVERSION ${NGRAPH_API_VERSION})
endif()
install(TARGETS intelgpu_backend LIBRARY DESTINATION ${NGRAPH_INSTALL_LIB})
endif()
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <map>
#include <memory>
#include <CPP/engine.hpp>
#include "ngraph/runtime/backend.hpp"
namespace ngraph
{
namespace runtime
{
namespace intelgpu
{
class IntelGPUBackend;
}
}
}
class ngraph::runtime::intelgpu::IntelGPUBackend : public runtime::Backend
{
public:
IntelGPUBackend();
std::shared_ptr<ngraph::runtime::Tensor>
create_tensor(const ngraph::element::Type& element_type,
const Shape& shape,
void* memory_pointer) override;
std::shared_ptr<ngraph::runtime::Tensor>
create_tensor(const ngraph::element::Type& element_type, const Shape& shape) override;
std::shared_ptr<runtime::Executable> compile(std::shared_ptr<Function> func,
bool enable_timing = false) override;
void remove_compiled_function(std::shared_ptr<runtime::Executable> exec) override;
bool is_supported_property(const Property prop) const override;
bool is_supported(const Node& node) const override;
static bool is_supported_impl(const Node& node);
private:
std::shared_ptr<cldnn::engine> cldnn_engine;
std::map<std::shared_ptr<Function>, std::shared_ptr<runtime::Executable>> cldnn_networks;
bool m_profile_enable = false;
long m_profile_lines_limit_count = 10;
bool m_dump_graph_enable = false;
bool m_cldnn_graph_optimize = true;
bool m_cldnn_dump_enable = false;
bool m_function_cache_disabled = false;
long m_disable_backend_optimizations = 0;
std::string m_cldnn_dump_dir = std::string("intelgpu_codegen");
};
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/visibility.hpp"
#ifdef INTELGPU_BACKEND_EXPORTS // defined if we are building the INTELGPU_BACKEND
#define INTELGPU_BACKEND_API NGRAPH_HELPER_DLL_EXPORT
#else
#define INTELGPU_BACKEND_API NGRAPH_HELPER_DLL_IMPORT
#endif
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <CPP/network.hpp>
#include "ngraph/runtime/tensor.hpp"
namespace ngraph
{
namespace runtime
{
namespace intelgpu
{
class IntelGPUExecutable;
}
}
}
class ngraph::runtime::intelgpu::IntelGPUExecutable : public runtime::Executable
{
public:
IntelGPUExecutable(std::shared_ptr<Function> func,
std::shared_ptr<cldnn::network> network,
bool enable_timing,
bool enable_profile,
double compilation_time,
double consumed_memory,
size_t profile_lines_limit_count);
bool call(const std::vector<std::shared_ptr<runtime::Tensor>>& outputs,
const std::vector<std::shared_ptr<runtime::Tensor>>& inputs) override;
std::vector<PerformanceCounter> get_performance_data() const override;
private:
std::shared_ptr<Function> m_function;
std::shared_ptr<cldnn::network> m_cldnn_network = nullptr;
bool m_performance_counters_enabled = false;
bool m_profile_enable = false;
double m_compilation_time = 0.0;
double m_consumed_memory = 0.0;
long m_profile_lines_limit_count = 10;
std::string delim = std::string(":");
// Statistic related things
void print_call_performance(const std::shared_ptr<cldnn::network> network,
const std::shared_ptr<Function> func,
double time_compile,
double time_call,
double mem_compilation_consumed,
double mem_call_consumed,
double mem_current) const;
};
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <CPP/custom_gpu_primitive.hpp>
#include "ngraph/runtime/intelgpu/intelgpu_kernels.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_layout.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.hpp"
#include "ngraph/node.hpp"
#include "ngraph/util.hpp"
using namespace std;
using namespace ngraph;
void runtime::intelgpu::CustomKernels::queue_krnl(const krnl_info& krnl_info,
const shared_ptr<Node>& op)
{
for (const CustomKernelInfo& kr : krnl_info)
{
// Need to save this code to allow further work on it later
#if 0
mkldnn::engine eng(0);
shared_ptr<mkldnn::stream> mkldnn_stream = make_shared<mkldnn::stream>(eng);
cl_device_id device = eng.get_ocl_device();
const char* source_code = kr.m_code.c_str();
const size_t source_code_length = strlen(source_code);
cl_int errcode = CL_SUCCESS;
cl_command_queue queue = mkldnn_stream->get_ocl_command_queue();
cl_program program = clCreateProgramWithSource(
eng.get_ocl_context(), 1, &source_code, &source_code_length, &errcode);
if (errcode != CL_SUCCESS)
{
throw ngraph_error("Build OpenCL program error: " + to_string(errcode));
}
errcode = clBuildProgram(program, 1, &device, "", NULL, NULL);
if (errcode != CL_SUCCESS)
{
size_t log_length = 0;
int info_errcode =
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, 0, &log_length);
if (info_errcode != CL_SUCCESS)
{
throw ngraph_error("clGetProgramBuildInfo(log_length) error: " +
to_string(info_errcode));
}
void* log = ngraph_malloc(log_length);
info_errcode =
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, log_length, log, 0);
if (info_errcode != CL_SUCCESS)
{
throw ngraph_error("clGetProgramBuildInfo(log) error: " + to_string(info_errcode));
}
string err_string((const char*)log);
ngraph_free(log);
throw ngraph_error("Error during the build of OpenCL program. Error: " +
to_string(errcode) + "\nBuild log:" + err_string);
}
cl_kernel kernel = clCreateKernel(program, kr.m_entry_point.c_str(), &errcode);
if (errcode != CL_SUCCESS)
{
throw ngraph_error("Create OpenCL kernel error: " + to_string(errcode));
}
//kr.kernel = kernel;
#else
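// Active path: wrap the generated OpenCL source in a clDNN custom_gpu_primitive and add it
// to the backend topology (stream).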
const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(kr.m_type, kr.m_shape);
const cldnn::custom_gpu_primitive kernel_item(kr.m_name,
kr.m_inputs,
{kr.m_code},
kr.m_entry_point,
get_kernel_args(kr.m_inputs.size(), 1),
"",
layout,
kr.m_gws,
kr.m_lws);
stream.add(kernel_item);
#endif
++m_count_krnls;
}
}
void runtime::intelgpu::arguments_check(const shared_ptr<Node>& op, size_t input, size_t output)
{
if (op->get_input_size() != input || op->get_output_size() != output)
{
ostringstream os;
os << "Operation \"" << op->description() << "\" input and output sizes mismatch."
<< " Expected input size=" << input << ", provided=" << op->get_input_size()
<< ". Expected output size=" << output << ", provided=" << op->get_output_size();
throw invalid_argument(os.str());
}
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <memory>
#include <string>
#include <vector>
#include <CPP/topology.hpp>
#include "ngraph/node.hpp"
#include "ngraph/op/all.hpp"
#include "ngraph/op/and.hpp"
#include "ngraph/op/any.hpp"
#include "ngraph/op/batch_norm.hpp"
#include "ngraph/op/broadcast.hpp"
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/equal.hpp"
#include "ngraph/op/fused/conv_fused.hpp"
#include "ngraph/op/fused/gemm.hpp"
#include "ngraph/op/fused/group_conv.hpp"
#include "ngraph/op/greater.hpp"
#include "ngraph/op/greater_eq.hpp"
#include "ngraph/op/less.hpp"
#include "ngraph/op/less_eq.hpp"
#include "ngraph/op/max.hpp"
#include "ngraph/op/min.hpp"
#include "ngraph/op/not_equal.hpp"
#include "ngraph/op/or.hpp"
#include "ngraph/op/product.hpp"
#include "ngraph/op/select.hpp"
#include "ngraph/op/slice.hpp"
#include "ngraph/op/softmax.hpp"
#include "ngraph/op/sum.hpp"
namespace ngraph
{
namespace runtime
{
namespace intelgpu
{
class CustomKernelInfo;
class CustomKernels;
void arguments_check(const std::shared_ptr<Node>& op, size_t input, size_t output);
}
}
}
class ngraph::runtime::intelgpu::CustomKernelInfo
{
public:
CustomKernelInfo(const std::string& name,
const Shape& shape,
const element::Type& type,
const std::vector<std::string>& inputs,
const std::string& code,
const std::string& entry_point,
const std::vector<size_t>& gws = {1},
const std::vector<size_t>& lws = {1})
{
m_name = name;
m_shape = shape;
m_type = type;
m_inputs = inputs;
m_code = code;
m_entry_point = entry_point;
m_gws = gws;
m_lws = lws;
kernel = nullptr;
}
std::string m_name;
Shape m_shape;
element::Type m_type;
std::vector<std::string> m_inputs;
std::string m_code;
std::string m_entry_point;
std::vector<size_t> m_gws;
std::vector<size_t> m_lws;
void* kernel;
};
class ngraph::runtime::intelgpu::CustomKernels
{
public:
using krnl_info = std::vector<CustomKernelInfo>;
explicit CustomKernels(cldnn::topology& backend_stream)
: stream(backend_stream)
{
m_count_krnls = 0;
}
template <typename OP>
void emit(const std::shared_ptr<OP>& op)
{
krnl_info krnl_info;
krnl_info = build_krnl(op);
queue_krnl(krnl_info, op);
}
size_t get_custom_kernel_count() const { return m_count_krnls; }
private:
void queue_krnl(const krnl_info& krn_info, const std::shared_ptr<Node>& op);
krnl_info build_krnl(const std::shared_ptr<op::All>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::And>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Any>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::BatchNormInference>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::BatchNormTraining>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::BatchNormTrainingBackprop>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Broadcast>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Convolution>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::GroupConvolution>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::ConvolutionBackpropData>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::ConvolutionBackpropFilters>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::ConvolutionBias>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::ConvolutionBiasAdd>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::ConvolutionBiasBackpropFiltersBias>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Equal>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Gemm>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Greater>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::GreaterEq>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Less>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::LessEq>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Max>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Min>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::NotEqual>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Or>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Product>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Select>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Slice>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Softmax>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Sum>& op) const;
cldnn::topology& stream;
size_t m_count_krnls;
};
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/runtime/intelgpu/intelgpu_layout.hpp"
#include "ngraph/except.hpp"
#include "ngraph/shape.hpp"
#include "ngraph/type/element_type.hpp"
using namespace std;
using namespace ngraph;
runtime::intelgpu::IntelGPULayout::IntelGPULayout(const descriptor::Tensor& tv,
const cldnn::layout& layout)
: TensorLayout(tv)
, cldnn_layout(layout)
{
}
size_t runtime::intelgpu::IntelGPULayout::get_index_offset(const vector<size_t>& indices)
{
if (indices.size() != strides.size())
{
throw ngraph_error("Indices have incorrect rank");
}
return inner_product(indices.cbegin(), indices.cend(), strides.cbegin(), 0);
}
bool runtime::intelgpu::IntelGPULayout::
operator==(const descriptor::layout::TensorLayout& other) const
{
const IntelGPULayout* p_other = dynamic_cast<const IntelGPULayout*>(&other);
if (!p_other)
{
return false;
}
return (cldnn_layout == p_other->cldnn_layout);
}
cldnn::data_types
runtime::intelgpu::IntelGPULayout::get_cldnn_type(const element::Type& element_type)
{
switch (element_type)
{
case element::Type_t::i8:
case element::Type_t::boolean: return cldnn::data_types::i8;
case element::Type_t::u8: return cldnn::data_types::u8;
case element::Type_t::i32: return cldnn::data_types::i32;
case element::Type_t::i64: return cldnn::data_types::i64;
case element::Type_t::f32: return cldnn::data_types::f32;
}
ostringstream os;
os << "IntelGPULayout::get_cldnn_type: Unknown type " << element_type;
throw invalid_argument(os.str());
}
cldnn::tensor runtime::intelgpu::IntelGPULayout::create_cldnn_tensor(const Shape& element_shape)
{
vector<size_t> idx(4, 1);
size_t index = 0;
const size_t total_size = shape_size<Shape>(element_shape);
// clDNN requires at least scalar tensor size. We can't create zero sized tensors
if (total_size != 0)
{
for (auto i = element_shape.crbegin(); i != element_shape.crend() && index < 3;
++i, ++index)
{
idx.at(index) = *i;
}
if (element_shape.size() > 3)
{
idx.at(3) = accumulate(
element_shape.rbegin() + 3, element_shape.rend(), 1, multiplies<size_t>());
}
}
// Parameters for this ctor: batch, feature, spatial_x, spatial_y
const cldnn::tensor tns(idx.at(3), idx.at(2), idx.at(0), idx.at(1));
return tns;
}
cldnn::tensor runtime::intelgpu::IntelGPULayout::create_cldnn_offset(const Shape& pad_below)
{
vector<cldnn::tensor::value_type> offset({0, 0, 0, 0});
size_t ridx = 4;
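// Walk pad_below from the innermost dimension outward and negate each value: the below-padding
// is handed to clDNN as a negative input offset.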
for (auto i = pad_below.crbegin(); i != pad_below.crend() && ridx > 0; ++i, --ridx)
{
offset.at(ridx - 1) = -(*i);
}
const cldnn::tensor input_offset(offset.at(0), offset.at(1), offset.at(3), offset.at(2), 0);
return input_offset;
}
cldnn::layout runtime::intelgpu::IntelGPULayout::create_cldnn_layout(
const ngraph::element::Type& element_type, const Shape& element_shape)
{
const cldnn::format::type format = cldnn::format::bfyx;
const cldnn::tensor tensor = create_cldnn_tensor(element_shape);
cldnn::data_types data_type;
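// Types not listed in get_cldnn_type() are stored using a clDNN type of the same bit width:
// i16/u16 as f16, u32 as i32, u64/f64 as i64; everything else goes through get_cldnn_type().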
switch (element_type)
{
case element::Type_t::i16:
case element::Type_t::u16:
{
data_type = cldnn::data_types::f16;
break;
}
case element::Type_t::u32:
{
data_type = cldnn::data_types::i32;
break;
}
case element::Type_t::u64:
case element::Type_t::f64:
{
data_type = cldnn::data_types::i64;
break;
}
default: { data_type = get_cldnn_type(element_type);
}
}
return cldnn::layout(data_type, format, tensor);
}
cldnn::concatenation::concatenation_axis
runtime::intelgpu::IntelGPULayout::get_cldnn_axis(size_t shape_size, size_t axis)
{
const size_t t_channel = shape_size - axis - 1;
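// clDNN orders dimensions as b, f, y, x; t_channel counts the concatenation axis from the
// innermost (x) dimension outward.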
switch (t_channel)
{
case 0: return cldnn::concatenation::along_x;
case 1: return cldnn::concatenation::along_y;
case 2: return cldnn::concatenation::along_f;
case 3:
if (shape_size < 5)
{
return cldnn::concatenation::along_b;
}
// no break
default:
throw invalid_argument("IntelGPULayout::get_cldnn_axis: wrong tensor channel " +
to_string(t_channel));
}
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <CPP/concatenation.hpp>
#include <CPP/layout.hpp>
#include <CPP/tensor.hpp>
#include "ngraph/descriptor/layout/tensor_layout.hpp"
namespace ngraph
{
namespace runtime
{
namespace intelgpu
{
class IntelGPULayout;
}
}
}
class ngraph::runtime::intelgpu::IntelGPULayout : public ngraph::descriptor::layout::TensorLayout
{
public:
IntelGPULayout(const ngraph::descriptor::Tensor& tv, const cldnn::layout& layout);
~IntelGPULayout() override {}
size_t get_index_offset(const std::vector<size_t>& indices) override;
Strides get_strides() const override { return strides; }
bool operator==(const TensorLayout& other) const override;
static cldnn::data_types get_cldnn_type(const ngraph::element::Type& element_type);
static cldnn::layout create_cldnn_layout(const ngraph::element::Type& element_type,
const Shape& element_shape);
static cldnn::tensor create_cldnn_tensor(const Shape& element_shape);
static cldnn::tensor create_cldnn_offset(const Shape& pad_below);
// This function converts Shape dimension_id into cldnn::concatenation id
static cldnn::concatenation::concatenation_axis get_cldnn_axis(size_t shape_size, size_t axis);
private:
Strides strides;
cldnn::layout cldnn_layout;
};
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/code_writer.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_kernels.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.hpp"
#include "ngraph/op/constant.hpp"
using namespace std;
using namespace ngraph;
using namespace ngraph::runtime::intelgpu;
static CustomKernels::krnl_info do_all_any_op(const shared_ptr<op::util::LogicalReduction>& op,
const string& operation)
{
const string& input0_name = op->get_input_tensor_name(0);
const Shape& input0_shape = op->get_input_shape(0);
const string& output_name = op->get_output_tensor_name(0);
const Shape& output_shape = op->get_output_shape(0);
const element::Type& output_type = op->get_output_element_type(0);
const AxisSet& axis = op->get_reduction_axes();
const shared_ptr<Node> def_val = op->get_default_value();
const shared_ptr<op::Constant> def_const = static_pointer_cast<op::Constant>(def_val);
const vector<string>& values = def_const->get_value_strings();
const string& init_val = values.at(0);
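// init_val is the reduction identity pulled from the op's default-value constant
// (identity of AND for All, of OR for Any); it seeds the output before the main loop.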
const string entry_point_name = "custom_op_all_any_" + output_name;
const string kernel_type_name = get_opencl_type_name(output_type);
const size_t input_size = shape_size<Shape>(input0_shape);
CodeWriter writer;
// The kernel name and parameters
gen_func_def(writer,
entry_point_name,
{1, kernel_type_name},
{input0_shape, {1}},
kernel_type_name,
output_shape);
writer.block_begin();
{
// Initialization loop
size_t var_idx = 0;
for (auto const& i : output_shape)
{
writer << "for (uint i" << var_idx << " = 0; i" << var_idx << " < " << i << "; ++i"
<< var_idx << ")\n";
writer.block_begin();
++var_idx;
}
writer << "output" << access_dims(output_shape) << " = " << init_val << ";\n";
// Closing brackets for initialization loop
for (auto const& i : output_shape)
{
writer.block_end();
}
if (input_size && !input0_shape.empty())
{
// Main operation loop
var_idx = 0;
for (auto const& i : input0_shape)
{
writer << "for (uint i" << var_idx << " = 0; i" << var_idx << " < " << i << "; ++i"
<< var_idx << ")\n";
writer.block_begin();
++var_idx;
}
writer << kernel_type_name << " lhs = output" << access_dims(input0_shape, "i", axis)
<< ";\n"
<< kernel_type_name << " rhs = input0" << access_dims(input0_shape) << ";\n"
<< "output" << access_dims(input0_shape, "i", axis) << " = (" << operation
<< ");\n";
// Closing brackets for loop
for (auto const& i : input0_shape)
{
writer.block_end();
}
}
} // End of function bracket
writer.block_end();
const CustomKernelInfo krn_ret(output_name,
output_shape,
output_type,
{input0_name},
{writer.get_code()},
entry_point_name);
return {krn_ret};
}
CustomKernels::krnl_info CustomKernels::build_krnl(const shared_ptr<op::All>& op) const
{
return do_all_any_op(op, "lhs && rhs");
}
CustomKernels::krnl_info CustomKernels::build_krnl(const shared_ptr<op::Any>& op) const
{
return do_all_any_op(op, "lhs || rhs");
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <memory>
#include <CPP/data.hpp>
#include "ngraph/descriptor/layout/dense_tensor_layout.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_layout.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_tensor_view.hpp"
using namespace ngraph;
using namespace std;
runtime::intelgpu::IntelGPUTensorView::IntelGPUTensorView(const element::Type& element_type,
const Shape& shape,
const cldnn::engine& backend_engine,
void* memory_pointer)
: runtime::Tensor(make_shared<descriptor::Tensor>(element_type, shape, ""))
{
const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(element_type, shape);
m_descriptor->set_tensor_layout(
std::make_shared<runtime::intelgpu::IntelGPULayout>(*m_descriptor, layout));
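// Attach the caller-provided buffer when one is passed in; otherwise let clDNN allocate
// memory on the backend engine.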
if (nullptr != memory_pointer)
{
ocl_memory = make_shared<cldnn::memory>(
cldnn::memory::attach<void>(layout, memory_pointer, layout.bytes_count()));
}
else
{
ocl_memory = make_shared<cldnn::memory>(cldnn::memory::allocate(backend_engine, layout));
}
}
void runtime::intelgpu::IntelGPUTensorView::write(const void* source, size_t n)
{
if (n > ocl_memory->size())
{
throw out_of_range("write access past end of tensor");
}
auto ptr = ocl_memory->pointer<char>();
char* target = ptr.data();
memcpy(target, source, n);
}
void runtime::intelgpu::IntelGPUTensorView::read(void* target, size_t n) const
{
if (n > ocl_memory->size())
{
throw out_of_range("read access past end of tensor");
}
const auto ptr = ocl_memory->pointer<char>();
const char* source = ptr.data();
memcpy(target, source, n);
}
@@ -30,9 +30,6 @@ target_link_libraries(nbench PRIVATE ngraph)
if (NGRAPH_CPU_ENABLE)
target_link_libraries(nbench PRIVATE cpu_backend)
endif()
if (NGRAPH_INTELGPU_ENABLE)
target_link_libraries(nbench PRIVATE intelgpu_backend)
endif()
if (NGRAPH_INTERPRETER_ENABLE)
target_link_libraries(nbench PRIVATE interpreter_backend)
endif()
@@ -204,10 +204,6 @@ if (NGRAPH_CPU_ENABLE)
set(ACTIVE_BACKEND_LIST ${ACTIVE_BACKEND_LIST} CPU)
endif()
if (NGRAPH_INTELGPU_ENABLE)
set(ACTIVE_BACKEND_LIST ${ACTIVE_BACKEND_LIST} INTELGPU)
endif()
if (NGRAPH_GPUH_ENABLE)
set(ACTIVE_BACKEND_LIST ${ACTIVE_BACKEND_LIST} GPUH)
endif()