Commit af2b33bd authored by Robert Kimball's avatar Robert Kimball

remove Intel GPU backend

parent 90a1f581
...@@ -169,7 +169,6 @@ option(NGRAPH_UNIT_TEST_ENABLE "Control the building of unit tests" TRUE) ...@@ -169,7 +169,6 @@ option(NGRAPH_UNIT_TEST_ENABLE "Control the building of unit tests" TRUE)
option(NGRAPH_TOOLS_ENABLE "Control the building of tool" TRUE) option(NGRAPH_TOOLS_ENABLE "Control the building of tool" TRUE)
option(NGRAPH_CPU_ENABLE "Control the building of the CPU backend" TRUE) option(NGRAPH_CPU_ENABLE "Control the building of the CPU backend" TRUE)
option(NGRAPH_MLIR_ENABLE "Control the building of MLIR backend" FALSE) option(NGRAPH_MLIR_ENABLE "Control the building of MLIR backend" FALSE)
option(NGRAPH_INTELGPU_ENABLE "Control the building of the Intel GPU backend with clDNN" FALSE)
option(NGRAPH_INTERPRETER_ENABLE "Control the building of the INTERPRETER backend" TRUE) option(NGRAPH_INTERPRETER_ENABLE "Control the building of the INTERPRETER backend" TRUE)
option(NGRAPH_NOP_ENABLE "Control the building of the NOP backend" TRUE) option(NGRAPH_NOP_ENABLE "Control the building of the NOP backend" TRUE)
option(NGRAPH_GENERIC_CPU_ENABLE "Enable build nGraph for generic CPU backend" FALSE) option(NGRAPH_GENERIC_CPU_ENABLE "Enable build nGraph for generic CPU backend" FALSE)
...@@ -192,7 +191,7 @@ option(NGRAPH_DYNAMIC_COMPONENTS_ENABLE "Enable dynamic loading of components" T ...@@ -192,7 +191,7 @@ option(NGRAPH_DYNAMIC_COMPONENTS_ENABLE "Enable dynamic loading of components" T
if (NGRAPH_CPU_ENABLE if (NGRAPH_CPU_ENABLE
AND AND
((NOT NGRAPH_GENERIC_CPU_ENABLE) AND (NOT NGRAPH_INTELGPU_ENABLE)) (NOT NGRAPH_GENERIC_CPU_ENABLE)
) )
set(NGRAPH_INTEL_CPU_ONLY_ENABLE ON) set(NGRAPH_INTEL_CPU_ONLY_ENABLE ON)
endif() endif()
...@@ -242,7 +241,6 @@ NORMALIZE_BOOL(NGRAPH_UNIT_TEST_ENABLE) ...@@ -242,7 +241,6 @@ NORMALIZE_BOOL(NGRAPH_UNIT_TEST_ENABLE)
NORMALIZE_BOOL(NGRAPH_TOOLS_ENABLE) NORMALIZE_BOOL(NGRAPH_TOOLS_ENABLE)
NORMALIZE_BOOL(NGRAPH_CPU_ENABLE) NORMALIZE_BOOL(NGRAPH_CPU_ENABLE)
NORMALIZE_BOOL(NGRAPH_MLIR_ENABLE) NORMALIZE_BOOL(NGRAPH_MLIR_ENABLE)
NORMALIZE_BOOL(NGRAPH_INTELGPU_ENABLE)
NORMALIZE_BOOL(NGRAPH_INTERPRETER_ENABLE) NORMALIZE_BOOL(NGRAPH_INTERPRETER_ENABLE)
NORMALIZE_BOOL(NGRAPH_NOP_ENABLE) NORMALIZE_BOOL(NGRAPH_NOP_ENABLE)
NORMALIZE_BOOL(NGRAPH_GENERIC_CPU_ENABLE) NORMALIZE_BOOL(NGRAPH_GENERIC_CPU_ENABLE)
...@@ -267,7 +265,6 @@ message(STATUS "NGRAPH_UNIT_TEST_ENABLE: ${NGRAPH_UNIT_TEST_ENABLE} ...@@ -267,7 +265,6 @@ message(STATUS "NGRAPH_UNIT_TEST_ENABLE: ${NGRAPH_UNIT_TEST_ENABLE}
message(STATUS "NGRAPH_TOOLS_ENABLE: ${NGRAPH_TOOLS_ENABLE}") message(STATUS "NGRAPH_TOOLS_ENABLE: ${NGRAPH_TOOLS_ENABLE}")
message(STATUS "NGRAPH_CPU_ENABLE: ${NGRAPH_CPU_ENABLE}") message(STATUS "NGRAPH_CPU_ENABLE: ${NGRAPH_CPU_ENABLE}")
message(STATUS "NGRAPH_MLIR_ENABLE: ${NGRAPH_MLIR_ENABLE}") message(STATUS "NGRAPH_MLIR_ENABLE: ${NGRAPH_MLIR_ENABLE}")
message(STATUS "NGRAPH_INTELGPU_ENABLE: ${NGRAPH_INTELGPU_ENABLE}")
message(STATUS "NGRAPH_INTERPRETER_ENABLE: ${NGRAPH_INTERPRETER_ENABLE}") message(STATUS "NGRAPH_INTERPRETER_ENABLE: ${NGRAPH_INTERPRETER_ENABLE}")
message(STATUS "NGRAPH_NOP_ENABLE: ${NGRAPH_NOP_ENABLE}") message(STATUS "NGRAPH_NOP_ENABLE: ${NGRAPH_NOP_ENABLE}")
message(STATUS "NGRAPH_GENERIC_CPU_ENABLE: ${NGRAPH_GENERIC_CPU_ENABLE}") message(STATUS "NGRAPH_GENERIC_CPU_ENABLE: ${NGRAPH_GENERIC_CPU_ENABLE}")
...@@ -539,9 +536,6 @@ endif() ...@@ -539,9 +536,6 @@ endif()
if (NGRAPH_MLIR_ENABLE) if (NGRAPH_MLIR_ENABLE)
include(cmake/external_mlir.cmake) include(cmake/external_mlir.cmake)
endif() endif()
if(NGRAPH_INTELGPU_ENABLE)
include(cmake/external_cldnn.cmake)
endif()
if (NGRAPH_CPU_ENABLE AND NOT NGRAPH_DEX_ONLY) if (NGRAPH_CPU_ENABLE AND NOT NGRAPH_DEX_ONLY)
set(NGRAPH_CODEGEN_ENABLE TRUE) set(NGRAPH_CODEGEN_ENABLE TRUE)
......
# ******************************************************************************
# Copyright 2017-2019 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
# Enable ExternalProject CMake module
include(ExternalProject)
#------------------------------------------------------------------------------
# Download and install GoogleTest ...
#------------------------------------------------------------------------------
set(CLDNN_GIT_REPO_URL https://github.com/intel/clDNN.git)
set(CLDNN_GIT_LABEL v0.1.0)
set(OUT_DIR ${EXTERNAL_PROJECTS_ROOT}/cldnn/out)
ExternalProject_Add(
ext_cldnn
PREFIX cldnn
GIT_REPOSITORY ${CLDNN_GIT_REPO_URL}
GIT_TAG ${CLDNN_GIT_LABEL}
# Disable install step
INSTALL_COMMAND ""
UPDATE_COMMAND ""
CMAKE_GENERATOR ${CMAKE_GENERATOR}
CMAKE_GENERATOR_PLATFORM ${CMAKE_GENERATOR_PLATFORM}
CMAKE_GENERATOR_TOOLSET ${CMAKE_GENERATOR_TOOLSET}
CMAKE_ARGS
${NGRAPH_FORWARD_CMAKE_ARGS}
-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
# -DCLDNN__OUTPUT_DIR=out/Debug
-DCLDNN__INCLUDE_TESTS=OFF
-DCLDNN__INCLUDE_CORE_INTERNAL_TESTS=OFF
-DCLDNN__INCLUDE_TUTORIAL=OFF
EXCLUDE_FROM_ALL TRUE
)
#------------------------------------------------------------------------------
add_library(libcldnn INTERFACE)
if (CLDNN_ROOT_DIR)
find_package(CLDNN REQUIRED)
target_include_directories(libcldnn SYSTEM INTERFACE ${CLDNN_INCLUDE_DIRS})
target_link_libraries(libcldnn INTERFACE ${CLDNN_LIBRARIES})
install(
FILES
${CLDNN_LIBRARIES}
DESTINATION
${NGRAPH_INSTALL_LIB}
OPTIONAL
)
else()
ExternalProject_Get_Property(ext_cldnn SOURCE_DIR BINARY_DIR)
set(CLDNN_LIB ${CMAKE_SHARED_LIBRARY_PREFIX}clDNN64${CMAKE_SHARED_LIBRARY_SUFFIX})
ExternalProject_Add_Step(
ext_cldnn
CopyCLDNN
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${SOURCE_DIR}/build/out/Linux64/${CMAKE_BUILD_TYPE}/${CLDNN_LIB} ${NGRAPH_LIBRARY_OUTPUT_DIRECTORY}/${CLDNN_LIB}
COMMENT "Copy cldnn runtime libraries to ngraph build directory."
DEPENDEES install
)
add_dependencies(libcldnn ext_cldnn)
target_include_directories(libcldnn SYSTEM INTERFACE ${SOURCE_DIR}/api)
target_link_libraries(libcldnn INTERFACE ${NGRAPH_LIBRARY_OUTPUT_DIRECTORY}/${CLDNN_LIB})
install(
FILES
${NGRAPH_LIBRARY_OUTPUT_DIRECTORY}/${CLDNN_LIB}
DESTINATION
${NGRAPH_INSTALL_LIB}
OPTIONAL
)
endif()
...@@ -23,10 +23,6 @@ if (NGRAPH_CPU_ENABLE) ...@@ -23,10 +23,6 @@ if (NGRAPH_CPU_ENABLE)
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS TRUE) set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS TRUE)
endif() endif()
if (NGRAPH_INTELGPU_ENABLE)
add_subdirectory(intelgpu)
endif()
if (NGRAPH_NOP_ENABLE) if (NGRAPH_NOP_ENABLE)
add_subdirectory(nop) add_subdirectory(nop)
endif() endif()
......
# ******************************************************************************
# Copyright 2017-2019 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
set(SRC
intelgpu_backend.cpp
intelgpu_executable.cpp
intelgpu_tensor_view.cpp
intelgpu_layout.cpp
intelgpu_kernels.cpp
intelgpu_op_batchnorm.cpp
intelgpu_op_broadcast.cpp
intelgpu_op_custom_kernels.cpp
intelgpu_op_convolution.cpp
intelgpu_op_softmax.cpp
intelgpu_op_custom_func_call.cpp
visualize_tree.cpp
)
if (NGRAPH_INTELGPU_ENABLE)
add_library(intelgpu_backend SHARED ${SRC})
target_link_libraries(intelgpu_backend PUBLIC ngraph libcldnn)
target_compile_definitions(intelgpu_backend PRIVATE INTELGPU_BACKEND_DLL_EXPORTS)
if (NGRAPH_LIB_VERSIONING_ENABLE)
set_target_properties(intelgpu_backend
PROPERTIES
VERSION ${NGRAPH_VERSION}
SOVERSION ${NGRAPH_API_VERSION})
endif()
install(TARGETS intelgpu_backend LIBRARY DESTINATION ${NGRAPH_INSTALL_LIB})
endif()
This diff is collapsed.
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <map>
#include <memory>
#include <CPP/engine.hpp>
#include "ngraph/runtime/backend.hpp"
namespace ngraph
{
namespace runtime
{
namespace intelgpu
{
class IntelGPUBackend;
}
}
}
class ngraph::runtime::intelgpu::IntelGPUBackend : public runtime::Backend
{
public:
IntelGPUBackend();
std::shared_ptr<ngraph::runtime::Tensor>
create_tensor(const ngraph::element::Type& element_type,
const Shape& shape,
void* memory_pointer) override;
std::shared_ptr<ngraph::runtime::Tensor>
create_tensor(const ngraph::element::Type& element_type, const Shape& shape) override;
std::shared_ptr<runtime::Executable> compile(std::shared_ptr<Function> func,
bool enable_timing = false) override;
void remove_compiled_function(std::shared_ptr<runtime::Executable> exec) override;
bool is_supported_property(const Property prop) const override;
bool is_supported(const Node& node) const override;
static bool is_supported_impl(const Node& node);
private:
std::shared_ptr<cldnn::engine> cldnn_engine;
std::map<std::shared_ptr<Function>, std::shared_ptr<runtime::Executable>> cldnn_networks;
bool m_profile_enable = false;
long m_profile_lines_limit_count = 10;
bool m_dump_graph_enable = false;
bool m_cldnn_graph_optimize = true;
bool m_cldnn_dump_enable = false;
bool m_function_cache_disabled = false;
long m_disable_backend_optimizations = 0;
std::string m_cldnn_dump_dir = std::string("intelgpu_codegen");
};
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/visibility.hpp"
#ifdef INTELGPU_BACKEND_EXPORTS // defined if we are building the INTELGPU_BACKEND
#define INTELGPU_BACKEND_API NGRAPH_HELPER_DLL_EXPORT
#else
#define INTELGPU_BACKEND_API NGRAPH_HELPER_DLL_IMPORT
#endif
This diff is collapsed.
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <CPP/network.hpp>
#include "ngraph/runtime/tensor.hpp"
namespace ngraph
{
namespace runtime
{
namespace intelgpu
{
class IntelGPUExecutable;
}
}
}
class ngraph::runtime::intelgpu::IntelGPUExecutable : public runtime::Executable
{
public:
IntelGPUExecutable(std::shared_ptr<Function> func,
std::shared_ptr<cldnn::network> network,
bool enable_timing,
bool enable_profile,
double compilation_time,
double consumed_memory,
size_t profile_lines_limit_count);
bool call(const std::vector<std::shared_ptr<runtime::Tensor>>& outputs,
const std::vector<std::shared_ptr<runtime::Tensor>>& inputs) override;
std::vector<PerformanceCounter> get_performance_data() const override;
private:
std::shared_ptr<Function> m_function;
std::shared_ptr<cldnn::network> m_cldnn_network = nullptr;
bool m_performance_counters_enabled = false;
bool m_profile_enable = false;
double m_compilation_time = 0.0;
double m_consumed_memory = 0.0;
long m_profile_lines_limit_count = 10;
std::string delim = std::string(":");
// Statistic related things
void print_call_performance(const std::shared_ptr<cldnn::network> network,
const std::shared_ptr<Function> func,
double time_compile,
double time_call,
double mem_compilation_consumed,
double mem_call_consumed,
double mem_current) const;
};
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <CPP/custom_gpu_primitive.hpp>
#include "ngraph/runtime/intelgpu/intelgpu_kernels.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_layout.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.hpp"
#include "ngraph/node.hpp"
#include "ngraph/util.hpp"
using namespace std;
using namespace ngraph;
void runtime::intelgpu::CustomKernels::queue_krnl(const krnl_info& krnl_info,
const shared_ptr<Node>& op)
{
for (const CustomKernelInfo& kr : krnl_info)
{
// Need to save this code to allow further work on it later
#if 0
mkldnn::engine eng(0);
shared_ptr<mkldnn::stream> mkldnn_stream = make_shared<mkldnn::stream>(eng);
cl_device_id device = eng.get_ocl_device();
const char* source_code = kr.m_code.c_str();
const size_t source_code_length = strlen(source_code);
cl_int errcode = CL_SUCCESS;
cl_command_queue queue = mkldnn_stream->get_ocl_command_queue();
cl_program program = clCreateProgramWithSource(
eng.get_ocl_context(), 1, &source_code, &source_code_length, &errcode);
if (errcode != CL_SUCCESS)
{
throw ngraph_error("Build OpenCL program error: " + to_string(errcode));
}
errcode = clBuildProgram(program, 1, &device, "", NULL, NULL);
if (errcode != CL_SUCCESS)
{
size_t log_length = 0;
int info_errcode =
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, 0, &log_length);
if (info_errcode != CL_SUCCESS)
{
throw ngraph_error("clGetProgramBuildInfo(log_length) error: " +
to_string(info_errcode));
}
void* log = ngraph_malloc(log_length);
info_errcode =
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, log_length, log, 0);
if (info_errcode != CL_SUCCESS)
{
throw ngraph_error("clGetProgramBuildInfo(log) error: " + to_string(info_errcode));
}
string err_string((const char*)log);
ngraph_free(log);
throw ngraph_error("Error during the build of OpenCL program. Error: " +
to_string(errcode) + "\nBuild log:" + err_string);
}
cl_kernel kernel = clCreateKernel(program, kr.m_entry_point.c_str(), &errcode);
if (errcode != CL_SUCCESS)
{
throw ngraph_error("Create OpenCL kernel error: " + to_string(errcode));
}
//kr.kernel = kernel;
#else
const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(kr.m_type, kr.m_shape);
const cldnn::custom_gpu_primitive kernel_item(kr.m_name,
kr.m_inputs,
{kr.m_code},
kr.m_entry_point,
get_kernel_args(kr.m_inputs.size(), 1),
"",
layout,
kr.m_gws,
kr.m_lws);
stream.add(kernel_item);
#endif
++m_count_krnls;
}
}
void runtime::intelgpu::arguments_check(const shared_ptr<Node>& op, size_t input, size_t output)
{
if (op->get_input_size() != input || op->get_output_size() != output)
{
ostringstream os;
os << "Operation \"" << op->description() << "\" input and output sizes mismatch."
<< " Expected input size=" << input << ", provided=" << op->get_input_size()
<< ". Expected output size=" << output << ", provided=" << op->get_output_size();
throw invalid_argument(os.str());
}
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <memory>
#include <string>
#include <vector>
#include <CPP/topology.hpp>
#include "ngraph/node.hpp"
#include "ngraph/op/all.hpp"
#include "ngraph/op/and.hpp"
#include "ngraph/op/any.hpp"
#include "ngraph/op/batch_norm.hpp"
#include "ngraph/op/broadcast.hpp"
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/equal.hpp"
#include "ngraph/op/fused/conv_fused.hpp"
#include "ngraph/op/fused/gemm.hpp"
#include "ngraph/op/fused/group_conv.hpp"
#include "ngraph/op/greater.hpp"
#include "ngraph/op/greater_eq.hpp"
#include "ngraph/op/less.hpp"
#include "ngraph/op/less_eq.hpp"
#include "ngraph/op/max.hpp"
#include "ngraph/op/min.hpp"
#include "ngraph/op/not_equal.hpp"
#include "ngraph/op/or.hpp"
#include "ngraph/op/product.hpp"
#include "ngraph/op/select.hpp"
#include "ngraph/op/slice.hpp"
#include "ngraph/op/softmax.hpp"
#include "ngraph/op/sum.hpp"
namespace ngraph
{
namespace runtime
{
namespace intelgpu
{
class CustomKernelInfo;
class CustomKernels;
void arguments_check(const std::shared_ptr<Node>& op, size_t input, size_t output);
}
}
}
class ngraph::runtime::intelgpu::CustomKernelInfo
{
public:
CustomKernelInfo(const std::string& name,
const Shape& shape,
const element::Type& type,
const std::vector<std::string>& inputs,
const std::string& code,
const std::string& entry_point,
const std::vector<size_t>& gws = {1},
const std::vector<size_t>& lws = {1})
{
m_name = name;
m_shape = shape;
m_type = type;
m_inputs = inputs;
m_code = code;
m_entry_point = entry_point;
m_gws = gws;
m_lws = lws;
kernel = nullptr;
}
std::string m_name;
Shape m_shape;
element::Type m_type;
std::vector<std::string> m_inputs;
std::string m_code;
std::string m_entry_point;
std::vector<size_t> m_gws;
std::vector<size_t> m_lws;
void* kernel;
};
class ngraph::runtime::intelgpu::CustomKernels
{
public:
using krnl_info = std::vector<CustomKernelInfo>;
explicit CustomKernels(cldnn::topology& backend_stream)
: stream(backend_stream)
{
m_count_krnls = 0;
}
template <typename OP>
void emit(const std::shared_ptr<OP>& op)
{
krnl_info krnl_info;
krnl_info = build_krnl(op);
queue_krnl(krnl_info, op);
}
size_t get_custom_kernel_count() const { return m_count_krnls; }
private:
void queue_krnl(const krnl_info& krn_info, const std::shared_ptr<Node>& op);
krnl_info build_krnl(const std::shared_ptr<op::All>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::And>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Any>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::BatchNormInference>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::BatchNormTraining>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::BatchNormTrainingBackprop>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Broadcast>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Convolution>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::GroupConvolution>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::ConvolutionBackpropData>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::ConvolutionBackpropFilters>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::ConvolutionBias>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::ConvolutionBiasAdd>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::ConvolutionBiasBackpropFiltersBias>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Equal>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Gemm>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Greater>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::GreaterEq>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Less>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::LessEq>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Max>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Min>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::NotEqual>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Or>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Product>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Select>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Slice>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Softmax>& op) const;
krnl_info build_krnl(const std::shared_ptr<op::Sum>& op) const;
cldnn::topology& stream;
size_t m_count_krnls;
};
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/runtime/intelgpu/intelgpu_layout.hpp"
#include "ngraph/except.hpp"
#include "ngraph/shape.hpp"
#include "ngraph/type/element_type.hpp"
using namespace std;
using namespace ngraph;
runtime::intelgpu::IntelGPULayout::IntelGPULayout(const descriptor::Tensor& tv,
const cldnn::layout& layout)
: TensorLayout(tv)
, cldnn_layout(layout)
{
}
size_t runtime::intelgpu::IntelGPULayout::get_index_offset(const vector<size_t>& indices)
{
if (indices.size() != strides.size())
{
throw ngraph_error("Indices have incorrect rank");
}
return inner_product(indices.cbegin(), indices.cend(), strides.cbegin(), 0);
}
bool runtime::intelgpu::IntelGPULayout::
operator==(const descriptor::layout::TensorLayout& other) const
{
const IntelGPULayout* p_other = dynamic_cast<const IntelGPULayout*>(&other);
if (!p_other)
{
return false;
}
return (cldnn_layout == p_other->cldnn_layout);
}
cldnn::data_types
runtime::intelgpu::IntelGPULayout::get_cldnn_type(const element::Type& element_type)
{
switch (element_type)
{
case element::Type_t::i8:
case element::Type_t::boolean: return cldnn::data_types::i8;
case element::Type_t::u8: return cldnn::data_types::u8;
case element::Type_t::i32: return cldnn::data_types::i32;
case element::Type_t::i64: return cldnn::data_types::i64;
case element::Type_t::f32: return cldnn::data_types::f32;
}
ostringstream os;
os << "IntelGPULayout::get_cldnn_type: Unknown type " << element_type;
throw invalid_argument(os.str());
}
cldnn::tensor runtime::intelgpu::IntelGPULayout::create_cldnn_tensor(const Shape& element_shape)
{
vector<size_t> idx(4, 1);
size_t index = 0;
const size_t total_size = shape_size<Shape>(element_shape);
// clDNN requires at least scalar tensor size. We can't create zero sized tensors
if (total_size != 0)
{
for (auto i = element_shape.crbegin(); i != element_shape.crend() && index < 3;
++i, ++index)
{
idx.at(index) = *i;
}
if (element_shape.size() > 3)
{
idx.at(3) = accumulate(
element_shape.rbegin() + 3, element_shape.rend(), 1, multiplies<size_t>());
}
}
// Parameters for this ctor: batch, feature, spatial_x, spatial_y
const cldnn::tensor tns(idx.at(3), idx.at(2), idx.at(0), idx.at(1));
return tns;
}
cldnn::tensor runtime::intelgpu::IntelGPULayout::create_cldnn_offset(const Shape& pad_below)
{
vector<cldnn::tensor::value_type> offset({0, 0, 0, 0});
size_t ridx = 4;
for (auto i = pad_below.crbegin(); i != pad_below.crend() && ridx > 0; ++i, --ridx)
{
offset.at(ridx - 1) = -(*i);
}
const cldnn::tensor input_offset(offset.at(0), offset.at(1), offset.at(3), offset.at(2), 0);
return input_offset;
}
cldnn::layout runtime::intelgpu::IntelGPULayout::create_cldnn_layout(
const ngraph::element::Type& element_type, const Shape& element_shape)
{
const cldnn::format::type format = cldnn::format::bfyx;
const cldnn::tensor tensor = create_cldnn_tensor(element_shape);
cldnn::data_types data_type;
switch (element_type)
{
case element::Type_t::i16:
case element::Type_t::u16:
{
data_type = cldnn::data_types::f16;
break;
}
case element::Type_t::u32:
{
data_type = cldnn::data_types::i32;
break;
}
case element::Type_t::u64:
case element::Type_t::f64:
{
data_type = cldnn::data_types::i64;
break;
}
default: { data_type = get_cldnn_type(element_type);
}
}
return cldnn::layout(data_type, format, tensor);
}
cldnn::concatenation::concatenation_axis
runtime::intelgpu::IntelGPULayout::get_cldnn_axis(size_t shape_size, size_t axis)
{
const size_t t_channel = shape_size - axis - 1;
switch (t_channel)
{
case 0: return cldnn::concatenation::along_x;
case 1: return cldnn::concatenation::along_y;
case 2: return cldnn::concatenation::along_f;
case 3:
if (shape_size < 5)
{
return cldnn::concatenation::along_b;
}
// no break
default:
throw invalid_argument("IntelGPULayout::get_cldnn_axis: wrong tensor channel " +
to_string(t_channel));
}
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <CPP/concatenation.hpp>
#include <CPP/layout.hpp>
#include <CPP/tensor.hpp>
#include "ngraph/descriptor/layout/tensor_layout.hpp"
namespace ngraph
{
namespace runtime
{
namespace intelgpu
{
class IntelGPULayout;
}
}
}
class ngraph::runtime::intelgpu::IntelGPULayout : public ngraph::descriptor::layout::TensorLayout
{
public:
IntelGPULayout(const ngraph::descriptor::Tensor& tv, const cldnn::layout& layout);
~IntelGPULayout() override {}
size_t get_index_offset(const std::vector<size_t>& indices) override;
Strides get_strides() const override { return strides; }
bool operator==(const TensorLayout& other) const override;
static cldnn::data_types get_cldnn_type(const ngraph::element::Type& element_type);
static cldnn::layout create_cldnn_layout(const ngraph::element::Type& element_type,
const Shape& element_shape);
static cldnn::tensor create_cldnn_tensor(const Shape& element_shape);
static cldnn::tensor create_cldnn_offset(const Shape& pad_below);
// This function converts Shape dimension_id into cldnn::concatenation id
static cldnn::concatenation::concatenation_axis get_cldnn_axis(size_t shape_size, size_t axis);
private:
Strides strides;
cldnn::layout cldnn_layout;
};
This diff is collapsed.
This diff is collapsed.
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/code_writer.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_kernels.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.hpp"
#include "ngraph/op/constant.hpp"
using namespace std;
using namespace ngraph;
using namespace ngraph::runtime::intelgpu;
static CustomKernels::krnl_info do_all_any_op(const shared_ptr<op::util::LogicalReduction>& op,
const string& operation)
{
const string& input0_name = op->get_input_tensor_name(0);
const Shape& input0_shape = op->get_input_shape(0);
const string& output_name = op->get_output_tensor_name(0);
const Shape& output_shape = op->get_output_shape(0);
const element::Type& output_type = op->get_output_element_type(0);
const AxisSet& axis = op->get_reduction_axes();
const shared_ptr<Node> def_val = op->get_default_value();
const shared_ptr<op::Constant> def_const = static_pointer_cast<op::Constant>(def_val);
const vector<string>& values = def_const->get_value_strings();
const string& init_val = values.at(0);
const string entry_point_name = "custom_op_all_any_" + output_name;
const string kernel_type_name = get_opencl_type_name(output_type);
const size_t input_size = shape_size<Shape>(input0_shape);
CodeWriter writer;
// The kernel name and parameters
gen_func_def(writer,
entry_point_name,
{1, kernel_type_name},
{input0_shape, {1}},
kernel_type_name,
output_shape);
writer.block_begin();
{
// Initialization loop
size_t var_idx = 0;
for (auto const& i : output_shape)
{
writer << "for (uint i" << var_idx << " = 0; i" << var_idx << " < " << i << "; ++i"
<< var_idx << ")\n";
writer.block_begin();
++var_idx;
}
writer << "output" << access_dims(output_shape) << " = " << init_val << ";\n";
// Closing brackets for initialization loop
for (auto const& i : output_shape)
{
writer.block_end();
}
if (input_size && !input0_shape.empty())
{
// Main operation loop
var_idx = 0;
for (auto const& i : input0_shape)
{
writer << "for (uint i" << var_idx << " = 0; i" << var_idx << " < " << i << "; ++i"
<< var_idx << ")\n";
writer.block_begin();
++var_idx;
}
writer << kernel_type_name << " lhs = output" << access_dims(input0_shape, "i", axis)
<< ";\n"
<< kernel_type_name << " rhs = input0" << access_dims(input0_shape) << ";\n"
<< "output" << access_dims(input0_shape, "i", axis) << " = (" << operation
<< ");\n";
// Closing brackets for loop
for (auto const& i : input0_shape)
{
writer.block_end();
}
}
} // End of function bracket
writer.block_end();
const CustomKernelInfo krn_ret(output_name,
output_shape,
output_type,
{input0_name},
{writer.get_code()},
entry_point_name);
return {krn_ret};
}
CustomKernels::krnl_info CustomKernels::build_krnl(const shared_ptr<op::All>& op) const
{
return do_all_any_op(op, "lhs && rhs");
}
CustomKernels::krnl_info CustomKernels::build_krnl(const shared_ptr<op::Any>& op) const
{
return do_all_any_op(op, "lhs || rhs");
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/code_writer.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_kernels.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.hpp"
using namespace std;
using namespace ngraph;
using namespace ngraph::runtime::intelgpu;
static Shape shape_dims(const Shape& dimentions, const AxisSet& axis = {})
{
size_t var_idx = 0;
Shape output_shape;
for (auto const& dim : dimentions)
{
if (axis.find(var_idx) == axis.end())
{
output_shape.push_back(dim);
}
++var_idx;
}
if (output_shape.size() == 0)
{ // it means scalar
output_shape.push_back(1);
}
return output_shape;
}
CustomKernels::krnl_info CustomKernels::build_krnl(const shared_ptr<op::Softmax>& op) const
{
const string& input_name = op->get_input_tensor_name(0);
const Shape& input_shape = op->get_input_shape(0);
const element::Type& input_type = op->get_input_element_type(0);
const string& output_name = op->get_output_tensor_name(0);
const Shape& output_shape = op->get_output_shape(0);
const element::Type& output_type = op->get_output_element_type(0);
const AxisSet& axes = op->get_axes();
const string entry_point_name = "softmax_" + output_name;
const string middle_name = entry_point_name + "_middle";
const string entry_point_middle_name = "softmax_middle_" + output_name;
const string expression = "output" + access_dims(input_shape, "i", axes) + " = 0.0f;\n";
const Shape new_shape = shape_dims(output_shape, axes);
CodeWriter writer0;
CodeWriter writer1;
vector<size_t> gws;
writer0 << "__kernel void " << entry_point_middle_name << "(const __global "
<< get_opencl_type_name(input_type) << " input" << array_dims(input_shape)
<< ", __global " << get_opencl_type_name(output_type) << " output"
<< array_dims(input_shape, axes) << ")\n";
writer0.block_begin();
{
gws = generate_loops_w_axes(writer0, output_shape, true, axes, expression);
writer0 << "output" << access_dims(input_shape, "i", axes) << " += exp(input"
<< access_dims(input_shape) << ");\n";
generate_loops_w_axes(writer0, output_shape, false, axes, "");
}
writer0.block_end();
const CustomKernelInfo op_softmax_middle(middle_name,
new_shape,
output_type,
{input_name},
{writer0.get_code()},
entry_point_middle_name,
gws);
writer1 << "__kernel void " << entry_point_name << "(const __global "
<< get_opencl_type_name(input_type) << " input0" << array_dims(input_shape)
<< ", const __global " << get_opencl_type_name(input_type) << " input1"
<< array_dims(input_shape, axes) << ", __global " << get_opencl_type_name(output_type)
<< " output" << array_dims(output_shape) << ")\n";
writer1.block_begin();
{
gws = generate_loops(writer1, output_shape, true);
writer1 << "output" << access_dims(input_shape) << " = exp(input0"
<< access_dims(input_shape) << ")/input1" << access_dims(input_shape, "i", axes)
<< ";\n";
generate_loops(writer1, output_shape, false);
}
writer1.block_end();
const CustomKernelInfo op_softmax(output_name,
output_shape,
output_type,
{input_name, middle_name},
{writer1.get_code()},
entry_point_name,
gws);
return {op_softmax_middle, op_softmax};
}
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment