Commit eef2b19d authored by Fenglei's avatar Fenglei Committed by Nick Korovaiko

enable cudnn datatype support (#1122)

* enable multi datatype support for cuDNN. refactor binary ops using cudnn

* fix bugs

* add tests to skip list that CUDNN does not support

* no int support on cuDNN for backward pooling

* no GPU.dot_4d_5d_multi_axis_big_fp64_VERY_SLOW test anymore

* clang format

* throw if datatype is int8 or int32 for backward pooling

* comments

* fix list in unit_test.manifest

* add type support for alpha, beta

* fix bugs

* datatype support for alpha, beta

* missing ()

* clang format

* batchnorm backward bug fix

* remove debug info

* change member function name to snake case. remove comments

* use nullptr instead of NULL

* code style, use cuDNN everywhere in comments

* add cudnn host parameters memory manager.

* change name to allocate_by_datatype

* compiled

* debug

* fix bug: using list instead of vector, vector address will change each time it resize

* add CUDNN_DATA_UINT8 and CUDNN_DATA_UINT8x4
parent 35b04e6a
This diff is collapsed.
......@@ -26,6 +26,7 @@
#include "ngraph/axis_set.hpp"
#include "ngraph/runtime/gpu/cudnn_descriptors.hpp"
#include "ngraph/runtime/gpu/cudnn_host_parameters.hpp"
#include "ngraph/runtime/gpu/gpu_runtime_context.hpp"
#include "ngraph/shape.hpp"
......@@ -56,7 +57,7 @@ namespace ngraph
};
size_t build_convolution(const runtime::gpu::GPURuntimeContext* ctx,
const cudnnDataType_t data_type,
const std::string& dtype,
const Shape& input_tensor_shape,
const Shape& input_filter_shape,
const Shape& output_tensor_shape,
......@@ -65,7 +66,7 @@ namespace ngraph
const Shape& padding_below);
size_t build_convolution_backward_data(const runtime::gpu::GPURuntimeContext* ctx,
const cudnnDataType_t data_type,
const std::string& dtype,
const Shape& input_filter_shape,
const Shape& input_tensor_shape,
const Shape& output_tensor_shape,
......@@ -74,7 +75,7 @@ namespace ngraph
const Shape& padding_below);
size_t build_convolution_backward_filter(const runtime::gpu::GPURuntimeContext* ctx,
const cudnnDataType_t data_type,
const std::string& dtype,
const Shape& input_tensor_shape_0,
const Shape& input_tensor_shape_1,
const Shape& output_filter_shape,
......@@ -84,11 +85,21 @@ namespace ngraph
size_t build_reduce_forward(const GPURuntimeContext* ctx,
const cudnnReduceTensorOp_t& reduce_op,
const std::string& dtype,
const Shape& input_shape,
const AxisSet& reduction_axes);
size_t build_tensor_op(const GPURuntimeContext* ctx,
const cudnnOpTensorOp_t& tensor_op,
const std::string& dtype,
const Shape& input_shape,
const double alpha0,
const double alpha1,
const double beta);
size_t build_pooling(const GPURuntimeContext* ctx,
const cudnnPoolingMode_t& pool_op,
const std::string& dtype,
const Prop& direction,
const ngraph::Shape& input_shape,
const ngraph::Shape& output_shape,
......@@ -99,6 +110,7 @@ namespace ngraph
size_t build_batchnorm(const runtime::gpu::GPURuntimeContext* ctx,
const cudnnBatchNormMode_t& bn_op,
const std::string& dtype,
const Prop& direction,
const Shape& tensor_shape,
const Shape& param_shape,
......@@ -107,10 +119,21 @@ namespace ngraph
size_t build_softmax(const runtime::gpu::GPURuntimeContext* ctx,
const cudnnSoftmaxAlgorithm_t& algorithm,
const cudnnSoftmaxMode_t& mode,
const std::string& dtype,
const Prop& direction,
const Shape& tensor_shape);
cudnnTensorDescriptor_t& tensor_descriptor_from_shape(const Shape& shape);
private:
CUDNNEmitter(GPUPrimitiveEmitter* emitter);
void* get_data_by_type(cudnnDataType_t data_type, double value);
cudnnDataType_t get_cudnn_datatype(std::string dtype);
cudnnTensorDescriptor_t&
tensor_descriptor_from_shape(const Shape& shape,
const cudnnDataType_t data_type,
const cudnnTensorFormat_t tensor_format);
cudnnFilterDescriptor_t&
get_cudnn_filter_descriptor(const Shape& shape,
const cudnnDataType_t data_type,
......@@ -122,10 +145,9 @@ namespace ngraph
cudnnConvolutionMode_t mode,
cudnnDataType_t data_type);
private:
CUDNNEmitter(GPUPrimitiveEmitter* emitter);
CUDNNDescriptors m_descriptors;
CUDNNHostParameters m_host_parameters;
GPUPrimitiveEmitter* m_primitive_emitter;
};
}
......
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include <list>
#include <memory>
#include <cudnn.h>
#include "ngraph/log.hpp"
#include "ngraph/runtime/gpu/gpu_util.hpp"
namespace ngraph
{
namespace runtime
{
namespace gpu
{
/// \brief A factory which builds cuDNN host parameters
/// and manages their creation and destruction.
class CUDNNHostParameters
{
public:
    CUDNNHostParameters() = default;
    ~CUDNNHostParameters() = default;

    /// \brief Allocate host-side storage for a scalar parameter (e.g. cuDNN's
    ///        alpha/beta scaling factors), converted to the given cuDNN data type.
    /// \param data_type cuDNN element type the scalar must be stored as.
    /// \param value     Scalar value, supplied as double and narrowed as needed.
    /// \return Pointer to the stored scalar, valid for the lifetime of this object.
    /// \throws std::runtime_error for types cuDNN host parameters do not support
    ///         (half, packed int8x4/uint8x4, uint8) or any unrecognized type.
    void* allocate_by_datatype(const cudnnDataType_t data_type, const double value)
    {
        void* r = nullptr;
        switch (data_type)
        {
        case CUDNN_DATA_FLOAT:
            m_host_parameters_float.push_back(static_cast<float>(value));
            r = static_cast<void*>(&m_host_parameters_float.back());
            break;
        case CUDNN_DATA_DOUBLE:
            m_host_parameters_double.push_back(value);
            r = static_cast<void*>(&m_host_parameters_double.back());
            break;
        case CUDNN_DATA_INT8:
            m_host_parameters_int8_t.push_back(static_cast<int8_t>(value));
            r = static_cast<void*>(&m_host_parameters_int8_t.back());
            break;
        case CUDNN_DATA_INT32:
            m_host_parameters_int32_t.push_back(static_cast<int32_t>(value));
            r = static_cast<void*>(&m_host_parameters_int32_t.back());
            break;
        case CUDNN_DATA_HALF:
        case CUDNN_DATA_INT8x4:
        case CUDNN_DATA_UINT8:
        case CUDNN_DATA_UINT8x4:
        // default: a cudnnDataType_t value not covered above must not fall
        // through and return nullptr — callers would hand it to cuDNN as a
        // host scalar pointer.
        default:
            throw std::runtime_error("datatype is not supported by cuDNN");
        }
        return r;
    }

private:
    // std::list is used deliberately: element addresses remain stable across
    // push_back, whereas std::vector may reallocate and invalidate previously
    // returned pointers.
    std::list<int8_t> m_host_parameters_int8_t;
    std::list<int32_t> m_host_parameters_int32_t;
    std::list<float> m_host_parameters_float;
    std::list<double> m_host_parameters_double;
};
}
}
}
......@@ -288,6 +288,13 @@ namespace ngraph
static constexpr const char* math_kernel = "!x0";
};
// Trait specialization mapping ngraph::op::Negative to its CUDA element-wise
// kernel: the op is registered under the name "negative" and its math kernel
// applies unary minus to the single input x0.
template <>
struct CudaOpMap<ngraph::op::Negative>
{
static constexpr const char* op = "negative";
static constexpr const char* math_kernel = "-x0";
};
template <>
struct CudaOpMap<ngraph::op::Select>
{
......
This diff is collapsed.
......@@ -187,7 +187,7 @@ static const runtime::gpu::OpMap dispatcher{
{TI(ngraph::op::Log), &runtime::gpu::GPU_Emitter::emit_elementwise<ngraph::op::Log>},
{TI(ngraph::op::Maximum), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Maximum>},
{TI(ngraph::op::Minimum), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Minimum>},
{TI(ngraph::op::Negative), &runtime::gpu::GPU_Emitter::emit<ngraph::op::Negative>},
{TI(ngraph::op::Negative), &runtime::gpu::GPU_Emitter::emit_elementwise<ngraph::op::Negative>},
{TI(ngraph::op::NotEqual), &runtime::gpu::GPU_Emitter::emit_elementwise<ngraph::op::NotEqual>},
{TI(ngraph::op::Power), &runtime::gpu::GPU_Emitter::emit_elementwise<ngraph::op::Power>},
{TI(ngraph::op::Select), &runtime::gpu::GPU_Emitter::emit_elementwise<ngraph::op::Select>},
......@@ -431,7 +431,7 @@ using namespace std;
}
}
}
// Add cudnn descriptor factory for descriptor management.
// Add cuDNN descriptor factory for descriptor management.
// After the cuDNN code emitted in gpu_emitter.cc is refactored
// into the CUDNNEmitter class, this can be removed.
writer << "static runtime::gpu::CUDNNDescriptors descriptors;\n\n";
......
#int64 is not supported by cuDNN
abc_int64
batch_norm_one_output
batch_norm_three_outputs
#need to check
computation_reuse
#int64 is not supported
concat_matrix_int64
#convolution 4d is work in progress
convolution_4d_2items
convolution_4d_4items
convolution_4d_4items_dilated
......@@ -12,16 +16,27 @@ convolution_4d_4items_strided_dilated
convolution_4d_4items_strided_dilated_padded
convolution_4d_4items_strided_dilated_padded_neg
convolution_4d_4items_strided_dilated_padded_same
#cuDNN does not have arithmetic exceptions
divide_by_zero_int32
dot_4d_5d_multi_axis_big_fp64_VERY_SLOW
#int64 is not supported by cuDNN
dot_matrix_vector_int64
#no mkldnn on GPU
mkldnn_layouts
#error throw is not the same on GPU, not supported yet
one_hot_scalar_fp_nonint_in_3
one_hot_scalar_oob_in_3
one_hot_vector_1_barely_oob
one_hot_vector_1_far_oob
one_hot_vector_1_fp_nonint
#select_and_scatter is deprecated
select_and_scatter_3d_without_overlap
select_and_scatter_with_overlap
select_and_scatter_without_overlap
#custom_mem is not implemented on GPU
tensorview_custom_mem
#integer is not supported by cuDNN on backward pooling
backwards_maxpool_n4_c1_hw4_2x2_max
backwards_maxpool_n2_c1_hw5_3x3_str2_max
backwards_avgpool_n1_c1_hw2x2
backwards_avgpool_n1_c1_hw4x4
backwards_avgpool_n2_c2_hw4x4
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment