Commit 36dd64ad authored by Adam Procter

Merge remote-tracking branch 'origin/master' into r0.9

parents 413e9617 c579e245
......@@ -68,7 +68,7 @@ function build_ngraph() {
make install || return 1
cd "${ngraph_directory}/ngraph/python"
if [ ! -d ./pybind11 ]; then
git clone --recursive -b allow-nonconstructible-holders https://github.com/jagerman/pybind11.git
git clone --recursive https://github.com/pybind/pybind11.git
fi
export PYBIND_HEADERS_PATH="${ngraph_directory}/ngraph/python/pybind11"
export NGRAPH_CPP_BUILD_PATH="${ngraph_directory}/ngraph_dist"
......
......@@ -39,7 +39,7 @@ RUN make install
# Prepare nGraph Python API
WORKDIR /root/ngraph/python
RUN git clone --recursive -b allow-nonconstructible-holders https://github.com/jagerman/pybind11.git
RUN git clone --recursive https://github.com/pybind/pybind11.git
ENV NGRAPH_CPP_BUILD_PATH /root/ngraph_dist
ENV LD_LIBRARY_PATH /root/ngraph_dist/lib
ENV PYBIND_HEADERS_PATH /root/ngraph/python/pybind11
......
......@@ -558,3 +558,16 @@ bool ngraph::is_strided(const Strides& strides)
{
return std::any_of(strides.begin(), strides.end(), [](size_t stride) { return stride != 1; });
}
bool ngraph::is_valid_rank(const std::shared_ptr<Node>& node, std::vector<size_t> valid_ranks)
{
auto node_rank = node->get_shape().size();
for (auto rank : valid_ranks)
{
if (rank == node_rank)
{
return true;
}
}
return false;
}
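
For context, a minimal sketch of how this helper is meant to be called (the parameter node here is hypothetical; any node with a static shape works):

// Hypothetical caller: accept only 2D or 3D tensors, as the RNN fusion pass below does.
auto param = std::make_shared<op::Parameter>(element::f32, Shape{4, 8});
if (!ngraph::is_valid_rank(param, {2, 3}))
{
    throw ngraph_error("expected a 2D or 3D tensor");
}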
......@@ -315,4 +315,6 @@ namespace ngraph
bool possibly_overwritten(Node* node);
bool is_strided(const Strides& strides);
bool is_valid_rank(const std::shared_ptr<Node>& node, std::vector<size_t> valid_ranks);
}
......@@ -103,10 +103,11 @@ namespace ngraph
void validate_and_infer_elementwise_logical();
Node(const std::string& node_type, const NodeVector& arguments, size_t output_size = 1);
virtual ~Node();
virtual void generate_adjoints(autodiff::Adjoints& adjoints, const NodeVector& deltas) {}
public:
virtual ~Node();
void revalidate_and_infer_types() { validate_and_infer_types(); }
// Called after transition
void delayed_validate_and_infer_types();
......
......@@ -110,6 +110,7 @@ set(SRC
pass/cpu_post_layout_optimizations.cpp
pass/cpu_rnn_fusion.cpp
pass/cpu_workspace_insertion.cpp
pass/cpu_reshape_sinking.cpp
)
if (NOT NGRAPH_DEX_ONLY)
......
......@@ -19,6 +19,7 @@
#include "ngraph/runtime/cpu/kernel/softmax.hpp"
#include "ngraph/runtime/cpu/mkldnn_invoke.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
#include "ngraph/runtime/reference/softmax.hpp"
using namespace std;
using namespace ngraph;
......@@ -131,8 +132,35 @@ namespace ngraph
};
functors.emplace_back(functor);
}
else if (arg_shape.size() == 4 && axes.size() == 3)
{
std::function<decltype(runtime::cpu::kernel::softmax_4d_3rd<float>)> kernel;
SELECT_KERNEL(kernel,
args[0].get_element_type(),
runtime::cpu::kernel::softmax_4d_3rd);
auto functor = [&, kernel, arg_shape, axes](CPURuntimeContext* ctx) {
kernel(arg_tensor, out_tensor, arg_shape, axes);
};
functors.emplace_back(functor);
}
else if (softmax->get_element_type() == element::f32)
{
NGRAPH_WARN << "Falling back to refernce kernel for softmax " << arg_shape
<< " over " << axes;
auto functor = [&, arg_shape, axes](CPURuntimeContext* ctx) {
runtime::reference::softmax<float>(static_cast<float*>(arg_tensor),
static_cast<float*>(out_tensor),
arg_shape,
axes);
};
functors.emplace_back(functor);
}
else
{
NGRAPH_ERR << "Unsupported Softmax " << arg_shape << " over " << axes
<< " in cpu buiilder";
throw ngraph_error("Unsupported Softmax");
}
}
......
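
The reference fallback above computes the usual numerically stable softmax over the reduction axes. As an illustration only (not the actual runtime::reference::softmax, which handles arbitrary axis sets and ranks), the computation for the common case of reducing over the last axis of a row-major buffer looks like this:

#include <algorithm>
#include <cmath>
#include <cstddef>

// Softmax over the last axis of a row-major [rows x cols] buffer.
void softmax_last_axis(const float* in, float* out, size_t rows, size_t cols)
{
    for (size_t r = 0; r < rows; r++)
    {
        const float* row_in = in + r * cols;
        float* row_out = out + r * cols;
        // Subtract the row max before exponentiating for numerical stability.
        float max_val = *std::max_element(row_in, row_in + cols);
        float sum = 0.0f;
        for (size_t c = 0; c < cols; c++)
        {
            row_out[c] = std::exp(row_in[c] - max_val);
            sum += row_out[c];
        }
        for (size_t c = 0; c < cols; c++)
        {
            row_out[c] /= sum;
        }
    }
}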
......@@ -126,6 +126,7 @@
#include "ngraph/pass/manager.hpp"
#include "ngraph/pass/memory_layout.hpp"
#include "ngraph/pass/nop_elimination.hpp"
#include "ngraph/pass/zero_dim_tensor_elimination.hpp"
#include "ngraph/runtime/aligned_buffer.hpp"
#include "ngraph/runtime/cpu/cpu_backend.hpp"
#include "ngraph/runtime/cpu/cpu_builder.hpp"
......@@ -1001,6 +1002,7 @@ void runtime::cpu::CPU_ExternalFunction::register_common_passes(ngraph::pass::Ma
{
pass_manager.register_pass<ngraph::pass::LikeReplacement>();
pass_manager.register_pass<ngraph::pass::NopElimination>();
pass_manager.register_pass<ngraph::pass::ZeroDimTensorElimination>();
// TODO (pruthvi): Enable all the disabled RNN fusion graph passes after fixing
// failing mxnet unit tests.
// pass_manager.register_pass<runtime::cpu::pass::LSTMFusion>();
......@@ -1013,7 +1015,7 @@ void runtime::cpu::CPU_ExternalFunction::register_common_passes(ngraph::pass::Ma
pass_manager.register_pass<ngraph::pass::CommonSubexpressionElimination>();
pass_manager.register_pass<ngraph::pass::CoreFusion>();
pass_manager.register_pass<runtime::cpu::pass::CPUFusion>();
pass_manager.register_pass<runtime::cpu::pass::CPUHorizontalFusion>();
// pass_manager.register_pass<runtime::cpu::pass::CPUHorizontalFusion>();
pass_manager.register_pass<runtime::cpu::pass::CPUCollapseDims>();
NodeVector nv_cwi; // We don't need CPUWorkspaceInsertion to return a list of indices
pass_manager.register_pass<runtime::cpu::pass::CPUWorkspaceInsertion>(nv_cwi, false);
......
......@@ -16,6 +16,9 @@
#pragma once
#include <cstddef>
#include <cstring>
namespace ngraph
{
namespace runtime
......
......@@ -21,6 +21,7 @@
#include "ngraph/axis_set.hpp"
#include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp"
#include "ngraph/shape.hpp"
namespace ngraph
{
......@@ -147,6 +148,15 @@ namespace ngraph
{
softmax<ElementType, 3, 2>(input, output, input_shape, softmax_axes);
}
template <typename ElementType>
void softmax_4d_3rd(void* input,
void* output,
const Shape& input_shape,
const AxisSet& softmax_axes)
{
softmax<ElementType, 4, 3>(input, output, input_shape, softmax_axes);
}
}
}
}
......
......@@ -311,6 +311,20 @@ bool runtime::cpu::mkldnn_utils::is_perm_sorted(const Strides& a, const AxisVect
mkldnn::memory::desc runtime::cpu::mkldnn_utils::create_blocked_mkldnn_md(
const Shape& dims, const Strides& strides, const ngraph::element::Type type)
{
if (dims.size() > TENSOR_MAX_DIMS || strides.size() > TENSOR_MAX_DIMS)
{
throw ngraph_error("In create_blocked_mkldnn_md: Dimensions (dims, stride): (" +
std::to_string(dims.size()) + ", " + std::to_string(strides.size()) +
") exceed maximum supported by MKLDNN " +
std::to_string(TENSOR_MAX_DIMS));
}
if (dims.size() != strides.size())
{
throw ngraph_error("In create_blocked_mkldnn_md: Rank mismatch between shape and strides " +
std::to_string(dims.size()) + " " + std::to_string(strides.size()));
}
memory::dims dim(dims.begin(), dims.end());
memory::dims stride(strides.begin(), strides.end());
memory::data_type dtype = get_mkldnn_data_type(type);
......@@ -515,7 +529,7 @@ memory::desc runtime::cpu::mkldnn_utils::expand_blocked_md(const memory::desc& i
size_t k = 0;
for (size_t i = 0, j = 0; j < md.ndims; j++)
{
if (j == axis_list[k])
if (k < axis_list.size() && j == axis_list[k])
{
k++;
md.dims[j] = 1;
......@@ -531,7 +545,8 @@ memory::desc runtime::cpu::mkldnn_utils::expand_blocked_md(const memory::desc& i
}
else
{
md.layout_desc.blocking.strides[1][j] = 0;
md.layout_desc.blocking.strides[1][j] =
in.data.layout_desc.blocking.strides[0][in.data.ndims - 1];
size_t nelems = 1;
for (size_t idx = 0; idx < in.data.ndims; idx++)
nelems *= in.data.dims[idx];
......
......@@ -124,7 +124,7 @@ void ngraph::runtime::cpu::pass::CPUHorizontalFusion::cpu_conv_horizontal_fusion
NGRAPH_DEBUG << "conv_horizontal_fusion: slice shape " << slice_shape << "\n";
auto lower_bounds = Coordinate{0, index, 0, 0};
index += slice_shape[1];
auto upper_bounds = Coordinate{slice_shape[0], index, slice_shape[2], slice_shape[2]};
auto upper_bounds = Coordinate{slice_shape[0], index, slice_shape[2], slice_shape[3]};
NGRAPH_DEBUG << "conv_horizontal_fusion: lower_bounds " << lower_bounds << "\n";
NGRAPH_DEBUG << "conv_horizontal_fusion: upper_bounds " << upper_bounds << "\n";
auto slice =
......
......@@ -1533,7 +1533,18 @@ namespace ngraph
}
else
{
set_native_layouts(external_function, node);
if (mkldnn_utils::get_input_mkldnn_md(node.get(), 0).data.format ==
mkldnn_format_undef)
{
set_native_layouts(external_function, node);
}
else
{
auto input_md = mkldnn_utils::get_input_mkldnn_md(node.get(), 0);
vector<memory::desc> o_mds;
o_mds.push_back(input_md);
set_output_layouts(node, o_mds);
}
}
}
......@@ -1775,33 +1786,36 @@ namespace ngraph
auto result_shape = slice->get_output_shape(0);
auto input_md = mkldnn_utils::get_input_mkldnn_md(node.get(), 0);
auto input_pd = mkldnn::memory::primitive_desc(
input_md, runtime::cpu::mkldnn_utils::global_cpu_engine);
auto dims = mkldnn::memory::dims(result_shape.begin(), result_shape.end());
auto offsets =
mkldnn::memory::dims(lower_bounds.begin(), lower_bounds.end());
NGRAPH_DEBUG << "input memory format: " << input_md.data.format << "\n";
auto result_format =
static_cast<mkldnn::memory::format>(input_md.data.format);
try
{
// MKLDNN currently doesn't support views for blocked layouts
// when the dims and offsets are not divisible by the block size
auto view_md = mkldnn::view::primitive_desc(input_pd, dims, offsets)
.dst_primitive_desc()
.desc();
vector<memory::desc> o_mds;
o_mds.push_back(view_md);
set_output_layouts(node, o_mds);
}
catch (const mkldnn::error& e)
// check lower bounds and output shape
for (auto i = 0; i < input_md.data.ndims; i++)
{
if (e.status == mkldnn_unimplemented)
auto block_size = input_md.data.layout_desc.blocking.block_dims[i];
if (block_size != 0 && (lower_bounds[i] % block_size != 0 ||
result_shape[i] % block_size != 0))
{
NGRAPH_DEBUG << "slice: number of channels in lower bounds or "
"output shape is not multiple of block size, "
"set native layout\n";
set_native_layouts(external_function, node);
return;
}
else
{
throw ngraph_error(e.message);
}
}
if (result_format == mkldnn::memory::blocked)
{
set_native_layouts(external_function, node);
}
else
{
vector<memory::desc> o_mds;
auto result_desc = mkldnn_utils::create_default_mkldnn_md(
node.get(), 0, true, result_format);
o_mds.push_back(result_desc);
set_output_layouts(node, o_mds);
}
}
else
......
......@@ -23,6 +23,7 @@
#include <unordered_map>
#include "cpu_mat_fusion.hpp"
#include "ngraph/graph_util.hpp"
#include "ngraph/op/add.hpp"
#include "ngraph/op/broadcast.hpp"
#include "ngraph/op/concat.hpp"
......@@ -147,6 +148,26 @@ bool runtime::cpu::pass::CPURnnMatFusion::run_on_function(std::shared_ptr<Functi
auto matched_weight = matcher_v2->get_pattern_map()[W]->get_argument(0);
auto matched_data = matcher_v2->get_pattern_map()[input_data];
auto matched_bias = matcher_v2->get_pattern_map()[b]->get_argument(0);
std::vector<size_t> supported_ranks{2, 3};
if (!ngraph::is_valid_rank(matcher_v2->get_match_root(), supported_ranks))
{
NGRAPH_DEBUG << "Add (mat_fusion_v2) " << matcher_v2->get_match_root()->get_name()
<< " isn't 2D or 3D";
continue;
}
if (!ngraph::is_valid_rank(matched_weight, supported_ranks))
{
NGRAPH_DEBUG << "Weights (mat_fusion_v2) " << matched_weight << " isn't 2D or 3D";
continue;
}
if (!ngraph::is_valid_rank(matched_data, supported_ranks))
{
NGRAPH_DEBUG << "Data (mat_fusion_v2) " << matched_data << " isn't 2D or 3D";
continue;
}
map_weights_to_pattern[matched_weight].push_back(matcher_v2->get_match_root());
map_weights_bias_to_data[std::make_pair(matched_weight, matched_bias)].push_back(
matched_data);
......@@ -234,6 +255,7 @@ bool runtime::cpu::pass::CPURnnMatFusion::run_on_function(std::shared_ptr<Functi
concated_data, data_order, Shape{data_shape[0] * data_shape[1], data_shape[2]});
}
auto new_input_node = data_shape.size() == 2 ? concated_data : input_reshape_node;
NGRAPH_ASSERT(new_input_node);
auto w_reshape_node = std::make_shared<op::Reshape>(
weights, AxisVector{1, 0}, Shape{w_shape[1], w_shape[0]});
auto new_dot = std::make_shared<op::Dot>(new_input_node, w_reshape_node);
......@@ -248,8 +270,15 @@ bool runtime::cpu::pass::CPURnnMatFusion::run_on_function(std::shared_ptr<Functi
size_t end_index = batch_size;
for (auto& matched_root_node : map_weights_to_pattern[weights])
{
auto slice_node = std::make_shared<op::Slice>(
std::shared_ptr<Node> slice_node = std::make_shared<op::Slice>(
new_add_bias, Coordinate{start_index, 0}, Coordinate{end_index, shape_axis_1});
if (matched_root_node->get_shape().size() != 2)
{
NGRAPH_ASSERT(matched_root_node->get_shape().size() == 3);
slice_node = std::make_shared<op::Reshape>(
slice_node, AxisVector{0, 1}, matched_root_node->get_shape());
}
start_index += batch_size;
end_index += batch_size;
NGRAPH_DEBUG << "Replacing op " << matched_root_node->get_name() << " with "
......
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/pass/pass.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace pass
{
class CPUReshapeSinking : public ngraph::pass::FunctionPass
{
public:
bool run_on_function(std::shared_ptr<ngraph::Function> function) override;
};
}
}
}
}
......@@ -170,13 +170,15 @@ size_t runtime::gpu::CUDAEmitter::build_concat(const std::vector<std::string>& d
size_t runtime::gpu::CUDAEmitter::build_onehot(const std::array<std::string, 2>& dtypes,
NVShape input_shape,
NVShape output_shape,
size_t one_hot_axis)
size_t one_hot_axis,
size_t output_datatype_size)
{
std::stringstream kernel_name;
kernel_name << "onehot_" << join(dtypes, "_");
std::string hash = kernel_name.str() + "_i_" + join(input_shape, "_") + "_o_" +
join(output_shape, "_") + std::to_string(one_hot_axis);
join(output_shape, "_") + "_axis_" + std::to_string(one_hot_axis) +
"_datasize_" + std::to_string(output_datatype_size);
// For backwards compatibility we currently use two unordered maps
// 1. one looks up the compiled cuda kernel (CudaFunctionPool)
// 2. the other looks to see if this kernel is already in the primitive list
......@@ -206,18 +208,19 @@ size_t runtime::gpu::CUDAEmitter::build_onehot(const std::array<std::string, 2>&
uint32_t block_size_x = 64;
uint32_t aligned_grid_size_x = align_to_block_size(nthreads, block_size_x);
uint32_t repeat_times = static_cast<uint32_t>(output_shape[one_hot_axis]);
uint32_t repeat_size = 1;
uint32_t hot_axis_shape = static_cast<uint32_t>(output_shape[one_hot_axis]);
uint32_t hot_axis_stride = 1;
for (size_t i = one_hot_axis + 1; i < output_shape.size(); i++)
{
repeat_size *= output_shape[i];
hot_axis_stride *= output_shape[i];
}
uint32_t output_size = static_cast<uint32_t>(shape_size(output_shape) * output_datatype_size);
// create the launch primitive
std::unique_ptr<gpu::primitive> kernel_launch(
new gpu::primitive{[=](void** inputs, void** outputs) mutable {
std::vector<void*> args_list{
&inputs[0], &outputs[0], &repeat_size, &repeat_times, &nthreads};
&inputs[0], &outputs[0], &hot_axis_stride, &hot_axis_shape, &nthreads};
runtime::gpu::cuda_memset(outputs[0], 0, output_size);
CUDA_SAFE_CALL(cuLaunchKernel(*compiled_kernel.get(),
aligned_grid_size_x,
1,
......@@ -1780,8 +1783,9 @@ size_t runtime::gpu::CUDAEmitter::build_primitive(const op::Softmax* node)
auto output_type = out[0].get_element_type().c_type_string();
auto exp_index = build_elementwise<ngraph::op::Exp>({input_type, output_type}, input_shape);
std::vector<element::Type> dtypes{args[0].get_element_type(), out[0].get_element_type()};
auto reduce_index = cudnn_emitter->build_reduce_forward(
CUDNN_REDUCE_TENSOR_ADD, output_type, input_shape, axes);
CUDNN_REDUCE_TENSOR_ADD, dtypes, input_shape, axes, CUDNNEmitter::ReductionMode::Reduce);
size_t divide_index = build_softmax_divide(
std::vector<std::string>(3, output_type), input_shape, reduced_shape, axes_flag);
......
......@@ -99,7 +99,8 @@ namespace ngraph
size_t build_onehot(const std::array<std::string, 2>& dtypes,
NVShape input_shape,
NVShape output_shape,
size_t one_hot_axis);
size_t one_hot_axis,
size_t output_datatype_size);
size_t build_reverse(const std::array<std::string, 2>& dtypes,
NVShape input_shape,
......
......@@ -72,6 +72,19 @@ namespace ngraph
Backward
};
enum class ReductionMode
{
Reduce,
ArgReduce
};
enum class algo_search
{
HEURISTIC,
EXPLICIT,
NONE
};
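
A note on ReductionMode: ArgReduce corresponds to asking cuDNN for the flattened indices of the reduced elements rather than their values. A hedged sketch of the descriptor setup difference (cudnnSetReduceTensorDescriptor and its enums are standard cuDNN; reduce_desc, reduce_op and mode are assumed to be in scope):

// Reduce keeps the reduced values; ArgReduce requests flattened indices instead.
CUDNN_SAFE_CALL(cudnnSetReduceTensorDescriptor(
    reduce_desc,
    reduce_op, // e.g. CUDNN_REDUCE_TENSOR_MAX
    CUDNN_DATA_FLOAT,
    CUDNN_NOT_PROPAGATE_NAN,
    mode == ReductionMode::ArgReduce ? CUDNN_REDUCE_TENSOR_FLATTENED_INDICES
                                     : CUDNN_REDUCE_TENSOR_NO_INDICES,
    CUDNN_32BIT_INDICES));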
size_t build_convolution(const std::string& dtype,
const Shape& input_tensor_shape,
const Shape& input_filter_shape,
......@@ -79,30 +92,33 @@ namespace ngraph
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const Shape& padding_below,
const bool find_algo = false);
size_t build_convolution_backward_data(const std::string& dtype,
const Shape& input_filter_shape,
const Shape& input_tensor_shape,
const Shape& output_tensor_shape,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const Shape& padding_below,
const bool find_algo = false);
size_t build_convolution_backward_filter(const std::string& dtype,
const Shape& input_tensor_shape_0,
const Shape& input_tensor_shape_1,
const Shape& output_filter_shape,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const Shape& padding_below,
const bool find_algo = false);
const algo_search find_algo = algo_search::NONE);
size_t build_convolution_backward_data(
const std::string& dtype,
const Shape& input_filter_shape,
const Shape& input_tensor_shape,
const Shape& output_tensor_shape,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const Shape& padding_below,
const algo_search find_algo = algo_search::NONE);
size_t build_convolution_backward_filter(
const std::string& dtype,
const Shape& input_tensor_shape_0,
const Shape& input_tensor_shape_1,
const Shape& output_filter_shape,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const Shape& padding_below,
const algo_search find_algo = algo_search::NONE);
size_t build_reduce_forward(const cudnnReduceTensorOp_t& reduce_op,
const std::string& dtype,
const std::vector<element::Type>& dtypes,
const Shape& input_shape,
const AxisSet& reduction_axes);
const AxisSet& reduction_axes,
const ReductionMode& reduction_mode);
size_t build_tensor_op(const cudnnOpTensorOp_t& tensor_op,
const std::string& dtype,
......@@ -154,6 +170,7 @@ namespace ngraph
void* get_data_by_type(cudnnDataType_t data_type, double value);
cudnnDataType_t get_cudnn_datatype(std::string dtype);
cudnnDataType_t get_cudnn_datatype(const element::Type& dtype);
cudnnTensorDescriptor_t&
tensor_descriptor_from_shape(const Shape& shape,
......@@ -178,6 +195,24 @@ namespace ngraph
cudnnConvolutionMode_t mode,
cudnnDataType_t data_type);
template <typename PERF_TYPE, typename ALGO_TYPE>
ALGO_TYPE
select_cudnn_algo(const std::vector<PERF_TYPE>& perf_results,
size_t workspace_byte = std::numeric_limits<size_t>::max())
{
for (auto i = 0; i != perf_results.size(); ++i)
{
auto const& result = perf_results[i];
if (result.status == CUDNN_STATUS_SUCCESS &&
result.memory <= workspace_byte)
{
return result.algo;
}
}
throw ngraph_error(
"No suitable cuDNN algorithm was found for the requested operation.");
}
CUDNNDescriptors m_descriptors;
CUDNNHostParameters m_host_parameters;
......
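
For reference, a hedged sketch of how select_cudnn_algo above would typically be driven (cudnnFindConvolutionForwardAlgorithm and cudnnConvolutionFwdAlgoPerf_t are standard cuDNN APIs; the handle, descriptors, and CUDNN_SAFE_CALL wrapper are assumed to be set up elsewhere):

// Assumes handle, x_desc, w_desc, conv_desc and y_desc are already configured.
int returned = 0;
std::vector<cudnnConvolutionFwdAlgoPerf_t> results(CUDNN_CONVOLUTION_FWD_ALGO_COUNT);
CUDNN_SAFE_CALL(cudnnFindConvolutionForwardAlgorithm(
    handle, x_desc, w_desc, conv_desc, y_desc,
    static_cast<int>(results.size()), &returned, results.data()));
results.resize(returned);
// Pick the fastest algorithm whose workspace fits; throws if none qualifies.
auto algo = select_cudnn_algo<cudnnConvolutionFwdAlgoPerf_t,
                              cudnnConvolutionFwdAlgo_t>(results);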
......@@ -435,15 +435,24 @@ void runtime::gpu::CudaKernelBuilder::get_onehot_op(codegen::CodeWriter& writer,
const std::array<std::string, 2>& data_types)
{
writer << "extern \"C\" __global__ void cuda_" << name << "(" << data_types[0] << "* in, "
<< data_types[1] << "* out, uint32_t m, uint32_t k, uint32_t n)\n";
<< data_types[1]
<< "* out, uint32_t hot_axis_stride, uint32_t hot_axis_shape, uint32_t n)\n";
writer.block_begin();
{
writer << "uint32_t tid = blockIdx.x * blockDim.x + threadIdx.x;\n";
writer << "if (tid < n)\n";
writer.block_begin();
{
writer << "uint32_t idx = (tid / m) * m * k + (m * in[tid]) + tid % m;\n";
writer << "out[idx] = 1;\n";
writer << "int32_t in_pixel = static_cast<int32_t>(in[tid]);\n";
writer << "if(in_pixel >= 0 && in_pixel < hot_axis_shape)\n";
writer.block_begin();
{
writer << "uint32_t idx = tid / hot_axis_stride * hot_axis_stride * hot_axis_shape "
"+ (hot_axis_stride * in_pixel) + tid % "
"hot_axis_stride;\n";
writer << "out[idx] = 1;\n";
}
writer.block_end();
}
writer.block_end();
}
......
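
The rewritten one-hot kernel above bounds-checks each input index before scattering. A host-side C++ equivalent of the same index arithmetic, for clarity (illustrative only; the real kernel runs one CUDA thread per input element, and the builder zero-fills the output beforehand via cuda_memset):

void one_hot_host(const int32_t* in, float* out, uint32_t n,
                  uint32_t hot_axis_stride, uint32_t hot_axis_shape)
{
    for (uint32_t tid = 0; tid < n; tid++)
    {
        int32_t in_pixel = in[tid];
        // Out-of-range class indices are silently skipped, matching the kernel.
        if (in_pixel >= 0 && in_pixel < static_cast<int32_t>(hot_axis_shape))
        {
            uint32_t idx = tid / hot_axis_stride * hot_axis_stride * hot_axis_shape +
                           hot_axis_stride * in_pixel + tid % hot_axis_stride;
            out[idx] = 1;
        }
    }
}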
......@@ -164,12 +164,45 @@ void runtime::gpu::GPU_Emitter::emit_And(EMIT_ARGS)
void runtime::gpu::GPU_Emitter::emit_ArgMax(EMIT_ARGS)
{
throw unsupported_op("Unsupported op '" + node->description() + "'");
cudnnReduceTensorOp_t reduce_op = CUDNN_REDUCE_TENSOR_MAX;
runtime::gpu::GPU_Emitter::emit_ArgReduce(
external_function, writer, node, args, out, reduce_op);
}
void runtime::gpu::GPU_Emitter::emit_ArgMin(EMIT_ARGS)
{
throw unsupported_op("Unsupported op '" + node->description() + "'");
cudnnReduceTensorOp_t reduce_op = CUDNN_REDUCE_TENSOR_MIN;
runtime::gpu::GPU_Emitter::emit_ArgReduce(
external_function, writer, node, args, out, reduce_op);
}
void runtime::gpu::GPU_Emitter::emit_ArgReduce(EMIT_ARGS, cudnnReduceTensorOp_t reduce_mode)
{
if (out[0].get_size() == 0)
{
return;
}
auto argmax = static_cast<const ngraph::op::ArgMax*>(node);
std::vector<size_t> axes{argmax->get_reduction_axis()};
auto axis_set = AxisSet(axes);
std::vector<element::Type> dtypes{args[0].get_element_type(), out[0].get_element_type()};
writer.block_begin();
{
auto& cudnn_emitter = external_function->get_primitive_emitter()->get_cudnn_emitter();
auto index = cudnn_emitter->build_reduce_forward(reduce_mode,
dtypes,
args[0].get_shape(),
axis_set,
CUDNNEmitter::ReductionMode::ArgReduce);
writer << "void* input[] = {" << node_names(args) << "};\n";
writer << "void* output[] = {" << node_names(out) << "};\n";
writer << "gpu::invoke_primitive(ctx, " << index << ", input, output);\n";
}
writer.block_end();
}
void runtime::gpu::GPU_Emitter::emit_Asin(EMIT_ARGS)
......@@ -792,13 +825,17 @@ void runtime::gpu::GPU_Emitter::emit_OneHot(EMIT_ARGS)
auto onehot = static_cast<const ngraph::op::OneHot*>(node);
auto arg_shape = args[0].get_shape();
auto result_shape = out[0].get_shape();
auto output_datatype_size = out[0].get_element_type().size();
size_t idx = onehot->get_one_hot_axis();
writer.block_begin();
{
auto& cuda_emitter = external_function->get_primitive_emitter()->get_cuda_emitter();
auto index = cuda_emitter->build_onehot(
{{args[0].get_type(), out[0].get_type()}}, arg_shape, result_shape, idx);
auto index = cuda_emitter->build_onehot({{args[0].get_type(), out[0].get_type()}},
arg_shape,
result_shape,
idx,
output_datatype_size);
writer.block_begin();
writer << "void* input[] = {" << node_names(args) << "};\n";
......@@ -852,6 +889,7 @@ void runtime::gpu::GPU_Emitter::emit_Power(EMIT_ARGS)
void runtime::gpu::GPU_Emitter::emit_Product(EMIT_ARGS)
{
const ngraph::op::Product* product = static_cast<const ngraph::op::Product*>(node);
writer.block_begin();
{
if (out[0].get_size() != 0)
......@@ -873,12 +911,16 @@ void runtime::gpu::GPU_Emitter::emit_Product(EMIT_ARGS)
// descriptors for tensors with <= 4 dimensions
else
{
std::vector<element::Type> dtypes{args[0].get_element_type(),
out[0].get_element_type()};
auto& cudnn_emitter =
external_function->get_primitive_emitter()->get_cudnn_emitter();
auto index = cudnn_emitter->build_reduce_forward(CUDNN_REDUCE_TENSOR_MUL,
out[0].get_type(),
args[0].get_shape(),
product->get_reduction_axes());
auto index =
cudnn_emitter->build_reduce_forward(CUDNN_REDUCE_TENSOR_MUL,
dtypes,
args[0].get_shape(),
product->get_reduction_axes(),
CUDNNEmitter::ReductionMode::Reduce);
writer << "void* input[] = {" << node_names(args) << "};\n";
writer << "void* output[] = {" << node_names(out) << "};\n";
......@@ -967,14 +1009,16 @@ void runtime::gpu::GPU_Emitter::emit_Reduce(EMIT_ARGS)
reduce_tensor_op = f_ptr->second;
}
}
std::vector<element::Type> dtypes{args[0].get_element_type(),
out[0].get_element_type()};
auto& cudnn_emitter =
external_function->get_primitive_emitter()->get_cudnn_emitter();
auto reduce_index =
cudnn_emitter->build_reduce_forward(reduce_tensor_op,
out[0].get_type(),
dtypes,
args[0].get_shape(),
reduce_op->get_reduction_axes());
reduce_op->get_reduction_axes(),
CUDNNEmitter::ReductionMode::Reduce);
writer << "void* input[] = {" << node_names(args) << "};\n";
writer << "void* output[] = {" << node_names(out) << "};\n";
......
......@@ -75,6 +75,8 @@ namespace ngraph
writer.block_end();
}
static void emit_ArgReduce(EMIT_ARGS, cudnnReduceTensorOp_t);
private:
/// \brief Create a list of node names for each arg in args
/// \param args list of tensor arguments
......
......@@ -16,6 +16,7 @@
#pragma once
#include <cinttypes>
#include <list>
namespace ngraph
......
......@@ -31,8 +31,6 @@ backwards_avgpool_n1_c1_hw4x4
backwards_avgpool_n2_c2_hw4x4
max_pool_3d
avg_pool_3d
argmin_trivial
argmax_trivial
topk_1d_max_all
topk_1d_max_partial
topk_1d_max_one
......
......@@ -17,6 +17,7 @@
#pragma once
#include <cstddef>
#include <string>
namespace ngraph
{
......
......@@ -488,3 +488,13 @@ AxisVector ngraph::get_default_order(size_t rank)
std::iota(begin(default_order), end(default_order), 0);
return default_order;
}
AxisVector ngraph::get_permutation_to_default_order(const AxisVector& axis_order)
{
AxisVector out(axis_order.size());
for (size_t i = 0; i < axis_order.size(); i++)
{
out.at(axis_order[i]) = i;
}
return out;
}
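
A quick worked example of the new helper: it returns the inverse permutation, i.e. the axis order that undoes axis_order:

// With axis_order {0, 3, 1, 2}, output position i is fed by input axis axis_order[i];
// the result maps each input axis back to the position it landed at.
AxisVector order{0, 3, 1, 2};
AxisVector inverse = ngraph::get_permutation_to_default_order(order);
// inverse == AxisVector{0, 2, 3, 1}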
......@@ -204,6 +204,8 @@ namespace ngraph
AxisVector get_default_order(size_t rank);
AxisVector get_default_order(const Shape& shape);
AxisVector get_permutation_to_default_order(const AxisVector& axis_order);
/*
* Return type struct for cache_fprop, with the modified fprop and bprop
* functions
......
......@@ -29,6 +29,7 @@ set(SRC
element_type.cpp
file_util.cpp
graph_partition.cpp
includes.cpp
inliner.cpp
input_output_assign.cpp
main.cpp
......@@ -49,6 +50,9 @@ set(SRC
zero_dim_tensor_elimination.cpp
)
set_source_files_properties(includes.cpp PROPERTIES COMPILE_DEFINITIONS
NGRAPH_INCLUDES="${PROJECT_SOURCE_DIR}/src/ngraph")
if (NGRAPH_ONNX_IMPORT_ENABLE)
list(APPEND SRC onnx_import.cpp)
if (NGRAPH_ONNXIFI_ENABLE)
......@@ -69,7 +73,7 @@ add_subdirectory(files)
add_subdirectory(util)
if(NGRAPH_CPU_ENABLE)
set(SRC ${SRC} backend_performance.cpp cpu_fusion.cpp cpu_test.cpp)
set(SRC ${SRC} backend_performance.cpp cpu_fusion.cpp cpu_test.cpp cpu_reshape_sinking.cpp)
endif()
if(NGRAPH_GPU_ENABLE)
......
......@@ -890,6 +890,7 @@ TEST(cpu_fusion, conv_bias_relu_n2c1h2w2_2)
EXPECT_TRUE(test::all_close(cpu_results.at(0), int_results.at(0)));
}
#if 0
TEST(cpu_fusion, conv_horizontal_fusion)
{
Shape shape_a{2, 1, 6, 6};
......@@ -940,6 +941,7 @@ TEST(cpu_fusion, conv_horizontal_fusion)
size_t cpu_cb = count_ops_of_type<op::ConvolutionBias>(cpu_f);
ASSERT_EQ(cpu_cb, 1);
}
#endif
// ConvolutionBiasAdd relies on an in-place fused MKLDNN kernel.
// Need to ensure that it is fused only when in-place buffer allocation is feasible
......
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <algorithm>
#include <cstdio>
#include <iostream>
#include <list>
#include <memory>
#include "gtest/gtest.h"
#include "ngraph/autodiff/adjoints.hpp"
#include "ngraph/file_util.hpp"
#include "ngraph/graph_util.hpp"
#include "ngraph/log.hpp"
#include "ngraph/ngraph.hpp"
#include "ngraph/op/batch_norm.hpp"
#include "ngraph/op/get_output_element.hpp"
#include "ngraph/op/parameter.hpp"
#include "ngraph/pass/core_fusion.hpp"
#include "ngraph/pass/cse.hpp"
#include "ngraph/pass/manager.hpp"
#include "ngraph/pass/reshape_elimination.hpp"
#include "ngraph/pass/visualize_tree.hpp"
#include "ngraph/runtime/cpu/pass/cpu_fusion.hpp"
#include "ngraph/runtime/cpu/pass/cpu_reshape_sinking.hpp"
#include "ngraph/serializer.hpp"
#include "ngraph/util.hpp"
#include "nlohmann/json.hpp"
#include "util/all_close.hpp"
#include "util/autodiff/backprop_function.hpp"
#include "util/autodiff/numeric_compare.hpp"
#include "util/ndarray.hpp"
#include "util/random.hpp"
#include "util/test_tools.hpp"
using namespace ngraph;
using namespace std;
TEST(cpu_reshape_sinking, edge_splitting)
{
// checks that Reshapes are pushed through op::Abs but are stopped by Sum
Shape shape_nhwc{16, 28, 28, 1};
Shape shape_nchw{16, 1, 28, 28};
auto a = make_shared<op::Parameter>(element::i32, shape_nhwc);
auto reshape = make_shared<op::Reshape>(a, AxisVector{0, 3, 1, 2}, shape_nchw);
auto absn = make_shared<op::Abs>(reshape);
auto absn2 = make_shared<op::Abs>(absn);
auto sum = make_shared<op::Sum>(reshape, AxisSet{0, 1, 2, 3});
auto func = make_shared<Function>(NodeVector{absn2, sum}, op::ParameterVector{a});
pass::Manager pass_manager;
//size_t before_count = count_ops_of_type<op::Reshape>(func);
pass_manager.register_pass<pass::VisualizeTree>("before.pdf");
pass_manager.register_pass<runtime::cpu::pass::CPUReshapeSinking>();
pass_manager.register_pass<pass::ReshapeElimination>();
pass_manager.register_pass<pass::CommonSubexpressionElimination>();
pass_manager.register_pass<pass::VisualizeTree>("after.pdf");
pass_manager.run_passes(func);
ASSERT_EQ(func->get_results().at(1)->get_argument(0), sum);
auto new_reshape =
std::dynamic_pointer_cast<op::Reshape>(func->get_results().at(0)->get_argument(0));
ASSERT_TRUE(new_reshape);
ASSERT_EQ(new_reshape->get_shape(), shape_nchw);
}
TEST(cpu_reshape_sinking, mnist_conv)
{
const string json_path = file_util::path_join(SERIALIZED_ZOO, "tf_conv_mnist_nhwc.json");
const string json_string = file_util::read_file_to_string(json_path);
stringstream ss(json_string);
shared_ptr<Function> func = ngraph::deserialize(ss);
pass::Manager pass_manager;
size_t before_count = count_ops_of_type<op::Reshape>(func);
//pass_manager.register_pass<pass::VisualizeTree>("before.pdf");
pass_manager.register_pass<runtime::cpu::pass::CPUReshapeSinking>();
pass_manager.register_pass<pass::ReshapeElimination>();
pass_manager.register_pass<pass::CommonSubexpressionElimination>();
//pass_manager.register_pass<pass::CoreFusion>();
//pass_manager.register_pass<runtime::cpu::pass::CPUFusion>();
//pass_manager.register_pass<pass::VisualizeTree>("after.pdf");
pass_manager.run_passes(func);
size_t after_count = count_ops_of_type<op::Reshape>(func);
ASSERT_LE(after_count, before_count);
}
......@@ -195,8 +195,9 @@ TEST(cpu_test, mkldnn_layouts)
EXPECT_EQ(vector<float>{expected_result}, rv);
}
TEST(cpu_test, reshape_squeeze)
TEST(cpu_test, reshape_layout_optimizations1)
{
// Squeeze outermost dimension
auto make_function = []() -> std::shared_ptr<Function> {
auto A = make_shared<op::Parameter>(element::f32, Shape{1, 16, 2, 2});
auto B = make_shared<op::Parameter>(element::f32, Shape{32, 16, 1, 1});
......@@ -233,8 +234,9 @@ TEST(cpu_test, reshape_squeeze)
}
}
TEST(cpu_test, reshape_expand)
TEST(cpu_test, reshape_layout_optimizations2)
{
// ExpandDims - innermost and internal dims
auto make_function = []() -> std::shared_ptr<Function> {
auto A = make_shared<op::Parameter>(element::f32, Shape{1, 16, 2, 2});
auto B = make_shared<op::Parameter>(element::f32, Shape{32, 16, 1, 1});
......@@ -271,8 +273,9 @@ TEST(cpu_test, reshape_expand)
}
}
TEST(cpu_test, reshape_squeeze_padded)
TEST(cpu_test, reshape_layout_optimizations3)
{
// Squeeze padded dimension
auto make_function = []() -> std::shared_ptr<Function> {
auto A = make_shared<op::Parameter>(element::f32, Shape{1, 16, 2, 2});
auto B = make_shared<op::Parameter>(element::f32, Shape{1, 16, 1, 1});
......@@ -310,8 +313,9 @@ TEST(cpu_test, reshape_squeeze_padded)
}
}
TEST(cpu_test, reshape_expand_squeeze)
TEST(cpu_test, reshape_layout_optimizations4)
{
// Squeeze and expand dimensions. Ensure no extra conversions downstream
auto make_function = []() -> std::shared_ptr<Function> {
auto A = make_shared<op::Parameter>(element::f32, Shape{1, 16, 1, 8});
auto B1 = make_shared<op::Parameter>(element::f32, Shape{32, 16, 1, 1});
......@@ -322,7 +326,7 @@ TEST(cpu_test, reshape_expand_squeeze)
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1});
auto squeeze = make_shared<op::Reshape>(conv1, AxisVector{0, 1, 2, 3}, Shape{1, 32, 8});
auto squeeze = make_shared<op::Reshape>(conv1, AxisVector{0, 1, 2, 3}, Shape{32, 1, 8});
auto relu = make_shared<op::Relu>(squeeze);
auto expand = make_shared<op::Reshape>(relu, AxisVector{0, 1, 2}, Shape{1, 32, 1, 8});
auto B2 = make_shared<op::Parameter>(element::f32, Shape{8, 32, 1, 1});
......@@ -357,3 +361,120 @@ TEST(cpu_test, reshape_expand_squeeze)
}
EXPECT_LE(count_ops_of_type<runtime::cpu::op::ConvertLayout>(cpu_f), 4);
}
TEST(cpu_test, reshape_layout_optimizations5)
{
auto make_function = []() -> std::shared_ptr<Function> {
auto A = make_shared<op::Parameter>(element::f32, Shape{1, 16, 1, 8});
auto B1 = make_shared<op::Parameter>(element::f32, Shape{32, 16, 1, 1});
auto conv1 = make_shared<op::Convolution>(A,
B1,
Strides{1, 1},
Strides{1, 1},
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1});
auto expand =
make_shared<op::Reshape>(conv1, AxisVector{0, 1, 2, 3}, Shape{1, 1, 32, 1, 8});
auto relu = make_shared<op::Relu>(expand);
auto squeeze =
make_shared<op::Reshape>(relu, AxisVector{0, 1, 2, 3, 4}, Shape{1, 32, 1, 8});
auto B2 = make_shared<op::Parameter>(element::f32, Shape{8, 32, 1, 1});
auto conv2 = make_shared<op::Convolution>(squeeze,
B2,
Strides{1, 1},
Strides{1, 1},
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1});
return make_shared<Function>(NodeVector{conv2}, op::ParameterVector{A, B1, B2});
};
auto backend = runtime::Backend::create("CPU");
auto cpu_f = make_function();
auto int_f = make_function();
test::Uniform<float> rng(-100.0f, 100.0f);
vector<vector<float>> args;
for (shared_ptr<op::Parameter> param : cpu_f->get_parameters())
{
vector<float> tensor_val(shape_size(param->get_shape()));
rng.initialize(tensor_val);
args.push_back(tensor_val);
}
auto int_results = execute(int_f, args, "INTERPRETER");
auto cpu_results = execute(cpu_f, args, "CPU");
for (size_t i = 0; i < cpu_results.size(); i++)
{
EXPECT_TRUE(test::all_close(cpu_results.at(i), int_results.at(i), 1.0e-4f, 1.0e-4f));
}
EXPECT_LE(count_ops_of_type<runtime::cpu::op::ConvertLayout>(cpu_f), 4);
}
TEST(cpu_test, reshape_layout_optimizations6)
{
// Expand outermost dimension after a reduction. Ensure no extra conversions downstream
auto make_function = []() -> std::shared_ptr<Function> {
auto A = make_shared<op::Parameter>(element::f32, Shape{2, 4, 3, 2});
auto mul = make_shared<op::Multiply>(A, A);
auto sum = make_shared<op::Sum>(mul, AxisVector{0});
auto reshape = make_shared<op::Reshape>(sum, AxisVector{0, 1, 2}, Shape{1, 4, 3, 2});
auto sqrt = make_shared<op::Sqrt>(reshape);
return make_shared<Function>(NodeVector{sqrt}, op::ParameterVector{A});
};
auto backend = runtime::Backend::create("CPU");
auto cpu_f = make_function();
auto int_f = make_function();
test::Uniform<float> rng(-100.0f, 100.0f);
vector<vector<float>> args;
for (shared_ptr<op::Parameter> param : cpu_f->get_parameters())
{
vector<float> tensor_val(shape_size(param->get_shape()));
rng.initialize(tensor_val);
args.push_back(tensor_val);
}
auto int_results = execute(int_f, args, "INTERPRETER");
auto cpu_results = execute(cpu_f, args, "CPU");
for (size_t i = 0; i < cpu_results.size(); i++)
{
EXPECT_TRUE(test::all_close(cpu_results.at(i), int_results.at(i)));
}
EXPECT_EQ(count_ops_of_type<runtime::cpu::op::ConvertLayout>(cpu_f), 0);
}
TEST(cpu_test, reshape_layout_optimizations7)
{
// Expand multiple dimensions. Ensure no extra conversions downstream
auto make_function = []() -> std::shared_ptr<Function> {
auto A = make_shared<op::Parameter>(element::f32, Shape{1, 4, 10, 6, 10});
auto mul = make_shared<op::Multiply>(A, A);
auto sum = make_shared<op::Sum>(mul, AxisVector{0, 1});
auto reshape = make_shared<op::Reshape>(sum, AxisVector{0, 1, 2}, Shape{1, 1, 10, 6, 10});
return make_shared<Function>(NodeVector{reshape}, op::ParameterVector{A});
};
auto backend = runtime::Backend::create("CPU");
auto cpu_f = make_function();
auto int_f = make_function();
test::Uniform<float> rng(-100.0f, 100.0f);
vector<vector<float>> args;
for (shared_ptr<op::Parameter> param : cpu_f->get_parameters())
{
vector<float> tensor_val(shape_size(param->get_shape()));
rng.initialize(tensor_val);
args.push_back(tensor_val);
}
auto int_results = execute(int_f, args, "INTERPRETER");
auto cpu_results = execute(cpu_f, args, "CPU");
for (size_t i = 0; i < cpu_results.size(); i++)
{
EXPECT_TRUE(test::all_close(cpu_results.at(i), int_results.at(i)));
}
EXPECT_EQ(count_ops_of_type<runtime::cpu::op::ConvertLayout>(cpu_f), 0);
}
[{
"name" : "Function_0",
"ops" : [
{
"cacheable" : false,
"element_type" : "float",
"inputs" : [],
"name" : "Parameter_12",
"op" : "Parameter",
"outputs" : ["Parameter_12_0"],
"shape" : [ 2, 224, 224, 3 ]
},
{
"cacheable" : false,
"element_type" : "float",
"inputs" : [],
"name" : "Parameter_11",
"op" : "Parameter",
"outputs" : ["Parameter_11_0"],
"shape" : [10]
},
{
"cacheable" : false,
"element_type" : "float",
"inputs" : [],
"name" : "Parameter_10",
"op" : "Parameter",
"outputs" : ["Parameter_10_0"],
"shape" : [ 37632, 10 ]
},
{
"cacheable" : false,
"element_type" : "float",
"inputs" : [],
"name" : "Parameter_9",
"op" : "Parameter",
"outputs" : ["Parameter_9_0"],
"shape" : [3]
},
{
"cacheable" : false,
"element_type" : "float",
"inputs" : [],
"name" : "Parameter_8",
"op" : "Parameter",
"outputs" : ["Parameter_8_0"],
"shape" : [3]
},
{
"cacheable" : false,
"element_type" : "float",
"inputs" : [],
"name" : "Parameter_7",
"op" : "Parameter",
"outputs" : ["Parameter_7_0"],
"shape" : [3]
},
{
"cacheable" : false,
"element_type" : "float",
"inputs" : [],
"name" : "Parameter_6",
"op" : "Parameter",
"outputs" : ["Parameter_6_0"],
"shape" : [3]
},
{
"cacheable" : false,
"element_type" : "float",
"inputs" : [],
"name" : "Parameter_5",
"op" : "Parameter",
"outputs" : ["Parameter_5_0"],
"shape" : [ 3, 3, 3, 3 ]
},
{
"cacheable" : false,
"element_type" : "float",
"inputs" : [],
"name" : "Parameter_4",
"op" : "Parameter",
"outputs" : ["Parameter_4_0"],
"shape" : [3]
},
{
"cacheable" : false,
"element_type" : "float",
"inputs" : [],
"name" : "Parameter_3",
"op" : "Parameter",
"outputs" : ["Parameter_3_0"],
"shape" : [3]
},
{
"cacheable" : false,
"element_type" : "float",
"inputs" : [],
"name" : "Parameter_2",
"op" : "Parameter",
"outputs" : ["Parameter_2_0"],
"shape" : [3]
},
{
"cacheable" : false,
"element_type" : "float",
"inputs" : [],
"name" : "Parameter_1",
"op" : "Parameter",
"outputs" : ["Parameter_1_0"],
"shape" : [3]
},
{
"cacheable" : false,
"element_type" : "float",
"inputs" : [],
"name" : "Parameter_0",
"op" : "Parameter",
"outputs" : ["Parameter_0_0"],
"shape" : [ 3, 3, 3, 3 ]
},
{
"input_order" : [ 0, 3, 1, 2 ],
"inputs" : ["Parameter_12"],
"name" : "Reshape_13",
"op" : "Reshape",
"output_shape" : [ 2, 3, 224, 224 ],
"outputs" : ["Reshape_13_0"]
},
{
"axes" : [0],
"inputs" : ["Parameter_11"],
"name" : "Broadcast_36",
"op" : "Broadcast",
"outputs" : ["Broadcast_36_0"],
"shape" : [ 2, 10 ]
},
{
"input_order" : [ 3, 2, 0, 1 ],
"inputs" : ["Parameter_5"],
"name" : "Reshape_22",
"op" : "Reshape",
"output_shape" : [ 3, 3, 3, 3 ],
"outputs" : ["Reshape_22_0"]
},
{
"input_order" : [ 3, 2, 0, 1 ],
"inputs" : ["Parameter_0"],
"name" : "Reshape_14",
"op" : "Reshape",
"output_shape" : [ 3, 3, 3, 3 ],
"outputs" : ["Reshape_14_0"]
},
{
"data_dilation_strides" : [ 1, 1 ],
"inputs" : [ "Reshape_13", "Reshape_14" ],
"name" : "Convolution_15",
"op" : "Convolution",
"outputs" : ["Convolution_15_0"],
"padding_above" : [ 1, 1 ],
"padding_below" : [ 1, 1 ],
"window_dilation_strides" : [ 1, 1 ],
"window_movement_strides" : [ 1, 1 ]
},
{
"input_order" : [ 0, 2, 3, 1 ],
"inputs" : ["Convolution_15"],
"name" : "Reshape_16",
"op" : "Reshape",
"output_shape" : [ 2, 224, 224, 3 ],
"outputs" : ["Reshape_16_0"]
},
{
"input_order" : [ 0, 3, 1, 2 ],
"inputs" : ["Reshape_16"],
"name" : "Reshape_17",
"op" : "Reshape",
"output_shape" : [ 2, 3, 224, 224 ],
"outputs" : ["Reshape_17_0"]
},
{
"eps" : 1.0009999641624745e-05,
"inputs" : [
"Parameter_1", "Parameter_2", "Reshape_17", "Parameter_3",
"Parameter_4"
],
"name" : "BatchNorm_18",
"op" : "BatchNorm",
"outputs" : ["BatchNorm_18_0"],
"training" : false
},
{
"input_order" : [ 0, 2, 3, 1 ],
"inputs" : ["BatchNorm_18"],
"name" : "Reshape_19",
"op" : "Reshape",
"output_shape" : [ 2, 224, 224, 3 ],
"outputs" : ["Reshape_19_0"]
},
{
"inputs" : ["Reshape_19"],
"name" : "Relu_20",
"op" : "Relu",
"outputs" : ["Relu_20_0"]
},
{
"input_order" : [ 0, 3, 1, 2 ],
"inputs" : ["Relu_20"],
"name" : "Reshape_21",
"op" : "Reshape",
"output_shape" : [ 2, 3, 224, 224 ],
"outputs" : ["Reshape_21_0"]
},
{
"data_dilation_strides" : [ 1, 1 ],
"inputs" : [ "Reshape_21", "Reshape_22" ],
"name" : "Convolution_23",
"op" : "Convolution",
"outputs" : ["Convolution_23_0"],
"padding_above" : [ 1, 1 ],
"padding_below" : [ 1, 1 ],
"window_dilation_strides" : [ 1, 1 ],
"window_movement_strides" : [ 1, 1 ]
},
{
"input_order" : [ 0, 2, 3, 1 ],
"inputs" : ["Convolution_23"],
"name" : "Reshape_24",
"op" : "Reshape",
"output_shape" : [ 2, 224, 224, 3 ],
"outputs" : ["Reshape_24_0"]
},
{
"input_order" : [ 0, 3, 1, 2 ],
"inputs" : ["Reshape_24"],
"name" : "Reshape_25",
"op" : "Reshape",
"output_shape" : [ 2, 3, 224, 224 ],
"outputs" : ["Reshape_25_0"]
},
{
"eps" : 1.0009999641624745e-05,
"inputs" : [
"Parameter_6", "Parameter_7", "Reshape_25", "Parameter_8",
"Parameter_9"
],
"name" : "BatchNorm_26",
"op" : "BatchNorm",
"outputs" : ["BatchNorm_26_0"],
"training" : false
},
{
"input_order" : [ 0, 2, 3, 1 ],
"inputs" : ["BatchNorm_26"],
"name" : "Reshape_27",
"op" : "Reshape",
"output_shape" : [ 2, 224, 224, 3 ],
"outputs" : ["Reshape_27_0"]
},
{
"inputs" : [ "Reshape_27", "Parameter_12" ],
"name" : "Add_28",
"op" : "Add",
"outputs" : ["Add_28_0"]
},
{
"inputs" : ["Add_28"],
"name" : "Relu_29",
"op" : "Relu",
"outputs" : ["Relu_29_0"]
},
{
"input_order" : [ 0, 3, 1, 2 ],
"inputs" : ["Relu_29"],
"name" : "Reshape_30",
"op" : "Reshape",
"output_shape" : [ 2, 3, 224, 224 ],
"outputs" : ["Reshape_30_0"]
},
{
"include_padding_in_avg_computation" : false,
"inputs" : ["Reshape_30"],
"name" : "AvgPool_31",
"op" : "AvgPool",
"outputs" : ["AvgPool_31_0"],
"padding_above" : [ 0, 0 ],
"padding_below" : [ 0, 0 ],
"window_movement_strides" : [ 2, 2 ],
"window_shape" : [ 2, 2 ]
},
{
"input_order" : [ 0, 2, 3, 1 ],
"inputs" : ["AvgPool_31"],
"name" : "Reshape_32",
"op" : "Reshape",
"output_shape" : [ 2, 112, 112, 3 ],
"outputs" : ["Reshape_32_0"]
},
{
"input_order" : [ 0, 1, 2, 3 ],
"inputs" : ["Reshape_32"],
"name" : "Reshape_34",
"op" : "Reshape",
"output_shape" : [ 2, 37632 ],
"outputs" : ["Reshape_34_0"]
},
{
"inputs" : [ "Reshape_34", "Parameter_10" ],
"name" : "Dot_35",
"op" : "Dot",
"outputs" : ["Dot_35_0"],
"reduction_axes_count" : 1
},
{
"inputs" : [ "Dot_35", "Broadcast_36" ],
"name" : "Add_37",
"op" : "Add",
"outputs" : ["Add_37_0"]
},
{
"inputs" : ["Add_37"],
"name" : "Result_38",
"op" : "Result",
"outputs" : ["Result_38_0"]
}
],
"parameters" : [
"Parameter_0", "Parameter_1", "Parameter_2", "Parameter_3", "Parameter_4",
"Parameter_5", "Parameter_6", "Parameter_7", "Parameter_8", "Parameter_9",
"Parameter_10", "Parameter_11", "Parameter_12"
],
"result" : ["Result_38"]
}]
\ No newline at end of file