Unverified Commit 4203a832 authored by Jayaram Bobba, committed by GitHub

Merge branch 'master' into jbobba/maxpool-layouts

parents e37677c0 529362b5
...@@ -75,10 +75,5 @@ namespace ngraph ...@@ -75,10 +75,5 @@ namespace ngraph
protected: protected:
std::unordered_map<Node*, std::shared_ptr<Node>> m_adjoint_map; std::unordered_map<Node*, std::shared_ptr<Node>> m_adjoint_map;
}; };
/// @brief Returns a FunctionSpec for the backprop derivative of its argument.
/// @param f is f(X_i...)
/// @returns f'(X_i..., c) where f'(x_i, ..., c)_j is backprop for X_j
std::shared_ptr<Function> backprop_function(const std::shared_ptr<Function>& f);
} }
} }
...@@ -21,21 +21,20 @@ ...@@ -21,21 +21,20 @@
ngraph::op::BatchNorm::BatchNorm(double eps, ngraph::op::BatchNorm::BatchNorm(double eps,
std::shared_ptr<ngraph::Node> gamma, std::shared_ptr<ngraph::Node> gamma,
std::shared_ptr<ngraph::Node> beta, std::shared_ptr<ngraph::Node> beta,
std::shared_ptr<ngraph::Node> input, std::shared_ptr<ngraph::Node> input)
std::shared_ptr<ngraph::Node> mean, : RequiresTensorViewArgs("BatchNorm", {gamma, beta, input})
std::shared_ptr<ngraph::Node> variance)
: RequiresTensorViewArgs("BatchNorm", {gamma, beta, input, mean, variance})
, m_bn_input_shape(input->get_shape()) , m_bn_input_shape(input->get_shape())
, m_bn_variance_shape(variance->get_shape())
, m_bn_mean_shape(mean->get_shape())
, m_epsilon(eps) , m_epsilon(eps)
{ {
add_output(input->get_element_type(), m_bn_input_shape);
if (m_bn_input_shape.size() < 2) if (m_bn_input_shape.size() < 2)
{ {
throw ngraph_error("input tensor to batchnorm much have tensor of atleast rank 2"); throw ngraph_error("input tensor to batchnorm much have tensor of atleast rank 2");
else
{
this->m_bn_variance_shape.push_back(input->get_shape()[1]);
this->m_bn_mean_shape.push_back(input->get_shape()[1]);
}
if (m_bn_input_shape[1] == 0) if (m_bn_input_shape[1] == 0)
{ {
...@@ -49,51 +48,27 @@ ngraph::op::BatchNorm::BatchNorm(double eps, ...@@ -49,51 +48,27 @@ ngraph::op::BatchNorm::BatchNorm(double eps,
throw ngraph_error("gamma, beta, mean, variance shoud have all rank 1"); throw ngraph_error("gamma, beta, mean, variance shoud have all rank 1");
// assuming input shape (N, C, H, W), check if the size of mean and
// variance are equal to channel axis
if (mean->get_shape()[0] != m_bn_input_shape[1])
{
throw ngraph_error("mean size is not equal to input channel size");
}
if (variance->get_shape()[0] != m_bn_input_shape[1])
{
throw ngraph_error("variance size is not equal to input channel size");
}
if (variance->get_shape().size() != mean->get_shape().size())
{
throw ngraph_error("mean and variance rank does not match");
}
if (gamma->get_shape().size() != beta->get_shape().size()) if (gamma->get_shape().size() != beta->get_shape().size())
{ {
throw ngraph_error("gamma and beta rank does not match"); throw ngraph_error("gamma and beta rank does not match");
} }
if (input->get_element_type() != mean->get_element_type())
{
throw ngraph_error("input tensor and mean element type does not match");
}
if (input->get_element_type() != variance->get_element_type())
{
throw ngraph_error("input tensor and variance element type does not match");
}
if (gamma->get_element_type() != beta->get_element_type()) if (gamma->get_element_type() != beta->get_element_type())
{ {
throw ngraph_error("gamma and beta element type does not match"); throw ngraph_error("gamma and beta element type does not match");
} }
add_output(input->get_element_type(), m_bn_input_shape);
add_output(input->get_element_type(), m_bn_mean_shape);
add_output(input->get_element_type(), m_bn_variance_shape);
} }
std::shared_ptr<ngraph::Node> std::shared_ptr<ngraph::Node>
ngraph::op::BatchNorm::copy_with_new_args(const NodeVector& new_args) const ngraph::op::BatchNorm::copy_with_new_args(const NodeVector& new_args) const
{ {
if (new_args.size() != 5) if (new_args.size() != 3)
throw ngraph_error("Incorrect number of new arguments"); throw ngraph_error("Incorrect number of new arguments");
return std::make_shared<BatchNorm>( return std::make_shared<BatchNorm>(m_epsilon, new_args.at(0), new_args.at(1), new_args.at(2));
m_epsilon, new_args.at(0), new_args.at(1), new_args.at(2), new_args.at(3), new_args.at(4));
} }
ngraph::op::BatchNormBackprop::BatchNormBackprop(double eps, ngraph::op::BatchNormBackprop::BatchNormBackprop(double eps,
...@@ -174,10 +149,10 @@ void ngraph::op::BatchNorm::generate_adjoints(autodiff::Adjoints& adjoints, ...@@ -174,10 +149,10 @@ void ngraph::op::BatchNorm::generate_adjoints(autodiff::Adjoints& adjoints,
auto gamma = get_input_op(0); auto gamma = get_input_op(0);
auto beta = get_input_op(1); auto beta = get_input_op(1);
auto input = get_input_op(2); auto input = get_input_op(2);
auto mean = get_input_op(3); auto mean = std::make_shared<op::GetOutputElement>(shared_from_this(), 1);
auto variance = get_input_op(4); auto var = std::make_shared<op::GetOutputElement>(shared_from_this(), 2);
auto bbn = std::make_shared<op::BatchNormBackprop>( auto bbn = std::make_shared<op::BatchNormBackprop>(
get_eps_value(), gamma, beta, input, mean, variance, delta); get_eps_value(), gamma, beta, input, mean, var, delta);
auto dinput = std::make_shared<op::GetOutputElement>(bbn, 0); auto dinput = std::make_shared<op::GetOutputElement>(bbn, 0);
auto dgamma = std::make_shared<op::GetOutputElement>(bbn, 1); auto dgamma = std::make_shared<op::GetOutputElement>(bbn, 1);
auto dbeta = std::make_shared<op::GetOutputElement>(bbn, 2); auto dbeta = std::make_shared<op::GetOutputElement>(bbn, 2);
......
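With this change the forward BatchNorm takes only gamma, beta, and the input tensor, and publishes three outputs (normalized result, batch mean, batch variance) that consumers retrieve through op::GetOutputElement. A minimal sketch of the new construction pattern, mirroring the updated cpu_fusion tests later in this diff; the shapes, epsilon value, and helper name are illustrative only:

#include <memory>
#include "ngraph/ngraph.hpp"
#include "ngraph/ops/batch_norm.hpp"
#include "ngraph/ops/get_output_element.hpp"

using namespace ngraph;

// Sketch: build a Function exposing all three BatchNorm outputs.
std::shared_ptr<Function> make_bn_fprop_function()
{
    // NCHW input with C = 2; gamma and beta are rank-1 tensors of length C.
    auto input = std::make_shared<op::Parameter>(element::f32, Shape{1, 2, 2, 2});
    auto gamma = std::make_shared<op::Parameter>(element::f32, Shape{2});
    auto beta = std::make_shared<op::Parameter>(element::f32, Shape{2});
    double eps = 0.001;

    auto bn = std::make_shared<op::BatchNorm>(eps, gamma, beta, input);

    // Output 0: normalized tensor, output 1: batch mean, output 2: batch variance.
    auto normalized = std::make_shared<op::GetOutputElement>(bn, 0);
    auto mean = std::make_shared<op::GetOutputElement>(bn, 1);
    auto variance = std::make_shared<op::GetOutputElement>(bn, 2);

    return std::make_shared<Function>(NodeVector{normalized, mean, variance},
                                      op::ParameterVector{input, gamma, beta});
}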
...@@ -33,9 +33,7 @@ namespace ngraph ...@@ -33,9 +33,7 @@ namespace ngraph
BatchNorm(double eps, BatchNorm(double eps,
std::shared_ptr<Node> gamma, std::shared_ptr<Node> gamma,
std::shared_ptr<Node> beta, std::shared_ptr<Node> beta,
std::shared_ptr<Node> input, std::shared_ptr<Node> input);
std::shared_ptr<Node> mean,
std::shared_ptr<Node> variance);
const Shape& get_inputs_shape() const { return m_bn_input_shape; } const Shape& get_inputs_shape() const { return m_bn_input_shape; }
const Shape& get_variance_shape() const { return m_bn_variance_shape; } const Shape& get_variance_shape() const { return m_bn_variance_shape; }
......
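On the backward path, generate_adjoints now feeds the op's own mean and variance outputs into BatchNormBackprop instead of taking them as two extra graph inputs. A hedged sketch of that wiring in isolation, following the adjoint code above; the helper name and its parameter list are illustrative, not part of the ngraph API:

#include <memory>
#include "ngraph/ngraph.hpp"
#include "ngraph/ops/batch_norm.hpp"
#include "ngraph/ops/get_output_element.hpp"

using namespace ngraph;

// Sketch: derive the three BatchNorm gradients from a forward node and a delta.
NodeVector make_bn_bprop(double eps,
                         const std::shared_ptr<Node>& gamma,
                         const std::shared_ptr<Node>& beta,
                         const std::shared_ptr<Node>& input,
                         const std::shared_ptr<op::BatchNorm>& bn,
                         const std::shared_ptr<Node>& delta)
{
    // Mean and variance come from the forward node's own outputs 1 and 2.
    auto mean = std::make_shared<op::GetOutputElement>(bn, 1);
    auto variance = std::make_shared<op::GetOutputElement>(bn, 2);

    auto bbn = std::make_shared<op::BatchNormBackprop>(
        eps, gamma, beta, input, mean, variance, delta);

    // BatchNormBackprop outputs: 0 = dinput, 1 = dgamma, 2 = dbeta.
    return NodeVector{std::make_shared<op::GetOutputElement>(bbn, 0),
                      std::make_shared<op::GetOutputElement>(bbn, 1),
                      std::make_shared<op::GetOutputElement>(bbn, 2)};
}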
...@@ -301,14 +301,26 @@ namespace ngraph ...@@ -301,14 +301,26 @@ namespace ngraph
auto gamma_shape = args[0].get_shape(); auto gamma_shape = args[0].get_shape();
auto beta_shape = args[1].get_shape(); auto beta_shape = args[1].get_shape();
auto input_shape = args[2].get_shape(); auto input_shape = args[2].get_shape();
auto mean_shape = args[3].get_shape();
auto variance_shape = args[4].get_shape();
auto result_shape = out[0].get_shape(); auto result_shape = out[0].get_shape();
auto mean_shape = out[1].get_shape();
auto variance_shape = out[2].get_shape();
// get input element type // get input element type
const string& et = runtime::cpu::mkldnn_utils::get_mkldnn_data_type_string( const string& et = runtime::cpu::mkldnn_utils::get_mkldnn_data_type_string(
args[2].get_element_type()); args[2].get_element_type());
const string& gamma_format = runtime::cpu::mkldnn_utils::get_mkldnn_format_string(
runtime::cpu::mkldnn_utils::get_input_mkldnn_format(node, 0));
const string& beta_format = runtime::cpu::mkldnn_utils::get_mkldnn_format_string(
runtime::cpu::mkldnn_utils::get_input_mkldnn_format(node, 1));
if (gamma_format.compare("memory::format::x") != 0 &&
beta_format.compare("memory::format::x") != 0)
{
throw std::runtime_error(
"gamma layout->" + gamma_format + ", beta layout->" + beta_format +
" should match and both should have memory::format::x format");
}
writer << "{\n"; writer << "{\n";
writer.indent++; writer.indent++;
...@@ -329,16 +341,20 @@ namespace ngraph ...@@ -329,16 +341,20 @@ namespace ngraph
// get the eps value from the bn node // get the eps value from the bn node
writer << "auto epsilon = " << batchnorm->get_eps_value() << ";\n"; writer << "auto epsilon = " << batchnorm->get_eps_value() << ";\n";
const string& input_format = runtime::cpu::mkldnn_utils::get_mkldnn_format_string(
runtime::cpu::mkldnn_utils::get_input_mkldnn_format(node, 2));
const string& result_format = runtime::cpu::mkldnn_utils::get_mkldnn_format_string(
runtime::cpu::mkldnn_utils::get_output_mkldnn_format(node, 0));
// Bind to CPU engine // Bind to CPU engine
writer << "engine cpu_engine = engine(engine::cpu, 0);\n"; writer << "engine cpu_engine = engine(engine::cpu, 0);\n";
// create memory descriptors // create memory descriptors
writer << "memory::desc input_data_desc = memory::desc({" << join(input_shape) writer << "memory::desc input_data_desc = memory::desc({" << join(input_shape)
<< "}, " << et << ", memory::format::nchw);\n"; << "}, " << et << ", " << input_format << ");\n";
// TODO define weights by stacking gamma and beta values // TODO define weights by stacking gamma and beta values
writer << "memory::desc weights_desc = memory::desc({" << join(weights_shape) writer << "memory::desc weights_desc = memory::desc({" << join(weights_shape)
<< "}, " << et << ", memory::format::nc);\n"; << "}, " << et << ", memory::format::nc);\n";
writer << "memory::desc result_desc = memory::desc({" << join(result_shape) << "}, " writer << "memory::desc result_desc = memory::desc({" << join(result_shape) << "}, "
<< et << ", memory::format::nchw);\n"; << et << ", " << result_format << ");\n";
writer << "memory::desc mean_desc = memory::desc({" << join(mean_shape) << "}, " writer << "memory::desc mean_desc = memory::desc({" << join(mean_shape) << "}, "
<< et << ", memory::format::x);\n"; << et << ", memory::format::x);\n";
writer << "memory::desc variance_desc = memory::desc({" << join(variance_shape) writer << "memory::desc variance_desc = memory::desc({" << join(variance_shape)
...@@ -349,17 +365,17 @@ namespace ngraph ...@@ -349,17 +365,17 @@ namespace ngraph
<< args[2].get_name() << ");\n"; << args[2].get_name() << ");\n";
writer << "memory weights = memory({weights_desc, cpu_engine}, bn_weights.data()" writer << "memory weights = memory({weights_desc, cpu_engine}, bn_weights.data()"
<< ");\n"; << ");\n";
writer << "memory mean = memory({mean_desc, cpu_engine}, " << args[3].get_name()
<< ");\n";
writer << "memory variance = memory({variance_desc, cpu_engine}, "
<< args[4].get_name() << ");\n";
writer << "memory result = memory({result_desc, cpu_engine}, " << out[0].get_name() writer << "memory result = memory({result_desc, cpu_engine}, " << out[0].get_name()
<< ");\n"; << ");\n";
writer << "memory mean = memory({mean_desc, cpu_engine}, " << out[1].get_name()
<< ");\n";
writer << "memory variance = memory({variance_desc, cpu_engine}, "
<< out[2].get_name() << ");\n";
// create batchnorm descriptor // create batchnorm descriptor
writer << "batch_normalization_forward::desc bn_fprop_desc = " writer << "batch_normalization_forward::desc bn_fprop_desc = "
"batch_normalization_forward::desc(forward_training," "batch_normalization_forward::desc(forward_training,"
<< "input_data_desc, epsilon, use_global_stats|use_scale_shift);\n"; << "input_data_desc, epsilon, use_scale_shift);\n";
// bn fprop primitive descriptor // bn fprop primitive descriptor
writer writer
<< "batch_normalization_forward::primitive_desc bn_fprop_prim_desc = " << "batch_normalization_forward::primitive_desc bn_fprop_prim_desc = "
...@@ -368,8 +384,8 @@ namespace ngraph ...@@ -368,8 +384,8 @@ namespace ngraph
// create a batchnorm fprop primitive // create a batchnorm fprop primitive
writer << "batch_normalization_forward bn_fprop = " writer << "batch_normalization_forward bn_fprop = "
"batch_normalization_forward(bn_fprop_prim_desc, " "batch_normalization_forward(bn_fprop_prim_desc, "
"primitive::at(input_data),primitive::at(mean), primitive::at(variance)," "primitive::at(input_data),"
<< "primitive::at(weights), result); \n"; << "primitive::at(weights), result, mean, variance); \n";
// create stream and execute // create stream and execute
writer << "stream s = stream(stream::kind::eager);\n" writer << "stream s = stream(stream::kind::eager);\n"
......
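The emitted code now runs the MKLDNN fprop primitive in forward_training mode with use_scale_shift only (no use_global_stats), so the primitive produces mean and variance into out[1] and out[2] rather than consuming them. For orientation, a hedged sketch of the MKLDNN 0.x call sequence that the generated strings above spell out; the raw pointers, shapes, and formats here are placeholders, not the emitter's actual variables:

#include "mkldnn.hpp"

using namespace mkldnn;

// Illustrative only: the same sequence the code generator writes as strings.
void batchnorm_fprop_sketch(float* input_ptr, float* weights_ptr,
                            float* out_ptr, float* mean_ptr, float* var_ptr,
                            double epsilon)
{
    engine cpu_engine(engine::cpu, 0);

    // Placeholder shapes: NCHW input with C = 2; weights = gamma and beta stacked (2 x C).
    memory::desc input_data_desc({1, 2, 2, 2}, memory::data_type::f32, memory::format::nchw);
    memory::desc weights_desc({2, 2}, memory::data_type::f32, memory::format::nc);
    memory::desc result_desc({1, 2, 2, 2}, memory::data_type::f32, memory::format::nchw);
    memory::desc mean_desc({2}, memory::data_type::f32, memory::format::x);
    memory::desc variance_desc({2}, memory::data_type::f32, memory::format::x);

    memory input_data({input_data_desc, cpu_engine}, input_ptr);
    memory weights({weights_desc, cpu_engine}, weights_ptr);
    memory result({result_desc, cpu_engine}, out_ptr);
    memory mean({mean_desc, cpu_engine}, mean_ptr);
    memory variance({variance_desc, cpu_engine}, var_ptr);

    // forward_training + use_scale_shift: mean and variance are produced, not consumed.
    batch_normalization_forward::desc bn_fprop_desc(
        forward_training, input_data_desc, epsilon, use_scale_shift);
    batch_normalization_forward::primitive_desc bn_fprop_prim_desc(bn_fprop_desc, cpu_engine);

    batch_normalization_forward bn_fprop(bn_fprop_prim_desc,
                                         primitive::at(input_data),
                                         primitive::at(weights),
                                         result, mean, variance);

    stream(stream::kind::eager).submit({bn_fprop}).wait();
}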
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include "ngraph/ops/convolution.hpp" #include "ngraph/ops/convolution.hpp"
#include "ngraph/ops/divide.hpp" #include "ngraph/ops/divide.hpp"
#include "ngraph/ops/dot.hpp" #include "ngraph/ops/dot.hpp"
#include "ngraph/ops/get_output_element.hpp"
#include "ngraph/ops/multiply.hpp" #include "ngraph/ops/multiply.hpp"
#include "ngraph/ops/pad.hpp" #include "ngraph/ops/pad.hpp"
#include "ngraph/ops/parameter.hpp" #include "ngraph/ops/parameter.hpp"
...@@ -301,14 +302,12 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_fprop_bn() ...@@ -301,14 +302,12 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_fprop_bn()
// get epsilon value // get epsilon value
auto eps_ptr = std::dynamic_pointer_cast<op::Constant>(pattern_map[eps_label]); auto eps_ptr = std::dynamic_pointer_cast<op::Constant>(pattern_map[eps_label]);
double epsilon = *(reinterpret_cast<const double*>(eps_ptr->get_data_ptr())); double epsilon = *(reinterpret_cast<const double*>(eps_ptr->get_data_ptr()));
auto bn_node = std::shared_ptr<Node>(new op::BatchNorm(epsilon, auto bn_node = std::make_shared<op::BatchNorm>(
pattern_map[gamma_label], epsilon, pattern_map[gamma_label], pattern_map[beta_label], pattern_map[input]);
pattern_map[beta_label],
pattern_map[input], auto normalized_output = std::shared_ptr<Node>(new op::GetOutputElement(bn_node, 0));
pattern_map[mean_label],
pattern_map[variance_label])); return normalized_output;
return bn_node;
}; };
auto m = std::make_shared<ngraph::pattern::Matcher>(add_beta, callback); auto m = std::make_shared<ngraph::pattern::Matcher>(add_beta, callback);
......
...@@ -13,7 +13,6 @@ ...@@ -13,7 +13,6 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*******************************************************************************/ *******************************************************************************/
#include "ngraph/runtime/gpu/gpu_cuda_kernel_builder.hpp" #include "ngraph/runtime/gpu/gpu_cuda_kernel_builder.hpp"
namespace ngraph namespace ngraph
...@@ -22,10 +21,10 @@ namespace ngraph ...@@ -22,10 +21,10 @@ namespace ngraph
{ {
namespace gpu namespace gpu
{ {
void CudaKernelBuilder::get_1_element_op(const std::string& name, void CudaKernelBuilder::get_unary_elementwise_op(const std::string& name,
const std::string& data_type, const std::string& data_type,
const std::string& op, const std::string& op,
std::string& kernel) std::string& kernel)
{ {
kernel = R"( kernel = R"(
extern "C" __global__ extern "C" __global__
...@@ -40,10 +39,10 @@ out[tid] =)" + op + "(in[tid]);\n" + ...@@ -40,10 +39,10 @@ out[tid] =)" + op + "(in[tid]);\n" +
return; return;
} }
void CudaKernelBuilder::get_2_element_op(const std::string& name, void CudaKernelBuilder::get_binary_elementwise_op(const std::string& name,
const std::string& data_type, const std::string& data_type,
const std::string& op, const std::string& op,
std::string& kernel) std::string& kernel)
{ {
kernel = R"( kernel = R"(
extern "C" __global__ extern "C" __global__
...@@ -60,10 +59,11 @@ out[tid] = in1[tid] )" + op + ...@@ -60,10 +59,11 @@ out[tid] = in1[tid] )" + op +
return; return;
} }
void CudaKernelBuilder::get_n_element_op(const std::string& name, void
const std::string& data_type, CudaKernelBuilder::get_arbitrary_elementwise_op(const std::string& name,
const std::vector<std::string>& ops, const std::string& data_type,
std::string& kernel) const std::vector<std::string>& ops,
std::string& kernel)
{ {
kernel = ""; kernel = "";
return; return;
......
...@@ -28,20 +28,20 @@ namespace ngraph ...@@ -28,20 +28,20 @@ namespace ngraph
class CudaKernelBuilder class CudaKernelBuilder
{ {
public: public:
static void get_1_element_op(const std::string& name, static void get_unary_elementwise_op(const std::string& name,
const std::string& data_type, const std::string& data_type,
const std::string& op, const std::string& op,
std::string& kernel); std::string& kernel);
static void get_2_element_op(const std::string& name, static void get_binary_elementwise_op(const std::string& name,
const std::string& data_type, const std::string& data_type,
const std::string& op, const std::string& op,
std::string& kernel); std::string& kernel);
static void get_n_element_op(const std::string& name, static void get_arbitrary_elementwise_op(const std::string& name,
const std::string& data_type, const std::string& data_type,
const std::vector<std::string>& ops, const std::vector<std::string>& ops,
std::string& kernel); std::string& kernel);
}; };
} }
} }
......
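The three builder entry points now name their arity explicitly instead of counting operands. A hedged sketch of how a caller might drive them to obtain NVRTC source strings; the operator strings and the second input name are assumptions chosen to match the kernel fragments visible above:

#include <string>
#include <vector>
#include "ngraph/runtime/gpu/gpu_cuda_kernel_builder.hpp"

using ngraph::runtime::gpu::CudaKernelBuilder;

void build_example_kernels()
{
    std::string unary_kernel;
    // Generates a kernel whose body is out[tid] = fabsf(in[tid]);
    CudaKernelBuilder::get_unary_elementwise_op("abs", "float", "fabsf", unary_kernel);

    std::string binary_kernel;
    // The op string is spliced in as an infix operator: out[tid] = in1[tid] + in2[tid];
    CudaKernelBuilder::get_binary_elementwise_op("add", "float", "+", binary_kernel);

    // The n-ary variant currently produces an empty kernel string (see its
    // definition above); the interface is in place for future fused ops.
    std::string nary_kernel;
    CudaKernelBuilder::get_arbitrary_elementwise_op("fused", "float", {"+", "*"}, nary_kernel);
}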
...@@ -17,10 +17,8 @@ ...@@ -17,10 +17,8 @@
#include <algorithm> #include <algorithm>
#include <map> #include <map>
#include "ngraph/runtime/gpu/gpu_cuda_function_builder.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_function_pool.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_kernel_builder.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_kernel_emitters.hpp" #include "ngraph/runtime/gpu/gpu_cuda_kernel_emitters.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_kernel_ops.hpp"
namespace ngraph namespace ngraph
{ {
...@@ -28,40 +26,6 @@ namespace ngraph ...@@ -28,40 +26,6 @@ namespace ngraph
{ {
namespace gpu namespace gpu
{ {
void emit_abs(void* in, void* out, size_t count)
{
std::string name = "abs";
// Create an instance of nvrtcProgram with the code string.
if (CudaFunctionPool::instance().get(name) == nullptr)
{
const char* opts[] = {"--gpu-architecture=compute_35",
"--relocatable-device-code=true"};
std::string kernel;
CudaKernelBuilder::get_1_element_op(name, "float", "fabsf", kernel);
CudaFunctionPool::instance().set(
name, CudaFunctionBuilder::get("cuda_" + name, kernel, 2, opts));
}
//convert runtime ptr to driver api ptr
CUdeviceptr d_ptr_in, d_ptr_out;
d_ptr_in = (CUdeviceptr)in;
d_ptr_out = (CUdeviceptr)out;
void* args_list[] = {&d_ptr_in, &d_ptr_out, &count};
CUDA_SAFE_CALL(cuLaunchKernel(*CudaFunctionPool::instance().get(name).get(),
count,
1,
1, // grid dim
1,
1,
1, // block dim
0,
NULL, // shared mem and stream
args_list,
0)); // arguments
CUDA_SAFE_CALL(cuCtxSynchronize()); // Retrieve and print output.
}
void emit_broadcast( void emit_broadcast(
void* in, void* out, size_t repeat_size, size_t repeat_times, size_t count) void* in, void* out, size_t repeat_size, size_t repeat_times, size_t count)
{ {
......
...@@ -18,6 +18,9 @@ ...@@ -18,6 +18,9 @@
#include "ngraph/codegen/code_writer.hpp" #include "ngraph/codegen/code_writer.hpp"
#include "ngraph/coordinate.hpp" #include "ngraph/coordinate.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_function_builder.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_function_pool.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_kernel_builder.hpp"
#include "ngraph/strides.hpp" #include "ngraph/strides.hpp"
namespace ngraph namespace ngraph
...@@ -26,9 +29,46 @@ namespace ngraph ...@@ -26,9 +29,46 @@ namespace ngraph
{ {
namespace gpu namespace gpu
{ {
void emit_abs(void* in, void* out, size_t count); template <typename T>
struct CudaOpMap;
void emit_broadcast( void emit_broadcast(
void* in, void* out, size_t repeat_size, size_t repeat_times, size_t count); void* in, void* out, size_t repeat_size, size_t repeat_times, size_t count);
template <typename T>
void emit_unary_elementwise_op(void* in, void* out, size_t count, std::string name)
{
// Create an instance of nvrtcProgram with the code string.
if (CudaFunctionPool::instance().get(name) == nullptr)
{
const char* opts[] = {"--gpu-architecture=compute_35",
"--relocatable-device-code=true"};
std::string kernel;
CudaKernelBuilder::get_unary_elementwise_op(
name, "float", CudaOpMap<T>::op, kernel);
CudaFunctionPool::instance().set(
name, CudaFunctionBuilder::get("cuda_" + name, kernel, 2, opts));
}
//convert runtime ptr to driver api ptr
CUdeviceptr d_ptr_in, d_ptr_out;
d_ptr_in = (CUdeviceptr)in;
d_ptr_out = (CUdeviceptr)out;
void* args_list[] = {&d_ptr_in, &d_ptr_out, &count};
CUDA_SAFE_CALL(cuLaunchKernel(*CudaFunctionPool::instance().get(name).get(),
count,
1,
1, // grid dim
1,
1,
1, // block dim
0,
NULL, // shared mem and stream
args_list,
0)); // arguments
CUDA_SAFE_CALL(cuCtxSynchronize()); // block until the kernel has completed
}
} }
} }
} }
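emit_unary_elementwise_op is a function template keyed on the op type, so the per-op emit_abs-style wrappers collapse into one entry point: the op type selects the device function through CudaOpMap<T>::op (declared in gpu_cuda_kernel_ops.hpp below), and the name string keys the compiled-kernel cache in CudaFunctionPool. A hedged sketch of a direct call; the wrapper name and the device pointers are placeholders:

#include <cstddef>
#include "ngraph/runtime/gpu/gpu_cuda_kernel_emitters.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_kernel_ops.hpp"

// Sketch: compile (and cache) a float kernel applying CudaOpMap<op::Abs>::op,
// i.e. fabsf, to each element, then launch it over `count` elements.
void launch_abs(void* device_in, void* device_out, size_t count)
{
    ngraph::runtime::gpu::emit_unary_elementwise_op<ngraph::op::Abs>(
        device_in, device_out, count, "abs");
}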
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
namespace ngraph
{
namespace op
{
class Abs;
class Acos;
class Asin;
class Atan;
class Ceiling;
class Cos;
class Cosh;
class Exp;
class Floor;
class Log;
class Sin;
class Sinh;
class Tan;
class Tanh;
// Unimplemented or unused in favor of cuDNN impl.
class Max;
class Min;
class Negative;
class Not;
class Sign;
class Sqrt;
}
namespace runtime
{
namespace gpu
{
template <>
struct CudaOpMap<ngraph::op::Abs>
{
static constexpr const char* op = "fabsf";
};
template <>
struct CudaOpMap<ngraph::op::Acos>
{
static constexpr const char* op = "acosf";
};
template <>
struct CudaOpMap<ngraph::op::Asin>
{
static constexpr const char* op = "asinf";
};
template <>
struct CudaOpMap<ngraph::op::Atan>
{
static constexpr const char* op = "atanf";
};
template <>
struct CudaOpMap<ngraph::op::Ceiling>
{
static constexpr const char* op = "ceilf";
};
template <>
struct CudaOpMap<ngraph::op::Cos>
{
static constexpr const char* op = "cosf";
};
template <>
struct CudaOpMap<ngraph::op::Cosh>
{
static constexpr const char* op = "coshf";
};
template <>
struct CudaOpMap<ngraph::op::Exp>
{
static constexpr const char* op = "expf";
};
template <>
struct CudaOpMap<ngraph::op::Floor>
{
static constexpr const char* op = "floorf";
};
template <>
struct CudaOpMap<ngraph::op::Log>
{
static constexpr const char* op = "logf";
};
template <>
struct CudaOpMap<ngraph::op::Max>
{
static constexpr const char* op = "fmaxf";
};
template <>
struct CudaOpMap<ngraph::op::Min>
{
static constexpr const char* op = "fminf";
};
template <>
struct CudaOpMap<ngraph::op::Sin>
{
static constexpr const char* op = "sinf";
};
template <>
struct CudaOpMap<ngraph::op::Sinh>
{
static constexpr const char* op = "sinhf";
};
template <>
struct CudaOpMap<ngraph::op::Sqrt>
{
static constexpr const char* op = "sqrtf";
};
template <>
struct CudaOpMap<ngraph::op::Tan>
{
static constexpr const char* op = "tanf";
};
template <>
struct CudaOpMap<ngraph::op::Tanh>
{
static constexpr const char* op = "tanhf";
};
}
}
}
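The trait table above pairs each unary op type with the CUDA intrinsic name that get_unary_elementwise_op splices into the kernel body, so supporting a new op is a matter of adding a specialization and a dispatcher entry rather than another emitter method. A small host-side check of the mapping, purely illustrative:

#include <cstdio>
#include "ngraph/runtime/gpu/gpu_cuda_kernel_emitters.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_kernel_ops.hpp"

// Print a few of the op-to-intrinsic mappings defined by the specializations above.
void print_op_mappings()
{
    std::printf("Abs  -> %s\n", ngraph::runtime::gpu::CudaOpMap<ngraph::op::Abs>::op);  // fabsf
    std::printf("Tanh -> %s\n", ngraph::runtime::gpu::CudaOpMap<ngraph::op::Tanh>::op); // tanhf
    std::printf("Sqrt -> %s\n", ngraph::runtime::gpu::CudaOpMap<ngraph::op::Sqrt>::op); // sqrtf
}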
...@@ -45,8 +45,8 @@ namespace ngraph ...@@ -45,8 +45,8 @@ namespace ngraph
static void EMITTER_DECL(EmitMultiply); static void EMITTER_DECL(EmitMultiply);
static void EMITTER_DECL(EmitGetOutputElement); static void EMITTER_DECL(EmitGetOutputElement);
static void EMITTER_DECL(EmitXLAGetTupleElement); static void EMITTER_DECL(EmitXLAGetTupleElement);
static void EMITTER_DECL(EmitUnaryElementwise);
static void EMITTER_DECL(EmitTuple); static void EMITTER_DECL(EmitTuple);
static void EMITTER_DECL(EmitAbs);
static void EMITTER_DECL(EmitConcat); static void EMITTER_DECL(EmitConcat);
static void EMITTER_DECL(EmitDivide); static void EMITTER_DECL(EmitDivide);
static void EMITTER_DECL(EmitEqual); static void EMITTER_DECL(EmitEqual);
...@@ -54,7 +54,6 @@ namespace ngraph ...@@ -54,7 +54,6 @@ namespace ngraph
static void EMITTER_DECL(EmitGreaterEq); static void EMITTER_DECL(EmitGreaterEq);
static void EMITTER_DECL(EmitLess); static void EMITTER_DECL(EmitLess);
static void EMITTER_DECL(EmitLessEq); static void EMITTER_DECL(EmitLessEq);
static void EMITTER_DECL(EmitLog);
static void EMITTER_DECL(EmitMaximum); static void EMITTER_DECL(EmitMaximum);
static void EMITTER_DECL(EmitMinimum); static void EMITTER_DECL(EmitMinimum);
static void EMITTER_DECL(EmitNegative); static void EMITTER_DECL(EmitNegative);
...@@ -67,31 +66,18 @@ namespace ngraph ...@@ -67,31 +66,18 @@ namespace ngraph
static void EMITTER_DECL(EmitReshape); static void EMITTER_DECL(EmitReshape);
static void EMITTER_DECL(EmitFunctionCall); static void EMITTER_DECL(EmitFunctionCall);
static void EMITTER_DECL(EmitReduce); static void EMITTER_DECL(EmitReduce);
static void EMITTER_DECL(EmitSign);
static void EMITTER_DECL(EmitSlice); static void EMITTER_DECL(EmitSlice);
static void EMITTER_DECL(EmitSum); static void EMITTER_DECL(EmitSum);
static void EMITTER_DECL(EmitExp);
static void EMITTER_DECL(EmitSin);
static void EMITTER_DECL(EmitSinh);
static void EMITTER_DECL(EmitCos);
static void EMITTER_DECL(EmitCosh);
static void EMITTER_DECL(EmitTan);
static void EMITTER_DECL(EmitTanh);
static void EMITTER_DECL(EmitAsin);
static void EMITTER_DECL(EmitAcos);
static void EMITTER_DECL(EmitAtan);
static void EMITTER_DECL(EmitPower); static void EMITTER_DECL(EmitPower);
static void EMITTER_DECL(EmitReplaceSlice); static void EMITTER_DECL(EmitReplaceSlice);
static void EMITTER_DECL(EmitOneHot); static void EMITTER_DECL(EmitOneHot);
static void EMITTER_DECL(EmitFloor);
static void EMITTER_DECL(EmitCeiling);
static void EMITTER_DECL(EmitSqrt); static void EMITTER_DECL(EmitSqrt);
static void EMITTER_DECL(EmitConvolution); static void EMITTER_DECL(EmitConvolution);
static void EMITTER_DECL(EmitNot);
static void EMITTER_DECL(EmitMaxPool); static void EMITTER_DECL(EmitMaxPool);
static void EMITTER_DECL(EmitReverse); static void EMITTER_DECL(EmitReverse);
static void EMITTER_DECL(EmitReduceWindow); static void EMITTER_DECL(EmitReduceWindow);
static void EMITTER_DECL(EmitSelectAndScatter); static void EMITTER_DECL(EmitSelectAndScatter);
static void EMITTER_DECL(EmitResult);
}; };
} }
} }
......
...@@ -151,7 +151,7 @@ static const runtime::gpu::OpMap dispatcher{ ...@@ -151,7 +151,7 @@ static const runtime::gpu::OpMap dispatcher{
{TI(ngraph::op::Dot), &runtime::gpu::GPU_Emitter::EmitDot}, {TI(ngraph::op::Dot), &runtime::gpu::GPU_Emitter::EmitDot},
{TI(ngraph::op::Multiply), &runtime::gpu::GPU_Emitter::EmitMultiply}, {TI(ngraph::op::Multiply), &runtime::gpu::GPU_Emitter::EmitMultiply},
{TI(ngraph::op::Parameter), &runtime::gpu::GPU_Emitter::EmitNop}, {TI(ngraph::op::Parameter), &runtime::gpu::GPU_Emitter::EmitNop},
{TI(ngraph::op::Abs), &runtime::gpu::GPU_Emitter::EmitAbs}, {TI(ngraph::op::Abs), &runtime::gpu::GPU_Emitter::EmitUnaryElementwise},
{TI(ngraph::op::Concat), &runtime::gpu::GPU_Emitter::EmitConcat}, {TI(ngraph::op::Concat), &runtime::gpu::GPU_Emitter::EmitConcat},
{TI(ngraph::op::Divide), &runtime::gpu::GPU_Emitter::EmitDivide}, {TI(ngraph::op::Divide), &runtime::gpu::GPU_Emitter::EmitDivide},
{TI(ngraph::op::Equal), &runtime::gpu::GPU_Emitter::EmitEqual}, {TI(ngraph::op::Equal), &runtime::gpu::GPU_Emitter::EmitEqual},
...@@ -159,7 +159,7 @@ static const runtime::gpu::OpMap dispatcher{ ...@@ -159,7 +159,7 @@ static const runtime::gpu::OpMap dispatcher{
{TI(ngraph::op::GreaterEq), &runtime::gpu::GPU_Emitter::EmitGreaterEq}, {TI(ngraph::op::GreaterEq), &runtime::gpu::GPU_Emitter::EmitGreaterEq},
{TI(ngraph::op::Less), &runtime::gpu::GPU_Emitter::EmitLess}, {TI(ngraph::op::Less), &runtime::gpu::GPU_Emitter::EmitLess},
{TI(ngraph::op::LessEq), &runtime::gpu::GPU_Emitter::EmitLessEq}, {TI(ngraph::op::LessEq), &runtime::gpu::GPU_Emitter::EmitLessEq},
{TI(ngraph::op::Log), &runtime::gpu::GPU_Emitter::EmitLog}, {TI(ngraph::op::Log), &runtime::gpu::GPU_Emitter::EmitUnaryElementwise},
{TI(ngraph::op::Maximum), &runtime::gpu::GPU_Emitter::EmitMaximum}, {TI(ngraph::op::Maximum), &runtime::gpu::GPU_Emitter::EmitMaximum},
{TI(ngraph::op::Minimum), &runtime::gpu::GPU_Emitter::EmitMinimum}, {TI(ngraph::op::Minimum), &runtime::gpu::GPU_Emitter::EmitMinimum},
{TI(ngraph::op::Negative), &runtime::gpu::GPU_Emitter::EmitNegative}, {TI(ngraph::op::Negative), &runtime::gpu::GPU_Emitter::EmitNegative},
...@@ -173,30 +173,31 @@ static const runtime::gpu::OpMap dispatcher{ ...@@ -173,30 +173,31 @@ static const runtime::gpu::OpMap dispatcher{
{TI(ngraph::op::Reshape), &runtime::gpu::GPU_Emitter::EmitReshape}, {TI(ngraph::op::Reshape), &runtime::gpu::GPU_Emitter::EmitReshape},
{TI(ngraph::op::FunctionCall), &runtime::gpu::GPU_Emitter::EmitFunctionCall}, {TI(ngraph::op::FunctionCall), &runtime::gpu::GPU_Emitter::EmitFunctionCall},
{TI(ngraph::op::Reduce), &runtime::gpu::GPU_Emitter::EmitReduce}, {TI(ngraph::op::Reduce), &runtime::gpu::GPU_Emitter::EmitReduce},
{TI(ngraph::op::Sign), &runtime::gpu::GPU_Emitter::EmitSign}, {TI(ngraph::op::Sign), &runtime::gpu::GPU_Emitter::EmitUnaryElementwise},
{TI(ngraph::op::Slice), &runtime::gpu::GPU_Emitter::EmitSlice}, {TI(ngraph::op::Slice), &runtime::gpu::GPU_Emitter::EmitSlice},
{TI(ngraph::op::Sum), &runtime::gpu::GPU_Emitter::EmitSum}, {TI(ngraph::op::Sum), &runtime::gpu::GPU_Emitter::EmitSum},
{TI(ngraph::op::Exp), &runtime::gpu::GPU_Emitter::EmitExp}, {TI(ngraph::op::Exp), &runtime::gpu::GPU_Emitter::EmitUnaryElementwise},
{TI(ngraph::op::Sin), &runtime::gpu::GPU_Emitter::EmitSin}, {TI(ngraph::op::Sin), &runtime::gpu::GPU_Emitter::EmitUnaryElementwise},
{TI(ngraph::op::Sinh), &runtime::gpu::GPU_Emitter::EmitSinh}, {TI(ngraph::op::Sinh), &runtime::gpu::GPU_Emitter::EmitUnaryElementwise},
{TI(ngraph::op::Cos), &runtime::gpu::GPU_Emitter::EmitCos}, {TI(ngraph::op::Cos), &runtime::gpu::GPU_Emitter::EmitUnaryElementwise},
{TI(ngraph::op::Cosh), &runtime::gpu::GPU_Emitter::EmitCosh}, {TI(ngraph::op::Cosh), &runtime::gpu::GPU_Emitter::EmitUnaryElementwise},
{TI(ngraph::op::Tan), &runtime::gpu::GPU_Emitter::EmitTan}, {TI(ngraph::op::Tan), &runtime::gpu::GPU_Emitter::EmitUnaryElementwise},
{TI(ngraph::op::Tanh), &runtime::gpu::GPU_Emitter::EmitTanh}, {TI(ngraph::op::Tanh), &runtime::gpu::GPU_Emitter::EmitUnaryElementwise},
{TI(ngraph::op::Asin), &runtime::gpu::GPU_Emitter::EmitAsin}, {TI(ngraph::op::Asin), &runtime::gpu::GPU_Emitter::EmitUnaryElementwise},
{TI(ngraph::op::Acos), &runtime::gpu::GPU_Emitter::EmitAcos}, {TI(ngraph::op::Acos), &runtime::gpu::GPU_Emitter::EmitUnaryElementwise},
{TI(ngraph::op::Atan), &runtime::gpu::GPU_Emitter::EmitAtan}, {TI(ngraph::op::Atan), &runtime::gpu::GPU_Emitter::EmitUnaryElementwise},
{TI(ngraph::op::ReplaceSlice), &runtime::gpu::GPU_Emitter::EmitReplaceSlice}, {TI(ngraph::op::ReplaceSlice), &runtime::gpu::GPU_Emitter::EmitReplaceSlice},
{TI(ngraph::op::OneHot), &runtime::gpu::GPU_Emitter::EmitOneHot}, {TI(ngraph::op::OneHot), &runtime::gpu::GPU_Emitter::EmitOneHot},
{TI(ngraph::op::Floor), &runtime::gpu::GPU_Emitter::EmitFloor}, {TI(ngraph::op::Floor), &runtime::gpu::GPU_Emitter::EmitUnaryElementwise},
{TI(ngraph::op::Ceiling), &runtime::gpu::GPU_Emitter::EmitCeiling}, {TI(ngraph::op::Ceiling), &runtime::gpu::GPU_Emitter::EmitUnaryElementwise},
{TI(ngraph::op::Sqrt), &runtime::gpu::GPU_Emitter::EmitSqrt}, {TI(ngraph::op::Sqrt), &runtime::gpu::GPU_Emitter::EmitSqrt},
{TI(ngraph::op::Convolution), &runtime::gpu::GPU_Emitter::EmitConvolution}, {TI(ngraph::op::Convolution), &runtime::gpu::GPU_Emitter::EmitConvolution},
{TI(ngraph::op::Not), &runtime::gpu::GPU_Emitter::EmitNot}, {TI(ngraph::op::Not), &runtime::gpu::GPU_Emitter::EmitUnaryElementwise},
{TI(ngraph::op::MaxPool), &runtime::gpu::GPU_Emitter::EmitMaxPool}, {TI(ngraph::op::MaxPool), &runtime::gpu::GPU_Emitter::EmitMaxPool},
{TI(ngraph::op::Reverse), &runtime::gpu::GPU_Emitter::EmitReverse}, {TI(ngraph::op::Reverse), &runtime::gpu::GPU_Emitter::EmitReverse},
{TI(ngraph::op::ReduceWindow), &runtime::gpu::GPU_Emitter::EmitReduceWindow}, {TI(ngraph::op::ReduceWindow), &runtime::gpu::GPU_Emitter::EmitReduceWindow},
{TI(ngraph::op::SelectAndScatter), &runtime::gpu::GPU_Emitter::EmitSelectAndScatter}, {TI(ngraph::op::SelectAndScatter), &runtime::gpu::GPU_Emitter::EmitSelectAndScatter},
{TI(ngraph::op::Result), &runtime::gpu::GPU_Emitter::EmitResult},
}; };
runtime::gpu::GPU_ExternalFunction::GPU_ExternalFunction( runtime::gpu::GPU_ExternalFunction::GPU_ExternalFunction(
...@@ -250,6 +251,7 @@ void runtime::gpu::GPU_ExternalFunction::compile() ...@@ -250,6 +251,7 @@ void runtime::gpu::GPU_ExternalFunction::compile()
#include "ngraph/pass/memory_layout.hpp" #include "ngraph/pass/memory_layout.hpp"
#include "ngraph/runtime/aligned_buffer.hpp" #include "ngraph/runtime/aligned_buffer.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_kernel_emitters.hpp" #include "ngraph/runtime/gpu/gpu_cuda_kernel_emitters.hpp"
#include "ngraph/runtime/gpu/gpu_cuda_kernel_ops.hpp"
#include "ngraph/runtime/gpu/gpu_util.hpp" #include "ngraph/runtime/gpu/gpu_util.hpp"
#include "ngraph/util.hpp" #include "ngraph/util.hpp"
)"; )";
...@@ -346,12 +348,15 @@ using namespace std; ...@@ -346,12 +348,15 @@ using namespace std;
shared_ptr<descriptor::TensorView> tv = node->get_outputs()[0].get_tensor_view(); shared_ptr<descriptor::TensorView> tv = node->get_outputs()[0].get_tensor_view();
auto c_value_strings = c->get_value_strings(); auto c_value_strings = c->get_value_strings();
writer << "static " << tv->get_tensor().get_element_type().c_type_string() << " " writer << "static " << tv->get_tensor().get_element_type().c_type_string() << " "
<< tv->get_tensor().get_name() << "[" << c_value_strings.size() << "] =\n"; << tv->get_tensor().get_name() << "_cpu[" << c_value_strings.size()
<< "] =\n";
writer << "{\n"; writer << "{\n";
writer.indent++; writer.indent++;
writer << emit_string_array(c_value_strings, 100 - writer.indent * 4); writer << emit_string_array(c_value_strings, 100 - writer.indent * 4);
writer.indent--; writer.indent--;
writer << "\n};\n\n"; writer << "\n};\n\n";
writer << "static " << tv->get_tensor().get_element_type().c_type_string() << " *"
<< tv->get_tensor().get_name() << ";\n";
m_variable_name_map[tv->get_tensor().get_name()] = tv->get_tensor().get_name(); m_variable_name_map[tv->get_tensor().get_name()] = tv->get_tensor().get_name();
} }
} }
...@@ -485,6 +490,26 @@ using namespace std; ...@@ -485,6 +490,26 @@ using namespace std;
writer << "{\n"; writer << "{\n";
writer.indent++; writer.indent++;
for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions())
{
for (shared_ptr<Node> node : current_function->get_ordered_ops())
{
const op::Constant* c = dynamic_cast<op::Constant*>(node.get());
if (c)
{
shared_ptr<descriptor::TensorView> tv =
node->get_outputs()[0].get_tensor_view();
writer << "if(" << tv->get_tensor().get_name() << " == NULL)\n";
writer << "{\n";
writer.indent++;
writer << "runtime::gpu::cuda_memcpyHtD(" << tv->get_tensor().get_name() << ", "
<< tv->get_tensor().get_name() << "_cpu, " << tv->get_tensor().size()
<< ");\n";
writer.indent--;
writer << "}\n";
}
}
}
bool temporaries_used = false; bool temporaries_used = false;
size_t worst_case_tmp_size = 0; size_t worst_case_tmp_size = 0;
for (shared_ptr<Node> node : current_function->get_ordered_ops()) for (shared_ptr<Node> node : current_function->get_ordered_ops())
...@@ -657,7 +682,6 @@ using namespace std; ...@@ -657,7 +682,6 @@ using namespace std;
// Emit operation epilogue // Emit operation epilogue
if (!node->is_parameter() && !node->is_constant()) if (!node->is_parameter() && !node->is_constant())
{ {
handle_output_alias(writer, *node, output_alias_map);
if (m_emit_timing) if (m_emit_timing)
{ {
emit_debug_function_exit(writer, node.get(), in, out); emit_debug_function_exit(writer, node.get(), in, out);
......
...@@ -328,7 +328,7 @@ static shared_ptr<ngraph::Function> ...@@ -328,7 +328,7 @@ static shared_ptr<ngraph::Function>
else if (node_op == "BatchNorm") else if (node_op == "BatchNorm")
{ {
auto epsilon = node_js.at("eps").get<double>(); auto epsilon = node_js.at("eps").get<double>();
node = make_shared<op::BatchNorm>(epsilon, args[0], args[1], args[2], args[3], args[4]); node = make_shared<op::BatchNorm>(epsilon, args[0], args[1], args[2]);
} }
else if (node_op == "BatchNormBackprop") else if (node_op == "BatchNormBackprop")
{ {
......
...@@ -1305,6 +1305,7 @@ TEST(${BACKEND_NAME}, backwards_slice) ...@@ -1305,6 +1305,7 @@ TEST(${BACKEND_NAME}, backwards_slice)
TEST(${BACKEND_NAME}, backwards_softmax_all) TEST(${BACKEND_NAME}, backwards_softmax_all)
{ {
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
auto manager = runtime::Manager::get("${BACKEND_NAME}"); auto manager = runtime::Manager::get("${BACKEND_NAME}");
auto backend = manager->allocate_backend(); auto backend = manager->allocate_backend();
...@@ -1322,6 +1323,7 @@ TEST(${BACKEND_NAME}, backwards_softmax_all) ...@@ -1322,6 +1323,7 @@ TEST(${BACKEND_NAME}, backwards_softmax_all)
TEST(${BACKEND_NAME}, backwards_softmax_axis) TEST(${BACKEND_NAME}, backwards_softmax_axis)
{ {
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
auto manager = runtime::Manager::get("${BACKEND_NAME}"); auto manager = runtime::Manager::get("${BACKEND_NAME}");
auto backend = manager->allocate_backend(); auto backend = manager->allocate_backend();
...@@ -1339,6 +1341,7 @@ TEST(${BACKEND_NAME}, backwards_softmax_axis) ...@@ -1339,6 +1341,7 @@ TEST(${BACKEND_NAME}, backwards_softmax_axis)
TEST(${BACKEND_NAME}, backwards_softmax_underflow) TEST(${BACKEND_NAME}, backwards_softmax_underflow)
{ {
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
auto manager = runtime::Manager::get("${BACKEND_NAME}"); auto manager = runtime::Manager::get("${BACKEND_NAME}");
auto backend = manager->allocate_backend(); auto backend = manager->allocate_backend();
...@@ -1358,6 +1361,7 @@ TEST(${BACKEND_NAME}, backwards_softmax_underflow) ...@@ -1358,6 +1361,7 @@ TEST(${BACKEND_NAME}, backwards_softmax_underflow)
TEST(${BACKEND_NAME}, backwards_softmax_3d) TEST(${BACKEND_NAME}, backwards_softmax_3d)
{ {
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
auto manager = runtime::Manager::get("${BACKEND_NAME}"); auto manager = runtime::Manager::get("${BACKEND_NAME}");
auto backend = manager->allocate_backend(); auto backend = manager->allocate_backend();
......
...@@ -120,6 +120,7 @@ TEST(${BACKEND_NAME}, component_cleanup) ...@@ -120,6 +120,7 @@ TEST(${BACKEND_NAME}, component_cleanup)
TEST(${BACKEND_NAME}, aliased_output) TEST(${BACKEND_NAME}, aliased_output)
{ {
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
Shape shape{2, 2}; Shape shape{2, 2};
auto A = make_shared<op::Parameter>(element::f32, shape); auto A = make_shared<op::Parameter>(element::f32, shape);
auto B = make_shared<op::Parameter>(element::f32, shape); auto B = make_shared<op::Parameter>(element::f32, shape);
...@@ -335,7 +336,6 @@ TEST(${BACKEND_NAME}, abs) ...@@ -335,7 +336,6 @@ TEST(${BACKEND_NAME}, abs)
TEST(${BACKEND_NAME}, ceiling) TEST(${BACKEND_NAME}, ceiling)
{ {
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
Shape shape{2, 2}; Shape shape{2, 2};
auto A = make_shared<op::Parameter>(element::f32, shape); auto A = make_shared<op::Parameter>(element::f32, shape);
auto f = make_shared<Function>(make_shared<op::Ceiling>(A), op::ParameterVector{A}); auto f = make_shared<Function>(make_shared<op::Ceiling>(A), op::ParameterVector{A});
...@@ -776,7 +776,6 @@ TEST(${BACKEND_NAME}, equal) ...@@ -776,7 +776,6 @@ TEST(${BACKEND_NAME}, equal)
TEST(${BACKEND_NAME}, floor) TEST(${BACKEND_NAME}, floor)
{ {
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
Shape shape{2, 2}; Shape shape{2, 2};
auto A = make_shared<op::Parameter>(element::f32, shape); auto A = make_shared<op::Parameter>(element::f32, shape);
auto f = make_shared<Function>(make_shared<op::Floor>(A), op::ParameterVector{A}); auto f = make_shared<Function>(make_shared<op::Floor>(A), op::ParameterVector{A});
...@@ -1370,7 +1369,6 @@ TEST(${BACKEND_NAME}, lesseq_bool) ...@@ -1370,7 +1369,6 @@ TEST(${BACKEND_NAME}, lesseq_bool)
TEST(${BACKEND_NAME}, log) TEST(${BACKEND_NAME}, log)
{ {
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
Shape shape{2, 2, 2}; Shape shape{2, 2, 2};
auto A = make_shared<op::Parameter>(element::f32, shape); auto A = make_shared<op::Parameter>(element::f32, shape);
auto f = make_shared<Function>(make_shared<op::Log>(A), op::ParameterVector{A}); auto f = make_shared<Function>(make_shared<op::Log>(A), op::ParameterVector{A});
...@@ -2673,7 +2671,6 @@ TEST(${BACKEND_NAME}, reshape_6d) ...@@ -2673,7 +2671,6 @@ TEST(${BACKEND_NAME}, reshape_6d)
TEST(${BACKEND_NAME}, sin) TEST(${BACKEND_NAME}, sin)
{ {
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
Shape shape{6}; Shape shape{6};
auto A = make_shared<op::Parameter>(element::f32, shape); auto A = make_shared<op::Parameter>(element::f32, shape);
auto f = make_shared<Function>(make_shared<op::Sin>(A), op::ParameterVector{A}); auto f = make_shared<Function>(make_shared<op::Sin>(A), op::ParameterVector{A});
...@@ -2699,7 +2696,6 @@ TEST(${BACKEND_NAME}, sin) ...@@ -2699,7 +2696,6 @@ TEST(${BACKEND_NAME}, sin)
TEST(${BACKEND_NAME}, cos) TEST(${BACKEND_NAME}, cos)
{ {
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
Shape shape{6}; Shape shape{6};
auto A = make_shared<op::Parameter>(element::f32, shape); auto A = make_shared<op::Parameter>(element::f32, shape);
auto f = make_shared<Function>(make_shared<op::Cos>(A), op::ParameterVector{A}); auto f = make_shared<Function>(make_shared<op::Cos>(A), op::ParameterVector{A});
...@@ -2725,7 +2721,6 @@ TEST(${BACKEND_NAME}, cos) ...@@ -2725,7 +2721,6 @@ TEST(${BACKEND_NAME}, cos)
TEST(${BACKEND_NAME}, tan) TEST(${BACKEND_NAME}, tan)
{ {
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
Shape shape{6}; Shape shape{6};
auto A = make_shared<op::Parameter>(element::f32, shape); auto A = make_shared<op::Parameter>(element::f32, shape);
auto f = make_shared<Function>(make_shared<op::Tan>(A), op::ParameterVector{A}); auto f = make_shared<Function>(make_shared<op::Tan>(A), op::ParameterVector{A});
...@@ -2746,12 +2741,11 @@ TEST(${BACKEND_NAME}, tan) ...@@ -2746,12 +2741,11 @@ TEST(${BACKEND_NAME}, tan)
input.begin(), input.end(), input.begin(), [](float x) -> float { return tanf(x); }); input.begin(), input.end(), input.begin(), [](float x) -> float { return tanf(x); });
cf->call({a}, {result}); cf->call({a}, {result});
EXPECT_EQ(input, read_vector<float>(result)); EXPECT_TRUE(test::all_close(input, read_vector<float>(result)));
} }
TEST(${BACKEND_NAME}, asin) TEST(${BACKEND_NAME}, asin)
{ {
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
Shape shape{6}; Shape shape{6};
auto A = make_shared<op::Parameter>(element::f32, shape); auto A = make_shared<op::Parameter>(element::f32, shape);
auto f = make_shared<Function>(make_shared<op::Asin>(A), op::ParameterVector{A}); auto f = make_shared<Function>(make_shared<op::Asin>(A), op::ParameterVector{A});
...@@ -2776,7 +2770,6 @@ TEST(${BACKEND_NAME}, asin) ...@@ -2776,7 +2770,6 @@ TEST(${BACKEND_NAME}, asin)
TEST(${BACKEND_NAME}, acos) TEST(${BACKEND_NAME}, acos)
{ {
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
Shape shape{6}; Shape shape{6};
auto A = make_shared<op::Parameter>(element::f32, shape); auto A = make_shared<op::Parameter>(element::f32, shape);
auto f = make_shared<Function>(make_shared<op::Acos>(A), op::ParameterVector{A}); auto f = make_shared<Function>(make_shared<op::Acos>(A), op::ParameterVector{A});
...@@ -2801,7 +2794,6 @@ TEST(${BACKEND_NAME}, acos) ...@@ -2801,7 +2794,6 @@ TEST(${BACKEND_NAME}, acos)
TEST(${BACKEND_NAME}, atan) TEST(${BACKEND_NAME}, atan)
{ {
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
Shape shape{6}; Shape shape{6};
auto A = make_shared<op::Parameter>(element::f32, shape); auto A = make_shared<op::Parameter>(element::f32, shape);
auto f = make_shared<Function>(make_shared<op::Atan>(A), op::ParameterVector{A}); auto f = make_shared<Function>(make_shared<op::Atan>(A), op::ParameterVector{A});
...@@ -2826,7 +2818,6 @@ TEST(${BACKEND_NAME}, atan) ...@@ -2826,7 +2818,6 @@ TEST(${BACKEND_NAME}, atan)
TEST(${BACKEND_NAME}, sinh) TEST(${BACKEND_NAME}, sinh)
{ {
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
Shape shape{6}; Shape shape{6};
auto A = make_shared<op::Parameter>(element::f32, shape); auto A = make_shared<op::Parameter>(element::f32, shape);
auto f = make_shared<Function>(make_shared<op::Sinh>(A), op::ParameterVector{A}); auto f = make_shared<Function>(make_shared<op::Sinh>(A), op::ParameterVector{A});
...@@ -2851,7 +2842,6 @@ TEST(${BACKEND_NAME}, sinh) ...@@ -2851,7 +2842,6 @@ TEST(${BACKEND_NAME}, sinh)
TEST(${BACKEND_NAME}, cosh) TEST(${BACKEND_NAME}, cosh)
{ {
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
Shape shape{6}; Shape shape{6};
auto A = make_shared<op::Parameter>(element::f32, shape); auto A = make_shared<op::Parameter>(element::f32, shape);
auto f = make_shared<Function>(make_shared<op::Cosh>(A), op::ParameterVector{A}); auto f = make_shared<Function>(make_shared<op::Cosh>(A), op::ParameterVector{A});
...@@ -2876,7 +2866,6 @@ TEST(${BACKEND_NAME}, cosh) ...@@ -2876,7 +2866,6 @@ TEST(${BACKEND_NAME}, cosh)
TEST(${BACKEND_NAME}, tanh) TEST(${BACKEND_NAME}, tanh)
{ {
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
Shape shape{6}; Shape shape{6};
auto A = make_shared<op::Parameter>(element::f32, shape); auto A = make_shared<op::Parameter>(element::f32, shape);
auto f = make_shared<Function>(make_shared<op::Tanh>(A), op::ParameterVector{A}); auto f = make_shared<Function>(make_shared<op::Tanh>(A), op::ParameterVector{A});
...@@ -2896,12 +2885,11 @@ TEST(${BACKEND_NAME}, tanh) ...@@ -2896,12 +2885,11 @@ TEST(${BACKEND_NAME}, tanh)
input.begin(), input.end(), input.begin(), [](float x) -> float { return tanhf(x); }); input.begin(), input.end(), input.begin(), [](float x) -> float { return tanhf(x); });
cf->call({a}, {result}); cf->call({a}, {result});
EXPECT_EQ(input, read_vector<float>(result)); EXPECT_TRUE(test::all_close(input, read_vector<float>(result)));
} }
TEST(${BACKEND_NAME}, exp) TEST(${BACKEND_NAME}, exp)
{ {
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
Shape shape{8}; Shape shape{8};
auto A = make_shared<op::Parameter>(element::f32, shape); auto A = make_shared<op::Parameter>(element::f32, shape);
auto f = make_shared<Function>(make_shared<op::Exp>(A), op::ParameterVector{A}); auto f = make_shared<Function>(make_shared<op::Exp>(A), op::ParameterVector{A});
...@@ -8441,6 +8429,7 @@ TEST(${BACKEND_NAME}, relu_4Dbackprop) ...@@ -8441,6 +8429,7 @@ TEST(${BACKEND_NAME}, relu_4Dbackprop)
TEST(${BACKEND_NAME}, softmax_all) TEST(${BACKEND_NAME}, softmax_all)
{ {
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
Shape shape{2, 3}; Shape shape{2, 3};
auto A = make_shared<op::Parameter>(element::f32, shape); auto A = make_shared<op::Parameter>(element::f32, shape);
auto f = auto f =
...@@ -8473,6 +8462,7 @@ TEST(${BACKEND_NAME}, softmax_all) ...@@ -8473,6 +8462,7 @@ TEST(${BACKEND_NAME}, softmax_all)
TEST(${BACKEND_NAME}, softmax_axis) TEST(${BACKEND_NAME}, softmax_axis)
{ {
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
Shape shape{2, 3}; Shape shape{2, 3};
auto A = make_shared<op::Parameter>(element::f32, shape); auto A = make_shared<op::Parameter>(element::f32, shape);
auto f = make_shared<Function>(make_shared<op::Softmax>(A, AxisSet{1}), op::ParameterVector{A}); auto f = make_shared<Function>(make_shared<op::Softmax>(A, AxisSet{1}), op::ParameterVector{A});
...@@ -8501,6 +8491,7 @@ TEST(${BACKEND_NAME}, softmax_axis) ...@@ -8501,6 +8491,7 @@ TEST(${BACKEND_NAME}, softmax_axis)
TEST(${BACKEND_NAME}, softmax_underflow) TEST(${BACKEND_NAME}, softmax_underflow)
{ {
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
Shape shape{2, 3}; Shape shape{2, 3};
auto A = make_shared<op::Parameter>(element::f32, shape); auto A = make_shared<op::Parameter>(element::f32, shape);
auto f = make_shared<Function>(make_shared<op::Softmax>(A, AxisSet{0}), op::ParameterVector{A}); auto f = make_shared<Function>(make_shared<op::Softmax>(A, AxisSet{0}), op::ParameterVector{A});
......
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
#include "ngraph/log.hpp" #include "ngraph/log.hpp"
#include "ngraph/ngraph.hpp" #include "ngraph/ngraph.hpp"
#include "ngraph/ops/batch_norm.hpp" #include "ngraph/ops/batch_norm.hpp"
#include "ngraph/ops/get_output_element.hpp"
#include "ngraph/ops/sum.hpp" #include "ngraph/ops/sum.hpp"
#include "ngraph/pass/graph_rewrite.hpp" #include "ngraph/pass/graph_rewrite.hpp"
#include "ngraph/pass/manager.hpp" #include "ngraph/pass/manager.hpp"
...@@ -254,18 +255,21 @@ TEST(cpu_fusion, batchnorm_fprop_b1c2h2w2) ...@@ -254,18 +255,21 @@ TEST(cpu_fusion, batchnorm_fprop_b1c2h2w2)
auto input_shape = Shape{1, 2, 2, 2}; auto input_shape = Shape{1, 2, 2, 2};
auto input = make_shared<op::Parameter>(element::f32, input_shape); auto input = make_shared<op::Parameter>(element::f32, input_shape);
auto mean_shape = Shape{2}; auto mean_shape = Shape{2};
auto mean = make_shared<op::Parameter>(element::f32, mean_shape);
auto var_shape = Shape{2}; auto var_shape = Shape{2};
auto var = make_shared<op::Parameter>(element::f32, var_shape);
auto gamma_shape = Shape{2}; auto gamma_shape = Shape{2};
auto gamma = make_shared<op::Parameter>(element::f32, gamma_shape); auto gamma = make_shared<op::Parameter>(element::f32, gamma_shape);
auto beta_shape = Shape{2}; auto beta_shape = Shape{2};
auto beta = make_shared<op::Parameter>(element::f32, beta_shape); auto beta = make_shared<op::Parameter>(element::f32, beta_shape);
double eps = 0.001; double eps = 0.001;
auto shape_r = Shape{1, 2, 2, 2}; auto shape_r = Shape{1, 2, 2, 2};
auto bn = make_shared<op::BatchNorm>(eps, gamma, beta, input, mean, var); auto bn = make_shared<op::BatchNorm>(eps, gamma, beta, input);
auto f = make_shared<Function>(bn, op::ParameterVector{mean, var, input, gamma, beta}); auto output_rt = std::make_shared<op::GetOutputElement>(bn, 0);
auto mean_rt = std::make_shared<op::GetOutputElement>(bn, 1);
auto variance_rt = std::make_shared<op::GetOutputElement>(bn, 2);
auto f = make_shared<Function>(NodeVector{output_rt, mean_rt, variance_rt},
op::ParameterVector{input, gamma, beta});
auto manager = runtime::Manager::get("CPU"); auto manager = runtime::Manager::get("CPU");
auto external = manager->compile(f); auto external = manager->compile(f);
auto backend = manager->allocate_backend(); auto backend = manager->allocate_backend();
...@@ -283,15 +287,13 @@ TEST(cpu_fusion, batchnorm_fprop_b1c2h2w2) ...@@ -283,15 +287,13 @@ TEST(cpu_fusion, batchnorm_fprop_b1c2h2w2)
0.64589411f, 0.64589411f,
0.4375872f, 0.4375872f,
0.89177299f}); 0.89177299f});
auto _mean = backend->make_primary_tensor_view(element::f32, mean_shape);
copy_data(_mean, vector<float>{0.60291237f, 0.59972727f});
auto _var = backend->make_primary_tensor_view(element::f32, var_shape);
copy_data(_var, vector<float>{0.00472505f, 0.03617825f});
auto _gamma = backend->make_primary_tensor_view(element::f32, gamma_shape); auto _gamma = backend->make_primary_tensor_view(element::f32, gamma_shape);
copy_data(_gamma, vector<float>{1.0f, 1.0f}); copy_data(_gamma, vector<float>{1.0f, 1.0f});
auto _beta = backend->make_primary_tensor_view(element::f32, beta_shape); auto _beta = backend->make_primary_tensor_view(element::f32, beta_shape);
copy_data(_beta, vector<float>{0.0f, 0.0f}); copy_data(_beta, vector<float>{0.0f, 0.0f});
auto result = backend->make_primary_tensor_view(element::f32, shape_r); auto bn_output = backend->make_primary_tensor_view(element::f32, shape_r);
auto result_mean = backend->make_primary_tensor_view(element::f32, mean_shape);
auto result_variance = backend->make_primary_tensor_view(element::f32, var_shape);
vector<float> expected_result{-0.71498716f, vector<float> expected_result{-0.71498716f,
1.48388731f, 1.48388731f,
...@@ -301,8 +303,14 @@ TEST(cpu_fusion, batchnorm_fprop_b1c2h2w2) ...@@ -301,8 +303,14 @@ TEST(cpu_fusion, batchnorm_fprop_b1c2h2w2)
0.23943391f, 0.23943391f,
-0.84090298f, -0.84090298f,
1.51462936f}; 1.51462936f};
cf->call({_mean, _var, _input, _gamma, _beta}, {result}); vector<float> expected_mean{0.602912f, 0.599727f};
EXPECT_TRUE(test::all_close(expected_result, read_vector<float>(result))); vector<float> expected_variance{0.00472505f, 0.0361782f};
cf->call({_input, _gamma, _beta}, {bn_output, result_mean, result_variance});
EXPECT_TRUE(test::all_close(expected_result, read_vector<float>(bn_output)));
EXPECT_TRUE(test::all_close(expected_mean, read_vector<float>(result_mean)));
EXPECT_TRUE(test::all_close(expected_variance, read_vector<float>(result_variance)));
} }
TEST(cpu_fusion, batchnorm_fprop_b2c2h2w1) TEST(cpu_fusion, batchnorm_fprop_b2c2h2w1)
...@@ -310,18 +318,21 @@ TEST(cpu_fusion, batchnorm_fprop_b2c2h2w1) ...@@ -310,18 +318,21 @@ TEST(cpu_fusion, batchnorm_fprop_b2c2h2w1)
auto input_shape = Shape{2, 2, 2, 1}; auto input_shape = Shape{2, 2, 2, 1};
auto input = make_shared<op::Parameter>(element::f32, input_shape); auto input = make_shared<op::Parameter>(element::f32, input_shape);
auto mean_shape = Shape{2}; auto mean_shape = Shape{2};
auto mean = make_shared<op::Parameter>(element::f32, mean_shape);
auto var_shape = Shape{2}; auto var_shape = Shape{2};
auto var = make_shared<op::Parameter>(element::f32, var_shape);
auto gamma_shape = Shape{2}; auto gamma_shape = Shape{2};
auto gamma = make_shared<op::Parameter>(element::f32, gamma_shape); auto gamma = make_shared<op::Parameter>(element::f32, gamma_shape);
auto beta_shape = Shape{2}; auto beta_shape = Shape{2};
auto beta = make_shared<op::Parameter>(element::f32, beta_shape); auto beta = make_shared<op::Parameter>(element::f32, beta_shape);
double eps = 0.001; double eps = 0.001;
auto shape_r = Shape{2, 2, 2, 1}; auto shape_r = Shape{2, 2, 2, 1};
auto bn = make_shared<op::BatchNorm>(eps, gamma, beta, input, mean, var); auto bn = make_shared<op::BatchNorm>(eps, gamma, beta, input);
auto f = make_shared<Function>(bn, op::ParameterVector{mean, var, input, gamma, beta}); auto output_rt = std::make_shared<op::GetOutputElement>(bn, 0);
auto mean_rt = std::make_shared<op::GetOutputElement>(bn, 1);
auto variance_rt = std::make_shared<op::GetOutputElement>(bn, 2);
auto f = make_shared<Function>(NodeVector{output_rt, mean_rt, variance_rt},
op::ParameterVector{input, gamma, beta});
auto manager = runtime::Manager::get("CPU"); auto manager = runtime::Manager::get("CPU");
auto external = manager->compile(f); auto external = manager->compile(f);
auto backend = manager->allocate_backend(); auto backend = manager->allocate_backend();
...@@ -337,20 +348,24 @@ TEST(cpu_fusion, batchnorm_fprop_b2c2h2w1) ...@@ -337,20 +348,24 @@ TEST(cpu_fusion, batchnorm_fprop_b2c2h2w1)
0.64589411f, 0.64589411f,
0.4375872f, 0.4375872f,
0.89177299f}); 0.89177299f});
auto _mean = backend->make_primary_tensor_view(element::f32, mean_shape);
copy_data(_mean, vector<float>{0.60291237f, 0.59972727f});
auto _var = backend->make_primary_tensor_view(element::f32, var_shape);
copy_data(_var, vector<float>{0.00472505f, 0.03617825f});
auto _gamma = backend->make_primary_tensor_view(element::f32, gamma_shape); auto _gamma = backend->make_primary_tensor_view(element::f32, gamma_shape);
copy_data(_gamma, vector<float>{1.0f, 1.0f}); copy_data(_gamma, vector<float>{1.0f, 1.0f});
auto _beta = backend->make_primary_tensor_view(element::f32, beta_shape); auto _beta = backend->make_primary_tensor_view(element::f32, beta_shape);
copy_data(_beta, vector<float>{0.0f, 0.0f}); copy_data(_beta, vector<float>{0.0f, 0.0f});
auto result = backend->make_primary_tensor_view(element::f32, shape_r); auto bn_output = backend->make_primary_tensor_view(element::f32, shape_r);
auto result_mean = backend->make_primary_tensor_view(element::f32, mean_shape);
auto result_variance = backend->make_primary_tensor_view(element::f32, var_shape);
vector<float> expected_result{ vector<float> expected_result{
-0.714987f, 1.48389f, 0.015746f, -0.284436f, -2.36912f, 0.56806f, -0.840903f, 1.51463f}; -0.30327f, 1.1561f, -0.0963782f, -0.434702f, -1.4011f, 0.548275f, -1.06187f, 1.59295f};
cf->call({_mean, _var, _input, _gamma, _beta}, {result}); vector<float> expected_mean{0.583388f, 0.619252f};
EXPECT_TRUE(test::all_close(expected_result, read_vector<float>(result))); vector<float> expected_variance{0.0119972f, 0.0282681f};
cf->call({_input, _gamma, _beta}, {bn_output, result_mean, result_variance});
EXPECT_TRUE(test::all_close(expected_result, read_vector<float>(bn_output)));
EXPECT_TRUE(test::all_close(expected_mean, read_vector<float>(result_mean)));
EXPECT_TRUE(test::all_close(expected_variance, read_vector<float>(result_variance)));
} }
TEST(cpu_fusion, fuse_fprop_bn) TEST(cpu_fusion, fuse_fprop_bn)
...@@ -404,7 +419,10 @@ TEST(cpu_fusion, bn_bprop_n4c3h2w2) ...@@ -404,7 +419,10 @@ TEST(cpu_fusion, bn_bprop_n4c3h2w2)
auto beta = make_shared<op::Parameter>(element::f32, beta_shape); auto beta = make_shared<op::Parameter>(element::f32, beta_shape);
double eps = 0.001; double eps = 0.001;
auto shape_r = Shape{4, 3, 2, 2}; auto shape_r = Shape{4, 3, 2, 2};
auto bn = make_shared<op::BatchNorm>(eps, gamma, beta, input, mean, var); auto bn = make_shared<op::BatchNorm>(eps, gamma, beta, input);
auto bn_dx = make_shared<op::GetOutputElement>(bn, 0);
auto bn_dgamma = make_shared<op::GetOutputElement>(bn, 1);
auto bn_dbeta = make_shared<op::GetOutputElement>(bn, 2);
auto manager = runtime::Manager::get("CPU"); auto manager = runtime::Manager::get("CPU");
auto backend = manager->allocate_backend(); auto backend = manager->allocate_backend();
...@@ -436,7 +454,8 @@ TEST(cpu_fusion, bn_bprop_n4c3h2w2) ...@@ -436,7 +454,8 @@ TEST(cpu_fusion, bn_bprop_n4c3h2w2)
vector<float> deltaData(shape_size(shape_r), 20.0f); vector<float> deltaData(shape_size(shape_r), 20.0f);
copy_data(_delta, deltaData); copy_data(_delta, deltaData);
auto f = make_shared<Function>(bn, op::ParameterVector{mean, var, input, gamma, beta}); auto f = make_shared<Function>(NodeVector{bn_dx, bn_dgamma, bn_dbeta},
op::ParameterVector{mean, var, input, gamma, beta});
auto C = std::make_shared<op::Parameter>(element::f32, shape_r); auto C = std::make_shared<op::Parameter>(element::f32, shape_r);
auto dinput = bn->backprop_node(input, C); auto dinput = bn->backprop_node(input, C);
......
...@@ -21,15 +21,8 @@ ...@@ -21,15 +21,8 @@
namespace ngraph namespace ngraph
{ {
class Node;
class Function; class Function;
namespace runtime
{
class Backend;
class Manager;
}
namespace autodiff namespace autodiff
{ {
/// @brief Returns a FunctionSpec for the backprop derivative of its argument. /// @brief Returns a FunctionSpec for the backprop derivative of its argument.
......