Unverified Commit 0c43f175 authored by adstraw, committed by GitHub

add softmax op (#542)

add softmax op and documentation
parent 498fbefd
.. softmax.rst:
#######
Softmax
#######
.. code-block:: cpp

   Softmax  // Softmax operation
Description
===========
Produces a tensor with the same element type and shape as ``arg``,
where the value at each coordinate of ``output`` is the exponential of the
value at the corresponding coordinate of ``arg``, divided by the sum of the
exponentials taken over the coordinates of ``arg`` in the specified ``axes``.
Inputs
------
+-----------------+-------------------------+--------------------------------+
| Name | Element Type | Shape |
+=================+=========================+================================+
| ``arg`` | Any | Any |
+-----------------+-------------------------+--------------------------------+
Parameters
----------
+-----------------+----------------------------------------------------------------+
| Name | Description |
+=================+================================================================+
| ``axes`` | The axis positions (0-based) on which to calculate the softmax |
+-----------------+----------------------------------------------------------------+
Outputs
-------
+-----------------+-------------------------+--------------------------------+
| Name | Element Type | Shape |
+=================+=========================+================================+
| ``output`` | Same as ``arg`` | Same as ``arg`` |
+-----------------+-------------------------+--------------------------------+
Mathematical Definition
=======================
.. math::

   \texttt{output}_{i} = \frac{\exp(\texttt{arg}_{i})}{\sum_{j} \exp(\texttt{arg}_{j})}

where :math:`j` ranges over the coordinates of ``arg`` that agree with :math:`i` on every axis not in ``axes``.
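For example, a softmax normalized along axis 1 of a ``2 x 3`` tensor can be
constructed as follows (a usage sketch; the ``data`` parameter node is
illustrative and not taken from this commit):

.. code-block:: cpp

   auto data = std::make_shared<op::Parameter>(element::f32, Shape{2, 3});
   // Normalize each row of the 2 x 3 tensor independently.
   auto sm = std::make_shared<op::Softmax>(data, AxisSet{1});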
C++ Interface
=============
.. doxygenclass:: ngraph::op::Softmax
:project: ngraph
:members: m_axes
\ No newline at end of file
......@@ -72,6 +72,7 @@ set (SRC
ops/sin.cpp
ops/sinh.cpp
ops/slice.cpp
ops/softmax.cpp
ops/sqrt.cpp
ops/subtract.cpp
ops/sum.cpp
......
......@@ -118,6 +118,7 @@
#include "ngraph/ops/sin.hpp"
#include "ngraph/ops/sinh.hpp"
#include "ngraph/ops/slice.hpp"
#include "ngraph/ops/softmax.hpp"
#include "ngraph/ops/sqrt.hpp"
#include "ngraph/ops/subtract.hpp"
#include "ngraph/ops/sum.hpp"
......
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include "ngraph/ops/softmax.hpp"
#include <algorithm>
#include <numeric>
#include "ngraph/builder/autobroadcast.hpp"
#include "ngraph/ops/multiply.hpp"
#include "ngraph/ops/reshape.hpp"
#include "ngraph/ops/subtract.hpp"
#include "ngraph/ops/sum.hpp"
void ngraph::op::Softmax::generate_adjoints(autodiff::Adjoints& adjoints,
const std::shared_ptr<Node>& delta)
{
auto z = delta * shared_from_this();
auto zsum = std::make_shared<op::Sum>(z, m_axes);
Shape shape;
for (size_t i = 0; i < get_shape().size(); ++i)
{
if (m_axes.find(i) == m_axes.end())
{
shape.push_back(get_shape()[i]);
}
else
{
shape.push_back(1);
}
}
AxisVector order(zsum->get_shape().size());
std::iota(order.begin(), order.end(), 0);
auto zreshape = std::make_shared<op::Reshape>(zsum, order, shape);
auto adjoint =
z - builder::make_with_numpy_broadcast<op::Multiply>(shared_from_this(), zreshape);
auto x = get_input_op(0);
adjoints.add_delta(x, adjoint);
}
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include "ngraph/ops/util/unary_elementwise_arithmetic.hpp"
namespace ngraph
{
namespace op
{
/// \brief Softmax operation.
///
class Softmax : public util::UnaryElementwiseArithmetic
{
public:
/// \brief Constructs a softmax operation.
///
/// \param arg Node that produces the input tensor.<br>
/// `[d0, ...]`
/// \param axes The axis positions (0-based) on which to calculate the softmax.
///
/// Output `[d0, ...]`
///
Softmax(const std::shared_ptr<Node>& arg, const AxisSet& axes)
: UnaryElementwiseArithmetic("Softmax", arg)
, m_axes(axes)
{
for (auto axis : m_axes)
{
if (axis >= get_shape().size())
{
throw ngraph_error("Axis for softmax reduction operator is out of bounds");
}
}
// empty axes == all axes
if (m_axes.size() == 0)
{
for (size_t i = 0; i < get_shape().size(); ++i)
{
m_axes.insert(i);
}
}
}
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override
{
if (new_args.size() != 1)
{
throw ngraph_error("Incorrect number of new arguments");
}
return std::make_shared<Softmax>(new_args.at(0), m_axes);
}
const AxisSet& get_axes() const { return m_axes; }
protected:
virtual void generate_adjoints(autodiff::Adjoints& adjoints,
const std::shared_ptr<Node>& delta) override;
private:
AxisSet m_axes;
};
}
}
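// Usage note (sketch; `data` is illustrative and not part of this header):
//   auto sm = std::make_shared<op::Softmax>(data, AxisSet{});
// An empty AxisSet is normalized by the constructor to "all axes", and any axis
// index >= the input's rank throws ngraph_error at construction time.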
......@@ -79,6 +79,7 @@
#include "ngraph/ops/sin.hpp"
#include "ngraph/ops/sinh.hpp"
#include "ngraph/ops/slice.hpp"
#include "ngraph/ops/softmax.hpp"
#include "ngraph/ops/sqrt.hpp"
#include "ngraph/ops/subtract.hpp"
#include "ngraph/ops/sum.hpp"
......@@ -3188,6 +3189,203 @@ namespace ngraph
writer << " " << out[0].get_size() << ");\n";
}
}
template <>
void CPU_Emitter::EMITTER_DECL(ngraph::op::Softmax)
{
const ngraph::op::Softmax* softmax = static_cast<const ngraph::op::Softmax*>(node);
auto type = out[0].get_type();
auto shape = out[0].get_shape();
auto dims = out[0].get_shape().size();
auto axes = softmax->get_axes();
// 0-d (scalar) case: use plain pointers for arg/out
if (dims < 1)
{
writer << type << "* arg = " << args[0].get_name() << ";\n";
writer << type << "* out = " << out[0].get_name() << ";\n";
}
// else cast arg/out to an Nd array
else
{
std::string shape1toN;
for (size_t d = 1; d < dims; ++d)
{
shape1toN += "[";
shape1toN += std::to_string(shape[d]);
shape1toN += "]";
}
writer << type << " (*arg)" << shape1toN << " = (" << type << " (*)"
<< shape1toN << ") " << args[0].get_name() << ";\n";
writer << type << " (*out)" << shape1toN << " = (" << type << " (*)"
<< shape1toN << ") " << out[0].get_name() << ";\n";
}
// build arg/out index
std::string index;
for (size_t d = 0; d < dims; ++d)
{
index += "[i";
index += std::to_string(d);
index += "]";
}
// calculate e ^ (arg - max)
// outer loop(s) - for axis not in axes
for (size_t d = 0; d < dims; ++d)
{
if (axes.find(d) == axes.end())
{
writer << "#pragma omp parallel for\n";
writer << "for (size_t i" << d << " = 0; i" << d << " < " << shape[d]
<< "; ++i" << d << ")\n";
writer << "{\n";
writer.indent++;
}
}
// max inner loop(s)
writer << type << " m = 0;\n"; // TODO: needs to be minval for the type
for (size_t d = 0; d < dims; ++d)
{
if (axes.find(d) != axes.end())
{
writer << "for (size_t i" << d << " = 0; i" << d << " < " << shape[d]
<< "; ++i" << d << ")\n";
writer << "{\n";
writer.indent++;
}
}
writer << "if (arg" << index << " > m)\n";
writer << "{\n";
writer.indent++;
writer << "m = arg" << index << ";\n";
writer.indent--;
writer << "}\n";
// end max inner loop(s)
for (size_t d = 0; d < dims; ++d)
{
if (axes.find(d) != axes.end())
{
writer.indent--;
writer << "}\n";
}
}
// e ^ (arg - max) inner loop
for (size_t d = 0; d < dims; ++d)
{
if (axes.find(d) != axes.end())
{
writer << "for (size_t i" << d << " = 0; i" << d << " < " << shape[d]
<< "; ++i" << d << ")\n";
writer << "{\n";
writer.indent++;
}
}
writer << "out" << index << " = exp(arg" << index << " - m);\n";
// end e ^ (arg - max) inner loop
for (size_t d = 0; d < dims; ++d)
{
if (axes.find(d) != axes.end())
{
writer.indent--;
writer << "}\n";
}
}
// end e ^ (arg - max) outer loop(s)
for (size_t d = 0; d < dims; ++d)
{
if (axes.find(d) == axes.end())
{
writer.indent--;
writer << "}\n";
}
}
// calculate softmax = e ^ (arg - max) / sum (e ^ (arg - max))
// outer loop(s) - for axis not in axes
for (size_t d = 0; d < dims; ++d)
{
if (axes.find(d) == axes.end())
{
writer << "#pragma omp parallel for\n";
writer << "for (size_t i" << d << " = 0; i" << d << " < " << shape[d]
<< "; ++i" << d << ")\n";
writer << "{\n";
writer.indent++;
}
}
// sum(e ^ (arg - max)) inner loop(s)
writer << type << " d = 0;\n";
for (size_t d = 0; d < dims; ++d)
{
if (axes.find(d) != axes.end())
{
writer << "for (size_t i" << d << " = 0; i" << d << " < " << shape[d]
<< "; ++i" << d << ")\n";
writer << "{\n";
writer.indent++;
}
}
writer << "d += out" << index << ";\n";
// end sum(e ^ (arg - max)) inner loop(s)
for (size_t d = 0; d < dims; ++d)
{
if (axes.find(d) != axes.end())
{
writer.indent--;
writer << "}\n";
}
}
writer << "d = 1 / d;\n";
// softmax inner loop(s)
for (size_t d = 0; d < dims; ++d)
{
if (axes.find(d) != axes.end())
{
writer << "for (size_t i" << d << " = 0; i" << d << " < " << shape[d]
<< "; ++i" << d << ")\n";
writer << "{\n";
writer.indent++;
}
}
writer << "out" << index << " *= d;\n";
// end softmax inner loop(s)
for (size_t d = 0; d < dims; ++d)
{
if (axes.find(d) != axes.end())
{
writer.indent--;
writer << "}\n";
}
}
// end softmax outer loop(s)
for (size_t d = 0; d < dims; ++d)
{
if (axes.find(d) == axes.end())
{
writer.indent--;
writer << "}\n";
}
}
}
}
}
}
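// Not part of this commit: a sketch, for orientation, of roughly the code the
// emitter above generates for a float tensor of shape {2, 3} with axes = {1}.
// The buffer names arg0/out0 stand in for the names the code generator chooses.
#include <cmath>
#include <cstddef>

static void emitted_softmax_sketch(float* arg0, float* out0)
{
    float (*arg)[3] = (float (*)[3])arg0;
    float (*out)[3] = (float (*)[3])out0;
#pragma omp parallel for
    for (size_t i0 = 0; i0 < 2; ++i0)
    {
        // max over the reduced axis (the emitter's TODO: 0 should be the type's lowest value)
        float m = 0;
        for (size_t i1 = 0; i1 < 3; ++i1)
        {
            if (arg[i0][i1] > m)
            {
                m = arg[i0][i1];
            }
        }
        // e ^ (arg - max)
        for (size_t i1 = 0; i1 < 3; ++i1)
        {
            out[i0][i1] = std::exp(arg[i0][i1] - m);
        }
    }
#pragma omp parallel for
    for (size_t i0 = 0; i0 < 2; ++i0)
    {
        // normalize: divide by sum(e ^ (arg - max)) along the reduced axis
        float d = 0;
        for (size_t i1 = 0; i1 < 3; ++i1)
        {
            d += out[i0][i1];
        }
        d = 1 / d;
        for (size_t i1 = 0; i1 < 3; ++i1)
        {
            out[i0][i1] *= d;
        }
    }
}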
......
......@@ -89,6 +89,7 @@
#include "ngraph/ops/sin.hpp"
#include "ngraph/ops/sinh.hpp"
#include "ngraph/ops/slice.hpp"
#include "ngraph/ops/softmax.hpp"
#include "ngraph/ops/sqrt.hpp"
#include "ngraph/ops/subtract.hpp"
#include "ngraph/ops/sum.hpp"
......@@ -239,6 +240,7 @@ static const runtime::cpu::OpMap dispatcher{
{TI(ngraph::op::Min), &runtime::cpu::CPU_Emitter::emit<op::Min>},
{TI(ngraph::op::Relu), &runtime::cpu::CPU_Emitter::emit<op::Relu>},
{TI(ngraph::op::ReluBackprop), &runtime::cpu::CPU_Emitter::emit<op::ReluBackprop>},
{TI(ngraph::op::Softmax), &runtime::cpu::CPU_Emitter::emit<op::Softmax>},
};
runtime::cpu::CPU_ExternalFunction::CPU_ExternalFunction(
......
......@@ -42,6 +42,7 @@
#include "ngraph/ops/reverse.hpp"
#include "ngraph/ops/select_and_scatter.hpp"
#include "ngraph/ops/slice.hpp"
#include "ngraph/ops/softmax.hpp"
#include "ngraph/ops/sum.hpp"
#include "ngraph/runtime/call_frame.hpp"
#include "ngraph/runtime/host_tensor_view.hpp"
......@@ -95,6 +96,7 @@
#include "ngraph/runtime/kernel/sin.hpp"
#include "ngraph/runtime/kernel/sinh.hpp"
#include "ngraph/runtime/kernel/slice.hpp"
#include "ngraph/runtime/kernel/softmax.hpp"
#include "ngraph/runtime/kernel/sqrt.hpp"
#include "ngraph/runtime/kernel/subtract.hpp"
#include "ngraph/runtime/kernel/sum.hpp"
......@@ -812,6 +814,14 @@ private:
slice->get_strides(),
out[0]->get_shape());
}
else if (node_op == "Softmax")
{
const op::Softmax* softmax = static_cast<const op::Softmax*>(&node);
kernel::softmax<T>(reinterpret_cast<T*>(args[0]->get_data_ptr()),
reinterpret_cast<T*>(out[0]->get_data_ptr()),
out[0]->get_shape(),
softmax->get_axes());
}
else if (node_op == "Sqrt")
{
kernel::sqrt<T>(reinterpret_cast<T*>(args[0]->get_data_ptr()),
......
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include <cmath>
#include <functional>
#include <numeric>
#include "ngraph/coordinate_transform.hpp"
#include "ngraph/runtime/kernel/max.hpp"
#include "ngraph/runtime/kernel/sum.hpp"
namespace ngraph
{
namespace runtime
{
namespace kernel
{
template <typename T>
void softmax(T* arg, T* out, const Shape& shape, const AxisSet& axes)
{
auto temp_shape = project(shape, axes);
auto temp_elements = std::accumulate(
temp_shape.begin(), temp_shape.end(), 1, std::multiplies<size_t>());
auto temp_ptr = new T[temp_elements];
max(arg, temp_ptr, shape, temp_shape, axes);
CoordinateTransform transform(shape);
CoordinateTransform temp_transform(temp_shape);
for (const Coordinate& coord : transform)
{
Coordinate temp_coord = project(coord, axes);
out[transform.index(coord)] = std::exp(
arg[transform.index(coord)] - temp_ptr[temp_transform.index(temp_coord)]);
}
sum(out, temp_ptr, shape, temp_shape, axes);
for (const Coordinate& coord : transform)
{
Coordinate temp_coord = project(coord, axes);
out[transform.index(coord)] /= temp_ptr[temp_transform.index(temp_coord)];
}
delete[] temp_ptr;
}
}
}
}
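// Not part of this commit: a self-contained sketch of the same numerically
// stable max-subtract / exp / sum / divide sequence as kernel::softmax above,
// specialized to a row-major [rows x cols] buffer reduced over axis 1
// (plain C++, no nGraph types; names are illustrative; assumes cols >= 1).
#include <algorithm>
#include <cmath>
#include <cstddef>

inline void softmax_rows_sketch(const float* arg, float* out, size_t rows, size_t cols)
{
    for (size_t r = 0; r < rows; ++r)
    {
        // max over the reduced axis
        float m = arg[r * cols];
        for (size_t c = 1; c < cols; ++c)
        {
            m = std::max(m, arg[r * cols + c]);
        }
        // e ^ (arg - max), accumulating the sum as we go
        float sum = 0;
        for (size_t c = 0; c < cols; ++c)
        {
            out[r * cols + c] = std::exp(arg[r * cols + c] - m);
            sum += out[r * cols + c];
        }
        // divide by the sum
        for (size_t c = 0; c < cols; ++c)
        {
            out[r * cols + c] /= sum;
        }
    }
}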
......@@ -70,6 +70,7 @@
#include "ngraph/ops/sin.hpp"
#include "ngraph/ops/sinh.hpp"
#include "ngraph/ops/slice.hpp"
#include "ngraph/ops/softmax.hpp"
#include "ngraph/ops/sqrt.hpp"
#include "ngraph/ops/subtract.hpp"
#include "ngraph/ops/sum.hpp"
......@@ -712,6 +713,11 @@ static shared_ptr<ngraph::Function>
auto strides = node_js.at("strides").get<vector<size_t>>();
node = make_shared<op::Slice>(args[0], lower_bounds, upper_bounds, strides);
}
else if (node_op == "Softmax")
{
auto reduction_axes = node_js.at("reduction_axes").get<set<size_t>>();
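// "reduction_axes" is the per-node JSON attribute this reader expects for
// Softmax; it is handed straight to the op::Softmax constructor as an AxisSet.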
node = make_shared<op::Softmax>(args[0], reduction_axes);
}
else if (node_op == "Sqrt")
{
node = make_shared<op::Sqrt>(args[0]);
......
......@@ -1303,27 +1303,116 @@ TEST(${BACKEND_NAME}, backwards_slice)
}
}
TEST(${BACKEND_NAME}, backwards_softmax_all)
{
auto manager = runtime::Manager::get("${BACKEND_NAME}");
auto backend = manager->allocate_backend();
test::Uniform<float> rng(-1.0f, 1.0f);
Shape shape{2, 3};
auto x0 = rng.initialize(backend->make_primary_tensor_view<float>(shape));
auto make_graph = [shape]() {
auto X0 = make_shared<op::Parameter>(element::f32, shape);
return make_shared<Function>(make_shared<op::Softmax>(X0, AxisSet{0, 1}),
std::vector<std::shared_ptr<op::Parameter>>{X0});
};
EXPECT_TRUE(autodiff_numeric_compare<float>(manager, backend, make_graph, {x0}, .01f, .01f));
}
TEST(${BACKEND_NAME}, backwards_softmax_axis)
{
auto manager = runtime::Manager::get("${BACKEND_NAME}");
auto backend = manager->allocate_backend();
test::Uniform<float> rng(-1.0f, 1.0f);
Shape shape{2, 3};
auto x0 = rng.initialize(backend->make_primary_tensor_view<float>(shape));
auto make_graph = [shape]() {
auto X0 = make_shared<op::Parameter>(element::f32, shape);
return make_shared<Function>(make_shared<op::Softmax>(X0, AxisSet{1}),
std::vector<std::shared_ptr<op::Parameter>>{X0});
};
EXPECT_TRUE(autodiff_numeric_compare<float>(manager, backend, make_graph, {x0}, .01f, .01f));
}
TEST(${BACKEND_NAME}, backwards_softmax_underflow)
{
auto manager = runtime::Manager::get("${BACKEND_NAME}");
auto backend = manager->allocate_backend();
auto low = std::numeric_limits<float>::lowest();
Shape shape{2, 3};
auto x0 = backend->make_primary_tensor_view(element::f32, shape);
copy_data(x0, vector<float>{low, 1, 2, 3, 4, 5});
auto make_graph = [shape]() {
auto X0 = make_shared<op::Parameter>(element::f32, shape);
return make_shared<Function>(make_shared<op::Softmax>(X0, AxisSet{0, 1}),
std::vector<std::shared_ptr<op::Parameter>>{X0});
};
EXPECT_TRUE(autodiff_numeric_compare<float>(manager, backend, make_graph, {x0}, .01f, .01f));
}
TEST(${BACKEND_NAME}, backwards_softmax_3d)
{
auto manager = runtime::Manager::get("${BACKEND_NAME}");
auto backend = manager->allocate_backend();
test::Uniform<float> rng(-1.0f, 1.0f);
Shape shape{2, 3, 4};
auto x0 = rng.initialize(backend->make_primary_tensor_view<float>(shape));
auto make_graph0 = [shape]() {
auto X0 = make_shared<op::Parameter>(element::f32, shape);
return make_shared<Function>(make_shared<op::Softmax>(X0, AxisSet{0}),
std::vector<std::shared_ptr<op::Parameter>>{X0});
};
EXPECT_TRUE(autodiff_numeric_compare<float>(manager, backend, make_graph0, {x0}, .01f, .01f));
auto make_graph1 = [shape]() {
auto X0 = make_shared<op::Parameter>(element::f32, shape);
return make_shared<Function>(make_shared<op::Softmax>(X0, AxisSet{1}),
std::vector<std::shared_ptr<op::Parameter>>{X0});
};
EXPECT_TRUE(autodiff_numeric_compare<float>(manager, backend, make_graph1, {x0}, .01f, .01f));
auto make_graph2 = [shape]() {
auto X0 = make_shared<op::Parameter>(element::f32, shape);
return make_shared<Function>(make_shared<op::Softmax>(X0, AxisSet{2}),
std::vector<std::shared_ptr<op::Parameter>>{X0});
};
EXPECT_TRUE(autodiff_numeric_compare<float>(manager, backend, make_graph2, {x0}, .01f, .01f));
auto make_graph01 = [shape]() {
auto X0 = make_shared<op::Parameter>(element::f32, shape);
return make_shared<Function>(make_shared<op::Softmax>(X0, AxisSet{0, 1}),
std::vector<std::shared_ptr<op::Parameter>>{X0});
};
EXPECT_TRUE(autodiff_numeric_compare<float>(manager, backend, make_graph01, {x0}, .01f, .01f));
auto make_graph02 = [shape]() {
auto X0 = make_shared<op::Parameter>(element::f32, shape);
return make_shared<Function>(make_shared<op::Softmax>(X0, AxisSet{0, 2}),
std::vector<std::shared_ptr<op::Parameter>>{X0});
};
EXPECT_TRUE(autodiff_numeric_compare<float>(manager, backend, make_graph02, {x0}, .01f, .01f));
auto make_graph12 = [shape]() {
auto X0 = make_shared<op::Parameter>(element::f32, shape);
return make_shared<Function>(make_shared<op::Softmax>(X0, AxisSet{1, 2}),
std::vector<std::shared_ptr<op::Parameter>>{X0});
};
EXPECT_TRUE(autodiff_numeric_compare<float>(manager, backend, make_graph12, {x0}, .01f, .01f));
auto make_graph012 = [shape]() {
auto X0 = make_shared<op::Parameter>(element::f32, shape);
return make_shared<Function>(make_shared<op::Softmax>(X0, AxisSet{0, 1, 2}),
std::vector<std::shared_ptr<op::Parameter>>{X0});
};
EXPECT_TRUE(autodiff_numeric_compare<float>(manager, backend, make_graph012, {x0}, .01f, .01f));
}
TEST(${BACKEND_NAME}, backwards_subtract)
......
......@@ -8447,3 +8447,90 @@ TEST(${BACKEND_NAME}, relu_4Dbackprop)
cf->call({a, delta}, {result});
EXPECT_EQ(read_vector<float>(result), expected);
}
TEST(${BACKEND_NAME}, softmax_all)
{
Shape shape{2, 3};
auto A = make_shared<op::Parameter>(element::f32, shape);
auto f =
make_shared<Function>(make_shared<op::Softmax>(A, AxisSet{0, 1}), op::ParameterVector{A});
auto manager = runtime::Manager::get("${BACKEND_NAME}");
auto external = manager->compile(f);
auto backend = manager->allocate_backend();
auto cf = backend->make_call_frame(external);
auto a = backend->make_primary_tensor_view(element::f32, shape);
copy_data(a, vector<float>{-3, -2, -1, 0, 1, 2});
auto result = backend->make_primary_tensor_view(element::f32, shape);
auto d = expf(-3) + expf(-2) + expf(-1) + expf(0) + expf(1) + expf(2);
cf->call({a}, {result});
vector<float> expected{
expf(-3) / d, expf(-2) / d, expf(-1) / d, expf(0) / d, expf(1) / d, expf(2) / d};
EXPECT_TRUE(test::all_close(expected, read_vector<float>(result)));
// empty AxisSet is the same as "full" AxisSet
f = make_shared<Function>(make_shared<op::Softmax>(A, AxisSet{}), op::ParameterVector{A});
external = manager->compile(f);
cf = backend->make_call_frame(external);
cf->call({a}, {result});
EXPECT_TRUE(test::all_close(expected, read_vector<float>(result)));
}
TEST(${BACKEND_NAME}, softmax_axis)
{
Shape shape{2, 3};
auto A = make_shared<op::Parameter>(element::f32, shape);
auto f = make_shared<Function>(make_shared<op::Softmax>(A, AxisSet{1}), op::ParameterVector{A});
auto manager = runtime::Manager::get("${BACKEND_NAME}");
auto external = manager->compile(f);
auto backend = manager->allocate_backend();
auto cf = backend->make_call_frame(external);
auto a = backend->make_primary_tensor_view(element::f32, shape);
copy_data(a, vector<float>{-10, -20, -30, -40, -50, -60});
auto result = backend->make_primary_tensor_view(element::f32, shape);
auto d0 = expf(-10) + expf(-20) + expf(-30);
auto d1 = expf(-40) + expf(-50) + expf(-60);
cf->call({a}, {result});
vector<float> expected{expf(-10) / d0,
expf(-20) / d0,
expf(-30) / d0,
expf(-40) / d1,
expf(-50) / d1,
expf(-60) / d1};
EXPECT_TRUE(test::all_close(expected, read_vector<float>(result)));
}
TEST(${BACKEND_NAME}, softmax_underflow)
{
Shape shape{2, 3};
auto A = make_shared<op::Parameter>(element::f32, shape);
auto f = make_shared<Function>(make_shared<op::Softmax>(A, AxisSet{0}), op::ParameterVector{A});
auto manager = runtime::Manager::get("${BACKEND_NAME}");
auto external = manager->compile(f);
auto backend = manager->allocate_backend();
auto cf = backend->make_call_frame(external);
auto low = std::numeric_limits<float>::lowest();
auto a = backend->make_primary_tensor_view(element::f32, shape);
copy_data(a, vector<float>{low, 1, 2, 3, 4, 5});
auto result = backend->make_primary_tensor_view(element::f32, shape);
auto d0 = expf(low) + expf(3);
auto d1 = expf(1) + expf(4);
auto d2 = expf(2) + expf(5);
cf->call({a}, {result});
vector<float> expected{
expf(low) / d0, expf(1) / d1, expf(2) / d2, expf(3) / d0, expf(4) / d1, expf(5) / d2};
EXPECT_TRUE(test::all_close(expected, read_vector<float>(result)));
}