Unverified Commit e433e55a authored by Matthew Brookhart, committed by GitHub

Add a few more openmp ops (#374)

* Add a few more openmp ops

* fix a warning

* fix merge error
parent 29231e11
@@ -633,10 +633,19 @@ void runtime::cpu::CPU_Emitter::EmitSelect(codegen::CodeWriter& writer,
{
    writer << "{ // " << n->get_name() << "\n";
    writer.indent++;
#if PREFER_EIGEN == 1
    writer << emit_array1d(out[0]) << " =\n"
           << " " << emit_array1d(args[0]) << "\n"
           << " .select(" << emit_array1d(args[1]) << ",\n"
           << " " << emit_array1d(args[2]) << ");\n";
#else
    writer << "#pragma omp parallel for\n";
    writer << "for (size_t i = 0; i < " << out[0].get_size() << "; i++)\n";
    writer << "{\n";
    writer << " " << out[0].get_name() << "[i] = " << args[0].get_name() << "[i] ? "
           << args[1].get_name() << "[i] : " << args[2].get_name() << "[i];\n";
    writer << "}\n";
#endif
    writer.indent--;
    writer << "}\n";
}
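For orientation, here is a minimal sketch of the kind of loop the OpenMP branch above emits, lifted into a standalone function. The function name, element types (char selector, float data), and parameter names are illustrative, not part of the emitter's actual output; only the loop body mirrors the emitted string.

#include <cstddef>

// Hypothetical rendering of the emitted Select loop: pick arg1[i] where the
// selector arg0[i] is nonzero, otherwise arg2[i].
void select_sketch(const char* arg0, const float* arg1, const float* arg2,
                   float* out0, size_t count)
{
#pragma omp parallel for
    for (size_t i = 0; i < count; i++)
    {
        out0[i] = arg0[i] ? arg1[i] : arg2[i];
    }
}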
@@ -761,9 +770,18 @@ void runtime::cpu::CPU_Emitter::EmitConvert(codegen::CodeWriter& writer,
writer << "{ // " << n->get_name() << "\n";
writer.indent++;
#if PREFER_EIGEN == 1
writer << emit_array1d(out[0]) << " =\n"
<< " " << emit_array1d(args[0]) << "\n"
<< " .template cast<" << result_element_type.c_type_string() << ">();\n";
#else
writer << "#pragma omp parallel for\n";
writer << "for (size_t i = 0; i < " << out[0].get_size() << "; i++)\n";
writer << "{\n";
writer << " " << out[0].get_name() << "[i] = (" << result_element_type.c_type_string()
<< ")(" << args[0].get_name() << "[i]);\n";
writer << "}\n";
#endif
writer.indent--;
writer << "}\n";
}
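As above, a hedged sketch of what the emitted Convert loop boils down to, here assuming a float-to-int32_t conversion; the function name and concrete types are illustrative only, the element-wise C-style cast is what the emitter writes.

#include <cstddef>
#include <cstdint>

// Hypothetical rendering of the emitted Convert loop: an element-wise cast
// to the result element type.
void convert_sketch(const float* arg0, int32_t* out0, size_t count)
{
#pragma omp parallel for
    for (size_t i = 0; i < count; i++)
    {
        out0[i] = (int32_t)(arg0[i]);
    }
}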
@@ -922,8 +940,8 @@ void runtime::cpu::CPU_Emitter::EmitReduce(codegen::CodeWriter& writer,
    auto& f_result_element_type = out[0].get_element_type();
    auto result_shape = out[0].get_shape();
#if PREFER_EIGEN == 1
    auto& reduction_axes = reduce->get_reduction_axes();
    // Trivial case: no reduction axes (this includes the scalar-reductee case).
    if (reduction_axes.empty())
    {
@@ -1082,6 +1100,35 @@ void runtime::cpu::CPU_Emitter::EmitReduce(codegen::CodeWriter& writer,
writer << " {" << join(reduce->get_reduction_axes()) << "},\n";
writer << " f);\n";
}
#else
writer << "{ // " << n->get_name() << " 1\n";
writer.indent++;
string type = f_result_element_type.c_type_string();
writer << "auto f = [](" << type << " x, " << type << " y) -> " << type << "\n{";
writer.indent++;
writer << "\n";
writer << type << " result;\n";
writer << "void* args[] = {&x, &y};\n";
writer << "void* out[] = {&result};\n";
writer << reduction_function->get_name() << "(args, out);\n";
writer << "return result;\n";
writer.indent--;
writer << "};\n";
kernel::emit_reduce(writer,
args[0].get_element_type().c_type_string(),
args[0].get_name(),
args[1].get_name(),
out[0].get_name(),
args[0].get_shape(),
out[0].get_shape(),
reduce->get_reduction_axes());
writer.indent--;
writer << "}\n";
#endif
}
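Instantiated for a float reduction, the OpenMP branch above emits text roughly like the snippet below before handing loop generation to kernel::emit_reduce. The name reduce_function_0 is a placeholder for whatever reduction_function->get_name() returns; this is a rendering of the emitted text, not standalone compilable code.

// Hypothetical emitted text: adapt the compiled reduction function, which
// takes void* argument/output arrays, into a plain binary functor that the
// generated loops can call.
auto f = [](float x, float y) -> float
{
    float result;
    void* args[] = {&x, &y};
    void* out[] = {&result};
    reduce_function_0(args, out); // placeholder name for the compiled reduction
    return result;
};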
void runtime::cpu::CPU_Emitter::EmitSign(codegen::CodeWriter& writer,
@@ -1091,8 +1138,17 @@ void runtime::cpu::CPU_Emitter::EmitSign(codegen::CodeWriter& writer,
{
    writer << "{ // " << n->get_name() << "\n";
    writer.indent++;
#if PREFER_EIGEN == 1
    writer << emit_array1d(out[0]) << " =\n"
           << " " << emit_array1d(args[0]) << ".sign();\n";
#else
    writer << "#pragma omp parallel for\n";
    writer << "for (size_t i = 0; i < " << out[0].get_size() << "; i++)\n";
    writer << "{\n";
    writer << " " << out[0].get_name() << "[i] = (0 < " << args[0].get_name() << "[i]) - ("
           << args[0].get_name() << "[i] < 0);\n";
    writer << "}\n";
#endif
    writer.indent--;
    writer << "}\n";
}
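A hedged sketch of the emitted Sign loop as a standalone function; the function and parameter names are illustrative, while the (0 < x) - (x < 0) idiom, which yields -1, 0, or +1, comes straight from the emitted string above.

#include <cstddef>

// Hypothetical rendering of the emitted Sign loop.
void sign_sketch(const float* arg0, float* out0, size_t count)
{
#pragma omp parallel for
    for (size_t i = 0; i < count; i++)
    {
        out0[i] = (0 < arg0[i]) - (arg0[i] < 0);
    }
}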
@@ -364,3 +364,81 @@ void ngraph::runtime::cpu::kernel::emit_sum(codegen::CodeWriter& writer,
        close_for_loops(writer, index_vars);
    }
}
void ngraph::runtime::cpu::kernel::emit_reduce(codegen::CodeWriter& writer,
                                               const std::string& element_type,
                                               const std::string& arg0, // replacement context
                                               const std::string& arg1,
                                               const std::string& out,
                                               const Shape& arg0_shape,
                                               const Shape& out_shape,
                                               const AxisSet& reduction_axes)
{
    // create input and output arrays
    auto source_nd_name = recast_tmp_var(writer, element_type, arg0, arg0_shape, "source_nd");
    auto dest_nd_name = recast_tmp_var(writer, element_type, out, out_shape, "dest_nd");

    // initialize the output with the reduction's init value (arg1[0]) so it
    // doesn't hold uninitialized data
    if (out_shape.size() == 0)
    {
        writer << dest_nd_name << " = " << arg1 << "[0];\n";
    }
    else
    {
        auto output_vars = open_for_loops(writer, out_shape);
        writer << dest_nd_name << emit_bracketed_string(output_vars) << " = " << arg1 << "[0];\n";
        close_for_loops(writer, output_vars);
    }

    // If no input dimension is zero-length, perform the reduction
    if (std::find(arg0_shape.begin(), arg0_shape.end(), 0) == arg0_shape.end())
    {
        // create the iteration variables without writing the for loops yet
        std::vector<std::string> index_vars;
        for (size_t i = 0; i < arg0_shape.size(); i++)
        {
            std::string index_var = writer.generate_temporary_name("i");
            index_vars.push_back(index_var);
        }

        // calculate the output indexes based on what's being reduced
        std::vector<std::string> out_indexes;
        size_t outer_arg_index = -1; // wraps to SIZE_MAX; means "no non-reduced axis found yet"
        for (size_t i = 0; i < index_vars.size(); ++i)
        {
            if (reduction_axes.count(i) == 0)
            {
                if (out_indexes.size() == 0)
                {
                    outer_arg_index = i;
                }
                out_indexes.push_back(index_vars[i]);
            }
        }

        // make the first non-reduced axis our outer loop and parallelize it with OpenMP
        if (outer_arg_index != -1)
        {
            writer << start_index_loop(
                index_vars[outer_arg_index], 0, arg0_shape[outer_arg_index], true);
            writer.indent++;
        }

        // create the rest of the loops; don't parallelize them
        for (size_t i = 0; i < arg0_shape.size(); i++)
        {
            if (i != outer_arg_index)
            {
                std::string index_var = index_vars[i];
                writer << start_index_loop(index_var, 0, arg0_shape[i], false);
                writer.indent++;
            }
        }

        writer << dest_nd_name << emit_bracketed_string(out_indexes) << " = f(" << dest_nd_name
               << emit_bracketed_string(out_indexes) << "," << source_nd_name
               << emit_bracketed_string(index_vars) << ");\n";

        close_for_loops(writer, index_vars);
    }
}
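To illustrate the loop structure emit_reduce generates, here is a hand-written sketch for a 2x3 float input reduced over axis 1. The fixed shapes, the functor parameter f, and the variable names are illustrative only; the real emitter derives shapes from the node and creates its variables through recast_tmp_var and generated index names.

#include <cstddef>

// Hypothetical shapes: reduce a 2x3 input over axis 1 into a length-2 output.
// The output is seeded with the init value arg1[0]; the outermost non-reduced
// axis is parallelized with OpenMP, the reduced axis stays serial.
template <typename F>
void reduce_axis1_sketch(const float (&source_nd)[2][3],
                         const float* arg1,
                         float (&dest_nd)[2],
                         F f)
{
    for (size_t i = 0; i < 2; i++)
    {
        dest_nd[i] = arg1[0];
    }
#pragma omp parallel for
    for (size_t i0 = 0; i0 < 2; i0++)
    {
        for (size_t i1 = 0; i1 < 3; i1++)
        {
            dest_nd[i0] = f(dest_nd[i0], source_nd[i0][i1]);
        }
    }
}

Only the surviving axis is parallelized because each inner-loop iteration accumulates into the same dest_nd element; parallelizing the reduced axis as well would race on that accumulation.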
@@ -73,6 +73,14 @@ namespace ngraph
                              const Shape& arg0_shape,
                              const Shape& out_shape,
                              const AxisSet& reduction_axes);

                void emit_reduce(codegen::CodeWriter& writer,
                                 const std::string& element_type,
                                 const std::string& arg0, // replacement context
                                 const std::string& arg1,
                                 const std::string& out,
                                 const Shape& arg0_shape,
                                 const Shape& out_shape,
                                 const AxisSet& reduction_axes);
            }
        }
    }