comments and code style

0af487e9 · fenglei.tian · 7525d6e1 · 0af487e9 · 0af487e9
Commit 0af487e9 authored Mar 02, 2018 by fenglei.tian
Show whitespace changes
Inline Side-by-side

Showing with 7 additions and 4 deletions

gpu_cuda_kernel_emitters.cpp src/ngraph/runtime/gpu/gpu_cuda_kernel_emitters.cpp +2 -2

gpu_emitter.cpp src/ngraph/runtime/gpu/gpu_emitter.cpp +5 -2

No files found.
--- a/src/ngraph/runtime/gpu/gpu_cuda_kernel_emitters.cpp
+++ b/src/ngraph/runtime/gpu/gpu_cuda_kernel_emitters.cpp
@@ -76,8 +76,8 @@ namespace ngraph
                    kernel = R"(
 extern "C" __global__
-void cuda_)" + name + "(" +
+void cuda_)" + name + "(" + data_type +
-                         data_type + "* in, " + data_type + "* out, size_t m, size_t k, size_t n)\n" + R"(
+                             "* in, " + data_type + "* out, size_t m, size_t k, size_t n)\n" + R"(
 {
    size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
    if(tid < n)

--- a/src/ngraph/runtime/gpu/gpu_emitter.cpp
+++ b/src/ngraph/runtime/gpu/gpu_emitter.cpp
@@ -462,6 +462,7 @@ void runtime::gpu::GPU_Emitter::EmitBroadcast(
    auto result_shape = out[0].get_shape();
    auto& axes = broadcast->get_broadcast_axes();
+    // broadcast axes are empty, so just do a copy
    if (axes.empty())
    {
        writer << "{   // " << n->get_name() << " \n";
@@ -473,8 +474,10 @@ void runtime::gpu::GPU_Emitter::EmitBroadcast(
        return;
    }
+    // broadcast axes size is 1, or the axes can be grouped into 1 (several contiguous axes, like 01 or 12 or 123, etc.)
    vector<int> axes_v;
    std::copy(axes.begin(), axes.end(), std::back_inserter(axes_v));
+    std::sort(axes_v.begin(), axes_v.end());
    bool is_one_axes = true;
    if (axes.size() != 1)
    {
@@ -490,13 +493,13 @@ void runtime::gpu::GPU_Emitter::EmitBroadcast(
    if (is_one_axes)
    {
        int repeat_times = 1;
-        for (int i = 0; i < axes.size(); i++)
+        for (int i = 0; i < axes_v.size(); i++)
        {
            repeat_times *= result_shape[axes_v[i]];
        }
        int repeat_size = 1;
-        for (int i = *axes.rbegin(); i < result_shape.size(); i++)
+        for (int i = *axes_v.rbegin() + 1; i < result_shape.size(); i++)
        {
            repeat_size *= result_shape[i];
        }