src/contrib/mlir · aedd8c2e365cfb7bece89b98825dbd868257e5e2 · submodule / ngraph

[MLIR] Enable affine dialect loop fusion (#3290) · aedd8c2e

Diego Caballero authored Jul 29, 2019

* [MLIR] Enable affine dialect loop fusion

Enable affine dialect loop fusion in the nGraph pipeline. This change also
adds an opt flag to enable/disable fusion when ngraph-opt is in place. Fusion
seems to work for simple cases. However, it was not able to fuse dot + add,
at least in my test case. One example that worked:

Input:
  %6 = alloc() : memref<2500x2500xf32>
  affine.for %i3 = 0 to 2500 {
    affine.for %i4 = 0 to 2500 {
      %7 = load %arg0[%i3, %i4] : memref<2500x2500xf32>
      %8 = load %0[%i3, %i4] : memref<2500x2500xf32>
      %9 = addf %8, %7 : f32
      store %9, %6[%i3, %i4] : memref<2500x2500xf32>
    }
  }
  %10 = alloc() : memref<2500x2500xf32>
  affine.for %i5 = 0 to 2500 {
    affine.for %i6 = 0 to 2500 {
      %11 = load %arg2[%i5, %i6] : memref<2500x2500xf32>
      %12 = load %0[%i5, %i6] : memref<2500x2500xf32>
      %13 = addf %12, %11 : f32
      store %13, %10[%i5, %i6] : memref<2500x2500xf32>
    }
  }
  %14 = alloc() : memref<2500x2500xf32>
  affine.for %i7 = 0 to 2500 {
    affine.for %i8 = 0 to 2500 {
      %15 = load %10[%i7, %i8] : memref<2500x2500xf32>
      %16 = load %6[%i7, %i8] : memref<2500x2500xf32>
      %17 = addf %16, %15 : f32
      store %17, %14[%i7, %i8] : memref<2500x2500xf32>
    }
  }

Output:
  %8 = alloc() : memref<2500x2500xf32>
  affine.for %i3 = 0 to 2500 {
    affine.for %i4 = 0 to 2500 {
      %9 = load %arg2[%i3, %i4] : memref<2500x2500xf32>
      %10 = load %2[%i3, %i4] : memref<2500x2500xf32>
      %11 = addf %10, %9 : f32
      %12 = affine.apply #map2(%i3, %i4, %i3, %i4)
      %13 = affine.apply #map3(%i3, %i4, %i3, %i4)
      store %11, %0[%12, %13] : memref<1x1xf32>
      %14 = load %arg0[%i3, %i4] : memref<2500x2500xf32>
      %15 = load %2[%i3, %i4] : memref<2500x2500xf32>
      %16 = addf %15, %14 : f32
      %17 = affine.apply #map2(%i3, %i4, %i3, %i4)
      %18 = affine.apply #map3(%i3, %i4, %i3, %i4)
      store %16, %1[%17, %18] : memref<1x1xf32>
      %19 = affine.apply #map2(%i3, %i4, %i3, %i4)
      %20 = affine.apply #map3(%i3, %i4, %i3, %i4)
      %21 = load %0[%19, %20] : memref<1x1xf32>
      %22 = affine.apply #map2(%i3, %i4, %i3, %i4)
      %23 = affine.apply #map3(%i3, %i4, %i3, %i4)
      %24 = load %1[%22, %23] : memref<1x1xf32>
      %25 = addf %24, %21 : f32
      store %25, %8[%i3, %i4] : memref<2500x2500xf32>
    }
  }

* Rename MLIR_LLVM_OPTIONS to NGRAPH_MLIR_OPTIONS

For example, setting the environment variable as follows now works:
NGRAPH_MLIR_OPTIONS="--enable-affine-loop-fusion=false"

* Disable loop fusion by default and fix typo

aedd8c2e

Name	Last commit	Last update
..
dialect		Loading commit data...
pass		Loading commit data...
CMakeLists.txt		Loading commit data...
compiler.cpp		Loading commit data...
compiler.hpp		Loading commit data...
lowerer.cpp		Loading commit data...
lowerer.hpp		Loading commit data...
memory_manager.cpp		Loading commit data...
memory_manager.hpp		Loading commit data...
op_lowerers.inc		Loading commit data...
ops_supported.inc		Loading commit data...