src/contrib/mlir/compiler.cpp · aedd8c2e365cfb7bece89b98825dbd868257e5e2 · submodule / ngraph

[MLIR] Enable affine dialect loop fusion (#3290) · aedd8c2e
Diego Caballero authored Jul 29, 2019
* [MLIR] Enable affine dialect loop fusion

Enable affine dialect loop fusion in the nGraph pipeline. This change also
adds an opt flag to enable/disable fusion when ngraph-opt is in place. Fusion
seems to work for simple cases, but it was not able to fuse dot + add, at
least in my test case. One example that worked:

Input:
  %6 = alloc() : memref<2500x2500xf32>
  affine.for %i3 = 0 to 2500 {
    affine.for %i4 = 0 to 2500 {
      %7 = load %arg0[%i3, %i4] : memref<2500x2500xf32>
      %8 = load %0[%i3, %i4] : memref<2500x2500xf32>
      %9 = addf %8, %7 : f32
      store %9, %6[%i3, %i4] : memref<2500x2500xf32>
    }
  }
  %10 = alloc() : memref<2500x2500xf32>
  affine.for %i5 = 0 to 2500 {
    affine.for %i6 = 0 to 2500 {
      %11 = load %arg2[%i5, %i6] : memref<2500x2500xf32>
      %12 = load %0[%i5, %i6] : memref<2500x2500xf32>
      %13 = addf %12, %11 : f32
      store %13, %10[%i5, %i6] : memref<2500x2500xf32>
    }
  }
  %14 = alloc() : memref<2500x2500xf32>
  affine.for %i7 = 0 to 2500 {
    affine.for %i8 = 0 to 2500 {
      %15 = load %10[%i7, %i8] : memref<2500x2500xf32>
      %16 = load %6[%i7, %i8] : memref<2500x2500xf32>
      %17 = addf %16, %15 : f32
      store %17, %14[%i7, %i8] : memref<2500x2500xf32>
    }
  }

Output:
  %8 = alloc() : memref<2500x2500xf32>
  affine.for %i3 = 0 to 2500 {
    affine.for %i4 = 0 to 2500 {
      %9 = load %arg2[%i3, %i4] : memref<2500x2500xf32>
      %10 = load %2[%i3, %i4] : memref<2500x2500xf32>
      %11 = addf %10, %9 : f32
      %12 = affine.apply #map2(%i3, %i4, %i3, %i4)
      %13 = affine.apply #map3(%i3, %i4, %i3, %i4)
      store %11, %0[%12, %13] : memref<1x1xf32>
      %14 = load %arg0[%i3, %i4] : memref<2500x2500xf32>
      %15 = load %2[%i3, %i4] : memref<2500x2500xf32>
      %16 = addf %15, %14 : f32
      %17 = affine.apply #map2(%i3, %i4, %i3, %i4)
      %18 = affine.apply #map3(%i3, %i4, %i3, %i4)
      store %16, %1[%17, %18] : memref<1x1xf32>
      %19 = affine.apply #map2(%i3, %i4, %i3, %i4)
      %20 = affine.apply #map3(%i3, %i4, %i3, %i4)
      %21 = load %0[%19, %20] : memref<1x1xf32>
      %22 = affine.apply #map2(%i3, %i4, %i3, %i4)
      %23 = affine.apply #map3(%i3, %i4, %i3, %i4)
      %24 = load %1[%22, %23] : memref<1x1xf32>
      %25 = addf %24, %21 : f32
      store %25, %8[%i3, %i4] : memref<2500x2500xf32>
    }
  }

* Rename MLIR_LLVM_OPTIONS to NGRAPH_MLIR_OPTIONS

Something like this works now:
NGRAPH_MLIR_OPTIONS="--enable-affine-loop-fusion=false"

* Disable loop fusion by default and fix typo
aedd8c2e
compiler.cpp 21.6 KB
Replace compiler.cpp