diff --git a/mlir/test/Integration/Dialect/Vector/CPU/AVX512/test-sparse-dot-product.mlir b/mlir/test/Integration/Dialect/Vector/CPU/AVX512/test-sparse-dot-product.mlir index 65c7357714de..00b21178f7b5 100644 --- a/mlir/test/Integration/Dialect/Vector/CPU/AVX512/test-sparse-dot-product.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/AVX512/test-sparse-dot-product.mlir @@ -394,15 +394,15 @@ func @memref_dot_while_branchless(%m_A : memref, %m_B : memref, func @entry() -> i32 { // Initialize large buffers that can be used for multiple test cases of // different sizes. - %b_A = alloc() : memref<128xi64> - %b_B = alloc() : memref<128xf64> - %b_C = alloc() : memref<128xi64> - %b_D = alloc() : memref<128xf64> + %b_A = memref.alloc() : memref<128xi64> + %b_B = memref.alloc() : memref<128xf64> + %b_C = memref.alloc() : memref<128xi64> + %b_D = memref.alloc() : memref<128xf64> - %m_A = memref_cast %b_A : memref<128xi64> to memref - %m_B = memref_cast %b_B : memref<128xf64> to memref - %m_C = memref_cast %b_C : memref<128xi64> to memref - %m_D = memref_cast %b_D : memref<128xf64> to memref + %m_A = memref.cast %b_A : memref<128xi64> to memref + %m_B = memref.cast %b_B : memref<128xf64> to memref + %m_C = memref.cast %b_C : memref<128xi64> to memref + %m_D = memref.cast %b_D : memref<128xf64> to memref // --- Test case 1 ---. // M and N must be a multiple of 8 if smaller than 128. @@ -467,10 +467,10 @@ func @entry() -> i32 { // CHECK: 111 // Release all resources. - dealloc %b_A : memref<128xi64> - dealloc %b_B : memref<128xf64> - dealloc %b_C : memref<128xi64> - dealloc %b_D : memref<128xf64> + memref.dealloc %b_A : memref<128xi64> + memref.dealloc %b_B : memref<128xf64> + memref.dealloc %b_C : memref<128xi64> + memref.dealloc %b_D : memref<128xf64> %r = constant 0 : i32 return %r : i32 diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir index da70bf61836e..52b5bbd62080 100644 --- a/mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir @@ -9,8 +9,8 @@ // RUN: | FileCheck %s func @main() { - %data = alloc() : memref<2x6xi32> - %sum = alloc() : memref<2xi32> + %data = memref.alloc() : memref<2x6xi32> + %sum = memref.alloc() : memref<2xi32> %cst0 = constant 0 : i32 %cst1 = constant 1 : i32 %cst2 = constant 2 : i32 @@ -32,31 +32,31 @@ func @main() { %c5 = constant 5 : index %c6 = constant 6 : index - %cast_data = memref_cast %data : memref<2x6xi32> to memref<*xi32> + %cast_data = memref.cast %data : memref<2x6xi32> to memref<*xi32> gpu.host_register %cast_data : memref<*xi32> - %cast_sum = memref_cast %sum : memref<2xi32> to memref<*xi32> + %cast_sum = memref.cast %sum : memref<2xi32> to memref<*xi32> gpu.host_register %cast_sum : memref<*xi32> - store %cst0, %data[%c0, %c0] : memref<2x6xi32> - store %cst1, %data[%c0, %c1] : memref<2x6xi32> - store %cst2, %data[%c0, %c2] : memref<2x6xi32> - store %cst4, %data[%c0, %c3] : memref<2x6xi32> - store %cst8, %data[%c0, %c4] : memref<2x6xi32> - store %cst16, %data[%c0, %c5] : memref<2x6xi32> + memref.store %cst0, %data[%c0, %c0] : memref<2x6xi32> + memref.store %cst1, %data[%c0, %c1] : memref<2x6xi32> + memref.store %cst2, %data[%c0, %c2] : memref<2x6xi32> + memref.store %cst4, %data[%c0, %c3] : memref<2x6xi32> + memref.store %cst8, %data[%c0, %c4] : memref<2x6xi32> + memref.store %cst16, %data[%c0, %c5] : memref<2x6xi32> - store %cst2, %data[%c1, %c0] : memref<2x6xi32> - store %cst3, %data[%c1, %c1] : memref<2x6xi32> - store %cst6, %data[%c1, %c2] : memref<2x6xi32> - store %cst7, %data[%c1, %c3] : memref<2x6xi32> - store %cst10, %data[%c1, %c4] : memref<2x6xi32> - store %cst11, %data[%c1, %c5] : memref<2x6xi32> + memref.store %cst2, %data[%c1, %c0] : memref<2x6xi32> + memref.store %cst3, %data[%c1, %c1] : memref<2x6xi32> + memref.store %cst6, %data[%c1, %c2] : memref<2x6xi32> + memref.store %cst7, %data[%c1, %c3] : memref<2x6xi32> + memref.store %cst10, %data[%c1, %c4] : memref<2x6xi32> + memref.store %cst11, %data[%c1, %c5] : memref<2x6xi32> // AND gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c2, %grid_y = %c1, %grid_z = %c1) threads(%tx, %ty, %tz) in (%block_x = %c6, %block_y = %c1, %block_z = %c1) { - %val = load %data[%bx, %tx] : memref<2x6xi32> + %val = memref.load %data[%bx, %tx] : memref<2x6xi32> %reduced = "gpu.all_reduce"(%val) ({}) { op = "and" } : (i32) -> (i32) - store %reduced, %sum[%bx] : memref<2xi32> + memref.store %reduced, %sum[%bx] : memref<2xi32> gpu.terminator } diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-max.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-max.mlir index 9edacf3c4099..c03e8ee03814 100644 --- a/mlir/test/Integration/GPU/CUDA/all-reduce-max.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-max.mlir @@ -9,8 +9,8 @@ // RUN: | FileCheck %s func @main() { - %data = alloc() : memref<2x6xi32> - %sum = alloc() : memref<2xi32> + %data = memref.alloc() : memref<2x6xi32> + %sum = memref.alloc() : memref<2xi32> %cst0 = constant 0 : i32 %cst1 = constant 1 : i32 %cst2 = constant 2 : i32 @@ -32,31 +32,31 @@ func @main() { %c5 = constant 5 : index %c6 = constant 6 : index - %cast_data = memref_cast %data : memref<2x6xi32> to memref<*xi32> + %cast_data = memref.cast %data : memref<2x6xi32> to memref<*xi32> gpu.host_register %cast_data : memref<*xi32> - %cast_sum = memref_cast %sum : memref<2xi32> to memref<*xi32> + %cast_sum = memref.cast %sum : memref<2xi32> to memref<*xi32> gpu.host_register %cast_sum : memref<*xi32> - store %cst0, %data[%c0, %c0] : memref<2x6xi32> - store %cst1, %data[%c0, %c1] : memref<2x6xi32> - store %cst2, %data[%c0, %c2] : memref<2x6xi32> - store %cst4, %data[%c0, %c3] : memref<2x6xi32> - store %cst8, %data[%c0, %c4] : memref<2x6xi32> - store %cst16, %data[%c0, %c5] : memref<2x6xi32> + memref.store %cst0, %data[%c0, %c0] : memref<2x6xi32> + memref.store %cst1, %data[%c0, %c1] : memref<2x6xi32> + memref.store %cst2, %data[%c0, %c2] : memref<2x6xi32> + memref.store %cst4, %data[%c0, %c3] : memref<2x6xi32> + memref.store %cst8, %data[%c0, %c4] : memref<2x6xi32> + memref.store %cst16, %data[%c0, %c5] : memref<2x6xi32> - store %cst2, %data[%c1, %c0] : memref<2x6xi32> - store %cst3, %data[%c1, %c1] : memref<2x6xi32> - store %cst6, %data[%c1, %c2] : memref<2x6xi32> - store %cst7, %data[%c1, %c3] : memref<2x6xi32> - store %cst10, %data[%c1, %c4] : memref<2x6xi32> - store %cst11, %data[%c1, %c5] : memref<2x6xi32> + memref.store %cst2, %data[%c1, %c0] : memref<2x6xi32> + memref.store %cst3, %data[%c1, %c1] : memref<2x6xi32> + memref.store %cst6, %data[%c1, %c2] : memref<2x6xi32> + memref.store %cst7, %data[%c1, %c3] : memref<2x6xi32> + memref.store %cst10, %data[%c1, %c4] : memref<2x6xi32> + memref.store %cst11, %data[%c1, %c5] : memref<2x6xi32> // MAX gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c2, %grid_y = %c1, %grid_z = %c1) threads(%tx, %ty, %tz) in (%block_x = %c6, %block_y = %c1, %block_z = %c1) { - %val = load %data[%bx, %tx] : memref<2x6xi32> + %val = memref.load %data[%bx, %tx] : memref<2x6xi32> %reduced = "gpu.all_reduce"(%val) ({}) { op = "max" } : (i32) -> (i32) - store %reduced, %sum[%bx] : memref<2xi32> + memref.store %reduced, %sum[%bx] : memref<2xi32> gpu.terminator } diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-min.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-min.mlir index d88f2f2b43d4..6c8fe9ed6ca8 100644 --- a/mlir/test/Integration/GPU/CUDA/all-reduce-min.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-min.mlir @@ -9,8 +9,8 @@ // RUN: | FileCheck %s func @main() { - %data = alloc() : memref<2x6xi32> - %sum = alloc() : memref<2xi32> + %data = memref.alloc() : memref<2x6xi32> + %sum = memref.alloc() : memref<2xi32> %cst0 = constant 0 : i32 %cst1 = constant 1 : i32 %cst2 = constant 2 : i32 @@ -32,31 +32,31 @@ func @main() { %c5 = constant 5 : index %c6 = constant 6 : index - %cast_data = memref_cast %data : memref<2x6xi32> to memref<*xi32> + %cast_data = memref.cast %data : memref<2x6xi32> to memref<*xi32> gpu.host_register %cast_data : memref<*xi32> - %cast_sum = memref_cast %sum : memref<2xi32> to memref<*xi32> + %cast_sum = memref.cast %sum : memref<2xi32> to memref<*xi32> gpu.host_register %cast_sum : memref<*xi32> - store %cst0, %data[%c0, %c0] : memref<2x6xi32> - store %cst1, %data[%c0, %c1] : memref<2x6xi32> - store %cst2, %data[%c0, %c2] : memref<2x6xi32> - store %cst4, %data[%c0, %c3] : memref<2x6xi32> - store %cst8, %data[%c0, %c4] : memref<2x6xi32> - store %cst16, %data[%c0, %c5] : memref<2x6xi32> + memref.store %cst0, %data[%c0, %c0] : memref<2x6xi32> + memref.store %cst1, %data[%c0, %c1] : memref<2x6xi32> + memref.store %cst2, %data[%c0, %c2] : memref<2x6xi32> + memref.store %cst4, %data[%c0, %c3] : memref<2x6xi32> + memref.store %cst8, %data[%c0, %c4] : memref<2x6xi32> + memref.store %cst16, %data[%c0, %c5] : memref<2x6xi32> - store %cst2, %data[%c1, %c0] : memref<2x6xi32> - store %cst3, %data[%c1, %c1] : memref<2x6xi32> - store %cst6, %data[%c1, %c2] : memref<2x6xi32> - store %cst7, %data[%c1, %c3] : memref<2x6xi32> - store %cst10, %data[%c1, %c4] : memref<2x6xi32> - store %cst11, %data[%c1, %c5] : memref<2x6xi32> + memref.store %cst2, %data[%c1, %c0] : memref<2x6xi32> + memref.store %cst3, %data[%c1, %c1] : memref<2x6xi32> + memref.store %cst6, %data[%c1, %c2] : memref<2x6xi32> + memref.store %cst7, %data[%c1, %c3] : memref<2x6xi32> + memref.store %cst10, %data[%c1, %c4] : memref<2x6xi32> + memref.store %cst11, %data[%c1, %c5] : memref<2x6xi32> // MIN gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c2, %grid_y = %c1, %grid_z = %c1) threads(%tx, %ty, %tz) in (%block_x = %c6, %block_y = %c1, %block_z = %c1) { - %val = load %data[%bx, %tx] : memref<2x6xi32> + %val = memref.load %data[%bx, %tx] : memref<2x6xi32> %reduced = "gpu.all_reduce"(%val) ({}) { op = "min" } : (i32) -> (i32) - store %reduced, %sum[%bx] : memref<2xi32> + memref.store %reduced, %sum[%bx] : memref<2xi32> gpu.terminator } diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir index 6910b511099e..5770819133e8 100644 --- a/mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir @@ -10,15 +10,15 @@ // CHECK-COUNT-8: [{{(5356, ){12}5356}}] func @main() { - %arg = alloc() : memref<2x4x13xf32> - %dst = memref_cast %arg : memref<2x4x13xf32> to memref + %arg = memref.alloc() : memref<2x4x13xf32> + %dst = memref.cast %arg : memref<2x4x13xf32> to memref %c0 = constant 0 : index %c1 = constant 1 : index %c2 = constant 2 : index - %sx = dim %dst, %c2 : memref - %sy = dim %dst, %c1 : memref - %sz = dim %dst, %c0 : memref - %cast_dst = memref_cast %dst : memref to memref<*xf32> + %sx = memref.dim %dst, %c2 : memref + %sy = memref.dim %dst, %c1 : memref + %sz = memref.dim %dst, %c0 : memref + %cast_dst = memref.cast %dst : memref to memref<*xf32> gpu.host_register %cast_dst : memref<*xf32> gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c1, %grid_y = %c1, %grid_z = %c1) threads(%tx, %ty, %tz) in (%block_x = %sx, %block_y = %sy, %block_z = %sz) { @@ -29,7 +29,7 @@ func @main() { %t3 = index_cast %idx : index to i32 %val = sitofp %t3 : i32 to f32 %sum = "gpu.all_reduce"(%val) ({}) { op = "add" } : (f32) -> (f32) - store %sum, %dst[%tz, %ty, %tx] : memref + memref.store %sum, %dst[%tz, %ty, %tx] : memref gpu.terminator } call @print_memref_f32(%cast_dst) : (memref<*xf32>) -> () diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir index 52b4ef5aadf7..c1dcd0f96993 100644 --- a/mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir @@ -9,8 +9,8 @@ // RUN: | FileCheck %s func @main() { - %data = alloc() : memref<2x6xi32> - %sum = alloc() : memref<2xi32> + %data = memref.alloc() : memref<2x6xi32> + %sum = memref.alloc() : memref<2xi32> %cst0 = constant 0 : i32 %cst1 = constant 1 : i32 %cst2 = constant 2 : i32 @@ -32,31 +32,31 @@ func @main() { %c5 = constant 5 : index %c6 = constant 6 : index - %cast_data = memref_cast %data : memref<2x6xi32> to memref<*xi32> + %cast_data = memref.cast %data : memref<2x6xi32> to memref<*xi32> gpu.host_register %cast_data : memref<*xi32> - %cast_sum = memref_cast %sum : memref<2xi32> to memref<*xi32> + %cast_sum = memref.cast %sum : memref<2xi32> to memref<*xi32> gpu.host_register %cast_sum : memref<*xi32> - store %cst0, %data[%c0, %c0] : memref<2x6xi32> - store %cst1, %data[%c0, %c1] : memref<2x6xi32> - store %cst2, %data[%c0, %c2] : memref<2x6xi32> - store %cst4, %data[%c0, %c3] : memref<2x6xi32> - store %cst8, %data[%c0, %c4] : memref<2x6xi32> - store %cst16, %data[%c0, %c5] : memref<2x6xi32> + memref.store %cst0, %data[%c0, %c0] : memref<2x6xi32> + memref.store %cst1, %data[%c0, %c1] : memref<2x6xi32> + memref.store %cst2, %data[%c0, %c2] : memref<2x6xi32> + memref.store %cst4, %data[%c0, %c3] : memref<2x6xi32> + memref.store %cst8, %data[%c0, %c4] : memref<2x6xi32> + memref.store %cst16, %data[%c0, %c5] : memref<2x6xi32> - store %cst2, %data[%c1, %c0] : memref<2x6xi32> - store %cst3, %data[%c1, %c1] : memref<2x6xi32> - store %cst6, %data[%c1, %c2] : memref<2x6xi32> - store %cst7, %data[%c1, %c3] : memref<2x6xi32> - store %cst10, %data[%c1, %c4] : memref<2x6xi32> - store %cst11, %data[%c1, %c5] : memref<2x6xi32> + memref.store %cst2, %data[%c1, %c0] : memref<2x6xi32> + memref.store %cst3, %data[%c1, %c1] : memref<2x6xi32> + memref.store %cst6, %data[%c1, %c2] : memref<2x6xi32> + memref.store %cst7, %data[%c1, %c3] : memref<2x6xi32> + memref.store %cst10, %data[%c1, %c4] : memref<2x6xi32> + memref.store %cst11, %data[%c1, %c5] : memref<2x6xi32> // OR gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c2, %grid_y = %c1, %grid_z = %c1) threads(%tx, %ty, %tz) in (%block_x = %c6, %block_y = %c1, %block_z = %c1) { - %val = load %data[%bx, %tx] : memref<2x6xi32> + %val = memref.load %data[%bx, %tx] : memref<2x6xi32> %reduced = "gpu.all_reduce"(%val) ({}) { op = "or" } : (i32) -> (i32) - store %reduced, %sum[%bx] : memref<2xi32> + memref.store %reduced, %sum[%bx] : memref<2xi32> gpu.terminator } diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir index ea6987e99a95..118117a65f34 100644 --- a/mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir @@ -10,12 +10,12 @@ // CHECK: [{{(35, ){34}35}}] func @main() { - %arg = alloc() : memref<35xf32> - %dst = memref_cast %arg : memref<35xf32> to memref + %arg = memref.alloc() : memref<35xf32> + %dst = memref.cast %arg : memref<35xf32> to memref %one = constant 1 : index %c0 = constant 0 : index - %sx = dim %dst, %c0 : memref - %cast_dst = memref_cast %dst : memref to memref<*xf32> + %sx = memref.dim %dst, %c0 : memref + %cast_dst = memref.cast %dst : memref to memref<*xf32> gpu.host_register %cast_dst : memref<*xf32> gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %one, %grid_y = %one, %grid_z = %one) threads(%tx, %ty, %tz) in (%block_x = %sx, %block_y = %one, %block_z = %one) { @@ -26,7 +26,7 @@ func @main() { "gpu.yield"(%xor) : (i32) -> () }) : (i32) -> (i32) %res = sitofp %xor : i32 to f32 - store %res, %dst[%tx] : memref + memref.store %res, %dst[%tx] : memref gpu.terminator } call @print_memref_f32(%cast_dst) : (memref<*xf32>) -> () diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir index a934f9696797..a3710bc61715 100644 --- a/mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir @@ -9,8 +9,8 @@ // RUN: | FileCheck %s func @main() { - %data = alloc() : memref<2x6xi32> - %sum = alloc() : memref<2xi32> + %data = memref.alloc() : memref<2x6xi32> + %sum = memref.alloc() : memref<2xi32> %cst0 = constant 0 : i32 %cst1 = constant 1 : i32 %cst2 = constant 2 : i32 @@ -32,31 +32,31 @@ func @main() { %c5 = constant 5 : index %c6 = constant 6 : index - %cast_data = memref_cast %data : memref<2x6xi32> to memref<*xi32> + %cast_data = memref.cast %data : memref<2x6xi32> to memref<*xi32> gpu.host_register %cast_data : memref<*xi32> - %cast_sum = memref_cast %sum : memref<2xi32> to memref<*xi32> + %cast_sum = memref.cast %sum : memref<2xi32> to memref<*xi32> gpu.host_register %cast_sum : memref<*xi32> - store %cst0, %data[%c0, %c0] : memref<2x6xi32> - store %cst1, %data[%c0, %c1] : memref<2x6xi32> - store %cst2, %data[%c0, %c2] : memref<2x6xi32> - store %cst4, %data[%c0, %c3] : memref<2x6xi32> - store %cst8, %data[%c0, %c4] : memref<2x6xi32> - store %cst16, %data[%c0, %c5] : memref<2x6xi32> + memref.store %cst0, %data[%c0, %c0] : memref<2x6xi32> + memref.store %cst1, %data[%c0, %c1] : memref<2x6xi32> + memref.store %cst2, %data[%c0, %c2] : memref<2x6xi32> + memref.store %cst4, %data[%c0, %c3] : memref<2x6xi32> + memref.store %cst8, %data[%c0, %c4] : memref<2x6xi32> + memref.store %cst16, %data[%c0, %c5] : memref<2x6xi32> - store %cst2, %data[%c1, %c0] : memref<2x6xi32> - store %cst3, %data[%c1, %c1] : memref<2x6xi32> - store %cst6, %data[%c1, %c2] : memref<2x6xi32> - store %cst7, %data[%c1, %c3] : memref<2x6xi32> - store %cst10, %data[%c1, %c4] : memref<2x6xi32> - store %cst11, %data[%c1, %c5] : memref<2x6xi32> + memref.store %cst2, %data[%c1, %c0] : memref<2x6xi32> + memref.store %cst3, %data[%c1, %c1] : memref<2x6xi32> + memref.store %cst6, %data[%c1, %c2] : memref<2x6xi32> + memref.store %cst7, %data[%c1, %c3] : memref<2x6xi32> + memref.store %cst10, %data[%c1, %c4] : memref<2x6xi32> + memref.store %cst11, %data[%c1, %c5] : memref<2x6xi32> // XOR gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c2, %grid_y = %c1, %grid_z = %c1) threads(%tx, %ty, %tz) in (%block_x = %c6, %block_y = %c1, %block_z = %c1) { - %val = load %data[%bx, %tx] : memref<2x6xi32> + %val = memref.load %data[%bx, %tx] : memref<2x6xi32> %reduced = "gpu.all_reduce"(%val) ({}) { op = "xor" } : (i32) -> (i32) - store %reduced, %sum[%bx] : memref<2xi32> + memref.store %reduced, %sum[%bx] : memref<2xi32> gpu.terminator } diff --git a/mlir/test/Integration/GPU/CUDA/async.mlir b/mlir/test/Integration/GPU/CUDA/async.mlir index c46400624d88..fd9bc4749dd0 100644 --- a/mlir/test/Integration/GPU/CUDA/async.mlir +++ b/mlir/test/Integration/GPU/CUDA/async.mlir @@ -16,13 +16,13 @@ func @main() { %count = constant 2 : index // initialize h0 on host - %h0 = alloc(%count) : memref - %h0_unranked = memref_cast %h0 : memref to memref<*xi32> + %h0 = memref.alloc(%count) : memref + %h0_unranked = memref.cast %h0 : memref to memref<*xi32> gpu.host_register %h0_unranked : memref<*xi32> %v0 = constant 42 : i32 - store %v0, %h0[%c0] : memref - store %v0, %h0[%c1] : memref + memref.store %v0, %h0[%c0] : memref + memref.store %v0, %h0[%c1] : memref // copy h0 to b0 on device. %t0, %f0 = async.execute () -> !async.value> { @@ -54,10 +54,10 @@ func @main() { ) { gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c1, %grid_y = %c1, %grid_z = %c1) threads(%tx, %ty, %tz) in (%block_x = %count, %block_y = %c1, %block_z = %c1) { - %v1 = load %b1[%tx] : memref - %v2 = load %b2[%tx] : memref + %v1 = memref.load %b1[%tx] : memref + %v2 = memref.load %b2[%tx] : memref %sum = addi %v1, %v2 : i32 - store %sum, %h0[%tx] : memref + memref.store %sum, %h0[%tx] : memref gpu.terminator } async.yield diff --git a/mlir/test/Integration/GPU/CUDA/gpu-to-cubin.mlir b/mlir/test/Integration/GPU/CUDA/gpu-to-cubin.mlir index 1cb56cd9ca04..ec9720f55666 100644 --- a/mlir/test/Integration/GPU/CUDA/gpu-to-cubin.mlir +++ b/mlir/test/Integration/GPU/CUDA/gpu-to-cubin.mlir @@ -11,10 +11,10 @@ func @other_func(%arg0 : f32, %arg1 : memref) { %cst = constant 1 : index %c0 = constant 0 : index - %cst2 = dim %arg1, %c0 : memref + %cst2 = memref.dim %arg1, %c0 : memref gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %cst, %grid_y = %cst, %grid_z = %cst) threads(%tx, %ty, %tz) in (%block_x = %cst2, %block_y = %cst, %block_z = %cst) { - store %arg0, %arg1[%tx] : memref + memref.store %arg0, %arg1[%tx] : memref gpu.terminator } return @@ -22,10 +22,10 @@ func @other_func(%arg0 : f32, %arg1 : memref) { // CHECK: [1, 1, 1, 1, 1] func @main() { - %arg0 = alloc() : memref<5xf32> + %arg0 = memref.alloc() : memref<5xf32> %21 = constant 5 : i32 - %22 = memref_cast %arg0 : memref<5xf32> to memref - %23 = memref_cast %22 : memref to memref<*xf32> + %22 = memref.cast %arg0 : memref<5xf32> to memref + %23 = memref.cast %22 : memref to memref<*xf32> gpu.host_register %23 : memref<*xf32> call @print_memref_f32(%23) : (memref<*xf32>) -> () %24 = constant 1.0 : f32 diff --git a/mlir/test/Integration/GPU/CUDA/multiple-all-reduce.mlir b/mlir/test/Integration/GPU/CUDA/multiple-all-reduce.mlir index aaef634cbbd6..109a6136fb1a 100644 --- a/mlir/test/Integration/GPU/CUDA/multiple-all-reduce.mlir +++ b/mlir/test/Integration/GPU/CUDA/multiple-all-reduce.mlir @@ -9,9 +9,9 @@ // RUN: | FileCheck %s func @main() { - %data = alloc() : memref<2x6xf32> - %sum = alloc() : memref<2xf32> - %mul = alloc() : memref<2xf32> + %data = memref.alloc() : memref<2x6xf32> + %sum = memref.alloc() : memref<2xf32> + %mul = memref.alloc() : memref<2xf32> %cst0 = constant 0.0 : f32 %cst1 = constant 1.0 : f32 %cst2 = constant 2.0 : f32 @@ -33,35 +33,35 @@ func @main() { %c5 = constant 5 : index %c6 = constant 6 : index - %cast_data = memref_cast %data : memref<2x6xf32> to memref<*xf32> + %cast_data = memref.cast %data : memref<2x6xf32> to memref<*xf32> gpu.host_register %cast_data : memref<*xf32> - %cast_sum = memref_cast %sum : memref<2xf32> to memref<*xf32> + %cast_sum = memref.cast %sum : memref<2xf32> to memref<*xf32> gpu.host_register %cast_sum : memref<*xf32> - %cast_mul = memref_cast %mul : memref<2xf32> to memref<*xf32> + %cast_mul = memref.cast %mul : memref<2xf32> to memref<*xf32> gpu.host_register %cast_mul : memref<*xf32> - store %cst0, %data[%c0, %c0] : memref<2x6xf32> - store %cst1, %data[%c0, %c1] : memref<2x6xf32> - store %cst2, %data[%c0, %c2] : memref<2x6xf32> - store %cst4, %data[%c0, %c3] : memref<2x6xf32> - store %cst8, %data[%c0, %c4] : memref<2x6xf32> - store %cst16, %data[%c0, %c5] : memref<2x6xf32> + memref.store %cst0, %data[%c0, %c0] : memref<2x6xf32> + memref.store %cst1, %data[%c0, %c1] : memref<2x6xf32> + memref.store %cst2, %data[%c0, %c2] : memref<2x6xf32> + memref.store %cst4, %data[%c0, %c3] : memref<2x6xf32> + memref.store %cst8, %data[%c0, %c4] : memref<2x6xf32> + memref.store %cst16, %data[%c0, %c5] : memref<2x6xf32> - store %cst2, %data[%c1, %c0] : memref<2x6xf32> - store %cst3, %data[%c1, %c1] : memref<2x6xf32> - store %cst6, %data[%c1, %c2] : memref<2x6xf32> - store %cst7, %data[%c1, %c3] : memref<2x6xf32> - store %cst10, %data[%c1, %c4] : memref<2x6xf32> - store %cst11, %data[%c1, %c5] : memref<2x6xf32> + memref.store %cst2, %data[%c1, %c0] : memref<2x6xf32> + memref.store %cst3, %data[%c1, %c1] : memref<2x6xf32> + memref.store %cst6, %data[%c1, %c2] : memref<2x6xf32> + memref.store %cst7, %data[%c1, %c3] : memref<2x6xf32> + memref.store %cst10, %data[%c1, %c4] : memref<2x6xf32> + memref.store %cst11, %data[%c1, %c5] : memref<2x6xf32> // ADD + MUL gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c2, %grid_y = %c1, %grid_z = %c1) threads(%tx, %ty, %tz) in (%block_x = %c6, %block_y = %c1, %block_z = %c1) { - %val = load %data[%bx, %tx] : memref<2x6xf32> + %val = memref.load %data[%bx, %tx] : memref<2x6xf32> %reduced0 = "gpu.all_reduce"(%val) ({}) { op = "add" } : (f32) -> (f32) - store %reduced0, %sum[%bx] : memref<2xf32> + memref.store %reduced0, %sum[%bx] : memref<2xf32> %reduced1 = "gpu.all_reduce"(%val) ({}) { op = "mul" } : (f32) -> (f32) - store %reduced1, %mul[%bx] : memref<2xf32> + memref.store %reduced1, %mul[%bx] : memref<2xf32> gpu.terminator } diff --git a/mlir/test/Integration/GPU/CUDA/shuffle.mlir b/mlir/test/Integration/GPU/CUDA/shuffle.mlir index 97770100eb33..e303362d1ec1 100644 --- a/mlir/test/Integration/GPU/CUDA/shuffle.mlir +++ b/mlir/test/Integration/GPU/CUDA/shuffle.mlir @@ -10,12 +10,12 @@ // CHECK: [4, 5, 6, 7, 0, 1, 2, 3, 12, -1, -1, -1, 8] func @main() { - %arg = alloc() : memref<13xf32> - %dst = memref_cast %arg : memref<13xf32> to memref + %arg = memref.alloc() : memref<13xf32> + %dst = memref.cast %arg : memref<13xf32> to memref %one = constant 1 : index %c0 = constant 0 : index - %sx = dim %dst, %c0 : memref - %cast_dst = memref_cast %dst : memref to memref<*xf32> + %sx = memref.dim %dst, %c0 : memref + %cast_dst = memref.cast %dst : memref to memref<*xf32> gpu.host_register %cast_dst : memref<*xf32> gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %one, %grid_y = %one, %grid_z = %one) threads(%tx, %ty, %tz) in (%block_x = %sx, %block_y = %one, %block_z = %one) { @@ -29,7 +29,7 @@ func @main() { %m1 = constant -1.0 : f32 br ^bb1(%m1 : f32) ^bb1(%value : f32): - store %value, %dst[%tx] : memref + memref.store %value, %dst[%tx] : memref gpu.terminator } call @print_memref_f32(%cast_dst) : (memref<*xf32>) -> () diff --git a/mlir/test/Integration/GPU/CUDA/two-modules.mlir b/mlir/test/Integration/GPU/CUDA/two-modules.mlir index 4926218a32aa..fc1bd6b59aaa 100644 --- a/mlir/test/Integration/GPU/CUDA/two-modules.mlir +++ b/mlir/test/Integration/GPU/CUDA/two-modules.mlir @@ -10,23 +10,23 @@ // CHECK: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] func @main() { - %arg = alloc() : memref<13xi32> - %dst = memref_cast %arg : memref<13xi32> to memref + %arg = memref.alloc() : memref<13xi32> + %dst = memref.cast %arg : memref<13xi32> to memref %one = constant 1 : index %c0 = constant 0 : index - %sx = dim %dst, %c0 : memref - %cast_dst = memref_cast %dst : memref to memref<*xi32> + %sx = memref.dim %dst, %c0 : memref + %cast_dst = memref.cast %dst : memref to memref<*xi32> gpu.host_register %cast_dst : memref<*xi32> gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %one, %grid_y = %one, %grid_z = %one) threads(%tx, %ty, %tz) in (%block_x = %sx, %block_y = %one, %block_z = %one) { %t0 = index_cast %tx : index to i32 - store %t0, %dst[%tx] : memref + memref.store %t0, %dst[%tx] : memref gpu.terminator } gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %one, %grid_y = %one, %grid_z = %one) threads(%tx, %ty, %tz) in (%block_x = %sx, %block_y = %one, %block_z = %one) { %t0 = index_cast %tx : index to i32 - store %t0, %dst[%tx] : memref + memref.store %t0, %dst[%tx] : memref gpu.terminator } call @print_memref_i32(%cast_dst) : (memref<*xi32>) -> ()