|
| 1 | +// RUN: mlir-opt %s -acc-legalize-serial | FileCheck %s |
| 2 | + |
| 3 | +acc.private.recipe @privatization_memref_10_f32 : memref<10xf32> init { // private recipe for memref<10xf32>; not referenced by the function in this file |
| 4 | +^bb0(%orig: memref<10xf32>): |
| 5 | + %priv = memref.alloc() : memref<10xf32> // fresh buffer; %orig is not read |
| 6 | + acc.yield %priv : memref<10xf32> |
| 7 | +} destroy { |
| 8 | +^bb0(%to_free: memref<10xf32>): |
| 9 | + memref.dealloc %to_free : memref<10xf32> // releases the region's block argument |
| 10 | + acc.terminator |
| 11 | +} |
| 12 | + |
| 13 | +acc.private.recipe @privatization_memref_10_10_f32 : memref<10x10xf32> init { // private recipe used by the acc.private op in @testserialop |
| 14 | +^bb0(%orig: memref<10x10xf32>): |
| 15 | + %priv = memref.alloc() : memref<10x10xf32> // fresh buffer; %orig is not read |
| 16 | + acc.yield %priv : memref<10x10xf32> |
| 17 | +} destroy { |
| 18 | +^bb0(%to_free: memref<10x10xf32>): |
| 19 | + memref.dealloc %to_free : memref<10x10xf32> // releases the region's block argument |
| 20 | + acc.terminator |
| 21 | +} |
| 22 | + |
| 23 | +acc.firstprivate.recipe @firstprivatization_memref_10xf32 : memref<10xf32> init { // firstprivate recipe used by the acc.firstprivate op in @testserialop |
| 24 | +^bb0(%orig: memref<10xf32>): |
| 25 | + %priv = memref.alloc() : memref<10xf32> // fresh buffer; %orig is not read |
| 26 | + acc.yield %priv : memref<10xf32> |
| 27 | +} copy { |
| 28 | +^bb0(%src: memref<10xf32>, %dst: memref<10xf32>): // neither argument is used: this region only terminates |
| 29 | + acc.terminator |
| 30 | +} destroy { |
| 31 | +^bb0(%to_free: memref<10xf32>): |
| 32 | + memref.dealloc %to_free : memref<10xf32> // releases the region's block argument |
| 33 | + acc.terminator |
| 34 | +} |
| 35 | + |
| 36 | +acc.reduction.recipe @reduction_add_i64 : i64 reduction_operator<add> init { // scalar i64 add reduction; not referenced by the function in this file |
| 37 | +^bb0(%unused: i64): |
| 38 | + %zero = arith.constant 0 : i64 // initial value yielded by the init region |
| 39 | + acc.yield %zero : i64 |
| 40 | +} combiner { |
| 41 | +^bb0(%lhs: i64, %rhs: i64): |
| 42 | + %sum = arith.addi %lhs, %rhs : i64 // combine the two partial values by integer addition |
| 43 | + acc.yield %sum : i64 |
| 44 | +} |
| 45 | + |
| 46 | +acc.reduction.recipe @reduction_add_memref_i64 : memref<i64> reduction_operator<add> init { // add reduction over a rank-0 memref<i64>; used by the acc.reduction op in @testserialop |
| 47 | +^bb0(%orig: memref<i64>): |
| 48 | + %slot = memref.alloca() : memref<i64> // fresh rank-0 buffer; %orig is not read |
| 49 | + %zero = arith.constant 0 : i64 |
| 50 | + memref.store %zero, %slot[] : memref<i64> // seed the buffer with 0 |
| 51 | + acc.yield %slot : memref<i64> |
| 52 | +} combiner { |
| 53 | +^bb0(%dst: memref<i64>, %src: memref<i64>): |
| 54 | + %lhs = memref.load %dst[] : memref<i64> |
| 55 | + %rhs = memref.load %src[] : memref<i64> |
| 56 | + %sum = arith.addi %lhs, %rhs : i64 |
| 57 | + memref.store %sum, %dst[] : memref<i64> // result is written back into the first argument |
| 58 | + acc.terminator |
| 59 | +} |
| 60 | + |
| 61 | +// CHECK: func.func @testserialop(%[[VAL_0:.*]]: memref<10xf32>, %[[VAL_1:.*]]: memref<10xf32>, %[[VAL_2:.*]]: memref<10x10xf32>) { |
| 62 | +// CHECK: %[[VAL_3:.*]] = arith.constant 1 : i64 |
| 63 | +// CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32 |
| 64 | +// CHECK: %[[VAL_5:.*]] = arith.constant 1 : index |
| 65 | +// CHECK: acc.parallel async(%[[VAL_3]] : i64) num_gangs({%[[VAL_4]] : i32}) num_workers(%[[VAL_4]] : i32) vector_length(%[[VAL_4]] : i32) { |
| 66 | +// CHECK: } |
| 67 | +// CHECK: acc.parallel async(%[[VAL_4]] : i32) num_gangs({%[[VAL_4]] : i32}) num_workers(%[[VAL_4]] : i32) vector_length(%[[VAL_4]] : i32) { |
| 68 | +// CHECK: } |
| 69 | +// CHECK: acc.parallel async(%[[VAL_5]] : index) num_gangs({%[[VAL_4]] : i32}) num_workers(%[[VAL_4]] : i32) vector_length(%[[VAL_4]] : i32) { |
| 70 | +// CHECK: } |
| 71 | +// CHECK: acc.parallel num_gangs({%[[VAL_4]] : i32}) num_workers(%[[VAL_4]] : i32) vector_length(%[[VAL_4]] : i32) wait({%[[VAL_3]] : i64}) { |
| 72 | +// CHECK: } |
| 73 | +// CHECK: acc.parallel num_gangs({%[[VAL_4]] : i32}) num_workers(%[[VAL_4]] : i32) vector_length(%[[VAL_4]] : i32) wait({%[[VAL_4]] : i32}) { |
| 74 | +// CHECK: } |
| 75 | +// CHECK: acc.parallel num_gangs({%[[VAL_4]] : i32}) num_workers(%[[VAL_4]] : i32) vector_length(%[[VAL_4]] : i32) wait({%[[VAL_5]] : index}) { |
| 76 | +// CHECK: } |
| 77 | +// CHECK: acc.parallel num_gangs({%[[VAL_4]] : i32}) num_workers(%[[VAL_4]] : i32) vector_length(%[[VAL_4]] : i32) wait({%[[VAL_3]] : i64, %[[VAL_4]] : i32, %[[VAL_5]] : index}) { |
| 78 | +// CHECK: } |
| 79 | +// CHECK: %[[VAL_6:.*]] = acc.firstprivate varPtr(%[[VAL_1]] : memref<10xf32>) recipe(@firstprivatization_memref_10xf32) -> memref<10xf32> |
| 80 | +// CHECK: %[[VAL_9:.*]] = acc.private varPtr(%[[VAL_2]] : memref<10x10xf32>) recipe(@privatization_memref_10_10_f32) -> memref<10x10xf32> |
| 81 | +// CHECK: acc.parallel firstprivate(%[[VAL_6]] : memref<10xf32>) num_gangs({%[[VAL_4]] : i32}) num_workers(%[[VAL_4]] : i32) private(%[[VAL_9]] : memref<10x10xf32>) vector_length(%[[VAL_4]] : i32) { |
| 82 | +// CHECK: } |
| 83 | +// CHECK: %[[VAL_7:.*]] = acc.copyin varPtr(%[[VAL_0]] : memref<10xf32>) -> memref<10xf32> {dataClause = #acc<data_clause acc_copy>} |
| 84 | +// CHECK: acc.parallel dataOperands(%[[VAL_7]] : memref<10xf32>) num_gangs({%[[VAL_4]] : i32}) num_workers(%[[VAL_4]] : i32) vector_length(%[[VAL_4]] : i32) { |
| 85 | +// CHECK: } |
| 86 | +// CHECK: %[[I64MEM:.*]] = memref.alloca() : memref<i64> |
| 87 | +// CHECK: memref.store %[[VAL_3]], %[[I64MEM]][] : memref<i64> |
| 88 | +// CHECK: %[[VAL_10:.*]] = acc.reduction varPtr(%[[I64MEM]] : memref<i64>) recipe(@reduction_add_memref_i64) -> memref<i64> |
| 89 | +// CHECK: acc.parallel num_gangs({%[[VAL_4]] : i32}) num_workers(%[[VAL_4]] : i32) vector_length(%[[VAL_4]] : i32) reduction(%[[VAL_10]] : memref<i64>) { |
| 90 | +// CHECK: } |
| 91 | +// CHECK: acc.parallel combined(loop) num_gangs({%[[VAL_4]] : i32}) num_workers(%[[VAL_4]] : i32) vector_length(%[[VAL_4]] : i32) { |
| 92 | +// CHECK: acc.loop combined(serial) control(%{{.*}} : index) = (%[[VAL_5]] : index) to (%[[VAL_5]] : index) step (%[[VAL_5]] : index) { |
| 93 | +// CHECK: acc.yield |
| 94 | +// CHECK: } attributes {seq = [#acc.device_type<none>]} |
| 95 | +// CHECK: acc.terminator |
| 96 | +// CHECK: } |
| 97 | +// CHECK: acc.parallel num_gangs({%[[VAL_4]] : i32}) num_workers(%[[VAL_4]] : i32) vector_length(%[[VAL_4]] : i32) { |
| 98 | +// CHECK: } attributes {defaultAttr = #acc<defaultvalue none>} |
| 99 | +// CHECK: acc.parallel num_gangs({%[[VAL_4]] : i32}) num_workers(%[[VAL_4]] : i32) vector_length(%[[VAL_4]] : i32) { |
| 100 | +// CHECK: } attributes {defaultAttr = #acc<defaultvalue present>} |
| 101 | +// CHECK: acc.parallel num_gangs({%[[VAL_4]] : i32}) num_workers(%[[VAL_4]] : i32) vector_length(%[[VAL_4]] : i32) { |
| 102 | +// CHECK: } |
| 103 | +// CHECK: acc.parallel num_gangs({%[[VAL_4]] : i32}) num_workers(%[[VAL_4]] : i32) vector_length(%[[VAL_4]] : i32) { |
| 104 | +// CHECK: } |
| 105 | +// CHECK: acc.parallel num_gangs({%[[VAL_4]] : i32}) num_workers(%[[VAL_4]] : i32) vector_length(%[[VAL_4]] : i32) { |
| 106 | +// CHECK: } attributes {selfAttr} |
| 107 | +// CHECK: acc.parallel num_gangs({%[[VAL_4]] : i32}) num_workers(%[[VAL_4]] : i32) vector_length(%[[VAL_4]] : i32) { |
| 108 | +// CHECK: acc.yield |
| 109 | +// CHECK: } attributes {selfAttr} |
| 110 | +// CHECK: return |
| 111 | +// CHECK: } |
| 112 | + |
| 113 | +func.func @testserialop(%a: memref<10xf32>, %b: memref<10xf32>, %c: memref<10x10xf32>) -> () { // input IR: one acc.serial per clause/attribute combination |
| 114 | + %one_i64 = arith.constant 1 : i64 |
| 115 | + %one_i32 = arith.constant 1 : i32 |
| 116 | + %one_idx = arith.constant 1 : index |
| 117 | + acc.serial async(%one_i64: i64) { // async operand of each integer type: i64, i32, index |
| 118 | + } |
| 119 | + acc.serial async(%one_i32: i32) { |
| 120 | + } |
| 121 | + acc.serial async(%one_idx: index) { |
| 122 | + } |
| 123 | + acc.serial wait({%one_i64: i64}) { // wait operand of each integer type, then all three together |
| 124 | + } |
| 125 | + acc.serial wait({%one_i32: i32}) { |
| 126 | + } |
| 127 | + acc.serial wait({%one_idx: index}) { |
| 128 | + } |
| 129 | + acc.serial wait({%one_i64 : i64, %one_i32 : i32, %one_idx : index}) { |
| 130 | + } |
| 131 | + %b_firstprivate = acc.firstprivate varPtr(%b : memref<10xf32>) recipe(@firstprivatization_memref_10xf32) -> memref<10xf32> |
| 132 | + %c_priv = acc.private varPtr(%c : memref<10x10xf32>) recipe(@privatization_memref_10_10_f32) -> memref<10x10xf32> |
| 133 | + acc.serial private(%c_priv : memref<10x10xf32>) firstprivate(%b_firstprivate : memref<10xf32>) { // private and firstprivate operands on one construct |
| 134 | + } |
| 135 | + %a_copyin = acc.copyin varPtr(%a : memref<10xf32>) -> memref<10xf32> {dataClause = #acc<data_clause acc_copy>} |
| 136 | + acc.serial dataOperands(%a_copyin : memref<10xf32>) { // data operand produced by acc.copyin |
| 137 | + } |
| 138 | + %red_mem = memref.alloca() : memref<i64> |
| 139 | + memref.store %one_i64, %red_mem[] : memref<i64> // reduction variable starts at 1 |
| 140 | + %red = acc.reduction varPtr(%red_mem : memref<i64>) recipe(@reduction_add_memref_i64) -> memref<i64> |
| 141 | + acc.serial reduction(%red : memref<i64>) { |
| 142 | + } |
| 143 | + acc.serial combined(loop) { // combined construct wrapping a seq acc.loop |
| 144 | + acc.loop combined(serial) control(%iv : index) = (%one_idx : index) to (%one_idx : index) step (%one_idx : index) { |
| 145 | + acc.yield |
| 146 | + } attributes {seq = [#acc.device_type<none>]} |
| 147 | + acc.terminator |
| 148 | + } |
| 149 | + acc.serial { // attribute-only variants follow: defaultAttr none / present, asyncAttr, waitAttr, selfAttr |
| 150 | + } attributes {defaultAttr = #acc<defaultvalue none>} |
| 151 | + acc.serial { |
| 152 | + } attributes {defaultAttr = #acc<defaultvalue present>} |
| 153 | + acc.serial { |
| 154 | + } attributes {asyncAttr} |
| 155 | + acc.serial { |
| 156 | + } attributes {waitAttr} |
| 157 | + acc.serial { |
| 158 | + } attributes {selfAttr} |
| 159 | + acc.serial { // same selfAttr case, but with an explicit acc.yield in the region |
| 160 | + acc.yield |
| 161 | + } attributes {selfAttr} |
| 162 | + return |
| 163 | +} |
| 164 | + |
0 commit comments