|
| 1 | +// RUN: onnx-mlir-opt --mcpu=z16 --maccel=NNPA --enable-compiler-stick-unstick=true --shape-inference --convert-onnx-to-krnl --canonicalize %s -split-input-file | FileCheck %s |
| 2 | + |
| 3 | +func.func @should_lower_to_zlow(%arg0: tensor<1x3x5x7xf32>) -> tensor<*xf32> { |
| 4 | + %0 = "zhigh.Stick"(%arg0) {layout = "NHWC"} : (tensor<1x3x5x7xf32>) -> tensor<*xf16> |
| 5 | + %1 = "zhigh.Unstick"(%0) : (tensor<*xf16>) -> tensor<*xf32> |
| 6 | + return %1 : tensor<*xf32> |
| 7 | +// Static shapes: expects a copy loop permuting indices (0,2,3,1), zlow.stick, zlow.unstick, then a copy loop permuting back (0,3,1,2). |
| 8 | +// CHECK-DAG: [[MAP_0_:#.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d3 floordiv 64, d1, d2 floordiv 32, d2 mod 32, d3 mod 64)> |
| 9 | +// CHECK-LABEL: func.func @should_lower_to_zlow |
| 10 | +// CHECK-SAME: ([[PARAM_0_:%.+]]: memref<1x3x5x7xf32>) -> memref<1x3x5x7xf32> { |
| 11 | +// CHECK-DAG: [[RES_:%.+]] = memref.alloc() {{.*}}: memref<1x5x7x3xf16, #map> |
| 12 | +// CHECK-DAG: [[RES_1_:%.+]] = memref.alloc() {{.*}}: memref<1x5x7x3xf32> |
| 13 | +// CHECK-DAG: [[LOOP_0_:%.+]]:4 = krnl.define_loops 4 |
| 14 | +// CHECK: krnl.iterate([[LOOP_0_]]#0, [[LOOP_0_]]#1, [[LOOP_0_]]#2, [[LOOP_0_]]#3) with ([[LOOP_0_]]#0 -> [[I_0_:%.+]] = 0 to 1, [[LOOP_0_]]#1 -> [[I_1_:%.+]] = 0 to 3, [[LOOP_0_]]#2 -> [[I_2_:%.+]] = 0 to 5, [[LOOP_0_]]#3 -> [[I_3_:%.+]] = 0 to 7){ |
| 15 | +// CHECK: [[VAR_2_:%.+]]:4 = krnl.get_induction_var_value([[LOOP_0_]]#0, [[LOOP_0_]]#1, [[LOOP_0_]]#2, [[LOOP_0_]]#3) : (!krnl.loop, !krnl.loop, !krnl.loop, !krnl.loop) -> (index, index, index, index) |
| 16 | +// CHECK: [[LOAD_PARAM_0_MEM_:%.+]] = krnl.load [[PARAM_0_]]{{.}}[[VAR_2_]]#0, [[VAR_2_]]#1, [[VAR_2_]]#2, [[VAR_2_]]#3] : memref<1x3x5x7xf32> |
| 17 | +// CHECK: krnl.store [[LOAD_PARAM_0_MEM_]], [[RES_1_]]{{.}}[[VAR_2_]]#0, [[VAR_2_]]#2, [[VAR_2_]]#3, [[VAR_2_]]#1] : memref<1x5x7x3xf32> |
| 18 | +// CHECK: } |
| 19 | +// CHECK: "zlow.stick"([[RES_1_]], [[RES_]]) {layout = "NHWC"} : (memref<1x5x7x3xf32>, memref<1x5x7x3xf16, #map>) -> () |
| 20 | +// CHECK: [[RES_2_:%.+]] = memref.alloc() {{.*}}: memref<1x5x7x3xf32> |
| 21 | +// CHECK: "zlow.unstick"([[RES_]], [[RES_2_]]) {layout = "NHWC"} : (memref<1x5x7x3xf16, #map>, memref<1x5x7x3xf32>) -> () |
| 22 | +// CHECK-DAG: [[RES_3_:%.+]] = memref.alloc() {{.*}}: memref<1x3x5x7xf32> |
| 23 | +// CHECK-DAG: [[LOOP_1_:%.+]]:4 = krnl.define_loops 4 |
| 24 | +// CHECK: krnl.iterate([[LOOP_1_]]#0, [[LOOP_1_]]#1, [[LOOP_1_]]#2, [[LOOP_1_]]#3) with ([[LOOP_1_]]#0 -> [[I_4_:%.+]] = 0 to 1, [[LOOP_1_]]#1 -> [[I_5_:%.+]] = 0 to 5, [[LOOP_1_]]#2 -> [[I_6_:%.+]] = 0 to 7, [[LOOP_1_]]#3 -> [[I_7_:%.+]] = 0 to 3){ |
| 25 | +// CHECK: [[VAR_2_1_:%.+]]:4 = krnl.get_induction_var_value([[LOOP_1_]]#0, [[LOOP_1_]]#1, [[LOOP_1_]]#2, [[LOOP_1_]]#3) : (!krnl.loop, !krnl.loop, !krnl.loop, !krnl.loop) -> (index, index, index, index) |
| 26 | +// CHECK: [[LOAD_PARAM_0_MEM_1_:%.+]] = krnl.load [[RES_2_]]{{.}}[[VAR_2_1_]]#0, [[VAR_2_1_]]#1, [[VAR_2_1_]]#2, [[VAR_2_1_]]#3] : memref<1x5x7x3xf32> |
| 27 | +// CHECK: krnl.store [[LOAD_PARAM_0_MEM_1_]], [[RES_3_]]{{.}}[[VAR_2_1_]]#0, [[VAR_2_1_]]#3, [[VAR_2_1_]]#1, [[VAR_2_1_]]#2] : memref<1x3x5x7xf32> |
| 28 | +// CHECK: } |
| 29 | +// CHECK: return [[RES_3_]] : memref<1x3x5x7xf32> |
| 30 | +// CHECK: } |
| 31 | +} |
| 32 | + |
| 33 | +// ----- |
| 34 | + |
| 35 | +func.func @should_lower_to_zlow_unknown_dims(%arg0: tensor<1x?x?x7xf32>) -> tensor<*xf32> { |
| 36 | + %0 = "zhigh.Stick"(%arg0) {layout = "NHWC"} : (tensor<1x?x?x7xf32>) -> tensor<*xf16> |
| 37 | + %1 = "zhigh.Unstick"(%0) : (tensor<*xf16>) -> tensor<*xf32> |
| 38 | + return %1 : tensor<*xf32> |
| 39 | +// Dynamic dims 1 and 2: same stick/unstick sequence, with allocation sizes and loop bounds taken from memref.dim. |
| 40 | +// CHECK-DAG: [[MAP_0_:#.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d3 floordiv 64, d1, d2 floordiv 32, d2 mod 32, d3 mod 64)> |
| 41 | +// CHECK-DAG: [[MAP_1_:#.+]] = affine_map<(d0) -> (d0)> |
| 42 | +// CHECK-DAG: [[MAP_2_:#.+]] = affine_map<(d0, d1) -> (d1)> |
| 43 | +// CHECK-LABEL: func.func @should_lower_to_zlow_unknown_dims |
| 44 | +// CHECK-SAME: ([[PARAM_0_:%.+]]: memref<1x?x?x7xf32>) -> memref<1x?x?x7xf32> { |
| 45 | +// CHECK-DAG: [[CST_2_:%.+]] = arith.constant 2 : index |
| 46 | +// CHECK-DAG: [[CST_1_:%.+]] = arith.constant 1 : index |
| 47 | +// CHECK-NOT: separator of consecutive DAGs |
| 48 | +// CHECK-DAG: [[VAR_dim_:%.+]] = memref.dim [[PARAM_0_]], [[CST_1_]] : memref<1x?x?x7xf32> |
| 49 | +// CHECK-DAG: [[VAR_dim_0_:%.+]] = memref.dim [[PARAM_0_]], [[CST_2_]] : memref<1x?x?x7xf32> |
| 50 | +// CHECK-NOT: separator of consecutive DAGs |
| 51 | +// CHECK-DAG: [[RES_:%.+]] = memref.alloc([[VAR_dim_0_]], [[VAR_dim_]]) {{.*}}: memref<1x?x7x?xf16, #map> |
| 52 | +// CHECK-DAG: [[VAR_dim_1_:%.+]] = memref.dim [[PARAM_0_]], [[CST_2_]] : memref<1x?x?x7xf32> |
| 53 | +// CHECK-DAG: [[VAR_dim_2_:%.+]] = memref.dim [[PARAM_0_]], [[CST_1_]] : memref<1x?x?x7xf32> |
| 54 | +// CHECK-NOT: separator of consecutive DAGs |
| 55 | +// CHECK-DAG: [[RES_1_:%.+]] = memref.alloc([[VAR_dim_1_]], [[VAR_dim_2_]]) {{.*}}: memref<1x?x7x?xf32> |
| 56 | +// CHECK-DAG: [[LOOP_0_:%.+]]:4 = krnl.define_loops 4 |
| 57 | +// CHECK-DAG: [[VAR_dim_4_:%.+]] = memref.dim [[PARAM_0_]], [[CST_1_]] : memref<1x?x?x7xf32> |
| 58 | +// CHECK-DAG: [[VAR_dim_5_:%.+]] = memref.dim [[PARAM_0_]], [[CST_2_]] : memref<1x?x?x7xf32> |
| 59 | +// CHECK: krnl.iterate([[LOOP_0_]]#0, [[LOOP_0_]]#1, [[LOOP_0_]]#2, [[LOOP_0_]]#3) with ([[LOOP_0_]]#0 -> [[I_0_:%.+]] = 0 to 1, [[LOOP_0_]]#1 -> [[I_1_:%.+]] = 0 to [[MAP_1_]]([[VAR_dim_4_]]), [[LOOP_0_]]#2 -> [[I_2_:%.+]] = 0 to [[MAP_2_]]([[VAR_dim_4_]], [[VAR_dim_5_]]), [[LOOP_0_]]#3 -> [[I_3_:%.+]] = 0 to 7){ |
| 60 | +// CHECK: [[VAR_2_:%.+]]:4 = krnl.get_induction_var_value([[LOOP_0_]]#0, [[LOOP_0_]]#1, [[LOOP_0_]]#2, [[LOOP_0_]]#3) : (!krnl.loop, !krnl.loop, !krnl.loop, !krnl.loop) -> (index, index, index, index) |
| 61 | +// CHECK: [[LOAD_PARAM_0_MEM_:%.+]] = krnl.load [[PARAM_0_]]{{.}}[[VAR_2_]]#0, [[VAR_2_]]#1, [[VAR_2_]]#2, [[VAR_2_]]#3] : memref<1x?x?x7xf32> |
| 62 | +// CHECK: krnl.store [[LOAD_PARAM_0_MEM_]], [[RES_1_]]{{.}}[[VAR_2_]]#0, [[VAR_2_]]#2, [[VAR_2_]]#3, [[VAR_2_]]#1] : memref<1x?x7x?xf32> |
| 63 | +// CHECK: } |
| 64 | +// CHECK: "zlow.stick"([[RES_1_]], [[RES_]]) {layout = "NHWC"} : (memref<1x?x7x?xf32>, memref<1x?x7x?xf16, #map>) -> () |
| 65 | +// CHECK: [[RES_2_:%.+]] = memref.alloc([[VAR_dim_0_]], [[VAR_dim_]]) {{.*}}: memref<1x?x7x?xf32> |
| 66 | +// CHECK: "zlow.unstick"([[RES_]], [[RES_2_]]) {layout = "NHWC"} : (memref<1x?x7x?xf16, #map>, memref<1x?x7x?xf32>) -> () |
| 67 | +// CHECK-DAG: [[RES_3_:%.+]] = memref.alloc([[VAR_dim_]], [[VAR_dim_0_]]) {{.*}}: memref<1x?x?x7xf32> |
| 68 | +// CHECK-DAG: [[LOOP_1_:%.+]]:4 = krnl.define_loops 4 |
| 69 | +// CHECK: krnl.iterate([[LOOP_1_]]#0, [[LOOP_1_]]#1, [[LOOP_1_]]#2, [[LOOP_1_]]#3) with ([[LOOP_1_]]#0 -> [[I_4_:%.+]] = 0 to 1, [[LOOP_1_]]#1 -> [[I_5_:%.+]] = 0 to [[MAP_1_]]([[VAR_dim_0_]]), [[LOOP_1_]]#2 -> [[I_6_:%.+]] = 0 to 7, [[LOOP_1_]]#3 -> [[I_7_:%.+]] = 0 to [[MAP_2_]]([[VAR_dim_0_]], [[VAR_dim_]])){ |
| 70 | +// CHECK: [[VAR_2_1_:%.+]]:4 = krnl.get_induction_var_value([[LOOP_1_]]#0, [[LOOP_1_]]#1, [[LOOP_1_]]#2, [[LOOP_1_]]#3) : (!krnl.loop, !krnl.loop, !krnl.loop, !krnl.loop) -> (index, index, index, index) |
| 71 | +// CHECK: [[LOAD_PARAM_0_MEM_1_:%.+]] = krnl.load [[RES_2_]]{{.}}[[VAR_2_1_]]#0, [[VAR_2_1_]]#1, [[VAR_2_1_]]#2, [[VAR_2_1_]]#3] : memref<1x?x7x?xf32> |
| 72 | +// CHECK: krnl.store [[LOAD_PARAM_0_MEM_1_]], [[RES_3_]]{{.}}[[VAR_2_1_]]#0, [[VAR_2_1_]]#3, [[VAR_2_1_]]#1, [[VAR_2_1_]]#2] : memref<1x?x?x7xf32> |
| 73 | +// CHECK: } |
| 74 | +// CHECK: return [[RES_3_]] : memref<1x?x?x7xf32> |
| 75 | +// CHECK: } |
| 76 | +} |
0 commit comments