From 48a21e69159a7e6698cde380f6d64274c6569f29 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 24 Jun 2025 15:45:06 -0700 Subject: [PATCH] [RISCV] Fix a correctness issue in optimizeCondBranch. Prevent optimizing compare with x0. NFC (#145440) We were incorrectly changing -1 to 0 for unsigned compares in case 1. The comment incorrectly said UINT64_MAX is bigger than INT64_MAX, but we were doing a signed compare and UINT64_MAX is smaller than INT64_MAX in signed space. Prevent changing 0 constants since we can use x0. The test cases for these are contrived to use addi rd, x0, 0. We're more likely to have a COPY from x0 which we already don't optimize for other reasons. Check if registers are virtual before calling hasOneUse. The use count for physical registers is meaningless. --- llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 69 ++--- llvm/test/CodeGen/RISCV/branch-opt.mir | 308 +++++++++++++++++++++++ 2 files changed, 347 insertions(+), 30 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 6e30ffce99c4..5711f0077b12 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -1449,36 +1449,45 @@ bool RISCVInstrInfo::optimizeCondBranch(MachineInstr &MI) const { return Register(); }; - if (isFromLoadImm(MRI, LHS, C0) && MRI.hasOneUse(LHS.getReg())) { - // Might be case 1. - // Signed integer overflow is UB. (UINT64_MAX is bigger so we don't need - // to worry about unsigned overflow here) - if (C0 < INT64_MAX) - if (Register RegZ = searchConst(C0 + 1)) { - reverseBranchCondition(Cond); - Cond[1] = MachineOperand::CreateReg(RHS.getReg(), /*isDef=*/false); - Cond[2] = MachineOperand::CreateReg(RegZ, /*isDef=*/false); - // We might extend the live range of Z, clear its kill flag to - // account for this. - MRI.clearKillFlags(RegZ); - modifyBranch(); - return true; - } - } else if (isFromLoadImm(MRI, RHS, C0) && MRI.hasOneUse(RHS.getReg())) { - // Might be case 2. - // For unsigned cases, we don't want C1 to wrap back to UINT64_MAX - // when C0 is zero. - if ((CC == RISCVCC::COND_GE || CC == RISCVCC::COND_LT) || C0) - if (Register RegZ = searchConst(C0 - 1)) { - reverseBranchCondition(Cond); - Cond[1] = MachineOperand::CreateReg(RegZ, /*isDef=*/false); - Cond[2] = MachineOperand::CreateReg(LHS.getReg(), /*isDef=*/false); - // We might extend the live range of Z, clear its kill flag to - // account for this. - MRI.clearKillFlags(RegZ); - modifyBranch(); - return true; - } + // Might be case 1. + // Don't change 0 to 1 since we can use x0. + // For unsigned cases changing -1U to 0 would be incorrect. + // The incorrect case for signed would be INT_MAX, but isFromLoadImm can't + // return that. + if (isFromLoadImm(MRI, LHS, C0) && C0 != 0 && LHS.getReg().isVirtual() && + MRI.hasOneUse(LHS.getReg()) && + (CC == RISCVCC::COND_GE || CC == RISCVCC::COND_LT || C0 != -1)) { + assert(isInt<12>(C0) && "Unexpected immediate"); + if (Register RegZ = searchConst(C0 + 1)) { + reverseBranchCondition(Cond); + Cond[1] = MachineOperand::CreateReg(RHS.getReg(), /*isDef=*/false); + Cond[2] = MachineOperand::CreateReg(RegZ, /*isDef=*/false); + // We might extend the live range of Z, clear its kill flag to + // account for this. + MRI.clearKillFlags(RegZ); + modifyBranch(); + return true; + } + } + + // Might be case 2. + // For signed cases we don't want to change 0 since we can use x0. + // For unsigned cases changing 0 to -1U would be incorrect. + // The incorrect case for signed would be INT_MIN, but isFromLoadImm can't + // return that. + if (isFromLoadImm(MRI, RHS, C0) && C0 != 0 && RHS.getReg().isVirtual() && + MRI.hasOneUse(RHS.getReg())) { + assert(isInt<12>(C0) && "Unexpected immediate"); + if (Register RegZ = searchConst(C0 - 1)) { + reverseBranchCondition(Cond); + Cond[1] = MachineOperand::CreateReg(RegZ, /*isDef=*/false); + Cond[2] = MachineOperand::CreateReg(LHS.getReg(), /*isDef=*/false); + // We might extend the live range of Z, clear its kill flag to + // account for this. + MRI.clearKillFlags(RegZ); + modifyBranch(); + return true; + } } return false; diff --git a/llvm/test/CodeGen/RISCV/branch-opt.mir b/llvm/test/CodeGen/RISCV/branch-opt.mir index ba3a20f2fbfc..76ade18ce516 100644 --- a/llvm/test/CodeGen/RISCV/branch-opt.mir +++ b/llvm/test/CodeGen/RISCV/branch-opt.mir @@ -18,6 +18,74 @@ ret void } + define void @ule_negone(ptr %a, i32 signext %b, ptr %c, ptr %d) { + store i32 0, ptr %a + %p = icmp ule i32 %b, -1 + br i1 %p, label %block1, label %block2 + + block1: ; preds = %0 + store i32 %b, ptr %c + br label %end_block + + block2: ; preds = %0 + store i32 87, ptr %d + br label %end_block + + end_block: ; preds = %block2, %block1 + ret void + } + + define void @ult_zero(ptr %a, i32 signext %b, ptr %c, ptr %d) { + store i32 -1, ptr %a + %p = icmp ult i32 %b, 0 + br i1 %p, label %block1, label %block2 + + block1: ; preds = %0 + store i32 %b, ptr %c + br label %end_block + + block2: ; preds = %0 + store i32 87, ptr %d + br label %end_block + + end_block: ; preds = %block2, %block1 + ret void + } + + define void @sle_zero(ptr %a, i32 signext %b, ptr %c, ptr %d) { + store i32 1, ptr %a + %p = icmp sle i32 %b, 0 + br i1 %p, label %block1, label %block2 + + block1: ; preds = %0 + store i32 %b, ptr %c + br label %end_block + + block2: ; preds = %0 + store i32 87, ptr %d + br label %end_block + + end_block: ; preds = %block2, %block1 + ret void + } + + define void @slt_zero(ptr %a, i32 signext %b, ptr %c, ptr %d) { + store i32 -1, ptr %a + %p = icmp slt i32 %b, 0 + br i1 %p, label %block1, label %block2 + + block1: ; preds = %0 + store i32 %b, ptr %c + br label %end_block + + block2: ; preds = %0 + store i32 87, ptr %d + br label %end_block + + end_block: ; preds = %block2, %block1 + ret void + } + declare void @bar(...) ... @@ -66,3 +134,243 @@ body: | PseudoRET ... +--- +name: ule_negone +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: ule_negone + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $x10, $x11, $x12, $x13 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x13 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x12 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x11 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr = COPY $x10 + ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 0 + ; CHECK-NEXT: SW killed [[ADDI]], [[COPY3]], 0 :: (store (s32)) + ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, -1 + ; CHECK-NEXT: BLTU killed [[ADDI1]], [[COPY2]], %bb.2 + ; CHECK-NEXT: PseudoBR %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr = COPY [[COPY2]] + ; CHECK-NEXT: SW [[COPY4]], [[COPY1]], 0 :: (store (s32)) + ; CHECK-NEXT: PseudoBR %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADDI2:%[0-9]+]]:gpr = ADDI $x0, 87 + ; CHECK-NEXT: SW killed [[ADDI2]], [[COPY]], 0 :: (store (s32)) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: PseudoRET + bb.0: + successors: %bb.1, %bb.2 + liveins: $x10, $x11, $x12, $x13 + + %3:gpr = COPY $x13 + %2:gpr = COPY $x12 + %1:gpr = COPY $x11 + %0:gpr = COPY $x10 + %5:gpr = ADDI $x0, 0 + SW killed %5, %0, 0 :: (store (s32)) + %6:gpr = ADDI $x0, -1 + BLTU killed %6, %1, %bb.2 + PseudoBR %bb.1 + + bb.1: + %4:gpr = COPY %1 + SW %4, %2, 0 :: (store (s32)) + PseudoBR %bb.3 + + bb.2: + %7:gpr = ADDI $x0, 87 + SW killed %7, %3, 0 :: (store (s32)) + + bb.3: + PseudoRET +... +--- +name: ult_zero +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: ult_zero + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $x10, $x11, $x12, $x13 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x13 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x12 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x11 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr = COPY $x10 + ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, -1 + ; CHECK-NEXT: SW killed [[ADDI]], [[COPY3]], 0 :: (store (s32)) + ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 + ; CHECK-NEXT: BLTU [[COPY2]], killed [[ADDI1]], %bb.2 + ; CHECK-NEXT: PseudoBR %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr = COPY [[COPY2]] + ; CHECK-NEXT: SW [[COPY4]], [[COPY1]], 0 :: (store (s32)) + ; CHECK-NEXT: PseudoBR %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADDI2:%[0-9]+]]:gpr = ADDI $x0, 87 + ; CHECK-NEXT: SW killed [[ADDI2]], [[COPY]], 0 :: (store (s32)) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: PseudoRET + bb.0: + successors: %bb.1, %bb.2 + liveins: $x10, $x11, $x12, $x13 + + %3:gpr = COPY $x13 + %2:gpr = COPY $x12 + %1:gpr = COPY $x11 + %0:gpr = COPY $x10 + %5:gpr = ADDI $x0, -1 + SW killed %5, %0, 0 :: (store (s32)) + %6:gpr = ADDI $x0, 0 + BLTU %1, killed %6, %bb.2 + PseudoBR %bb.1 + + bb.1: + %4:gpr = COPY %1 + SW %4, %2, 0 :: (store (s32)) + PseudoBR %bb.3 + + bb.2: + %7:gpr = ADDI $x0, 87 + SW killed %7, %3, 0 :: (store (s32)) + + bb.3: + PseudoRET +... +--- +name: sle_zero +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: sle_zero + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $x10, $x11, $x12, $x13 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x13 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x12 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x11 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr = COPY $x10 + ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1 + ; CHECK-NEXT: SW killed [[ADDI]], [[COPY3]], 0 :: (store (s32)) + ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 + ; CHECK-NEXT: BLT killed [[ADDI1]], [[COPY2]], %bb.2 + ; CHECK-NEXT: PseudoBR %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr = COPY [[COPY2]] + ; CHECK-NEXT: SW [[COPY4]], [[COPY1]], 0 :: (store (s32)) + ; CHECK-NEXT: PseudoBR %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADDI2:%[0-9]+]]:gpr = ADDI $x0, 87 + ; CHECK-NEXT: SW killed [[ADDI2]], [[COPY]], 0 :: (store (s32)) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: PseudoRET + bb.0: + successors: %bb.1, %bb.2 + liveins: $x10, $x11, $x12, $x13 + + %3:gpr = COPY $x13 + %2:gpr = COPY $x12 + %1:gpr = COPY $x11 + %0:gpr = COPY $x10 + %5:gpr = ADDI $x0, 1 + SW killed %5, %0, 0 :: (store (s32)) + %6:gpr = ADDI $x0, 0 + BLT killed %6, %1, %bb.2 + PseudoBR %bb.1 + + bb.1: + %4:gpr = COPY %1 + SW %4, %2, 0 :: (store (s32)) + PseudoBR %bb.3 + + bb.2: + %7:gpr = ADDI $x0, 87 + SW killed %7, %3, 0 :: (store (s32)) + + bb.3: + PseudoRET +... +--- +name: slt_zero +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: slt_zero + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $x10, $x11, $x12, $x13 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x13 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x12 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x11 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr = COPY $x10 + ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, -1 + ; CHECK-NEXT: SW killed [[ADDI]], [[COPY3]], 0 :: (store (s32)) + ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 + ; CHECK-NEXT: BLT [[COPY2]], killed [[ADDI1]], %bb.2 + ; CHECK-NEXT: PseudoBR %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr = COPY [[COPY2]] + ; CHECK-NEXT: SW [[COPY4]], [[COPY1]], 0 :: (store (s32)) + ; CHECK-NEXT: PseudoBR %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADDI2:%[0-9]+]]:gpr = ADDI $x0, 87 + ; CHECK-NEXT: SW killed [[ADDI2]], [[COPY]], 0 :: (store (s32)) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: PseudoRET + bb.0: + successors: %bb.1, %bb.2 + liveins: $x10, $x11, $x12, $x13 + + %3:gpr = COPY $x13 + %2:gpr = COPY $x12 + %1:gpr = COPY $x11 + %0:gpr = COPY $x10 + %5:gpr = ADDI $x0, -1 + SW killed %5, %0, 0 :: (store (s32)) + %6:gpr = ADDI $x0, 0 + BLT %1, killed %6, %bb.2 + PseudoBR %bb.1 + + bb.1: + %4:gpr = COPY %1 + SW %4, %2, 0 :: (store (s32)) + PseudoBR %bb.3 + + bb.2: + %7:gpr = ADDI $x0, 87 + SW killed %7, %3, 0 :: (store (s32)) + + bb.3: + PseudoRET +...