Alex MacLean a94920cdd5
[NVPTX] Fixup and refactor brx.idx support (#171933)
Guard "brx.idx" generation to appropriate PTX ISA and SM version.

In addition, do some minor refactoring moving the expansion into ISel as
doing this during operation legalization is more complex and offers no
benefits.

fixes https://github.com/llvm/llvm-project/issues/171709
2025-12-12 11:36:15 -08:00

74 lines
2.6 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; The command below feeds this file to llc for sm_30 with the ptx60 feature
; enabled, turns on machine-instruction verification, and pipes the generated
; PTX to FileCheck, which compares it with the assertions in this file.
; RUN: llc < %s -mcpu=sm_30 -mattr=+ptx60 -verify-machineinstrs | FileCheck %s
; Module-wide target: the 64-bit NVPTX triple.
target triple = "nvptx64-unknown-nvidiacl"
; Test that a sparse switch inside a loop is lowered to the PTX brx.idx
; indexed branch.
; NOTE(review): the function name suggests this was reduced from issue/PR
; 170051 -- confirm against the tracker.
;
; Control flow of the IR: entry -> for.cond -> BS_LABEL_1, which switches on
; %cond: 0 -> for.cond4, 4 -> BS_LABEL_2, 1 and 6 -> for.cond, anything else ->
; unreachable.
;
; Expected PTX (assertions below): a predicated branch sends %cond > 6
; (unsigned compare) to the %unreachable block; otherwise %cond indexes a
; seven-entry .branchtargets table. Table slots with no switch case (2, 3, 5)
; point at the %unreachable block, and slots 1 and 6 point at the block
; labelled %BS_LABEL_1 in the output.
define void @pr170051(i32 %cond) {
; CHECK-LABEL: pr170051(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: mov.b32 %r2, 0;
; CHECK-NEXT: ld.param.b32 %r1, [pr170051_param_0];
; CHECK-NEXT: setp.gt.u32 %p1, %r1, 6;
; CHECK-NEXT: bra.uni $L__BB0_3;
; CHECK-NEXT: $L__BB0_1: // %BS_LABEL_2
; CHECK-NEXT: // in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: or.b32 %r3, %r2, 1;
; CHECK-NEXT: $L__BB0_2: // %for.cond4
; CHECK-NEXT: // in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: mov.b32 %r2, %r3;
; CHECK-NEXT: $L__BB0_3: // %BS_LABEL_1
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: @%p1 bra $L__BB0_5;
; CHECK-NEXT: // %bb.4: // %BS_LABEL_1
; CHECK-NEXT: // in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: mov.b32 %r3, %r1;
; CHECK-NEXT: $L_brx_0: .branchtargets
; CHECK-NEXT: $L__BB0_2,
; CHECK-NEXT: $L__BB0_3,
; CHECK-NEXT: $L__BB0_5,
; CHECK-NEXT: $L__BB0_5,
; CHECK-NEXT: $L__BB0_1,
; CHECK-NEXT: $L__BB0_5,
; CHECK-NEXT: $L__BB0_3;
; CHECK-NEXT: brx.idx %r1, $L_brx_0;
; CHECK-NEXT: $L__BB0_5: // %unreachable
; CHECK-NEXT: // begin inline asm
; CHECK-NEXT: exit;
; CHECK-NEXT: // end inline asm
entry:
br label %for.cond
for.cond: ; preds = %for.cond4.for.cond_crit_edge, %BS_LABEL_1, %BS_LABEL_1, %entry
; %BS_LABEL_1 appears twice as an incoming block because its switch reaches
; this block through two separate cases (1 and 6).
%p_2218_0.1 = phi i32 [ 0, %entry ], [ %p_2218_0.3, %BS_LABEL_1 ], [ %p_2218_0.3, %BS_LABEL_1 ], [ poison, %for.cond4.for.cond_crit_edge ]
br label %BS_LABEL_1
BS_LABEL_2: ; preds = %BS_LABEL_1
; Target of switch case 4: sets the low bit of the carried value.
%sub = or i32 %p_2218_0.3, 1
br label %for.cond4
for.cond4: ; preds = %BS_LABEL_1, %BS_LABEL_2
; 0 when entered directly from the switch (case 0), %sub when coming from
; BS_LABEL_2.
%p_2218_0.2 = phi i32 [ 0, %BS_LABEL_1 ], [ %sub, %BS_LABEL_2 ]
; The condition is the constant false, so control always continues to
; BS_LABEL_1; the crit_edge successor exists only as a CFG edge.
br i1 false, label %for.cond4.for.cond_crit_edge, label %BS_LABEL_1
for.cond4.for.cond_crit_edge: ; preds = %for.cond4
br label %for.cond
BS_LABEL_1: ; preds = %for.cond4, %for.cond
%p_2218_0.3 = phi i32 [ %p_2218_0.2, %for.cond4 ], [ %p_2218_0.1, %for.cond ]
; Sparse case values (0, 1, 4, 6) with an unreachable default: this is the
; switch the assertions expect to become the .branchtargets table and the
; brx.idx instruction.
switch i32 %cond, label %unreachable [
i32 0, label %for.cond4
i32 4, label %BS_LABEL_2
i32 1, label %for.cond
i32 6, label %for.cond
]
unreachable: ; preds = %BS_LABEL_1
; Default destination of the switch; the expected PTX emits an inline-asm
; "exit;" for this block.
unreachable
}