
Allow directly storing an immediate instead of requiring that it first be moved into a register. This makes for more compact and readable PTX. A similar approach (using a ComplexPattern) could be used for most PTX instructions to avoid the need for `_[ri]+` variants and boilerplate.
31 lines
926 B
LLVM
31 lines
926 B
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --function foo --extra_scrub --default-march nvptx64 --filter-out ".*//.*" --filter-out "[\(\)\{\}]" --version 5
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}

target triple = "nvptx-nvidia-cuda"

; Module-level i1 flag placed in address space 1, initialised to false.
; @foo loads it and conditionally stores `true` back to it.
@i1g = addrspace(1) global i1 false, align 2

; @foo: if the i1 global @i1g currently holds false, set it to true.
;
; The assertions below expect the generated PTX to load the flag as a byte,
; mask it to its low bit, compare it into a predicate that guards a branch
; around the store, and emit the store with the constant as an immediate
; operand (`st.global.b8 [i1g], 1`).
define void @foo() {
; CHECK-LABEL: foo(
; CHECK: .reg .pred %p<2>;
; CHECK: .reg .b16 %rs<3>;
; CHECK-EMPTY:
; CHECK: ld.global.b8 %rs1, [i1g];
; CHECK: and.b16 %rs2, %rs1, 1;
; CHECK: setp.ne.b16 %p1, %rs2, 0;
; CHECK: @%p1 bra $L__BB0_2;
; CHECK: st.global.b8 [i1g], 1;
; CHECK: ret;
  ; Read the current value of the flag.
  %tmp = load i1, ptr addrspace(1) @i1g, align 2
  ; Flag already true: skip the store and go straight to the exit block.
  br i1 %tmp, label %if.end, label %if.then

if.then:
  ; Flag was false: write the constant `true` to it.
  store i1 true, ptr addrspace(1) @i1g, align 2
  br label %if.end

if.end:
  ret void
}