; llvm-project/llvm/test/CodeGen/NVPTX/load-store-scalars.ll
; Alex MacLean 76c9bfefa4
; [NVPTX] Remove Float register classes (#140487)
; These classes are redundant, as the untyped "Int" classes can be used
; for all float operations. This change is intended to be as minimal as
; possible and leaves the many potential simplifications and refactors
; this exposes as future work.
; 2025-05-21 11:33:57 -07:00
;
; 3175 lines
; 106 KiB
; LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck -check-prefixes=CHECK,SM60 %s
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx82 | FileCheck %s -check-prefixes=CHECK,SM70
; RUN: %if ptxas-12.2 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx82 | %ptxas-verify -arch=sm_70 %}
; TODO: generate PTX that preserves Concurrent Forward Progress
; for atomic operations to local statespace
; by generating atomic or volatile operations.
; TODO: add weak,atomic,volatile,atomic volatile tests
; for .const and .param statespaces.
; TODO: optimize .sys.shared into .cta.shared or .cluster.shared.
;; generic statespace
; generic
define void @generic_i8(ptr %a) {
; CHECK-LABEL: generic_i8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [generic_i8_param_0];
; CHECK-NEXT: ld.b8 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.b8 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load i8, ptr %a
%a.add = add i8 %a.load, 1
store i8 %a.add, ptr %a
ret void
}
define void @generic_i16(ptr %a) {
; CHECK-LABEL: generic_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [generic_i16_param_0];
; CHECK-NEXT: ld.b16 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.b16 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load i16, ptr %a
%a.add = add i16 %a.load, 1
store i16 %a.add, ptr %a
ret void
}
define void @generic_i32(ptr %a) {
; CHECK-LABEL: generic_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [generic_i32_param_0];
; CHECK-NEXT: ld.b32 %r1, [%rd1];
; CHECK-NEXT: add.s32 %r2, %r1, 1;
; CHECK-NEXT: st.b32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load i32, ptr %a
%a.add = add i32 %a.load, 1
store i32 %a.add, ptr %a
ret void
}
define void @generic_i64(ptr %a) {
; CHECK-LABEL: generic_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [generic_i64_param_0];
; CHECK-NEXT: ld.b64 %rd2, [%rd1];
; CHECK-NEXT: add.s64 %rd3, %rd2, 1;
; CHECK-NEXT: st.b64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load i64, ptr %a
%a.add = add i64 %a.load, 1
store i64 %a.add, ptr %a
ret void
}
define void @generic_float(ptr %a) {
; CHECK-LABEL: generic_float(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [generic_float_param_0];
; CHECK-NEXT: ld.b32 %r1, [%rd1];
; CHECK-NEXT: add.rn.f32 %r2, %r1, 0f3F800000;
; CHECK-NEXT: st.b32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load float, ptr %a
%a.add = fadd float %a.load, 1.
store float %a.add, ptr %a
ret void
}
define void @generic_double(ptr %a) {
; CHECK-LABEL: generic_double(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [generic_double_param_0];
; CHECK-NEXT: ld.b64 %rd2, [%rd1];
; CHECK-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000;
; CHECK-NEXT: st.b64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load double, ptr %a
%a.add = fadd double %a.load, 1.
store double %a.add, ptr %a
ret void
}
; generic_volatile
define void @generic_volatile_i8(ptr %a) {
; CHECK-LABEL: generic_volatile_i8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [generic_volatile_i8_param_0];
; CHECK-NEXT: ld.volatile.b8 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.volatile.b8 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load volatile i8, ptr %a
%a.add = add i8 %a.load, 1
store volatile i8 %a.add, ptr %a
ret void
}
define void @generic_volatile_i16(ptr %a) {
; CHECK-LABEL: generic_volatile_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [generic_volatile_i16_param_0];
; CHECK-NEXT: ld.volatile.b16 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.volatile.b16 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load volatile i16, ptr %a
%a.add = add i16 %a.load, 1
store volatile i16 %a.add, ptr %a
ret void
}
define void @generic_volatile_i32(ptr %a) {
; CHECK-LABEL: generic_volatile_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [generic_volatile_i32_param_0];
; CHECK-NEXT: ld.volatile.b32 %r1, [%rd1];
; CHECK-NEXT: add.s32 %r2, %r1, 1;
; CHECK-NEXT: st.volatile.b32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load volatile i32, ptr %a
%a.add = add i32 %a.load, 1
store volatile i32 %a.add, ptr %a
ret void
}
define void @generic_volatile_i64(ptr %a) {
; CHECK-LABEL: generic_volatile_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [generic_volatile_i64_param_0];
; CHECK-NEXT: ld.volatile.b64 %rd2, [%rd1];
; CHECK-NEXT: add.s64 %rd3, %rd2, 1;
; CHECK-NEXT: st.volatile.b64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load volatile i64, ptr %a
%a.add = add i64 %a.load, 1
store volatile i64 %a.add, ptr %a
ret void
}
define void @generic_volatile_float(ptr %a) {
; CHECK-LABEL: generic_volatile_float(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [generic_volatile_float_param_0];
; CHECK-NEXT: ld.volatile.b32 %r1, [%rd1];
; CHECK-NEXT: add.rn.f32 %r2, %r1, 0f3F800000;
; CHECK-NEXT: st.volatile.b32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load volatile float, ptr %a
%a.add = fadd float %a.load, 1.
store volatile float %a.add, ptr %a
ret void
}
define void @generic_volatile_double(ptr %a) {
; CHECK-LABEL: generic_volatile_double(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [generic_volatile_double_param_0];
; CHECK-NEXT: ld.volatile.b64 %rd2, [%rd1];
; CHECK-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000;
; CHECK-NEXT: st.volatile.b64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load volatile double, ptr %a
%a.add = fadd double %a.load, 1.
store volatile double %a.add, ptr %a
ret void
}
; generic_unordered_sys
define void @generic_unordered_sys_i8(ptr %a) {
; SM60-LABEL: generic_unordered_sys_i8(
; SM60: {
; SM60-NEXT: .reg .b16 %rs<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [generic_unordered_sys_i8_param_0];
; SM60-NEXT: ld.volatile.b8 %rs1, [%rd1];
; SM60-NEXT: add.s16 %rs2, %rs1, 1;
; SM60-NEXT: st.volatile.b8 [%rd1], %rs2;
; SM60-NEXT: ret;
;
; SM70-LABEL: generic_unordered_sys_i8(
; SM70: {
; SM70-NEXT: .reg .b16 %rs<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [generic_unordered_sys_i8_param_0];
; SM70-NEXT: ld.relaxed.sys.b8 %rs1, [%rd1];
; SM70-NEXT: add.s16 %rs2, %rs1, 1;
; SM70-NEXT: st.relaxed.sys.b8 [%rd1], %rs2;
; SM70-NEXT: ret;
%a.load = load atomic i8, ptr %a unordered, align 1
%a.add = add i8 %a.load, 1
store atomic i8 %a.add, ptr %a unordered, align 1
ret void
}
define void @generic_unordered_sys_i16(ptr %a) {
; SM60-LABEL: generic_unordered_sys_i16(
; SM60: {
; SM60-NEXT: .reg .b16 %rs<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [generic_unordered_sys_i16_param_0];
; SM60-NEXT: ld.volatile.b16 %rs1, [%rd1];
; SM60-NEXT: add.s16 %rs2, %rs1, 1;
; SM60-NEXT: st.volatile.b16 [%rd1], %rs2;
; SM60-NEXT: ret;
;
; SM70-LABEL: generic_unordered_sys_i16(
; SM70: {
; SM70-NEXT: .reg .b16 %rs<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [generic_unordered_sys_i16_param_0];
; SM70-NEXT: ld.relaxed.sys.b16 %rs1, [%rd1];
; SM70-NEXT: add.s16 %rs2, %rs1, 1;
; SM70-NEXT: st.relaxed.sys.b16 [%rd1], %rs2;
; SM70-NEXT: ret;
%a.load = load atomic i16, ptr %a unordered, align 2
%a.add = add i16 %a.load, 1
store atomic i16 %a.add, ptr %a unordered, align 2
ret void
}
define void @generic_unordered_sys_i32(ptr %a) {
; SM60-LABEL: generic_unordered_sys_i32(
; SM60: {
; SM60-NEXT: .reg .b32 %r<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [generic_unordered_sys_i32_param_0];
; SM60-NEXT: ld.volatile.b32 %r1, [%rd1];
; SM60-NEXT: add.s32 %r2, %r1, 1;
; SM60-NEXT: st.volatile.b32 [%rd1], %r2;
; SM60-NEXT: ret;
;
; SM70-LABEL: generic_unordered_sys_i32(
; SM70: {
; SM70-NEXT: .reg .b32 %r<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [generic_unordered_sys_i32_param_0];
; SM70-NEXT: ld.relaxed.sys.b32 %r1, [%rd1];
; SM70-NEXT: add.s32 %r2, %r1, 1;
; SM70-NEXT: st.relaxed.sys.b32 [%rd1], %r2;
; SM70-NEXT: ret;
%a.load = load atomic i32, ptr %a unordered, align 4
%a.add = add i32 %a.load, 1
store atomic i32 %a.add, ptr %a unordered, align 4
ret void
}
define void @generic_unordered_sys_i64(ptr %a) {
; SM60-LABEL: generic_unordered_sys_i64(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<4>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [generic_unordered_sys_i64_param_0];
; SM60-NEXT: ld.volatile.b64 %rd2, [%rd1];
; SM60-NEXT: add.s64 %rd3, %rd2, 1;
; SM60-NEXT: st.volatile.b64 [%rd1], %rd3;
; SM60-NEXT: ret;
;
; SM70-LABEL: generic_unordered_sys_i64(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<4>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [generic_unordered_sys_i64_param_0];
; SM70-NEXT: ld.relaxed.sys.b64 %rd2, [%rd1];
; SM70-NEXT: add.s64 %rd3, %rd2, 1;
; SM70-NEXT: st.relaxed.sys.b64 [%rd1], %rd3;
; SM70-NEXT: ret;
%a.load = load atomic i64, ptr %a unordered, align 8
%a.add = add i64 %a.load, 1
store atomic i64 %a.add, ptr %a unordered, align 8
ret void
}
define void @generic_unordered_sys_float(ptr %a) {
; SM60-LABEL: generic_unordered_sys_float(
; SM60: {
; SM60-NEXT: .reg .b32 %r<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [generic_unordered_sys_float_param_0];
; SM60-NEXT: ld.volatile.b32 %r1, [%rd1];
; SM60-NEXT: add.rn.f32 %r2, %r1, 0f3F800000;
; SM60-NEXT: st.volatile.b32 [%rd1], %r2;
; SM60-NEXT: ret;
;
; SM70-LABEL: generic_unordered_sys_float(
; SM70: {
; SM70-NEXT: .reg .b32 %r<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [generic_unordered_sys_float_param_0];
; SM70-NEXT: ld.relaxed.sys.b32 %r1, [%rd1];
; SM70-NEXT: add.rn.f32 %r2, %r1, 0f3F800000;
; SM70-NEXT: st.relaxed.sys.b32 [%rd1], %r2;
; SM70-NEXT: ret;
%a.load = load atomic float, ptr %a unordered, align 4
%a.add = fadd float %a.load, 1.
store atomic float %a.add, ptr %a unordered, align 4
ret void
}
define void @generic_unordered_sys_double(ptr %a) {
; SM60-LABEL: generic_unordered_sys_double(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<4>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [generic_unordered_sys_double_param_0];
; SM60-NEXT: ld.volatile.b64 %rd2, [%rd1];
; SM60-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000;
; SM60-NEXT: st.volatile.b64 [%rd1], %rd3;
; SM60-NEXT: ret;
;
; SM70-LABEL: generic_unordered_sys_double(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<4>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [generic_unordered_sys_double_param_0];
; SM70-NEXT: ld.relaxed.sys.b64 %rd2, [%rd1];
; SM70-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000;
; SM70-NEXT: st.relaxed.sys.b64 [%rd1], %rd3;
; SM70-NEXT: ret;
%a.load = load atomic double, ptr %a unordered, align 8
%a.add = fadd double %a.load, 1.
store atomic double %a.add, ptr %a unordered, align 8
ret void
}
; generic_unordered_volatile_sys
define void @generic_unordered_volatile_sys_i8(ptr %a) {
; CHECK-LABEL: generic_unordered_volatile_sys_i8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [generic_unordered_volatile_sys_i8_param_0];
; CHECK-NEXT: ld.volatile.b8 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.volatile.b8 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile i8, ptr %a unordered, align 1
%a.add = add i8 %a.load, 1
store atomic volatile i8 %a.add, ptr %a unordered, align 1
ret void
}
define void @generic_unordered_volatile_sys_i16(ptr %a) {
; CHECK-LABEL: generic_unordered_volatile_sys_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [generic_unordered_volatile_sys_i16_param_0];
; CHECK-NEXT: ld.volatile.b16 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.volatile.b16 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile i16, ptr %a unordered, align 2
%a.add = add i16 %a.load, 1
store atomic volatile i16 %a.add, ptr %a unordered, align 2
ret void
}
define void @generic_unordered_volatile_sys_i32(ptr %a) {
; CHECK-LABEL: generic_unordered_volatile_sys_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [generic_unordered_volatile_sys_i32_param_0];
; CHECK-NEXT: ld.volatile.b32 %r1, [%rd1];
; CHECK-NEXT: add.s32 %r2, %r1, 1;
; CHECK-NEXT: st.volatile.b32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile i32, ptr %a unordered, align 4
%a.add = add i32 %a.load, 1
store atomic volatile i32 %a.add, ptr %a unordered, align 4
ret void
}
define void @generic_unordered_volatile_sys_i64(ptr %a) {
; CHECK-LABEL: generic_unordered_volatile_sys_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [generic_unordered_volatile_sys_i64_param_0];
; CHECK-NEXT: ld.volatile.b64 %rd2, [%rd1];
; CHECK-NEXT: add.s64 %rd3, %rd2, 1;
; CHECK-NEXT: st.volatile.b64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load atomic volatile i64, ptr %a unordered, align 8
%a.add = add i64 %a.load, 1
store atomic volatile i64 %a.add, ptr %a unordered, align 8
ret void
}
define void @generic_unordered_volatile_sys_float(ptr %a) {
; CHECK-LABEL: generic_unordered_volatile_sys_float(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [generic_unordered_volatile_sys_float_param_0];
; CHECK-NEXT: ld.volatile.b32 %r1, [%rd1];
; CHECK-NEXT: add.rn.f32 %r2, %r1, 0f3F800000;
; CHECK-NEXT: st.volatile.b32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile float, ptr %a unordered, align 4
%a.add = fadd float %a.load, 1.
store atomic volatile float %a.add, ptr %a unordered, align 4
ret void
}
define void @generic_unordered_volatile_sys_double(ptr %a) {
; CHECK-LABEL: generic_unordered_volatile_sys_double(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [generic_unordered_volatile_sys_double_param_0];
; CHECK-NEXT: ld.volatile.b64 %rd2, [%rd1];
; CHECK-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000;
; CHECK-NEXT: st.volatile.b64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load atomic volatile double, ptr %a unordered, align 8
%a.add = fadd double %a.load, 1.
store atomic volatile double %a.add, ptr %a unordered, align 8
ret void
}
; generic_monotonic_sys
define void @generic_monotonic_sys_i8(ptr %a) {
; SM60-LABEL: generic_monotonic_sys_i8(
; SM60: {
; SM60-NEXT: .reg .b16 %rs<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [generic_monotonic_sys_i8_param_0];
; SM60-NEXT: ld.volatile.b8 %rs1, [%rd1];
; SM60-NEXT: add.s16 %rs2, %rs1, 1;
; SM60-NEXT: st.volatile.b8 [%rd1], %rs2;
; SM60-NEXT: ret;
;
; SM70-LABEL: generic_monotonic_sys_i8(
; SM70: {
; SM70-NEXT: .reg .b16 %rs<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [generic_monotonic_sys_i8_param_0];
; SM70-NEXT: ld.relaxed.sys.b8 %rs1, [%rd1];
; SM70-NEXT: add.s16 %rs2, %rs1, 1;
; SM70-NEXT: st.relaxed.sys.b8 [%rd1], %rs2;
; SM70-NEXT: ret;
%a.load = load atomic i8, ptr %a monotonic, align 1
%a.add = add i8 %a.load, 1
store atomic i8 %a.add, ptr %a monotonic, align 1
ret void
}
define void @generic_monotonic_sys_i16(ptr %a) {
; SM60-LABEL: generic_monotonic_sys_i16(
; SM60: {
; SM60-NEXT: .reg .b16 %rs<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [generic_monotonic_sys_i16_param_0];
; SM60-NEXT: ld.volatile.b16 %rs1, [%rd1];
; SM60-NEXT: add.s16 %rs2, %rs1, 1;
; SM60-NEXT: st.volatile.b16 [%rd1], %rs2;
; SM60-NEXT: ret;
;
; SM70-LABEL: generic_monotonic_sys_i16(
; SM70: {
; SM70-NEXT: .reg .b16 %rs<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [generic_monotonic_sys_i16_param_0];
; SM70-NEXT: ld.relaxed.sys.b16 %rs1, [%rd1];
; SM70-NEXT: add.s16 %rs2, %rs1, 1;
; SM70-NEXT: st.relaxed.sys.b16 [%rd1], %rs2;
; SM70-NEXT: ret;
%a.load = load atomic i16, ptr %a monotonic, align 2
%a.add = add i16 %a.load, 1
store atomic i16 %a.add, ptr %a monotonic, align 2
ret void
}
define void @generic_monotonic_sys_i32(ptr %a) {
; SM60-LABEL: generic_monotonic_sys_i32(
; SM60: {
; SM60-NEXT: .reg .b32 %r<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [generic_monotonic_sys_i32_param_0];
; SM60-NEXT: ld.volatile.b32 %r1, [%rd1];
; SM60-NEXT: add.s32 %r2, %r1, 1;
; SM60-NEXT: st.volatile.b32 [%rd1], %r2;
; SM60-NEXT: ret;
;
; SM70-LABEL: generic_monotonic_sys_i32(
; SM70: {
; SM70-NEXT: .reg .b32 %r<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [generic_monotonic_sys_i32_param_0];
; SM70-NEXT: ld.relaxed.sys.b32 %r1, [%rd1];
; SM70-NEXT: add.s32 %r2, %r1, 1;
; SM70-NEXT: st.relaxed.sys.b32 [%rd1], %r2;
; SM70-NEXT: ret;
%a.load = load atomic i32, ptr %a monotonic, align 4
%a.add = add i32 %a.load, 1
store atomic i32 %a.add, ptr %a monotonic, align 4
ret void
}
define void @generic_monotonic_sys_i64(ptr %a) {
; SM60-LABEL: generic_monotonic_sys_i64(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<4>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [generic_monotonic_sys_i64_param_0];
; SM60-NEXT: ld.volatile.b64 %rd2, [%rd1];
; SM60-NEXT: add.s64 %rd3, %rd2, 1;
; SM60-NEXT: st.volatile.b64 [%rd1], %rd3;
; SM60-NEXT: ret;
;
; SM70-LABEL: generic_monotonic_sys_i64(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<4>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [generic_monotonic_sys_i64_param_0];
; SM70-NEXT: ld.relaxed.sys.b64 %rd2, [%rd1];
; SM70-NEXT: add.s64 %rd3, %rd2, 1;
; SM70-NEXT: st.relaxed.sys.b64 [%rd1], %rd3;
; SM70-NEXT: ret;
%a.load = load atomic i64, ptr %a monotonic, align 8
%a.add = add i64 %a.load, 1
store atomic i64 %a.add, ptr %a monotonic, align 8
ret void
}
define void @generic_monotonic_sys_float(ptr %a) {
; SM60-LABEL: generic_monotonic_sys_float(
; SM60: {
; SM60-NEXT: .reg .b32 %r<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [generic_monotonic_sys_float_param_0];
; SM60-NEXT: ld.volatile.b32 %r1, [%rd1];
; SM60-NEXT: add.rn.f32 %r2, %r1, 0f3F800000;
; SM60-NEXT: st.volatile.b32 [%rd1], %r2;
; SM60-NEXT: ret;
;
; SM70-LABEL: generic_monotonic_sys_float(
; SM70: {
; SM70-NEXT: .reg .b32 %r<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [generic_monotonic_sys_float_param_0];
; SM70-NEXT: ld.relaxed.sys.b32 %r1, [%rd1];
; SM70-NEXT: add.rn.f32 %r2, %r1, 0f3F800000;
; SM70-NEXT: st.relaxed.sys.b32 [%rd1], %r2;
; SM70-NEXT: ret;
%a.load = load atomic float, ptr %a monotonic, align 4
%a.add = fadd float %a.load, 1.
store atomic float %a.add, ptr %a monotonic, align 4
ret void
}
define void @generic_monotonic_sys_double(ptr %a) {
; SM60-LABEL: generic_monotonic_sys_double(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<4>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [generic_monotonic_sys_double_param_0];
; SM60-NEXT: ld.volatile.b64 %rd2, [%rd1];
; SM60-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000;
; SM60-NEXT: st.volatile.b64 [%rd1], %rd3;
; SM60-NEXT: ret;
;
; SM70-LABEL: generic_monotonic_sys_double(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<4>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [generic_monotonic_sys_double_param_0];
; SM70-NEXT: ld.relaxed.sys.b64 %rd2, [%rd1];
; SM70-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000;
; SM70-NEXT: st.relaxed.sys.b64 [%rd1], %rd3;
; SM70-NEXT: ret;
%a.load = load atomic double, ptr %a monotonic, align 8
%a.add = fadd double %a.load, 1.
store atomic double %a.add, ptr %a monotonic, align 8
ret void
}
; generic_monotonic_volatile_sys
define void @generic_monotonic_volatile_sys_i8(ptr %a) {
; CHECK-LABEL: generic_monotonic_volatile_sys_i8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [generic_monotonic_volatile_sys_i8_param_0];
; CHECK-NEXT: ld.volatile.b8 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.volatile.b8 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile i8, ptr %a monotonic, align 1
%a.add = add i8 %a.load, 1
store atomic volatile i8 %a.add, ptr %a monotonic, align 1
ret void
}
define void @generic_monotonic_volatile_sys_i16(ptr %a) {
; CHECK-LABEL: generic_monotonic_volatile_sys_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [generic_monotonic_volatile_sys_i16_param_0];
; CHECK-NEXT: ld.volatile.b16 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.volatile.b16 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile i16, ptr %a monotonic, align 2
%a.add = add i16 %a.load, 1
store atomic volatile i16 %a.add, ptr %a monotonic, align 2
ret void
}
define void @generic_monotonic_volatile_sys_i32(ptr %a) {
; CHECK-LABEL: generic_monotonic_volatile_sys_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [generic_monotonic_volatile_sys_i32_param_0];
; CHECK-NEXT: ld.volatile.b32 %r1, [%rd1];
; CHECK-NEXT: add.s32 %r2, %r1, 1;
; CHECK-NEXT: st.volatile.b32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile i32, ptr %a monotonic, align 4
%a.add = add i32 %a.load, 1
store atomic volatile i32 %a.add, ptr %a monotonic, align 4
ret void
}
define void @generic_monotonic_volatile_sys_i64(ptr %a) {
; CHECK-LABEL: generic_monotonic_volatile_sys_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [generic_monotonic_volatile_sys_i64_param_0];
; CHECK-NEXT: ld.volatile.b64 %rd2, [%rd1];
; CHECK-NEXT: add.s64 %rd3, %rd2, 1;
; CHECK-NEXT: st.volatile.b64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load atomic volatile i64, ptr %a monotonic, align 8
%a.add = add i64 %a.load, 1
store atomic volatile i64 %a.add, ptr %a monotonic, align 8
ret void
}
define void @generic_monotonic_volatile_sys_float(ptr %a) {
; CHECK-LABEL: generic_monotonic_volatile_sys_float(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [generic_monotonic_volatile_sys_float_param_0];
; CHECK-NEXT: ld.volatile.b32 %r1, [%rd1];
; CHECK-NEXT: add.rn.f32 %r2, %r1, 0f3F800000;
; CHECK-NEXT: st.volatile.b32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile float, ptr %a monotonic, align 4
%a.add = fadd float %a.load, 1.
store atomic volatile float %a.add, ptr %a monotonic, align 4
ret void
}
define void @generic_monotonic_volatile_sys_double(ptr %a) {
; CHECK-LABEL: generic_monotonic_volatile_sys_double(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [generic_monotonic_volatile_sys_double_param_0];
; CHECK-NEXT: ld.volatile.b64 %rd2, [%rd1];
; CHECK-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000;
; CHECK-NEXT: st.volatile.b64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load atomic volatile double, ptr %a monotonic, align 8
%a.add = fadd double %a.load, 1.
store atomic volatile double %a.add, ptr %a monotonic, align 8
ret void
}
;; global statespace
; global
define void @global_i8(ptr addrspace(1) %a) {
; CHECK-LABEL: global_i8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [global_i8_param_0];
; CHECK-NEXT: ld.global.b8 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.global.b8 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load i8, ptr addrspace(1) %a
%a.add = add i8 %a.load, 1
store i8 %a.add, ptr addrspace(1) %a
ret void
}
define void @global_i16(ptr addrspace(1) %a) {
; CHECK-LABEL: global_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [global_i16_param_0];
; CHECK-NEXT: ld.global.b16 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.global.b16 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load i16, ptr addrspace(1) %a
%a.add = add i16 %a.load, 1
store i16 %a.add, ptr addrspace(1) %a
ret void
}
define void @global_i32(ptr addrspace(1) %a) {
; CHECK-LABEL: global_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [global_i32_param_0];
; CHECK-NEXT: ld.global.b32 %r1, [%rd1];
; CHECK-NEXT: add.s32 %r2, %r1, 1;
; CHECK-NEXT: st.global.b32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load i32, ptr addrspace(1) %a
%a.add = add i32 %a.load, 1
store i32 %a.add, ptr addrspace(1) %a
ret void
}
define void @global_i64(ptr addrspace(1) %a) {
; CHECK-LABEL: global_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [global_i64_param_0];
; CHECK-NEXT: ld.global.b64 %rd2, [%rd1];
; CHECK-NEXT: add.s64 %rd3, %rd2, 1;
; CHECK-NEXT: st.global.b64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load i64, ptr addrspace(1) %a
%a.add = add i64 %a.load, 1
store i64 %a.add, ptr addrspace(1) %a
ret void
}
define void @global_float(ptr addrspace(1) %a) {
; CHECK-LABEL: global_float(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [global_float_param_0];
; CHECK-NEXT: ld.global.b32 %r1, [%rd1];
; CHECK-NEXT: add.rn.f32 %r2, %r1, 0f3F800000;
; CHECK-NEXT: st.global.b32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load float, ptr addrspace(1) %a
%a.add = fadd float %a.load, 1.
store float %a.add, ptr addrspace(1) %a
ret void
}
define void @global_double(ptr addrspace(1) %a) {
; CHECK-LABEL: global_double(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [global_double_param_0];
; CHECK-NEXT: ld.global.b64 %rd2, [%rd1];
; CHECK-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000;
; CHECK-NEXT: st.global.b64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load double, ptr addrspace(1) %a
%a.add = fadd double %a.load, 1.
store double %a.add, ptr addrspace(1) %a
ret void
}
; global_volatile
define void @global_volatile_i8(ptr addrspace(1) %a) {
; CHECK-LABEL: global_volatile_i8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [global_volatile_i8_param_0];
; CHECK-NEXT: ld.volatile.global.b8 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.volatile.global.b8 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load volatile i8, ptr addrspace(1) %a
%a.add = add i8 %a.load, 1
store volatile i8 %a.add, ptr addrspace(1) %a
ret void
}
define void @global_volatile_i16(ptr addrspace(1) %a) {
; CHECK-LABEL: global_volatile_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [global_volatile_i16_param_0];
; CHECK-NEXT: ld.volatile.global.b16 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.volatile.global.b16 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load volatile i16, ptr addrspace(1) %a
%a.add = add i16 %a.load, 1
store volatile i16 %a.add, ptr addrspace(1) %a
ret void
}
define void @global_volatile_i32(ptr addrspace(1) %a) {
; CHECK-LABEL: global_volatile_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [global_volatile_i32_param_0];
; CHECK-NEXT: ld.volatile.global.b32 %r1, [%rd1];
; CHECK-NEXT: add.s32 %r2, %r1, 1;
; CHECK-NEXT: st.volatile.global.b32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load volatile i32, ptr addrspace(1) %a
%a.add = add i32 %a.load, 1
store volatile i32 %a.add, ptr addrspace(1) %a
ret void
}
define void @global_volatile_i64(ptr addrspace(1) %a) {
; CHECK-LABEL: global_volatile_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [global_volatile_i64_param_0];
; CHECK-NEXT: ld.volatile.global.b64 %rd2, [%rd1];
; CHECK-NEXT: add.s64 %rd3, %rd2, 1;
; CHECK-NEXT: st.volatile.global.b64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load volatile i64, ptr addrspace(1) %a
%a.add = add i64 %a.load, 1
store volatile i64 %a.add, ptr addrspace(1) %a
ret void
}
define void @global_volatile_float(ptr addrspace(1) %a) {
; CHECK-LABEL: global_volatile_float(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [global_volatile_float_param_0];
; CHECK-NEXT: ld.volatile.global.b32 %r1, [%rd1];
; CHECK-NEXT: add.rn.f32 %r2, %r1, 0f3F800000;
; CHECK-NEXT: st.volatile.global.b32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load volatile float, ptr addrspace(1) %a
%a.add = fadd float %a.load, 1.
store volatile float %a.add, ptr addrspace(1) %a
ret void
}
define void @global_volatile_double(ptr addrspace(1) %a) {
; CHECK-LABEL: global_volatile_double(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [global_volatile_double_param_0];
; CHECK-NEXT: ld.volatile.global.b64 %rd2, [%rd1];
; CHECK-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000;
; CHECK-NEXT: st.volatile.global.b64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load volatile double, ptr addrspace(1) %a
%a.add = fadd double %a.load, 1.
store volatile double %a.add, ptr addrspace(1) %a
ret void
}
; global_unordered_sys
; Atomic unordered i8 load, add 1, and store through a global (addrspace(1))
; pointer. The sm_20 run expects ld/st.volatile.global.b8; the sm_70 run
; expects ld/st.relaxed.sys.global.b8. The add is checked as a 16-bit add.s16.
define void @global_unordered_sys_i8(ptr addrspace(1) %a) {
; SM60-LABEL: global_unordered_sys_i8(
; SM60: {
; SM60-NEXT: .reg .b16 %rs<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [global_unordered_sys_i8_param_0];
; SM60-NEXT: ld.volatile.global.b8 %rs1, [%rd1];
; SM60-NEXT: add.s16 %rs2, %rs1, 1;
; SM60-NEXT: st.volatile.global.b8 [%rd1], %rs2;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_unordered_sys_i8(
; SM70: {
; SM70-NEXT: .reg .b16 %rs<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [global_unordered_sys_i8_param_0];
; SM70-NEXT: ld.relaxed.sys.global.b8 %rs1, [%rd1];
; SM70-NEXT: add.s16 %rs2, %rs1, 1;
; SM70-NEXT: st.relaxed.sys.global.b8 [%rd1], %rs2;
; SM70-NEXT: ret;
%a.load = load atomic i8, ptr addrspace(1) %a unordered, align 1
%a.add = add i8 %a.load, 1
store atomic i8 %a.add, ptr addrspace(1) %a unordered, align 1
ret void
}
; Atomic unordered i16 load, add 1, and store through a global (addrspace(1))
; pointer. The sm_20 run expects ld/st.volatile.global.b16; the sm_70 run
; expects ld/st.relaxed.sys.global.b16.
define void @global_unordered_sys_i16(ptr addrspace(1) %a) {
; SM60-LABEL: global_unordered_sys_i16(
; SM60: {
; SM60-NEXT: .reg .b16 %rs<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [global_unordered_sys_i16_param_0];
; SM60-NEXT: ld.volatile.global.b16 %rs1, [%rd1];
; SM60-NEXT: add.s16 %rs2, %rs1, 1;
; SM60-NEXT: st.volatile.global.b16 [%rd1], %rs2;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_unordered_sys_i16(
; SM70: {
; SM70-NEXT: .reg .b16 %rs<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [global_unordered_sys_i16_param_0];
; SM70-NEXT: ld.relaxed.sys.global.b16 %rs1, [%rd1];
; SM70-NEXT: add.s16 %rs2, %rs1, 1;
; SM70-NEXT: st.relaxed.sys.global.b16 [%rd1], %rs2;
; SM70-NEXT: ret;
%a.load = load atomic i16, ptr addrspace(1) %a unordered, align 2
%a.add = add i16 %a.load, 1
store atomic i16 %a.add, ptr addrspace(1) %a unordered, align 2
ret void
}
; Atomic unordered i32 load, add 1, and store through a global (addrspace(1))
; pointer. The sm_20 run expects ld/st.volatile.global.b32; the sm_70 run
; expects ld/st.relaxed.sys.global.b32.
define void @global_unordered_sys_i32(ptr addrspace(1) %a) {
; SM60-LABEL: global_unordered_sys_i32(
; SM60: {
; SM60-NEXT: .reg .b32 %r<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [global_unordered_sys_i32_param_0];
; SM60-NEXT: ld.volatile.global.b32 %r1, [%rd1];
; SM60-NEXT: add.s32 %r2, %r1, 1;
; SM60-NEXT: st.volatile.global.b32 [%rd1], %r2;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_unordered_sys_i32(
; SM70: {
; SM70-NEXT: .reg .b32 %r<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [global_unordered_sys_i32_param_0];
; SM70-NEXT: ld.relaxed.sys.global.b32 %r1, [%rd1];
; SM70-NEXT: add.s32 %r2, %r1, 1;
; SM70-NEXT: st.relaxed.sys.global.b32 [%rd1], %r2;
; SM70-NEXT: ret;
%a.load = load atomic i32, ptr addrspace(1) %a unordered, align 4
%a.add = add i32 %a.load, 1
store atomic i32 %a.add, ptr addrspace(1) %a unordered, align 4
ret void
}
; Atomic unordered i64 load, add 1, and store through a global (addrspace(1))
; pointer. The sm_20 run expects ld/st.volatile.global.b64; the sm_70 run
; expects ld/st.relaxed.sys.global.b64.
define void @global_unordered_sys_i64(ptr addrspace(1) %a) {
; SM60-LABEL: global_unordered_sys_i64(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<4>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [global_unordered_sys_i64_param_0];
; SM60-NEXT: ld.volatile.global.b64 %rd2, [%rd1];
; SM60-NEXT: add.s64 %rd3, %rd2, 1;
; SM60-NEXT: st.volatile.global.b64 [%rd1], %rd3;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_unordered_sys_i64(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<4>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [global_unordered_sys_i64_param_0];
; SM70-NEXT: ld.relaxed.sys.global.b64 %rd2, [%rd1];
; SM70-NEXT: add.s64 %rd3, %rd2, 1;
; SM70-NEXT: st.relaxed.sys.global.b64 [%rd1], %rd3;
; SM70-NEXT: ret;
%a.load = load atomic i64, ptr addrspace(1) %a unordered, align 8
%a.add = add i64 %a.load, 1
store atomic i64 %a.add, ptr addrspace(1) %a unordered, align 8
ret void
}
; Atomic unordered float load, fadd 1.0, and store through a global
; (addrspace(1)) pointer. The sm_20 run expects ld/st.volatile.global.b32; the
; sm_70 run expects ld/st.relaxed.sys.global.b32. Both check add.rn.f32 on
; untyped .b32 registers.
define void @global_unordered_sys_float(ptr addrspace(1) %a) {
; SM60-LABEL: global_unordered_sys_float(
; SM60: {
; SM60-NEXT: .reg .b32 %r<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [global_unordered_sys_float_param_0];
; SM60-NEXT: ld.volatile.global.b32 %r1, [%rd1];
; SM60-NEXT: add.rn.f32 %r2, %r1, 0f3F800000;
; SM60-NEXT: st.volatile.global.b32 [%rd1], %r2;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_unordered_sys_float(
; SM70: {
; SM70-NEXT: .reg .b32 %r<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [global_unordered_sys_float_param_0];
; SM70-NEXT: ld.relaxed.sys.global.b32 %r1, [%rd1];
; SM70-NEXT: add.rn.f32 %r2, %r1, 0f3F800000;
; SM70-NEXT: st.relaxed.sys.global.b32 [%rd1], %r2;
; SM70-NEXT: ret;
%a.load = load atomic float, ptr addrspace(1) %a unordered, align 4
%a.add = fadd float %a.load, 1.
store atomic float %a.add, ptr addrspace(1) %a unordered, align 4
ret void
}
; Atomic unordered double load, fadd 1.0, and store through a global
; (addrspace(1)) pointer. The sm_20 run expects ld/st.volatile.global.b64; the
; sm_70 run expects ld/st.relaxed.sys.global.b64. Both check add.rn.f64 on
; untyped .b64 registers.
define void @global_unordered_sys_double(ptr addrspace(1) %a) {
; SM60-LABEL: global_unordered_sys_double(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<4>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [global_unordered_sys_double_param_0];
; SM60-NEXT: ld.volatile.global.b64 %rd2, [%rd1];
; SM60-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000;
; SM60-NEXT: st.volatile.global.b64 [%rd1], %rd3;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_unordered_sys_double(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<4>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [global_unordered_sys_double_param_0];
; SM70-NEXT: ld.relaxed.sys.global.b64 %rd2, [%rd1];
; SM70-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000;
; SM70-NEXT: st.relaxed.sys.global.b64 [%rd1], %rd3;
; SM70-NEXT: ret;
%a.load = load atomic double, ptr addrspace(1) %a unordered, align 8
%a.add = fadd double %a.load, 1.
store atomic double %a.add, ptr addrspace(1) %a unordered, align 8
ret void
}
; global_unordered_volatile_sys
; Atomic volatile unordered i8 load, add 1, and store through a global
; (addrspace(1)) pointer. The sm_20 run expects ld/st.volatile.global.b8; the
; sm_70 run expects ld/st.mmio.relaxed.sys.global.b8. The add is checked as a
; 16-bit add.s16.
define void @global_unordered_volatile_sys_i8(ptr addrspace(1) %a) {
; SM60-LABEL: global_unordered_volatile_sys_i8(
; SM60: {
; SM60-NEXT: .reg .b16 %rs<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [global_unordered_volatile_sys_i8_param_0];
; SM60-NEXT: ld.volatile.global.b8 %rs1, [%rd1];
; SM60-NEXT: add.s16 %rs2, %rs1, 1;
; SM60-NEXT: st.volatile.global.b8 [%rd1], %rs2;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_unordered_volatile_sys_i8(
; SM70: {
; SM70-NEXT: .reg .b16 %rs<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [global_unordered_volatile_sys_i8_param_0];
; SM70-NEXT: ld.mmio.relaxed.sys.global.b8 %rs1, [%rd1];
; SM70-NEXT: add.s16 %rs2, %rs1, 1;
; SM70-NEXT: st.mmio.relaxed.sys.global.b8 [%rd1], %rs2;
; SM70-NEXT: ret;
%a.load = load atomic volatile i8, ptr addrspace(1) %a unordered, align 1
%a.add = add i8 %a.load, 1
store atomic volatile i8 %a.add, ptr addrspace(1) %a unordered, align 1
ret void
}
; Atomic volatile unordered i16 load, add 1, and store through a global
; (addrspace(1)) pointer. The sm_20 run expects ld/st.volatile.global.b16; the
; sm_70 run expects ld/st.mmio.relaxed.sys.global.b16.
define void @global_unordered_volatile_sys_i16(ptr addrspace(1) %a) {
; SM60-LABEL: global_unordered_volatile_sys_i16(
; SM60: {
; SM60-NEXT: .reg .b16 %rs<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [global_unordered_volatile_sys_i16_param_0];
; SM60-NEXT: ld.volatile.global.b16 %rs1, [%rd1];
; SM60-NEXT: add.s16 %rs2, %rs1, 1;
; SM60-NEXT: st.volatile.global.b16 [%rd1], %rs2;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_unordered_volatile_sys_i16(
; SM70: {
; SM70-NEXT: .reg .b16 %rs<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [global_unordered_volatile_sys_i16_param_0];
; SM70-NEXT: ld.mmio.relaxed.sys.global.b16 %rs1, [%rd1];
; SM70-NEXT: add.s16 %rs2, %rs1, 1;
; SM70-NEXT: st.mmio.relaxed.sys.global.b16 [%rd1], %rs2;
; SM70-NEXT: ret;
%a.load = load atomic volatile i16, ptr addrspace(1) %a unordered, align 2
%a.add = add i16 %a.load, 1
store atomic volatile i16 %a.add, ptr addrspace(1) %a unordered, align 2
ret void
}
; Atomic volatile unordered i32 load, add 1, and store through a global
; (addrspace(1)) pointer. The sm_20 run expects ld/st.volatile.global.b32; the
; sm_70 run expects ld/st.mmio.relaxed.sys.global.b32.
define void @global_unordered_volatile_sys_i32(ptr addrspace(1) %a) {
; SM60-LABEL: global_unordered_volatile_sys_i32(
; SM60: {
; SM60-NEXT: .reg .b32 %r<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [global_unordered_volatile_sys_i32_param_0];
; SM60-NEXT: ld.volatile.global.b32 %r1, [%rd1];
; SM60-NEXT: add.s32 %r2, %r1, 1;
; SM60-NEXT: st.volatile.global.b32 [%rd1], %r2;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_unordered_volatile_sys_i32(
; SM70: {
; SM70-NEXT: .reg .b32 %r<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [global_unordered_volatile_sys_i32_param_0];
; SM70-NEXT: ld.mmio.relaxed.sys.global.b32 %r1, [%rd1];
; SM70-NEXT: add.s32 %r2, %r1, 1;
; SM70-NEXT: st.mmio.relaxed.sys.global.b32 [%rd1], %r2;
; SM70-NEXT: ret;
%a.load = load atomic volatile i32, ptr addrspace(1) %a unordered, align 4
%a.add = add i32 %a.load, 1
store atomic volatile i32 %a.add, ptr addrspace(1) %a unordered, align 4
ret void
}
; Atomic volatile unordered i64 load, add 1, and store through a global
; (addrspace(1)) pointer. The sm_20 run expects ld/st.volatile.global.b64; the
; sm_70 run expects ld/st.mmio.relaxed.sys.global.b64.
define void @global_unordered_volatile_sys_i64(ptr addrspace(1) %a) {
; SM60-LABEL: global_unordered_volatile_sys_i64(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<4>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [global_unordered_volatile_sys_i64_param_0];
; SM60-NEXT: ld.volatile.global.b64 %rd2, [%rd1];
; SM60-NEXT: add.s64 %rd3, %rd2, 1;
; SM60-NEXT: st.volatile.global.b64 [%rd1], %rd3;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_unordered_volatile_sys_i64(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<4>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [global_unordered_volatile_sys_i64_param_0];
; SM70-NEXT: ld.mmio.relaxed.sys.global.b64 %rd2, [%rd1];
; SM70-NEXT: add.s64 %rd3, %rd2, 1;
; SM70-NEXT: st.mmio.relaxed.sys.global.b64 [%rd1], %rd3;
; SM70-NEXT: ret;
%a.load = load atomic volatile i64, ptr addrspace(1) %a unordered, align 8
%a.add = add i64 %a.load, 1
store atomic volatile i64 %a.add, ptr addrspace(1) %a unordered, align 8
ret void
}
; Atomic volatile unordered float load, fadd 1.0, and store through a global
; (addrspace(1)) pointer. The sm_20 run expects ld/st.volatile.global.b32; the
; sm_70 run expects ld/st.mmio.relaxed.sys.global.b32. Both check add.rn.f32
; on untyped .b32 registers.
define void @global_unordered_volatile_sys_float(ptr addrspace(1) %a) {
; SM60-LABEL: global_unordered_volatile_sys_float(
; SM60: {
; SM60-NEXT: .reg .b32 %r<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [global_unordered_volatile_sys_float_param_0];
; SM60-NEXT: ld.volatile.global.b32 %r1, [%rd1];
; SM60-NEXT: add.rn.f32 %r2, %r1, 0f3F800000;
; SM60-NEXT: st.volatile.global.b32 [%rd1], %r2;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_unordered_volatile_sys_float(
; SM70: {
; SM70-NEXT: .reg .b32 %r<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [global_unordered_volatile_sys_float_param_0];
; SM70-NEXT: ld.mmio.relaxed.sys.global.b32 %r1, [%rd1];
; SM70-NEXT: add.rn.f32 %r2, %r1, 0f3F800000;
; SM70-NEXT: st.mmio.relaxed.sys.global.b32 [%rd1], %r2;
; SM70-NEXT: ret;
%a.load = load atomic volatile float, ptr addrspace(1) %a unordered, align 4
%a.add = fadd float %a.load, 1.
store atomic volatile float %a.add, ptr addrspace(1) %a unordered, align 4
ret void
}
; Atomic volatile unordered double load, fadd 1.0, and store through a global
; (addrspace(1)) pointer. The sm_20 run expects ld/st.volatile.global.b64; the
; sm_70 run expects ld/st.mmio.relaxed.sys.global.b64. Both check add.rn.f64
; on untyped .b64 registers.
define void @global_unordered_volatile_sys_double(ptr addrspace(1) %a) {
; SM60-LABEL: global_unordered_volatile_sys_double(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<4>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [global_unordered_volatile_sys_double_param_0];
; SM60-NEXT: ld.volatile.global.b64 %rd2, [%rd1];
; SM60-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000;
; SM60-NEXT: st.volatile.global.b64 [%rd1], %rd3;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_unordered_volatile_sys_double(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<4>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [global_unordered_volatile_sys_double_param_0];
; SM70-NEXT: ld.mmio.relaxed.sys.global.b64 %rd2, [%rd1];
; SM70-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000;
; SM70-NEXT: st.mmio.relaxed.sys.global.b64 [%rd1], %rd3;
; SM70-NEXT: ret;
%a.load = load atomic volatile double, ptr addrspace(1) %a unordered, align 8
%a.add = fadd double %a.load, 1.
store atomic volatile double %a.add, ptr addrspace(1) %a unordered, align 8
ret void
}
; global_monotonic_sys
; Atomic monotonic i8 load, add 1, and store through a global (addrspace(1))
; pointer. The sm_20 run expects ld/st.volatile.global.b8; the sm_70 run
; expects ld/st.relaxed.sys.global.b8. The add is checked as a 16-bit add.s16.
define void @global_monotonic_sys_i8(ptr addrspace(1) %a) {
; SM60-LABEL: global_monotonic_sys_i8(
; SM60: {
; SM60-NEXT: .reg .b16 %rs<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [global_monotonic_sys_i8_param_0];
; SM60-NEXT: ld.volatile.global.b8 %rs1, [%rd1];
; SM60-NEXT: add.s16 %rs2, %rs1, 1;
; SM60-NEXT: st.volatile.global.b8 [%rd1], %rs2;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_monotonic_sys_i8(
; SM70: {
; SM70-NEXT: .reg .b16 %rs<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [global_monotonic_sys_i8_param_0];
; SM70-NEXT: ld.relaxed.sys.global.b8 %rs1, [%rd1];
; SM70-NEXT: add.s16 %rs2, %rs1, 1;
; SM70-NEXT: st.relaxed.sys.global.b8 [%rd1], %rs2;
; SM70-NEXT: ret;
%a.load = load atomic i8, ptr addrspace(1) %a monotonic, align 1
%a.add = add i8 %a.load, 1
store atomic i8 %a.add, ptr addrspace(1) %a monotonic, align 1
ret void
}
; Atomic monotonic i16 load, add 1, and store through a global (addrspace(1))
; pointer. The sm_20 run expects ld/st.volatile.global.b16; the sm_70 run
; expects ld/st.relaxed.sys.global.b16.
define void @global_monotonic_sys_i16(ptr addrspace(1) %a) {
; SM60-LABEL: global_monotonic_sys_i16(
; SM60: {
; SM60-NEXT: .reg .b16 %rs<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [global_monotonic_sys_i16_param_0];
; SM60-NEXT: ld.volatile.global.b16 %rs1, [%rd1];
; SM60-NEXT: add.s16 %rs2, %rs1, 1;
; SM60-NEXT: st.volatile.global.b16 [%rd1], %rs2;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_monotonic_sys_i16(
; SM70: {
; SM70-NEXT: .reg .b16 %rs<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [global_monotonic_sys_i16_param_0];
; SM70-NEXT: ld.relaxed.sys.global.b16 %rs1, [%rd1];
; SM70-NEXT: add.s16 %rs2, %rs1, 1;
; SM70-NEXT: st.relaxed.sys.global.b16 [%rd1], %rs2;
; SM70-NEXT: ret;
%a.load = load atomic i16, ptr addrspace(1) %a monotonic, align 2
%a.add = add i16 %a.load, 1
store atomic i16 %a.add, ptr addrspace(1) %a monotonic, align 2
ret void
}
; Atomic monotonic i32 load, add 1, and store through a global (addrspace(1))
; pointer. The sm_20 run expects ld/st.volatile.global.b32; the sm_70 run
; expects ld/st.relaxed.sys.global.b32.
define void @global_monotonic_sys_i32(ptr addrspace(1) %a) {
; SM60-LABEL: global_monotonic_sys_i32(
; SM60: {
; SM60-NEXT: .reg .b32 %r<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [global_monotonic_sys_i32_param_0];
; SM60-NEXT: ld.volatile.global.b32 %r1, [%rd1];
; SM60-NEXT: add.s32 %r2, %r1, 1;
; SM60-NEXT: st.volatile.global.b32 [%rd1], %r2;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_monotonic_sys_i32(
; SM70: {
; SM70-NEXT: .reg .b32 %r<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [global_monotonic_sys_i32_param_0];
; SM70-NEXT: ld.relaxed.sys.global.b32 %r1, [%rd1];
; SM70-NEXT: add.s32 %r2, %r1, 1;
; SM70-NEXT: st.relaxed.sys.global.b32 [%rd1], %r2;
; SM70-NEXT: ret;
%a.load = load atomic i32, ptr addrspace(1) %a monotonic, align 4
%a.add = add i32 %a.load, 1
store atomic i32 %a.add, ptr addrspace(1) %a monotonic, align 4
ret void
}
; Atomic monotonic i64 load, add 1, and store through a global (addrspace(1))
; pointer. The sm_20 run expects ld/st.volatile.global.b64; the sm_70 run
; expects ld/st.relaxed.sys.global.b64.
define void @global_monotonic_sys_i64(ptr addrspace(1) %a) {
; SM60-LABEL: global_monotonic_sys_i64(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<4>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [global_monotonic_sys_i64_param_0];
; SM60-NEXT: ld.volatile.global.b64 %rd2, [%rd1];
; SM60-NEXT: add.s64 %rd3, %rd2, 1;
; SM60-NEXT: st.volatile.global.b64 [%rd1], %rd3;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_monotonic_sys_i64(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<4>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [global_monotonic_sys_i64_param_0];
; SM70-NEXT: ld.relaxed.sys.global.b64 %rd2, [%rd1];
; SM70-NEXT: add.s64 %rd3, %rd2, 1;
; SM70-NEXT: st.relaxed.sys.global.b64 [%rd1], %rd3;
; SM70-NEXT: ret;
%a.load = load atomic i64, ptr addrspace(1) %a monotonic, align 8
%a.add = add i64 %a.load, 1
store atomic i64 %a.add, ptr addrspace(1) %a monotonic, align 8
ret void
}
; Atomic monotonic float load, fadd 1.0, and store through a global
; (addrspace(1)) pointer. The sm_20 run expects ld/st.volatile.global.b32; the
; sm_70 run expects ld/st.relaxed.sys.global.b32. Both check add.rn.f32 on
; untyped .b32 registers.
define void @global_monotonic_sys_float(ptr addrspace(1) %a) {
; SM60-LABEL: global_monotonic_sys_float(
; SM60: {
; SM60-NEXT: .reg .b32 %r<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [global_monotonic_sys_float_param_0];
; SM60-NEXT: ld.volatile.global.b32 %r1, [%rd1];
; SM60-NEXT: add.rn.f32 %r2, %r1, 0f3F800000;
; SM60-NEXT: st.volatile.global.b32 [%rd1], %r2;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_monotonic_sys_float(
; SM70: {
; SM70-NEXT: .reg .b32 %r<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [global_monotonic_sys_float_param_0];
; SM70-NEXT: ld.relaxed.sys.global.b32 %r1, [%rd1];
; SM70-NEXT: add.rn.f32 %r2, %r1, 0f3F800000;
; SM70-NEXT: st.relaxed.sys.global.b32 [%rd1], %r2;
; SM70-NEXT: ret;
%a.load = load atomic float, ptr addrspace(1) %a monotonic, align 4
%a.add = fadd float %a.load, 1.
store atomic float %a.add, ptr addrspace(1) %a monotonic, align 4
ret void
}
; Atomic monotonic double load, fadd 1.0, and store through a global
; (addrspace(1)) pointer. The sm_20 run expects ld/st.volatile.global.b64; the
; sm_70 run expects ld/st.relaxed.sys.global.b64. Both check add.rn.f64 on
; untyped .b64 registers.
define void @global_monotonic_sys_double(ptr addrspace(1) %a) {
; SM60-LABEL: global_monotonic_sys_double(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<4>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [global_monotonic_sys_double_param_0];
; SM60-NEXT: ld.volatile.global.b64 %rd2, [%rd1];
; SM60-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000;
; SM60-NEXT: st.volatile.global.b64 [%rd1], %rd3;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_monotonic_sys_double(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<4>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [global_monotonic_sys_double_param_0];
; SM70-NEXT: ld.relaxed.sys.global.b64 %rd2, [%rd1];
; SM70-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000;
; SM70-NEXT: st.relaxed.sys.global.b64 [%rd1], %rd3;
; SM70-NEXT: ret;
%a.load = load atomic double, ptr addrspace(1) %a monotonic, align 8
%a.add = fadd double %a.load, 1.
store atomic double %a.add, ptr addrspace(1) %a monotonic, align 8
ret void
}
; global_monotonic_volatile_sys
; Atomic volatile monotonic i8 load, add 1, and store through a global
; (addrspace(1)) pointer. The sm_20 run expects ld/st.volatile.global.b8; the
; sm_70 run expects ld/st.mmio.relaxed.sys.global.b8. The add is checked as a
; 16-bit add.s16.
define void @global_monotonic_volatile_sys_i8(ptr addrspace(1) %a) {
; SM60-LABEL: global_monotonic_volatile_sys_i8(
; SM60: {
; SM60-NEXT: .reg .b16 %rs<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [global_monotonic_volatile_sys_i8_param_0];
; SM60-NEXT: ld.volatile.global.b8 %rs1, [%rd1];
; SM60-NEXT: add.s16 %rs2, %rs1, 1;
; SM60-NEXT: st.volatile.global.b8 [%rd1], %rs2;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_monotonic_volatile_sys_i8(
; SM70: {
; SM70-NEXT: .reg .b16 %rs<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [global_monotonic_volatile_sys_i8_param_0];
; SM70-NEXT: ld.mmio.relaxed.sys.global.b8 %rs1, [%rd1];
; SM70-NEXT: add.s16 %rs2, %rs1, 1;
; SM70-NEXT: st.mmio.relaxed.sys.global.b8 [%rd1], %rs2;
; SM70-NEXT: ret;
%a.load = load atomic volatile i8, ptr addrspace(1) %a monotonic, align 1
%a.add = add i8 %a.load, 1
store atomic volatile i8 %a.add, ptr addrspace(1) %a monotonic, align 1
ret void
}
; Atomic volatile monotonic i16 load, add 1, and store through a global
; (addrspace(1)) pointer. The sm_20 run expects ld/st.volatile.global.b16; the
; sm_70 run expects ld/st.mmio.relaxed.sys.global.b16.
define void @global_monotonic_volatile_sys_i16(ptr addrspace(1) %a) {
; SM60-LABEL: global_monotonic_volatile_sys_i16(
; SM60: {
; SM60-NEXT: .reg .b16 %rs<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [global_monotonic_volatile_sys_i16_param_0];
; SM60-NEXT: ld.volatile.global.b16 %rs1, [%rd1];
; SM60-NEXT: add.s16 %rs2, %rs1, 1;
; SM60-NEXT: st.volatile.global.b16 [%rd1], %rs2;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_monotonic_volatile_sys_i16(
; SM70: {
; SM70-NEXT: .reg .b16 %rs<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [global_monotonic_volatile_sys_i16_param_0];
; SM70-NEXT: ld.mmio.relaxed.sys.global.b16 %rs1, [%rd1];
; SM70-NEXT: add.s16 %rs2, %rs1, 1;
; SM70-NEXT: st.mmio.relaxed.sys.global.b16 [%rd1], %rs2;
; SM70-NEXT: ret;
%a.load = load atomic volatile i16, ptr addrspace(1) %a monotonic, align 2
%a.add = add i16 %a.load, 1
store atomic volatile i16 %a.add, ptr addrspace(1) %a monotonic, align 2
ret void
}
; Atomic volatile monotonic i32 load, add 1, and store through a global
; (addrspace(1)) pointer. The sm_20 run expects ld/st.volatile.global.b32; the
; sm_70 run expects ld/st.mmio.relaxed.sys.global.b32.
define void @global_monotonic_volatile_sys_i32(ptr addrspace(1) %a) {
; SM60-LABEL: global_monotonic_volatile_sys_i32(
; SM60: {
; SM60-NEXT: .reg .b32 %r<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [global_monotonic_volatile_sys_i32_param_0];
; SM60-NEXT: ld.volatile.global.b32 %r1, [%rd1];
; SM60-NEXT: add.s32 %r2, %r1, 1;
; SM60-NEXT: st.volatile.global.b32 [%rd1], %r2;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_monotonic_volatile_sys_i32(
; SM70: {
; SM70-NEXT: .reg .b32 %r<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [global_monotonic_volatile_sys_i32_param_0];
; SM70-NEXT: ld.mmio.relaxed.sys.global.b32 %r1, [%rd1];
; SM70-NEXT: add.s32 %r2, %r1, 1;
; SM70-NEXT: st.mmio.relaxed.sys.global.b32 [%rd1], %r2;
; SM70-NEXT: ret;
%a.load = load atomic volatile i32, ptr addrspace(1) %a monotonic, align 4
%a.add = add i32 %a.load, 1
store atomic volatile i32 %a.add, ptr addrspace(1) %a monotonic, align 4
ret void
}
; Atomic volatile monotonic i64 load, add 1, and store through a global
; (addrspace(1)) pointer. The sm_20 run expects ld/st.volatile.global.b64; the
; sm_70 run expects ld/st.mmio.relaxed.sys.global.b64.
define void @global_monotonic_volatile_sys_i64(ptr addrspace(1) %a) {
; SM60-LABEL: global_monotonic_volatile_sys_i64(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<4>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [global_monotonic_volatile_sys_i64_param_0];
; SM60-NEXT: ld.volatile.global.b64 %rd2, [%rd1];
; SM60-NEXT: add.s64 %rd3, %rd2, 1;
; SM60-NEXT: st.volatile.global.b64 [%rd1], %rd3;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_monotonic_volatile_sys_i64(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<4>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [global_monotonic_volatile_sys_i64_param_0];
; SM70-NEXT: ld.mmio.relaxed.sys.global.b64 %rd2, [%rd1];
; SM70-NEXT: add.s64 %rd3, %rd2, 1;
; SM70-NEXT: st.mmio.relaxed.sys.global.b64 [%rd1], %rd3;
; SM70-NEXT: ret;
%a.load = load atomic volatile i64, ptr addrspace(1) %a monotonic, align 8
%a.add = add i64 %a.load, 1
store atomic volatile i64 %a.add, ptr addrspace(1) %a monotonic, align 8
ret void
}
; Atomic volatile monotonic float load, fadd 1.0, and store through a global
; (addrspace(1)) pointer. The sm_20 run expects ld/st.volatile.global.b32; the
; sm_70 run expects ld/st.mmio.relaxed.sys.global.b32. Both check add.rn.f32
; on untyped .b32 registers.
define void @global_monotonic_volatile_sys_float(ptr addrspace(1) %a) {
; SM60-LABEL: global_monotonic_volatile_sys_float(
; SM60: {
; SM60-NEXT: .reg .b32 %r<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [global_monotonic_volatile_sys_float_param_0];
; SM60-NEXT: ld.volatile.global.b32 %r1, [%rd1];
; SM60-NEXT: add.rn.f32 %r2, %r1, 0f3F800000;
; SM60-NEXT: st.volatile.global.b32 [%rd1], %r2;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_monotonic_volatile_sys_float(
; SM70: {
; SM70-NEXT: .reg .b32 %r<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [global_monotonic_volatile_sys_float_param_0];
; SM70-NEXT: ld.mmio.relaxed.sys.global.b32 %r1, [%rd1];
; SM70-NEXT: add.rn.f32 %r2, %r1, 0f3F800000;
; SM70-NEXT: st.mmio.relaxed.sys.global.b32 [%rd1], %r2;
; SM70-NEXT: ret;
%a.load = load atomic volatile float, ptr addrspace(1) %a monotonic, align 4
%a.add = fadd float %a.load, 1.
store atomic volatile float %a.add, ptr addrspace(1) %a monotonic, align 4
ret void
}
; Atomic volatile monotonic double load, fadd 1.0, and store through a global
; (addrspace(1)) pointer. The sm_20 run expects ld/st.volatile.global.b64; the
; sm_70 run expects ld/st.mmio.relaxed.sys.global.b64. Both check add.rn.f64
; on untyped .b64 registers.
define void @global_monotonic_volatile_sys_double(ptr addrspace(1) %a) {
; SM60-LABEL: global_monotonic_volatile_sys_double(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<4>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [global_monotonic_volatile_sys_double_param_0];
; SM60-NEXT: ld.volatile.global.b64 %rd2, [%rd1];
; SM60-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000;
; SM60-NEXT: st.volatile.global.b64 [%rd1], %rd3;
; SM60-NEXT: ret;
;
; SM70-LABEL: global_monotonic_volatile_sys_double(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<4>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [global_monotonic_volatile_sys_double_param_0];
; SM70-NEXT: ld.mmio.relaxed.sys.global.b64 %rd2, [%rd1];
; SM70-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000;
; SM70-NEXT: st.mmio.relaxed.sys.global.b64 [%rd1], %rd3;
; SM70-NEXT: ret;
%a.load = load atomic volatile double, ptr addrspace(1) %a monotonic, align 8
%a.add = fadd double %a.load, 1.
store atomic volatile double %a.add, ptr addrspace(1) %a monotonic, align 8
ret void
}
;; shared statespace
; shared
; Plain (non-atomic, non-volatile) i8 load, add 1, and store through a shared
; (addrspace(3)) pointer. Both llc runs expect ld.shared.b8 / st.shared.b8,
; with the add checked as a 16-bit add.s16.
define void @shared_i8(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_i8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [shared_i8_param_0];
; CHECK-NEXT: ld.shared.b8 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.shared.b8 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load i8, ptr addrspace(3) %a
%a.add = add i8 %a.load, 1
store i8 %a.add, ptr addrspace(3) %a
ret void
}
; Plain (non-atomic, non-volatile) i16 load, add 1, and store through a shared
; (addrspace(3)) pointer. Both llc runs expect ld.shared.b16 / st.shared.b16.
define void @shared_i16(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [shared_i16_param_0];
; CHECK-NEXT: ld.shared.b16 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.shared.b16 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load i16, ptr addrspace(3) %a
%a.add = add i16 %a.load, 1
store i16 %a.add, ptr addrspace(3) %a
ret void
}
; Plain (non-atomic, non-volatile) i32 load, add 1, and store through a shared
; (addrspace(3)) pointer. Both llc runs expect ld.shared.b32 / st.shared.b32.
define void @shared_i32(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [shared_i32_param_0];
; CHECK-NEXT: ld.shared.b32 %r1, [%rd1];
; CHECK-NEXT: add.s32 %r2, %r1, 1;
; CHECK-NEXT: st.shared.b32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load i32, ptr addrspace(3) %a
%a.add = add i32 %a.load, 1
store i32 %a.add, ptr addrspace(3) %a
ret void
}
; Plain (non-atomic, non-volatile) i64 load, add 1, and store through a shared
; (addrspace(3)) pointer. Both llc runs expect ld.shared.b64 / st.shared.b64.
define void @shared_i64(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [shared_i64_param_0];
; CHECK-NEXT: ld.shared.b64 %rd2, [%rd1];
; CHECK-NEXT: add.s64 %rd3, %rd2, 1;
; CHECK-NEXT: st.shared.b64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load i64, ptr addrspace(3) %a
%a.add = add i64 %a.load, 1
store i64 %a.add, ptr addrspace(3) %a
ret void
}
; Plain (non-atomic, non-volatile) float load, fadd 1.0, and store through a
; shared (addrspace(3)) pointer. Both llc runs expect ld.shared.b32 /
; st.shared.b32 on untyped .b32 registers, with the add emitted as add.rn.f32.
define void @shared_float(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_float(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [shared_float_param_0];
; CHECK-NEXT: ld.shared.b32 %r1, [%rd1];
; CHECK-NEXT: add.rn.f32 %r2, %r1, 0f3F800000;
; CHECK-NEXT: st.shared.b32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load float, ptr addrspace(3) %a
%a.add = fadd float %a.load, 1.
store float %a.add, ptr addrspace(3) %a
ret void
}
; Plain (non-atomic, non-volatile) double load, fadd 1.0, and store through a
; shared (addrspace(3)) pointer. Both llc runs expect ld.shared.b64 /
; st.shared.b64 on untyped .b64 registers, with the add emitted as add.rn.f64.
define void @shared_double(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_double(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [shared_double_param_0];
; CHECK-NEXT: ld.shared.b64 %rd2, [%rd1];
; CHECK-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000;
; CHECK-NEXT: st.shared.b64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load double, ptr addrspace(3) %a
%a.add = fadd double %a.load, 1.
store double %a.add, ptr addrspace(3) %a
ret void
}
; shared_volatile
; Volatile (non-atomic) i8 load, add 1, and store through a shared
; (addrspace(3)) pointer. Both llc runs expect ld.volatile.shared.b8 /
; st.volatile.shared.b8, with the add checked as a 16-bit add.s16.
define void @shared_volatile_i8(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_volatile_i8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [shared_volatile_i8_param_0];
; CHECK-NEXT: ld.volatile.shared.b8 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.volatile.shared.b8 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load volatile i8, ptr addrspace(3) %a
%a.add = add i8 %a.load, 1
store volatile i8 %a.add, ptr addrspace(3) %a
ret void
}
; Volatile (non-atomic) i16 load, add 1, and store through a shared
; (addrspace(3)) pointer. Both llc runs expect ld.volatile.shared.b16 /
; st.volatile.shared.b16.
define void @shared_volatile_i16(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_volatile_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [shared_volatile_i16_param_0];
; CHECK-NEXT: ld.volatile.shared.b16 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.volatile.shared.b16 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load volatile i16, ptr addrspace(3) %a
%a.add = add i16 %a.load, 1
store volatile i16 %a.add, ptr addrspace(3) %a
ret void
}
; Volatile (non-atomic) i32 load, add 1, and store through a shared
; (addrspace(3)) pointer. Both llc runs expect ld.volatile.shared.b32 /
; st.volatile.shared.b32.
define void @shared_volatile_i32(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_volatile_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [shared_volatile_i32_param_0];
; CHECK-NEXT: ld.volatile.shared.b32 %r1, [%rd1];
; CHECK-NEXT: add.s32 %r2, %r1, 1;
; CHECK-NEXT: st.volatile.shared.b32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load volatile i32, ptr addrspace(3) %a
%a.add = add i32 %a.load, 1
store volatile i32 %a.add, ptr addrspace(3) %a
ret void
}
; Volatile (non-atomic) i64 load, add 1, and store through a shared
; (addrspace(3)) pointer. Both llc runs expect ld.volatile.shared.b64 /
; st.volatile.shared.b64.
define void @shared_volatile_i64(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_volatile_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [shared_volatile_i64_param_0];
; CHECK-NEXT: ld.volatile.shared.b64 %rd2, [%rd1];
; CHECK-NEXT: add.s64 %rd3, %rd2, 1;
; CHECK-NEXT: st.volatile.shared.b64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load volatile i64, ptr addrspace(3) %a
%a.add = add i64 %a.load, 1
store volatile i64 %a.add, ptr addrspace(3) %a
ret void
}
; Volatile (non-atomic) float load, fadd 1.0, and store through a shared
; (addrspace(3)) pointer. Both llc runs expect ld.volatile.shared.b32 /
; st.volatile.shared.b32 on untyped .b32 registers, with the add emitted as
; add.rn.f32.
define void @shared_volatile_float(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_volatile_float(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [shared_volatile_float_param_0];
; CHECK-NEXT: ld.volatile.shared.b32 %r1, [%rd1];
; CHECK-NEXT: add.rn.f32 %r2, %r1, 0f3F800000;
; CHECK-NEXT: st.volatile.shared.b32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load volatile float, ptr addrspace(3) %a
%a.add = fadd float %a.load, 1.
store volatile float %a.add, ptr addrspace(3) %a
ret void
}
; Volatile (non-atomic) double load, fadd 1.0, and store through a shared
; (addrspace(3)) pointer. Both llc runs expect ld.volatile.shared.b64 /
; st.volatile.shared.b64 on untyped .b64 registers, with the add emitted as
; add.rn.f64.
define void @shared_volatile_double(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_volatile_double(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [shared_volatile_double_param_0];
; CHECK-NEXT: ld.volatile.shared.b64 %rd2, [%rd1];
; CHECK-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000;
; CHECK-NEXT: st.volatile.shared.b64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load volatile double, ptr addrspace(3) %a
%a.add = fadd double %a.load, 1.
store volatile double %a.add, ptr addrspace(3) %a
ret void
}
; shared_unordered_sys
; Atomic unordered i8 load, add 1, and store through a shared (addrspace(3))
; pointer. The sm_20 run expects ld/st.volatile.shared.b8; the sm_70 run
; expects ld/st.relaxed.sys.shared.b8. The add is checked as a 16-bit add.s16.
define void @shared_unordered_sys_i8(ptr addrspace(3) %a) {
; SM60-LABEL: shared_unordered_sys_i8(
; SM60: {
; SM60-NEXT: .reg .b16 %rs<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [shared_unordered_sys_i8_param_0];
; SM60-NEXT: ld.volatile.shared.b8 %rs1, [%rd1];
; SM60-NEXT: add.s16 %rs2, %rs1, 1;
; SM60-NEXT: st.volatile.shared.b8 [%rd1], %rs2;
; SM60-NEXT: ret;
;
; SM70-LABEL: shared_unordered_sys_i8(
; SM70: {
; SM70-NEXT: .reg .b16 %rs<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [shared_unordered_sys_i8_param_0];
; SM70-NEXT: ld.relaxed.sys.shared.b8 %rs1, [%rd1];
; SM70-NEXT: add.s16 %rs2, %rs1, 1;
; SM70-NEXT: st.relaxed.sys.shared.b8 [%rd1], %rs2;
; SM70-NEXT: ret;
%a.load = load atomic i8, ptr addrspace(3) %a unordered, align 1
%a.add = add i8 %a.load, 1
store atomic i8 %a.add, ptr addrspace(3) %a unordered, align 1
ret void
}
; Atomic unordered i16 load, add 1, and store through a shared (addrspace(3))
; pointer. The sm_20 run expects ld/st.volatile.shared.b16; the sm_70 run
; expects ld/st.relaxed.sys.shared.b16.
define void @shared_unordered_sys_i16(ptr addrspace(3) %a) {
; SM60-LABEL: shared_unordered_sys_i16(
; SM60: {
; SM60-NEXT: .reg .b16 %rs<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [shared_unordered_sys_i16_param_0];
; SM60-NEXT: ld.volatile.shared.b16 %rs1, [%rd1];
; SM60-NEXT: add.s16 %rs2, %rs1, 1;
; SM60-NEXT: st.volatile.shared.b16 [%rd1], %rs2;
; SM60-NEXT: ret;
;
; SM70-LABEL: shared_unordered_sys_i16(
; SM70: {
; SM70-NEXT: .reg .b16 %rs<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [shared_unordered_sys_i16_param_0];
; SM70-NEXT: ld.relaxed.sys.shared.b16 %rs1, [%rd1];
; SM70-NEXT: add.s16 %rs2, %rs1, 1;
; SM70-NEXT: st.relaxed.sys.shared.b16 [%rd1], %rs2;
; SM70-NEXT: ret;
%a.load = load atomic i16, ptr addrspace(3) %a unordered, align 2
%a.add = add i16 %a.load, 1
store atomic i16 %a.add, ptr addrspace(3) %a unordered, align 2
ret void
}
; Unordered atomic i32 load, add 1, and unordered atomic store through an
; addrspace(3) pointer. The expected PTX differs per run configuration:
; ld/st.volatile.shared.b32 under the SM60 prefix and
; ld/st.relaxed.sys.shared.b32 under the SM70 prefix.
define void @shared_unordered_sys_i32(ptr addrspace(3) %a) {
; SM60-LABEL: shared_unordered_sys_i32(
; SM60: {
; SM60-NEXT: .reg .b32 %r<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [shared_unordered_sys_i32_param_0];
; SM60-NEXT: ld.volatile.shared.b32 %r1, [%rd1];
; SM60-NEXT: add.s32 %r2, %r1, 1;
; SM60-NEXT: st.volatile.shared.b32 [%rd1], %r2;
; SM60-NEXT: ret;
;
; SM70-LABEL: shared_unordered_sys_i32(
; SM70: {
; SM70-NEXT: .reg .b32 %r<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [shared_unordered_sys_i32_param_0];
; SM70-NEXT: ld.relaxed.sys.shared.b32 %r1, [%rd1];
; SM70-NEXT: add.s32 %r2, %r1, 1;
; SM70-NEXT: st.relaxed.sys.shared.b32 [%rd1], %r2;
; SM70-NEXT: ret;
%a.load = load atomic i32, ptr addrspace(3) %a unordered, align 4
%a.add = add i32 %a.load, 1
store atomic i32 %a.add, ptr addrspace(3) %a unordered, align 4
ret void
}
; Unordered atomic i64 load, add 1, and unordered atomic store through an
; addrspace(3) pointer. The expected PTX differs per run configuration:
; ld/st.volatile.shared.b64 under the SM60 prefix and
; ld/st.relaxed.sys.shared.b64 under the SM70 prefix.
define void @shared_unordered_sys_i64(ptr addrspace(3) %a) {
; SM60-LABEL: shared_unordered_sys_i64(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<4>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [shared_unordered_sys_i64_param_0];
; SM60-NEXT: ld.volatile.shared.b64 %rd2, [%rd1];
; SM60-NEXT: add.s64 %rd3, %rd2, 1;
; SM60-NEXT: st.volatile.shared.b64 [%rd1], %rd3;
; SM60-NEXT: ret;
;
; SM70-LABEL: shared_unordered_sys_i64(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<4>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [shared_unordered_sys_i64_param_0];
; SM70-NEXT: ld.relaxed.sys.shared.b64 %rd2, [%rd1];
; SM70-NEXT: add.s64 %rd3, %rd2, 1;
; SM70-NEXT: st.relaxed.sys.shared.b64 [%rd1], %rd3;
; SM70-NEXT: ret;
%a.load = load atomic i64, ptr addrspace(3) %a unordered, align 8
%a.add = add i64 %a.load, 1
store atomic i64 %a.add, ptr addrspace(3) %a unordered, align 8
ret void
}
; Unordered atomic float load, fadd 1.0, and unordered atomic store through
; an addrspace(3) pointer. The expected PTX differs per run configuration:
; ld/st.volatile.shared.b32 under the SM60 prefix and
; ld/st.relaxed.sys.shared.b32 under the SM70 prefix.
; The float value is checked in untyped .b32 registers with add.rn.f32.
define void @shared_unordered_sys_float(ptr addrspace(3) %a) {
; SM60-LABEL: shared_unordered_sys_float(
; SM60: {
; SM60-NEXT: .reg .b32 %r<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [shared_unordered_sys_float_param_0];
; SM60-NEXT: ld.volatile.shared.b32 %r1, [%rd1];
; SM60-NEXT: add.rn.f32 %r2, %r1, 0f3F800000;
; SM60-NEXT: st.volatile.shared.b32 [%rd1], %r2;
; SM60-NEXT: ret;
;
; SM70-LABEL: shared_unordered_sys_float(
; SM70: {
; SM70-NEXT: .reg .b32 %r<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [shared_unordered_sys_float_param_0];
; SM70-NEXT: ld.relaxed.sys.shared.b32 %r1, [%rd1];
; SM70-NEXT: add.rn.f32 %r2, %r1, 0f3F800000;
; SM70-NEXT: st.relaxed.sys.shared.b32 [%rd1], %r2;
; SM70-NEXT: ret;
%a.load = load atomic float, ptr addrspace(3) %a unordered, align 4
%a.add = fadd float %a.load, 1.
store atomic float %a.add, ptr addrspace(3) %a unordered, align 4
ret void
}
; Unordered atomic double load, fadd 1.0, and unordered atomic store through
; an addrspace(3) pointer. The expected PTX differs per run configuration:
; ld/st.volatile.shared.b64 under the SM60 prefix and
; ld/st.relaxed.sys.shared.b64 under the SM70 prefix.
; The double value is checked in untyped .b64 registers with add.rn.f64.
define void @shared_unordered_sys_double(ptr addrspace(3) %a) {
; SM60-LABEL: shared_unordered_sys_double(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<4>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [shared_unordered_sys_double_param_0];
; SM60-NEXT: ld.volatile.shared.b64 %rd2, [%rd1];
; SM60-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000;
; SM60-NEXT: st.volatile.shared.b64 [%rd1], %rd3;
; SM60-NEXT: ret;
;
; SM70-LABEL: shared_unordered_sys_double(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<4>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [shared_unordered_sys_double_param_0];
; SM70-NEXT: ld.relaxed.sys.shared.b64 %rd2, [%rd1];
; SM70-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000;
; SM70-NEXT: st.relaxed.sys.shared.b64 [%rd1], %rd3;
; SM70-NEXT: ret;
%a.load = load atomic double, ptr addrspace(3) %a unordered, align 8
%a.add = fadd double %a.load, 1.
store atomic double %a.add, ptr addrspace(3) %a unordered, align 8
ret void
}
; shared_unordered_volatile_sys
; Unordered atomic volatile i8 load, add 1, and store through an addrspace(3)
; pointer. Both run configurations share the common CHECK prefix and expect
; ld/st.volatile.shared.b8 (no relaxed.sys form).
; The i8 increment is checked as a 16-bit add (add.s16 on %rs registers).
define void @shared_unordered_volatile_sys_i8(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_unordered_volatile_sys_i8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [shared_unordered_volatile_sys_i8_param_0];
; CHECK-NEXT: ld.volatile.shared.b8 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.volatile.shared.b8 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile i8, ptr addrspace(3) %a unordered, align 1
%a.add = add i8 %a.load, 1
store atomic volatile i8 %a.add, ptr addrspace(3) %a unordered, align 1
ret void
}
; Unordered atomic volatile i16 load, add 1, and store through an
; addrspace(3) pointer. Both run configurations share the common CHECK prefix
; and expect ld/st.volatile.shared.b16 (no relaxed.sys form).
define void @shared_unordered_volatile_sys_i16(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_unordered_volatile_sys_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [shared_unordered_volatile_sys_i16_param_0];
; CHECK-NEXT: ld.volatile.shared.b16 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.volatile.shared.b16 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile i16, ptr addrspace(3) %a unordered, align 2
%a.add = add i16 %a.load, 1
store atomic volatile i16 %a.add, ptr addrspace(3) %a unordered, align 2
ret void
}
; Unordered atomic volatile i32 load, add 1, and store through an
; addrspace(3) pointer. Both run configurations share the common CHECK prefix
; and expect ld/st.volatile.shared.b32 (no relaxed.sys form).
define void @shared_unordered_volatile_sys_i32(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_unordered_volatile_sys_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [shared_unordered_volatile_sys_i32_param_0];
; CHECK-NEXT: ld.volatile.shared.b32 %r1, [%rd1];
; CHECK-NEXT: add.s32 %r2, %r1, 1;
; CHECK-NEXT: st.volatile.shared.b32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile i32, ptr addrspace(3) %a unordered, align 4
%a.add = add i32 %a.load, 1
store atomic volatile i32 %a.add, ptr addrspace(3) %a unordered, align 4
ret void
}
; Unordered atomic volatile i64 load, add 1, and store through an
; addrspace(3) pointer. Both run configurations share the common CHECK prefix
; and expect ld/st.volatile.shared.b64 (no relaxed.sys form).
define void @shared_unordered_volatile_sys_i64(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_unordered_volatile_sys_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [shared_unordered_volatile_sys_i64_param_0];
; CHECK-NEXT: ld.volatile.shared.b64 %rd2, [%rd1];
; CHECK-NEXT: add.s64 %rd3, %rd2, 1;
; CHECK-NEXT: st.volatile.shared.b64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load atomic volatile i64, ptr addrspace(3) %a unordered, align 8
%a.add = add i64 %a.load, 1
store atomic volatile i64 %a.add, ptr addrspace(3) %a unordered, align 8
ret void
}
; Unordered atomic volatile float load, fadd 1.0, and store through an
; addrspace(3) pointer. Both run configurations share the common CHECK prefix
; and expect ld/st.volatile.shared.b32 (no relaxed.sys form).
; The float value is checked in untyped .b32 registers with add.rn.f32.
define void @shared_unordered_volatile_sys_float(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_unordered_volatile_sys_float(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [shared_unordered_volatile_sys_float_param_0];
; CHECK-NEXT: ld.volatile.shared.b32 %r1, [%rd1];
; CHECK-NEXT: add.rn.f32 %r2, %r1, 0f3F800000;
; CHECK-NEXT: st.volatile.shared.b32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile float, ptr addrspace(3) %a unordered, align 4
%a.add = fadd float %a.load, 1.
store atomic volatile float %a.add, ptr addrspace(3) %a unordered, align 4
ret void
}
; Unordered atomic volatile double load, fadd 1.0, and store through an
; addrspace(3) pointer. Both run configurations share the common CHECK prefix
; and expect ld/st.volatile.shared.b64 (no relaxed.sys form).
; The double value is checked in untyped .b64 registers with add.rn.f64.
define void @shared_unordered_volatile_sys_double(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_unordered_volatile_sys_double(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [shared_unordered_volatile_sys_double_param_0];
; CHECK-NEXT: ld.volatile.shared.b64 %rd2, [%rd1];
; CHECK-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000;
; CHECK-NEXT: st.volatile.shared.b64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load atomic volatile double, ptr addrspace(3) %a unordered, align 8
%a.add = fadd double %a.load, 1.
store atomic volatile double %a.add, ptr addrspace(3) %a unordered, align 8
ret void
}
; shared_monotonic_sys
; Monotonic atomic i8 load, add 1, and monotonic atomic store through an
; addrspace(3) pointer. The expected PTX differs per run configuration:
; ld/st.volatile.shared.b8 under the SM60 prefix and
; ld/st.relaxed.sys.shared.b8 under the SM70 prefix.
; The i8 increment is checked as a 16-bit add (add.s16 on %rs registers).
define void @shared_monotonic_sys_i8(ptr addrspace(3) %a) {
; SM60-LABEL: shared_monotonic_sys_i8(
; SM60: {
; SM60-NEXT: .reg .b16 %rs<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [shared_monotonic_sys_i8_param_0];
; SM60-NEXT: ld.volatile.shared.b8 %rs1, [%rd1];
; SM60-NEXT: add.s16 %rs2, %rs1, 1;
; SM60-NEXT: st.volatile.shared.b8 [%rd1], %rs2;
; SM60-NEXT: ret;
;
; SM70-LABEL: shared_monotonic_sys_i8(
; SM70: {
; SM70-NEXT: .reg .b16 %rs<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [shared_monotonic_sys_i8_param_0];
; SM70-NEXT: ld.relaxed.sys.shared.b8 %rs1, [%rd1];
; SM70-NEXT: add.s16 %rs2, %rs1, 1;
; SM70-NEXT: st.relaxed.sys.shared.b8 [%rd1], %rs2;
; SM70-NEXT: ret;
%a.load = load atomic i8, ptr addrspace(3) %a monotonic, align 1
%a.add = add i8 %a.load, 1
store atomic i8 %a.add, ptr addrspace(3) %a monotonic, align 1
ret void
}
; Monotonic atomic i16 load, add 1, and monotonic atomic store through an
; addrspace(3) pointer. The expected PTX differs per run configuration:
; ld/st.volatile.shared.b16 under the SM60 prefix and
; ld/st.relaxed.sys.shared.b16 under the SM70 prefix.
define void @shared_monotonic_sys_i16(ptr addrspace(3) %a) {
; SM60-LABEL: shared_monotonic_sys_i16(
; SM60: {
; SM60-NEXT: .reg .b16 %rs<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [shared_monotonic_sys_i16_param_0];
; SM60-NEXT: ld.volatile.shared.b16 %rs1, [%rd1];
; SM60-NEXT: add.s16 %rs2, %rs1, 1;
; SM60-NEXT: st.volatile.shared.b16 [%rd1], %rs2;
; SM60-NEXT: ret;
;
; SM70-LABEL: shared_monotonic_sys_i16(
; SM70: {
; SM70-NEXT: .reg .b16 %rs<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [shared_monotonic_sys_i16_param_0];
; SM70-NEXT: ld.relaxed.sys.shared.b16 %rs1, [%rd1];
; SM70-NEXT: add.s16 %rs2, %rs1, 1;
; SM70-NEXT: st.relaxed.sys.shared.b16 [%rd1], %rs2;
; SM70-NEXT: ret;
%a.load = load atomic i16, ptr addrspace(3) %a monotonic, align 2
%a.add = add i16 %a.load, 1
store atomic i16 %a.add, ptr addrspace(3) %a monotonic, align 2
ret void
}
; Monotonic atomic i32 load, add 1, and monotonic atomic store through an
; addrspace(3) pointer. The expected PTX differs per run configuration:
; ld/st.volatile.shared.b32 under the SM60 prefix and
; ld/st.relaxed.sys.shared.b32 under the SM70 prefix.
define void @shared_monotonic_sys_i32(ptr addrspace(3) %a) {
; SM60-LABEL: shared_monotonic_sys_i32(
; SM60: {
; SM60-NEXT: .reg .b32 %r<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [shared_monotonic_sys_i32_param_0];
; SM60-NEXT: ld.volatile.shared.b32 %r1, [%rd1];
; SM60-NEXT: add.s32 %r2, %r1, 1;
; SM60-NEXT: st.volatile.shared.b32 [%rd1], %r2;
; SM60-NEXT: ret;
;
; SM70-LABEL: shared_monotonic_sys_i32(
; SM70: {
; SM70-NEXT: .reg .b32 %r<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [shared_monotonic_sys_i32_param_0];
; SM70-NEXT: ld.relaxed.sys.shared.b32 %r1, [%rd1];
; SM70-NEXT: add.s32 %r2, %r1, 1;
; SM70-NEXT: st.relaxed.sys.shared.b32 [%rd1], %r2;
; SM70-NEXT: ret;
%a.load = load atomic i32, ptr addrspace(3) %a monotonic, align 4
%a.add = add i32 %a.load, 1
store atomic i32 %a.add, ptr addrspace(3) %a monotonic, align 4
ret void
}
; Monotonic atomic i64 load, add 1, and monotonic atomic store through an
; addrspace(3) pointer. The expected PTX differs per run configuration:
; ld/st.volatile.shared.b64 under the SM60 prefix and
; ld/st.relaxed.sys.shared.b64 under the SM70 prefix.
define void @shared_monotonic_sys_i64(ptr addrspace(3) %a) {
; SM60-LABEL: shared_monotonic_sys_i64(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<4>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [shared_monotonic_sys_i64_param_0];
; SM60-NEXT: ld.volatile.shared.b64 %rd2, [%rd1];
; SM60-NEXT: add.s64 %rd3, %rd2, 1;
; SM60-NEXT: st.volatile.shared.b64 [%rd1], %rd3;
; SM60-NEXT: ret;
;
; SM70-LABEL: shared_monotonic_sys_i64(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<4>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [shared_monotonic_sys_i64_param_0];
; SM70-NEXT: ld.relaxed.sys.shared.b64 %rd2, [%rd1];
; SM70-NEXT: add.s64 %rd3, %rd2, 1;
; SM70-NEXT: st.relaxed.sys.shared.b64 [%rd1], %rd3;
; SM70-NEXT: ret;
%a.load = load atomic i64, ptr addrspace(3) %a monotonic, align 8
%a.add = add i64 %a.load, 1
store atomic i64 %a.add, ptr addrspace(3) %a monotonic, align 8
ret void
}
; Monotonic atomic float load, fadd 1.0, and monotonic atomic store through
; an addrspace(3) pointer. The expected PTX differs per run configuration:
; ld/st.volatile.shared.b32 under the SM60 prefix and
; ld/st.relaxed.sys.shared.b32 under the SM70 prefix.
; The float value is checked in untyped .b32 registers with add.rn.f32.
define void @shared_monotonic_sys_float(ptr addrspace(3) %a) {
; SM60-LABEL: shared_monotonic_sys_float(
; SM60: {
; SM60-NEXT: .reg .b32 %r<3>;
; SM60-NEXT: .reg .b64 %rd<2>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [shared_monotonic_sys_float_param_0];
; SM60-NEXT: ld.volatile.shared.b32 %r1, [%rd1];
; SM60-NEXT: add.rn.f32 %r2, %r1, 0f3F800000;
; SM60-NEXT: st.volatile.shared.b32 [%rd1], %r2;
; SM60-NEXT: ret;
;
; SM70-LABEL: shared_monotonic_sys_float(
; SM70: {
; SM70-NEXT: .reg .b32 %r<3>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [shared_monotonic_sys_float_param_0];
; SM70-NEXT: ld.relaxed.sys.shared.b32 %r1, [%rd1];
; SM70-NEXT: add.rn.f32 %r2, %r1, 0f3F800000;
; SM70-NEXT: st.relaxed.sys.shared.b32 [%rd1], %r2;
; SM70-NEXT: ret;
%a.load = load atomic float, ptr addrspace(3) %a monotonic, align 4
%a.add = fadd float %a.load, 1.
store atomic float %a.add, ptr addrspace(3) %a monotonic, align 4
ret void
}
; Monotonic atomic double load, fadd 1.0, and monotonic atomic store through
; an addrspace(3) pointer. The expected PTX differs per run configuration:
; ld/st.volatile.shared.b64 under the SM60 prefix and
; ld/st.relaxed.sys.shared.b64 under the SM70 prefix.
; The double value is checked in untyped .b64 registers with add.rn.f64.
define void @shared_monotonic_sys_double(ptr addrspace(3) %a) {
; SM60-LABEL: shared_monotonic_sys_double(
; SM60: {
; SM60-NEXT: .reg .b64 %rd<4>;
; SM60-EMPTY:
; SM60-NEXT: // %bb.0:
; SM60-NEXT: ld.param.b64 %rd1, [shared_monotonic_sys_double_param_0];
; SM60-NEXT: ld.volatile.shared.b64 %rd2, [%rd1];
; SM60-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000;
; SM60-NEXT: st.volatile.shared.b64 [%rd1], %rd3;
; SM60-NEXT: ret;
;
; SM70-LABEL: shared_monotonic_sys_double(
; SM70: {
; SM70-NEXT: .reg .b64 %rd<4>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b64 %rd1, [shared_monotonic_sys_double_param_0];
; SM70-NEXT: ld.relaxed.sys.shared.b64 %rd2, [%rd1];
; SM70-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000;
; SM70-NEXT: st.relaxed.sys.shared.b64 [%rd1], %rd3;
; SM70-NEXT: ret;
%a.load = load atomic double, ptr addrspace(3) %a monotonic, align 8
%a.add = fadd double %a.load, 1.
store atomic double %a.add, ptr addrspace(3) %a monotonic, align 8
ret void
}
; shared_monotonic_volatile_sys
; Monotonic atomic volatile i8 load, add 1, and store through an addrspace(3)
; pointer. Both run configurations share the common CHECK prefix and expect
; ld/st.volatile.shared.b8 (no relaxed.sys form).
; The i8 increment is checked as a 16-bit add (add.s16 on %rs registers).
define void @shared_monotonic_volatile_sys_i8(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_monotonic_volatile_sys_i8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [shared_monotonic_volatile_sys_i8_param_0];
; CHECK-NEXT: ld.volatile.shared.b8 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.volatile.shared.b8 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile i8, ptr addrspace(3) %a monotonic, align 1
%a.add = add i8 %a.load, 1
store atomic volatile i8 %a.add, ptr addrspace(3) %a monotonic, align 1
ret void
}
; Monotonic atomic volatile i16 load, add 1, and store through an
; addrspace(3) pointer. Both run configurations share the common CHECK prefix
; and expect ld/st.volatile.shared.b16 (no relaxed.sys form).
define void @shared_monotonic_volatile_sys_i16(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_monotonic_volatile_sys_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [shared_monotonic_volatile_sys_i16_param_0];
; CHECK-NEXT: ld.volatile.shared.b16 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.volatile.shared.b16 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile i16, ptr addrspace(3) %a monotonic, align 2
%a.add = add i16 %a.load, 1
store atomic volatile i16 %a.add, ptr addrspace(3) %a monotonic, align 2
ret void
}
; Monotonic atomic volatile i32 load, add 1, and store through an
; addrspace(3) pointer. Both run configurations share the common CHECK prefix
; and expect ld/st.volatile.shared.b32 (no relaxed.sys form).
define void @shared_monotonic_volatile_sys_i32(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_monotonic_volatile_sys_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [shared_monotonic_volatile_sys_i32_param_0];
; CHECK-NEXT: ld.volatile.shared.b32 %r1, [%rd1];
; CHECK-NEXT: add.s32 %r2, %r1, 1;
; CHECK-NEXT: st.volatile.shared.b32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile i32, ptr addrspace(3) %a monotonic, align 4
%a.add = add i32 %a.load, 1
store atomic volatile i32 %a.add, ptr addrspace(3) %a monotonic, align 4
ret void
}
; Monotonic atomic volatile i64 load, add 1, and store through an
; addrspace(3) pointer. Both run configurations share the common CHECK prefix
; and expect ld/st.volatile.shared.b64 (no relaxed.sys form).
define void @shared_monotonic_volatile_sys_i64(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_monotonic_volatile_sys_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [shared_monotonic_volatile_sys_i64_param_0];
; CHECK-NEXT: ld.volatile.shared.b64 %rd2, [%rd1];
; CHECK-NEXT: add.s64 %rd3, %rd2, 1;
; CHECK-NEXT: st.volatile.shared.b64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load atomic volatile i64, ptr addrspace(3) %a monotonic, align 8
%a.add = add i64 %a.load, 1
store atomic volatile i64 %a.add, ptr addrspace(3) %a monotonic, align 8
ret void
}
; Monotonic atomic volatile float load, fadd 1.0, and store through an
; addrspace(3) pointer. Both run configurations share the common CHECK prefix
; and expect ld/st.volatile.shared.b32 (no relaxed.sys form).
; The float value is checked in untyped .b32 registers with add.rn.f32.
define void @shared_monotonic_volatile_sys_float(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_monotonic_volatile_sys_float(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [shared_monotonic_volatile_sys_float_param_0];
; CHECK-NEXT: ld.volatile.shared.b32 %r1, [%rd1];
; CHECK-NEXT: add.rn.f32 %r2, %r1, 0f3F800000;
; CHECK-NEXT: st.volatile.shared.b32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile float, ptr addrspace(3) %a monotonic, align 4
%a.add = fadd float %a.load, 1.
store atomic volatile float %a.add, ptr addrspace(3) %a monotonic, align 4
ret void
}
; Monotonic atomic volatile double load, fadd 1.0, and store through an
; addrspace(3) pointer. Both run configurations share the common CHECK prefix
; and expect ld/st.volatile.shared.b64 (no relaxed.sys form).
; The double value is checked in untyped .b64 registers with add.rn.f64.
define void @shared_monotonic_volatile_sys_double(ptr addrspace(3) %a) {
; CHECK-LABEL: shared_monotonic_volatile_sys_double(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [shared_monotonic_volatile_sys_double_param_0];
; CHECK-NEXT: ld.volatile.shared.b64 %rd2, [%rd1];
; CHECK-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000;
; CHECK-NEXT: st.volatile.shared.b64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load atomic volatile double, ptr addrspace(3) %a monotonic, align 8
%a.add = fadd double %a.load, 1.
store atomic volatile double %a.add, ptr addrspace(3) %a monotonic, align 8
ret void
}
;; local statespace
; local
; Plain (non-atomic, non-volatile) i8 load, add 1, and store through an
; addrspace(5) pointer. Both run configurations expect ld.local.b8 /
; st.local.b8.
; The i8 increment is checked as a 16-bit add (add.s16 on %rs registers).
define void @local_i8(ptr addrspace(5) %a) {
; CHECK-LABEL: local_i8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_i8_param_0];
; CHECK-NEXT: ld.local.b8 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.local.b8 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load i8, ptr addrspace(5) %a
%a.add = add i8 %a.load, 1
store i8 %a.add, ptr addrspace(5) %a
ret void
}
; Plain (non-atomic, non-volatile) i16 load, add 1, and store through an
; addrspace(5) pointer. Both run configurations expect ld.local.b16 /
; st.local.b16.
define void @local_i16(ptr addrspace(5) %a) {
; CHECK-LABEL: local_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_i16_param_0];
; CHECK-NEXT: ld.local.b16 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.local.b16 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load i16, ptr addrspace(5) %a
%a.add = add i16 %a.load, 1
store i16 %a.add, ptr addrspace(5) %a
ret void
}
; Plain (non-atomic, non-volatile) i32 load, add 1, and store through an
; addrspace(5) pointer. Both run configurations expect ld.local.b32 /
; st.local.b32.
define void @local_i32(ptr addrspace(5) %a) {
; CHECK-LABEL: local_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_i32_param_0];
; CHECK-NEXT: ld.local.b32 %r1, [%rd1];
; CHECK-NEXT: add.s32 %r2, %r1, 1;
; CHECK-NEXT: st.local.b32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load i32, ptr addrspace(5) %a
%a.add = add i32 %a.load, 1
store i32 %a.add, ptr addrspace(5) %a
ret void
}
; Plain (non-atomic, non-volatile) i64 load, add 1, and store through an
; addrspace(5) pointer. Both run configurations expect ld.local.b64 /
; st.local.b64.
define void @local_i64(ptr addrspace(5) %a) {
; CHECK-LABEL: local_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_i64_param_0];
; CHECK-NEXT: ld.local.b64 %rd2, [%rd1];
; CHECK-NEXT: add.s64 %rd3, %rd2, 1;
; CHECK-NEXT: st.local.b64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load i64, ptr addrspace(5) %a
%a.add = add i64 %a.load, 1
store i64 %a.add, ptr addrspace(5) %a
ret void
}
; Plain (non-atomic, non-volatile) float load, fadd 1.0, and store through an
; addrspace(5) pointer. Both run configurations expect ld.local.b32 /
; st.local.b32.
; The float value is checked in untyped .b32 registers with add.rn.f32.
define void @local_float(ptr addrspace(5) %a) {
; CHECK-LABEL: local_float(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_float_param_0];
; CHECK-NEXT: ld.local.b32 %r1, [%rd1];
; CHECK-NEXT: add.rn.f32 %r2, %r1, 0f3F800000;
; CHECK-NEXT: st.local.b32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load float, ptr addrspace(5) %a
%a.add = fadd float %a.load, 1.
store float %a.add, ptr addrspace(5) %a
ret void
}
; Plain (non-atomic, non-volatile) double load, fadd 1.0, and store through
; an addrspace(5) pointer. Both run configurations expect ld.local.b64 /
; st.local.b64.
; The double value is checked in untyped .b64 registers with add.rn.f64.
define void @local_double(ptr addrspace(5) %a) {
; CHECK-LABEL: local_double(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_double_param_0];
; CHECK-NEXT: ld.local.b64 %rd2, [%rd1];
; CHECK-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000;
; CHECK-NEXT: st.local.b64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load double, ptr addrspace(5) %a
%a.add = fadd double %a.load, 1.
store double %a.add, ptr addrspace(5) %a
ret void
}
; local_volatile
; Volatile i8 load, add 1, and volatile store through an addrspace(5)
; pointer. The checks expect plain ld.local.b8 / st.local.b8: no .volatile
; qualifier appears in the expected PTX for either run configuration.
; The i8 increment is checked as a 16-bit add (add.s16 on %rs registers).
define void @local_volatile_i8(ptr addrspace(5) %a) {
; CHECK-LABEL: local_volatile_i8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_i8_param_0];
; CHECK-NEXT: ld.local.b8 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.local.b8 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load volatile i8, ptr addrspace(5) %a
%a.add = add i8 %a.load, 1
store volatile i8 %a.add, ptr addrspace(5) %a
ret void
}
; Volatile i16 load, add 1, and volatile store through an addrspace(5)
; pointer. The checks expect plain ld.local.b16 / st.local.b16: no .volatile
; qualifier appears in the expected PTX for either run configuration.
define void @local_volatile_i16(ptr addrspace(5) %a) {
; CHECK-LABEL: local_volatile_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_i16_param_0];
; CHECK-NEXT: ld.local.b16 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.local.b16 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load volatile i16, ptr addrspace(5) %a
%a.add = add i16 %a.load, 1
store volatile i16 %a.add, ptr addrspace(5) %a
ret void
}
; Volatile i32 load, add 1, and volatile store through an addrspace(5)
; pointer. The checks expect plain ld.local.b32 / st.local.b32: no .volatile
; qualifier appears in the expected PTX for either run configuration.
define void @local_volatile_i32(ptr addrspace(5) %a) {
; CHECK-LABEL: local_volatile_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_i32_param_0];
; CHECK-NEXT: ld.local.b32 %r1, [%rd1];
; CHECK-NEXT: add.s32 %r2, %r1, 1;
; CHECK-NEXT: st.local.b32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load volatile i32, ptr addrspace(5) %a
%a.add = add i32 %a.load, 1
store volatile i32 %a.add, ptr addrspace(5) %a
ret void
}
; Volatile i64 load, add 1, and volatile store through an addrspace(5)
; pointer. The checks expect plain ld.local.b64 / st.local.b64: no .volatile
; qualifier appears in the expected PTX for either run configuration.
define void @local_volatile_i64(ptr addrspace(5) %a) {
; CHECK-LABEL: local_volatile_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_i64_param_0];
; CHECK-NEXT: ld.local.b64 %rd2, [%rd1];
; CHECK-NEXT: add.s64 %rd3, %rd2, 1;
; CHECK-NEXT: st.local.b64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load volatile i64, ptr addrspace(5) %a
%a.add = add i64 %a.load, 1
store volatile i64 %a.add, ptr addrspace(5) %a
ret void
}
; Volatile float load, fadd 1.0, and volatile store through an addrspace(5)
; pointer. The checks expect plain ld.local.b32 / st.local.b32: no .volatile
; qualifier appears in the expected PTX for either run configuration.
; The float value is checked in untyped .b32 registers with add.rn.f32.
define void @local_volatile_float(ptr addrspace(5) %a) {
; CHECK-LABEL: local_volatile_float(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_float_param_0];
; CHECK-NEXT: ld.local.b32 %r1, [%rd1];
; CHECK-NEXT: add.rn.f32 %r2, %r1, 0f3F800000;
; CHECK-NEXT: st.local.b32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load volatile float, ptr addrspace(5) %a
%a.add = fadd float %a.load, 1.
store volatile float %a.add, ptr addrspace(5) %a
ret void
}
; Volatile double load, fadd 1.0, and volatile store through an addrspace(5)
; pointer. The checks expect plain ld.local.b64 / st.local.b64: no .volatile
; qualifier appears in the expected PTX for either run configuration.
; The double value is checked in untyped .b64 registers with add.rn.f64.
define void @local_volatile_double(ptr addrspace(5) %a) {
; CHECK-LABEL: local_volatile_double(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_double_param_0];
; CHECK-NEXT: ld.local.b64 %rd2, [%rd1];
; CHECK-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000;
; CHECK-NEXT: st.local.b64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load volatile double, ptr addrspace(5) %a
%a.add = fadd double %a.load, 1.
store volatile double %a.add, ptr addrspace(5) %a
ret void
}
; local_unordered_sys
; Unordered atomic i8 load, add 1, and unordered atomic store through an
; addrspace(5) pointer. The checks expect plain ld.local.b8 / st.local.b8 for
; both run configurations (no .relaxed, .sys, or .volatile qualifier).
; The i8 increment is checked as a 16-bit add (add.s16 on %rs registers).
define void @local_unordered_sys_i8(ptr addrspace(5) %a) {
; CHECK-LABEL: local_unordered_sys_i8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_unordered_sys_i8_param_0];
; CHECK-NEXT: ld.local.b8 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.local.b8 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load atomic i8, ptr addrspace(5) %a unordered, align 1
%a.add = add i8 %a.load, 1
store atomic i8 %a.add, ptr addrspace(5) %a unordered, align 1
ret void
}
; Unordered atomic i16 load, add 1, and unordered atomic store through an
; addrspace(5) pointer. The checks expect plain ld.local.b16 / st.local.b16
; for both run configurations (no .relaxed, .sys, or .volatile qualifier).
define void @local_unordered_sys_i16(ptr addrspace(5) %a) {
; CHECK-LABEL: local_unordered_sys_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_unordered_sys_i16_param_0];
; CHECK-NEXT: ld.local.b16 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.local.b16 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load atomic i16, ptr addrspace(5) %a unordered, align 2
%a.add = add i16 %a.load, 1
store atomic i16 %a.add, ptr addrspace(5) %a unordered, align 2
ret void
}
; Unordered atomic i32 load, add 1, and unordered atomic store through an
; addrspace(5) pointer. The checks expect plain ld.local.b32 / st.local.b32
; for both run configurations (no .relaxed, .sys, or .volatile qualifier).
define void @local_unordered_sys_i32(ptr addrspace(5) %a) {
; CHECK-LABEL: local_unordered_sys_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_unordered_sys_i32_param_0];
; CHECK-NEXT: ld.local.b32 %r1, [%rd1];
; CHECK-NEXT: add.s32 %r2, %r1, 1;
; CHECK-NEXT: st.local.b32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load atomic i32, ptr addrspace(5) %a unordered, align 4
%a.add = add i32 %a.load, 1
store atomic i32 %a.add, ptr addrspace(5) %a unordered, align 4
ret void
}
; Unordered atomic i64 load, add 1, and unordered atomic store through an
; addrspace(5) pointer. The checks expect plain ld.local.b64 / st.local.b64
; for both run configurations (no .relaxed, .sys, or .volatile qualifier).
define void @local_unordered_sys_i64(ptr addrspace(5) %a) {
; CHECK-LABEL: local_unordered_sys_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_unordered_sys_i64_param_0];
; CHECK-NEXT: ld.local.b64 %rd2, [%rd1];
; CHECK-NEXT: add.s64 %rd3, %rd2, 1;
; CHECK-NEXT: st.local.b64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load atomic i64, ptr addrspace(5) %a unordered, align 8
%a.add = add i64 %a.load, 1
store atomic i64 %a.add, ptr addrspace(5) %a unordered, align 8
ret void
}
; Unordered atomic float load, fadd 1.0, and unordered atomic store through
; an addrspace(5) pointer. The checks expect plain ld.local.b32 /
; st.local.b32 for both run configurations (no .relaxed, .sys, or .volatile
; qualifier).
; The float value is checked in untyped .b32 registers with add.rn.f32.
define void @local_unordered_sys_float(ptr addrspace(5) %a) {
; CHECK-LABEL: local_unordered_sys_float(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_unordered_sys_float_param_0];
; CHECK-NEXT: ld.local.b32 %r1, [%rd1];
; CHECK-NEXT: add.rn.f32 %r2, %r1, 0f3F800000;
; CHECK-NEXT: st.local.b32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load atomic float, ptr addrspace(5) %a unordered, align 4
%a.add = fadd float %a.load, 1.
store atomic float %a.add, ptr addrspace(5) %a unordered, align 4
ret void
}
; Unordered atomic double load, fadd 1.0, and unordered atomic store through
; an addrspace(5) pointer. The checks expect plain ld.local.b64 /
; st.local.b64 for both run configurations (no .relaxed, .sys, or .volatile
; qualifier).
; The double value is checked in untyped .b64 registers with add.rn.f64.
define void @local_unordered_sys_double(ptr addrspace(5) %a) {
; CHECK-LABEL: local_unordered_sys_double(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_unordered_sys_double_param_0];
; CHECK-NEXT: ld.local.b64 %rd2, [%rd1];
; CHECK-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000;
; CHECK-NEXT: st.local.b64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load atomic double, ptr addrspace(5) %a unordered, align 8
%a.add = fadd double %a.load, 1.
store atomic double %a.add, ptr addrspace(5) %a unordered, align 8
ret void
}
; local_unordered_volatile_sys
; Unordered atomic volatile i8 load, add 1, and store through an addrspace(5)
; pointer. The checks expect plain ld.local.b8 / st.local.b8 for both run
; configurations (no .relaxed, .sys, or .volatile qualifier).
; The i8 increment is checked as a 16-bit add (add.s16 on %rs registers).
define void @local_unordered_volatile_sys_i8(ptr addrspace(5) %a) {
; CHECK-LABEL: local_unordered_volatile_sys_i8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_unordered_volatile_sys_i8_param_0];
; CHECK-NEXT: ld.local.b8 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.local.b8 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile i8, ptr addrspace(5) %a unordered, align 1
%a.add = add i8 %a.load, 1
store atomic volatile i8 %a.add, ptr addrspace(5) %a unordered, align 1
ret void
}
; Unordered atomic volatile i16 load, add 1, and store through an
; addrspace(5) pointer. The checks expect plain ld.local.b16 / st.local.b16
; for both run configurations (no .relaxed, .sys, or .volatile qualifier).
define void @local_unordered_volatile_sys_i16(ptr addrspace(5) %a) {
; CHECK-LABEL: local_unordered_volatile_sys_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_unordered_volatile_sys_i16_param_0];
; CHECK-NEXT: ld.local.b16 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.local.b16 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile i16, ptr addrspace(5) %a unordered, align 2
%a.add = add i16 %a.load, 1
store atomic volatile i16 %a.add, ptr addrspace(5) %a unordered, align 2
ret void
}
; Unordered atomic volatile i32 load, add 1, and store through an
; addrspace(5) pointer. The checks expect plain ld.local.b32 / st.local.b32
; for both run configurations (no .relaxed, .sys, or .volatile qualifier).
define void @local_unordered_volatile_sys_i32(ptr addrspace(5) %a) {
; CHECK-LABEL: local_unordered_volatile_sys_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_unordered_volatile_sys_i32_param_0];
; CHECK-NEXT: ld.local.b32 %r1, [%rd1];
; CHECK-NEXT: add.s32 %r2, %r1, 1;
; CHECK-NEXT: st.local.b32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile i32, ptr addrspace(5) %a unordered, align 4
%a.add = add i32 %a.load, 1
store atomic volatile i32 %a.add, ptr addrspace(5) %a unordered, align 4
ret void
}
; Unordered atomic volatile i64 load, add 1, and store through an
; addrspace(5) pointer. The checks expect plain ld.local.b64 / st.local.b64
; for both run configurations (no .relaxed, .sys, or .volatile qualifier).
define void @local_unordered_volatile_sys_i64(ptr addrspace(5) %a) {
; CHECK-LABEL: local_unordered_volatile_sys_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_unordered_volatile_sys_i64_param_0];
; CHECK-NEXT: ld.local.b64 %rd2, [%rd1];
; CHECK-NEXT: add.s64 %rd3, %rd2, 1;
; CHECK-NEXT: st.local.b64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load atomic volatile i64, ptr addrspace(5) %a unordered, align 8
%a.add = add i64 %a.load, 1
store atomic volatile i64 %a.add, ptr addrspace(5) %a unordered, align 8
ret void
}
; Unordered atomic volatile float load, fadd 1.0, and store through an
; addrspace(5) pointer. The checks expect plain ld.local.b32 / st.local.b32
; for both run configurations (no .relaxed, .sys, or .volatile qualifier).
; The float value is checked in untyped .b32 registers with add.rn.f32.
define void @local_unordered_volatile_sys_float(ptr addrspace(5) %a) {
; CHECK-LABEL: local_unordered_volatile_sys_float(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_unordered_volatile_sys_float_param_0];
; CHECK-NEXT: ld.local.b32 %r1, [%rd1];
; CHECK-NEXT: add.rn.f32 %r2, %r1, 0f3F800000;
; CHECK-NEXT: st.local.b32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile float, ptr addrspace(5) %a unordered, align 4
%a.add = fadd float %a.load, 1.
store atomic volatile float %a.add, ptr addrspace(5) %a unordered, align 4
ret void
}
; Unordered atomic volatile double load, fadd 1.0, and store through an
; addrspace(5) pointer. The checks expect plain ld.local.b64 / st.local.b64
; for both run configurations (no .relaxed, .sys, or .volatile qualifier).
; The double value is checked in untyped .b64 registers with add.rn.f64.
define void @local_unordered_volatile_sys_double(ptr addrspace(5) %a) {
; CHECK-LABEL: local_unordered_volatile_sys_double(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_unordered_volatile_sys_double_param_0];
; CHECK-NEXT: ld.local.b64 %rd2, [%rd1];
; CHECK-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000;
; CHECK-NEXT: st.local.b64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load atomic volatile double, ptr addrspace(5) %a unordered, align 8
%a.add = fadd double %a.load, 1.
store atomic volatile double %a.add, ptr addrspace(5) %a unordered, align 8
ret void
}
; local_monotonic_sys
; Monotonic atomic i8 load, add 1, and monotonic atomic store through an
; addrspace(5) pointer. The checks expect plain ld.local.b8 / st.local.b8 for
; both run configurations (no .relaxed, .sys, or .volatile qualifier).
; The i8 increment is checked as a 16-bit add (add.s16 on %rs registers).
define void @local_monotonic_sys_i8(ptr addrspace(5) %a) {
; CHECK-LABEL: local_monotonic_sys_i8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_monotonic_sys_i8_param_0];
; CHECK-NEXT: ld.local.b8 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.local.b8 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load atomic i8, ptr addrspace(5) %a monotonic, align 1
%a.add = add i8 %a.load, 1
store atomic i8 %a.add, ptr addrspace(5) %a monotonic, align 1
ret void
}
; Monotonic atomic load / add 1 / store of an i16 through an addrspace(5)
; pointer. No syncscope is written on the IR operations.
; The expected PTX is a plain ld.local.b16 / add.s16 / st.local.b16 sequence
; on the %rs registers, with no ordering or scope qualifier on either
; memory access.
define void @local_monotonic_sys_i16(ptr addrspace(5) %a) {
; CHECK-LABEL: local_monotonic_sys_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_monotonic_sys_i16_param_0];
; CHECK-NEXT: ld.local.b16 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.local.b16 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load atomic i16, ptr addrspace(5) %a monotonic, align 2
%a.add = add i16 %a.load, 1
store atomic i16 %a.add, ptr addrspace(5) %a monotonic, align 2
ret void
}
; Monotonic atomic load / add 1 / store of an i32 through an addrspace(5)
; pointer. No syncscope is written on the IR operations.
; The expected PTX is a plain ld.local.b32 / add.s32 / st.local.b32 sequence
; on the %r registers, with no ordering or scope qualifier on either
; memory access.
define void @local_monotonic_sys_i32(ptr addrspace(5) %a) {
; CHECK-LABEL: local_monotonic_sys_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_monotonic_sys_i32_param_0];
; CHECK-NEXT: ld.local.b32 %r1, [%rd1];
; CHECK-NEXT: add.s32 %r2, %r1, 1;
; CHECK-NEXT: st.local.b32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load atomic i32, ptr addrspace(5) %a monotonic, align 4
%a.add = add i32 %a.load, 1
store atomic i32 %a.add, ptr addrspace(5) %a monotonic, align 4
ret void
}
; Monotonic atomic load / add 1 / store of an i64 through an addrspace(5)
; pointer. No syncscope is written on the IR operations.
; The expected PTX is a plain ld.local.b64 / add.s64 / st.local.b64 sequence
; with no ordering or scope qualifier. Pointer and data both use the single
; .b64 %rd register set, hence the one %rd<4> declaration.
define void @local_monotonic_sys_i64(ptr addrspace(5) %a) {
; CHECK-LABEL: local_monotonic_sys_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_monotonic_sys_i64_param_0];
; CHECK-NEXT: ld.local.b64 %rd2, [%rd1];
; CHECK-NEXT: add.s64 %rd3, %rd2, 1;
; CHECK-NEXT: st.local.b64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load atomic i64, ptr addrspace(5) %a monotonic, align 8
%a.add = add i64 %a.load, 1
store atomic i64 %a.add, ptr addrspace(5) %a monotonic, align 8
ret void
}
; Monotonic atomic load / fadd 1.0 / store of a float through an addrspace(5)
; pointer. No syncscope is written on the IR operations.
; The expected PTX is a plain ld.local.b32 / st.local.b32 pair with no
; ordering or scope qualifier. The float is held in an untyped .b32 %r
; register and 0f3F800000 is the IEEE-754 single-precision encoding of 1.0.
define void @local_monotonic_sys_float(ptr addrspace(5) %a) {
; CHECK-LABEL: local_monotonic_sys_float(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_monotonic_sys_float_param_0];
; CHECK-NEXT: ld.local.b32 %r1, [%rd1];
; CHECK-NEXT: add.rn.f32 %r2, %r1, 0f3F800000;
; CHECK-NEXT: st.local.b32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load atomic float, ptr addrspace(5) %a monotonic, align 4
%a.add = fadd float %a.load, 1.
store atomic float %a.add, ptr addrspace(5) %a monotonic, align 4
ret void
}
; Monotonic atomic load / fadd 1.0 / store of a double through an
; addrspace(5) pointer. No syncscope is written on the IR operations.
; The expected PTX is a plain ld.local.b64 / st.local.b64 pair with no
; ordering or scope qualifier. The double shares the single untyped .b64 %rd
; register set with the pointer, and 0d3FF0000000000000 is the IEEE-754
; double-precision encoding of 1.0.
define void @local_monotonic_sys_double(ptr addrspace(5) %a) {
; CHECK-LABEL: local_monotonic_sys_double(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_monotonic_sys_double_param_0];
; CHECK-NEXT: ld.local.b64 %rd2, [%rd1];
; CHECK-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000;
; CHECK-NEXT: st.local.b64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load atomic double, ptr addrspace(5) %a monotonic, align 8
%a.add = fadd double %a.load, 1.
store atomic double %a.add, ptr addrspace(5) %a monotonic, align 8
ret void
}
; local_monotonic_volatile_sys
; Monotonic, volatile atomic load / add 1 / store of an i8 through an
; addrspace(5) pointer. No syncscope is written on the IR operations.
; The expected PTX is a plain ld.local.b8 / st.local.b8 pair with no
; volatile, ordering or scope qualifier. The increment itself is done with
; add.s16 on the 16-bit %rs registers, while the memory accesses stay 8 bits
; wide.
define void @local_monotonic_volatile_sys_i8(ptr addrspace(5) %a) {
; CHECK-LABEL: local_monotonic_volatile_sys_i8(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_monotonic_volatile_sys_i8_param_0];
; CHECK-NEXT: ld.local.b8 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.local.b8 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile i8, ptr addrspace(5) %a monotonic, align 1
%a.add = add i8 %a.load, 1
store atomic volatile i8 %a.add, ptr addrspace(5) %a monotonic, align 1
ret void
}
; Monotonic, volatile atomic load / add 1 / store of an i16 through an
; addrspace(5) pointer. No syncscope is written on the IR operations.
; The expected PTX is a plain ld.local.b16 / add.s16 / st.local.b16 sequence
; on the %rs registers, with no volatile, ordering or scope qualifier on
; either memory access.
define void @local_monotonic_volatile_sys_i16(ptr addrspace(5) %a) {
; CHECK-LABEL: local_monotonic_volatile_sys_i16(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_monotonic_volatile_sys_i16_param_0];
; CHECK-NEXT: ld.local.b16 %rs1, [%rd1];
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: st.local.b16 [%rd1], %rs2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile i16, ptr addrspace(5) %a monotonic, align 2
%a.add = add i16 %a.load, 1
store atomic volatile i16 %a.add, ptr addrspace(5) %a monotonic, align 2
ret void
}
; Monotonic, volatile atomic load / add 1 / store of an i32 through an
; addrspace(5) pointer. No syncscope is written on the IR operations.
; The expected PTX is a plain ld.local.b32 / add.s32 / st.local.b32 sequence
; on the %r registers, with no volatile, ordering or scope qualifier on
; either memory access.
define void @local_monotonic_volatile_sys_i32(ptr addrspace(5) %a) {
; CHECK-LABEL: local_monotonic_volatile_sys_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_monotonic_volatile_sys_i32_param_0];
; CHECK-NEXT: ld.local.b32 %r1, [%rd1];
; CHECK-NEXT: add.s32 %r2, %r1, 1;
; CHECK-NEXT: st.local.b32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile i32, ptr addrspace(5) %a monotonic, align 4
%a.add = add i32 %a.load, 1
store atomic volatile i32 %a.add, ptr addrspace(5) %a monotonic, align 4
ret void
}
; Monotonic, volatile atomic load / add 1 / store of an i64 through an
; addrspace(5) pointer. No syncscope is written on the IR operations.
; The expected PTX is a plain ld.local.b64 / add.s64 / st.local.b64 sequence
; with no volatile, ordering or scope qualifier. Pointer and data both use
; the single .b64 %rd register set, hence the one %rd<4> declaration.
define void @local_monotonic_volatile_sys_i64(ptr addrspace(5) %a) {
; CHECK-LABEL: local_monotonic_volatile_sys_i64(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_monotonic_volatile_sys_i64_param_0];
; CHECK-NEXT: ld.local.b64 %rd2, [%rd1];
; CHECK-NEXT: add.s64 %rd3, %rd2, 1;
; CHECK-NEXT: st.local.b64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load atomic volatile i64, ptr addrspace(5) %a monotonic, align 8
%a.add = add i64 %a.load, 1
store atomic volatile i64 %a.add, ptr addrspace(5) %a monotonic, align 8
ret void
}
; Monotonic, volatile atomic load / fadd 1.0 / store of a float through an
; addrspace(5) pointer. No syncscope is written on the IR operations.
; The expected PTX is a plain ld.local.b32 / st.local.b32 pair with no
; volatile, ordering or scope qualifier. The float is held in an untyped
; .b32 %r register and 0f3F800000 is the IEEE-754 single-precision encoding
; of 1.0.
define void @local_monotonic_volatile_sys_float(ptr addrspace(5) %a) {
; CHECK-LABEL: local_monotonic_volatile_sys_float(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_monotonic_volatile_sys_float_param_0];
; CHECK-NEXT: ld.local.b32 %r1, [%rd1];
; CHECK-NEXT: add.rn.f32 %r2, %r1, 0f3F800000;
; CHECK-NEXT: st.local.b32 [%rd1], %r2;
; CHECK-NEXT: ret;
%a.load = load atomic volatile float, ptr addrspace(5) %a monotonic, align 4
%a.add = fadd float %a.load, 1.
store atomic volatile float %a.add, ptr addrspace(5) %a monotonic, align 4
ret void
}
; Monotonic, volatile atomic load / fadd 1.0 / store of a double through an
; addrspace(5) pointer. No syncscope is written on the IR operations.
; The expected PTX is a plain ld.local.b64 / st.local.b64 pair with no
; volatile, ordering or scope qualifier. The double shares the single
; untyped .b64 %rd register set with the pointer, and 0d3FF0000000000000 is
; the IEEE-754 double-precision encoding of 1.0.
define void @local_monotonic_volatile_sys_double(ptr addrspace(5) %a) {
; CHECK-LABEL: local_monotonic_volatile_sys_double(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_monotonic_volatile_sys_double_param_0];
; CHECK-NEXT: ld.local.b64 %rd2, [%rd1];
; CHECK-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000;
; CHECK-NEXT: st.local.b64 [%rd1], %rd3;
; CHECK-NEXT: ret;
%a.load = load atomic volatile double, ptr addrspace(5) %a monotonic, align 8
%a.add = fadd double %a.load, 1.
store atomic volatile double %a.add, ptr addrspace(5) %a monotonic, align 8
ret void
}