; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck -check-prefixes=CHECK,SM60 %s ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %} ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx82 | FileCheck %s -check-prefixes=CHECK,SM70 ; RUN: %if ptxas-12.2 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx82 | %ptxas-verify -arch=sm_70 %} ; TODO: generate PTX that preserves Concurrent Forward Progress ; for atomic operations to local statespace ; by generating atomic or volatile operations. ; TODO: add weak,atomic,volatile,atomic volatile tests ; for .const and .param statespaces. ; TODO: optimize .sys.shared into .cta.shared or .cluster.shared . ;; generic statespace ; generic define void @generic_i8(ptr %a) { ; CHECK-LABEL: generic_i8( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [generic_i8_param_0]; ; CHECK-NEXT: ld.b8 %rs1, [%rd1]; ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; ; CHECK-NEXT: st.b8 [%rd1], %rs2; ; CHECK-NEXT: ret; %a.load = load i8, ptr %a %a.add = add i8 %a.load, 1 store i8 %a.add, ptr %a ret void } define void @generic_i16(ptr %a) { ; CHECK-LABEL: generic_i16( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [generic_i16_param_0]; ; CHECK-NEXT: ld.b16 %rs1, [%rd1]; ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; ; CHECK-NEXT: st.b16 [%rd1], %rs2; ; CHECK-NEXT: ret; %a.load = load i16, ptr %a %a.add = add i16 %a.load, 1 store i16 %a.add, ptr %a ret void } define void @generic_i32(ptr %a) { ; CHECK-LABEL: generic_i32( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [generic_i32_param_0]; ; CHECK-NEXT: ld.b32 %r1, [%rd1]; ; CHECK-NEXT: add.s32 %r2, %r1, 1; ; CHECK-NEXT: st.b32 [%rd1], %r2; ; CHECK-NEXT: ret; %a.load = load i32, ptr %a %a.add = add i32 %a.load, 1 store i32 %a.add, ptr %a ret void } define void @generic_i64(ptr %a) { ; CHECK-LABEL: generic_i64( ; CHECK: { ; CHECK-NEXT: .reg .b64 %rd<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [generic_i64_param_0]; ; CHECK-NEXT: ld.b64 %rd2, [%rd1]; ; CHECK-NEXT: add.s64 %rd3, %rd2, 1; ; CHECK-NEXT: st.b64 [%rd1], %rd3; ; CHECK-NEXT: ret; %a.load = load i64, ptr %a %a.add = add i64 %a.load, 1 store i64 %a.add, ptr %a ret void } define void @generic_float(ptr %a) { ; CHECK-LABEL: generic_float( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [generic_float_param_0]; ; CHECK-NEXT: ld.b32 %r1, [%rd1]; ; CHECK-NEXT: add.rn.f32 %r2, %r1, 0f3F800000; ; CHECK-NEXT: st.b32 [%rd1], %r2; ; CHECK-NEXT: ret; %a.load = load float, ptr %a %a.add = fadd float %a.load, 1. store float %a.add, ptr %a ret void } define void @generic_double(ptr %a) { ; CHECK-LABEL: generic_double( ; CHECK: { ; CHECK-NEXT: .reg .b64 %rd<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [generic_double_param_0]; ; CHECK-NEXT: ld.b64 %rd2, [%rd1]; ; CHECK-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000; ; CHECK-NEXT: st.b64 [%rd1], %rd3; ; CHECK-NEXT: ret; %a.load = load double, ptr %a %a.add = fadd double %a.load, 1. 
store double %a.add, ptr %a ret void } ; generic_volatile define void @generic_volatile_i8(ptr %a) { ; CHECK-LABEL: generic_volatile_i8( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [generic_volatile_i8_param_0]; ; CHECK-NEXT: ld.volatile.b8 %rs1, [%rd1]; ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; ; CHECK-NEXT: st.volatile.b8 [%rd1], %rs2; ; CHECK-NEXT: ret; %a.load = load volatile i8, ptr %a %a.add = add i8 %a.load, 1 store volatile i8 %a.add, ptr %a ret void } define void @generic_volatile_i16(ptr %a) { ; CHECK-LABEL: generic_volatile_i16( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [generic_volatile_i16_param_0]; ; CHECK-NEXT: ld.volatile.b16 %rs1, [%rd1]; ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; ; CHECK-NEXT: st.volatile.b16 [%rd1], %rs2; ; CHECK-NEXT: ret; %a.load = load volatile i16, ptr %a %a.add = add i16 %a.load, 1 store volatile i16 %a.add, ptr %a ret void } define void @generic_volatile_i32(ptr %a) { ; CHECK-LABEL: generic_volatile_i32( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [generic_volatile_i32_param_0]; ; CHECK-NEXT: ld.volatile.b32 %r1, [%rd1]; ; CHECK-NEXT: add.s32 %r2, %r1, 1; ; CHECK-NEXT: st.volatile.b32 [%rd1], %r2; ; CHECK-NEXT: ret; %a.load = load volatile i32, ptr %a %a.add = add i32 %a.load, 1 store volatile i32 %a.add, ptr %a ret void } define void @generic_volatile_i64(ptr %a) { ; CHECK-LABEL: generic_volatile_i64( ; CHECK: { ; CHECK-NEXT: .reg .b64 %rd<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [generic_volatile_i64_param_0]; ; CHECK-NEXT: ld.volatile.b64 %rd2, [%rd1]; ; CHECK-NEXT: add.s64 %rd3, %rd2, 1; ; CHECK-NEXT: st.volatile.b64 [%rd1], %rd3; ; CHECK-NEXT: ret; %a.load = load volatile i64, ptr %a %a.add = add i64 %a.load, 1 store volatile i64 %a.add, ptr %a ret void } define void @generic_volatile_float(ptr %a) { ; CHECK-LABEL: generic_volatile_float( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [generic_volatile_float_param_0]; ; CHECK-NEXT: ld.volatile.b32 %r1, [%rd1]; ; CHECK-NEXT: add.rn.f32 %r2, %r1, 0f3F800000; ; CHECK-NEXT: st.volatile.b32 [%rd1], %r2; ; CHECK-NEXT: ret; %a.load = load volatile float, ptr %a %a.add = fadd float %a.load, 1. store volatile float %a.add, ptr %a ret void } define void @generic_volatile_double(ptr %a) { ; CHECK-LABEL: generic_volatile_double( ; CHECK: { ; CHECK-NEXT: .reg .b64 %rd<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [generic_volatile_double_param_0]; ; CHECK-NEXT: ld.volatile.b64 %rd2, [%rd1]; ; CHECK-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000; ; CHECK-NEXT: st.volatile.b64 [%rd1], %rd3; ; CHECK-NEXT: ret; %a.load = load volatile double, ptr %a %a.add = fadd double %a.load, 1. 
store volatile double %a.add, ptr %a ret void } ; generic_unordered_sys define void @generic_unordered_sys_i8(ptr %a) { ; SM60-LABEL: generic_unordered_sys_i8( ; SM60: { ; SM60-NEXT: .reg .b16 %rs<3>; ; SM60-NEXT: .reg .b64 %rd<2>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [generic_unordered_sys_i8_param_0]; ; SM60-NEXT: ld.volatile.b8 %rs1, [%rd1]; ; SM60-NEXT: add.s16 %rs2, %rs1, 1; ; SM60-NEXT: st.volatile.b8 [%rd1], %rs2; ; SM60-NEXT: ret; ; ; SM70-LABEL: generic_unordered_sys_i8( ; SM70: { ; SM70-NEXT: .reg .b16 %rs<3>; ; SM70-NEXT: .reg .b64 %rd<2>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [generic_unordered_sys_i8_param_0]; ; SM70-NEXT: ld.relaxed.sys.b8 %rs1, [%rd1]; ; SM70-NEXT: add.s16 %rs2, %rs1, 1; ; SM70-NEXT: st.relaxed.sys.b8 [%rd1], %rs2; ; SM70-NEXT: ret; %a.load = load atomic i8, ptr %a unordered, align 1 %a.add = add i8 %a.load, 1 store atomic i8 %a.add, ptr %a unordered, align 1 ret void } define void @generic_unordered_sys_i16(ptr %a) { ; SM60-LABEL: generic_unordered_sys_i16( ; SM60: { ; SM60-NEXT: .reg .b16 %rs<3>; ; SM60-NEXT: .reg .b64 %rd<2>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [generic_unordered_sys_i16_param_0]; ; SM60-NEXT: ld.volatile.b16 %rs1, [%rd1]; ; SM60-NEXT: add.s16 %rs2, %rs1, 1; ; SM60-NEXT: st.volatile.b16 [%rd1], %rs2; ; SM60-NEXT: ret; ; ; SM70-LABEL: generic_unordered_sys_i16( ; SM70: { ; SM70-NEXT: .reg .b16 %rs<3>; ; SM70-NEXT: .reg .b64 %rd<2>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [generic_unordered_sys_i16_param_0]; ; SM70-NEXT: ld.relaxed.sys.b16 %rs1, [%rd1]; ; SM70-NEXT: add.s16 %rs2, %rs1, 1; ; SM70-NEXT: st.relaxed.sys.b16 [%rd1], %rs2; ; SM70-NEXT: ret; %a.load = load atomic i16, ptr %a unordered, align 2 %a.add = add i16 %a.load, 1 store atomic i16 %a.add, ptr %a unordered, align 2 ret void } define void @generic_unordered_sys_i32(ptr %a) { ; SM60-LABEL: generic_unordered_sys_i32( ; SM60: { ; SM60-NEXT: .reg .b32 %r<3>; ; SM60-NEXT: .reg .b64 %rd<2>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [generic_unordered_sys_i32_param_0]; ; SM60-NEXT: ld.volatile.b32 %r1, [%rd1]; ; SM60-NEXT: add.s32 %r2, %r1, 1; ; SM60-NEXT: st.volatile.b32 [%rd1], %r2; ; SM60-NEXT: ret; ; ; SM70-LABEL: generic_unordered_sys_i32( ; SM70: { ; SM70-NEXT: .reg .b32 %r<3>; ; SM70-NEXT: .reg .b64 %rd<2>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [generic_unordered_sys_i32_param_0]; ; SM70-NEXT: ld.relaxed.sys.b32 %r1, [%rd1]; ; SM70-NEXT: add.s32 %r2, %r1, 1; ; SM70-NEXT: st.relaxed.sys.b32 [%rd1], %r2; ; SM70-NEXT: ret; %a.load = load atomic i32, ptr %a unordered, align 4 %a.add = add i32 %a.load, 1 store atomic i32 %a.add, ptr %a unordered, align 4 ret void } define void @generic_unordered_sys_i64(ptr %a) { ; SM60-LABEL: generic_unordered_sys_i64( ; SM60: { ; SM60-NEXT: .reg .b64 %rd<4>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [generic_unordered_sys_i64_param_0]; ; SM60-NEXT: ld.volatile.b64 %rd2, [%rd1]; ; SM60-NEXT: add.s64 %rd3, %rd2, 1; ; SM60-NEXT: st.volatile.b64 [%rd1], %rd3; ; SM60-NEXT: ret; ; ; SM70-LABEL: generic_unordered_sys_i64( ; SM70: { ; SM70-NEXT: .reg .b64 %rd<4>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [generic_unordered_sys_i64_param_0]; ; SM70-NEXT: ld.relaxed.sys.b64 %rd2, [%rd1]; ; SM70-NEXT: add.s64 %rd3, %rd2, 1; ; SM70-NEXT: st.relaxed.sys.b64 [%rd1], %rd3; ; SM70-NEXT: ret; %a.load = load atomic i64, ptr %a 
unordered, align 8 %a.add = add i64 %a.load, 1 store atomic i64 %a.add, ptr %a unordered, align 8 ret void } define void @generic_unordered_sys_float(ptr %a) { ; SM60-LABEL: generic_unordered_sys_float( ; SM60: { ; SM60-NEXT: .reg .b32 %r<3>; ; SM60-NEXT: .reg .b64 %rd<2>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [generic_unordered_sys_float_param_0]; ; SM60-NEXT: ld.volatile.b32 %r1, [%rd1]; ; SM60-NEXT: add.rn.f32 %r2, %r1, 0f3F800000; ; SM60-NEXT: st.volatile.b32 [%rd1], %r2; ; SM60-NEXT: ret; ; ; SM70-LABEL: generic_unordered_sys_float( ; SM70: { ; SM70-NEXT: .reg .b32 %r<3>; ; SM70-NEXT: .reg .b64 %rd<2>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [generic_unordered_sys_float_param_0]; ; SM70-NEXT: ld.relaxed.sys.b32 %r1, [%rd1]; ; SM70-NEXT: add.rn.f32 %r2, %r1, 0f3F800000; ; SM70-NEXT: st.relaxed.sys.b32 [%rd1], %r2; ; SM70-NEXT: ret; %a.load = load atomic float, ptr %a unordered, align 4 %a.add = fadd float %a.load, 1. store atomic float %a.add, ptr %a unordered, align 4 ret void } define void @generic_unordered_sys_double(ptr %a) { ; SM60-LABEL: generic_unordered_sys_double( ; SM60: { ; SM60-NEXT: .reg .b64 %rd<4>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [generic_unordered_sys_double_param_0]; ; SM60-NEXT: ld.volatile.b64 %rd2, [%rd1]; ; SM60-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000; ; SM60-NEXT: st.volatile.b64 [%rd1], %rd3; ; SM60-NEXT: ret; ; ; SM70-LABEL: generic_unordered_sys_double( ; SM70: { ; SM70-NEXT: .reg .b64 %rd<4>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [generic_unordered_sys_double_param_0]; ; SM70-NEXT: ld.relaxed.sys.b64 %rd2, [%rd1]; ; SM70-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000; ; SM70-NEXT: st.relaxed.sys.b64 [%rd1], %rd3; ; SM70-NEXT: ret; %a.load = load atomic double, ptr %a unordered, align 8 %a.add = fadd double %a.load, 1. 
store atomic double %a.add, ptr %a unordered, align 8 ret void } ; generic_unordered_volatile_sys define void @generic_unordered_volatile_sys_i8(ptr %a) { ; CHECK-LABEL: generic_unordered_volatile_sys_i8( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [generic_unordered_volatile_sys_i8_param_0]; ; CHECK-NEXT: ld.volatile.b8 %rs1, [%rd1]; ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; ; CHECK-NEXT: st.volatile.b8 [%rd1], %rs2; ; CHECK-NEXT: ret; %a.load = load atomic volatile i8, ptr %a unordered, align 1 %a.add = add i8 %a.load, 1 store atomic volatile i8 %a.add, ptr %a unordered, align 1 ret void } define void @generic_unordered_volatile_sys_i16(ptr %a) { ; CHECK-LABEL: generic_unordered_volatile_sys_i16( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [generic_unordered_volatile_sys_i16_param_0]; ; CHECK-NEXT: ld.volatile.b16 %rs1, [%rd1]; ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; ; CHECK-NEXT: st.volatile.b16 [%rd1], %rs2; ; CHECK-NEXT: ret; %a.load = load atomic volatile i16, ptr %a unordered, align 2 %a.add = add i16 %a.load, 1 store atomic volatile i16 %a.add, ptr %a unordered, align 2 ret void } define void @generic_unordered_volatile_sys_i32(ptr %a) { ; CHECK-LABEL: generic_unordered_volatile_sys_i32( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [generic_unordered_volatile_sys_i32_param_0]; ; CHECK-NEXT: ld.volatile.b32 %r1, [%rd1]; ; CHECK-NEXT: add.s32 %r2, %r1, 1; ; CHECK-NEXT: st.volatile.b32 [%rd1], %r2; ; CHECK-NEXT: ret; %a.load = load atomic volatile i32, ptr %a unordered, align 4 %a.add = add i32 %a.load, 1 store atomic volatile i32 %a.add, ptr %a unordered, align 4 ret void } define void @generic_unordered_volatile_sys_i64(ptr %a) { ; CHECK-LABEL: generic_unordered_volatile_sys_i64( ; CHECK: { ; CHECK-NEXT: .reg .b64 %rd<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [generic_unordered_volatile_sys_i64_param_0]; ; CHECK-NEXT: ld.volatile.b64 %rd2, [%rd1]; ; CHECK-NEXT: add.s64 %rd3, %rd2, 1; ; CHECK-NEXT: st.volatile.b64 [%rd1], %rd3; ; CHECK-NEXT: ret; %a.load = load atomic volatile i64, ptr %a unordered, align 8 %a.add = add i64 %a.load, 1 store atomic volatile i64 %a.add, ptr %a unordered, align 8 ret void } define void @generic_unordered_volatile_sys_float(ptr %a) { ; CHECK-LABEL: generic_unordered_volatile_sys_float( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [generic_unordered_volatile_sys_float_param_0]; ; CHECK-NEXT: ld.volatile.b32 %r1, [%rd1]; ; CHECK-NEXT: add.rn.f32 %r2, %r1, 0f3F800000; ; CHECK-NEXT: st.volatile.b32 [%rd1], %r2; ; CHECK-NEXT: ret; %a.load = load atomic volatile float, ptr %a unordered, align 4 %a.add = fadd float %a.load, 1. 
store atomic volatile float %a.add, ptr %a unordered, align 4 ret void } define void @generic_unordered_volatile_sys_double(ptr %a) { ; CHECK-LABEL: generic_unordered_volatile_sys_double( ; CHECK: { ; CHECK-NEXT: .reg .b64 %rd<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [generic_unordered_volatile_sys_double_param_0]; ; CHECK-NEXT: ld.volatile.b64 %rd2, [%rd1]; ; CHECK-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000; ; CHECK-NEXT: st.volatile.b64 [%rd1], %rd3; ; CHECK-NEXT: ret; %a.load = load atomic volatile double, ptr %a unordered, align 8 %a.add = fadd double %a.load, 1. store atomic volatile double %a.add, ptr %a unordered, align 8 ret void } ; generic_monotonic_sys define void @generic_monotonic_sys_i8(ptr %a) { ; SM60-LABEL: generic_monotonic_sys_i8( ; SM60: { ; SM60-NEXT: .reg .b16 %rs<3>; ; SM60-NEXT: .reg .b64 %rd<2>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [generic_monotonic_sys_i8_param_0]; ; SM60-NEXT: ld.volatile.b8 %rs1, [%rd1]; ; SM60-NEXT: add.s16 %rs2, %rs1, 1; ; SM60-NEXT: st.volatile.b8 [%rd1], %rs2; ; SM60-NEXT: ret; ; ; SM70-LABEL: generic_monotonic_sys_i8( ; SM70: { ; SM70-NEXT: .reg .b16 %rs<3>; ; SM70-NEXT: .reg .b64 %rd<2>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [generic_monotonic_sys_i8_param_0]; ; SM70-NEXT: ld.relaxed.sys.b8 %rs1, [%rd1]; ; SM70-NEXT: add.s16 %rs2, %rs1, 1; ; SM70-NEXT: st.relaxed.sys.b8 [%rd1], %rs2; ; SM70-NEXT: ret; %a.load = load atomic i8, ptr %a monotonic, align 1 %a.add = add i8 %a.load, 1 store atomic i8 %a.add, ptr %a monotonic, align 1 ret void } define void @generic_monotonic_sys_i16(ptr %a) { ; SM60-LABEL: generic_monotonic_sys_i16( ; SM60: { ; SM60-NEXT: .reg .b16 %rs<3>; ; SM60-NEXT: .reg .b64 %rd<2>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [generic_monotonic_sys_i16_param_0]; ; SM60-NEXT: ld.volatile.b16 %rs1, [%rd1]; ; SM60-NEXT: add.s16 %rs2, %rs1, 1; ; SM60-NEXT: st.volatile.b16 [%rd1], %rs2; ; SM60-NEXT: ret; ; ; SM70-LABEL: generic_monotonic_sys_i16( ; SM70: { ; SM70-NEXT: .reg .b16 %rs<3>; ; SM70-NEXT: .reg .b64 %rd<2>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [generic_monotonic_sys_i16_param_0]; ; SM70-NEXT: ld.relaxed.sys.b16 %rs1, [%rd1]; ; SM70-NEXT: add.s16 %rs2, %rs1, 1; ; SM70-NEXT: st.relaxed.sys.b16 [%rd1], %rs2; ; SM70-NEXT: ret; %a.load = load atomic i16, ptr %a monotonic, align 2 %a.add = add i16 %a.load, 1 store atomic i16 %a.add, ptr %a monotonic, align 2 ret void } define void @generic_monotonic_sys_i32(ptr %a) { ; SM60-LABEL: generic_monotonic_sys_i32( ; SM60: { ; SM60-NEXT: .reg .b32 %r<3>; ; SM60-NEXT: .reg .b64 %rd<2>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [generic_monotonic_sys_i32_param_0]; ; SM60-NEXT: ld.volatile.b32 %r1, [%rd1]; ; SM60-NEXT: add.s32 %r2, %r1, 1; ; SM60-NEXT: st.volatile.b32 [%rd1], %r2; ; SM60-NEXT: ret; ; ; SM70-LABEL: generic_monotonic_sys_i32( ; SM70: { ; SM70-NEXT: .reg .b32 %r<3>; ; SM70-NEXT: .reg .b64 %rd<2>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [generic_monotonic_sys_i32_param_0]; ; SM70-NEXT: ld.relaxed.sys.b32 %r1, [%rd1]; ; SM70-NEXT: add.s32 %r2, %r1, 1; ; SM70-NEXT: st.relaxed.sys.b32 [%rd1], %r2; ; SM70-NEXT: ret; %a.load = load atomic i32, ptr %a monotonic, align 4 %a.add = add i32 %a.load, 1 store atomic i32 %a.add, ptr %a monotonic, align 4 ret void } define void @generic_monotonic_sys_i64(ptr %a) { ; SM60-LABEL: generic_monotonic_sys_i64( ; SM60: { ; 
SM60-NEXT: .reg .b64 %rd<4>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [generic_monotonic_sys_i64_param_0]; ; SM60-NEXT: ld.volatile.b64 %rd2, [%rd1]; ; SM60-NEXT: add.s64 %rd3, %rd2, 1; ; SM60-NEXT: st.volatile.b64 [%rd1], %rd3; ; SM60-NEXT: ret; ; ; SM70-LABEL: generic_monotonic_sys_i64( ; SM70: { ; SM70-NEXT: .reg .b64 %rd<4>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [generic_monotonic_sys_i64_param_0]; ; SM70-NEXT: ld.relaxed.sys.b64 %rd2, [%rd1]; ; SM70-NEXT: add.s64 %rd3, %rd2, 1; ; SM70-NEXT: st.relaxed.sys.b64 [%rd1], %rd3; ; SM70-NEXT: ret; %a.load = load atomic i64, ptr %a monotonic, align 8 %a.add = add i64 %a.load, 1 store atomic i64 %a.add, ptr %a monotonic, align 8 ret void } define void @generic_monotonic_sys_float(ptr %a) { ; SM60-LABEL: generic_monotonic_sys_float( ; SM60: { ; SM60-NEXT: .reg .b32 %r<3>; ; SM60-NEXT: .reg .b64 %rd<2>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [generic_monotonic_sys_float_param_0]; ; SM60-NEXT: ld.volatile.b32 %r1, [%rd1]; ; SM60-NEXT: add.rn.f32 %r2, %r1, 0f3F800000; ; SM60-NEXT: st.volatile.b32 [%rd1], %r2; ; SM60-NEXT: ret; ; ; SM70-LABEL: generic_monotonic_sys_float( ; SM70: { ; SM70-NEXT: .reg .b32 %r<3>; ; SM70-NEXT: .reg .b64 %rd<2>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [generic_monotonic_sys_float_param_0]; ; SM70-NEXT: ld.relaxed.sys.b32 %r1, [%rd1]; ; SM70-NEXT: add.rn.f32 %r2, %r1, 0f3F800000; ; SM70-NEXT: st.relaxed.sys.b32 [%rd1], %r2; ; SM70-NEXT: ret; %a.load = load atomic float, ptr %a monotonic, align 4 %a.add = fadd float %a.load, 1. store atomic float %a.add, ptr %a monotonic, align 4 ret void } define void @generic_monotonic_sys_double(ptr %a) { ; SM60-LABEL: generic_monotonic_sys_double( ; SM60: { ; SM60-NEXT: .reg .b64 %rd<4>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [generic_monotonic_sys_double_param_0]; ; SM60-NEXT: ld.volatile.b64 %rd2, [%rd1]; ; SM60-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000; ; SM60-NEXT: st.volatile.b64 [%rd1], %rd3; ; SM60-NEXT: ret; ; ; SM70-LABEL: generic_monotonic_sys_double( ; SM70: { ; SM70-NEXT: .reg .b64 %rd<4>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [generic_monotonic_sys_double_param_0]; ; SM70-NEXT: ld.relaxed.sys.b64 %rd2, [%rd1]; ; SM70-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000; ; SM70-NEXT: st.relaxed.sys.b64 [%rd1], %rd3; ; SM70-NEXT: ret; %a.load = load atomic double, ptr %a monotonic, align 8 %a.add = fadd double %a.load, 1. 
store atomic double %a.add, ptr %a monotonic, align 8 ret void } ; generic_monotonic_volatile_sys define void @generic_monotonic_volatile_sys_i8(ptr %a) { ; CHECK-LABEL: generic_monotonic_volatile_sys_i8( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [generic_monotonic_volatile_sys_i8_param_0]; ; CHECK-NEXT: ld.volatile.b8 %rs1, [%rd1]; ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; ; CHECK-NEXT: st.volatile.b8 [%rd1], %rs2; ; CHECK-NEXT: ret; %a.load = load atomic volatile i8, ptr %a monotonic, align 1 %a.add = add i8 %a.load, 1 store atomic volatile i8 %a.add, ptr %a monotonic, align 1 ret void } define void @generic_monotonic_volatile_sys_i16(ptr %a) { ; CHECK-LABEL: generic_monotonic_volatile_sys_i16( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [generic_monotonic_volatile_sys_i16_param_0]; ; CHECK-NEXT: ld.volatile.b16 %rs1, [%rd1]; ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; ; CHECK-NEXT: st.volatile.b16 [%rd1], %rs2; ; CHECK-NEXT: ret; %a.load = load atomic volatile i16, ptr %a monotonic, align 2 %a.add = add i16 %a.load, 1 store atomic volatile i16 %a.add, ptr %a monotonic, align 2 ret void } define void @generic_monotonic_volatile_sys_i32(ptr %a) { ; CHECK-LABEL: generic_monotonic_volatile_sys_i32( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [generic_monotonic_volatile_sys_i32_param_0]; ; CHECK-NEXT: ld.volatile.b32 %r1, [%rd1]; ; CHECK-NEXT: add.s32 %r2, %r1, 1; ; CHECK-NEXT: st.volatile.b32 [%rd1], %r2; ; CHECK-NEXT: ret; %a.load = load atomic volatile i32, ptr %a monotonic, align 4 %a.add = add i32 %a.load, 1 store atomic volatile i32 %a.add, ptr %a monotonic, align 4 ret void } define void @generic_monotonic_volatile_sys_i64(ptr %a) { ; CHECK-LABEL: generic_monotonic_volatile_sys_i64( ; CHECK: { ; CHECK-NEXT: .reg .b64 %rd<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [generic_monotonic_volatile_sys_i64_param_0]; ; CHECK-NEXT: ld.volatile.b64 %rd2, [%rd1]; ; CHECK-NEXT: add.s64 %rd3, %rd2, 1; ; CHECK-NEXT: st.volatile.b64 [%rd1], %rd3; ; CHECK-NEXT: ret; %a.load = load atomic volatile i64, ptr %a monotonic, align 8 %a.add = add i64 %a.load, 1 store atomic volatile i64 %a.add, ptr %a monotonic, align 8 ret void } define void @generic_monotonic_volatile_sys_float(ptr %a) { ; CHECK-LABEL: generic_monotonic_volatile_sys_float( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [generic_monotonic_volatile_sys_float_param_0]; ; CHECK-NEXT: ld.volatile.b32 %r1, [%rd1]; ; CHECK-NEXT: add.rn.f32 %r2, %r1, 0f3F800000; ; CHECK-NEXT: st.volatile.b32 [%rd1], %r2; ; CHECK-NEXT: ret; %a.load = load atomic volatile float, ptr %a monotonic, align 4 %a.add = fadd float %a.load, 1. 
store atomic volatile float %a.add, ptr %a monotonic, align 4 ret void } define void @generic_monotonic_volatile_sys_double(ptr %a) { ; CHECK-LABEL: generic_monotonic_volatile_sys_double( ; CHECK: { ; CHECK-NEXT: .reg .b64 %rd<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [generic_monotonic_volatile_sys_double_param_0]; ; CHECK-NEXT: ld.volatile.b64 %rd2, [%rd1]; ; CHECK-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000; ; CHECK-NEXT: st.volatile.b64 [%rd1], %rd3; ; CHECK-NEXT: ret; %a.load = load atomic volatile double, ptr %a monotonic, align 8 %a.add = fadd double %a.load, 1. store atomic volatile double %a.add, ptr %a monotonic, align 8 ret void } ;; global statespace ; global define void @global_i8(ptr addrspace(1) %a) { ; CHECK-LABEL: global_i8( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [global_i8_param_0]; ; CHECK-NEXT: ld.global.b8 %rs1, [%rd1]; ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; ; CHECK-NEXT: st.global.b8 [%rd1], %rs2; ; CHECK-NEXT: ret; %a.load = load i8, ptr addrspace(1) %a %a.add = add i8 %a.load, 1 store i8 %a.add, ptr addrspace(1) %a ret void } define void @global_i16(ptr addrspace(1) %a) { ; CHECK-LABEL: global_i16( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [global_i16_param_0]; ; CHECK-NEXT: ld.global.b16 %rs1, [%rd1]; ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; ; CHECK-NEXT: st.global.b16 [%rd1], %rs2; ; CHECK-NEXT: ret; %a.load = load i16, ptr addrspace(1) %a %a.add = add i16 %a.load, 1 store i16 %a.add, ptr addrspace(1) %a ret void } define void @global_i32(ptr addrspace(1) %a) { ; CHECK-LABEL: global_i32( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [global_i32_param_0]; ; CHECK-NEXT: ld.global.b32 %r1, [%rd1]; ; CHECK-NEXT: add.s32 %r2, %r1, 1; ; CHECK-NEXT: st.global.b32 [%rd1], %r2; ; CHECK-NEXT: ret; %a.load = load i32, ptr addrspace(1) %a %a.add = add i32 %a.load, 1 store i32 %a.add, ptr addrspace(1) %a ret void } define void @global_i64(ptr addrspace(1) %a) { ; CHECK-LABEL: global_i64( ; CHECK: { ; CHECK-NEXT: .reg .b64 %rd<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [global_i64_param_0]; ; CHECK-NEXT: ld.global.b64 %rd2, [%rd1]; ; CHECK-NEXT: add.s64 %rd3, %rd2, 1; ; CHECK-NEXT: st.global.b64 [%rd1], %rd3; ; CHECK-NEXT: ret; %a.load = load i64, ptr addrspace(1) %a %a.add = add i64 %a.load, 1 store i64 %a.add, ptr addrspace(1) %a ret void } define void @global_float(ptr addrspace(1) %a) { ; CHECK-LABEL: global_float( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [global_float_param_0]; ; CHECK-NEXT: ld.global.b32 %r1, [%rd1]; ; CHECK-NEXT: add.rn.f32 %r2, %r1, 0f3F800000; ; CHECK-NEXT: st.global.b32 [%rd1], %r2; ; CHECK-NEXT: ret; %a.load = load float, ptr addrspace(1) %a %a.add = fadd float %a.load, 1. 
store float %a.add, ptr addrspace(1) %a ret void } define void @global_double(ptr addrspace(1) %a) { ; CHECK-LABEL: global_double( ; CHECK: { ; CHECK-NEXT: .reg .b64 %rd<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [global_double_param_0]; ; CHECK-NEXT: ld.global.b64 %rd2, [%rd1]; ; CHECK-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000; ; CHECK-NEXT: st.global.b64 [%rd1], %rd3; ; CHECK-NEXT: ret; %a.load = load double, ptr addrspace(1) %a %a.add = fadd double %a.load, 1. store double %a.add, ptr addrspace(1) %a ret void } ; global_volatile define void @global_volatile_i8(ptr addrspace(1) %a) { ; CHECK-LABEL: global_volatile_i8( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [global_volatile_i8_param_0]; ; CHECK-NEXT: ld.volatile.global.b8 %rs1, [%rd1]; ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; ; CHECK-NEXT: st.volatile.global.b8 [%rd1], %rs2; ; CHECK-NEXT: ret; %a.load = load volatile i8, ptr addrspace(1) %a %a.add = add i8 %a.load, 1 store volatile i8 %a.add, ptr addrspace(1) %a ret void } define void @global_volatile_i16(ptr addrspace(1) %a) { ; CHECK-LABEL: global_volatile_i16( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [global_volatile_i16_param_0]; ; CHECK-NEXT: ld.volatile.global.b16 %rs1, [%rd1]; ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; ; CHECK-NEXT: st.volatile.global.b16 [%rd1], %rs2; ; CHECK-NEXT: ret; %a.load = load volatile i16, ptr addrspace(1) %a %a.add = add i16 %a.load, 1 store volatile i16 %a.add, ptr addrspace(1) %a ret void } define void @global_volatile_i32(ptr addrspace(1) %a) { ; CHECK-LABEL: global_volatile_i32( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [global_volatile_i32_param_0]; ; CHECK-NEXT: ld.volatile.global.b32 %r1, [%rd1]; ; CHECK-NEXT: add.s32 %r2, %r1, 1; ; CHECK-NEXT: st.volatile.global.b32 [%rd1], %r2; ; CHECK-NEXT: ret; %a.load = load volatile i32, ptr addrspace(1) %a %a.add = add i32 %a.load, 1 store volatile i32 %a.add, ptr addrspace(1) %a ret void } define void @global_volatile_i64(ptr addrspace(1) %a) { ; CHECK-LABEL: global_volatile_i64( ; CHECK: { ; CHECK-NEXT: .reg .b64 %rd<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [global_volatile_i64_param_0]; ; CHECK-NEXT: ld.volatile.global.b64 %rd2, [%rd1]; ; CHECK-NEXT: add.s64 %rd3, %rd2, 1; ; CHECK-NEXT: st.volatile.global.b64 [%rd1], %rd3; ; CHECK-NEXT: ret; %a.load = load volatile i64, ptr addrspace(1) %a %a.add = add i64 %a.load, 1 store volatile i64 %a.add, ptr addrspace(1) %a ret void } define void @global_volatile_float(ptr addrspace(1) %a) { ; CHECK-LABEL: global_volatile_float( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [global_volatile_float_param_0]; ; CHECK-NEXT: ld.volatile.global.b32 %r1, [%rd1]; ; CHECK-NEXT: add.rn.f32 %r2, %r1, 0f3F800000; ; CHECK-NEXT: st.volatile.global.b32 [%rd1], %r2; ; CHECK-NEXT: ret; %a.load = load volatile float, ptr addrspace(1) %a %a.add = fadd float %a.load, 1. 
store volatile float %a.add, ptr addrspace(1) %a ret void } define void @global_volatile_double(ptr addrspace(1) %a) { ; CHECK-LABEL: global_volatile_double( ; CHECK: { ; CHECK-NEXT: .reg .b64 %rd<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [global_volatile_double_param_0]; ; CHECK-NEXT: ld.volatile.global.b64 %rd2, [%rd1]; ; CHECK-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000; ; CHECK-NEXT: st.volatile.global.b64 [%rd1], %rd3; ; CHECK-NEXT: ret; %a.load = load volatile double, ptr addrspace(1) %a %a.add = fadd double %a.load, 1. store volatile double %a.add, ptr addrspace(1) %a ret void } ; global_unordered_sys define void @global_unordered_sys_i8(ptr addrspace(1) %a) { ; SM60-LABEL: global_unordered_sys_i8( ; SM60: { ; SM60-NEXT: .reg .b16 %rs<3>; ; SM60-NEXT: .reg .b64 %rd<2>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [global_unordered_sys_i8_param_0]; ; SM60-NEXT: ld.volatile.global.b8 %rs1, [%rd1]; ; SM60-NEXT: add.s16 %rs2, %rs1, 1; ; SM60-NEXT: st.volatile.global.b8 [%rd1], %rs2; ; SM60-NEXT: ret; ; ; SM70-LABEL: global_unordered_sys_i8( ; SM70: { ; SM70-NEXT: .reg .b16 %rs<3>; ; SM70-NEXT: .reg .b64 %rd<2>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [global_unordered_sys_i8_param_0]; ; SM70-NEXT: ld.relaxed.sys.global.b8 %rs1, [%rd1]; ; SM70-NEXT: add.s16 %rs2, %rs1, 1; ; SM70-NEXT: st.relaxed.sys.global.b8 [%rd1], %rs2; ; SM70-NEXT: ret; %a.load = load atomic i8, ptr addrspace(1) %a unordered, align 1 %a.add = add i8 %a.load, 1 store atomic i8 %a.add, ptr addrspace(1) %a unordered, align 1 ret void } define void @global_unordered_sys_i16(ptr addrspace(1) %a) { ; SM60-LABEL: global_unordered_sys_i16( ; SM60: { ; SM60-NEXT: .reg .b16 %rs<3>; ; SM60-NEXT: .reg .b64 %rd<2>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [global_unordered_sys_i16_param_0]; ; SM60-NEXT: ld.volatile.global.b16 %rs1, [%rd1]; ; SM60-NEXT: add.s16 %rs2, %rs1, 1; ; SM60-NEXT: st.volatile.global.b16 [%rd1], %rs2; ; SM60-NEXT: ret; ; ; SM70-LABEL: global_unordered_sys_i16( ; SM70: { ; SM70-NEXT: .reg .b16 %rs<3>; ; SM70-NEXT: .reg .b64 %rd<2>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [global_unordered_sys_i16_param_0]; ; SM70-NEXT: ld.relaxed.sys.global.b16 %rs1, [%rd1]; ; SM70-NEXT: add.s16 %rs2, %rs1, 1; ; SM70-NEXT: st.relaxed.sys.global.b16 [%rd1], %rs2; ; SM70-NEXT: ret; %a.load = load atomic i16, ptr addrspace(1) %a unordered, align 2 %a.add = add i16 %a.load, 1 store atomic i16 %a.add, ptr addrspace(1) %a unordered, align 2 ret void } define void @global_unordered_sys_i32(ptr addrspace(1) %a) { ; SM60-LABEL: global_unordered_sys_i32( ; SM60: { ; SM60-NEXT: .reg .b32 %r<3>; ; SM60-NEXT: .reg .b64 %rd<2>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [global_unordered_sys_i32_param_0]; ; SM60-NEXT: ld.volatile.global.b32 %r1, [%rd1]; ; SM60-NEXT: add.s32 %r2, %r1, 1; ; SM60-NEXT: st.volatile.global.b32 [%rd1], %r2; ; SM60-NEXT: ret; ; ; SM70-LABEL: global_unordered_sys_i32( ; SM70: { ; SM70-NEXT: .reg .b32 %r<3>; ; SM70-NEXT: .reg .b64 %rd<2>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [global_unordered_sys_i32_param_0]; ; SM70-NEXT: ld.relaxed.sys.global.b32 %r1, [%rd1]; ; SM70-NEXT: add.s32 %r2, %r1, 1; ; SM70-NEXT: st.relaxed.sys.global.b32 [%rd1], %r2; ; SM70-NEXT: ret; %a.load = load atomic i32, ptr addrspace(1) %a unordered, align 4 %a.add = add i32 %a.load, 1 store atomic i32 %a.add, ptr addrspace(1) %a unordered, 
align 4 ret void } define void @global_unordered_sys_i64(ptr addrspace(1) %a) { ; SM60-LABEL: global_unordered_sys_i64( ; SM60: { ; SM60-NEXT: .reg .b64 %rd<4>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [global_unordered_sys_i64_param_0]; ; SM60-NEXT: ld.volatile.global.b64 %rd2, [%rd1]; ; SM60-NEXT: add.s64 %rd3, %rd2, 1; ; SM60-NEXT: st.volatile.global.b64 [%rd1], %rd3; ; SM60-NEXT: ret; ; ; SM70-LABEL: global_unordered_sys_i64( ; SM70: { ; SM70-NEXT: .reg .b64 %rd<4>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [global_unordered_sys_i64_param_0]; ; SM70-NEXT: ld.relaxed.sys.global.b64 %rd2, [%rd1]; ; SM70-NEXT: add.s64 %rd3, %rd2, 1; ; SM70-NEXT: st.relaxed.sys.global.b64 [%rd1], %rd3; ; SM70-NEXT: ret; %a.load = load atomic i64, ptr addrspace(1) %a unordered, align 8 %a.add = add i64 %a.load, 1 store atomic i64 %a.add, ptr addrspace(1) %a unordered, align 8 ret void } define void @global_unordered_sys_float(ptr addrspace(1) %a) { ; SM60-LABEL: global_unordered_sys_float( ; SM60: { ; SM60-NEXT: .reg .b32 %r<3>; ; SM60-NEXT: .reg .b64 %rd<2>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [global_unordered_sys_float_param_0]; ; SM60-NEXT: ld.volatile.global.b32 %r1, [%rd1]; ; SM60-NEXT: add.rn.f32 %r2, %r1, 0f3F800000; ; SM60-NEXT: st.volatile.global.b32 [%rd1], %r2; ; SM60-NEXT: ret; ; ; SM70-LABEL: global_unordered_sys_float( ; SM70: { ; SM70-NEXT: .reg .b32 %r<3>; ; SM70-NEXT: .reg .b64 %rd<2>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [global_unordered_sys_float_param_0]; ; SM70-NEXT: ld.relaxed.sys.global.b32 %r1, [%rd1]; ; SM70-NEXT: add.rn.f32 %r2, %r1, 0f3F800000; ; SM70-NEXT: st.relaxed.sys.global.b32 [%rd1], %r2; ; SM70-NEXT: ret; %a.load = load atomic float, ptr addrspace(1) %a unordered, align 4 %a.add = fadd float %a.load, 1. store atomic float %a.add, ptr addrspace(1) %a unordered, align 4 ret void } define void @global_unordered_sys_double(ptr addrspace(1) %a) { ; SM60-LABEL: global_unordered_sys_double( ; SM60: { ; SM60-NEXT: .reg .b64 %rd<4>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [global_unordered_sys_double_param_0]; ; SM60-NEXT: ld.volatile.global.b64 %rd2, [%rd1]; ; SM60-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000; ; SM60-NEXT: st.volatile.global.b64 [%rd1], %rd3; ; SM60-NEXT: ret; ; ; SM70-LABEL: global_unordered_sys_double( ; SM70: { ; SM70-NEXT: .reg .b64 %rd<4>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [global_unordered_sys_double_param_0]; ; SM70-NEXT: ld.relaxed.sys.global.b64 %rd2, [%rd1]; ; SM70-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000; ; SM70-NEXT: st.relaxed.sys.global.b64 [%rd1], %rd3; ; SM70-NEXT: ret; %a.load = load atomic double, ptr addrspace(1) %a unordered, align 8 %a.add = fadd double %a.load, 1. 
store atomic double %a.add, ptr addrspace(1) %a unordered, align 8 ret void } ; global_unordered_volatile_sys define void @global_unordered_volatile_sys_i8(ptr addrspace(1) %a) { ; SM60-LABEL: global_unordered_volatile_sys_i8( ; SM60: { ; SM60-NEXT: .reg .b16 %rs<3>; ; SM60-NEXT: .reg .b64 %rd<2>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [global_unordered_volatile_sys_i8_param_0]; ; SM60-NEXT: ld.volatile.global.b8 %rs1, [%rd1]; ; SM60-NEXT: add.s16 %rs2, %rs1, 1; ; SM60-NEXT: st.volatile.global.b8 [%rd1], %rs2; ; SM60-NEXT: ret; ; ; SM70-LABEL: global_unordered_volatile_sys_i8( ; SM70: { ; SM70-NEXT: .reg .b16 %rs<3>; ; SM70-NEXT: .reg .b64 %rd<2>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [global_unordered_volatile_sys_i8_param_0]; ; SM70-NEXT: ld.mmio.relaxed.sys.global.b8 %rs1, [%rd1]; ; SM70-NEXT: add.s16 %rs2, %rs1, 1; ; SM70-NEXT: st.mmio.relaxed.sys.global.b8 [%rd1], %rs2; ; SM70-NEXT: ret; %a.load = load atomic volatile i8, ptr addrspace(1) %a unordered, align 1 %a.add = add i8 %a.load, 1 store atomic volatile i8 %a.add, ptr addrspace(1) %a unordered, align 1 ret void } define void @global_unordered_volatile_sys_i16(ptr addrspace(1) %a) { ; SM60-LABEL: global_unordered_volatile_sys_i16( ; SM60: { ; SM60-NEXT: .reg .b16 %rs<3>; ; SM60-NEXT: .reg .b64 %rd<2>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [global_unordered_volatile_sys_i16_param_0]; ; SM60-NEXT: ld.volatile.global.b16 %rs1, [%rd1]; ; SM60-NEXT: add.s16 %rs2, %rs1, 1; ; SM60-NEXT: st.volatile.global.b16 [%rd1], %rs2; ; SM60-NEXT: ret; ; ; SM70-LABEL: global_unordered_volatile_sys_i16( ; SM70: { ; SM70-NEXT: .reg .b16 %rs<3>; ; SM70-NEXT: .reg .b64 %rd<2>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [global_unordered_volatile_sys_i16_param_0]; ; SM70-NEXT: ld.mmio.relaxed.sys.global.b16 %rs1, [%rd1]; ; SM70-NEXT: add.s16 %rs2, %rs1, 1; ; SM70-NEXT: st.mmio.relaxed.sys.global.b16 [%rd1], %rs2; ; SM70-NEXT: ret; %a.load = load atomic volatile i16, ptr addrspace(1) %a unordered, align 2 %a.add = add i16 %a.load, 1 store atomic volatile i16 %a.add, ptr addrspace(1) %a unordered, align 2 ret void } define void @global_unordered_volatile_sys_i32(ptr addrspace(1) %a) { ; SM60-LABEL: global_unordered_volatile_sys_i32( ; SM60: { ; SM60-NEXT: .reg .b32 %r<3>; ; SM60-NEXT: .reg .b64 %rd<2>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [global_unordered_volatile_sys_i32_param_0]; ; SM60-NEXT: ld.volatile.global.b32 %r1, [%rd1]; ; SM60-NEXT: add.s32 %r2, %r1, 1; ; SM60-NEXT: st.volatile.global.b32 [%rd1], %r2; ; SM60-NEXT: ret; ; ; SM70-LABEL: global_unordered_volatile_sys_i32( ; SM70: { ; SM70-NEXT: .reg .b32 %r<3>; ; SM70-NEXT: .reg .b64 %rd<2>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [global_unordered_volatile_sys_i32_param_0]; ; SM70-NEXT: ld.mmio.relaxed.sys.global.b32 %r1, [%rd1]; ; SM70-NEXT: add.s32 %r2, %r1, 1; ; SM70-NEXT: st.mmio.relaxed.sys.global.b32 [%rd1], %r2; ; SM70-NEXT: ret; %a.load = load atomic volatile i32, ptr addrspace(1) %a unordered, align 4 %a.add = add i32 %a.load, 1 store atomic volatile i32 %a.add, ptr addrspace(1) %a unordered, align 4 ret void } define void @global_unordered_volatile_sys_i64(ptr addrspace(1) %a) { ; SM60-LABEL: global_unordered_volatile_sys_i64( ; SM60: { ; SM60-NEXT: .reg .b64 %rd<4>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [global_unordered_volatile_sys_i64_param_0]; ; SM60-NEXT: 
ld.volatile.global.b64 %rd2, [%rd1]; ; SM60-NEXT: add.s64 %rd3, %rd2, 1; ; SM60-NEXT: st.volatile.global.b64 [%rd1], %rd3; ; SM60-NEXT: ret; ; ; SM70-LABEL: global_unordered_volatile_sys_i64( ; SM70: { ; SM70-NEXT: .reg .b64 %rd<4>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [global_unordered_volatile_sys_i64_param_0]; ; SM70-NEXT: ld.mmio.relaxed.sys.global.b64 %rd2, [%rd1]; ; SM70-NEXT: add.s64 %rd3, %rd2, 1; ; SM70-NEXT: st.mmio.relaxed.sys.global.b64 [%rd1], %rd3; ; SM70-NEXT: ret; %a.load = load atomic volatile i64, ptr addrspace(1) %a unordered, align 8 %a.add = add i64 %a.load, 1 store atomic volatile i64 %a.add, ptr addrspace(1) %a unordered, align 8 ret void } define void @global_unordered_volatile_sys_float(ptr addrspace(1) %a) { ; SM60-LABEL: global_unordered_volatile_sys_float( ; SM60: { ; SM60-NEXT: .reg .b32 %r<3>; ; SM60-NEXT: .reg .b64 %rd<2>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [global_unordered_volatile_sys_float_param_0]; ; SM60-NEXT: ld.volatile.global.b32 %r1, [%rd1]; ; SM60-NEXT: add.rn.f32 %r2, %r1, 0f3F800000; ; SM60-NEXT: st.volatile.global.b32 [%rd1], %r2; ; SM60-NEXT: ret; ; ; SM70-LABEL: global_unordered_volatile_sys_float( ; SM70: { ; SM70-NEXT: .reg .b32 %r<3>; ; SM70-NEXT: .reg .b64 %rd<2>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [global_unordered_volatile_sys_float_param_0]; ; SM70-NEXT: ld.mmio.relaxed.sys.global.b32 %r1, [%rd1]; ; SM70-NEXT: add.rn.f32 %r2, %r1, 0f3F800000; ; SM70-NEXT: st.mmio.relaxed.sys.global.b32 [%rd1], %r2; ; SM70-NEXT: ret; %a.load = load atomic volatile float, ptr addrspace(1) %a unordered, align 4 %a.add = fadd float %a.load, 1. store atomic volatile float %a.add, ptr addrspace(1) %a unordered, align 4 ret void } define void @global_unordered_volatile_sys_double(ptr addrspace(1) %a) { ; SM60-LABEL: global_unordered_volatile_sys_double( ; SM60: { ; SM60-NEXT: .reg .b64 %rd<4>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [global_unordered_volatile_sys_double_param_0]; ; SM60-NEXT: ld.volatile.global.b64 %rd2, [%rd1]; ; SM60-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000; ; SM60-NEXT: st.volatile.global.b64 [%rd1], %rd3; ; SM60-NEXT: ret; ; ; SM70-LABEL: global_unordered_volatile_sys_double( ; SM70: { ; SM70-NEXT: .reg .b64 %rd<4>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [global_unordered_volatile_sys_double_param_0]; ; SM70-NEXT: ld.mmio.relaxed.sys.global.b64 %rd2, [%rd1]; ; SM70-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000; ; SM70-NEXT: st.mmio.relaxed.sys.global.b64 [%rd1], %rd3; ; SM70-NEXT: ret; %a.load = load atomic volatile double, ptr addrspace(1) %a unordered, align 8 %a.add = fadd double %a.load, 1. 
store atomic volatile double %a.add, ptr addrspace(1) %a unordered, align 8 ret void } ; global_monotonic_sys define void @global_monotonic_sys_i8(ptr addrspace(1) %a) { ; SM60-LABEL: global_monotonic_sys_i8( ; SM60: { ; SM60-NEXT: .reg .b16 %rs<3>; ; SM60-NEXT: .reg .b64 %rd<2>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [global_monotonic_sys_i8_param_0]; ; SM60-NEXT: ld.volatile.global.b8 %rs1, [%rd1]; ; SM60-NEXT: add.s16 %rs2, %rs1, 1; ; SM60-NEXT: st.volatile.global.b8 [%rd1], %rs2; ; SM60-NEXT: ret; ; ; SM70-LABEL: global_monotonic_sys_i8( ; SM70: { ; SM70-NEXT: .reg .b16 %rs<3>; ; SM70-NEXT: .reg .b64 %rd<2>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [global_monotonic_sys_i8_param_0]; ; SM70-NEXT: ld.relaxed.sys.global.b8 %rs1, [%rd1]; ; SM70-NEXT: add.s16 %rs2, %rs1, 1; ; SM70-NEXT: st.relaxed.sys.global.b8 [%rd1], %rs2; ; SM70-NEXT: ret; %a.load = load atomic i8, ptr addrspace(1) %a monotonic, align 1 %a.add = add i8 %a.load, 1 store atomic i8 %a.add, ptr addrspace(1) %a monotonic, align 1 ret void } define void @global_monotonic_sys_i16(ptr addrspace(1) %a) { ; SM60-LABEL: global_monotonic_sys_i16( ; SM60: { ; SM60-NEXT: .reg .b16 %rs<3>; ; SM60-NEXT: .reg .b64 %rd<2>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [global_monotonic_sys_i16_param_0]; ; SM60-NEXT: ld.volatile.global.b16 %rs1, [%rd1]; ; SM60-NEXT: add.s16 %rs2, %rs1, 1; ; SM60-NEXT: st.volatile.global.b16 [%rd1], %rs2; ; SM60-NEXT: ret; ; ; SM70-LABEL: global_monotonic_sys_i16( ; SM70: { ; SM70-NEXT: .reg .b16 %rs<3>; ; SM70-NEXT: .reg .b64 %rd<2>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [global_monotonic_sys_i16_param_0]; ; SM70-NEXT: ld.relaxed.sys.global.b16 %rs1, [%rd1]; ; SM70-NEXT: add.s16 %rs2, %rs1, 1; ; SM70-NEXT: st.relaxed.sys.global.b16 [%rd1], %rs2; ; SM70-NEXT: ret; %a.load = load atomic i16, ptr addrspace(1) %a monotonic, align 2 %a.add = add i16 %a.load, 1 store atomic i16 %a.add, ptr addrspace(1) %a monotonic, align 2 ret void } define void @global_monotonic_sys_i32(ptr addrspace(1) %a) { ; SM60-LABEL: global_monotonic_sys_i32( ; SM60: { ; SM60-NEXT: .reg .b32 %r<3>; ; SM60-NEXT: .reg .b64 %rd<2>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [global_monotonic_sys_i32_param_0]; ; SM60-NEXT: ld.volatile.global.b32 %r1, [%rd1]; ; SM60-NEXT: add.s32 %r2, %r1, 1; ; SM60-NEXT: st.volatile.global.b32 [%rd1], %r2; ; SM60-NEXT: ret; ; ; SM70-LABEL: global_monotonic_sys_i32( ; SM70: { ; SM70-NEXT: .reg .b32 %r<3>; ; SM70-NEXT: .reg .b64 %rd<2>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [global_monotonic_sys_i32_param_0]; ; SM70-NEXT: ld.relaxed.sys.global.b32 %r1, [%rd1]; ; SM70-NEXT: add.s32 %r2, %r1, 1; ; SM70-NEXT: st.relaxed.sys.global.b32 [%rd1], %r2; ; SM70-NEXT: ret; %a.load = load atomic i32, ptr addrspace(1) %a monotonic, align 4 %a.add = add i32 %a.load, 1 store atomic i32 %a.add, ptr addrspace(1) %a monotonic, align 4 ret void } define void @global_monotonic_sys_i64(ptr addrspace(1) %a) { ; SM60-LABEL: global_monotonic_sys_i64( ; SM60: { ; SM60-NEXT: .reg .b64 %rd<4>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [global_monotonic_sys_i64_param_0]; ; SM60-NEXT: ld.volatile.global.b64 %rd2, [%rd1]; ; SM60-NEXT: add.s64 %rd3, %rd2, 1; ; SM60-NEXT: st.volatile.global.b64 [%rd1], %rd3; ; SM60-NEXT: ret; ; ; SM70-LABEL: global_monotonic_sys_i64( ; SM70: { ; SM70-NEXT: .reg .b64 %rd<4>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; 
SM70-NEXT: ld.param.b64 %rd1, [global_monotonic_sys_i64_param_0]; ; SM70-NEXT: ld.relaxed.sys.global.b64 %rd2, [%rd1]; ; SM70-NEXT: add.s64 %rd3, %rd2, 1; ; SM70-NEXT: st.relaxed.sys.global.b64 [%rd1], %rd3; ; SM70-NEXT: ret; %a.load = load atomic i64, ptr addrspace(1) %a monotonic, align 8 %a.add = add i64 %a.load, 1 store atomic i64 %a.add, ptr addrspace(1) %a monotonic, align 8 ret void } define void @global_monotonic_sys_float(ptr addrspace(1) %a) { ; SM60-LABEL: global_monotonic_sys_float( ; SM60: { ; SM60-NEXT: .reg .b32 %r<3>; ; SM60-NEXT: .reg .b64 %rd<2>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [global_monotonic_sys_float_param_0]; ; SM60-NEXT: ld.volatile.global.b32 %r1, [%rd1]; ; SM60-NEXT: add.rn.f32 %r2, %r1, 0f3F800000; ; SM60-NEXT: st.volatile.global.b32 [%rd1], %r2; ; SM60-NEXT: ret; ; ; SM70-LABEL: global_monotonic_sys_float( ; SM70: { ; SM70-NEXT: .reg .b32 %r<3>; ; SM70-NEXT: .reg .b64 %rd<2>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [global_monotonic_sys_float_param_0]; ; SM70-NEXT: ld.relaxed.sys.global.b32 %r1, [%rd1]; ; SM70-NEXT: add.rn.f32 %r2, %r1, 0f3F800000; ; SM70-NEXT: st.relaxed.sys.global.b32 [%rd1], %r2; ; SM70-NEXT: ret; %a.load = load atomic float, ptr addrspace(1) %a monotonic, align 4 %a.add = fadd float %a.load, 1. store atomic float %a.add, ptr addrspace(1) %a monotonic, align 4 ret void } define void @global_monotonic_sys_double(ptr addrspace(1) %a) { ; SM60-LABEL: global_monotonic_sys_double( ; SM60: { ; SM60-NEXT: .reg .b64 %rd<4>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [global_monotonic_sys_double_param_0]; ; SM60-NEXT: ld.volatile.global.b64 %rd2, [%rd1]; ; SM60-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000; ; SM60-NEXT: st.volatile.global.b64 [%rd1], %rd3; ; SM60-NEXT: ret; ; ; SM70-LABEL: global_monotonic_sys_double( ; SM70: { ; SM70-NEXT: .reg .b64 %rd<4>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [global_monotonic_sys_double_param_0]; ; SM70-NEXT: ld.relaxed.sys.global.b64 %rd2, [%rd1]; ; SM70-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000; ; SM70-NEXT: st.relaxed.sys.global.b64 [%rd1], %rd3; ; SM70-NEXT: ret; %a.load = load atomic double, ptr addrspace(1) %a monotonic, align 8 %a.add = fadd double %a.load, 1. 
store atomic double %a.add, ptr addrspace(1) %a monotonic, align 8 ret void } ; global_monotonic_volatile_sys define void @global_monotonic_volatile_sys_i8(ptr addrspace(1) %a) { ; SM60-LABEL: global_monotonic_volatile_sys_i8( ; SM60: { ; SM60-NEXT: .reg .b16 %rs<3>; ; SM60-NEXT: .reg .b64 %rd<2>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [global_monotonic_volatile_sys_i8_param_0]; ; SM60-NEXT: ld.volatile.global.b8 %rs1, [%rd1]; ; SM60-NEXT: add.s16 %rs2, %rs1, 1; ; SM60-NEXT: st.volatile.global.b8 [%rd1], %rs2; ; SM60-NEXT: ret; ; ; SM70-LABEL: global_monotonic_volatile_sys_i8( ; SM70: { ; SM70-NEXT: .reg .b16 %rs<3>; ; SM70-NEXT: .reg .b64 %rd<2>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [global_monotonic_volatile_sys_i8_param_0]; ; SM70-NEXT: ld.mmio.relaxed.sys.global.b8 %rs1, [%rd1]; ; SM70-NEXT: add.s16 %rs2, %rs1, 1; ; SM70-NEXT: st.mmio.relaxed.sys.global.b8 [%rd1], %rs2; ; SM70-NEXT: ret; %a.load = load atomic volatile i8, ptr addrspace(1) %a monotonic, align 1 %a.add = add i8 %a.load, 1 store atomic volatile i8 %a.add, ptr addrspace(1) %a monotonic, align 1 ret void } define void @global_monotonic_volatile_sys_i16(ptr addrspace(1) %a) { ; SM60-LABEL: global_monotonic_volatile_sys_i16( ; SM60: { ; SM60-NEXT: .reg .b16 %rs<3>; ; SM60-NEXT: .reg .b64 %rd<2>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [global_monotonic_volatile_sys_i16_param_0]; ; SM60-NEXT: ld.volatile.global.b16 %rs1, [%rd1]; ; SM60-NEXT: add.s16 %rs2, %rs1, 1; ; SM60-NEXT: st.volatile.global.b16 [%rd1], %rs2; ; SM60-NEXT: ret; ; ; SM70-LABEL: global_monotonic_volatile_sys_i16( ; SM70: { ; SM70-NEXT: .reg .b16 %rs<3>; ; SM70-NEXT: .reg .b64 %rd<2>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [global_monotonic_volatile_sys_i16_param_0]; ; SM70-NEXT: ld.mmio.relaxed.sys.global.b16 %rs1, [%rd1]; ; SM70-NEXT: add.s16 %rs2, %rs1, 1; ; SM70-NEXT: st.mmio.relaxed.sys.global.b16 [%rd1], %rs2; ; SM70-NEXT: ret; %a.load = load atomic volatile i16, ptr addrspace(1) %a monotonic, align 2 %a.add = add i16 %a.load, 1 store atomic volatile i16 %a.add, ptr addrspace(1) %a monotonic, align 2 ret void } define void @global_monotonic_volatile_sys_i32(ptr addrspace(1) %a) { ; SM60-LABEL: global_monotonic_volatile_sys_i32( ; SM60: { ; SM60-NEXT: .reg .b32 %r<3>; ; SM60-NEXT: .reg .b64 %rd<2>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [global_monotonic_volatile_sys_i32_param_0]; ; SM60-NEXT: ld.volatile.global.b32 %r1, [%rd1]; ; SM60-NEXT: add.s32 %r2, %r1, 1; ; SM60-NEXT: st.volatile.global.b32 [%rd1], %r2; ; SM60-NEXT: ret; ; ; SM70-LABEL: global_monotonic_volatile_sys_i32( ; SM70: { ; SM70-NEXT: .reg .b32 %r<3>; ; SM70-NEXT: .reg .b64 %rd<2>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [global_monotonic_volatile_sys_i32_param_0]; ; SM70-NEXT: ld.mmio.relaxed.sys.global.b32 %r1, [%rd1]; ; SM70-NEXT: add.s32 %r2, %r1, 1; ; SM70-NEXT: st.mmio.relaxed.sys.global.b32 [%rd1], %r2; ; SM70-NEXT: ret; %a.load = load atomic volatile i32, ptr addrspace(1) %a monotonic, align 4 %a.add = add i32 %a.load, 1 store atomic volatile i32 %a.add, ptr addrspace(1) %a monotonic, align 4 ret void } define void @global_monotonic_volatile_sys_i64(ptr addrspace(1) %a) { ; SM60-LABEL: global_monotonic_volatile_sys_i64( ; SM60: { ; SM60-NEXT: .reg .b64 %rd<4>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [global_monotonic_volatile_sys_i64_param_0]; ; SM60-NEXT: 
ld.volatile.global.b64 %rd2, [%rd1]; ; SM60-NEXT: add.s64 %rd3, %rd2, 1; ; SM60-NEXT: st.volatile.global.b64 [%rd1], %rd3; ; SM60-NEXT: ret; ; ; SM70-LABEL: global_monotonic_volatile_sys_i64( ; SM70: { ; SM70-NEXT: .reg .b64 %rd<4>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [global_monotonic_volatile_sys_i64_param_0]; ; SM70-NEXT: ld.mmio.relaxed.sys.global.b64 %rd2, [%rd1]; ; SM70-NEXT: add.s64 %rd3, %rd2, 1; ; SM70-NEXT: st.mmio.relaxed.sys.global.b64 [%rd1], %rd3; ; SM70-NEXT: ret; %a.load = load atomic volatile i64, ptr addrspace(1) %a monotonic, align 8 %a.add = add i64 %a.load, 1 store atomic volatile i64 %a.add, ptr addrspace(1) %a monotonic, align 8 ret void } define void @global_monotonic_volatile_sys_float(ptr addrspace(1) %a) { ; SM60-LABEL: global_monotonic_volatile_sys_float( ; SM60: { ; SM60-NEXT: .reg .b32 %r<3>; ; SM60-NEXT: .reg .b64 %rd<2>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [global_monotonic_volatile_sys_float_param_0]; ; SM60-NEXT: ld.volatile.global.b32 %r1, [%rd1]; ; SM60-NEXT: add.rn.f32 %r2, %r1, 0f3F800000; ; SM60-NEXT: st.volatile.global.b32 [%rd1], %r2; ; SM60-NEXT: ret; ; ; SM70-LABEL: global_monotonic_volatile_sys_float( ; SM70: { ; SM70-NEXT: .reg .b32 %r<3>; ; SM70-NEXT: .reg .b64 %rd<2>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [global_monotonic_volatile_sys_float_param_0]; ; SM70-NEXT: ld.mmio.relaxed.sys.global.b32 %r1, [%rd1]; ; SM70-NEXT: add.rn.f32 %r2, %r1, 0f3F800000; ; SM70-NEXT: st.mmio.relaxed.sys.global.b32 [%rd1], %r2; ; SM70-NEXT: ret; %a.load = load atomic volatile float, ptr addrspace(1) %a monotonic, align 4 %a.add = fadd float %a.load, 1. store atomic volatile float %a.add, ptr addrspace(1) %a monotonic, align 4 ret void } define void @global_monotonic_volatile_sys_double(ptr addrspace(1) %a) { ; SM60-LABEL: global_monotonic_volatile_sys_double( ; SM60: { ; SM60-NEXT: .reg .b64 %rd<4>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [global_monotonic_volatile_sys_double_param_0]; ; SM60-NEXT: ld.volatile.global.b64 %rd2, [%rd1]; ; SM60-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000; ; SM60-NEXT: st.volatile.global.b64 [%rd1], %rd3; ; SM60-NEXT: ret; ; ; SM70-LABEL: global_monotonic_volatile_sys_double( ; SM70: { ; SM70-NEXT: .reg .b64 %rd<4>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [global_monotonic_volatile_sys_double_param_0]; ; SM70-NEXT: ld.mmio.relaxed.sys.global.b64 %rd2, [%rd1]; ; SM70-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000; ; SM70-NEXT: st.mmio.relaxed.sys.global.b64 [%rd1], %rd3; ; SM70-NEXT: ret; %a.load = load atomic volatile double, ptr addrspace(1) %a monotonic, align 8 %a.add = fadd double %a.load, 1. 
store atomic volatile double %a.add, ptr addrspace(1) %a monotonic, align 8 ret void } ;; shared statespace ; shared define void @shared_i8(ptr addrspace(3) %a) { ; CHECK-LABEL: shared_i8( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [shared_i8_param_0]; ; CHECK-NEXT: ld.shared.b8 %rs1, [%rd1]; ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; ; CHECK-NEXT: st.shared.b8 [%rd1], %rs2; ; CHECK-NEXT: ret; %a.load = load i8, ptr addrspace(3) %a %a.add = add i8 %a.load, 1 store i8 %a.add, ptr addrspace(3) %a ret void } define void @shared_i16(ptr addrspace(3) %a) { ; CHECK-LABEL: shared_i16( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [shared_i16_param_0]; ; CHECK-NEXT: ld.shared.b16 %rs1, [%rd1]; ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; ; CHECK-NEXT: st.shared.b16 [%rd1], %rs2; ; CHECK-NEXT: ret; %a.load = load i16, ptr addrspace(3) %a %a.add = add i16 %a.load, 1 store i16 %a.add, ptr addrspace(3) %a ret void } define void @shared_i32(ptr addrspace(3) %a) { ; CHECK-LABEL: shared_i32( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [shared_i32_param_0]; ; CHECK-NEXT: ld.shared.b32 %r1, [%rd1]; ; CHECK-NEXT: add.s32 %r2, %r1, 1; ; CHECK-NEXT: st.shared.b32 [%rd1], %r2; ; CHECK-NEXT: ret; %a.load = load i32, ptr addrspace(3) %a %a.add = add i32 %a.load, 1 store i32 %a.add, ptr addrspace(3) %a ret void } define void @shared_i64(ptr addrspace(3) %a) { ; CHECK-LABEL: shared_i64( ; CHECK: { ; CHECK-NEXT: .reg .b64 %rd<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [shared_i64_param_0]; ; CHECK-NEXT: ld.shared.b64 %rd2, [%rd1]; ; CHECK-NEXT: add.s64 %rd3, %rd2, 1; ; CHECK-NEXT: st.shared.b64 [%rd1], %rd3; ; CHECK-NEXT: ret; %a.load = load i64, ptr addrspace(3) %a %a.add = add i64 %a.load, 1 store i64 %a.add, ptr addrspace(3) %a ret void } define void @shared_float(ptr addrspace(3) %a) { ; CHECK-LABEL: shared_float( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [shared_float_param_0]; ; CHECK-NEXT: ld.shared.b32 %r1, [%rd1]; ; CHECK-NEXT: add.rn.f32 %r2, %r1, 0f3F800000; ; CHECK-NEXT: st.shared.b32 [%rd1], %r2; ; CHECK-NEXT: ret; %a.load = load float, ptr addrspace(3) %a %a.add = fadd float %a.load, 1. store float %a.add, ptr addrspace(3) %a ret void } define void @shared_double(ptr addrspace(3) %a) { ; CHECK-LABEL: shared_double( ; CHECK: { ; CHECK-NEXT: .reg .b64 %rd<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [shared_double_param_0]; ; CHECK-NEXT: ld.shared.b64 %rd2, [%rd1]; ; CHECK-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000; ; CHECK-NEXT: st.shared.b64 [%rd1], %rd3; ; CHECK-NEXT: ret; %a.load = load double, ptr addrspace(3) %a %a.add = fadd double %a.load, 1. 
store double %a.add, ptr addrspace(3) %a ret void } ; shared_volatile define void @shared_volatile_i8(ptr addrspace(3) %a) { ; CHECK-LABEL: shared_volatile_i8( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [shared_volatile_i8_param_0]; ; CHECK-NEXT: ld.volatile.shared.b8 %rs1, [%rd1]; ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; ; CHECK-NEXT: st.volatile.shared.b8 [%rd1], %rs2; ; CHECK-NEXT: ret; %a.load = load volatile i8, ptr addrspace(3) %a %a.add = add i8 %a.load, 1 store volatile i8 %a.add, ptr addrspace(3) %a ret void } define void @shared_volatile_i16(ptr addrspace(3) %a) { ; CHECK-LABEL: shared_volatile_i16( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [shared_volatile_i16_param_0]; ; CHECK-NEXT: ld.volatile.shared.b16 %rs1, [%rd1]; ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; ; CHECK-NEXT: st.volatile.shared.b16 [%rd1], %rs2; ; CHECK-NEXT: ret; %a.load = load volatile i16, ptr addrspace(3) %a %a.add = add i16 %a.load, 1 store volatile i16 %a.add, ptr addrspace(3) %a ret void } define void @shared_volatile_i32(ptr addrspace(3) %a) { ; CHECK-LABEL: shared_volatile_i32( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [shared_volatile_i32_param_0]; ; CHECK-NEXT: ld.volatile.shared.b32 %r1, [%rd1]; ; CHECK-NEXT: add.s32 %r2, %r1, 1; ; CHECK-NEXT: st.volatile.shared.b32 [%rd1], %r2; ; CHECK-NEXT: ret; %a.load = load volatile i32, ptr addrspace(3) %a %a.add = add i32 %a.load, 1 store volatile i32 %a.add, ptr addrspace(3) %a ret void } define void @shared_volatile_i64(ptr addrspace(3) %a) { ; CHECK-LABEL: shared_volatile_i64( ; CHECK: { ; CHECK-NEXT: .reg .b64 %rd<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [shared_volatile_i64_param_0]; ; CHECK-NEXT: ld.volatile.shared.b64 %rd2, [%rd1]; ; CHECK-NEXT: add.s64 %rd3, %rd2, 1; ; CHECK-NEXT: st.volatile.shared.b64 [%rd1], %rd3; ; CHECK-NEXT: ret; %a.load = load volatile i64, ptr addrspace(3) %a %a.add = add i64 %a.load, 1 store volatile i64 %a.add, ptr addrspace(3) %a ret void } define void @shared_volatile_float(ptr addrspace(3) %a) { ; CHECK-LABEL: shared_volatile_float( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [shared_volatile_float_param_0]; ; CHECK-NEXT: ld.volatile.shared.b32 %r1, [%rd1]; ; CHECK-NEXT: add.rn.f32 %r2, %r1, 0f3F800000; ; CHECK-NEXT: st.volatile.shared.b32 [%rd1], %r2; ; CHECK-NEXT: ret; %a.load = load volatile float, ptr addrspace(3) %a %a.add = fadd float %a.load, 1. store volatile float %a.add, ptr addrspace(3) %a ret void } define void @shared_volatile_double(ptr addrspace(3) %a) { ; CHECK-LABEL: shared_volatile_double( ; CHECK: { ; CHECK-NEXT: .reg .b64 %rd<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [shared_volatile_double_param_0]; ; CHECK-NEXT: ld.volatile.shared.b64 %rd2, [%rd1]; ; CHECK-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000; ; CHECK-NEXT: st.volatile.shared.b64 [%rd1], %rd3; ; CHECK-NEXT: ret; %a.load = load volatile double, ptr addrspace(3) %a %a.add = fadd double %a.load, 1. 
store volatile double %a.add, ptr addrspace(3) %a ret void } ; shared_unordered_sys define void @shared_unordered_sys_i8(ptr addrspace(3) %a) { ; SM60-LABEL: shared_unordered_sys_i8( ; SM60: { ; SM60-NEXT: .reg .b16 %rs<3>; ; SM60-NEXT: .reg .b64 %rd<2>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [shared_unordered_sys_i8_param_0]; ; SM60-NEXT: ld.volatile.shared.b8 %rs1, [%rd1]; ; SM60-NEXT: add.s16 %rs2, %rs1, 1; ; SM60-NEXT: st.volatile.shared.b8 [%rd1], %rs2; ; SM60-NEXT: ret; ; ; SM70-LABEL: shared_unordered_sys_i8( ; SM70: { ; SM70-NEXT: .reg .b16 %rs<3>; ; SM70-NEXT: .reg .b64 %rd<2>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [shared_unordered_sys_i8_param_0]; ; SM70-NEXT: ld.relaxed.sys.shared.b8 %rs1, [%rd1]; ; SM70-NEXT: add.s16 %rs2, %rs1, 1; ; SM70-NEXT: st.relaxed.sys.shared.b8 [%rd1], %rs2; ; SM70-NEXT: ret; %a.load = load atomic i8, ptr addrspace(3) %a unordered, align 1 %a.add = add i8 %a.load, 1 store atomic i8 %a.add, ptr addrspace(3) %a unordered, align 1 ret void } define void @shared_unordered_sys_i16(ptr addrspace(3) %a) { ; SM60-LABEL: shared_unordered_sys_i16( ; SM60: { ; SM60-NEXT: .reg .b16 %rs<3>; ; SM60-NEXT: .reg .b64 %rd<2>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [shared_unordered_sys_i16_param_0]; ; SM60-NEXT: ld.volatile.shared.b16 %rs1, [%rd1]; ; SM60-NEXT: add.s16 %rs2, %rs1, 1; ; SM60-NEXT: st.volatile.shared.b16 [%rd1], %rs2; ; SM60-NEXT: ret; ; ; SM70-LABEL: shared_unordered_sys_i16( ; SM70: { ; SM70-NEXT: .reg .b16 %rs<3>; ; SM70-NEXT: .reg .b64 %rd<2>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [shared_unordered_sys_i16_param_0]; ; SM70-NEXT: ld.relaxed.sys.shared.b16 %rs1, [%rd1]; ; SM70-NEXT: add.s16 %rs2, %rs1, 1; ; SM70-NEXT: st.relaxed.sys.shared.b16 [%rd1], %rs2; ; SM70-NEXT: ret; %a.load = load atomic i16, ptr addrspace(3) %a unordered, align 2 %a.add = add i16 %a.load, 1 store atomic i16 %a.add, ptr addrspace(3) %a unordered, align 2 ret void } define void @shared_unordered_sys_i32(ptr addrspace(3) %a) { ; SM60-LABEL: shared_unordered_sys_i32( ; SM60: { ; SM60-NEXT: .reg .b32 %r<3>; ; SM60-NEXT: .reg .b64 %rd<2>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [shared_unordered_sys_i32_param_0]; ; SM60-NEXT: ld.volatile.shared.b32 %r1, [%rd1]; ; SM60-NEXT: add.s32 %r2, %r1, 1; ; SM60-NEXT: st.volatile.shared.b32 [%rd1], %r2; ; SM60-NEXT: ret; ; ; SM70-LABEL: shared_unordered_sys_i32( ; SM70: { ; SM70-NEXT: .reg .b32 %r<3>; ; SM70-NEXT: .reg .b64 %rd<2>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [shared_unordered_sys_i32_param_0]; ; SM70-NEXT: ld.relaxed.sys.shared.b32 %r1, [%rd1]; ; SM70-NEXT: add.s32 %r2, %r1, 1; ; SM70-NEXT: st.relaxed.sys.shared.b32 [%rd1], %r2; ; SM70-NEXT: ret; %a.load = load atomic i32, ptr addrspace(3) %a unordered, align 4 %a.add = add i32 %a.load, 1 store atomic i32 %a.add, ptr addrspace(3) %a unordered, align 4 ret void } define void @shared_unordered_sys_i64(ptr addrspace(3) %a) { ; SM60-LABEL: shared_unordered_sys_i64( ; SM60: { ; SM60-NEXT: .reg .b64 %rd<4>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [shared_unordered_sys_i64_param_0]; ; SM60-NEXT: ld.volatile.shared.b64 %rd2, [%rd1]; ; SM60-NEXT: add.s64 %rd3, %rd2, 1; ; SM60-NEXT: st.volatile.shared.b64 [%rd1], %rd3; ; SM60-NEXT: ret; ; ; SM70-LABEL: shared_unordered_sys_i64( ; SM70: { ; SM70-NEXT: .reg .b64 %rd<4>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, 
[shared_unordered_sys_i64_param_0]; ; SM70-NEXT: ld.relaxed.sys.shared.b64 %rd2, [%rd1]; ; SM70-NEXT: add.s64 %rd3, %rd2, 1; ; SM70-NEXT: st.relaxed.sys.shared.b64 [%rd1], %rd3; ; SM70-NEXT: ret; %a.load = load atomic i64, ptr addrspace(3) %a unordered, align 8 %a.add = add i64 %a.load, 1 store atomic i64 %a.add, ptr addrspace(3) %a unordered, align 8 ret void } define void @shared_unordered_sys_float(ptr addrspace(3) %a) { ; SM60-LABEL: shared_unordered_sys_float( ; SM60: { ; SM60-NEXT: .reg .b32 %r<3>; ; SM60-NEXT: .reg .b64 %rd<2>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [shared_unordered_sys_float_param_0]; ; SM60-NEXT: ld.volatile.shared.b32 %r1, [%rd1]; ; SM60-NEXT: add.rn.f32 %r2, %r1, 0f3F800000; ; SM60-NEXT: st.volatile.shared.b32 [%rd1], %r2; ; SM60-NEXT: ret; ; ; SM70-LABEL: shared_unordered_sys_float( ; SM70: { ; SM70-NEXT: .reg .b32 %r<3>; ; SM70-NEXT: .reg .b64 %rd<2>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [shared_unordered_sys_float_param_0]; ; SM70-NEXT: ld.relaxed.sys.shared.b32 %r1, [%rd1]; ; SM70-NEXT: add.rn.f32 %r2, %r1, 0f3F800000; ; SM70-NEXT: st.relaxed.sys.shared.b32 [%rd1], %r2; ; SM70-NEXT: ret; %a.load = load atomic float, ptr addrspace(3) %a unordered, align 4 %a.add = fadd float %a.load, 1. store atomic float %a.add, ptr addrspace(3) %a unordered, align 4 ret void } define void @shared_unordered_sys_double(ptr addrspace(3) %a) { ; SM60-LABEL: shared_unordered_sys_double( ; SM60: { ; SM60-NEXT: .reg .b64 %rd<4>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [shared_unordered_sys_double_param_0]; ; SM60-NEXT: ld.volatile.shared.b64 %rd2, [%rd1]; ; SM60-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000; ; SM60-NEXT: st.volatile.shared.b64 [%rd1], %rd3; ; SM60-NEXT: ret; ; ; SM70-LABEL: shared_unordered_sys_double( ; SM70: { ; SM70-NEXT: .reg .b64 %rd<4>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [shared_unordered_sys_double_param_0]; ; SM70-NEXT: ld.relaxed.sys.shared.b64 %rd2, [%rd1]; ; SM70-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000; ; SM70-NEXT: st.relaxed.sys.shared.b64 [%rd1], %rd3; ; SM70-NEXT: ret; %a.load = load atomic double, ptr addrspace(3) %a unordered, align 8 %a.add = fadd double %a.load, 1. 
store atomic double %a.add, ptr addrspace(3) %a unordered, align 8 ret void } ; shared_unordered_volatile_sys define void @shared_unordered_volatile_sys_i8(ptr addrspace(3) %a) { ; CHECK-LABEL: shared_unordered_volatile_sys_i8( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [shared_unordered_volatile_sys_i8_param_0]; ; CHECK-NEXT: ld.volatile.shared.b8 %rs1, [%rd1]; ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; ; CHECK-NEXT: st.volatile.shared.b8 [%rd1], %rs2; ; CHECK-NEXT: ret; %a.load = load atomic volatile i8, ptr addrspace(3) %a unordered, align 1 %a.add = add i8 %a.load, 1 store atomic volatile i8 %a.add, ptr addrspace(3) %a unordered, align 1 ret void } define void @shared_unordered_volatile_sys_i16(ptr addrspace(3) %a) { ; CHECK-LABEL: shared_unordered_volatile_sys_i16( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [shared_unordered_volatile_sys_i16_param_0]; ; CHECK-NEXT: ld.volatile.shared.b16 %rs1, [%rd1]; ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; ; CHECK-NEXT: st.volatile.shared.b16 [%rd1], %rs2; ; CHECK-NEXT: ret; %a.load = load atomic volatile i16, ptr addrspace(3) %a unordered, align 2 %a.add = add i16 %a.load, 1 store atomic volatile i16 %a.add, ptr addrspace(3) %a unordered, align 2 ret void } define void @shared_unordered_volatile_sys_i32(ptr addrspace(3) %a) { ; CHECK-LABEL: shared_unordered_volatile_sys_i32( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [shared_unordered_volatile_sys_i32_param_0]; ; CHECK-NEXT: ld.volatile.shared.b32 %r1, [%rd1]; ; CHECK-NEXT: add.s32 %r2, %r1, 1; ; CHECK-NEXT: st.volatile.shared.b32 [%rd1], %r2; ; CHECK-NEXT: ret; %a.load = load atomic volatile i32, ptr addrspace(3) %a unordered, align 4 %a.add = add i32 %a.load, 1 store atomic volatile i32 %a.add, ptr addrspace(3) %a unordered, align 4 ret void } define void @shared_unordered_volatile_sys_i64(ptr addrspace(3) %a) { ; CHECK-LABEL: shared_unordered_volatile_sys_i64( ; CHECK: { ; CHECK-NEXT: .reg .b64 %rd<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [shared_unordered_volatile_sys_i64_param_0]; ; CHECK-NEXT: ld.volatile.shared.b64 %rd2, [%rd1]; ; CHECK-NEXT: add.s64 %rd3, %rd2, 1; ; CHECK-NEXT: st.volatile.shared.b64 [%rd1], %rd3; ; CHECK-NEXT: ret; %a.load = load atomic volatile i64, ptr addrspace(3) %a unordered, align 8 %a.add = add i64 %a.load, 1 store atomic volatile i64 %a.add, ptr addrspace(3) %a unordered, align 8 ret void } define void @shared_unordered_volatile_sys_float(ptr addrspace(3) %a) { ; CHECK-LABEL: shared_unordered_volatile_sys_float( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [shared_unordered_volatile_sys_float_param_0]; ; CHECK-NEXT: ld.volatile.shared.b32 %r1, [%rd1]; ; CHECK-NEXT: add.rn.f32 %r2, %r1, 0f3F800000; ; CHECK-NEXT: st.volatile.shared.b32 [%rd1], %r2; ; CHECK-NEXT: ret; %a.load = load atomic volatile float, ptr addrspace(3) %a unordered, align 4 %a.add = fadd float %a.load, 1. 
store atomic volatile float %a.add, ptr addrspace(3) %a unordered, align 4 ret void } define void @shared_unordered_volatile_sys_double(ptr addrspace(3) %a) { ; CHECK-LABEL: shared_unordered_volatile_sys_double( ; CHECK: { ; CHECK-NEXT: .reg .b64 %rd<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [shared_unordered_volatile_sys_double_param_0]; ; CHECK-NEXT: ld.volatile.shared.b64 %rd2, [%rd1]; ; CHECK-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000; ; CHECK-NEXT: st.volatile.shared.b64 [%rd1], %rd3; ; CHECK-NEXT: ret; %a.load = load atomic volatile double, ptr addrspace(3) %a unordered, align 8 %a.add = fadd double %a.load, 1. store atomic volatile double %a.add, ptr addrspace(3) %a unordered, align 8 ret void } ; shared_monotonic_sys define void @shared_monotonic_sys_i8(ptr addrspace(3) %a) { ; SM60-LABEL: shared_monotonic_sys_i8( ; SM60: { ; SM60-NEXT: .reg .b16 %rs<3>; ; SM60-NEXT: .reg .b64 %rd<2>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [shared_monotonic_sys_i8_param_0]; ; SM60-NEXT: ld.volatile.shared.b8 %rs1, [%rd1]; ; SM60-NEXT: add.s16 %rs2, %rs1, 1; ; SM60-NEXT: st.volatile.shared.b8 [%rd1], %rs2; ; SM60-NEXT: ret; ; ; SM70-LABEL: shared_monotonic_sys_i8( ; SM70: { ; SM70-NEXT: .reg .b16 %rs<3>; ; SM70-NEXT: .reg .b64 %rd<2>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [shared_monotonic_sys_i8_param_0]; ; SM70-NEXT: ld.relaxed.sys.shared.b8 %rs1, [%rd1]; ; SM70-NEXT: add.s16 %rs2, %rs1, 1; ; SM70-NEXT: st.relaxed.sys.shared.b8 [%rd1], %rs2; ; SM70-NEXT: ret; %a.load = load atomic i8, ptr addrspace(3) %a monotonic, align 1 %a.add = add i8 %a.load, 1 store atomic i8 %a.add, ptr addrspace(3) %a monotonic, align 1 ret void } define void @shared_monotonic_sys_i16(ptr addrspace(3) %a) { ; SM60-LABEL: shared_monotonic_sys_i16( ; SM60: { ; SM60-NEXT: .reg .b16 %rs<3>; ; SM60-NEXT: .reg .b64 %rd<2>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [shared_monotonic_sys_i16_param_0]; ; SM60-NEXT: ld.volatile.shared.b16 %rs1, [%rd1]; ; SM60-NEXT: add.s16 %rs2, %rs1, 1; ; SM60-NEXT: st.volatile.shared.b16 [%rd1], %rs2; ; SM60-NEXT: ret; ; ; SM70-LABEL: shared_monotonic_sys_i16( ; SM70: { ; SM70-NEXT: .reg .b16 %rs<3>; ; SM70-NEXT: .reg .b64 %rd<2>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [shared_monotonic_sys_i16_param_0]; ; SM70-NEXT: ld.relaxed.sys.shared.b16 %rs1, [%rd1]; ; SM70-NEXT: add.s16 %rs2, %rs1, 1; ; SM70-NEXT: st.relaxed.sys.shared.b16 [%rd1], %rs2; ; SM70-NEXT: ret; %a.load = load atomic i16, ptr addrspace(3) %a monotonic, align 2 %a.add = add i16 %a.load, 1 store atomic i16 %a.add, ptr addrspace(3) %a monotonic, align 2 ret void } define void @shared_monotonic_sys_i32(ptr addrspace(3) %a) { ; SM60-LABEL: shared_monotonic_sys_i32( ; SM60: { ; SM60-NEXT: .reg .b32 %r<3>; ; SM60-NEXT: .reg .b64 %rd<2>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [shared_monotonic_sys_i32_param_0]; ; SM60-NEXT: ld.volatile.shared.b32 %r1, [%rd1]; ; SM60-NEXT: add.s32 %r2, %r1, 1; ; SM60-NEXT: st.volatile.shared.b32 [%rd1], %r2; ; SM60-NEXT: ret; ; ; SM70-LABEL: shared_monotonic_sys_i32( ; SM70: { ; SM70-NEXT: .reg .b32 %r<3>; ; SM70-NEXT: .reg .b64 %rd<2>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [shared_monotonic_sys_i32_param_0]; ; SM70-NEXT: ld.relaxed.sys.shared.b32 %r1, [%rd1]; ; SM70-NEXT: add.s32 %r2, %r1, 1; ; SM70-NEXT: st.relaxed.sys.shared.b32 [%rd1], %r2; ; SM70-NEXT: ret; %a.load = load atomic i32, ptr 
addrspace(3) %a monotonic, align 4 %a.add = add i32 %a.load, 1 store atomic i32 %a.add, ptr addrspace(3) %a monotonic, align 4 ret void } define void @shared_monotonic_sys_i64(ptr addrspace(3) %a) { ; SM60-LABEL: shared_monotonic_sys_i64( ; SM60: { ; SM60-NEXT: .reg .b64 %rd<4>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [shared_monotonic_sys_i64_param_0]; ; SM60-NEXT: ld.volatile.shared.b64 %rd2, [%rd1]; ; SM60-NEXT: add.s64 %rd3, %rd2, 1; ; SM60-NEXT: st.volatile.shared.b64 [%rd1], %rd3; ; SM60-NEXT: ret; ; ; SM70-LABEL: shared_monotonic_sys_i64( ; SM70: { ; SM70-NEXT: .reg .b64 %rd<4>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [shared_monotonic_sys_i64_param_0]; ; SM70-NEXT: ld.relaxed.sys.shared.b64 %rd2, [%rd1]; ; SM70-NEXT: add.s64 %rd3, %rd2, 1; ; SM70-NEXT: st.relaxed.sys.shared.b64 [%rd1], %rd3; ; SM70-NEXT: ret; %a.load = load atomic i64, ptr addrspace(3) %a monotonic, align 8 %a.add = add i64 %a.load, 1 store atomic i64 %a.add, ptr addrspace(3) %a monotonic, align 8 ret void } define void @shared_monotonic_sys_float(ptr addrspace(3) %a) { ; SM60-LABEL: shared_monotonic_sys_float( ; SM60: { ; SM60-NEXT: .reg .b32 %r<3>; ; SM60-NEXT: .reg .b64 %rd<2>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [shared_monotonic_sys_float_param_0]; ; SM60-NEXT: ld.volatile.shared.b32 %r1, [%rd1]; ; SM60-NEXT: add.rn.f32 %r2, %r1, 0f3F800000; ; SM60-NEXT: st.volatile.shared.b32 [%rd1], %r2; ; SM60-NEXT: ret; ; ; SM70-LABEL: shared_monotonic_sys_float( ; SM70: { ; SM70-NEXT: .reg .b32 %r<3>; ; SM70-NEXT: .reg .b64 %rd<2>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [shared_monotonic_sys_float_param_0]; ; SM70-NEXT: ld.relaxed.sys.shared.b32 %r1, [%rd1]; ; SM70-NEXT: add.rn.f32 %r2, %r1, 0f3F800000; ; SM70-NEXT: st.relaxed.sys.shared.b32 [%rd1], %r2; ; SM70-NEXT: ret; %a.load = load atomic float, ptr addrspace(3) %a monotonic, align 4 %a.add = fadd float %a.load, 1. store atomic float %a.add, ptr addrspace(3) %a monotonic, align 4 ret void } define void @shared_monotonic_sys_double(ptr addrspace(3) %a) { ; SM60-LABEL: shared_monotonic_sys_double( ; SM60: { ; SM60-NEXT: .reg .b64 %rd<4>; ; SM60-EMPTY: ; SM60-NEXT: // %bb.0: ; SM60-NEXT: ld.param.b64 %rd1, [shared_monotonic_sys_double_param_0]; ; SM60-NEXT: ld.volatile.shared.b64 %rd2, [%rd1]; ; SM60-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000; ; SM60-NEXT: st.volatile.shared.b64 [%rd1], %rd3; ; SM60-NEXT: ret; ; ; SM70-LABEL: shared_monotonic_sys_double( ; SM70: { ; SM70-NEXT: .reg .b64 %rd<4>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [shared_monotonic_sys_double_param_0]; ; SM70-NEXT: ld.relaxed.sys.shared.b64 %rd2, [%rd1]; ; SM70-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000; ; SM70-NEXT: st.relaxed.sys.shared.b64 [%rd1], %rd3; ; SM70-NEXT: ret; %a.load = load atomic double, ptr addrspace(3) %a monotonic, align 8 %a.add = fadd double %a.load, 1. 
store atomic double %a.add, ptr addrspace(3) %a monotonic, align 8 ret void } ; shared_monotonic_volatile_sys define void @shared_monotonic_volatile_sys_i8(ptr addrspace(3) %a) { ; CHECK-LABEL: shared_monotonic_volatile_sys_i8( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [shared_monotonic_volatile_sys_i8_param_0]; ; CHECK-NEXT: ld.volatile.shared.b8 %rs1, [%rd1]; ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; ; CHECK-NEXT: st.volatile.shared.b8 [%rd1], %rs2; ; CHECK-NEXT: ret; %a.load = load atomic volatile i8, ptr addrspace(3) %a monotonic, align 1 %a.add = add i8 %a.load, 1 store atomic volatile i8 %a.add, ptr addrspace(3) %a monotonic, align 1 ret void } define void @shared_monotonic_volatile_sys_i16(ptr addrspace(3) %a) { ; CHECK-LABEL: shared_monotonic_volatile_sys_i16( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [shared_monotonic_volatile_sys_i16_param_0]; ; CHECK-NEXT: ld.volatile.shared.b16 %rs1, [%rd1]; ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; ; CHECK-NEXT: st.volatile.shared.b16 [%rd1], %rs2; ; CHECK-NEXT: ret; %a.load = load atomic volatile i16, ptr addrspace(3) %a monotonic, align 2 %a.add = add i16 %a.load, 1 store atomic volatile i16 %a.add, ptr addrspace(3) %a monotonic, align 2 ret void } define void @shared_monotonic_volatile_sys_i32(ptr addrspace(3) %a) { ; CHECK-LABEL: shared_monotonic_volatile_sys_i32( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [shared_monotonic_volatile_sys_i32_param_0]; ; CHECK-NEXT: ld.volatile.shared.b32 %r1, [%rd1]; ; CHECK-NEXT: add.s32 %r2, %r1, 1; ; CHECK-NEXT: st.volatile.shared.b32 [%rd1], %r2; ; CHECK-NEXT: ret; %a.load = load atomic volatile i32, ptr addrspace(3) %a monotonic, align 4 %a.add = add i32 %a.load, 1 store atomic volatile i32 %a.add, ptr addrspace(3) %a monotonic, align 4 ret void } define void @shared_monotonic_volatile_sys_i64(ptr addrspace(3) %a) { ; CHECK-LABEL: shared_monotonic_volatile_sys_i64( ; CHECK: { ; CHECK-NEXT: .reg .b64 %rd<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [shared_monotonic_volatile_sys_i64_param_0]; ; CHECK-NEXT: ld.volatile.shared.b64 %rd2, [%rd1]; ; CHECK-NEXT: add.s64 %rd3, %rd2, 1; ; CHECK-NEXT: st.volatile.shared.b64 [%rd1], %rd3; ; CHECK-NEXT: ret; %a.load = load atomic volatile i64, ptr addrspace(3) %a monotonic, align 8 %a.add = add i64 %a.load, 1 store atomic volatile i64 %a.add, ptr addrspace(3) %a monotonic, align 8 ret void } define void @shared_monotonic_volatile_sys_float(ptr addrspace(3) %a) { ; CHECK-LABEL: shared_monotonic_volatile_sys_float( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [shared_monotonic_volatile_sys_float_param_0]; ; CHECK-NEXT: ld.volatile.shared.b32 %r1, [%rd1]; ; CHECK-NEXT: add.rn.f32 %r2, %r1, 0f3F800000; ; CHECK-NEXT: st.volatile.shared.b32 [%rd1], %r2; ; CHECK-NEXT: ret; %a.load = load atomic volatile float, ptr addrspace(3) %a monotonic, align 4 %a.add = fadd float %a.load, 1. 
store atomic volatile float %a.add, ptr addrspace(3) %a monotonic, align 4 ret void } define void @shared_monotonic_volatile_sys_double(ptr addrspace(3) %a) { ; CHECK-LABEL: shared_monotonic_volatile_sys_double( ; CHECK: { ; CHECK-NEXT: .reg .b64 %rd<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [shared_monotonic_volatile_sys_double_param_0]; ; CHECK-NEXT: ld.volatile.shared.b64 %rd2, [%rd1]; ; CHECK-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000; ; CHECK-NEXT: st.volatile.shared.b64 [%rd1], %rd3; ; CHECK-NEXT: ret; %a.load = load atomic volatile double, ptr addrspace(3) %a monotonic, align 8 %a.add = fadd double %a.load, 1. store atomic volatile double %a.add, ptr addrspace(3) %a monotonic, align 8 ret void } ;; local statespace ; local define void @local_i8(ptr addrspace(5) %a) { ; CHECK-LABEL: local_i8( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_i8_param_0]; ; CHECK-NEXT: ld.local.b8 %rs1, [%rd1]; ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; ; CHECK-NEXT: st.local.b8 [%rd1], %rs2; ; CHECK-NEXT: ret; %a.load = load i8, ptr addrspace(5) %a %a.add = add i8 %a.load, 1 store i8 %a.add, ptr addrspace(5) %a ret void } define void @local_i16(ptr addrspace(5) %a) { ; CHECK-LABEL: local_i16( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_i16_param_0]; ; CHECK-NEXT: ld.local.b16 %rs1, [%rd1]; ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; ; CHECK-NEXT: st.local.b16 [%rd1], %rs2; ; CHECK-NEXT: ret; %a.load = load i16, ptr addrspace(5) %a %a.add = add i16 %a.load, 1 store i16 %a.add, ptr addrspace(5) %a ret void } define void @local_i32(ptr addrspace(5) %a) { ; CHECK-LABEL: local_i32( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_i32_param_0]; ; CHECK-NEXT: ld.local.b32 %r1, [%rd1]; ; CHECK-NEXT: add.s32 %r2, %r1, 1; ; CHECK-NEXT: st.local.b32 [%rd1], %r2; ; CHECK-NEXT: ret; %a.load = load i32, ptr addrspace(5) %a %a.add = add i32 %a.load, 1 store i32 %a.add, ptr addrspace(5) %a ret void } define void @local_i64(ptr addrspace(5) %a) { ; CHECK-LABEL: local_i64( ; CHECK: { ; CHECK-NEXT: .reg .b64 %rd<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_i64_param_0]; ; CHECK-NEXT: ld.local.b64 %rd2, [%rd1]; ; CHECK-NEXT: add.s64 %rd3, %rd2, 1; ; CHECK-NEXT: st.local.b64 [%rd1], %rd3; ; CHECK-NEXT: ret; %a.load = load i64, ptr addrspace(5) %a %a.add = add i64 %a.load, 1 store i64 %a.add, ptr addrspace(5) %a ret void } define void @local_float(ptr addrspace(5) %a) { ; CHECK-LABEL: local_float( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_float_param_0]; ; CHECK-NEXT: ld.local.b32 %r1, [%rd1]; ; CHECK-NEXT: add.rn.f32 %r2, %r1, 0f3F800000; ; CHECK-NEXT: st.local.b32 [%rd1], %r2; ; CHECK-NEXT: ret; %a.load = load float, ptr addrspace(5) %a %a.add = fadd float %a.load, 1. 
store float %a.add, ptr addrspace(5) %a ret void } define void @local_double(ptr addrspace(5) %a) { ; CHECK-LABEL: local_double( ; CHECK: { ; CHECK-NEXT: .reg .b64 %rd<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_double_param_0]; ; CHECK-NEXT: ld.local.b64 %rd2, [%rd1]; ; CHECK-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000; ; CHECK-NEXT: st.local.b64 [%rd1], %rd3; ; CHECK-NEXT: ret; %a.load = load double, ptr addrspace(5) %a %a.add = fadd double %a.load, 1. store double %a.add, ptr addrspace(5) %a ret void } ; local_volatile define void @local_volatile_i8(ptr addrspace(5) %a) { ; CHECK-LABEL: local_volatile_i8( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_i8_param_0]; ; CHECK-NEXT: ld.local.b8 %rs1, [%rd1]; ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; ; CHECK-NEXT: st.local.b8 [%rd1], %rs2; ; CHECK-NEXT: ret; %a.load = load volatile i8, ptr addrspace(5) %a %a.add = add i8 %a.load, 1 store volatile i8 %a.add, ptr addrspace(5) %a ret void } define void @local_volatile_i16(ptr addrspace(5) %a) { ; CHECK-LABEL: local_volatile_i16( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_i16_param_0]; ; CHECK-NEXT: ld.local.b16 %rs1, [%rd1]; ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; ; CHECK-NEXT: st.local.b16 [%rd1], %rs2; ; CHECK-NEXT: ret; %a.load = load volatile i16, ptr addrspace(5) %a %a.add = add i16 %a.load, 1 store volatile i16 %a.add, ptr addrspace(5) %a ret void } define void @local_volatile_i32(ptr addrspace(5) %a) { ; CHECK-LABEL: local_volatile_i32( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_i32_param_0]; ; CHECK-NEXT: ld.local.b32 %r1, [%rd1]; ; CHECK-NEXT: add.s32 %r2, %r1, 1; ; CHECK-NEXT: st.local.b32 [%rd1], %r2; ; CHECK-NEXT: ret; %a.load = load volatile i32, ptr addrspace(5) %a %a.add = add i32 %a.load, 1 store volatile i32 %a.add, ptr addrspace(5) %a ret void } define void @local_volatile_i64(ptr addrspace(5) %a) { ; CHECK-LABEL: local_volatile_i64( ; CHECK: { ; CHECK-NEXT: .reg .b64 %rd<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_i64_param_0]; ; CHECK-NEXT: ld.local.b64 %rd2, [%rd1]; ; CHECK-NEXT: add.s64 %rd3, %rd2, 1; ; CHECK-NEXT: st.local.b64 [%rd1], %rd3; ; CHECK-NEXT: ret; %a.load = load volatile i64, ptr addrspace(5) %a %a.add = add i64 %a.load, 1 store volatile i64 %a.add, ptr addrspace(5) %a ret void } define void @local_volatile_float(ptr addrspace(5) %a) { ; CHECK-LABEL: local_volatile_float( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_float_param_0]; ; CHECK-NEXT: ld.local.b32 %r1, [%rd1]; ; CHECK-NEXT: add.rn.f32 %r2, %r1, 0f3F800000; ; CHECK-NEXT: st.local.b32 [%rd1], %r2; ; CHECK-NEXT: ret; %a.load = load volatile float, ptr addrspace(5) %a %a.add = fadd float %a.load, 1. 
store volatile float %a.add, ptr addrspace(5) %a ret void } define void @local_volatile_double(ptr addrspace(5) %a) { ; CHECK-LABEL: local_volatile_double( ; CHECK: { ; CHECK-NEXT: .reg .b64 %rd<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_double_param_0]; ; CHECK-NEXT: ld.local.b64 %rd2, [%rd1]; ; CHECK-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000; ; CHECK-NEXT: st.local.b64 [%rd1], %rd3; ; CHECK-NEXT: ret; %a.load = load volatile double, ptr addrspace(5) %a %a.add = fadd double %a.load, 1. store volatile double %a.add, ptr addrspace(5) %a ret void } ; local_unordered_sys define void @local_unordered_sys_i8(ptr addrspace(5) %a) { ; CHECK-LABEL: local_unordered_sys_i8( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_unordered_sys_i8_param_0]; ; CHECK-NEXT: ld.local.b8 %rs1, [%rd1]; ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; ; CHECK-NEXT: st.local.b8 [%rd1], %rs2; ; CHECK-NEXT: ret; %a.load = load atomic i8, ptr addrspace(5) %a unordered, align 1 %a.add = add i8 %a.load, 1 store atomic i8 %a.add, ptr addrspace(5) %a unordered, align 1 ret void } define void @local_unordered_sys_i16(ptr addrspace(5) %a) { ; CHECK-LABEL: local_unordered_sys_i16( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_unordered_sys_i16_param_0]; ; CHECK-NEXT: ld.local.b16 %rs1, [%rd1]; ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; ; CHECK-NEXT: st.local.b16 [%rd1], %rs2; ; CHECK-NEXT: ret; %a.load = load atomic i16, ptr addrspace(5) %a unordered, align 2 %a.add = add i16 %a.load, 1 store atomic i16 %a.add, ptr addrspace(5) %a unordered, align 2 ret void } define void @local_unordered_sys_i32(ptr addrspace(5) %a) { ; CHECK-LABEL: local_unordered_sys_i32( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_unordered_sys_i32_param_0]; ; CHECK-NEXT: ld.local.b32 %r1, [%rd1]; ; CHECK-NEXT: add.s32 %r2, %r1, 1; ; CHECK-NEXT: st.local.b32 [%rd1], %r2; ; CHECK-NEXT: ret; %a.load = load atomic i32, ptr addrspace(5) %a unordered, align 4 %a.add = add i32 %a.load, 1 store atomic i32 %a.add, ptr addrspace(5) %a unordered, align 4 ret void } define void @local_unordered_sys_i64(ptr addrspace(5) %a) { ; CHECK-LABEL: local_unordered_sys_i64( ; CHECK: { ; CHECK-NEXT: .reg .b64 %rd<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_unordered_sys_i64_param_0]; ; CHECK-NEXT: ld.local.b64 %rd2, [%rd1]; ; CHECK-NEXT: add.s64 %rd3, %rd2, 1; ; CHECK-NEXT: st.local.b64 [%rd1], %rd3; ; CHECK-NEXT: ret; %a.load = load atomic i64, ptr addrspace(5) %a unordered, align 8 %a.add = add i64 %a.load, 1 store atomic i64 %a.add, ptr addrspace(5) %a unordered, align 8 ret void } define void @local_unordered_sys_float(ptr addrspace(5) %a) { ; CHECK-LABEL: local_unordered_sys_float( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_unordered_sys_float_param_0]; ; CHECK-NEXT: ld.local.b32 %r1, [%rd1]; ; CHECK-NEXT: add.rn.f32 %r2, %r1, 0f3F800000; ; CHECK-NEXT: st.local.b32 [%rd1], %r2; ; CHECK-NEXT: ret; %a.load = load atomic float, ptr addrspace(5) %a unordered, align 4 %a.add = fadd float %a.load, 1. 
store atomic float %a.add, ptr addrspace(5) %a unordered, align 4 ret void } define void @local_unordered_sys_double(ptr addrspace(5) %a) { ; CHECK-LABEL: local_unordered_sys_double( ; CHECK: { ; CHECK-NEXT: .reg .b64 %rd<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_unordered_sys_double_param_0]; ; CHECK-NEXT: ld.local.b64 %rd2, [%rd1]; ; CHECK-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000; ; CHECK-NEXT: st.local.b64 [%rd1], %rd3; ; CHECK-NEXT: ret; %a.load = load atomic double, ptr addrspace(5) %a unordered, align 8 %a.add = fadd double %a.load, 1. store atomic double %a.add, ptr addrspace(5) %a unordered, align 8 ret void } ; local_unordered_volatile_sys define void @local_unordered_volatile_sys_i8(ptr addrspace(5) %a) { ; CHECK-LABEL: local_unordered_volatile_sys_i8( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_unordered_volatile_sys_i8_param_0]; ; CHECK-NEXT: ld.local.b8 %rs1, [%rd1]; ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; ; CHECK-NEXT: st.local.b8 [%rd1], %rs2; ; CHECK-NEXT: ret; %a.load = load atomic volatile i8, ptr addrspace(5) %a unordered, align 1 %a.add = add i8 %a.load, 1 store atomic volatile i8 %a.add, ptr addrspace(5) %a unordered, align 1 ret void } define void @local_unordered_volatile_sys_i16(ptr addrspace(5) %a) { ; CHECK-LABEL: local_unordered_volatile_sys_i16( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_unordered_volatile_sys_i16_param_0]; ; CHECK-NEXT: ld.local.b16 %rs1, [%rd1]; ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; ; CHECK-NEXT: st.local.b16 [%rd1], %rs2; ; CHECK-NEXT: ret; %a.load = load atomic volatile i16, ptr addrspace(5) %a unordered, align 2 %a.add = add i16 %a.load, 1 store atomic volatile i16 %a.add, ptr addrspace(5) %a unordered, align 2 ret void } define void @local_unordered_volatile_sys_i32(ptr addrspace(5) %a) { ; CHECK-LABEL: local_unordered_volatile_sys_i32( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_unordered_volatile_sys_i32_param_0]; ; CHECK-NEXT: ld.local.b32 %r1, [%rd1]; ; CHECK-NEXT: add.s32 %r2, %r1, 1; ; CHECK-NEXT: st.local.b32 [%rd1], %r2; ; CHECK-NEXT: ret; %a.load = load atomic volatile i32, ptr addrspace(5) %a unordered, align 4 %a.add = add i32 %a.load, 1 store atomic volatile i32 %a.add, ptr addrspace(5) %a unordered, align 4 ret void } define void @local_unordered_volatile_sys_i64(ptr addrspace(5) %a) { ; CHECK-LABEL: local_unordered_volatile_sys_i64( ; CHECK: { ; CHECK-NEXT: .reg .b64 %rd<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_unordered_volatile_sys_i64_param_0]; ; CHECK-NEXT: ld.local.b64 %rd2, [%rd1]; ; CHECK-NEXT: add.s64 %rd3, %rd2, 1; ; CHECK-NEXT: st.local.b64 [%rd1], %rd3; ; CHECK-NEXT: ret; %a.load = load atomic volatile i64, ptr addrspace(5) %a unordered, align 8 %a.add = add i64 %a.load, 1 store atomic volatile i64 %a.add, ptr addrspace(5) %a unordered, align 8 ret void } define void @local_unordered_volatile_sys_float(ptr addrspace(5) %a) { ; CHECK-LABEL: local_unordered_volatile_sys_float( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_unordered_volatile_sys_float_param_0]; ; CHECK-NEXT: ld.local.b32 %r1, [%rd1]; ; 
CHECK-NEXT: add.rn.f32 %r2, %r1, 0f3F800000; ; CHECK-NEXT: st.local.b32 [%rd1], %r2; ; CHECK-NEXT: ret; %a.load = load atomic volatile float, ptr addrspace(5) %a unordered, align 4 %a.add = fadd float %a.load, 1. store atomic volatile float %a.add, ptr addrspace(5) %a unordered, align 4 ret void } define void @local_unordered_volatile_sys_double(ptr addrspace(5) %a) { ; CHECK-LABEL: local_unordered_volatile_sys_double( ; CHECK: { ; CHECK-NEXT: .reg .b64 %rd<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_unordered_volatile_sys_double_param_0]; ; CHECK-NEXT: ld.local.b64 %rd2, [%rd1]; ; CHECK-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000; ; CHECK-NEXT: st.local.b64 [%rd1], %rd3; ; CHECK-NEXT: ret; %a.load = load atomic volatile double, ptr addrspace(5) %a unordered, align 8 %a.add = fadd double %a.load, 1. store atomic volatile double %a.add, ptr addrspace(5) %a unordered, align 8 ret void } ; local_monotonic_sys define void @local_monotonic_sys_i8(ptr addrspace(5) %a) { ; CHECK-LABEL: local_monotonic_sys_i8( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_monotonic_sys_i8_param_0]; ; CHECK-NEXT: ld.local.b8 %rs1, [%rd1]; ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; ; CHECK-NEXT: st.local.b8 [%rd1], %rs2; ; CHECK-NEXT: ret; %a.load = load atomic i8, ptr addrspace(5) %a monotonic, align 1 %a.add = add i8 %a.load, 1 store atomic i8 %a.add, ptr addrspace(5) %a monotonic, align 1 ret void } define void @local_monotonic_sys_i16(ptr addrspace(5) %a) { ; CHECK-LABEL: local_monotonic_sys_i16( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_monotonic_sys_i16_param_0]; ; CHECK-NEXT: ld.local.b16 %rs1, [%rd1]; ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; ; CHECK-NEXT: st.local.b16 [%rd1], %rs2; ; CHECK-NEXT: ret; %a.load = load atomic i16, ptr addrspace(5) %a monotonic, align 2 %a.add = add i16 %a.load, 1 store atomic i16 %a.add, ptr addrspace(5) %a monotonic, align 2 ret void } define void @local_monotonic_sys_i32(ptr addrspace(5) %a) { ; CHECK-LABEL: local_monotonic_sys_i32( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_monotonic_sys_i32_param_0]; ; CHECK-NEXT: ld.local.b32 %r1, [%rd1]; ; CHECK-NEXT: add.s32 %r2, %r1, 1; ; CHECK-NEXT: st.local.b32 [%rd1], %r2; ; CHECK-NEXT: ret; %a.load = load atomic i32, ptr addrspace(5) %a monotonic, align 4 %a.add = add i32 %a.load, 1 store atomic i32 %a.add, ptr addrspace(5) %a monotonic, align 4 ret void } define void @local_monotonic_sys_i64(ptr addrspace(5) %a) { ; CHECK-LABEL: local_monotonic_sys_i64( ; CHECK: { ; CHECK-NEXT: .reg .b64 %rd<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_monotonic_sys_i64_param_0]; ; CHECK-NEXT: ld.local.b64 %rd2, [%rd1]; ; CHECK-NEXT: add.s64 %rd3, %rd2, 1; ; CHECK-NEXT: st.local.b64 [%rd1], %rd3; ; CHECK-NEXT: ret; %a.load = load atomic i64, ptr addrspace(5) %a monotonic, align 8 %a.add = add i64 %a.load, 1 store atomic i64 %a.add, ptr addrspace(5) %a monotonic, align 8 ret void } define void @local_monotonic_sys_float(ptr addrspace(5) %a) { ; CHECK-LABEL: local_monotonic_sys_float( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, 
[local_monotonic_sys_float_param_0]; ; CHECK-NEXT: ld.local.b32 %r1, [%rd1]; ; CHECK-NEXT: add.rn.f32 %r2, %r1, 0f3F800000; ; CHECK-NEXT: st.local.b32 [%rd1], %r2; ; CHECK-NEXT: ret; %a.load = load atomic float, ptr addrspace(5) %a monotonic, align 4 %a.add = fadd float %a.load, 1. store atomic float %a.add, ptr addrspace(5) %a monotonic, align 4 ret void } define void @local_monotonic_sys_double(ptr addrspace(5) %a) { ; CHECK-LABEL: local_monotonic_sys_double( ; CHECK: { ; CHECK-NEXT: .reg .b64 %rd<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_monotonic_sys_double_param_0]; ; CHECK-NEXT: ld.local.b64 %rd2, [%rd1]; ; CHECK-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000; ; CHECK-NEXT: st.local.b64 [%rd1], %rd3; ; CHECK-NEXT: ret; %a.load = load atomic double, ptr addrspace(5) %a monotonic, align 8 %a.add = fadd double %a.load, 1. store atomic double %a.add, ptr addrspace(5) %a monotonic, align 8 ret void } ; local_monotonic_volatile_sys define void @local_monotonic_volatile_sys_i8(ptr addrspace(5) %a) { ; CHECK-LABEL: local_monotonic_volatile_sys_i8( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_monotonic_volatile_sys_i8_param_0]; ; CHECK-NEXT: ld.local.b8 %rs1, [%rd1]; ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; ; CHECK-NEXT: st.local.b8 [%rd1], %rs2; ; CHECK-NEXT: ret; %a.load = load atomic volatile i8, ptr addrspace(5) %a monotonic, align 1 %a.add = add i8 %a.load, 1 store atomic volatile i8 %a.add, ptr addrspace(5) %a monotonic, align 1 ret void } define void @local_monotonic_volatile_sys_i16(ptr addrspace(5) %a) { ; CHECK-LABEL: local_monotonic_volatile_sys_i16( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_monotonic_volatile_sys_i16_param_0]; ; CHECK-NEXT: ld.local.b16 %rs1, [%rd1]; ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; ; CHECK-NEXT: st.local.b16 [%rd1], %rs2; ; CHECK-NEXT: ret; %a.load = load atomic volatile i16, ptr addrspace(5) %a monotonic, align 2 %a.add = add i16 %a.load, 1 store atomic volatile i16 %a.add, ptr addrspace(5) %a monotonic, align 2 ret void } define void @local_monotonic_volatile_sys_i32(ptr addrspace(5) %a) { ; CHECK-LABEL: local_monotonic_volatile_sys_i32( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_monotonic_volatile_sys_i32_param_0]; ; CHECK-NEXT: ld.local.b32 %r1, [%rd1]; ; CHECK-NEXT: add.s32 %r2, %r1, 1; ; CHECK-NEXT: st.local.b32 [%rd1], %r2; ; CHECK-NEXT: ret; %a.load = load atomic volatile i32, ptr addrspace(5) %a monotonic, align 4 %a.add = add i32 %a.load, 1 store atomic volatile i32 %a.add, ptr addrspace(5) %a monotonic, align 4 ret void } define void @local_monotonic_volatile_sys_i64(ptr addrspace(5) %a) { ; CHECK-LABEL: local_monotonic_volatile_sys_i64( ; CHECK: { ; CHECK-NEXT: .reg .b64 %rd<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_monotonic_volatile_sys_i64_param_0]; ; CHECK-NEXT: ld.local.b64 %rd2, [%rd1]; ; CHECK-NEXT: add.s64 %rd3, %rd2, 1; ; CHECK-NEXT: st.local.b64 [%rd1], %rd3; ; CHECK-NEXT: ret; %a.load = load atomic volatile i64, ptr addrspace(5) %a monotonic, align 8 %a.add = add i64 %a.load, 1 store atomic volatile i64 %a.add, ptr addrspace(5) %a monotonic, align 8 ret void } define void @local_monotonic_volatile_sys_float(ptr addrspace(5) %a) { ; 
; CHECK-LABEL: local_monotonic_volatile_sys_float(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_monotonic_volatile_sys_float_param_0];
; CHECK-NEXT: ld.local.b32 %r1, [%rd1];
; CHECK-NEXT: add.rn.f32 %r2, %r1, 0f3F800000;
; CHECK-NEXT: st.local.b32 [%rd1], %r2;
; CHECK-NEXT: ret;
  %a.load = load atomic volatile float, ptr addrspace(5) %a monotonic, align 4
  %a.add = fadd float %a.load, 1.
  store atomic volatile float %a.add, ptr addrspace(5) %a monotonic, align 4
  ret void
}

define void @local_monotonic_volatile_sys_double(ptr addrspace(5) %a) {
; CHECK-LABEL: local_monotonic_volatile_sys_double(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [local_monotonic_volatile_sys_double_param_0];
; CHECK-NEXT: ld.local.b64 %rd2, [%rd1];
; CHECK-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000;
; CHECK-NEXT: st.local.b64 [%rd1], %rd3;
; CHECK-NEXT: ret;
  %a.load = load atomic volatile double, ptr addrspace(5) %a monotonic, align 8
  %a.add = fadd double %a.load, 1.
  store atomic volatile double %a.add, ptr addrspace(5) %a monotonic, align 8
  ret void
}
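
; Summary of the lowerings checked above (descriptive note, not autogenerated):
; relaxed (unordered or monotonic) atomic accesses to the shared statespace are
; emitted as ld.volatile.shared/st.volatile.shared under the SM60 configuration
; and as ld.relaxed.sys.shared/st.relaxed.sys.shared under the SM70
; configuration, while atomic volatile accesses to shared stay
; ld.volatile.shared/st.volatile.shared on both; atomic volatile monotonic
; accesses to the global statespace use ld.volatile.global/st.volatile.global
; under SM60 and ld.mmio.relaxed.sys.global/st.mmio.relaxed.sys.global under
; SM70; and accesses to the thread-private local statespace are currently
; emitted as plain ld.local/st.local even when the IR marks them volatile
; and/or atomic.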