llvm-project/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i16-system.ll
Matt Arsenault 01f785cac4
AMDGPU: Expand remaining system atomic operations (#122137)
System scope atomics need to use cmpxchg loops if we know
nothing about the allocation the address is from.
aea5980e26e6a87dab9f8acb10eb3a59dd143cb1 started this, this
expands the set to cover the remaining integer operations.

Don't expand xchg and add, those theoretically should work over PCIe.
This is a pre-commit which will introduce performance regressions.
Subsequent changes will add handling of new atomicrmw metadata, which
will avoid the expansion.

Note this still isn't conservative enough; we do need to expand
some device scope atomics if the memory is in fine-grained remote
memory.
2025-08-22 13:55:04 +09:00

887 lines
52 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=atomic-expand %s | FileCheck %s
define i16 @test_atomicrmw_xchg_i16_global_system(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_xchg_i16_global_system(
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
; CHECK-NEXT: [[TMP6:%.*]] = or i32 [[TMP5]], [[VALOPERAND_SHIFTED]]
; CHECK-NEXT: [[TMP7:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP6]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP7]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP7]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT: ret i16 [[EXTRACTED]]
;
%res = atomicrmw xchg ptr addrspace(1) %ptr, i16 %value seq_cst
ret i16 %res
}
define i16 @test_atomicrmw_xchg_i16_global_system_align4(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_xchg_i16_global_system_align4(
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) [[PTR:%.*]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[LOADED]], -65536
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP1]]
; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[TMP4]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[NEWLOADED]] to i16
; CHECK-NEXT: ret i16 [[EXTRACTED]]
;
%res = atomicrmw xchg ptr addrspace(1) %ptr, i16 %value seq_cst, align 4
ret i16 %res
}
define i16 @test_atomicrmw_add_i16_global_system(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_add_i16_global_system(
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = add i32 [[LOADED]], [[VALOPERAND_SHIFTED]]
; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[NEW]], [[MASK]]
; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], [[TMP5]]
; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT: ret i16 [[EXTRACTED]]
;
%res = atomicrmw add ptr addrspace(1) %ptr, i16 %value seq_cst
ret i16 %res
}
define i16 @test_atomicrmw_add_i16_global_system_align4(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_add_i16_global_system_align4(
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) [[PTR:%.*]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = add i32 [[LOADED]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[NEW]], 65535
; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[LOADED]], -65536
; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP4]], [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[TMP5]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[NEWLOADED]] to i16
; CHECK-NEXT: ret i16 [[EXTRACTED]]
;
%res = atomicrmw add ptr addrspace(1) %ptr, i16 %value seq_cst, align 4
ret i16 %res
}
define i16 @test_atomicrmw_sub_i16_global_system(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_sub_i16_global_system(
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = sub i32 [[LOADED]], [[VALOPERAND_SHIFTED]]
; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[NEW]], [[MASK]]
; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], [[TMP5]]
; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT: ret i16 [[EXTRACTED]]
;
%res = atomicrmw sub ptr addrspace(1) %ptr, i16 %value seq_cst
ret i16 %res
}
define i16 @test_atomicrmw_and_i16_global_system(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_and_i16_global_system(
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[VALOPERAND_SHIFTED]], [[INV_MASK]]
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = and i32 [[LOADED]], [[ANDOPERAND]]
; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT: ret i16 [[EXTRACTED]]
;
%res = atomicrmw and ptr addrspace(1) %ptr, i16 %value seq_cst
ret i16 %res
}
define i16 @test_atomicrmw_nand_i16_global_system(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_nand_i16_global_system(
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[LOADED]], [[VALOPERAND_SHIFTED]]
; CHECK-NEXT: [[NEW:%.*]] = xor i32 [[TMP5]], -1
; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[NEW]], [[MASK]]
; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP7]], [[TMP6]]
; CHECK-NEXT: [[TMP9:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP8]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP9]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP9]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT: ret i16 [[EXTRACTED]]
;
%res = atomicrmw nand ptr addrspace(1) %ptr, i16 %value seq_cst
ret i16 %res
}
define i16 @test_atomicrmw_or_i16_global_system(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_or_i16_global_system(
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = or i32 [[LOADED]], [[VALOPERAND_SHIFTED]]
; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT: ret i16 [[EXTRACTED]]
;
%res = atomicrmw or ptr addrspace(1) %ptr, i16 %value seq_cst
ret i16 %res
}
define i16 @test_atomicrmw_xor_i16_global_system(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_xor_i16_global_system(
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = xor i32 [[LOADED]], [[VALOPERAND_SHIFTED]]
; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[NEW]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT: ret i16 [[EXTRACTED]]
;
%res = atomicrmw xor ptr addrspace(1) %ptr, i16 %value seq_cst
ret i16 %res
}
define i16 @test_atomicrmw_max_i16_global_system(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_max_i16_global_system(
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i16 [[EXTRACTED]], [[VALUE:%.*]]
; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP4]], i16 [[EXTRACTED]], i16 [[VALUE]]
; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
; CHECK-NEXT: ret i16 [[EXTRACTED3]]
;
%res = atomicrmw max ptr addrspace(1) %ptr, i16 %value seq_cst
ret i16 %res
}
define i16 @test_atomicrmw_min_i16_global_system(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_min_i16_global_system(
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT: [[TMP4:%.*]] = icmp sle i16 [[EXTRACTED]], [[VALUE:%.*]]
; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP4]], i16 [[EXTRACTED]], i16 [[VALUE]]
; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
; CHECK-NEXT: ret i16 [[EXTRACTED3]]
;
%res = atomicrmw min ptr addrspace(1) %ptr, i16 %value seq_cst
ret i16 %res
}
define i16 @test_atomicrmw_umax_i16_global_system(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_umax_i16_global_system(
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i16 [[EXTRACTED]], [[VALUE:%.*]]
; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP4]], i16 [[EXTRACTED]], i16 [[VALUE]]
; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
; CHECK-NEXT: ret i16 [[EXTRACTED3]]
;
%res = atomicrmw umax ptr addrspace(1) %ptr, i16 %value seq_cst
ret i16 %res
}
define i16 @test_atomicrmw_umin_i16_global_system(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_umin_i16_global_system(
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT: [[TMP4:%.*]] = icmp ule i16 [[EXTRACTED]], [[VALUE:%.*]]
; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP4]], i16 [[EXTRACTED]], i16 [[VALUE]]
; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
; CHECK-NEXT: ret i16 [[EXTRACTED3]]
;
%res = atomicrmw umin ptr addrspace(1) %ptr, i16 %value seq_cst
ret i16 %res
}
define i16 @test_cmpxchg_i16_global_system(ptr addrspace(1) %out, i16 %in, i16 %old) {
; CHECK-LABEL: @test_cmpxchg_i16_global_system(
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr addrspace(1) [[OUT:%.*]], i64 4
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[GEP]], i64 -4)
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[GEP]] to i64
; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[IN:%.*]] to i32
; CHECK-NEXT: [[TMP4:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[OLD:%.*]] to i32
; CHECK-NEXT: [[TMP6:%.*]] = shl i32 [[TMP5]], [[SHIFTAMT]]
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
; CHECK-NEXT: [[TMP8:%.*]] = and i32 [[TMP7]], [[INV_MASK]]
; CHECK-NEXT: br label [[PARTWORD_CMPXCHG_LOOP:%.*]]
; CHECK: partword.cmpxchg.loop:
; CHECK-NEXT: [[TMP9:%.*]] = phi i32 [ [[TMP8]], [[TMP0:%.*]] ], [ [[TMP15:%.*]], [[PARTWORD_CMPXCHG_FAILURE:%.*]] ]
; CHECK-NEXT: [[TMP10:%.*]] = or i32 [[TMP9]], [[TMP4]]
; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[TMP9]], [[TMP6]]
; CHECK-NEXT: [[TMP12:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[TMP11]], i32 [[TMP10]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { i32, i1 } [[TMP12]], 0
; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { i32, i1 } [[TMP12]], 1
; CHECK-NEXT: br i1 [[TMP14]], label [[PARTWORD_CMPXCHG_END:%.*]], label [[PARTWORD_CMPXCHG_FAILURE]]
; CHECK: partword.cmpxchg.failure:
; CHECK-NEXT: [[TMP15]] = and i32 [[TMP13]], [[INV_MASK]]
; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP9]], [[TMP15]]
; CHECK-NEXT: br i1 [[TMP16]], label [[PARTWORD_CMPXCHG_LOOP]], label [[PARTWORD_CMPXCHG_END]]
; CHECK: partword.cmpxchg.end:
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP13]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { i16, i1 } poison, i16 [[EXTRACTED]], 0
; CHECK-NEXT: [[TMP18:%.*]] = insertvalue { i16, i1 } [[TMP17]], i1 [[TMP14]], 1
; CHECK-NEXT: [[EXTRACT:%.*]] = extractvalue { i16, i1 } [[TMP18]], 0
; CHECK-NEXT: ret i16 [[EXTRACT]]
;
%gep = getelementptr i16, ptr addrspace(1) %out, i64 4
%res = cmpxchg ptr addrspace(1) %gep, i16 %old, i16 %in seq_cst seq_cst
%extract = extractvalue {i16, i1} %res, 0
ret i16 %extract
}
define i16 @test_cmpxchg_i16_global_system_align4(ptr addrspace(1) %out, i16 %in, i16 %old) {
; CHECK-LABEL: @test_cmpxchg_i16_global_system_align4(
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr addrspace(1) [[OUT:%.*]], i64 4
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[IN:%.*]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[OLD:%.*]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[GEP]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], -65536
; CHECK-NEXT: br label [[PARTWORD_CMPXCHG_LOOP:%.*]]
; CHECK: partword.cmpxchg.loop:
; CHECK-NEXT: [[TMP5:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[TMP11:%.*]], [[PARTWORD_CMPXCHG_FAILURE:%.*]] ]
; CHECK-NEXT: [[TMP6:%.*]] = or i32 [[TMP5]], [[TMP1]]
; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP5]], [[TMP2]]
; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg ptr addrspace(1) [[GEP]], i32 [[TMP7]], i32 [[TMP6]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { i32, i1 } [[TMP8]], 0
; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
; CHECK-NEXT: br i1 [[TMP10]], label [[PARTWORD_CMPXCHG_END:%.*]], label [[PARTWORD_CMPXCHG_FAILURE]]
; CHECK: partword.cmpxchg.failure:
; CHECK-NEXT: [[TMP11]] = and i32 [[TMP9]], -65536
; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP5]], [[TMP11]]
; CHECK-NEXT: br i1 [[TMP12]], label [[PARTWORD_CMPXCHG_LOOP]], label [[PARTWORD_CMPXCHG_END]]
; CHECK: partword.cmpxchg.end:
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[TMP9]] to i16
; CHECK-NEXT: [[TMP13:%.*]] = insertvalue { i16, i1 } poison, i16 [[EXTRACTED]], 0
; CHECK-NEXT: [[TMP14:%.*]] = insertvalue { i16, i1 } [[TMP13]], i1 [[TMP10]], 1
; CHECK-NEXT: [[EXTRACT:%.*]] = extractvalue { i16, i1 } [[TMP14]], 0
; CHECK-NEXT: ret i16 [[EXTRACT]]
;
%gep = getelementptr i16, ptr addrspace(1) %out, i64 4
%res = cmpxchg ptr addrspace(1) %gep, i16 %old, i16 %in seq_cst seq_cst, align 4
%extract = extractvalue {i16, i1} %res, 0
ret i16 %extract
}
define i16 @test_atomicrmw_inc_i16_global_system(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_inc_i16_global_system(
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT: [[TMP4:%.*]] = add i16 [[EXTRACTED]], 1
; CHECK-NEXT: [[TMP5:%.*]] = icmp uge i16 [[EXTRACTED]], [[VALUE:%.*]]
; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i16 0, i16 [[TMP4]]
; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
; CHECK-NEXT: ret i16 [[EXTRACTED3]]
;
%res = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i16 %value seq_cst
ret i16 %res
}
define i16 @test_atomicrmw_inc_i16_global_system_align4(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_inc_i16_global_system_align4(
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR:%.*]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i16
; CHECK-NEXT: [[TMP2:%.*]] = add i16 [[EXTRACTED]], 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp uge i16 [[EXTRACTED]], [[VALUE:%.*]]
; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i16 0, i16 [[TMP2]]
; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], -65536
; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]]
; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP4]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i16
; CHECK-NEXT: ret i16 [[EXTRACTED1]]
;
%res = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i16 %value seq_cst, align 4
ret i16 %res
}
define i16 @test_atomicrmw_inc_i16_flat_system(ptr %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_inc_i16_flat_system(
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -4)
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i64
; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ALIGNEDADDR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT: [[TMP4:%.*]] = add i16 [[EXTRACTED]], 1
; CHECK-NEXT: [[TMP5:%.*]] = icmp uge i16 [[EXTRACTED]], [[VALUE:%.*]]
; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i16 0, i16 [[TMP4]]
; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
; CHECK-NEXT: ret i16 [[EXTRACTED3]]
;
%res = atomicrmw uinc_wrap ptr %ptr, i16 %value seq_cst
ret i16 %res
}
define i16 @test_atomicrmw_inc_i16_flat_system_align4(ptr %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_inc_i16_flat_system_align4(
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[PTR:%.*]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i16
; CHECK-NEXT: [[TMP2:%.*]] = add i16 [[EXTRACTED]], 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp uge i16 [[EXTRACTED]], [[VALUE:%.*]]
; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i16 0, i16 [[TMP2]]
; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], -65536
; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]]
; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[PTR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP4]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i16
; CHECK-NEXT: ret i16 [[EXTRACTED1]]
;
%res = atomicrmw uinc_wrap ptr %ptr, i16 %value seq_cst, align 4
ret i16 %res
}
define i16 @test_atomicrmw_dec_i16_global_system(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_dec_i16_global_system(
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT: [[TMP4:%.*]] = sub i16 [[EXTRACTED]], 1
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i16 [[EXTRACTED]], 0
; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i16 [[EXTRACTED]], [[VALUE:%.*]]
; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP7]], i16 [[VALUE]], i16 [[TMP4]]
; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
; CHECK-NEXT: ret i16 [[EXTRACTED3]]
;
%res = atomicrmw udec_wrap ptr addrspace(1) %ptr, i16 %value seq_cst
ret i16 %res
}
define i16 @test_atomicrmw_dec_i16_global_system_align4(ptr addrspace(1) %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_dec_i16_global_system_align4(
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR:%.*]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i16
; CHECK-NEXT: [[TMP2:%.*]] = sub i16 [[EXTRACTED]], 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i16 [[EXTRACTED]], 0
; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i16 [[EXTRACTED]], [[VALUE:%.*]]
; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i16 [[VALUE]], i16 [[TMP2]]
; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], -65536
; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]]
; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i16
; CHECK-NEXT: ret i16 [[EXTRACTED1]]
;
%res = atomicrmw udec_wrap ptr addrspace(1) %ptr, i16 %value seq_cst, align 4
ret i16 %res
}
define i16 @test_atomicrmw_dec_i16_flat_system(ptr %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_dec_i16_flat_system(
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -4)
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i64
; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ALIGNEDADDR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT: [[TMP4:%.*]] = sub i16 [[EXTRACTED]], 1
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i16 [[EXTRACTED]], 0
; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i16 [[EXTRACTED]], [[VALUE:%.*]]
; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP7]], i16 [[VALUE]], i16 [[TMP4]]
; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg ptr [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
; CHECK-NEXT: ret i16 [[EXTRACTED3]]
;
%res = atomicrmw udec_wrap ptr %ptr, i16 %value seq_cst
ret i16 %res
}
define i16 @test_atomicrmw_dec_i16_flat_system_align4(ptr %ptr, i16 %value) {
; CHECK-LABEL: @test_atomicrmw_dec_i16_flat_system_align4(
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[PTR:%.*]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i16
; CHECK-NEXT: [[TMP2:%.*]] = sub i16 [[EXTRACTED]], 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i16 [[EXTRACTED]], 0
; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i16 [[EXTRACTED]], [[VALUE:%.*]]
; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i16 [[VALUE]], i16 [[TMP2]]
; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], -65536
; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]]
; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr [[PTR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i16
; CHECK-NEXT: ret i16 [[EXTRACTED1]]
;
%res = atomicrmw udec_wrap ptr %ptr, i16 %value seq_cst, align 4
ret i16 %res
}
define half @test_atomicrmw_xchg_f16_global_system(ptr addrspace(1) %ptr, half %value) {
; CHECK-LABEL: @test_atomicrmw_xchg_f16_global_system(
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT: [[TMP3:%.*]] = bitcast half [[VALUE:%.*]] to i16
; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[TMP3]] to i32
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]]
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], [[VALOPERAND_SHIFTED]]
; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[EXTRACTED]] to half
; CHECK-NEXT: ret half [[TMP9]]
;
%res = atomicrmw xchg ptr addrspace(1) %ptr, half %value seq_cst
ret half %res
}
define half @test_atomicrmw_xchg_f16_global_system_align4(ptr addrspace(1) %ptr, half %value) {
; CHECK-LABEL: @test_atomicrmw_xchg_f16_global_system_align4(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast half [[VALUE:%.*]] to i16
; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[TMP1]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[PTR:%.*]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[LOADED]], -65536
; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP4]], [[TMP2]]
; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[TMP5]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[NEWLOADED]] to i16
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED]] to half
; CHECK-NEXT: ret half [[TMP7]]
;
%res = atomicrmw xchg ptr addrspace(1) %ptr, half %value seq_cst, align 4
ret half %res
}
define half @test_atomicrmw_xchg_f16_flat_system(ptr %ptr, half %value) {
; CHECK-LABEL: @test_atomicrmw_xchg_f16_flat_system(
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -4)
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i64
; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT: [[TMP3:%.*]] = bitcast half [[VALUE:%.*]] to i16
; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[TMP3]] to i32
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]]
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ALIGNEDADDR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], [[VALOPERAND_SHIFTED]]
; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg ptr [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[EXTRACTED]] to half
; CHECK-NEXT: ret half [[TMP9]]
;
%res = atomicrmw xchg ptr %ptr, half %value seq_cst
ret half %res
}
define half @test_atomicrmw_xchg_f16_flat_system_align4(ptr %ptr, half %value) {
; CHECK-LABEL: @test_atomicrmw_xchg_f16_flat_system_align4(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast half [[VALUE:%.*]] to i16
; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[TMP1]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[PTR:%.*]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[LOADED]], -65536
; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP4]], [[TMP2]]
; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr [[PTR]], i32 [[LOADED]], i32 [[TMP5]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[NEWLOADED]] to i16
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED]] to half
; CHECK-NEXT: ret half [[TMP7]]
;
%res = atomicrmw xchg ptr %ptr, half %value seq_cst, align 4
ret half %res
}
define bfloat @test_atomicrmw_xchg_bf16_flat_system(ptr %ptr, bfloat %value) {
; CHECK-LABEL: @test_atomicrmw_xchg_bf16_flat_system(
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -4)
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i64
; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
; CHECK-NEXT: [[TMP3:%.*]] = bitcast bfloat [[VALUE:%.*]] to i16
; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[TMP3]] to i32
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]]
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ALIGNEDADDR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], [[VALOPERAND_SHIFTED]]
; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg ptr [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[EXTRACTED]] to bfloat
; CHECK-NEXT: ret bfloat [[TMP9]]
;
%res = atomicrmw xchg ptr %ptr, bfloat %value seq_cst
ret bfloat %res
}
define bfloat @test_atomicrmw_xchg_bf16_flat_system_align4(ptr %ptr, bfloat %value) {
; CHECK-LABEL: @test_atomicrmw_xchg_bf16_flat_system_align4(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast bfloat [[VALUE:%.*]] to i16
; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[TMP1]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[PTR:%.*]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[LOADED]], -65536
; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP4]], [[TMP2]]
; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr [[PTR]], i32 [[LOADED]], i32 [[TMP5]] seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[NEWLOADED]] to i16
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[EXTRACTED]] to bfloat
; CHECK-NEXT: ret bfloat [[TMP7]]
;
%res = atomicrmw xchg ptr %ptr, bfloat %value seq_cst, align 4
ret bfloat %res
}