[AMDGPU][NFC] Mark GEPs in flat offset folding tests as inbounds (#165426)
This is in preparation for a patch that will only fold offsets into flat instructions if their addition is inbounds. Marking the GEPs in these tests as inbounds now means that the tests' expected output won't change with the later patch. This is essentially a retry of the very similar PR #131994, as part of an updated stack of PRs. For SWDEV-516125.
This commit is contained in:
parent
4d4a60cde0
commit
550522d07e
@ -62,7 +62,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: successors: %bb.35(0x40000000), %bb.5(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr0_vgpr1 = V_LSHLREV_B64_e64 2, $vgpr4_vgpr5, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr0_vgpr1 = nsw V_LSHLREV_B64_e64 2, $vgpr4_vgpr5, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr2 = COPY renamable $sgpr25, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr46, renamable $vcc = V_ADD_CO_U32_e64 $sgpr24, $vgpr0, 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr47, dead renamable $vcc = V_ADDC_U32_e64 killed $vgpr2, killed $vgpr1, killed $vcc, 0, implicit $exec
|
||||
@ -958,7 +958,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: successors: %bb.71(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr52_vgpr53:0x0000000000000003, $vgpr54_vgpr55:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr4_vgpr5 = V_LSHLREV_B64_e64 3, killed $vgpr4_vgpr5, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr4_vgpr5 = nsw V_LSHLREV_B64_e64 3, killed $vgpr4_vgpr5, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr2 = COPY renamable $sgpr27, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr4, renamable $vcc = V_ADD_CO_U32_e64 killed $sgpr26, $vgpr4, 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr2, dead renamable $vcc = V_ADDC_U32_e64 killed $vgpr2, killed $vgpr5, killed $vcc, 0, implicit $exec
|
||||
@ -1006,12 +1006,12 @@ bb:
|
||||
%i11 = icmp eq i32 %i, 0
|
||||
%i12 = load i32, ptr addrspace(3) null, align 8
|
||||
%i13 = zext i32 %i12 to i64
|
||||
%i14 = getelementptr i32, ptr addrspace(1) %arg, i64 %i13
|
||||
%i14 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %i13
|
||||
br i1 %arg3, label %bb15, label %bb103
|
||||
|
||||
bb15:
|
||||
%i16 = zext i32 %i to i64
|
||||
%i17 = getelementptr i32, ptr addrspace(1) %i14, i64 %i16
|
||||
%i17 = getelementptr inbounds i32, ptr addrspace(1) %i14, i64 %i16
|
||||
%i18 = ptrtoint ptr addrspace(1) %i17 to i64
|
||||
br i1 %arg4, label %bb19, label %bb20
|
||||
|
||||
@ -1020,7 +1020,7 @@ bb19:
|
||||
unreachable
|
||||
|
||||
bb20:
|
||||
%i21 = getelementptr i32, ptr addrspace(1) %i17, i64 256
|
||||
%i21 = getelementptr inbounds i32, ptr addrspace(1) %i17, i64 256
|
||||
%i22 = ptrtoint ptr addrspace(1) %i21 to i64
|
||||
%i23 = inttoptr i64 %i22 to ptr
|
||||
%i24 = load i8, ptr %i23, align 1
|
||||
@ -1032,7 +1032,7 @@ bb26:
|
||||
unreachable
|
||||
|
||||
bb27:
|
||||
%i28 = getelementptr i32, ptr addrspace(1) %i17, i64 512
|
||||
%i28 = getelementptr inbounds i32, ptr addrspace(1) %i17, i64 512
|
||||
%i29 = ptrtoint ptr addrspace(1) %i28 to i64
|
||||
%i30 = inttoptr i64 %i29 to ptr
|
||||
%i31 = load i8, ptr %i30, align 1
|
||||
@ -1044,7 +1044,7 @@ bb33:
|
||||
unreachable
|
||||
|
||||
bb34:
|
||||
%i35 = getelementptr i32, ptr addrspace(1) %i17, i64 768
|
||||
%i35 = getelementptr inbounds i32, ptr addrspace(1) %i17, i64 768
|
||||
%i36 = ptrtoint ptr addrspace(1) %i35 to i64
|
||||
%i37 = inttoptr i64 %i36 to ptr
|
||||
%i38 = load i8, ptr %i37, align 1
|
||||
@ -1056,7 +1056,7 @@ bb40:
|
||||
unreachable
|
||||
|
||||
bb41:
|
||||
%i42 = getelementptr i32, ptr addrspace(1) %i17, i64 1024
|
||||
%i42 = getelementptr inbounds i32, ptr addrspace(1) %i17, i64 1024
|
||||
%i43 = ptrtoint ptr addrspace(1) %i42 to i64
|
||||
%i44 = inttoptr i64 %i43 to ptr
|
||||
%i45 = load i8, ptr %i44, align 1
|
||||
@ -1068,7 +1068,7 @@ bb47:
|
||||
unreachable
|
||||
|
||||
bb48:
|
||||
%i49 = getelementptr i32, ptr addrspace(1) %i17, i64 1280
|
||||
%i49 = getelementptr inbounds i32, ptr addrspace(1) %i17, i64 1280
|
||||
%i50 = ptrtoint ptr addrspace(1) %i49 to i64
|
||||
%i51 = inttoptr i64 %i50 to ptr
|
||||
%i52 = load i8, ptr %i51, align 1
|
||||
@ -1080,7 +1080,7 @@ bb54:
|
||||
unreachable
|
||||
|
||||
bb55:
|
||||
%i56 = getelementptr i32, ptr addrspace(1) %i17, i64 1536
|
||||
%i56 = getelementptr inbounds i32, ptr addrspace(1) %i17, i64 1536
|
||||
%i57 = ptrtoint ptr addrspace(1) %i56 to i64
|
||||
%i58 = or i64 %i57, 1
|
||||
%i59 = inttoptr i64 %i58 to ptr
|
||||
@ -1112,7 +1112,7 @@ bb67:
|
||||
|
||||
bb68:
|
||||
%i69 = zext i1 %arg5 to i8
|
||||
%i70 = getelementptr [2 x i32], ptr addrspace(1) null, i64 %i16
|
||||
%i70 = getelementptr inbounds [2 x i32], ptr addrspace(1) null, i64 %i16
|
||||
%i71 = ptrtoint ptr addrspace(1) %i70 to i64
|
||||
br i1 %arg5, label %bb72, label %bb73
|
||||
|
||||
@ -1121,7 +1121,7 @@ bb72:
|
||||
unreachable
|
||||
|
||||
bb73:
|
||||
%i74 = getelementptr [2 x i32], ptr addrspace(1) %i70, i64 256
|
||||
%i74 = getelementptr inbounds [2 x i32], ptr addrspace(1) %i70, i64 256
|
||||
%i75 = ptrtoint ptr addrspace(1) %i74 to i64
|
||||
%i76 = inttoptr i64 %i75 to ptr
|
||||
%i77 = load i8, ptr %i76, align 1
|
||||
@ -1133,7 +1133,7 @@ bb79:
|
||||
unreachable
|
||||
|
||||
bb80:
|
||||
%i81 = getelementptr [2 x i32], ptr addrspace(1) %i70, i64 512
|
||||
%i81 = getelementptr inbounds [2 x i32], ptr addrspace(1) %i70, i64 512
|
||||
%i82 = ptrtoint ptr addrspace(1) %i81 to i64
|
||||
%i83 = or i64 %i82, 1
|
||||
br i1 %arg6, label %bb84, label %bb85
|
||||
@ -1268,7 +1268,7 @@ bb174:
|
||||
%i182 = select i1 %arg3, i32 %i181, i32 0
|
||||
%i183 = or i32 %i182, %i154
|
||||
%i184 = or i32 %i183, %i156
|
||||
%i185 = getelementptr [2 x i32], ptr addrspace(1) %arg1, i64 %i13
|
||||
%i185 = getelementptr inbounds [2 x i32], ptr addrspace(1) %arg1, i64 %i13
|
||||
br i1 %arg3, label %bb186, label %bb196
|
||||
|
||||
bb186:
|
||||
|
||||
@ -12,8 +12,8 @@
|
||||
define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) {
|
||||
; OPT-GFX7-LABEL: @test_sinkable_flat_small_offset_i32(
|
||||
; OPT-GFX7-NEXT: entry:
|
||||
; OPT-GFX7-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
|
||||
; OPT-GFX7-NEXT: [[IN_GEP:%.*]] = getelementptr i32, ptr [[IN:%.*]], i64 7
|
||||
; OPT-GFX7-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999
|
||||
; OPT-GFX7-NEXT: [[IN_GEP:%.*]] = getelementptr inbounds i32, ptr [[IN:%.*]], i64 7
|
||||
; OPT-GFX7-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
|
||||
; OPT-GFX7-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
|
||||
; OPT-GFX7: if:
|
||||
@ -28,8 +28,8 @@ define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) {
|
||||
;
|
||||
; OPT-GFX8-LABEL: @test_sinkable_flat_small_offset_i32(
|
||||
; OPT-GFX8-NEXT: entry:
|
||||
; OPT-GFX8-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
|
||||
; OPT-GFX8-NEXT: [[IN_GEP:%.*]] = getelementptr i32, ptr [[IN:%.*]], i64 7
|
||||
; OPT-GFX8-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999
|
||||
; OPT-GFX8-NEXT: [[IN_GEP:%.*]] = getelementptr inbounds i32, ptr [[IN:%.*]], i64 7
|
||||
; OPT-GFX8-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
|
||||
; OPT-GFX8-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
|
||||
; OPT-GFX8: if:
|
||||
@ -44,11 +44,11 @@ define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) {
|
||||
;
|
||||
; OPT-GFX9-LABEL: @test_sinkable_flat_small_offset_i32(
|
||||
; OPT-GFX9-NEXT: entry:
|
||||
; OPT-GFX9-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
|
||||
; OPT-GFX9-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999
|
||||
; OPT-GFX9-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
|
||||
; OPT-GFX9-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
|
||||
; OPT-GFX9: if:
|
||||
; OPT-GFX9-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 28
|
||||
; OPT-GFX9-NEXT: [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 28
|
||||
; OPT-GFX9-NEXT: [[LOAD:%.*]] = load i32, ptr [[SUNKADDR]], align 4
|
||||
; OPT-GFX9-NEXT: br label [[ENDIF]]
|
||||
; OPT-GFX9: endif:
|
||||
@ -58,11 +58,11 @@ define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) {
|
||||
;
|
||||
; OPT-GFX10-LABEL: @test_sinkable_flat_small_offset_i32(
|
||||
; OPT-GFX10-NEXT: entry:
|
||||
; OPT-GFX10-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
|
||||
; OPT-GFX10-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999
|
||||
; OPT-GFX10-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
|
||||
; OPT-GFX10-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
|
||||
; OPT-GFX10: if:
|
||||
; OPT-GFX10-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 28
|
||||
; OPT-GFX10-NEXT: [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 28
|
||||
; OPT-GFX10-NEXT: [[LOAD:%.*]] = load i32, ptr [[SUNKADDR]], align 4
|
||||
; OPT-GFX10-NEXT: br label [[ENDIF]]
|
||||
; OPT-GFX10: endif:
|
||||
@ -146,8 +146,8 @@ define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) {
|
||||
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||
entry:
|
||||
%out.gep = getelementptr i32, ptr %out, i64 999999
|
||||
%in.gep = getelementptr i32, ptr %in, i64 7
|
||||
%out.gep = getelementptr inbounds i32, ptr %out, i64 999999
|
||||
%in.gep = getelementptr inbounds i32, ptr %in, i64 7
|
||||
%cmp0 = icmp eq i32 %cond, 0
|
||||
br i1 %cmp0, label %endif, label %if
|
||||
|
||||
@ -167,12 +167,12 @@ done:
|
||||
define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in, i32 %cond) {
|
||||
; OPT-GFX7-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
|
||||
; OPT-GFX7-NEXT: entry:
|
||||
; OPT-GFX7-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
|
||||
; OPT-GFX7-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999
|
||||
; OPT-GFX7-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
|
||||
; OPT-GFX7-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
|
||||
; OPT-GFX7: if:
|
||||
; OPT-GFX7-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr addrspace(1)
|
||||
; OPT-GFX7-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP0]], i64 28
|
||||
; OPT-GFX7-NEXT: [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP0]], i64 28
|
||||
; OPT-GFX7-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) [[SUNKADDR]], align 4
|
||||
; OPT-GFX7-NEXT: br label [[ENDIF]]
|
||||
; OPT-GFX7: endif:
|
||||
@ -182,8 +182,8 @@ define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in,
|
||||
;
|
||||
; OPT-GFX8-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
|
||||
; OPT-GFX8-NEXT: entry:
|
||||
; OPT-GFX8-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
|
||||
; OPT-GFX8-NEXT: [[IN_GEP:%.*]] = getelementptr i32, ptr [[IN:%.*]], i64 7
|
||||
; OPT-GFX8-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999
|
||||
; OPT-GFX8-NEXT: [[IN_GEP:%.*]] = getelementptr inbounds i32, ptr [[IN:%.*]], i64 7
|
||||
; OPT-GFX8-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
|
||||
; OPT-GFX8-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
|
||||
; OPT-GFX8: if:
|
||||
@ -197,12 +197,12 @@ define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in,
|
||||
;
|
||||
; OPT-GFX9-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
|
||||
; OPT-GFX9-NEXT: entry:
|
||||
; OPT-GFX9-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
|
||||
; OPT-GFX9-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999
|
||||
; OPT-GFX9-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
|
||||
; OPT-GFX9-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
|
||||
; OPT-GFX9: if:
|
||||
; OPT-GFX9-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr addrspace(1)
|
||||
; OPT-GFX9-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP0]], i64 28
|
||||
; OPT-GFX9-NEXT: [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP0]], i64 28
|
||||
; OPT-GFX9-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) [[SUNKADDR]], align 4
|
||||
; OPT-GFX9-NEXT: br label [[ENDIF]]
|
||||
; OPT-GFX9: endif:
|
||||
@ -212,12 +212,12 @@ define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in,
|
||||
;
|
||||
; OPT-GFX10-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
|
||||
; OPT-GFX10-NEXT: entry:
|
||||
; OPT-GFX10-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
|
||||
; OPT-GFX10-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999
|
||||
; OPT-GFX10-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
|
||||
; OPT-GFX10-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
|
||||
; OPT-GFX10: if:
|
||||
; OPT-GFX10-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr addrspace(1)
|
||||
; OPT-GFX10-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP0]], i64 28
|
||||
; OPT-GFX10-NEXT: [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP0]], i64 28
|
||||
; OPT-GFX10-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) [[SUNKADDR]], align 4
|
||||
; OPT-GFX10-NEXT: br label [[ENDIF]]
|
||||
; OPT-GFX10: endif:
|
||||
@ -303,8 +303,8 @@ define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in,
|
||||
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||
entry:
|
||||
%out.gep = getelementptr i32, ptr %out, i64 999999
|
||||
%in.gep = getelementptr i32, ptr %in, i64 7
|
||||
%out.gep = getelementptr inbounds i32, ptr %out, i64 999999
|
||||
%in.gep = getelementptr inbounds i32, ptr %in, i64 7
|
||||
%cast = addrspacecast ptr %in.gep to ptr addrspace(1)
|
||||
%cmp0 = icmp eq i32 %cond, 0
|
||||
br i1 %cmp0, label %endif, label %if
|
||||
@ -325,12 +325,12 @@ done:
|
||||
define void @test_sink_noop_addrspacecast_flat_to_constant_i32(ptr %out, ptr %in, i32 %cond) {
|
||||
; OPT-LABEL: @test_sink_noop_addrspacecast_flat_to_constant_i32(
|
||||
; OPT-NEXT: entry:
|
||||
; OPT-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
|
||||
; OPT-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999
|
||||
; OPT-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
|
||||
; OPT-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
|
||||
; OPT: if:
|
||||
; OPT-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr addrspace(4)
|
||||
; OPT-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP0]], i64 28
|
||||
; OPT-NEXT: [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP0]], i64 28
|
||||
; OPT-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(4) [[SUNKADDR]], align 4
|
||||
; OPT-NEXT: br label [[ENDIF]]
|
||||
; OPT: endif:
|
||||
@ -416,8 +416,8 @@ define void @test_sink_noop_addrspacecast_flat_to_constant_i32(ptr %out, ptr %in
|
||||
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||
entry:
|
||||
%out.gep = getelementptr i32, ptr %out, i64 999999
|
||||
%in.gep = getelementptr i32, ptr %in, i64 7
|
||||
%out.gep = getelementptr inbounds i32, ptr %out, i64 999999
|
||||
%in.gep = getelementptr inbounds i32, ptr %in, i64 7
|
||||
%cast = addrspacecast ptr %in.gep to ptr addrspace(4)
|
||||
%cmp0 = icmp eq i32 %cond, 0
|
||||
br i1 %cmp0, label %endif, label %if
|
||||
@ -438,8 +438,8 @@ done:
|
||||
define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 {
|
||||
; OPT-GFX7-LABEL: @test_sink_flat_small_max_flat_offset(
|
||||
; OPT-GFX7-NEXT: entry:
|
||||
; OPT-GFX7-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024
|
||||
; OPT-GFX7-NEXT: [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095
|
||||
; OPT-GFX7-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i32 1024
|
||||
; OPT-GFX7-NEXT: [[IN_GEP:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 4095
|
||||
; OPT-GFX7-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]]
|
||||
; OPT-GFX7-NEXT: [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
|
||||
; OPT-GFX7-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
|
||||
@ -456,8 +456,8 @@ define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 {
|
||||
;
|
||||
; OPT-GFX8-LABEL: @test_sink_flat_small_max_flat_offset(
|
||||
; OPT-GFX8-NEXT: entry:
|
||||
; OPT-GFX8-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024
|
||||
; OPT-GFX8-NEXT: [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095
|
||||
; OPT-GFX8-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i32 1024
|
||||
; OPT-GFX8-NEXT: [[IN_GEP:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 4095
|
||||
; OPT-GFX8-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]]
|
||||
; OPT-GFX8-NEXT: [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
|
||||
; OPT-GFX8-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
|
||||
@ -474,12 +474,12 @@ define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 {
|
||||
;
|
||||
; OPT-GFX9-LABEL: @test_sink_flat_small_max_flat_offset(
|
||||
; OPT-GFX9-NEXT: entry:
|
||||
; OPT-GFX9-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024
|
||||
; OPT-GFX9-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i32 1024
|
||||
; OPT-GFX9-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]]
|
||||
; OPT-GFX9-NEXT: [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
|
||||
; OPT-GFX9-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
|
||||
; OPT-GFX9: if:
|
||||
; OPT-GFX9-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095
|
||||
; OPT-GFX9-NEXT: [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 4095
|
||||
; OPT-GFX9-NEXT: [[LOAD:%.*]] = load i8, ptr [[SUNKADDR]], align 1
|
||||
; OPT-GFX9-NEXT: [[CAST:%.*]] = sext i8 [[LOAD]] to i32
|
||||
; OPT-GFX9-NEXT: br label [[ENDIF]]
|
||||
@ -490,8 +490,8 @@ define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 {
|
||||
;
|
||||
; OPT-GFX10-LABEL: @test_sink_flat_small_max_flat_offset(
|
||||
; OPT-GFX10-NEXT: entry:
|
||||
; OPT-GFX10-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024
|
||||
; OPT-GFX10-NEXT: [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095
|
||||
; OPT-GFX10-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i32 1024
|
||||
; OPT-GFX10-NEXT: [[IN_GEP:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 4095
|
||||
; OPT-GFX10-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]]
|
||||
; OPT-GFX10-NEXT: [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
|
||||
; OPT-GFX10-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
|
||||
@ -588,8 +588,8 @@ define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 {
|
||||
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||
entry:
|
||||
%out.gep = getelementptr i32, ptr %out, i32 1024
|
||||
%in.gep = getelementptr i8, ptr %in, i64 4095
|
||||
%out.gep = getelementptr inbounds i32, ptr %out, i32 1024
|
||||
%in.gep = getelementptr inbounds i8, ptr %in, i64 4095
|
||||
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
|
||||
%cmp0 = icmp eq i32 %tid, 0
|
||||
br i1 %cmp0, label %endif, label %if
|
||||
@ -611,8 +611,8 @@ done:
|
||||
define void @test_sink_flat_small_max_plus_1_flat_offset(ptr %out, ptr %in) #1 {
|
||||
; OPT-LABEL: @test_sink_flat_small_max_plus_1_flat_offset(
|
||||
; OPT-NEXT: entry:
|
||||
; OPT-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 99999
|
||||
; OPT-NEXT: [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4096
|
||||
; OPT-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 99999
|
||||
; OPT-NEXT: [[IN_GEP:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 4096
|
||||
; OPT-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]]
|
||||
; OPT-NEXT: [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
|
||||
; OPT-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
|
||||
@ -711,8 +711,8 @@ define void @test_sink_flat_small_max_plus_1_flat_offset(ptr %out, ptr %in) #1 {
|
||||
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||
entry:
|
||||
%out.gep = getelementptr i32, ptr %out, i64 99999
|
||||
%in.gep = getelementptr i8, ptr %in, i64 4096
|
||||
%out.gep = getelementptr inbounds i32, ptr %out, i64 99999
|
||||
%in.gep = getelementptr inbounds i8, ptr %in, i64 4096
|
||||
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
|
||||
%cmp0 = icmp eq i32 %tid, 0
|
||||
br i1 %cmp0, label %endif, label %if
|
||||
@ -734,8 +734,8 @@ done:
|
||||
define void @test_sinkable_flat_reg_offset(ptr %out, ptr %in, i64 %reg) #1 {
|
||||
; OPT-LABEL: @test_sinkable_flat_reg_offset(
|
||||
; OPT-NEXT: entry:
|
||||
; OPT-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024
|
||||
; OPT-NEXT: [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 [[REG:%.*]]
|
||||
; OPT-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i32 1024
|
||||
; OPT-NEXT: [[IN_GEP:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 [[REG:%.*]]
|
||||
; OPT-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3]]
|
||||
; OPT-NEXT: [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
|
||||
; OPT-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
|
||||
@ -834,8 +834,8 @@ define void @test_sinkable_flat_reg_offset(ptr %out, ptr %in, i64 %reg) #1 {
|
||||
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||
entry:
|
||||
%out.gep = getelementptr i32, ptr %out, i32 1024
|
||||
%in.gep = getelementptr i8, ptr %in, i64 %reg
|
||||
%out.gep = getelementptr inbounds i32, ptr %out, i32 1024
|
||||
%in.gep = getelementptr inbounds i8, ptr %in, i64 %reg
|
||||
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
|
||||
%cmp0 = icmp eq i32 %tid, 0
|
||||
br i1 %cmp0, label %endif, label %if
|
||||
|
||||
@ -373,7 +373,7 @@ define float @flat_agent_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_grai
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 511
|
||||
%result = atomicrmw fadd ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
|
||||
ret float %result
|
||||
}
|
||||
@ -570,7 +570,7 @@ define float @flat_agent_atomic_fadd_ret_f32__offset12b_neg__amdgpu_no_fine_grai
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 -512
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 -512
|
||||
%result = atomicrmw fadd ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
|
||||
ret float %result
|
||||
}
|
||||
@ -995,7 +995,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_gra
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 511
|
||||
%unused = atomicrmw fadd ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
|
||||
ret void
|
||||
}
|
||||
@ -1219,7 +1219,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_neg__amdgpu_no_fine_gra
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 -512
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 -512
|
||||
%unused = atomicrmw fadd ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
|
||||
ret void
|
||||
}
|
||||
@ -1409,7 +1409,7 @@ define float @flat_system_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_gra
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 511
|
||||
%result = atomicrmw fadd ptr %gep, float %val seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
|
||||
ret float %result
|
||||
}
|
||||
@ -1630,7 +1630,7 @@ define void @flat_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_gr
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 511
|
||||
%unused = atomicrmw fadd ptr %gep, float %val seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
|
||||
ret void
|
||||
}
|
||||
@ -1795,7 +1795,7 @@ define void @flat_agent_atomic_fadd_noret_f32_maybe_remote(ptr %ptr, float %val)
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 511
|
||||
%unused = atomicrmw fadd ptr %gep, float %val syncscope("agent") seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -1947,7 +1947,7 @@ define void @flat_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory(pt
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 511
|
||||
%unused = atomicrmw fadd ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -2165,7 +2165,7 @@ define void @flat_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory__a
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 511
|
||||
%unused = atomicrmw fadd ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
|
||||
ret void
|
||||
}
|
||||
@ -2330,7 +2330,7 @@ define void @flat_agent_atomic_fadd_noret_f32_amdgpu_ignore_denormal_mode(ptr %p
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 511
|
||||
%unused = atomicrmw fadd ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.ignore.denormal.mode !0
|
||||
ret void
|
||||
}
|
||||
@ -2698,7 +2698,7 @@ define float @flat_agent_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fine
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 511
|
||||
%result = atomicrmw fadd ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret float %result
|
||||
}
|
||||
@ -2895,7 +2895,7 @@ define float @flat_agent_atomic_fadd_ret_f32__offset12b_neg__ftz__amdgpu_no_fine
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 -512
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 -512
|
||||
%result = atomicrmw fadd ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret float %result
|
||||
}
|
||||
@ -3320,7 +3320,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fin
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 511
|
||||
%unused = atomicrmw fadd ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -3544,7 +3544,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_neg__ftz__amdgpu_no_fin
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 -512
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 -512
|
||||
%unused = atomicrmw fadd ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -3734,7 +3734,7 @@ define float @flat_system_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fin
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 511
|
||||
%result = atomicrmw fadd ptr %gep, float %val seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret float %result
|
||||
}
|
||||
@ -3955,7 +3955,7 @@ define void @flat_system_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fi
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 511
|
||||
%unused = atomicrmw fadd ptr %gep, float %val seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -4145,7 +4145,7 @@ define float @flat_agent_atomic_fadd_ret_f32__ieee__amdgpu_no_fine_grained_memor
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 511
|
||||
%result = atomicrmw fadd ptr %gep, float %val seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
|
||||
ret float %result
|
||||
}
|
||||
@ -4366,7 +4366,7 @@ define void @flat_agent_atomic_fadd_noret_f32__ieee__amdgpu_no_fine_grained_memo
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 511
|
||||
%unused = atomicrmw fadd ptr %gep, float %val seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
|
||||
ret void
|
||||
}
|
||||
@ -6590,7 +6590,7 @@ define double @flat_agent_atomic_fadd_ret_f64__offset12b_pos__amdgpu_no_fine_gra
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr double, ptr %ptr, i64 255
|
||||
%gep = getelementptr inbounds double, ptr %ptr, i64 255
|
||||
%result = atomicrmw fadd ptr %gep, double %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret double %result
|
||||
}
|
||||
@ -7052,7 +7052,7 @@ define double @flat_agent_atomic_fadd_ret_f64__offset12b_neg__amdgpu_no_fine_gra
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr double, ptr %ptr, i64 -256
|
||||
%gep = getelementptr inbounds double, ptr %ptr, i64 -256
|
||||
%result = atomicrmw fadd ptr %gep, double %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret double %result
|
||||
}
|
||||
@ -7931,7 +7931,7 @@ define void @flat_agent_atomic_fadd_noret_f64__offset12b_pos__amdgpu_no_fine_gra
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr double, ptr %ptr, i64 255
|
||||
%gep = getelementptr inbounds double, ptr %ptr, i64 255
|
||||
%unused = atomicrmw fadd ptr %gep, double %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -8381,7 +8381,7 @@ define void @flat_agent_atomic_fadd_noret_f64__offset12b_neg__amdgpu_no_fine_gra
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr double, ptr %ptr, i64 -256
|
||||
%gep = getelementptr inbounds double, ptr %ptr, i64 -256
|
||||
%unused = atomicrmw fadd ptr %gep, double %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -9141,7 +9141,7 @@ define half @flat_agent_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_grain
|
||||
; GFX7-NEXT: v_lshrrev_b32_e32 v0, v2, v5
|
||||
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr half, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds half, ptr %ptr, i64 1023
|
||||
%result = atomicrmw fadd ptr %gep, half %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret half %result
|
||||
}
|
||||
@ -9526,7 +9526,7 @@ define half @flat_agent_atomic_fadd_ret_f16__offset12b_neg__amdgpu_no_fine_grain
|
||||
; GFX7-NEXT: v_lshrrev_b32_e32 v0, v2, v5
|
||||
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr half, ptr %ptr, i64 -1024
|
||||
%gep = getelementptr inbounds half, ptr %ptr, i64 -1024
|
||||
%result = atomicrmw fadd ptr %gep, half %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret half %result
|
||||
}
|
||||
@ -10256,7 +10256,7 @@ define void @flat_agent_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_gra
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr half, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds half, ptr %ptr, i64 1023
|
||||
%unused = atomicrmw fadd ptr %gep, half %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -10628,7 +10628,7 @@ define void @flat_agent_atomic_fadd_noret_f16__offset12b_neg__amdgpu_no_fine_gra
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr half, ptr %ptr, i64 -1024
|
||||
%gep = getelementptr inbounds half, ptr %ptr, i64 -1024
|
||||
%unused = atomicrmw fadd ptr %gep, half %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -10902,7 +10902,7 @@ define void @flat_agent_atomic_fadd_noret_f16__offset12b__align4_pos__amdgpu_no_
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr half, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds half, ptr %ptr, i64 1023
|
||||
%unused = atomicrmw fadd ptr %gep, half %val syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -11186,7 +11186,7 @@ define half @flat_agent_atomic_fadd_ret_f16__offset12b_pos__align4__amdgpu_no_fi
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v3
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr half, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds half, ptr %ptr, i64 1023
|
||||
%result = atomicrmw fadd ptr %gep, half %val syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
|
||||
ret half %result
|
||||
}
|
||||
@ -11574,7 +11574,7 @@ define half @flat_system_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_grai
|
||||
; GFX7-NEXT: v_lshrrev_b32_e32 v0, v2, v5
|
||||
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr half, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds half, ptr %ptr, i64 1023
|
||||
%result = atomicrmw fadd ptr %gep, half %val seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret half %result
|
||||
}
|
||||
@ -11949,7 +11949,7 @@ define void @flat_system_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_gr
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr half, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds half, ptr %ptr, i64 1023
|
||||
%unused = atomicrmw fadd ptr %gep, half %val seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -12872,7 +12872,7 @@ define bfloat @flat_agent_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_gr
|
||||
; GFX7-NEXT: v_lshrrev_b32_e32 v0, v3, v5
|
||||
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr bfloat, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023
|
||||
%result = atomicrmw fadd ptr %gep, bfloat %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret bfloat %result
|
||||
}
|
||||
@ -13340,7 +13340,7 @@ define bfloat @flat_agent_atomic_fadd_ret_bf16__offset12b_neg__amdgpu_no_fine_gr
|
||||
; GFX7-NEXT: v_lshrrev_b32_e32 v0, v3, v5
|
||||
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr bfloat, ptr %ptr, i64 -1024
|
||||
%gep = getelementptr inbounds bfloat, ptr %ptr, i64 -1024
|
||||
%result = atomicrmw fadd ptr %gep, bfloat %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret bfloat %result
|
||||
}
|
||||
@ -13792,7 +13792,7 @@ define void @flat_agent_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_gr
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr bfloat, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023
|
||||
%unused = atomicrmw fadd ptr %gep, bfloat %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -14245,7 +14245,7 @@ define void @flat_agent_atomic_fadd_noret_bf16__offset12b_neg__amdgpu_no_fine_gr
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr bfloat, ptr %ptr, i64 -1024
|
||||
%gep = getelementptr inbounds bfloat, ptr %ptr, i64 -1024
|
||||
%unused = atomicrmw fadd ptr %gep, bfloat %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -14618,7 +14618,7 @@ define bfloat @flat_agent_atomic_fadd_ret_bf16__offset12b_pos__align4__amdgpu_no
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v3
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr bfloat, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023
|
||||
%result = atomicrmw fadd ptr %gep, bfloat %val syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
|
||||
ret bfloat %result
|
||||
}
|
||||
@ -14979,7 +14979,7 @@ define void @flat_agent_atomic_fadd_noret_bf16__offset12b__align4_pos__amdgpu_no
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr bfloat, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023
|
||||
%unused = atomicrmw fadd ptr %gep, bfloat %val syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -15887,7 +15887,7 @@ define bfloat @flat_system_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX7-NEXT: v_lshrrev_b32_e32 v0, v3, v5
|
||||
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr bfloat, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023
|
||||
%result = atomicrmw fadd ptr %gep, bfloat %val seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret bfloat %result
|
||||
}
|
||||
@ -16343,7 +16343,7 @@ define void @flat_system_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr bfloat, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023
|
||||
%unused = atomicrmw fadd ptr %gep, bfloat %val seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -16726,7 +16726,7 @@ define <2 x half> @flat_agent_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no_fi
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x half>, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 511
|
||||
%result = atomicrmw fadd ptr %gep, <2 x half> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret <2 x half> %result
|
||||
}
|
||||
@ -16930,7 +16930,7 @@ define <2 x half> @flat_agent_atomic_fadd_ret_v2f16__offset12b_neg__amdgpu_no_fi
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x half>, ptr %ptr, i64 -512
|
||||
%gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 -512
|
||||
%result = atomicrmw fadd ptr %gep, <2 x half> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret <2 x half> %result
|
||||
}
|
||||
@ -17297,7 +17297,7 @@ define void @flat_agent_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x half>, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 511
|
||||
%unused = atomicrmw fadd ptr %gep, <2 x half> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -17500,7 +17500,7 @@ define void @flat_agent_atomic_fadd_noret_v2f16__offset12b_neg__amdgpu_no_fine_g
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x half>, ptr %ptr, i64 -512
|
||||
%gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 -512
|
||||
%unused = atomicrmw fadd ptr %gep, <2 x half> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -17694,7 +17694,7 @@ define <2 x half> @flat_system_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no_f
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x half>, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 511
|
||||
%result = atomicrmw fadd ptr %gep, <2 x half> %val seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret <2 x half> %result
|
||||
}
|
||||
@ -17884,7 +17884,7 @@ define void @flat_system_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine_
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x half>, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 511
|
||||
%unused = atomicrmw fadd ptr %gep, <2 x half> %val seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -19273,7 +19273,7 @@ define <2 x bfloat> @flat_agent_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu_no
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x bfloat>, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 511
|
||||
%result = atomicrmw fadd ptr %gep, <2 x bfloat> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret <2 x bfloat> %result
|
||||
}
|
||||
@ -19613,7 +19613,7 @@ define <2 x bfloat> @flat_agent_atomic_fadd_ret_v2bf16__offset12b_neg__amdgpu_no
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x bfloat>, ptr %ptr, i64 -512
|
||||
%gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 -512
|
||||
%result = atomicrmw fadd ptr %gep, <2 x bfloat> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret <2 x bfloat> %result
|
||||
}
|
||||
@ -20246,7 +20246,7 @@ define void @flat_agent_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fine_
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x bfloat>, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 511
|
||||
%unused = atomicrmw fadd ptr %gep, <2 x bfloat> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -20585,7 +20585,7 @@ define void @flat_agent_atomic_fadd_noret_v2bf16__offset12b_neg__amdgpu_no_fine_
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x bfloat>, ptr %ptr, i64 -512
|
||||
%gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 -512
|
||||
%unused = atomicrmw fadd ptr %gep, <2 x bfloat> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -20914,7 +20914,7 @@ define <2 x bfloat> @flat_system_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu_n
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x bfloat>, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 511
|
||||
%result = atomicrmw fadd ptr %gep, <2 x bfloat> %val seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret <2 x bfloat> %result
|
||||
}
|
||||
@ -21237,7 +21237,7 @@ define void @flat_system_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fine
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x bfloat>, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 511
|
||||
%unused = atomicrmw fadd ptr %gep, <2 x bfloat> %val seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -301,7 +301,7 @@ define float @flat_agent_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_grai
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 511
|
||||
%result = atomicrmw fmax ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret float %result
|
||||
}
|
||||
@ -466,7 +466,7 @@ define float @flat_agent_atomic_fmax_ret_f32__offset12b_neg__amdgpu_no_fine_grai
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 -512
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 -512
|
||||
%result = atomicrmw fmax ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret float %result
|
||||
}
|
||||
@ -755,7 +755,7 @@ define void @flat_agent_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_gra
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 511
|
||||
%unused = atomicrmw fmax ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -922,7 +922,7 @@ define void @flat_agent_atomic_fmax_noret_f32__offset12b_neg__amdgpu_no_fine_gra
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 -512
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 -512
|
||||
%unused = atomicrmw fmax ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -1074,7 +1074,7 @@ define float @flat_system_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_gra
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 511
|
||||
%result = atomicrmw fmax ptr %gep, float %val seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret float %result
|
||||
}
|
||||
@ -1225,7 +1225,7 @@ define void @flat_system_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_gr
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 511
|
||||
%unused = atomicrmw fmax ptr %gep, float %val seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -1857,7 +1857,7 @@ define float @flat_agent_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_fine
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 511
|
||||
%result = atomicrmw fmax ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret float %result
|
||||
}
|
||||
@ -2022,7 +2022,7 @@ define float @flat_agent_atomic_fmax_ret_f32__offset12b_neg__ftz__amdgpu_no_fine
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 -512
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 -512
|
||||
%result = atomicrmw fmax ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret float %result
|
||||
}
|
||||
@ -2311,7 +2311,7 @@ define void @flat_agent_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_fin
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 511
|
||||
%unused = atomicrmw fmax ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -2478,7 +2478,7 @@ define void @flat_agent_atomic_fmax_noret_f32__offset12b_neg__ftz__amdgpu_no_fin
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 -512
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 -512
|
||||
%unused = atomicrmw fmax ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -2630,7 +2630,7 @@ define float @flat_system_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_fin
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 511
|
||||
%result = atomicrmw fmax ptr %gep, float %val seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret float %result
|
||||
}
|
||||
@ -2781,7 +2781,7 @@ define void @flat_system_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_fi
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 511
|
||||
%unused = atomicrmw fmax ptr %gep, float %val seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -3587,7 +3587,7 @@ define double @flat_agent_atomic_fmax_ret_f64__offset12b_pos__amdgpu_no_fine_gra
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr double, ptr %ptr, i64 255
|
||||
%gep = getelementptr inbounds double, ptr %ptr, i64 255
|
||||
%result = atomicrmw fmax ptr %gep, double %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret double %result
|
||||
}
|
||||
@ -4000,7 +4000,7 @@ define double @flat_agent_atomic_fmax_ret_f64__offset12b_neg__amdgpu_no_fine_gra
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr double, ptr %ptr, i64 -256
|
||||
%gep = getelementptr inbounds double, ptr %ptr, i64 -256
|
||||
%result = atomicrmw fmax ptr %gep, double %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret double %result
|
||||
}
|
||||
@ -4791,7 +4791,7 @@ define void @flat_agent_atomic_fmax_noret_f64__offset12b_pos__amdgpu_no_fine_gra
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr double, ptr %ptr, i64 255
|
||||
%gep = getelementptr inbounds double, ptr %ptr, i64 255
|
||||
%unused = atomicrmw fmax ptr %gep, double %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -5197,7 +5197,7 @@ define void @flat_agent_atomic_fmax_noret_f64__offset12b_neg__amdgpu_no_fine_gra
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr double, ptr %ptr, i64 -256
|
||||
%gep = getelementptr inbounds double, ptr %ptr, i64 -256
|
||||
%unused = atomicrmw fmax ptr %gep, double %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -6816,7 +6816,7 @@ define half @flat_agent_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_grain
|
||||
; GFX7-NEXT: v_lshrrev_b32_e32 v0, v2, v5
|
||||
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr half, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds half, ptr %ptr, i64 1023
|
||||
%result = atomicrmw fmax ptr %gep, half %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret half %result
|
||||
}
|
||||
@ -7226,7 +7226,7 @@ define half @flat_agent_atomic_fmax_ret_f16__offset12b_neg__amdgpu_no_fine_grain
|
||||
; GFX7-NEXT: v_lshrrev_b32_e32 v0, v2, v5
|
||||
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr half, ptr %ptr, i64 -1024
|
||||
%gep = getelementptr inbounds half, ptr %ptr, i64 -1024
|
||||
%result = atomicrmw fmax ptr %gep, half %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret half %result
|
||||
}
|
||||
@ -8001,7 +8001,7 @@ define void @flat_agent_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_gra
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr half, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds half, ptr %ptr, i64 1023
|
||||
%unused = atomicrmw fmax ptr %gep, half %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -8398,7 +8398,7 @@ define void @flat_agent_atomic_fmax_noret_f16__offset12b_neg__amdgpu_no_fine_gra
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr half, ptr %ptr, i64 -1024
|
||||
%gep = getelementptr inbounds half, ptr %ptr, i64 -1024
|
||||
%unused = atomicrmw fmax ptr %gep, half %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -8702,7 +8702,7 @@ define half @flat_agent_atomic_fmax_ret_f16__offset12b_pos__align4__amdgpu_no_fi
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v3
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr half, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds half, ptr %ptr, i64 1023
|
||||
%result = atomicrmw fmax ptr %gep, half %val syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
|
||||
ret half %result
|
||||
}
|
||||
@ -8996,7 +8996,7 @@ define void @flat_agent_atomic_fmax_noret_f16__offset12b__align4_pos__amdgpu_no_
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr half, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds half, ptr %ptr, i64 1023
|
||||
%unused = atomicrmw fmax ptr %gep, half %val syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -9409,7 +9409,7 @@ define half @flat_system_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_grai
|
||||
; GFX7-NEXT: v_lshrrev_b32_e32 v0, v2, v5
|
||||
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr half, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds half, ptr %ptr, i64 1023
|
||||
%result = atomicrmw fmax ptr %gep, half %val seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret half %result
|
||||
}
|
||||
@ -9809,7 +9809,7 @@ define void @flat_system_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_gr
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr half, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds half, ptr %ptr, i64 1023
|
||||
%unused = atomicrmw fmax ptr %gep, half %val seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -10734,7 +10734,7 @@ define bfloat @flat_agent_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine_gr
|
||||
; GFX7-NEXT: v_lshrrev_b32_e32 v0, v3, v5
|
||||
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr bfloat, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023
|
||||
%result = atomicrmw fmax ptr %gep, bfloat %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret bfloat %result
|
||||
}
|
||||
@ -11203,7 +11203,7 @@ define bfloat @flat_agent_atomic_fmax_ret_bf16__offset12b_neg__amdgpu_no_fine_gr
|
||||
; GFX7-NEXT: v_lshrrev_b32_e32 v0, v3, v5
|
||||
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr bfloat, ptr %ptr, i64 -1024
|
||||
%gep = getelementptr inbounds bfloat, ptr %ptr, i64 -1024
|
||||
%result = atomicrmw fmax ptr %gep, bfloat %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret bfloat %result
|
||||
}
|
||||
@ -12094,7 +12094,7 @@ define void @flat_agent_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine_gr
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr bfloat, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023
|
||||
%unused = atomicrmw fmax ptr %gep, bfloat %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -12548,7 +12548,7 @@ define void @flat_agent_atomic_fmax_noret_bf16__offset12b_neg__amdgpu_no_fine_gr
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr bfloat, ptr %ptr, i64 -1024
|
||||
%gep = getelementptr inbounds bfloat, ptr %ptr, i64 -1024
|
||||
%unused = atomicrmw fmax ptr %gep, bfloat %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -12922,7 +12922,7 @@ define bfloat @flat_agent_atomic_fmax_ret_bf16__offset12b_pos__align4__amdgpu_no
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v3
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr bfloat, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023
|
||||
%result = atomicrmw fmax ptr %gep, bfloat %val syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
|
||||
ret bfloat %result
|
||||
}
|
||||
@ -13284,7 +13284,7 @@ define void @flat_agent_atomic_fmax_noret_bf16__offset12b__align4_pos__amdgpu_no
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr bfloat, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023
|
||||
%unused = atomicrmw fmax ptr %gep, bfloat %val syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -13756,7 +13756,7 @@ define bfloat @flat_system_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX7-NEXT: v_lshrrev_b32_e32 v0, v3, v5
|
||||
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr bfloat, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023
|
||||
%result = atomicrmw fmax ptr %gep, bfloat %val seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret bfloat %result
|
||||
}
|
||||
@ -14213,7 +14213,7 @@ define void @flat_system_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr bfloat, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023
|
||||
%unused = atomicrmw fmax ptr %gep, bfloat %val seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -14692,7 +14692,7 @@ define <2 x half> @flat_agent_atomic_fmax_ret_v2f16__offset12b_pos__amdgpu_no_fi
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x half>, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 511
|
||||
%result = atomicrmw fmax ptr %gep, <2 x half> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret <2 x half> %result
|
||||
}
|
||||
@ -14946,7 +14946,7 @@ define <2 x half> @flat_agent_atomic_fmax_ret_v2f16__offset12b_neg__amdgpu_no_fi
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x half>, ptr %ptr, i64 -512
|
||||
%gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 -512
|
||||
%result = atomicrmw fmax ptr %gep, <2 x half> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret <2 x half> %result
|
||||
}
|
||||
@ -15407,7 +15407,7 @@ define void @flat_agent_atomic_fmax_noret_v2f16__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x half>, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 511
|
||||
%unused = atomicrmw fmax ptr %gep, <2 x half> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -15660,7 +15660,7 @@ define void @flat_agent_atomic_fmax_noret_v2f16__offset12b_neg__amdgpu_no_fine_g
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x half>, ptr %ptr, i64 -512
|
||||
%gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 -512
|
||||
%unused = atomicrmw fmax ptr %gep, <2 x half> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -15902,7 +15902,7 @@ define <2 x half> @flat_system_atomic_fmax_ret_v2f16__offset12b_pos__amdgpu_no_f
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x half>, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 511
|
||||
%result = atomicrmw fmax ptr %gep, <2 x half> %val seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret <2 x half> %result
|
||||
}
|
||||
@ -16139,7 +16139,7 @@ define void @flat_system_atomic_fmax_noret_v2f16__offset12b_pos__amdgpu_no_fine_
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x half>, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 511
|
||||
%unused = atomicrmw fmax ptr %gep, <2 x half> %val seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -17040,7 +17040,7 @@ define <2 x bfloat> @flat_agent_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu_no
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x bfloat>, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 511
|
||||
%result = atomicrmw fmax ptr %gep, <2 x bfloat> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret <2 x bfloat> %result
|
||||
}
|
||||
@ -17506,7 +17506,7 @@ define <2 x bfloat> @flat_agent_atomic_fmax_ret_v2bf16__offset12b_neg__amdgpu_no
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x bfloat>, ptr %ptr, i64 -512
|
||||
%gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 -512
|
||||
%result = atomicrmw fmax ptr %gep, <2 x bfloat> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret <2 x bfloat> %result
|
||||
}
|
||||
@ -18377,7 +18377,7 @@ define void @flat_agent_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fine_
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x bfloat>, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 511
|
||||
%unused = atomicrmw fmax ptr %gep, <2 x bfloat> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -18838,7 +18838,7 @@ define void @flat_agent_atomic_fmax_noret_v2bf16__offset12b_neg__amdgpu_no_fine_
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x bfloat>, ptr %ptr, i64 -512
|
||||
%gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 -512
|
||||
%unused = atomicrmw fmax ptr %gep, <2 x bfloat> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -19292,7 +19292,7 @@ define <2 x bfloat> @flat_system_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu_n
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x bfloat>, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 511
|
||||
%result = atomicrmw fmax ptr %gep, <2 x bfloat> %val seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret <2 x bfloat> %result
|
||||
}
|
||||
@ -19735,7 +19735,7 @@ define void @flat_system_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fine
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x bfloat>, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 511
|
||||
%unused = atomicrmw fmax ptr %gep, <2 x bfloat> %val seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -301,7 +301,7 @@ define float @flat_agent_atomic_fmin_ret_f32__offset12b_pos__amdgpu_no_fine_grai
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 511
|
||||
%result = atomicrmw fmin ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret float %result
|
||||
}
|
||||
@ -466,7 +466,7 @@ define float @flat_agent_atomic_fmin_ret_f32__offset12b_neg__amdgpu_no_fine_grai
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 -512
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 -512
|
||||
%result = atomicrmw fmin ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret float %result
|
||||
}
|
||||
@ -755,7 +755,7 @@ define void @flat_agent_atomic_fmin_noret_f32__offset12b_pos__amdgpu_no_fine_gra
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 511
|
||||
%unused = atomicrmw fmin ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -922,7 +922,7 @@ define void @flat_agent_atomic_fmin_noret_f32__offset12b_neg__amdgpu_no_fine_gra
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 -512
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 -512
|
||||
%unused = atomicrmw fmin ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -1074,7 +1074,7 @@ define float @flat_system_atomic_fmin_ret_f32__offset12b_pos__amdgpu_no_fine_gra
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 511
|
||||
%result = atomicrmw fmin ptr %gep, float %val seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret float %result
|
||||
}
|
||||
@ -1225,7 +1225,7 @@ define void @flat_system_atomic_fmin_noret_f32__offset12b_pos__amdgpu_no_fine_gr
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 511
|
||||
%unused = atomicrmw fmin ptr %gep, float %val seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -1857,7 +1857,7 @@ define float @flat_agent_atomic_fmin_ret_f32__offset12b_pos__ftz__amdgpu_no_fine
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 511
|
||||
%result = atomicrmw fmin ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret float %result
|
||||
}
|
||||
@ -2022,7 +2022,7 @@ define float @flat_agent_atomic_fmin_ret_f32__offset12b_neg__ftz__amdgpu_no_fine
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 -512
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 -512
|
||||
%result = atomicrmw fmin ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret float %result
|
||||
}
|
||||
@ -2311,7 +2311,7 @@ define void @flat_agent_atomic_fmin_noret_f32__offset12b_pos__ftz__amdgpu_no_fin
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 511
|
||||
%unused = atomicrmw fmin ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -2478,7 +2478,7 @@ define void @flat_agent_atomic_fmin_noret_f32__offset12b_neg__ftz__amdgpu_no_fin
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 -512
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 -512
|
||||
%unused = atomicrmw fmin ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -2630,7 +2630,7 @@ define float @flat_system_atomic_fmin_ret_f32__offset12b_pos__ftz__amdgpu_no_fin
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 511
|
||||
%result = atomicrmw fmin ptr %gep, float %val seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret float %result
|
||||
}
|
||||
@ -2781,7 +2781,7 @@ define void @flat_system_atomic_fmin_noret_f32__offset12b_pos__ftz__amdgpu_no_fi
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 511
|
||||
%unused = atomicrmw fmin ptr %gep, float %val seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -3587,7 +3587,7 @@ define double @flat_agent_atomic_fmin_ret_f64__offset12b_pos__amdgpu_no_fine_gra
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr double, ptr %ptr, i64 255
|
||||
%gep = getelementptr inbounds double, ptr %ptr, i64 255
|
||||
%result = atomicrmw fmin ptr %gep, double %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret double %result
|
||||
}
|
||||
@ -4000,7 +4000,7 @@ define double @flat_agent_atomic_fmin_ret_f64__offset12b_neg__amdgpu_no_fine_gra
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr double, ptr %ptr, i64 -256
|
||||
%gep = getelementptr inbounds double, ptr %ptr, i64 -256
|
||||
%result = atomicrmw fmin ptr %gep, double %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret double %result
|
||||
}
|
||||
@ -4791,7 +4791,7 @@ define void @flat_agent_atomic_fmin_noret_f64__offset12b_pos__amdgpu_no_fine_gra
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr double, ptr %ptr, i64 255
|
||||
%gep = getelementptr inbounds double, ptr %ptr, i64 255
|
||||
%unused = atomicrmw fmin ptr %gep, double %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -5197,7 +5197,7 @@ define void @flat_agent_atomic_fmin_noret_f64__offset12b_neg__amdgpu_no_fine_gra
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr double, ptr %ptr, i64 -256
|
||||
%gep = getelementptr inbounds double, ptr %ptr, i64 -256
|
||||
%unused = atomicrmw fmin ptr %gep, double %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -6816,7 +6816,7 @@ define half @flat_agent_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_grain
|
||||
; GFX7-NEXT: v_lshrrev_b32_e32 v0, v2, v5
|
||||
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr half, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds half, ptr %ptr, i64 1023
|
||||
%result = atomicrmw fmin ptr %gep, half %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret half %result
|
||||
}
|
||||
@ -7226,7 +7226,7 @@ define half @flat_agent_atomic_fmin_ret_f16__offset12b_neg__amdgpu_no_fine_grain
|
||||
; GFX7-NEXT: v_lshrrev_b32_e32 v0, v2, v5
|
||||
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr half, ptr %ptr, i64 -1024
|
||||
%gep = getelementptr inbounds half, ptr %ptr, i64 -1024
|
||||
%result = atomicrmw fmin ptr %gep, half %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret half %result
|
||||
}
|
||||
@ -8001,7 +8001,7 @@ define void @flat_agent_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_gra
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr half, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds half, ptr %ptr, i64 1023
|
||||
%unused = atomicrmw fmin ptr %gep, half %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -8398,7 +8398,7 @@ define void @flat_agent_atomic_fmin_noret_f16__offset12b_neg__amdgpu_no_fine_gra
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr half, ptr %ptr, i64 -1024
|
||||
%gep = getelementptr inbounds half, ptr %ptr, i64 -1024
|
||||
%unused = atomicrmw fmin ptr %gep, half %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -8702,7 +8702,7 @@ define half @flat_agent_atomic_fmin_ret_f16__offset12b_pos__align4__amdgpu_no_fi
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v3
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr half, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds half, ptr %ptr, i64 1023
|
||||
%result = atomicrmw fmin ptr %gep, half %val syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
|
||||
ret half %result
|
||||
}
|
||||
@ -8996,7 +8996,7 @@ define void @flat_agent_atomic_fmin_noret_f16__offset12b__align4_pos__amdgpu_no_
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr half, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds half, ptr %ptr, i64 1023
|
||||
%unused = atomicrmw fmin ptr %gep, half %val syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -9409,7 +9409,7 @@ define half @flat_system_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_grai
|
||||
; GFX7-NEXT: v_lshrrev_b32_e32 v0, v2, v5
|
||||
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr half, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds half, ptr %ptr, i64 1023
|
||||
%result = atomicrmw fmin ptr %gep, half %val seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret half %result
|
||||
}
|
||||
@ -9809,7 +9809,7 @@ define void @flat_system_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_gr
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr half, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds half, ptr %ptr, i64 1023
|
||||
%unused = atomicrmw fmin ptr %gep, half %val seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -10734,7 +10734,7 @@ define bfloat @flat_agent_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine_gr
|
||||
; GFX7-NEXT: v_lshrrev_b32_e32 v0, v3, v5
|
||||
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr bfloat, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023
|
||||
%result = atomicrmw fmin ptr %gep, bfloat %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret bfloat %result
|
||||
}
|
||||
@ -11203,7 +11203,7 @@ define bfloat @flat_agent_atomic_fmin_ret_bf16__offset12b_neg__amdgpu_no_fine_gr
|
||||
; GFX7-NEXT: v_lshrrev_b32_e32 v0, v3, v5
|
||||
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr bfloat, ptr %ptr, i64 -1024
|
||||
%gep = getelementptr inbounds bfloat, ptr %ptr, i64 -1024
|
||||
%result = atomicrmw fmin ptr %gep, bfloat %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret bfloat %result
|
||||
}
|
||||
@ -12094,7 +12094,7 @@ define void @flat_agent_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine_gr
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr bfloat, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023
|
||||
%unused = atomicrmw fmin ptr %gep, bfloat %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -12548,7 +12548,7 @@ define void @flat_agent_atomic_fmin_noret_bf16__offset12b_neg__amdgpu_no_fine_gr
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr bfloat, ptr %ptr, i64 -1024
|
||||
%gep = getelementptr inbounds bfloat, ptr %ptr, i64 -1024
|
||||
%unused = atomicrmw fmin ptr %gep, bfloat %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -12922,7 +12922,7 @@ define bfloat @flat_agent_atomic_fmin_ret_bf16__offset12b_pos__align4__amdgpu_no
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v3
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr bfloat, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023
|
||||
%result = atomicrmw fmin ptr %gep, bfloat %val syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
|
||||
ret bfloat %result
|
||||
}
|
||||
@ -13284,7 +13284,7 @@ define void @flat_agent_atomic_fmin_noret_bf16__offset12b__align4_pos__amdgpu_no
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr bfloat, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023
|
||||
%unused = atomicrmw fmin ptr %gep, bfloat %val syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -13756,7 +13756,7 @@ define bfloat @flat_system_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX7-NEXT: v_lshrrev_b32_e32 v0, v3, v5
|
||||
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr bfloat, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023
|
||||
%result = atomicrmw fmin ptr %gep, bfloat %val seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret bfloat %result
|
||||
}
|
||||
@ -14213,7 +14213,7 @@ define void @flat_system_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr bfloat, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023
|
||||
%unused = atomicrmw fmin ptr %gep, bfloat %val seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -14692,7 +14692,7 @@ define <2 x half> @flat_agent_atomic_fmin_ret_v2f16__offset12b_pos__amdgpu_no_fi
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x half>, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 511
|
||||
%result = atomicrmw fmin ptr %gep, <2 x half> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret <2 x half> %result
|
||||
}
|
||||
@ -14946,7 +14946,7 @@ define <2 x half> @flat_agent_atomic_fmin_ret_v2f16__offset12b_neg__amdgpu_no_fi
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x half>, ptr %ptr, i64 -512
|
||||
%gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 -512
|
||||
%result = atomicrmw fmin ptr %gep, <2 x half> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret <2 x half> %result
|
||||
}
|
||||
@ -15407,7 +15407,7 @@ define void @flat_agent_atomic_fmin_noret_v2f16__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x half>, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 511
|
||||
%unused = atomicrmw fmin ptr %gep, <2 x half> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -15660,7 +15660,7 @@ define void @flat_agent_atomic_fmin_noret_v2f16__offset12b_neg__amdgpu_no_fine_g
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x half>, ptr %ptr, i64 -512
|
||||
%gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 -512
|
||||
%unused = atomicrmw fmin ptr %gep, <2 x half> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -15902,7 +15902,7 @@ define <2 x half> @flat_system_atomic_fmin_ret_v2f16__offset12b_pos__amdgpu_no_f
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x half>, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 511
|
||||
%result = atomicrmw fmin ptr %gep, <2 x half> %val seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret <2 x half> %result
|
||||
}
|
||||
@ -16139,7 +16139,7 @@ define void @flat_system_atomic_fmin_noret_v2f16__offset12b_pos__amdgpu_no_fine_
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x half>, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 511
|
||||
%unused = atomicrmw fmin ptr %gep, <2 x half> %val seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -17040,7 +17040,7 @@ define <2 x bfloat> @flat_agent_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu_no
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x bfloat>, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 511
|
||||
%result = atomicrmw fmin ptr %gep, <2 x bfloat> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret <2 x bfloat> %result
|
||||
}
|
||||
@ -17506,7 +17506,7 @@ define <2 x bfloat> @flat_agent_atomic_fmin_ret_v2bf16__offset12b_neg__amdgpu_no
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x bfloat>, ptr %ptr, i64 -512
|
||||
%gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 -512
|
||||
%result = atomicrmw fmin ptr %gep, <2 x bfloat> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret <2 x bfloat> %result
|
||||
}
|
||||
@ -18377,7 +18377,7 @@ define void @flat_agent_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fine_
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x bfloat>, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 511
|
||||
%unused = atomicrmw fmin ptr %gep, <2 x bfloat> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -18838,7 +18838,7 @@ define void @flat_agent_atomic_fmin_noret_v2bf16__offset12b_neg__amdgpu_no_fine_
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x bfloat>, ptr %ptr, i64 -512
|
||||
%gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 -512
|
||||
%unused = atomicrmw fmin ptr %gep, <2 x bfloat> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -19292,7 +19292,7 @@ define <2 x bfloat> @flat_system_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu_n
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x bfloat>, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 511
|
||||
%result = atomicrmw fmin ptr %gep, <2 x bfloat> %val seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret <2 x bfloat> %result
|
||||
}
|
||||
@ -19735,7 +19735,7 @@ define void @flat_system_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fine
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x bfloat>, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 511
|
||||
%unused = atomicrmw fmin ptr %gep, <2 x bfloat> %val seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -405,7 +405,7 @@ define float @flat_agent_atomic_fsub_ret_f32__offset12b_pos(ptr %ptr, float %val
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 511
|
||||
%result = atomicrmw fsub ptr %gep, float %val syncscope("agent") seq_cst
|
||||
ret float %result
|
||||
}
|
||||
@ -620,7 +620,7 @@ define float @flat_agent_atomic_fsub_ret_f32__offset12b_neg(ptr %ptr, float %val
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 -512
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 -512
|
||||
%result = atomicrmw fsub ptr %gep, float %val syncscope("agent") seq_cst
|
||||
ret float %result
|
||||
}
|
||||
@ -999,7 +999,7 @@ define void @flat_agent_atomic_fsub_noret_f32__offset12b_pos(ptr %ptr, float %va
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 511
|
||||
%unused = atomicrmw fsub ptr %gep, float %val syncscope("agent") seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -1211,7 +1211,7 @@ define void @flat_agent_atomic_fsub_noret_f32__offset12b_neg(ptr %ptr, float %va
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 -512
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 -512
|
||||
%unused = atomicrmw fsub ptr %gep, float %val syncscope("agent") seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -1414,7 +1414,7 @@ define float @flat_system_atomic_fsub_ret_f32__offset12b_pos(ptr %ptr, float %va
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 511
|
||||
%result = atomicrmw fsub ptr %gep, float %val seq_cst
|
||||
ret float %result
|
||||
}
|
||||
@ -1610,7 +1610,7 @@ define void @flat_system_atomic_fsub_noret_f32__offset12b_pos(ptr %ptr, float %v
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 511
|
||||
%unused = atomicrmw fsub ptr %gep, float %val seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -2010,7 +2010,7 @@ define float @flat_agent_atomic_fsub_ret_f32__offset12b_pos__ftz(ptr %ptr, float
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 511
|
||||
%result = atomicrmw fsub ptr %gep, float %val syncscope("agent") seq_cst
|
||||
ret float %result
|
||||
}
|
||||
@ -2225,7 +2225,7 @@ define float @flat_agent_atomic_fsub_ret_f32__offset12b_neg__ftz(ptr %ptr, float
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 -512
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 -512
|
||||
%result = atomicrmw fsub ptr %gep, float %val syncscope("agent") seq_cst
|
||||
ret float %result
|
||||
}
|
||||
@ -2604,7 +2604,7 @@ define void @flat_agent_atomic_fsub_noret_f32__offset12b_pos__ftz(ptr %ptr, floa
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 511
|
||||
%unused = atomicrmw fsub ptr %gep, float %val syncscope("agent") seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -2816,7 +2816,7 @@ define void @flat_agent_atomic_fsub_noret_f32__offset12b_neg__ftz(ptr %ptr, floa
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 -512
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 -512
|
||||
%unused = atomicrmw fsub ptr %gep, float %val syncscope("agent") seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -3019,7 +3019,7 @@ define float @flat_system_atomic_fsub_ret_f32__offset12b_pos__ftz(ptr %ptr, floa
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 511
|
||||
%result = atomicrmw fsub ptr %gep, float %val seq_cst
|
||||
ret float %result
|
||||
}
|
||||
@ -3215,7 +3215,7 @@ define void @flat_system_atomic_fsub_noret_f32__offset12b_pos__ftz(ptr %ptr, flo
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i64 511
|
||||
%unused = atomicrmw fsub ptr %gep, float %val seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -4077,7 +4077,7 @@ define double @flat_agent_atomic_fsub_ret_f64__offset12b_pos(ptr %ptr, double %v
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr double, ptr %ptr, i64 255
|
||||
%gep = getelementptr inbounds double, ptr %ptr, i64 255
|
||||
%result = atomicrmw fsub ptr %gep, double %val syncscope("agent") seq_cst
|
||||
ret double %result
|
||||
}
|
||||
@ -4524,7 +4524,7 @@ define double @flat_agent_atomic_fsub_ret_f64__offset12b_neg(ptr %ptr, double %v
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr double, ptr %ptr, i64 -256
|
||||
%gep = getelementptr inbounds double, ptr %ptr, i64 -256
|
||||
%result = atomicrmw fsub ptr %gep, double %val syncscope("agent") seq_cst
|
||||
ret double %result
|
||||
}
|
||||
@ -5377,7 +5377,7 @@ define void @flat_agent_atomic_fsub_noret_f64__offset12b_pos(ptr %ptr, double %v
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr double, ptr %ptr, i64 255
|
||||
%gep = getelementptr inbounds double, ptr %ptr, i64 255
|
||||
%unused = atomicrmw fsub ptr %gep, double %val syncscope("agent") seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -5814,7 +5814,7 @@ define void @flat_agent_atomic_fsub_noret_f64__offset12b_neg(ptr %ptr, double %v
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr double, ptr %ptr, i64 -256
|
||||
%gep = getelementptr inbounds double, ptr %ptr, i64 -256
|
||||
%unused = atomicrmw fsub ptr %gep, double %val syncscope("agent") seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -6574,7 +6574,7 @@ define half @flat_agent_atomic_fsub_ret_f16__offset12b_pos(ptr %ptr, half %val)
|
||||
; GFX7-NEXT: v_lshrrev_b32_e32 v0, v2, v5
|
||||
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr half, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds half, ptr %ptr, i64 1023
|
||||
%result = atomicrmw fsub ptr %gep, half %val syncscope("agent") seq_cst
|
||||
ret half %result
|
||||
}
|
||||
@ -6959,7 +6959,7 @@ define half @flat_agent_atomic_fsub_ret_f16__offset12b_neg(ptr %ptr, half %val)
|
||||
; GFX7-NEXT: v_lshrrev_b32_e32 v0, v2, v5
|
||||
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr half, ptr %ptr, i64 -1024
|
||||
%gep = getelementptr inbounds half, ptr %ptr, i64 -1024
|
||||
%result = atomicrmw fsub ptr %gep, half %val syncscope("agent") seq_cst
|
||||
ret half %result
|
||||
}
|
||||
@ -7689,7 +7689,7 @@ define void @flat_agent_atomic_fsub_noret_f16__offset12b_pos(ptr %ptr, half %val
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr half, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds half, ptr %ptr, i64 1023
|
||||
%unused = atomicrmw fsub ptr %gep, half %val syncscope("agent") seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -8061,7 +8061,7 @@ define void @flat_agent_atomic_fsub_noret_f16__offset12b_neg(ptr %ptr, half %val
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr half, ptr %ptr, i64 -1024
|
||||
%gep = getelementptr inbounds half, ptr %ptr, i64 -1024
|
||||
%unused = atomicrmw fsub ptr %gep, half %val syncscope("agent") seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -8345,7 +8345,7 @@ define half @flat_agent_atomic_fsub_ret_f16__offset12b_pos__align4(ptr %ptr, hal
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v3
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr half, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds half, ptr %ptr, i64 1023
|
||||
%result = atomicrmw fsub ptr %gep, half %val syncscope("agent") seq_cst, align 4
|
||||
ret half %result
|
||||
}
|
||||
@ -8619,7 +8619,7 @@ define void @flat_agent_atomic_fsub_noret_f16__offset12b__align4_pos(ptr %ptr, h
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr half, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds half, ptr %ptr, i64 1023
|
||||
%unused = atomicrmw fsub ptr %gep, half %val syncscope("agent") seq_cst, align 4
|
||||
ret void
|
||||
}
|
||||
@ -9007,7 +9007,7 @@ define half @flat_system_atomic_fsub_ret_f16__offset12b_pos(ptr %ptr, half %val)
|
||||
; GFX7-NEXT: v_lshrrev_b32_e32 v0, v2, v5
|
||||
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr half, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds half, ptr %ptr, i64 1023
|
||||
%result = atomicrmw fsub ptr %gep, half %val seq_cst
|
||||
ret half %result
|
||||
}
|
||||
@ -9382,7 +9382,7 @@ define void @flat_system_atomic_fsub_noret_f16__offset12b_pos(ptr %ptr, half %va
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr half, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds half, ptr %ptr, i64 1023
|
||||
%unused = atomicrmw fsub ptr %gep, half %val seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -10305,7 +10305,7 @@ define bfloat @flat_agent_atomic_fsub_ret_bf16__offset12b_pos(ptr %ptr, bfloat %
|
||||
; GFX7-NEXT: v_lshrrev_b32_e32 v0, v3, v5
|
||||
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr bfloat, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023
|
||||
%result = atomicrmw fsub ptr %gep, bfloat %val syncscope("agent") seq_cst
|
||||
ret bfloat %result
|
||||
}
|
||||
@ -10773,7 +10773,7 @@ define bfloat @flat_agent_atomic_fsub_ret_bf16__offset12b_neg(ptr %ptr, bfloat %
|
||||
; GFX7-NEXT: v_lshrrev_b32_e32 v0, v3, v5
|
||||
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr bfloat, ptr %ptr, i64 -1024
|
||||
%gep = getelementptr inbounds bfloat, ptr %ptr, i64 -1024
|
||||
%result = atomicrmw fsub ptr %gep, bfloat %val syncscope("agent") seq_cst
|
||||
ret bfloat %result
|
||||
}
|
||||
@ -11662,7 +11662,7 @@ define void @flat_agent_atomic_fsub_noret_bf16__offset12b_pos(ptr %ptr, bfloat %
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr bfloat, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023
|
||||
%unused = atomicrmw fsub ptr %gep, bfloat %val syncscope("agent") seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -12115,7 +12115,7 @@ define void @flat_agent_atomic_fsub_noret_bf16__offset12b_neg(ptr %ptr, bfloat %
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr bfloat, ptr %ptr, i64 -1024
|
||||
%gep = getelementptr inbounds bfloat, ptr %ptr, i64 -1024
|
||||
%unused = atomicrmw fsub ptr %gep, bfloat %val syncscope("agent") seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -12488,7 +12488,7 @@ define bfloat @flat_agent_atomic_fsub_ret_bf16__offset12b_pos__align4(ptr %ptr,
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v3
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr bfloat, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023
|
||||
%result = atomicrmw fsub ptr %gep, bfloat %val syncscope("agent") seq_cst, align 4
|
||||
ret bfloat %result
|
||||
}
|
||||
@ -12849,7 +12849,7 @@ define void @flat_agent_atomic_fsub_noret_bf16__offset12b__align4_pos(ptr %ptr,
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr bfloat, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023
|
||||
%unused = atomicrmw fsub ptr %gep, bfloat %val syncscope("agent") seq_cst, align 4
|
||||
ret void
|
||||
}
|
||||
@ -13320,7 +13320,7 @@ define bfloat @flat_system_atomic_fsub_ret_bf16__offset12b_pos(ptr %ptr, bfloat
|
||||
; GFX7-NEXT: v_lshrrev_b32_e32 v0, v3, v5
|
||||
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr bfloat, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023
|
||||
%result = atomicrmw fsub ptr %gep, bfloat %val seq_cst
|
||||
ret bfloat %result
|
||||
}
|
||||
@ -13776,7 +13776,7 @@ define void @flat_system_atomic_fsub_noret_bf16__offset12b_pos(ptr %ptr, bfloat
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr bfloat, ptr %ptr, i64 1023
|
||||
%gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023
|
||||
%unused = atomicrmw fsub ptr %gep, bfloat %val seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -14221,7 +14221,7 @@ define <2 x half> @flat_agent_atomic_fsub_ret_v2f16__offset12b_pos(ptr %ptr, <2
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x half>, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 511
|
||||
%result = atomicrmw fsub ptr %gep, <2 x half> %val syncscope("agent") seq_cst
|
||||
ret <2 x half> %result
|
||||
}
|
||||
@ -14458,7 +14458,7 @@ define <2 x half> @flat_agent_atomic_fsub_ret_v2f16__offset12b_neg(ptr %ptr, <2
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x half>, ptr %ptr, i64 -512
|
||||
%gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 -512
|
||||
%result = atomicrmw fsub ptr %gep, <2 x half> %val syncscope("agent") seq_cst
|
||||
ret <2 x half> %result
|
||||
}
|
||||
@ -14881,7 +14881,7 @@ define void @flat_agent_atomic_fsub_noret_v2f16__offset12b_pos(ptr %ptr, <2 x ha
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x half>, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 511
|
||||
%unused = atomicrmw fsub ptr %gep, <2 x half> %val syncscope("agent") seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -15115,7 +15115,7 @@ define void @flat_agent_atomic_fsub_noret_v2f16__offset12b_neg(ptr %ptr, <2 x ha
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x half>, ptr %ptr, i64 -512
|
||||
%gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 -512
|
||||
%unused = atomicrmw fsub ptr %gep, <2 x half> %val syncscope("agent") seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -15340,7 +15340,7 @@ define <2 x half> @flat_system_atomic_fsub_ret_v2f16__offset12b_pos(ptr %ptr, <2
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x half>, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 511
|
||||
%result = atomicrmw fsub ptr %gep, <2 x half> %val seq_cst
|
||||
ret <2 x half> %result
|
||||
}
|
||||
@ -15558,7 +15558,7 @@ define void @flat_system_atomic_fsub_noret_v2f16__offset12b_pos(ptr %ptr, <2 x h
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x half>, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 511
|
||||
%unused = atomicrmw fsub ptr %gep, <2 x half> %val seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -16459,7 +16459,7 @@ define <2 x bfloat> @flat_agent_atomic_fsub_ret_v2bf16__offset12b_pos(ptr %ptr,
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x bfloat>, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 511
|
||||
%result = atomicrmw fsub ptr %gep, <2 x bfloat> %val syncscope("agent") seq_cst
|
||||
ret <2 x bfloat> %result
|
||||
}
|
||||
@ -16925,7 +16925,7 @@ define <2 x bfloat> @flat_agent_atomic_fsub_ret_v2bf16__offset12b_neg(ptr %ptr,
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x bfloat>, ptr %ptr, i64 -512
|
||||
%gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 -512
|
||||
%result = atomicrmw fsub ptr %gep, <2 x bfloat> %val syncscope("agent") seq_cst
|
||||
ret <2 x bfloat> %result
|
||||
}
|
||||
@ -17796,7 +17796,7 @@ define void @flat_agent_atomic_fsub_noret_v2bf16__offset12b_pos(ptr %ptr, <2 x b
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x bfloat>, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 511
|
||||
%unused = atomicrmw fsub ptr %gep, <2 x bfloat> %val syncscope("agent") seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -18257,7 +18257,7 @@ define void @flat_agent_atomic_fsub_noret_v2bf16__offset12b_neg(ptr %ptr, <2 x b
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x bfloat>, ptr %ptr, i64 -512
|
||||
%gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 -512
|
||||
%unused = atomicrmw fsub ptr %gep, <2 x bfloat> %val syncscope("agent") seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -18711,7 +18711,7 @@ define <2 x bfloat> @flat_system_atomic_fsub_ret_v2bf16__offset12b_pos(ptr %ptr,
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x bfloat>, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 511
|
||||
%result = atomicrmw fsub ptr %gep, <2 x bfloat> %val seq_cst
|
||||
ret <2 x bfloat> %result
|
||||
}
|
||||
@ -19154,7 +19154,7 @@ define void @flat_system_atomic_fsub_noret_v2bf16__offset12b_pos(ptr %ptr, <2 x
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr <2 x bfloat>, ptr %ptr, i64 511
|
||||
%gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 511
|
||||
%unused = atomicrmw fsub ptr %gep, <2 x bfloat> %val seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -64,7 +64,7 @@ define amdgpu_kernel void @atomic_add_i32_offset(ptr %out, i32 %in) {
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = atomicrmw add ptr %gep, i32 %in syncscope("agent") seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -128,7 +128,7 @@ define amdgpu_kernel void @atomic_add_i32_max_offset(ptr %out, i32 %in) {
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 1023
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 1023
|
||||
%val = atomicrmw volatile add ptr %gep, i32 %in syncscope("agent") seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -196,7 +196,7 @@ define amdgpu_kernel void @atomic_add_i32_max_offset_p1(ptr %out, i32 %in) {
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 1024
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 1024
|
||||
%val = atomicrmw volatile add ptr %gep, i32 %in syncscope("agent") seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -270,7 +270,7 @@ define amdgpu_kernel void @atomic_add_i32_ret_offset(ptr %out, ptr %out2, i32 %i
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = atomicrmw volatile add ptr %gep, i32 %in syncscope("agent") seq_cst
|
||||
store i32 %val, ptr %out2
|
||||
ret void
|
||||
@ -352,8 +352,8 @@ define amdgpu_kernel void @atomic_add_i32_addr64_offset(ptr %out, i32 %in, i64 %
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%gep = getelementptr i32, ptr %ptr, i32 4
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
|
||||
%val = atomicrmw volatile add ptr %gep, i32 %in syncscope("agent") seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -444,8 +444,8 @@ define amdgpu_kernel void @atomic_add_i32_ret_addr64_offset(ptr %out, ptr %out2,
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%gep = getelementptr i32, ptr %ptr, i32 4
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
|
||||
%val = atomicrmw volatile add ptr %gep, i32 %in syncscope("agent") seq_cst
|
||||
store i32 %val, ptr %out2
|
||||
ret void
|
||||
@ -652,7 +652,7 @@ define amdgpu_kernel void @atomic_add_i32_addr64(ptr %out, i32 %in, i64 %index)
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%val = atomicrmw volatile add ptr %ptr, i32 %in syncscope("agent") seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -739,7 +739,7 @@ define amdgpu_kernel void @atomic_add_i32_ret_addr64(ptr %out, ptr %out2, i32 %i
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%val = atomicrmw volatile add ptr %ptr, i32 %in syncscope("agent") seq_cst
|
||||
store i32 %val, ptr %out2
|
||||
ret void
|
||||
@ -804,7 +804,7 @@ define amdgpu_kernel void @atomic_and_i32_offset(ptr %out, i32 %in) {
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = atomicrmw volatile and ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -878,7 +878,7 @@ define amdgpu_kernel void @atomic_and_i32_ret_offset(ptr %out, ptr %out2, i32 %i
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = atomicrmw volatile and ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i32 %val, ptr %out2
|
||||
ret void
|
||||
@ -960,8 +960,8 @@ define amdgpu_kernel void @atomic_and_i32_addr64_offset(ptr %out, i32 %in, i64 %
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%gep = getelementptr i32, ptr %ptr, i32 4
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
|
||||
%val = atomicrmw volatile and ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -1052,8 +1052,8 @@ define amdgpu_kernel void @atomic_and_i32_ret_addr64_offset(ptr %out, ptr %out2,
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%gep = getelementptr i32, ptr %ptr, i32 4
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
|
||||
%val = atomicrmw volatile and ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i32 %val, ptr %out2
|
||||
ret void
|
||||
@ -1260,7 +1260,7 @@ define amdgpu_kernel void @atomic_and_i32_addr64(ptr %out, i32 %in, i64 %index)
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%val = atomicrmw volatile and ptr %ptr, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -1347,7 +1347,7 @@ define amdgpu_kernel void @atomic_and_i32_ret_addr64(ptr %out, ptr %out2, i32 %i
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%val = atomicrmw volatile and ptr %ptr, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i32 %val, ptr %out2
|
||||
ret void
|
||||
@ -1412,7 +1412,7 @@ define amdgpu_kernel void @atomic_sub_i32_offset(ptr %out, i32 %in) {
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = atomicrmw volatile sub ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -1486,7 +1486,7 @@ define amdgpu_kernel void @atomic_sub_i32_ret_offset(ptr %out, ptr %out2, i32 %i
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = atomicrmw volatile sub ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i32 %val, ptr %out2
|
||||
ret void
|
||||
@ -1568,8 +1568,8 @@ define amdgpu_kernel void @atomic_sub_i32_addr64_offset(ptr %out, i32 %in, i64 %
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%gep = getelementptr i32, ptr %ptr, i32 4
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
|
||||
%val = atomicrmw volatile sub ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -1660,8 +1660,8 @@ define amdgpu_kernel void @atomic_sub_i32_ret_addr64_offset(ptr %out, ptr %out2,
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%gep = getelementptr i32, ptr %ptr, i32 4
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
|
||||
%val = atomicrmw volatile sub ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i32 %val, ptr %out2
|
||||
ret void
|
||||
@ -1868,7 +1868,7 @@ define amdgpu_kernel void @atomic_sub_i32_addr64(ptr %out, i32 %in, i64 %index)
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%val = atomicrmw volatile sub ptr %ptr, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -1955,7 +1955,7 @@ define amdgpu_kernel void @atomic_sub_i32_ret_addr64(ptr %out, ptr %out2, i32 %i
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%val = atomicrmw volatile sub ptr %ptr, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i32 %val, ptr %out2
|
||||
ret void
|
||||
@ -2016,7 +2016,7 @@ define amdgpu_kernel void @atomic_max_i32_offset(ptr %out, i32 %in) {
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = atomicrmw volatile max ptr %gep, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -2089,7 +2089,7 @@ define amdgpu_kernel void @atomic_max_i32_ret_offset(ptr %out, ptr %out2, i32 %i
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = atomicrmw volatile max ptr %gep, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i32 %val, ptr %out2
|
||||
ret void
|
||||
@ -2167,8 +2167,8 @@ define amdgpu_kernel void @atomic_max_i32_addr64_offset(ptr %out, i32 %in, i64 %
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%gep = getelementptr i32, ptr %ptr, i32 4
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
|
||||
%val = atomicrmw volatile max ptr %gep, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -2258,8 +2258,8 @@ define amdgpu_kernel void @atomic_max_i32_ret_addr64_offset(ptr %out, ptr %out2,
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%gep = getelementptr i32, ptr %ptr, i32 4
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
|
||||
%val = atomicrmw volatile max ptr %gep, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i32 %val, ptr %out2
|
||||
ret void
|
||||
@ -2457,7 +2457,7 @@ define amdgpu_kernel void @atomic_max_i32_addr64(ptr %out, i32 %in, i64 %index)
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%val = atomicrmw volatile max ptr %ptr, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -2543,7 +2543,7 @@ define amdgpu_kernel void @atomic_max_i32_ret_addr64(ptr %out, ptr %out2, i32 %i
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%val = atomicrmw volatile max ptr %ptr, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i32 %val, ptr %out2
|
||||
ret void
|
||||
@ -2604,7 +2604,7 @@ define amdgpu_kernel void @atomic_umax_i32_offset(ptr %out, i32 %in) {
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = atomicrmw volatile umax ptr %gep, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -2677,7 +2677,7 @@ define amdgpu_kernel void @atomic_umax_i32_ret_offset(ptr %out, ptr %out2, i32 %
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = atomicrmw volatile umax ptr %gep, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i32 %val, ptr %out2
|
||||
ret void
|
||||
@ -2755,8 +2755,8 @@ define amdgpu_kernel void @atomic_umax_i32_addr64_offset(ptr %out, i32 %in, i64
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%gep = getelementptr i32, ptr %ptr, i32 4
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
|
||||
%val = atomicrmw volatile umax ptr %gep, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -2846,8 +2846,8 @@ define amdgpu_kernel void @atomic_umax_i32_ret_addr64_offset(ptr %out, ptr %out2
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%gep = getelementptr i32, ptr %ptr, i32 4
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
|
||||
%val = atomicrmw volatile umax ptr %gep, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i32 %val, ptr %out2
|
||||
ret void
|
||||
@ -3045,7 +3045,7 @@ define amdgpu_kernel void @atomic_umax_i32_addr64(ptr %out, i32 %in, i64 %index)
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%val = atomicrmw volatile umax ptr %ptr, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -3131,7 +3131,7 @@ define amdgpu_kernel void @atomic_umax_i32_ret_addr64(ptr %out, ptr %out2, i32 %
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%val = atomicrmw volatile umax ptr %ptr, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i32 %val, ptr %out2
|
||||
ret void
|
||||
@ -3192,7 +3192,7 @@ define amdgpu_kernel void @atomic_min_i32_offset(ptr %out, i32 %in) {
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = atomicrmw volatile min ptr %gep, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -3265,7 +3265,7 @@ define amdgpu_kernel void @atomic_min_i32_ret_offset(ptr %out, ptr %out2, i32 %i
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = atomicrmw volatile min ptr %gep, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i32 %val, ptr %out2
|
||||
ret void
|
||||
@ -3343,8 +3343,8 @@ define amdgpu_kernel void @atomic_min_i32_addr64_offset(ptr %out, i32 %in, i64 %
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%gep = getelementptr i32, ptr %ptr, i32 4
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
|
||||
%val = atomicrmw volatile min ptr %gep, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -3434,8 +3434,8 @@ define amdgpu_kernel void @atomic_min_i32_ret_addr64_offset(ptr %out, ptr %out2,
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%gep = getelementptr i32, ptr %ptr, i32 4
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
|
||||
%val = atomicrmw volatile min ptr %gep, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i32 %val, ptr %out2
|
||||
ret void
|
||||
@ -3633,7 +3633,7 @@ define amdgpu_kernel void @atomic_min_i32_addr64(ptr %out, i32 %in, i64 %index)
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%val = atomicrmw volatile min ptr %ptr, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -3719,7 +3719,7 @@ define amdgpu_kernel void @atomic_min_i32_ret_addr64(ptr %out, ptr %out2, i32 %i
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%val = atomicrmw volatile min ptr %ptr, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i32 %val, ptr %out2
|
||||
ret void
|
||||
@ -3780,7 +3780,7 @@ define amdgpu_kernel void @atomic_umin_i32_offset(ptr %out, i32 %in) {
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = atomicrmw volatile umin ptr %gep, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -3853,7 +3853,7 @@ define amdgpu_kernel void @atomic_umin_i32_ret_offset(ptr %out, ptr %out2, i32 %
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = atomicrmw volatile umin ptr %gep, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i32 %val, ptr %out2
|
||||
ret void
|
||||
@ -3931,8 +3931,8 @@ define amdgpu_kernel void @atomic_umin_i32_addr64_offset(ptr %out, i32 %in, i64
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%gep = getelementptr i32, ptr %ptr, i32 4
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
|
||||
%val = atomicrmw volatile umin ptr %gep, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -4022,8 +4022,8 @@ define amdgpu_kernel void @atomic_umin_i32_ret_addr64_offset(ptr %out, ptr %out2
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%gep = getelementptr i32, ptr %ptr, i32 4
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
|
||||
%val = atomicrmw volatile umin ptr %gep, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i32 %val, ptr %out2
|
||||
ret void
|
||||
@ -4221,7 +4221,7 @@ define amdgpu_kernel void @atomic_umin_i32_addr64(ptr %out, i32 %in, i64 %index)
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%val = atomicrmw volatile umin ptr %ptr, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -4307,7 +4307,7 @@ define amdgpu_kernel void @atomic_umin_i32_ret_addr64(ptr %out, ptr %out2, i32 %
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%val = atomicrmw volatile umin ptr %ptr, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i32 %val, ptr %out2
|
||||
ret void
|
||||
@ -4372,7 +4372,7 @@ define amdgpu_kernel void @atomic_or_i32_offset(ptr %out, i32 %in) {
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = atomicrmw volatile or ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -4446,7 +4446,7 @@ define amdgpu_kernel void @atomic_or_i32_ret_offset(ptr %out, ptr %out2, i32 %in
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = atomicrmw volatile or ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i32 %val, ptr %out2
|
||||
ret void
|
||||
@ -4528,8 +4528,8 @@ define amdgpu_kernel void @atomic_or_i32_addr64_offset(ptr %out, i32 %in, i64 %i
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%gep = getelementptr i32, ptr %ptr, i32 4
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
|
||||
%val = atomicrmw volatile or ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -4620,8 +4620,8 @@ define amdgpu_kernel void @atomic_or_i32_ret_addr64_offset(ptr %out, ptr %out2,
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%gep = getelementptr i32, ptr %ptr, i32 4
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
|
||||
%val = atomicrmw volatile or ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i32 %val, ptr %out2
|
||||
ret void
|
||||
@ -4828,7 +4828,7 @@ define amdgpu_kernel void @atomic_or_i32_addr64(ptr %out, i32 %in, i64 %index) {
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%val = atomicrmw volatile or ptr %ptr, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -4915,7 +4915,7 @@ define amdgpu_kernel void @atomic_or_i32_ret_addr64(ptr %out, ptr %out2, i32 %in
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%val = atomicrmw volatile or ptr %ptr, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i32 %val, ptr %out2
|
||||
ret void
|
||||
@ -4980,7 +4980,7 @@ define amdgpu_kernel void @atomic_xchg_i32_offset(ptr %out, i32 %in) {
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = atomicrmw volatile xchg ptr %gep, i32 %in syncscope("agent") seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -5044,7 +5044,7 @@ define amdgpu_kernel void @atomic_xchg_f32_offset(ptr %out, float %in) {
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr float, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds float, ptr %out, i32 4
|
||||
%val = atomicrmw volatile xchg ptr %gep, float %in syncscope("agent") seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -5118,7 +5118,7 @@ define amdgpu_kernel void @atomic_xchg_i32_ret_offset(ptr %out, ptr %out2, i32 %
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = atomicrmw volatile xchg ptr %gep, i32 %in syncscope("agent") seq_cst
|
||||
store i32 %val, ptr %out2
|
||||
ret void
|
||||
@ -5200,8 +5200,8 @@ define amdgpu_kernel void @atomic_xchg_i32_addr64_offset(ptr %out, i32 %in, i64
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%gep = getelementptr i32, ptr %ptr, i32 4
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
|
||||
%val = atomicrmw volatile xchg ptr %gep, i32 %in syncscope("agent") seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -5292,8 +5292,8 @@ define amdgpu_kernel void @atomic_xchg_i32_ret_addr64_offset(ptr %out, ptr %out2
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%gep = getelementptr i32, ptr %ptr, i32 4
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
|
||||
%val = atomicrmw volatile xchg ptr %gep, i32 %in syncscope("agent") seq_cst
|
||||
store i32 %val, ptr %out2
|
||||
ret void
|
||||
@ -5500,7 +5500,7 @@ define amdgpu_kernel void @atomic_xchg_i32_addr64(ptr %out, i32 %in, i64 %index)
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%val = atomicrmw volatile xchg ptr %ptr, i32 %in syncscope("agent") seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -5587,7 +5587,7 @@ define amdgpu_kernel void @atomic_xchg_i32_ret_addr64(ptr %out, ptr %out2, i32 %
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%val = atomicrmw volatile xchg ptr %ptr, i32 %in syncscope("agent") seq_cst
|
||||
store i32 %val, ptr %out2
|
||||
ret void
|
||||
@ -5652,7 +5652,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i32_offset(ptr %out, i32 %in, i32 %old
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -5729,7 +5729,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i32_ret_offset(ptr %out, ptr %out2, i3
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
|
||||
%flag = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %flag, ptr %out2
|
||||
@ -5819,8 +5819,8 @@ define amdgpu_kernel void @atomic_cmpxchg_i32_addr64_offset(ptr %out, i32 %in, i
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%gep = getelementptr i32, ptr %ptr, i32 4
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -5918,8 +5918,8 @@ define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64_offset(ptr %out, ptr %o
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%gep = getelementptr i32, ptr %ptr, i32 4
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
|
||||
%flag = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %flag, ptr %out2
|
||||
@ -6136,7 +6136,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i32_addr64(ptr %out, i32 %in, i64 %ind
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%val = cmpxchg volatile ptr %ptr, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -6230,7 +6230,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64(ptr %out, ptr %out2, i3
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%val = cmpxchg volatile ptr %ptr, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
|
||||
%flag = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %flag, ptr %out2
|
||||
@ -6296,7 +6296,7 @@ define amdgpu_kernel void @atomic_xor_i32_offset(ptr %out, i32 %in) {
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = atomicrmw volatile xor ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -6370,7 +6370,7 @@ define amdgpu_kernel void @atomic_xor_i32_ret_offset(ptr %out, ptr %out2, i32 %i
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = atomicrmw volatile xor ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i32 %val, ptr %out2
|
||||
ret void
|
||||
@ -6452,8 +6452,8 @@ define amdgpu_kernel void @atomic_xor_i32_addr64_offset(ptr %out, i32 %in, i64 %
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%gep = getelementptr i32, ptr %ptr, i32 4
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
|
||||
%val = atomicrmw volatile xor ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -6544,8 +6544,8 @@ define amdgpu_kernel void @atomic_xor_i32_ret_addr64_offset(ptr %out, ptr %out2,
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%gep = getelementptr i32, ptr %ptr, i32 4
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
|
||||
%val = atomicrmw volatile xor ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i32 %val, ptr %out2
|
||||
ret void
|
||||
@ -6752,7 +6752,7 @@ define amdgpu_kernel void @atomic_xor_i32_addr64(ptr %out, i32 %in, i64 %index)
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%val = atomicrmw volatile xor ptr %ptr, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -6839,7 +6839,7 @@ define amdgpu_kernel void @atomic_xor_i32_ret_addr64(ptr %out, ptr %out2, i32 %i
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%val = atomicrmw volatile xor ptr %ptr, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i32 %val, ptr %out2
|
||||
ret void
|
||||
@ -6905,7 +6905,7 @@ define amdgpu_kernel void @atomic_load_i32_offset(ptr %in, ptr %out) {
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %in, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %in, i32 4
|
||||
%val = load atomic i32, ptr %gep seq_cst, align 4
|
||||
store i32 %val, ptr %out
|
||||
ret void
|
||||
@ -7050,8 +7050,8 @@ define amdgpu_kernel void @atomic_load_i32_addr64_offset(ptr %in, ptr %out, i64
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %in, i64 %index
|
||||
%gep = getelementptr i32, ptr %ptr, i32 4
|
||||
%ptr = getelementptr inbounds i32, ptr %in, i64 %index
|
||||
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
|
||||
%val = load atomic i32, ptr %gep seq_cst, align 4
|
||||
store i32 %val, ptr %out
|
||||
ret void
|
||||
@ -7131,7 +7131,7 @@ define amdgpu_kernel void @atomic_load_i32_addr64(ptr %in, ptr %out, i64 %index)
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %in, i64 %index
|
||||
%ptr = getelementptr inbounds i32, ptr %in, i64 %index
|
||||
%val = load atomic i32, ptr %ptr seq_cst, align 4
|
||||
store i32 %val, ptr %out
|
||||
ret void
|
||||
@ -7186,7 +7186,7 @@ define amdgpu_kernel void @atomic_store_i32_offset(i32 %in, ptr %out) {
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2 offset:16
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
store atomic i32 %in, ptr %gep seq_cst, align 4
|
||||
ret void
|
||||
}
|
||||
@ -7302,8 +7302,8 @@ define amdgpu_kernel void @atomic_store_i32_addr64_offset(i32 %in, ptr %out, i64
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2 offset:16
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%gep = getelementptr i32, ptr %ptr, i32 4
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
|
||||
store atomic i32 %in, ptr %gep seq_cst, align 4
|
||||
ret void
|
||||
}
|
||||
@ -7366,7 +7366,7 @@ define amdgpu_kernel void @atomic_store_i32_addr64(i32 %in, ptr %out, i64 %index
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
store atomic i32 %in, ptr %ptr seq_cst, align 4
|
||||
ret void
|
||||
}
|
||||
@ -7431,7 +7431,7 @@ define amdgpu_kernel void @atomic_load_f32_offset(ptr %in, ptr %out) {
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr float, ptr %in, i32 4
|
||||
%gep = getelementptr inbounds float, ptr %in, i32 4
|
||||
%val = load atomic float, ptr %gep seq_cst, align 4
|
||||
store float %val, ptr %out
|
||||
ret void
|
||||
@ -7576,8 +7576,8 @@ define amdgpu_kernel void @atomic_load_f32_addr64_offset(ptr %in, ptr %out, i64
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr float, ptr %in, i64 %index
|
||||
%gep = getelementptr float, ptr %ptr, i32 4
|
||||
%ptr = getelementptr inbounds float, ptr %in, i64 %index
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i32 4
|
||||
%val = load atomic float, ptr %gep seq_cst, align 4
|
||||
store float %val, ptr %out
|
||||
ret void
|
||||
@ -7657,7 +7657,7 @@ define amdgpu_kernel void @atomic_load_f32_addr64(ptr %in, ptr %out, i64 %index)
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr float, ptr %in, i64 %index
|
||||
%ptr = getelementptr inbounds float, ptr %in, i64 %index
|
||||
%val = load atomic float, ptr %ptr seq_cst, align 4
|
||||
store float %val, ptr %out
|
||||
ret void
|
||||
@ -7712,7 +7712,7 @@ define amdgpu_kernel void @atomic_store_f32_offset(float %in, ptr %out) {
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2 offset:16
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr float, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds float, ptr %out, i32 4
|
||||
store atomic float %in, ptr %gep seq_cst, align 4
|
||||
ret void
|
||||
}
|
||||
@ -7828,8 +7828,8 @@ define amdgpu_kernel void @atomic_store_f32_addr64_offset(float %in, ptr %out, i
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2 offset:16
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr float, ptr %out, i64 %index
|
||||
%gep = getelementptr float, ptr %ptr, i32 4
|
||||
%ptr = getelementptr inbounds float, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds float, ptr %ptr, i32 4
|
||||
store atomic float %in, ptr %gep seq_cst, align 4
|
||||
ret void
|
||||
}
|
||||
@ -7892,7 +7892,7 @@ define amdgpu_kernel void @atomic_store_f32_addr64(float %in, ptr %out, i64 %ind
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr float, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds float, ptr %out, i64 %index
|
||||
store atomic float %in, ptr %ptr seq_cst, align 4
|
||||
ret void
|
||||
}
|
||||
@ -7971,7 +7971,7 @@ define amdgpu_kernel void @atomic_load_i8_offset(ptr %in, ptr %out) {
|
||||
; GFX11-FAKE16-NEXT: flat_store_b8 v[0:1], v2
|
||||
; GFX11-FAKE16-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i8, ptr %in, i64 16
|
||||
%gep = getelementptr inbounds i8, ptr %in, i64 16
|
||||
%val = load atomic i8, ptr %gep seq_cst, align 1
|
||||
store i8 %val, ptr %out
|
||||
ret void
|
||||
@ -8145,8 +8145,8 @@ define amdgpu_kernel void @atomic_load_i8_addr64_offset(ptr %in, ptr %out, i64 %
|
||||
; GFX11-FAKE16-NEXT: flat_store_b8 v[0:1], v2
|
||||
; GFX11-FAKE16-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i8, ptr %in, i64 %index
|
||||
%gep = getelementptr i8, ptr %ptr, i64 16
|
||||
%ptr = getelementptr inbounds i8, ptr %in, i64 %index
|
||||
%gep = getelementptr inbounds i8, ptr %ptr, i64 16
|
||||
%val = load atomic i8, ptr %gep seq_cst, align 1
|
||||
store i8 %val, ptr %out
|
||||
ret void
|
||||
@ -8212,7 +8212,7 @@ define amdgpu_kernel void @atomic_store_i8_offset(i8 %in, ptr %out) {
|
||||
; GFX11-FAKE16-NEXT: flat_store_b8 v[0:1], v2 offset:16
|
||||
; GFX11-FAKE16-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i8, ptr %out, i64 16
|
||||
%gep = getelementptr inbounds i8, ptr %out, i64 16
|
||||
store atomic i8 %in, ptr %gep seq_cst, align 1
|
||||
ret void
|
||||
}
|
||||
@ -8348,8 +8348,8 @@ define amdgpu_kernel void @atomic_store_i8_addr64_offset(i8 %in, ptr %out, i64 %
|
||||
; GFX11-FAKE16-NEXT: flat_store_b8 v[0:1], v2 offset:16
|
||||
; GFX11-FAKE16-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i8, ptr %out, i64 %index
|
||||
%gep = getelementptr i8, ptr %ptr, i64 16
|
||||
%ptr = getelementptr inbounds i8, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i8, ptr %ptr, i64 16
|
||||
store atomic i8 %in, ptr %gep seq_cst, align 1
|
||||
ret void
|
||||
}
|
||||
@ -8428,7 +8428,7 @@ define amdgpu_kernel void @atomic_load_i16_offset(ptr %in, ptr %out) {
|
||||
; GFX11-FAKE16-NEXT: flat_store_b16 v[0:1], v2
|
||||
; GFX11-FAKE16-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i16, ptr %in, i64 8
|
||||
%gep = getelementptr inbounds i16, ptr %in, i64 8
|
||||
%val = load atomic i16, ptr %gep seq_cst, align 2
|
||||
store i16 %val, ptr %out
|
||||
ret void
|
||||
@ -8607,8 +8607,8 @@ define amdgpu_kernel void @atomic_load_i16_addr64_offset(ptr %in, ptr %out, i64
|
||||
; GFX11-FAKE16-NEXT: flat_store_b16 v[0:1], v2
|
||||
; GFX11-FAKE16-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i16, ptr %in, i64 %index
|
||||
%gep = getelementptr i16, ptr %ptr, i64 8
|
||||
%ptr = getelementptr inbounds i16, ptr %in, i64 %index
|
||||
%gep = getelementptr inbounds i16, ptr %ptr, i64 8
|
||||
%val = load atomic i16, ptr %gep seq_cst, align 2
|
||||
store i16 %val, ptr %out
|
||||
ret void
|
||||
@ -8674,7 +8674,7 @@ define amdgpu_kernel void @atomic_store_i16_offset(i16 %in, ptr %out) {
|
||||
; GFX11-FAKE16-NEXT: flat_store_b16 v[0:1], v2 offset:16
|
||||
; GFX11-FAKE16-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i16, ptr %out, i64 8
|
||||
%gep = getelementptr inbounds i16, ptr %out, i64 8
|
||||
store atomic i16 %in, ptr %gep seq_cst, align 2
|
||||
ret void
|
||||
}
|
||||
@ -8816,8 +8816,8 @@ define amdgpu_kernel void @atomic_store_i16_addr64_offset(i16 %in, ptr %out, i64
|
||||
; GFX11-FAKE16-NEXT: flat_store_b16 v[0:1], v2 offset:16
|
||||
; GFX11-FAKE16-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i16, ptr %out, i64 %index
|
||||
%gep = getelementptr i16, ptr %ptr, i64 8
|
||||
%ptr = getelementptr inbounds i16, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i16, ptr %ptr, i64 8
|
||||
store atomic i16 %in, ptr %gep seq_cst, align 2
|
||||
ret void
|
||||
}
|
||||
@ -8882,7 +8882,7 @@ define amdgpu_kernel void @atomic_store_f16_offset(half %in, ptr %out) {
|
||||
; GFX11-FAKE16-NEXT: flat_store_b16 v[0:1], v2 offset:16
|
||||
; GFX11-FAKE16-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr half, ptr %out, i64 8
|
||||
%gep = getelementptr inbounds half, ptr %out, i64 8
|
||||
store atomic half %in, ptr %gep seq_cst, align 2
|
||||
ret void
|
||||
}
|
||||
@ -9002,7 +9002,7 @@ define amdgpu_kernel void @atomic_store_bf16_offset(bfloat %in, ptr %out) {
|
||||
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v2, s2
|
||||
; GFX11-FAKE16-NEXT: flat_store_b16 v[0:1], v2
|
||||
; GFX11-FAKE16-NEXT: s_endpgm
|
||||
%gep = getelementptr bfloat, ptr %out, i64 8
|
||||
%gep = getelementptr inbounds bfloat, ptr %out, i64 8
|
||||
store atomic bfloat %in, ptr %out seq_cst, align 2
|
||||
ret void
|
||||
}
|
||||
@ -9125,7 +9125,7 @@ define amdgpu_kernel void @atomic_inc_i32_offset(ptr %out, i32 %in) {
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = atomicrmw volatile uinc_wrap ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -9189,7 +9189,7 @@ define amdgpu_kernel void @atomic_inc_i32_max_offset(ptr %out, i32 %in) {
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 1023
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 1023
|
||||
%val = atomicrmw volatile uinc_wrap ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -9257,7 +9257,7 @@ define amdgpu_kernel void @atomic_inc_i32_max_offset_p1(ptr %out, i32 %in) {
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 1024
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 1024
|
||||
%val = atomicrmw volatile uinc_wrap ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -9331,7 +9331,7 @@ define amdgpu_kernel void @atomic_inc_i32_ret_offset(ptr %out, ptr %out2, i32 %i
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = atomicrmw volatile uinc_wrap ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i32 %val, ptr %out2
|
||||
ret void
|
||||
@ -9413,8 +9413,8 @@ define amdgpu_kernel void @atomic_inc_i32_incr64_offset(ptr %out, i32 %in, i64 %
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%gep = getelementptr i32, ptr %ptr, i32 4
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
|
||||
%val = atomicrmw volatile uinc_wrap ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -9505,8 +9505,8 @@ define amdgpu_kernel void @atomic_inc_i32_ret_incr64_offset(ptr %out, ptr %out2,
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%gep = getelementptr i32, ptr %ptr, i32 4
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
|
||||
%val = atomicrmw volatile uinc_wrap ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i32 %val, ptr %out2
|
||||
ret void
|
||||
@ -9713,7 +9713,7 @@ define amdgpu_kernel void @atomic_inc_i32_incr64(ptr %out, i32 %in, i64 %index)
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%val = atomicrmw volatile uinc_wrap ptr %ptr, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -9800,7 +9800,7 @@ define amdgpu_kernel void @atomic_inc_i32_ret_incr64(ptr %out, ptr %out2, i32 %i
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%val = atomicrmw volatile uinc_wrap ptr %ptr, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i32 %val, ptr %out2
|
||||
ret void
|
||||
@ -9865,7 +9865,7 @@ define amdgpu_kernel void @atomic_dec_i32_offset(ptr %out, i32 %in) {
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = atomicrmw volatile udec_wrap ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -9929,7 +9929,7 @@ define amdgpu_kernel void @atomic_dec_i32_max_offset(ptr %out, i32 %in) {
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 1023
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 1023
|
||||
%val = atomicrmw volatile udec_wrap ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -9997,7 +9997,7 @@ define amdgpu_kernel void @atomic_dec_i32_max_offset_p1(ptr %out, i32 %in) {
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 1024
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 1024
|
||||
%val = atomicrmw volatile udec_wrap ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -10071,7 +10071,7 @@ define amdgpu_kernel void @atomic_dec_i32_ret_offset(ptr %out, ptr %out2, i32 %i
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = atomicrmw volatile udec_wrap ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i32 %val, ptr %out2
|
||||
ret void
|
||||
@ -10153,8 +10153,8 @@ define amdgpu_kernel void @atomic_dec_i32_decr64_offset(ptr %out, i32 %in, i64 %
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%gep = getelementptr i32, ptr %ptr, i32 4
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
|
||||
%val = atomicrmw volatile udec_wrap ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -10245,8 +10245,8 @@ define amdgpu_kernel void @atomic_dec_i32_ret_decr64_offset(ptr %out, ptr %out2,
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%gep = getelementptr i32, ptr %ptr, i32 4
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
|
||||
%val = atomicrmw volatile udec_wrap ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i32 %val, ptr %out2
|
||||
ret void
|
||||
@ -10453,7 +10453,7 @@ define amdgpu_kernel void @atomic_dec_i32_decr64(ptr %out, i32 %in, i64 %index)
|
||||
; GFX11-NEXT: buffer_gl0_inv
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%val = atomicrmw volatile udec_wrap ptr %ptr, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -10540,7 +10540,7 @@ define amdgpu_kernel void @atomic_dec_i32_ret_decr64(ptr %out, ptr %out2, i32 %i
|
||||
; GFX11-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i64 %index
|
||||
%val = atomicrmw volatile udec_wrap ptr %ptr, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i32 %val, ptr %out2
|
||||
ret void
|
||||
@ -10619,7 +10619,7 @@ define amdgpu_kernel void @atomic_load_f16_offset(ptr %in, ptr %out) {
|
||||
; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
|
||||
; GFX11-FAKE16-NEXT: flat_store_b16 v[0:1], v2
|
||||
; GFX11-FAKE16-NEXT: s_endpgm
|
||||
%gep = getelementptr half, ptr %in, i64 8
|
||||
%gep = getelementptr inbounds half, ptr %in, i64 8
|
||||
%val = load atomic half, ptr %gep seq_cst, align 2
|
||||
store half %val, ptr %out
|
||||
ret void
|
||||
@ -10772,7 +10772,7 @@ define amdgpu_kernel void @atomic_load_bf16_offset(ptr %in, ptr %out) {
|
||||
; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
|
||||
; GFX11-FAKE16-NEXT: flat_store_b16 v[0:1], v2
|
||||
; GFX11-FAKE16-NEXT: s_endpgm
|
||||
%gep = getelementptr bfloat, ptr %in, i64 8
|
||||
%gep = getelementptr inbounds bfloat, ptr %in, i64 8
|
||||
%val = load atomic bfloat, ptr %gep seq_cst, align 2
|
||||
store bfloat %val, ptr %out
|
||||
ret void
|
||||
|
||||
@ -63,7 +63,7 @@ define void @flat_atomic_xchg_i32_noret_offset(ptr %out, i32 %in) {
|
||||
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN3-NEXT: buffer_wbinvl1_vol
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%tmp0 = atomicrmw xchg ptr %gep, i32 %in seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -124,7 +124,7 @@ define i32 @flat_atomic_xchg_i32_ret_offset(ptr %out, i32 %in) {
|
||||
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN3-NEXT: buffer_wbinvl1_vol
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%result = atomicrmw xchg ptr %gep, i32 %in seq_cst
|
||||
ret i32 %result
|
||||
}
|
||||
@ -203,7 +203,7 @@ define amdgpu_gfx void @flat_atomic_xchg_i32_noret_offset_scalar(ptr inreg %out,
|
||||
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN3-NEXT: buffer_wbinvl1_vol
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%tmp0 = atomicrmw xchg ptr %gep, i32 %in seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -282,7 +282,7 @@ define amdgpu_gfx i32 @flat_atomic_xchg_i32_ret_offset_scalar(ptr inreg %out, i3
|
||||
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN3-NEXT: buffer_wbinvl1_vol
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%result = atomicrmw xchg ptr %gep, i32 %in seq_cst
|
||||
ret i32 %result
|
||||
}
|
||||
@ -315,7 +315,7 @@ define void @flat_atomic_xchg_i32_noret_offset__amdgpu_no_remote_memory(ptr %out
|
||||
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN3-NEXT: buffer_wbinvl1_vol
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw xchg ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -348,7 +348,7 @@ define i32 @flat_atomic_xchg_i32_ret_offset__amdgpu_no_remote_memory(ptr %out, i
|
||||
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN3-NEXT: buffer_wbinvl1_vol
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i64 4
|
||||
%result = atomicrmw xchg ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0
|
||||
ret i32 %result
|
||||
}
|
||||
@ -413,7 +413,7 @@ define void @flat_atomic_xchg_f32_noret_offset(ptr %out, float %in) {
|
||||
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN3-NEXT: buffer_wbinvl1_vol
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds float, ptr %out, i32 4
|
||||
%tmp0 = atomicrmw xchg ptr %gep, float %in seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -474,7 +474,7 @@ define float @flat_atomic_xchg_f32_ret_offset(ptr %out, float %in) {
|
||||
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN3-NEXT: buffer_wbinvl1_vol
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds float, ptr %out, i32 4
|
||||
%result = atomicrmw xchg ptr %gep, float %in seq_cst
|
||||
ret float %result
|
||||
}
|
||||
@ -553,7 +553,7 @@ define amdgpu_gfx void @flat_atomic_xchg_f32_noret_offset_scalar(ptr inreg %out,
|
||||
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN3-NEXT: buffer_wbinvl1_vol
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds float, ptr %out, i32 4
|
||||
%tmp0 = atomicrmw xchg ptr %gep, float %in seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -632,7 +632,7 @@ define amdgpu_gfx float @flat_atomic_xchg_f32_ret_offset_scalar(ptr inreg %out,
|
||||
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN3-NEXT: buffer_wbinvl1_vol
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds float, ptr %out, i32 4
|
||||
%result = atomicrmw xchg ptr %gep, float %in seq_cst
|
||||
ret float %result
|
||||
}
|
||||
@ -665,7 +665,7 @@ define void @flat_atomic_xchg_f32_noret_offset__amdgpu_no_remote_memory(ptr %out
|
||||
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN3-NEXT: buffer_wbinvl1_vol
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds float, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw xchg ptr %gep, float %in seq_cst, !amdgpu.no.remote.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -698,7 +698,7 @@ define float @flat_atomic_xchg_f32_ret_offset__amdgpu_no_remote_memory(ptr %out,
|
||||
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN3-NEXT: buffer_wbinvl1_vol
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr float, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds float, ptr %out, i64 4
|
||||
%result = atomicrmw xchg ptr %gep, float %in seq_cst, !amdgpu.no.remote.memory !0
|
||||
ret float %result
|
||||
}
|
||||
@ -763,7 +763,7 @@ define void @flat_atomic_add_i32_noret_offset(ptr %out, i32 %in) {
|
||||
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN3-NEXT: buffer_wbinvl1_vol
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%tmp0 = atomicrmw add ptr %gep, i32 %in seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -824,7 +824,7 @@ define i32 @flat_atomic_add_i32_ret_offset(ptr %out, i32 %in) {
|
||||
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN3-NEXT: buffer_wbinvl1_vol
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%result = atomicrmw add ptr %gep, i32 %in seq_cst
|
||||
ret i32 %result
|
||||
}
|
||||
@ -903,7 +903,7 @@ define amdgpu_gfx void @flat_atomic_add_i32_noret_offset_scalar(ptr inreg %out,
|
||||
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN3-NEXT: buffer_wbinvl1_vol
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%tmp0 = atomicrmw add ptr %gep, i32 %in seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -982,7 +982,7 @@ define amdgpu_gfx i32 @flat_atomic_add_i32_ret_offset_scalar(ptr inreg %out, i32
|
||||
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN3-NEXT: buffer_wbinvl1_vol
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%result = atomicrmw add ptr %gep, i32 %in seq_cst
|
||||
ret i32 %result
|
||||
}
|
||||
@ -1015,7 +1015,7 @@ define void @flat_atomic_add_i32_noret_offset__amdgpu_no_remote_memory(ptr %out,
|
||||
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN3-NEXT: buffer_wbinvl1_vol
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw add ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -1048,7 +1048,7 @@ define i32 @flat_atomic_add_i32_ret_offset__amdgpu_no_remote_memory(ptr %out, i3
|
||||
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN3-NEXT: buffer_wbinvl1_vol
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i64 4
|
||||
%result = atomicrmw add ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0
|
||||
ret i32 %result
|
||||
}
|
||||
@ -1191,7 +1191,7 @@ define void @flat_atomic_sub_i32_noret_offset(ptr %out, i32 %in) {
|
||||
; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%tmp0 = atomicrmw sub ptr %gep, i32 %in seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -1334,7 +1334,7 @@ define i32 @flat_atomic_sub_i32_ret_offset(ptr %out, i32 %in) {
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GCN3-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%result = atomicrmw sub ptr %gep, i32 %in seq_cst
|
||||
ret i32 %result
|
||||
}
|
||||
@ -1485,7 +1485,7 @@ define amdgpu_gfx void @flat_atomic_sub_i32_noret_offset_scalar(ptr inreg %out,
|
||||
; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[34:35]
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%tmp0 = atomicrmw sub ptr %gep, i32 %in seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -1644,7 +1644,7 @@ define amdgpu_gfx i32 @flat_atomic_sub_i32_ret_offset_scalar(ptr inreg %out, i32
|
||||
; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[34:35]
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%result = atomicrmw sub ptr %gep, i32 %in seq_cst
|
||||
ret i32 %result
|
||||
}
|
||||
@ -1677,7 +1677,7 @@ define void @flat_atomic_sub_i32_noret_offset__amdgpu_no_remote_memory(ptr %out,
|
||||
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN3-NEXT: buffer_wbinvl1_vol
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw sub ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -1710,7 +1710,7 @@ define i32 @flat_atomic_sub_i32_ret_offset__amdgpu_no_remote_memory(ptr %out, i3
|
||||
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN3-NEXT: buffer_wbinvl1_vol
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i64 4
|
||||
%result = atomicrmw sub ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0
|
||||
ret i32 %result
|
||||
}
|
||||
@ -1853,7 +1853,7 @@ define void @flat_atomic_and_i32_noret_offset(ptr %out, i32 %in) {
|
||||
; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%tmp0 = atomicrmw and ptr %gep, i32 %in seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -1996,7 +1996,7 @@ define i32 @flat_atomic_and_i32_ret_offset(ptr %out, i32 %in) {
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GCN3-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%result = atomicrmw and ptr %gep, i32 %in seq_cst
|
||||
ret i32 %result
|
||||
}
|
||||
@ -2147,7 +2147,7 @@ define amdgpu_gfx void @flat_atomic_and_i32_noret_offset_scalar(ptr inreg %out,
|
||||
; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[34:35]
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%tmp0 = atomicrmw and ptr %gep, i32 %in seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -2306,7 +2306,7 @@ define amdgpu_gfx i32 @flat_atomic_and_i32_ret_offset_scalar(ptr inreg %out, i32
|
||||
; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[34:35]
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%result = atomicrmw and ptr %gep, i32 %in seq_cst
|
||||
ret i32 %result
|
||||
}
|
||||
@ -2339,7 +2339,7 @@ define void @flat_atomic_and_i32_noret_offset__amdgpu_no_remote_memory(ptr %out,
|
||||
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN3-NEXT: buffer_wbinvl1_vol
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw and ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -2372,7 +2372,7 @@ define i32 @flat_atomic_and_i32_ret_offset__amdgpu_no_remote_memory(ptr %out, i3
|
||||
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN3-NEXT: buffer_wbinvl1_vol
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i64 4
|
||||
%result = atomicrmw and ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0
|
||||
ret i32 %result
|
||||
}
|
||||
@ -2521,7 +2521,7 @@ define void @flat_atomic_nand_i32_noret_offset(ptr %out, i32 %in) {
|
||||
; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%tmp0 = atomicrmw nand ptr %gep, i32 %in seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -2670,7 +2670,7 @@ define i32 @flat_atomic_nand_i32_ret_offset(ptr %out, i32 %in) {
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GCN3-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%result = atomicrmw nand ptr %gep, i32 %in seq_cst
|
||||
ret i32 %result
|
||||
}
|
||||
@ -2827,7 +2827,7 @@ define amdgpu_gfx void @flat_atomic_nand_i32_noret_offset_scalar(ptr inreg %out,
|
||||
; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[34:35]
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%tmp0 = atomicrmw nand ptr %gep, i32 %in seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -2992,7 +2992,7 @@ define amdgpu_gfx i32 @flat_atomic_nand_i32_ret_offset_scalar(ptr inreg %out, i3
|
||||
; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[34:35]
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%result = atomicrmw nand ptr %gep, i32 %in seq_cst
|
||||
ret i32 %result
|
||||
}
|
||||
@ -3067,7 +3067,7 @@ define void @flat_atomic_nand_i32_noret_offset__amdgpu_no_remote_memory(ptr %out
|
||||
; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw nand ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -3143,7 +3143,7 @@ define i32 @flat_atomic_nand_i32_ret_offset__amdgpu_no_remote_memory(ptr %out, i
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GCN3-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i64 4
|
||||
%result = atomicrmw nand ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0
|
||||
ret i32 %result
|
||||
}
|
||||
@ -3286,7 +3286,7 @@ define void @flat_atomic_or_i32_noret_offset(ptr %out, i32 %in) {
|
||||
; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%tmp0 = atomicrmw or ptr %gep, i32 %in seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -3429,7 +3429,7 @@ define i32 @flat_atomic_or_i32_ret_offset(ptr %out, i32 %in) {
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GCN3-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%result = atomicrmw or ptr %gep, i32 %in seq_cst
|
||||
ret i32 %result
|
||||
}
|
||||
@ -3580,7 +3580,7 @@ define amdgpu_gfx void @flat_atomic_or_i32_noret_offset_scalar(ptr inreg %out, i
|
||||
; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[34:35]
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%tmp0 = atomicrmw or ptr %gep, i32 %in seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -3739,7 +3739,7 @@ define amdgpu_gfx i32 @flat_atomic_or_i32_ret_offset_scalar(ptr inreg %out, i32
|
||||
; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[34:35]
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%result = atomicrmw or ptr %gep, i32 %in seq_cst
|
||||
ret i32 %result
|
||||
}
|
||||
@ -3772,7 +3772,7 @@ define void @flat_atomic_or_i32_noret_offset__amdgpu_no_remote_memory(ptr %out,
|
||||
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN3-NEXT: buffer_wbinvl1_vol
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw or ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -3805,7 +3805,7 @@ define i32 @flat_atomic_or_i32_ret_offset__amdgpu_no_remote_memory(ptr %out, i32
|
||||
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN3-NEXT: buffer_wbinvl1_vol
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i64 4
|
||||
%result = atomicrmw or ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0
|
||||
ret i32 %result
|
||||
}
|
||||
@ -3948,7 +3948,7 @@ define void @flat_atomic_xor_i32_noret_offset(ptr %out, i32 %in) {
|
||||
; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%tmp0 = atomicrmw xor ptr %gep, i32 %in seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -4091,7 +4091,7 @@ define i32 @flat_atomic_xor_i32_ret_offset(ptr %out, i32 %in) {
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GCN3-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%result = atomicrmw xor ptr %gep, i32 %in seq_cst
|
||||
ret i32 %result
|
||||
}
|
||||
@ -4242,7 +4242,7 @@ define amdgpu_gfx void @flat_atomic_xor_i32_noret_offset_scalar(ptr inreg %out,
|
||||
; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[34:35]
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%tmp0 = atomicrmw xor ptr %gep, i32 %in seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -4401,7 +4401,7 @@ define amdgpu_gfx i32 @flat_atomic_xor_i32_ret_offset_scalar(ptr inreg %out, i32
|
||||
; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[34:35]
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%result = atomicrmw xor ptr %gep, i32 %in seq_cst
|
||||
ret i32 %result
|
||||
}
|
||||
@ -4434,7 +4434,7 @@ define void @flat_xor_i32_noret_offset__amdgpu_no_remote_memory(ptr %out, i32 %i
|
||||
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN3-NEXT: buffer_wbinvl1_vol
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw xor ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -4467,7 +4467,7 @@ define i32 @flat_atomic_xor_i32_ret_offset__amdgpu_no_remote_memory(ptr %out, i3
|
||||
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN3-NEXT: buffer_wbinvl1_vol
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i64 4
|
||||
%result = atomicrmw xor ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0
|
||||
ret i32 %result
|
||||
}
|
||||
@ -4610,7 +4610,7 @@ define void @flat_atomic_max_i32_noret_offset(ptr %out, i32 %in) {
|
||||
; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%tmp0 = atomicrmw max ptr %gep, i32 %in seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -4753,7 +4753,7 @@ define i32 @flat_atomic_max_i32_ret_offset(ptr %out, i32 %in) {
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GCN3-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%result = atomicrmw max ptr %gep, i32 %in seq_cst
|
||||
ret i32 %result
|
||||
}
|
||||
@ -4904,7 +4904,7 @@ define amdgpu_gfx void @flat_atomic_max_i32_noret_offset_scalar(ptr inreg %out,
|
||||
; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[34:35]
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%tmp0 = atomicrmw max ptr %gep, i32 %in seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -5063,7 +5063,7 @@ define amdgpu_gfx i32 @flat_atomic_max_i32_ret_offset_scalar(ptr inreg %out, i32
|
||||
; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[34:35]
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%result = atomicrmw max ptr %gep, i32 %in seq_cst
|
||||
ret i32 %result
|
||||
}
|
||||
@ -5157,8 +5157,8 @@ define amdgpu_kernel void @atomic_max_i32_addr64_offset(ptr %out, i32 %in, i32 %
|
||||
; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GCN3-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i32 %index
|
||||
%gep = getelementptr i32, ptr %ptr, i32 4
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i32 %index
|
||||
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
|
||||
%tmp0 = atomicrmw max ptr %gep, i32 %in seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -5267,8 +5267,8 @@ define amdgpu_kernel void @atomic_max_i32_ret_addr64_offset(ptr %out, ptr %out2,
|
||||
; GCN3-NEXT: flat_store_dword v[0:1], v2
|
||||
; GCN3-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i32 %index
|
||||
%gep = getelementptr i32, ptr %ptr, i32 4
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i32 %index
|
||||
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
|
||||
%tmp0 = atomicrmw max ptr %gep, i32 %in seq_cst
|
||||
store i32 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -5359,7 +5359,7 @@ define amdgpu_kernel void @atomic_max_i32_addr64(ptr %out, i32 %in, i32 %index)
|
||||
; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GCN3-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i32 %index
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i32 %index
|
||||
%tmp0 = atomicrmw max ptr %ptr, i32 %in seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -5464,7 +5464,7 @@ define amdgpu_kernel void @atomic_max_i32_ret_addr64(ptr %out, ptr %out2, i32 %i
|
||||
; GCN3-NEXT: flat_store_dword v[0:1], v2
|
||||
; GCN3-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i32 %index
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i32 %index
|
||||
%tmp0 = atomicrmw max ptr %ptr, i32 %in seq_cst
|
||||
store i32 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -5498,7 +5498,7 @@ define void @flat_max_i32_noret_offset__amdgpu_no_remote_memory(ptr %out, i32 %i
|
||||
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN3-NEXT: buffer_wbinvl1_vol
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw max ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -5531,7 +5531,7 @@ define i32 @flat_atomic_max_i32_ret_offset__amdgpu_no_remote_memory(ptr %out, i3
|
||||
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN3-NEXT: buffer_wbinvl1_vol
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i64 4
|
||||
%result = atomicrmw max ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0
|
||||
ret i32 %result
|
||||
}
|
||||
@ -5674,7 +5674,7 @@ define void @flat_atomic_umax_i32_noret_offset(ptr %out, i32 %in) {
|
||||
; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%tmp0 = atomicrmw umax ptr %gep, i32 %in seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -5817,7 +5817,7 @@ define i32 @flat_atomic_umax_i32_ret_offset(ptr %out, i32 %in) {
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GCN3-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%result = atomicrmw umax ptr %gep, i32 %in seq_cst
|
||||
ret i32 %result
|
||||
}
|
||||
@ -5968,7 +5968,7 @@ define amdgpu_gfx void @flat_atomic_umax_i32_noret_offset_scalar(ptr inreg %out,
|
||||
; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[34:35]
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%tmp0 = atomicrmw umax ptr %gep, i32 %in seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -6127,7 +6127,7 @@ define amdgpu_gfx i32 @flat_atomic_umax_i32_ret_offset_scalar(ptr inreg %out, i3
|
||||
; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[34:35]
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%result = atomicrmw umax ptr %gep, i32 %in seq_cst
|
||||
ret i32 %result
|
||||
}
|
||||
@ -6221,8 +6221,8 @@ define amdgpu_kernel void @atomic_umax_i32_addr64_offset(ptr %out, i32 %in, i32
|
||||
; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GCN3-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i32 %index
|
||||
%gep = getelementptr i32, ptr %ptr, i32 4
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i32 %index
|
||||
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
|
||||
%tmp0 = atomicrmw umax ptr %gep, i32 %in seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -6331,8 +6331,8 @@ define amdgpu_kernel void @atomic_umax_i32_ret_addr64_offset(ptr %out, ptr %out2
|
||||
; GCN3-NEXT: flat_store_dword v[0:1], v2
|
||||
; GCN3-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i32 %index
|
||||
%gep = getelementptr i32, ptr %ptr, i32 4
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i32 %index
|
||||
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
|
||||
%tmp0 = atomicrmw umax ptr %gep, i32 %in seq_cst
|
||||
store i32 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -6438,7 +6438,7 @@ define amdgpu_kernel void @atomic_umax_i32_ret_addr64(ptr %out, ptr %out2, i32 %
|
||||
; GCN3-NEXT: flat_store_dword v[0:1], v2
|
||||
; GCN3-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i32 %index
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i32 %index
|
||||
%tmp0 = atomicrmw umax ptr %ptr, i32 %in seq_cst
|
||||
store i32 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -6472,7 +6472,7 @@ define void @flat_umax_i32_noret_offset__amdgpu_no_remote_memory(ptr %out, i32 %
|
||||
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN3-NEXT: buffer_wbinvl1_vol
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw umax ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -6505,7 +6505,7 @@ define i32 @flat_atomic_umax_i32_ret_offset__amdgpu_no_remote_memory(ptr %out, i
|
||||
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN3-NEXT: buffer_wbinvl1_vol
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i64 4
|
||||
%result = atomicrmw umax ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0
|
||||
ret i32 %result
|
||||
}
|
||||
@ -6648,7 +6648,7 @@ define void @flat_atomic_umin_i32_noret_offset(ptr %out, i32 %in) {
|
||||
; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%tmp0 = atomicrmw umin ptr %gep, i32 %in seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -6791,7 +6791,7 @@ define i32 @flat_atomic_umin_i32_ret_offset(ptr %out, i32 %in) {
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GCN3-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%result = atomicrmw umin ptr %gep, i32 %in seq_cst
|
||||
ret i32 %result
|
||||
}
|
||||
@ -6942,7 +6942,7 @@ define amdgpu_gfx void @flat_atomic_umin_i32_noret_offset_scalar(ptr inreg %out,
|
||||
; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[34:35]
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%tmp0 = atomicrmw umin ptr %gep, i32 %in seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -7101,7 +7101,7 @@ define amdgpu_gfx i32 @flat_atomic_umin_i32_ret_offset_scalar(ptr inreg %out, i3
|
||||
; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[34:35]
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%result = atomicrmw umin ptr %gep, i32 %in seq_cst
|
||||
ret i32 %result
|
||||
}
|
||||
@ -7134,7 +7134,7 @@ define void @flat_umin_i32_noret_offset__amdgpu_no_remote_memory(ptr %out, i32 %
|
||||
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN3-NEXT: buffer_wbinvl1_vol
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw umin ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -7167,7 +7167,7 @@ define i32 @flat_atomic_umin_i32_ret_offset__amdgpu_no_remote_memory(ptr %out, i
|
||||
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN3-NEXT: buffer_wbinvl1_vol
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i64 4
|
||||
%result = atomicrmw umin ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0
|
||||
ret i32 %result
|
||||
}
|
||||
@ -7310,7 +7310,7 @@ define void @flat_atomic_min_i32_noret_offset(ptr %out, i32 %in) {
|
||||
; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%tmp0 = atomicrmw min ptr %gep, i32 %in seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -7453,7 +7453,7 @@ define i32 @flat_atomic_min_i32_ret_offset(ptr %out, i32 %in) {
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GCN3-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%result = atomicrmw min ptr %gep, i32 %in seq_cst
|
||||
ret i32 %result
|
||||
}
|
||||
@ -7604,7 +7604,7 @@ define amdgpu_gfx void @flat_atomic_min_i32_noret_offset_scalar(ptr inreg %out,
|
||||
; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[34:35]
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%tmp0 = atomicrmw min ptr %gep, i32 %in seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -7763,7 +7763,7 @@ define amdgpu_gfx i32 @flat_atomic_min_i32_ret_offset_scalar(ptr inreg %out, i32
|
||||
; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[34:35]
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%result = atomicrmw min ptr %gep, i32 %in seq_cst
|
||||
ret i32 %result
|
||||
}
|
||||
@ -7857,8 +7857,8 @@ define amdgpu_kernel void @atomic_min_i32_addr64_offset(ptr %out, i32 %in, i32 %
|
||||
; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GCN3-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i32 %index
|
||||
%gep = getelementptr i32, ptr %ptr, i32 4
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i32 %index
|
||||
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
|
||||
%tmp0 = atomicrmw min ptr %gep, i32 %in seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -7967,8 +7967,8 @@ define amdgpu_kernel void @atomic_min_i32_ret_addr64_offset(ptr %out, ptr %out2,
|
||||
; GCN3-NEXT: flat_store_dword v[0:1], v2
|
||||
; GCN3-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i32 %index
|
||||
%gep = getelementptr i32, ptr %ptr, i32 4
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i32 %index
|
||||
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
|
||||
%tmp0 = atomicrmw min ptr %gep, i32 %in seq_cst
|
||||
store i32 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -8151,7 +8151,7 @@ define amdgpu_kernel void @atomic_min_i32_ret_addr64(ptr %out, ptr %out2, i32 %i
|
||||
; GCN3-NEXT: flat_store_dword v[0:1], v2
|
||||
; GCN3-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i32, ptr %out, i32 %index
|
||||
%ptr = getelementptr inbounds i32, ptr %out, i32 %index
|
||||
%tmp0 = atomicrmw min ptr %ptr, i32 %in seq_cst
|
||||
store i32 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -8185,7 +8185,7 @@ define void @flat_min_i32_noret_offset__amdgpu_no_remote_memory(ptr %out, i32 %i
|
||||
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN3-NEXT: buffer_wbinvl1_vol
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw min ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -8218,7 +8218,7 @@ define i32 @flat_atomic_min_i32_ret_offset__amdgpu_no_remote_memory(ptr %out, i3
|
||||
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN3-NEXT: buffer_wbinvl1_vol
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i64 4
|
||||
%result = atomicrmw min ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0
|
||||
ret i32 %result
|
||||
}
|
||||
@ -8373,7 +8373,7 @@ define void @flat_atomic_uinc_wrap_i32_noret_offset(ptr %out, i32 %in) {
|
||||
; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%tmp0 = atomicrmw uinc_wrap ptr %gep, i32 %in seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -8528,7 +8528,7 @@ define i32 @flat_atomic_uinc_wrap_i32_ret_offset(ptr %out, i32 %in) {
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GCN3-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%result = atomicrmw uinc_wrap ptr %gep, i32 %in seq_cst
|
||||
ret i32 %result
|
||||
}
|
||||
@ -8691,7 +8691,7 @@ define amdgpu_gfx void @flat_atomic_uinc_wrap_i32_noret_offset_scalar(ptr inreg
|
||||
; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[34:35]
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%tmp0 = atomicrmw uinc_wrap ptr %gep, i32 %in seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -8862,7 +8862,7 @@ define amdgpu_gfx i32 @flat_atomic_uinc_wrap_i32_ret_offset_scalar(ptr inreg %ou
|
||||
; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[34:35]
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%result = atomicrmw uinc_wrap ptr %gep, i32 %in seq_cst
|
||||
ret i32 %result
|
||||
}
|
||||
@ -8895,7 +8895,7 @@ define void @flat_uinc_wrap_i32_noret_offset__amdgpu_no_remote_memory(ptr %out,
|
||||
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN3-NEXT: buffer_wbinvl1_vol
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw uinc_wrap ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -8928,7 +8928,7 @@ define i32 @flat_atomic_uinc_wrap_i32_ret_offset__amdgpu_no_remote_memory(ptr %o
|
||||
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN3-NEXT: buffer_wbinvl1_vol
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i64 4
|
||||
%result = atomicrmw uinc_wrap ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0
|
||||
ret i32 %result
|
||||
}
|
||||
@ -9089,7 +9089,7 @@ define void @flat_atomic_udec_wrap_i32_noret_offset(ptr %out, i32 %in) {
|
||||
; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[6:7]
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%tmp0 = atomicrmw udec_wrap ptr %gep, i32 %in seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -9250,7 +9250,7 @@ define i32 @flat_atomic_udec_wrap_i32_ret_offset(ptr %out, i32 %in) {
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[6:7]
|
||||
; GCN3-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%result = atomicrmw udec_wrap ptr %gep, i32 %in seq_cst
|
||||
ret i32 %result
|
||||
}
|
||||
@ -9433,7 +9433,7 @@ define amdgpu_gfx void @flat_atomic_udec_wrap_i32_noret_offset_scalar(ptr inreg
|
||||
; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[36:37]
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%tmp0 = atomicrmw udec_wrap ptr %gep, i32 %in seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -9616,7 +9616,7 @@ define amdgpu_gfx i32 @flat_atomic_udec_wrap_i32_ret_offset_scalar(ptr inreg %ou
|
||||
; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GCN3-NEXT: s_or_b64 exec, exec, s[36:37]
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%result = atomicrmw udec_wrap ptr %gep, i32 %in seq_cst
|
||||
ret i32 %result
|
||||
}
|
||||
@ -9649,7 +9649,7 @@ define void @flat_udec_wrap_i32_noret_offset__amdgpu_no_remote_memory(ptr %out,
|
||||
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN3-NEXT: buffer_wbinvl1_vol
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw udec_wrap ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -9682,7 +9682,7 @@ define i32 @flat_atomic_udec_wrap_i32_ret_offset__amdgpu_no_remote_memory(ptr %o
|
||||
; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GCN3-NEXT: buffer_wbinvl1_vol
|
||||
; GCN3-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i32, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i64 4
|
||||
%result = atomicrmw udec_wrap ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0
|
||||
ret i32 %result
|
||||
}
|
||||
|
||||
@ -138,7 +138,7 @@ define amdgpu_kernel void @atomic_add_i64_offset(ptr %out, i64 %in) {
|
||||
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile add ptr %gep, i64 %in syncscope("agent") seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -284,7 +284,7 @@ define amdgpu_kernel void @atomic_add_i64_ret_offset(ptr %out, ptr %out2, i64 %i
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile add ptr %gep, i64 %in syncscope("agent") seq_cst
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -438,8 +438,8 @@ define amdgpu_kernel void @atomic_add_i64_addr64_offset(ptr %out, i64 %in, i64 %
|
||||
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile add ptr %gep, i64 %in syncscope("agent") seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -590,8 +590,8 @@ define amdgpu_kernel void @atomic_add_i64_ret_addr64_offset(ptr %out, ptr %out2,
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile add ptr %gep, i64 %in syncscope("agent") seq_cst
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -1015,7 +1015,7 @@ define amdgpu_kernel void @atomic_add_i64_addr64(ptr %out, i64 %in, i64 %index)
|
||||
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile add ptr %ptr, i64 %in syncscope("agent") seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -1161,7 +1161,7 @@ define amdgpu_kernel void @atomic_add_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile add ptr %ptr, i64 %in syncscope("agent") seq_cst
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -1299,7 +1299,7 @@ define amdgpu_kernel void @atomic_and_i64_offset(ptr %out, i64 %in) {
|
||||
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile and ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -1442,7 +1442,7 @@ define amdgpu_kernel void @atomic_and_i64_ret_offset(ptr %out, ptr %out2, i64 %i
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile and ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -1593,8 +1593,8 @@ define amdgpu_kernel void @atomic_and_i64_addr64_offset(ptr %out, i64 %in, i64 %
|
||||
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile and ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -1742,8 +1742,8 @@ define amdgpu_kernel void @atomic_and_i64_ret_addr64_offset(ptr %out, ptr %out2,
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile and ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -2158,7 +2158,7 @@ define amdgpu_kernel void @atomic_and_i64_addr64(ptr %out, i64 %in, i64 %index)
|
||||
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile and ptr %ptr, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -2301,7 +2301,7 @@ define amdgpu_kernel void @atomic_and_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile and ptr %ptr, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -2442,7 +2442,7 @@ define amdgpu_kernel void @atomic_sub_i64_offset(ptr %out, i64 %in) {
|
||||
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile sub ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -2588,7 +2588,7 @@ define amdgpu_kernel void @atomic_sub_i64_ret_offset(ptr %out, ptr %out2, i64 %i
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile sub ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -2742,8 +2742,8 @@ define amdgpu_kernel void @atomic_sub_i64_addr64_offset(ptr %out, i64 %in, i64 %
|
||||
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile sub ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -2894,8 +2894,8 @@ define amdgpu_kernel void @atomic_sub_i64_ret_addr64_offset(ptr %out, ptr %out2,
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile sub ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -3319,7 +3319,7 @@ define amdgpu_kernel void @atomic_sub_i64_addr64(ptr %out, i64 %in, i64 %index)
|
||||
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile sub ptr %ptr, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -3465,7 +3465,7 @@ define amdgpu_kernel void @atomic_sub_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile sub ptr %ptr, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -3606,7 +3606,7 @@ define amdgpu_kernel void @atomic_max_i64_offset(ptr %out, i64 %in) {
|
||||
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile max ptr %gep, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -3754,7 +3754,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_offset(ptr %out, ptr %out2, i64 %i
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile max ptr %gep, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -3908,8 +3908,8 @@ define amdgpu_kernel void @atomic_max_i64_addr64_offset(ptr %out, i64 %in, i64 %
|
||||
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile max ptr %gep, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -4062,8 +4062,8 @@ define amdgpu_kernel void @atomic_max_i64_ret_addr64_offset(ptr %out, ptr %out2,
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile max ptr %gep, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -4489,7 +4489,7 @@ define amdgpu_kernel void @atomic_max_i64_addr64(ptr %out, i64 %in, i64 %index)
|
||||
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile max ptr %ptr, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -4637,7 +4637,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile max ptr %ptr, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -4778,7 +4778,7 @@ define amdgpu_kernel void @atomic_umax_i64_offset(ptr %out, i64 %in) {
|
||||
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile umax ptr %gep, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -4926,7 +4926,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_offset(ptr %out, ptr %out2, i64 %
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile umax ptr %gep, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -5080,8 +5080,8 @@ define amdgpu_kernel void @atomic_umax_i64_addr64_offset(ptr %out, i64 %in, i64
|
||||
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile umax ptr %gep, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -5234,8 +5234,8 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64_offset(ptr %out, ptr %out2
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile umax ptr %gep, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -5661,7 +5661,7 @@ define amdgpu_kernel void @atomic_umax_i64_addr64(ptr %out, i64 %in, i64 %index)
|
||||
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile umax ptr %ptr, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -5809,7 +5809,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64(ptr %out, ptr %out2, i64 %
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile umax ptr %ptr, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -5950,7 +5950,7 @@ define amdgpu_kernel void @atomic_min_i64_offset(ptr %out, i64 %in) {
|
||||
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile min ptr %gep, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -6098,7 +6098,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_offset(ptr %out, ptr %out2, i64 %i
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile min ptr %gep, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -6252,8 +6252,8 @@ define amdgpu_kernel void @atomic_min_i64_addr64_offset(ptr %out, i64 %in, i64 %
|
||||
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile min ptr %gep, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -6406,8 +6406,8 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64_offset(ptr %out, ptr %out2,
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile min ptr %gep, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -6833,7 +6833,7 @@ define amdgpu_kernel void @atomic_min_i64_addr64(ptr %out, i64 %in, i64 %index)
|
||||
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile min ptr %ptr, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -6981,7 +6981,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile min ptr %ptr, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -7122,7 +7122,7 @@ define amdgpu_kernel void @atomic_umin_i64_offset(ptr %out, i64 %in) {
|
||||
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile umin ptr %gep, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -7270,7 +7270,7 @@ define amdgpu_kernel void @atomic_umin_i64_ret_offset(ptr %out, ptr %out2, i64 %
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile umin ptr %gep, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -7424,8 +7424,8 @@ define amdgpu_kernel void @atomic_umin_i64_addr64_offset(ptr %out, i64 %in, i64
|
||||
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile umin ptr %gep, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -7578,8 +7578,8 @@ define amdgpu_kernel void @atomic_umin_i64_ret_addr64_offset(ptr %out, ptr %out2
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile umin ptr %gep, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -8005,7 +8005,7 @@ define amdgpu_kernel void @atomic_umin_i64_addr64(ptr %out, i64 %in, i64 %index)
|
||||
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile umin ptr %ptr, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -8153,7 +8153,7 @@ define amdgpu_kernel void @atomic_umin_i64_ret_addr64(ptr %out, ptr %out2, i64 %
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile umin ptr %ptr, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -8291,7 +8291,7 @@ define amdgpu_kernel void @atomic_or_i64_offset(ptr %out, i64 %in) {
|
||||
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile or ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -8434,7 +8434,7 @@ define amdgpu_kernel void @atomic_or_i64_ret_offset(ptr %out, ptr %out2, i64 %in
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile or ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -8585,8 +8585,8 @@ define amdgpu_kernel void @atomic_or_i64_addr64_offset(ptr %out, i64 %in, i64 %i
|
||||
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile or ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -8734,8 +8734,8 @@ define amdgpu_kernel void @atomic_or_i64_ret_addr64_offset(ptr %out, ptr %out2,
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile or ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -9150,7 +9150,7 @@ define amdgpu_kernel void @atomic_or_i64_addr64(ptr %out, i64 %in, i64 %index) {
|
||||
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile or ptr %ptr, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -9293,7 +9293,7 @@ define amdgpu_kernel void @atomic_or_i64_ret_addr64(ptr %out, ptr %out2, i64 %in
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile or ptr %ptr, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -9420,7 +9420,7 @@ define amdgpu_kernel void @atomic_xchg_i64_offset(ptr %out, i64 %in) {
|
||||
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile xchg ptr %gep, i64 %in syncscope("agent") seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -9546,7 +9546,7 @@ define amdgpu_kernel void @atomic_xchg_f64_offset(ptr %out, double %in) {
|
||||
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr double, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds double, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile xchg ptr %gep, double %in syncscope("agent") seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -9672,7 +9672,7 @@ define amdgpu_kernel void @atomic_xchg_pointer_offset(ptr %out, ptr %in) {
|
||||
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr ptr, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds ptr, ptr %out, i32 4
|
||||
%val = atomicrmw volatile xchg ptr %gep, ptr %in syncscope("agent") seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -9812,7 +9812,7 @@ define amdgpu_kernel void @atomic_xchg_i64_ret_offset(ptr %out, ptr %out2, i64 %
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile xchg ptr %gep, i64 %in syncscope("agent") seq_cst
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -9952,8 +9952,8 @@ define amdgpu_kernel void @atomic_xchg_i64_addr64_offset(ptr %out, i64 %in, i64
|
||||
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile xchg ptr %gep, i64 %in syncscope("agent") seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -10098,8 +10098,8 @@ define amdgpu_kernel void @atomic_xchg_i64_ret_addr64_offset(ptr %out, ptr %out2
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile xchg ptr %gep, i64 %in syncscope("agent") seq_cst
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -10489,7 +10489,7 @@ define amdgpu_kernel void @atomic_xchg_i64_addr64(ptr %out, i64 %in, i64 %index)
|
||||
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile xchg ptr %ptr, i64 %in syncscope("agent") seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -10629,7 +10629,7 @@ define amdgpu_kernel void @atomic_xchg_i64_ret_addr64(ptr %out, ptr %out2, i64 %
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile xchg ptr %ptr, i64 %in syncscope("agent") seq_cst
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -10767,7 +10767,7 @@ define amdgpu_kernel void @atomic_xor_i64_offset(ptr %out, i64 %in) {
|
||||
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile xor ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -10910,7 +10910,7 @@ define amdgpu_kernel void @atomic_xor_i64_ret_offset(ptr %out, ptr %out2, i64 %i
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile xor ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -11061,8 +11061,8 @@ define amdgpu_kernel void @atomic_xor_i64_addr64_offset(ptr %out, i64 %in, i64 %
|
||||
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile xor ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -11210,8 +11210,8 @@ define amdgpu_kernel void @atomic_xor_i64_ret_addr64_offset(ptr %out, ptr %out2,
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile xor ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -11626,7 +11626,7 @@ define amdgpu_kernel void @atomic_xor_i64_addr64(ptr %out, i64 %in, i64 %index)
|
||||
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile xor ptr %ptr, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -11769,7 +11769,7 @@ define amdgpu_kernel void @atomic_xor_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile xor ptr %ptr, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -11820,7 +11820,7 @@ define amdgpu_kernel void @atomic_load_i64_offset(ptr %in, ptr %out) {
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %in, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %in, i64 4
|
||||
%val = load atomic i64, ptr %gep seq_cst, align 8
|
||||
store i64 %val, ptr %out
|
||||
ret void
|
||||
@ -11930,8 +11930,8 @@ define amdgpu_kernel void @atomic_load_i64_addr64_offset(ptr %in, ptr %out, i64
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %in, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %in, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%val = load atomic i64, ptr %gep seq_cst, align 8
|
||||
store i64 %val, ptr %out
|
||||
ret void
|
||||
@ -11991,7 +11991,7 @@ define amdgpu_kernel void @atomic_load_i64_addr64(ptr %in, ptr %out, i64 %index)
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %in, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %in, i64 %index
|
||||
%val = load atomic i64, ptr %ptr seq_cst, align 8
|
||||
store i64 %val, ptr %out
|
||||
ret void
|
||||
@ -12035,7 +12035,7 @@ define amdgpu_kernel void @atomic_store_i64_offset(i64 %in, ptr %out) {
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] offset:32 scope:SCOPE_SYS
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
store atomic i64 %in, ptr %gep seq_cst, align 8
|
||||
ret void
|
||||
}
|
||||
@ -12129,8 +12129,8 @@ define amdgpu_kernel void @atomic_store_i64_addr64_offset(i64 %in, ptr %out, i64
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] offset:32 scope:SCOPE_SYS
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
store atomic i64 %in, ptr %gep seq_cst, align 8
|
||||
ret void
|
||||
}
|
||||
@ -12182,7 +12182,7 @@ define amdgpu_kernel void @atomic_store_i64_addr64(i64 %in, ptr %out, i64 %index
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] scope:SCOPE_SYS
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
store atomic i64 %in, ptr %ptr seq_cst, align 8
|
||||
ret void
|
||||
}
|
||||
@ -12333,7 +12333,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_offset(ptr %out, i64 %in, i64 %old
|
||||
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -12484,7 +12484,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_soffset(ptr %out, i64 %in, i64 %ol
|
||||
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 9000
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 9000
|
||||
%val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -12633,7 +12633,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret_offset(ptr %out, ptr %out2, i6
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
|
||||
%extract0 = extractvalue { i64, i1 } %val, 0
|
||||
store i64 %extract0, ptr %out2
|
||||
@ -12791,8 +12791,8 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_addr64_offset(ptr %out, i64 %in, i
|
||||
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -12954,8 +12954,8 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64_offset(ptr %out, ptr %o
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
|
||||
%extract0 = extractvalue { i64, i1 } %val, 0
|
||||
store i64 %extract0, ptr %out2
|
||||
@ -13398,7 +13398,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_addr64(ptr %out, i64 %in, i64 %ind
|
||||
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%val = cmpxchg volatile ptr %ptr, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -13555,7 +13555,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64(ptr %out, ptr %out2, i6
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%val = cmpxchg volatile ptr %ptr, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst
|
||||
%extract0 = extractvalue { i64, i1 } %val, 0
|
||||
store i64 %extract0, ptr %out2
|
||||
@ -13607,7 +13607,7 @@ define amdgpu_kernel void @atomic_load_f64_offset(ptr %in, ptr %out) {
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr double, ptr %in, i64 4
|
||||
%gep = getelementptr inbounds double, ptr %in, i64 4
|
||||
%val = load atomic double, ptr %gep seq_cst, align 8
|
||||
store double %val, ptr %out
|
||||
ret void
|
||||
@ -13717,8 +13717,8 @@ define amdgpu_kernel void @atomic_load_f64_addr64_offset(ptr %in, ptr %out, i64
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr double, ptr %in, i64 %index
|
||||
%gep = getelementptr double, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds double, ptr %in, i64 %index
|
||||
%gep = getelementptr inbounds double, ptr %ptr, i64 4
|
||||
%val = load atomic double, ptr %gep seq_cst, align 8
|
||||
store double %val, ptr %out
|
||||
ret void
|
||||
@ -13778,7 +13778,7 @@ define amdgpu_kernel void @atomic_load_f64_addr64(ptr %in, ptr %out, i64 %index)
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr double, ptr %in, i64 %index
|
||||
%ptr = getelementptr inbounds double, ptr %in, i64 %index
|
||||
%val = load atomic double, ptr %ptr seq_cst, align 8
|
||||
store double %val, ptr %out
|
||||
ret void
|
||||
@ -13822,7 +13822,7 @@ define amdgpu_kernel void @atomic_store_f64_offset(double %in, ptr %out) {
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] offset:32 scope:SCOPE_SYS
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr double, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds double, ptr %out, i64 4
|
||||
store atomic double %in, ptr %gep seq_cst, align 8
|
||||
ret void
|
||||
}
|
||||
@ -13916,8 +13916,8 @@ define amdgpu_kernel void @atomic_store_f64_addr64_offset(double %in, ptr %out,
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] offset:32 scope:SCOPE_SYS
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr double, ptr %out, i64 %index
|
||||
%gep = getelementptr double, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds double, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds double, ptr %ptr, i64 4
|
||||
store atomic double %in, ptr %gep seq_cst, align 8
|
||||
ret void
|
||||
}
|
||||
@ -13969,7 +13969,7 @@ define amdgpu_kernel void @atomic_store_f64_addr64(double %in, ptr %out, i64 %in
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] scope:SCOPE_SYS
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr double, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds double, ptr %out, i64 %index
|
||||
store atomic double %in, ptr %ptr seq_cst, align 8
|
||||
ret void
|
||||
}
|
||||
@ -14116,7 +14116,7 @@ define amdgpu_kernel void @atomic_inc_i64_offset(ptr %out, i64 %in) {
|
||||
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile uinc_wrap ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -14269,7 +14269,7 @@ define amdgpu_kernel void @atomic_inc_i64_ret_offset(ptr %out, ptr %out2, i64 %i
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile uinc_wrap ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -14430,8 +14430,8 @@ define amdgpu_kernel void @atomic_inc_i64_incr64_offset(ptr %out, i64 %in, i64 %
|
||||
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile uinc_wrap ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -14589,8 +14589,8 @@ define amdgpu_kernel void @atomic_inc_i64_ret_incr64_offset(ptr %out, ptr %out2,
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile uinc_wrap ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -15035,7 +15035,7 @@ define amdgpu_kernel void @atomic_inc_i64_incr64(ptr %out, i64 %in, i64 %index)
|
||||
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile uinc_wrap ptr %ptr, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -15188,7 +15188,7 @@ define amdgpu_kernel void @atomic_inc_i64_ret_incr64(ptr %out, ptr %out2, i64 %i
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile uinc_wrap ptr %ptr, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -15345,7 +15345,7 @@ define amdgpu_kernel void @atomic_dec_i64_offset(ptr %out, i64 %in) {
|
||||
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s4
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile udec_wrap ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -15509,7 +15509,7 @@ define amdgpu_kernel void @atomic_dec_i64_ret_offset(ptr %out, ptr %out2, i64 %i
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile udec_wrap ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -15679,8 +15679,8 @@ define amdgpu_kernel void @atomic_dec_i64_decr64_offset(ptr %out, i64 %in, i64 %
|
||||
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s4
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile udec_wrap ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -15849,8 +15849,8 @@ define amdgpu_kernel void @atomic_dec_i64_ret_decr64_offset(ptr %out, ptr %out2,
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile udec_wrap ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -16324,7 +16324,7 @@ define amdgpu_kernel void @atomic_dec_i64_decr64(ptr %out, i64 %in, i64 %index)
|
||||
; GFX12-NEXT: scratch_store_b64 off, v[0:1], s4
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile udec_wrap ptr %ptr, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
ret void
|
||||
}
|
||||
@ -16488,7 +16488,7 @@ define amdgpu_kernel void @atomic_dec_i64_ret_decr64(ptr %out, ptr %out2, i64 %i
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile udec_wrap ptr %ptr, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
|
||||
@ -45,7 +45,7 @@ define amdgpu_kernel void @atomic_add_i64_offset(ptr %out, i64 %in) {
|
||||
; GFX12-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile add ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
ret void
|
||||
}
|
||||
@ -104,7 +104,7 @@ define amdgpu_kernel void @atomic_add_i64_ret_offset(ptr %out, ptr %out2, i64 %i
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile add ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -165,8 +165,8 @@ define amdgpu_kernel void @atomic_add_i64_addr64_offset(ptr %out, i64 %in, i64 %
|
||||
; GFX12-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile add ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
ret void
|
||||
}
|
||||
@ -230,8 +230,8 @@ define amdgpu_kernel void @atomic_add_i64_ret_addr64_offset(ptr %out, ptr %out2,
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile add ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -385,7 +385,7 @@ define amdgpu_kernel void @atomic_add_i64_addr64(ptr %out, i64 %in, i64 %index)
|
||||
; GFX12-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile add ptr %ptr, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
ret void
|
||||
}
|
||||
@ -445,7 +445,7 @@ define amdgpu_kernel void @atomic_add_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile add ptr %ptr, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -517,7 +517,7 @@ define amdgpu_kernel void @atomic_and_i64_offset(ptr %out, i64 %in) {
|
||||
; GFX12-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile and ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
ret void
|
||||
}
|
||||
@ -602,7 +602,7 @@ define amdgpu_kernel void @atomic_and_i64_ret_offset(ptr %out, ptr %out2, i64 %i
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile and ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -687,8 +687,8 @@ define amdgpu_kernel void @atomic_and_i64_addr64_offset(ptr %out, i64 %in, i64 %
|
||||
; GFX12-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile and ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
ret void
|
||||
}
|
||||
@ -778,8 +778,8 @@ define amdgpu_kernel void @atomic_and_i64_ret_addr64_offset(ptr %out, ptr %out2,
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile and ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -1015,7 +1015,7 @@ define amdgpu_kernel void @atomic_and_i64_addr64(ptr %out, i64 %in, i64 %index)
|
||||
; GFX12-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile and ptr %ptr, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
ret void
|
||||
}
|
||||
@ -1101,7 +1101,7 @@ define amdgpu_kernel void @atomic_and_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile and ptr %ptr, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -1175,7 +1175,7 @@ define amdgpu_kernel void @atomic_sub_i64_offset(ptr %out, i64 %in) {
|
||||
; GFX12-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile sub ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
ret void
|
||||
}
|
||||
@ -1262,7 +1262,7 @@ define amdgpu_kernel void @atomic_sub_i64_ret_offset(ptr %out, ptr %out2, i64 %i
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile sub ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -1349,8 +1349,8 @@ define amdgpu_kernel void @atomic_sub_i64_addr64_offset(ptr %out, i64 %in, i64 %
|
||||
; GFX12-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile sub ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
ret void
|
||||
}
|
||||
@ -1442,8 +1442,8 @@ define amdgpu_kernel void @atomic_sub_i64_ret_addr64_offset(ptr %out, ptr %out2,
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile sub ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -1685,7 +1685,7 @@ define amdgpu_kernel void @atomic_sub_i64_addr64(ptr %out, i64 %in, i64 %index)
|
||||
; GFX12-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile sub ptr %ptr, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
ret void
|
||||
}
|
||||
@ -1773,7 +1773,7 @@ define amdgpu_kernel void @atomic_sub_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile sub ptr %ptr, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -1849,7 +1849,7 @@ define amdgpu_kernel void @atomic_max_i64_offset(ptr %out, i64 %in) {
|
||||
; GFX12-NEXT: global_inv scope:SCOPE_SE
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile max ptr %gep, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0
|
||||
ret void
|
||||
}
|
||||
@ -1938,7 +1938,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_offset(ptr %out, ptr %out2, i64 %i
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile max ptr %gep, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -2027,8 +2027,8 @@ define amdgpu_kernel void @atomic_max_i64_addr64_offset(ptr %out, i64 %in, i64 %
|
||||
; GFX12-NEXT: global_inv scope:SCOPE_SE
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile max ptr %gep, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0
|
||||
ret void
|
||||
}
|
||||
@ -2122,8 +2122,8 @@ define amdgpu_kernel void @atomic_max_i64_ret_addr64_offset(ptr %out, ptr %out2,
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile max ptr %gep, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -2371,7 +2371,7 @@ define amdgpu_kernel void @atomic_max_i64_addr64(ptr %out, i64 %in, i64 %index)
|
||||
; GFX12-NEXT: global_inv scope:SCOPE_SE
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile max ptr %ptr, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0
|
||||
ret void
|
||||
}
|
||||
@ -2461,7 +2461,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile max ptr %ptr, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -2537,7 +2537,7 @@ define amdgpu_kernel void @atomic_umax_i64_offset(ptr %out, i64 %in) {
|
||||
; GFX12-NEXT: global_inv scope:SCOPE_SE
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile umax ptr %gep, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0
|
||||
ret void
|
||||
}
|
||||
@ -2626,7 +2626,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_offset(ptr %out, ptr %out2, i64 %
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile umax ptr %gep, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -2715,8 +2715,8 @@ define amdgpu_kernel void @atomic_umax_i64_addr64_offset(ptr %out, i64 %in, i64
|
||||
; GFX12-NEXT: global_inv scope:SCOPE_SE
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile umax ptr %gep, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0
|
||||
ret void
|
||||
}
|
||||
@ -2810,8 +2810,8 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64_offset(ptr %out, ptr %out2
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile umax ptr %gep, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -3059,7 +3059,7 @@ define amdgpu_kernel void @atomic_umax_i64_addr64(ptr %out, i64 %in, i64 %index)
|
||||
; GFX12-NEXT: global_inv scope:SCOPE_SE
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile umax ptr %ptr, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0
|
||||
ret void
|
||||
}
|
||||
@ -3149,7 +3149,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64(ptr %out, ptr %out2, i64 %
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile umax ptr %ptr, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -3225,7 +3225,7 @@ define amdgpu_kernel void @atomic_min_i64_offset(ptr %out, i64 %in) {
|
||||
; GFX12-NEXT: global_inv scope:SCOPE_SE
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile min ptr %gep, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0
|
||||
ret void
|
||||
}
|
||||
@ -3314,7 +3314,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_offset(ptr %out, ptr %out2, i64 %i
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile min ptr %gep, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -3403,8 +3403,8 @@ define amdgpu_kernel void @atomic_min_i64_addr64_offset(ptr %out, i64 %in, i64 %
|
||||
; GFX12-NEXT: global_inv scope:SCOPE_SE
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile min ptr %gep, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0
|
||||
ret void
|
||||
}
|
||||
@ -3498,8 +3498,8 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64_offset(ptr %out, ptr %out2,
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile min ptr %gep, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -3747,7 +3747,7 @@ define amdgpu_kernel void @atomic_min_i64_addr64(ptr %out, i64 %in, i64 %index)
|
||||
; GFX12-NEXT: global_inv scope:SCOPE_SE
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile min ptr %ptr, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0
|
||||
ret void
|
||||
}
|
||||
@ -3837,7 +3837,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile min ptr %ptr, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -3913,7 +3913,7 @@ define amdgpu_kernel void @atomic_umin_i64_offset(ptr %out, i64 %in) {
|
||||
; GFX12-NEXT: global_inv scope:SCOPE_SE
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile umin ptr %gep, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0
|
||||
ret void
|
||||
}
|
||||
@ -4002,7 +4002,7 @@ define amdgpu_kernel void @atomic_umin_i64_ret_offset(ptr %out, ptr %out2, i64 %
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile umin ptr %gep, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -4091,8 +4091,8 @@ define amdgpu_kernel void @atomic_umin_i64_addr64_offset(ptr %out, i64 %in, i64
|
||||
; GFX12-NEXT: global_inv scope:SCOPE_SE
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile umin ptr %gep, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0
|
||||
ret void
|
||||
}
|
||||
@ -4186,8 +4186,8 @@ define amdgpu_kernel void @atomic_umin_i64_ret_addr64_offset(ptr %out, ptr %out2
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile umin ptr %gep, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -4435,7 +4435,7 @@ define amdgpu_kernel void @atomic_umin_i64_addr64(ptr %out, i64 %in, i64 %index)
|
||||
; GFX12-NEXT: global_inv scope:SCOPE_SE
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile umin ptr %ptr, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0
|
||||
ret void
|
||||
}
|
||||
@ -4525,7 +4525,7 @@ define amdgpu_kernel void @atomic_umin_i64_ret_addr64(ptr %out, ptr %out2, i64 %
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile umin ptr %ptr, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -4597,7 +4597,7 @@ define amdgpu_kernel void @atomic_or_i64_offset(ptr %out, i64 %in) {
|
||||
; GFX12-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile or ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
ret void
|
||||
}
|
||||
@ -4682,7 +4682,7 @@ define amdgpu_kernel void @atomic_or_i64_ret_offset(ptr %out, ptr %out2, i64 %in
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile or ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -4767,8 +4767,8 @@ define amdgpu_kernel void @atomic_or_i64_addr64_offset(ptr %out, i64 %in, i64 %i
|
||||
; GFX12-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile or ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
ret void
|
||||
}
|
||||
@ -4858,8 +4858,8 @@ define amdgpu_kernel void @atomic_or_i64_ret_addr64_offset(ptr %out, ptr %out2,
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile or ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -5095,7 +5095,7 @@ define amdgpu_kernel void @atomic_or_i64_addr64(ptr %out, i64 %in, i64 %index) {
|
||||
; GFX12-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile or ptr %ptr, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
ret void
|
||||
}
|
||||
@ -5181,7 +5181,7 @@ define amdgpu_kernel void @atomic_or_i64_ret_addr64(ptr %out, ptr %out2, i64 %in
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile or ptr %ptr, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -5229,7 +5229,7 @@ define amdgpu_kernel void @atomic_xchg_i64_offset(ptr %out, i64 %in) {
|
||||
; GFX12-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile xchg ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
ret void
|
||||
}
|
||||
@ -5276,7 +5276,7 @@ define amdgpu_kernel void @atomic_xchg_f64_offset(ptr %out, double %in) {
|
||||
; GFX12-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr double, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds double, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile xchg ptr %gep, double %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
ret void
|
||||
}
|
||||
@ -5323,7 +5323,7 @@ define amdgpu_kernel void @atomic_xchg_pointer_offset(ptr %out, ptr %in) {
|
||||
; GFX12-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr ptr, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds ptr, ptr %out, i32 4
|
||||
%val = atomicrmw volatile xchg ptr %gep, ptr %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
ret void
|
||||
}
|
||||
@ -5382,7 +5382,7 @@ define amdgpu_kernel void @atomic_xchg_i64_ret_offset(ptr %out, ptr %out2, i64 %
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile xchg ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -5443,8 +5443,8 @@ define amdgpu_kernel void @atomic_xchg_i64_addr64_offset(ptr %out, i64 %in, i64
|
||||
; GFX12-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile xchg ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
ret void
|
||||
}
|
||||
@ -5508,8 +5508,8 @@ define amdgpu_kernel void @atomic_xchg_i64_ret_addr64_offset(ptr %out, ptr %out2
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile xchg ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -5663,7 +5663,7 @@ define amdgpu_kernel void @atomic_xchg_i64_addr64(ptr %out, i64 %in, i64 %index)
|
||||
; GFX12-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile xchg ptr %ptr, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
ret void
|
||||
}
|
||||
@ -5723,7 +5723,7 @@ define amdgpu_kernel void @atomic_xchg_i64_ret_addr64(ptr %out, ptr %out2, i64 %
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile xchg ptr %ptr, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -5795,7 +5795,7 @@ define amdgpu_kernel void @atomic_xor_i64_offset(ptr %out, i64 %in) {
|
||||
; GFX12-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile xor ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
ret void
|
||||
}
|
||||
@ -5880,7 +5880,7 @@ define amdgpu_kernel void @atomic_xor_i64_ret_offset(ptr %out, ptr %out2, i64 %i
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile xor ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -5965,8 +5965,8 @@ define amdgpu_kernel void @atomic_xor_i64_addr64_offset(ptr %out, i64 %in, i64 %
|
||||
; GFX12-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile xor ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
ret void
|
||||
}
|
||||
@ -6056,8 +6056,8 @@ define amdgpu_kernel void @atomic_xor_i64_ret_addr64_offset(ptr %out, ptr %out2,
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile xor ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -6293,7 +6293,7 @@ define amdgpu_kernel void @atomic_xor_i64_addr64(ptr %out, i64 %in, i64 %index)
|
||||
; GFX12-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile xor ptr %ptr, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
ret void
|
||||
}
|
||||
@ -6379,7 +6379,7 @@ define amdgpu_kernel void @atomic_xor_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile xor ptr %ptr, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -6430,7 +6430,7 @@ define amdgpu_kernel void @atomic_load_i64_offset(ptr %in, ptr %out) {
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %in, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %in, i64 4
|
||||
%val = load atomic i64, ptr %gep seq_cst, align 8
|
||||
store i64 %val, ptr %out
|
||||
ret void
|
||||
@ -6540,8 +6540,8 @@ define amdgpu_kernel void @atomic_load_i64_addr64_offset(ptr %in, ptr %out, i64
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %in, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %in, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%val = load atomic i64, ptr %gep seq_cst, align 8
|
||||
store i64 %val, ptr %out
|
||||
ret void
|
||||
@ -6601,7 +6601,7 @@ define amdgpu_kernel void @atomic_load_i64_addr64(ptr %in, ptr %out, i64 %index)
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %in, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %in, i64 %index
|
||||
%val = load atomic i64, ptr %ptr seq_cst, align 8
|
||||
store i64 %val, ptr %out
|
||||
ret void
|
||||
@ -6645,7 +6645,7 @@ define amdgpu_kernel void @atomic_store_i64_offset(i64 %in, ptr %out) {
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] offset:32 scope:SCOPE_SYS
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
store atomic i64 %in, ptr %gep seq_cst, align 8
|
||||
ret void
|
||||
}
|
||||
@ -6739,8 +6739,8 @@ define amdgpu_kernel void @atomic_store_i64_addr64_offset(i64 %in, ptr %out, i64
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] offset:32 scope:SCOPE_SYS
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
store atomic i64 %in, ptr %gep seq_cst, align 8
|
||||
ret void
|
||||
}
|
||||
@ -6792,7 +6792,7 @@ define amdgpu_kernel void @atomic_store_i64_addr64(i64 %in, ptr %out, i64 %index
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] scope:SCOPE_SYS
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
store atomic i64 %in, ptr %ptr seq_cst, align 8
|
||||
ret void
|
||||
}
|
||||
@ -6848,7 +6848,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_offset(ptr %out, i64 %in, i64 %old
|
||||
; GFX12-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
|
||||
ret void
|
||||
}
|
||||
@ -6904,7 +6904,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_soffset(ptr %out, i64 %in, i64 %ol
|
||||
; GFX12-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 9000
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 9000
|
||||
%val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
|
||||
ret void
|
||||
}
|
||||
@ -6964,7 +6964,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret_offset(ptr %out, ptr %out2, i6
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
|
||||
%extract0 = extractvalue { i64, i1 } %val, 0
|
||||
store i64 %extract0, ptr %out2
|
||||
@ -7026,8 +7026,8 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_addr64_offset(ptr %out, i64 %in, i
|
||||
; GFX12-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
|
||||
ret void
|
||||
}
|
||||
@ -7099,8 +7099,8 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64_offset(ptr %out, ptr %o
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
|
||||
%extract0 = extractvalue { i64, i1 } %val, 0
|
||||
store i64 %extract0, ptr %out2
|
||||
@ -7266,7 +7266,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_addr64(ptr %out, i64 %in, i64 %ind
|
||||
; GFX12-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%val = cmpxchg volatile ptr %ptr, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
|
||||
ret void
|
||||
}
|
||||
@ -7334,7 +7334,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64(ptr %out, ptr %out2, i6
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%val = cmpxchg volatile ptr %ptr, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
|
||||
%extract0 = extractvalue { i64, i1 } %val, 0
|
||||
store i64 %extract0, ptr %out2
|
||||
@ -7386,7 +7386,7 @@ define amdgpu_kernel void @atomic_load_f64_offset(ptr %in, ptr %out) {
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr double, ptr %in, i64 4
|
||||
%gep = getelementptr inbounds double, ptr %in, i64 4
|
||||
%val = load atomic double, ptr %gep seq_cst, align 8, !noalias.addrspace !0
|
||||
store double %val, ptr %out
|
||||
ret void
|
||||
@ -7496,8 +7496,8 @@ define amdgpu_kernel void @atomic_load_f64_addr64_offset(ptr %in, ptr %out, i64
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr double, ptr %in, i64 %index
|
||||
%gep = getelementptr double, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds double, ptr %in, i64 %index
|
||||
%gep = getelementptr inbounds double, ptr %ptr, i64 4
|
||||
%val = load atomic double, ptr %gep seq_cst, align 8, !noalias.addrspace !0
|
||||
store double %val, ptr %out
|
||||
ret void
|
||||
@ -7557,7 +7557,7 @@ define amdgpu_kernel void @atomic_load_f64_addr64(ptr %in, ptr %out, i64 %index)
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr double, ptr %in, i64 %index
|
||||
%ptr = getelementptr inbounds double, ptr %in, i64 %index
|
||||
%val = load atomic double, ptr %ptr seq_cst, align 8, !noalias.addrspace !0
|
||||
store double %val, ptr %out
|
||||
ret void
|
||||
@ -7601,7 +7601,7 @@ define amdgpu_kernel void @atomic_store_f64_offset(double %in, ptr %out) {
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] offset:32 scope:SCOPE_SYS
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr double, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds double, ptr %out, i64 4
|
||||
store atomic double %in, ptr %gep seq_cst, align 8, !noalias.addrspace !0
|
||||
ret void
|
||||
}
|
||||
@ -7695,8 +7695,8 @@ define amdgpu_kernel void @atomic_store_f64_addr64_offset(double %in, ptr %out,
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] offset:32 scope:SCOPE_SYS
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr double, ptr %out, i64 %index
|
||||
%gep = getelementptr double, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds double, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds double, ptr %ptr, i64 4
|
||||
store atomic double %in, ptr %gep seq_cst, align 8, !noalias.addrspace !0
|
||||
ret void
|
||||
}
|
||||
@ -7748,7 +7748,7 @@ define amdgpu_kernel void @atomic_store_f64_addr64(double %in, ptr %out, i64 %in
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] scope:SCOPE_SYS
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr double, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds double, ptr %out, i64 %index
|
||||
store atomic double %in, ptr %ptr seq_cst, align 8, !noalias.addrspace !0
|
||||
ret void
|
||||
}
|
||||
@ -7825,7 +7825,7 @@ define amdgpu_kernel void @atomic_inc_i64_offset(ptr %out, i64 %in) {
|
||||
; GFX12-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile uinc_wrap ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
ret void
|
||||
}
|
||||
@ -7916,7 +7916,7 @@ define amdgpu_kernel void @atomic_inc_i64_ret_offset(ptr %out, ptr %out2, i64 %i
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile uinc_wrap ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -8007,8 +8007,8 @@ define amdgpu_kernel void @atomic_inc_i64_incr64_offset(ptr %out, i64 %in, i64 %
|
||||
; GFX12-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile uinc_wrap ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
ret void
|
||||
}
|
||||
@ -8104,8 +8104,8 @@ define amdgpu_kernel void @atomic_inc_i64_ret_incr64_offset(ptr %out, ptr %out2,
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile uinc_wrap ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -8355,7 +8355,7 @@ define amdgpu_kernel void @atomic_inc_i64_incr64(ptr %out, i64 %in, i64 %index)
|
||||
; GFX12-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile uinc_wrap ptr %ptr, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
ret void
|
||||
}
|
||||
@ -8447,7 +8447,7 @@ define amdgpu_kernel void @atomic_inc_i64_ret_incr64(ptr %out, ptr %out2, i64 %i
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile uinc_wrap ptr %ptr, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -8533,7 +8533,7 @@ define amdgpu_kernel void @atomic_dec_i64_offset(ptr %out, i64 %in) {
|
||||
; GFX12-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile udec_wrap ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
ret void
|
||||
}
|
||||
@ -8632,7 +8632,7 @@ define amdgpu_kernel void @atomic_dec_i64_ret_offset(ptr %out, ptr %out2, i64 %i
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw volatile udec_wrap ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -8731,8 +8731,8 @@ define amdgpu_kernel void @atomic_dec_i64_decr64_offset(ptr %out, i64 %in, i64 %
|
||||
; GFX12-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile udec_wrap ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
ret void
|
||||
}
|
||||
@ -8836,8 +8836,8 @@ define amdgpu_kernel void @atomic_dec_i64_ret_decr64_offset(ptr %out, ptr %out2,
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw volatile udec_wrap ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -9111,7 +9111,7 @@ define amdgpu_kernel void @atomic_dec_i64_decr64(ptr %out, i64 %in, i64 %index)
|
||||
; GFX12-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile udec_wrap ptr %ptr, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
ret void
|
||||
}
|
||||
@ -9211,7 +9211,7 @@ define amdgpu_kernel void @atomic_dec_i64_ret_decr64(ptr %out, ptr %out2, i64 %i
|
||||
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw volatile udec_wrap ptr %ptr, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
|
||||
@ -63,7 +63,7 @@ define void @flat_atomic_xchg_i64_noret_offset(ptr %out, i64 %in) {
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw xchg ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -124,7 +124,7 @@ define i64 @flat_atomic_xchg_i64_ret_offset(ptr %out, i64 %in) {
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%result = atomicrmw xchg ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret i64 %result
|
||||
}
|
||||
@ -209,7 +209,7 @@ define amdgpu_gfx void @flat_atomic_xchg_i64_noret_offset_scalar(ptr inreg %out,
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw xchg ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -294,7 +294,7 @@ define amdgpu_gfx i64 @flat_atomic_xchg_i64_ret_offset_scalar(ptr inreg %out, i6
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%result = atomicrmw xchg ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret i64 %result
|
||||
}
|
||||
@ -327,7 +327,7 @@ define void @flat_atomic_xchg_i64_noret_offset__amdgpu_no_remote_memory(ptr %out
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw xchg ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -360,7 +360,7 @@ define i64 @flat_atomic_xchg_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%result = atomicrmw xchg ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
|
||||
ret i64 %result
|
||||
}
|
||||
@ -425,7 +425,7 @@ define void @flat_atomic_xchg_f64_noret_offset(ptr %out, double %in) {
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr double, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds double, ptr %out, i32 4
|
||||
%tmp0 = atomicrmw xchg ptr %gep, double %in seq_cst, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -486,7 +486,7 @@ define double @flat_atomic_xchg_f64_ret_offset(ptr %out, double %in) {
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr double, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds double, ptr %out, i32 4
|
||||
%result = atomicrmw xchg ptr %gep, double %in seq_cst, !noalias.addrspace !1
|
||||
ret double %result
|
||||
}
|
||||
@ -571,7 +571,7 @@ define amdgpu_gfx void @flat_atomic_xchg_f64_noret_offset_scalar(ptr inreg %out,
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr double, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds double, ptr %out, i32 4
|
||||
%tmp0 = atomicrmw xchg ptr %gep, double %in seq_cst, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -656,7 +656,7 @@ define amdgpu_gfx double @flat_atomic_xchg_f64_ret_offset_scalar(ptr inreg %out,
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr double, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds double, ptr %out, i32 4
|
||||
%result = atomicrmw xchg ptr %gep, double %in seq_cst, !noalias.addrspace !1
|
||||
ret double %result
|
||||
}
|
||||
@ -689,7 +689,7 @@ define void @flat_atomic_xchg_f64_noret_offset__amdgpu_no_remote_memory(ptr %out
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr double, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds double, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw xchg ptr %gep, double %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -722,7 +722,7 @@ define double @flat_atomic_xchg_f64_ret_offset__amdgpu_no_remote_memory(ptr %out
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr double, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds double, ptr %out, i64 4
|
||||
%result = atomicrmw xchg ptr %gep, double %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
|
||||
ret double %result
|
||||
}
|
||||
@ -787,7 +787,7 @@ define void @flat_atomic_add_i64_noret_offset(ptr %out, i64 %in) {
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw add ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -848,7 +848,7 @@ define i64 @flat_atomic_add_i64_ret_offset(ptr %out, i64 %in) {
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%result = atomicrmw add ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret i64 %result
|
||||
}
|
||||
@ -933,7 +933,7 @@ define amdgpu_gfx void @flat_atomic_add_i64_noret_offset_scalar(ptr inreg %out,
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw add ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -1018,7 +1018,7 @@ define amdgpu_gfx i64 @flat_atomic_add_i64_ret_offset_scalar(ptr inreg %out, i64
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%result = atomicrmw add ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret i64 %result
|
||||
}
|
||||
@ -1051,7 +1051,7 @@ define void @flat_atomic_add_i64_noret_offset__amdgpu_no_remote_memory(ptr %out,
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw add ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -1084,7 +1084,7 @@ define i64 @flat_atomic_add_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i6
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%result = atomicrmw add ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
|
||||
ret i64 %result
|
||||
}
|
||||
@ -1251,7 +1251,7 @@ define void @flat_atomic_sub_i64_noret_offset(ptr %out, i64 %in) {
|
||||
; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw sub ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -1422,7 +1422,7 @@ define i64 @flat_atomic_sub_i64_ret_offset(ptr %out, i64 %in) {
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, v4
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, v5
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%result = atomicrmw sub ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret i64 %result
|
||||
}
|
||||
@ -1619,7 +1619,7 @@ define amdgpu_gfx void @flat_atomic_sub_i64_noret_offset_scalar(ptr inreg %out,
|
||||
; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[34:35]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw sub ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -1816,7 +1816,7 @@ define amdgpu_gfx i64 @flat_atomic_sub_i64_ret_offset_scalar(ptr inreg %out, i64
|
||||
; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[34:35]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%result = atomicrmw sub ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret i64 %result
|
||||
}
|
||||
@ -1849,7 +1849,7 @@ define void @flat_atomic_sub_i64_noret_offset__amdgpu_no_remote_memory(ptr %out,
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw sub ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -1882,7 +1882,7 @@ define i64 @flat_atomic_sub_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i6
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%result = atomicrmw sub ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
|
||||
ret i64 %result
|
||||
}
|
||||
@ -2049,7 +2049,7 @@ define void @flat_atomic_and_i64_noret_offset(ptr %out, i64 %in) {
|
||||
; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw and ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -2220,7 +2220,7 @@ define i64 @flat_atomic_and_i64_ret_offset(ptr %out, i64 %in) {
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, v4
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, v5
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%result = atomicrmw and ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret i64 %result
|
||||
}
|
||||
@ -2411,7 +2411,7 @@ define amdgpu_gfx void @flat_atomic_and_i64_noret_offset_scalar(ptr inreg %out,
|
||||
; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[34:35]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw and ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -2602,7 +2602,7 @@ define amdgpu_gfx i64 @flat_atomic_and_i64_ret_offset_scalar(ptr inreg %out, i64
|
||||
; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[34:35]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%result = atomicrmw and ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret i64 %result
|
||||
}
|
||||
@ -2635,7 +2635,7 @@ define void @flat_atomic_and_i64_noret_offset__amdgpu_no_remote_memory(ptr %out,
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw and ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -2668,7 +2668,7 @@ define i64 @flat_atomic_and_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i6
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%result = atomicrmw and ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
|
||||
ret i64 %result
|
||||
}
|
||||
@ -2847,7 +2847,7 @@ define void @flat_atomic_nand_i64_noret_offset(ptr %out, i64 %in) {
|
||||
; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw nand ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -3030,7 +3030,7 @@ define i64 @flat_atomic_nand_i64_ret_offset(ptr %out, i64 %in) {
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, v4
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, v5
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%result = atomicrmw nand ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret i64 %result
|
||||
}
|
||||
@ -3233,7 +3233,7 @@ define amdgpu_gfx void @flat_atomic_nand_i64_noret_offset_scalar(ptr inreg %out,
|
||||
; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[34:35]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw nand ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -3436,7 +3436,7 @@ define amdgpu_gfx i64 @flat_atomic_nand_i64_ret_offset_scalar(ptr inreg %out, i6
|
||||
; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[34:35]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%result = atomicrmw nand ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret i64 %result
|
||||
}
|
||||
@ -3526,7 +3526,7 @@ define void @flat_atomic_nand_i64_noret_offset__amdgpu_no_remote_memory(ptr %out
|
||||
; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw nand ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -3618,7 +3618,7 @@ define i64 @flat_atomic_nand_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, v4
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, v5
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%result = atomicrmw nand ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
|
||||
ret i64 %result
|
||||
}
|
||||
@ -3785,7 +3785,7 @@ define void @flat_atomic_or_i64_noret_offset(ptr %out, i64 %in) {
|
||||
; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw or ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -3956,7 +3956,7 @@ define i64 @flat_atomic_or_i64_ret_offset(ptr %out, i64 %in) {
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, v4
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, v5
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%result = atomicrmw or ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret i64 %result
|
||||
}
|
||||
@ -4147,7 +4147,7 @@ define amdgpu_gfx void @flat_atomic_or_i64_noret_offset_scalar(ptr inreg %out, i
|
||||
; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[34:35]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw or ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -4338,7 +4338,7 @@ define amdgpu_gfx i64 @flat_atomic_or_i64_ret_offset_scalar(ptr inreg %out, i64
|
||||
; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[34:35]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%result = atomicrmw or ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret i64 %result
|
||||
}
|
||||
@ -4371,7 +4371,7 @@ define void @flat_atomic_or_i64_noret_offset__amdgpu_no_remote_memory(ptr %out,
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw or ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -4404,7 +4404,7 @@ define i64 @flat_atomic_or_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i64
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%result = atomicrmw or ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
|
||||
ret i64 %result
|
||||
}
|
||||
@ -4571,7 +4571,7 @@ define void @flat_atomic_xor_i64_noret_offset(ptr %out, i64 %in) {
|
||||
; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw xor ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -4742,7 +4742,7 @@ define i64 @flat_atomic_xor_i64_ret_offset(ptr %out, i64 %in) {
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, v4
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, v5
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%result = atomicrmw xor ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret i64 %result
|
||||
}
|
||||
@ -4933,7 +4933,7 @@ define amdgpu_gfx void @flat_atomic_xor_i64_noret_offset_scalar(ptr inreg %out,
|
||||
; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[34:35]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw xor ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -5124,7 +5124,7 @@ define amdgpu_gfx i64 @flat_atomic_xor_i64_ret_offset_scalar(ptr inreg %out, i64
|
||||
; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[34:35]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%result = atomicrmw xor ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret i64 %result
|
||||
}
|
||||
@ -5157,7 +5157,7 @@ define void @flat_atomic_xor_i64_noret_offset__amdgpu_no_remote_memory(ptr %out,
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw xor ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -5190,7 +5190,7 @@ define i64 @flat_atomic_xor_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i6
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%result = atomicrmw xor ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
|
||||
ret i64 %result
|
||||
}
|
||||
@ -5363,7 +5363,7 @@ define void @flat_atomic_max_i64_noret_offset(ptr %out, i64 %in) {
|
||||
; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw max ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -5540,7 +5540,7 @@ define i64 @flat_atomic_max_i64_ret_offset(ptr %out, i64 %in) {
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, v4
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, v5
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%result = atomicrmw max ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret i64 %result
|
||||
}
|
||||
@ -5749,7 +5749,7 @@ define amdgpu_gfx void @flat_atomic_max_i64_noret_offset_scalar(ptr inreg %out,
|
||||
; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[34:35]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw max ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -5958,7 +5958,7 @@ define amdgpu_gfx i64 @flat_atomic_max_i64_ret_offset_scalar(ptr inreg %out, i64
|
||||
; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[34:35]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%result = atomicrmw max ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret i64 %result
|
||||
}
|
||||
@ -6064,8 +6064,8 @@ define amdgpu_kernel void @atomic_max_i64_addr64_offset(ptr %out, i64 %in, i64 %
|
||||
; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX9-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw max ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -6180,8 +6180,8 @@ define amdgpu_kernel void @atomic_max_i64_ret_addr64_offset(ptr %out, ptr %out2,
|
||||
; GFX9-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
|
||||
; GFX9-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw max ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -6284,7 +6284,7 @@ define amdgpu_kernel void @atomic_max_i64_addr64(ptr %out, i64 %in, i64 %index)
|
||||
; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX9-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw max ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -6395,7 +6395,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
|
||||
; GFX9-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
|
||||
; GFX9-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw max ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -6429,7 +6429,7 @@ define void @flat_atomic_max_i64_noret_offset__amdgpu_no_remote_memory(ptr %out,
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw max ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -6462,7 +6462,7 @@ define i64 @flat_atomic_max_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i6
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%result = atomicrmw max ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
|
||||
ret i64 %result
|
||||
}
|
||||
@ -6635,7 +6635,7 @@ define void @flat_atomic_umax_i64_noret_offset(ptr %out, i64 %in) {
|
||||
; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw umax ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -6812,7 +6812,7 @@ define i64 @flat_atomic_umax_i64_ret_offset(ptr %out, i64 %in) {
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, v4
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, v5
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%result = atomicrmw umax ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret i64 %result
|
||||
}
|
||||
@ -7021,7 +7021,7 @@ define amdgpu_gfx void @flat_atomic_umax_i64_noret_offset_scalar(ptr inreg %out,
|
||||
; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[34:35]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw umax ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -7230,7 +7230,7 @@ define amdgpu_gfx i64 @flat_atomic_umax_i64_ret_offset_scalar(ptr inreg %out, i6
|
||||
; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[34:35]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%result = atomicrmw umax ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret i64 %result
|
||||
}
|
||||
@ -7336,8 +7336,8 @@ define amdgpu_kernel void @atomic_umax_i64_addr64_offset(ptr %out, i64 %in, i64
|
||||
; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX9-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw umax ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -7452,8 +7452,8 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64_offset(ptr %out, ptr %out2
|
||||
; GFX9-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
|
||||
; GFX9-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw umax ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -7565,7 +7565,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64(ptr %out, ptr %out2, i64 %
|
||||
; GFX9-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
|
||||
; GFX9-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw umax ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -7599,7 +7599,7 @@ define void @flat_atomic_umax_i64_noret_offset__amdgpu_no_remote_memory(ptr %out
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw umax ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -7632,7 +7632,7 @@ define i64 @flat_atomic_umax_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%result = atomicrmw umax ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
|
||||
ret i64 %result
|
||||
}
|
||||
@ -7805,7 +7805,7 @@ define void @flat_atomic_umin_i64_noret_offset(ptr %out, i64 %in) {
|
||||
; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw umin ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -7982,7 +7982,7 @@ define i64 @flat_atomic_umin_i64_ret_offset(ptr %out, i64 %in) {
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, v4
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, v5
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%result = atomicrmw umin ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret i64 %result
|
||||
}
|
||||
@ -8191,7 +8191,7 @@ define amdgpu_gfx void @flat_atomic_umin_i64_noret_offset_scalar(ptr inreg %out,
|
||||
; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[34:35]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw umin ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -8400,7 +8400,7 @@ define amdgpu_gfx i64 @flat_atomic_umin_i64_ret_offset_scalar(ptr inreg %out, i6
|
||||
; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[34:35]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%result = atomicrmw umin ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret i64 %result
|
||||
}
|
||||
@ -8433,7 +8433,7 @@ define void @flat_atomic_umin_i64_noret_offset__amdgpu_no_remote_memory(ptr %out
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw umin ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -8466,7 +8466,7 @@ define i64 @flat_atomic_umin_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%result = atomicrmw umin ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
|
||||
ret i64 %result
|
||||
}
|
||||
@ -8639,7 +8639,7 @@ define void @flat_atomic_min_i64_noret_offset(ptr %out, i64 %in) {
|
||||
; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw min ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -8816,7 +8816,7 @@ define i64 @flat_atomic_min_i64_ret_offset(ptr %out, i64 %in) {
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, v4
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, v5
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%result = atomicrmw min ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret i64 %result
|
||||
}
|
||||
@ -9025,7 +9025,7 @@ define amdgpu_gfx void @flat_atomic_min_i64_noret_offset_scalar(ptr inreg %out,
|
||||
; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[34:35]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw min ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -9234,7 +9234,7 @@ define amdgpu_gfx i64 @flat_atomic_min_i64_ret_offset_scalar(ptr inreg %out, i64
|
||||
; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[34:35]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%result = atomicrmw min ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret i64 %result
|
||||
}
|
||||
@ -9340,8 +9340,8 @@ define amdgpu_kernel void @atomic_min_i64_addr64_offset(ptr %out, i64 %in, i64 %
|
||||
; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX9-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw min ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -9456,8 +9456,8 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64_offset(ptr %out, ptr %out2,
|
||||
; GFX9-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
|
||||
; GFX9-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%gep = getelementptr i64, ptr %ptr, i64 4
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i64 4
|
||||
%tmp0 = atomicrmw min ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -9664,7 +9664,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64(ptr %out, ptr %out2, i64 %i
|
||||
; GFX9-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
|
||||
; GFX9-NEXT: s_endpgm
|
||||
entry:
|
||||
%ptr = getelementptr i64, ptr %out, i64 %index
|
||||
%ptr = getelementptr inbounds i64, ptr %out, i64 %index
|
||||
%tmp0 = atomicrmw min ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1, !noalias.addrspace !1
|
||||
store i64 %tmp0, ptr %out2
|
||||
ret void
|
||||
@ -9698,7 +9698,7 @@ define void @flat_atomic_min_i64_noret_offset__amdgpu_no_remote_memory(ptr %out,
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw min ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -9731,7 +9731,7 @@ define i64 @flat_atomic_min_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i6
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%result = atomicrmw min ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
|
||||
ret i64 %result
|
||||
}
|
||||
@ -9916,7 +9916,7 @@ define void @flat_atomic_uinc_wrap_i64_noret_offset(ptr %out, i64 %in) {
|
||||
; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw uinc_wrap ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -10105,7 +10105,7 @@ define i64 @flat_atomic_uinc_wrap_i64_ret_offset(ptr %out, i64 %in) {
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, v4
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, v5
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%result = atomicrmw uinc_wrap ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret i64 %result
|
||||
}
|
||||
@ -10314,7 +10314,7 @@ define amdgpu_gfx void @flat_atomic_uinc_wrap_i64_noret_offset_scalar(ptr inreg
|
||||
; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[34:35]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw uinc_wrap ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -10523,7 +10523,7 @@ define amdgpu_gfx i64 @flat_atomic_uinc_wrap_i64_ret_offset_scalar(ptr inreg %ou
|
||||
; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[34:35]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%result = atomicrmw uinc_wrap ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret i64 %result
|
||||
}
|
||||
@ -10556,7 +10556,7 @@ define void @flat_atomic_uinc_wrap_i64_noret_offset__amdgpu_no_remote_memory(ptr
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw uinc_wrap ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -10589,7 +10589,7 @@ define i64 @flat_atomic_uinc_wrap_i64_ret_offset__amdgpu_no_remote_memory(ptr %o
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%result = atomicrmw uinc_wrap ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
|
||||
ret i64 %result
|
||||
}
|
||||
@ -10786,7 +10786,7 @@ define void @flat_atomic_udec_wrap_i64_noret_offset(ptr %out, i64 %in) {
|
||||
; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw udec_wrap ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -10987,7 +10987,7 @@ define i64 @flat_atomic_udec_wrap_i64_ret_offset(ptr %out, i64 %in) {
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, v4
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, v5
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%result = atomicrmw udec_wrap ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret i64 %result
|
||||
}
|
||||
@ -11220,7 +11220,7 @@ define amdgpu_gfx void @flat_atomic_udec_wrap_i64_noret_offset_scalar(ptr inreg
|
||||
; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[38:39]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw udec_wrap ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -11453,7 +11453,7 @@ define amdgpu_gfx i64 @flat_atomic_udec_wrap_i64_ret_offset_scalar(ptr inreg %ou
|
||||
; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX9-NEXT: s_or_b64 exec, exec, s[38:39]
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%result = atomicrmw udec_wrap ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
|
||||
ret i64 %result
|
||||
}
|
||||
@ -11486,7 +11486,7 @@ define void @flat_atomic_udec_wrap_i64_noret_offset__amdgpu_no_remote_memory(ptr
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%tmp0 = atomicrmw udec_wrap ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
|
||||
ret void
|
||||
}
|
||||
@ -11519,7 +11519,7 @@ define i64 @flat_atomic_udec_wrap_i64_ret_offset__amdgpu_no_remote_memory(ptr %o
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i64, ptr %out, i64 4
|
||||
%gep = getelementptr inbounds i64, ptr %out, i64 4
|
||||
%result = atomicrmw udec_wrap ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
|
||||
ret i64 %result
|
||||
}
|
||||
|
||||
@ -579,7 +579,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset(ptr %out, ptr %ptr) #0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX9-NEXT: flat_store_dword v[0:1], v2
|
||||
; GFX9-NEXT: s_endpgm
|
||||
%gep = getelementptr i32, ptr %ptr, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
|
||||
%result = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false)
|
||||
store i32 %result, ptr %out
|
||||
ret void
|
||||
@ -665,7 +665,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(ptr %ptr) nounwind {
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_endpgm
|
||||
%gep = getelementptr i32, ptr %ptr, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
|
||||
%result = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false)
|
||||
ret void
|
||||
}
|
||||
@ -729,9 +729,9 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_addr64(ptr %out, ptr %
|
||||
; GFX9-NEXT: flat_store_dword v[0:1], v3
|
||||
; GFX9-NEXT: s_endpgm
|
||||
%id = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%gep.tid = getelementptr i32, ptr %ptr, i32 %id
|
||||
%out.gep = getelementptr i32, ptr %out, i32 %id
|
||||
%gep = getelementptr i32, ptr %gep.tid, i32 5
|
||||
%gep.tid = getelementptr inbounds i32, ptr %ptr, i32 %id
|
||||
%out.gep = getelementptr inbounds i32, ptr %out, i32 %id
|
||||
%gep = getelementptr inbounds i32, ptr %gep.tid, i32 5
|
||||
%result = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false)
|
||||
store i32 %result, ptr %out.gep
|
||||
ret void
|
||||
@ -784,8 +784,8 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_addr64(ptr %ptr) #0
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_endpgm
|
||||
%id = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%gep.tid = getelementptr i32, ptr %ptr, i32 %id
|
||||
%gep = getelementptr i32, ptr %gep.tid, i32 5
|
||||
%gep.tid = getelementptr inbounds i32, ptr %ptr, i32 %id
|
||||
%gep = getelementptr inbounds i32, ptr %gep.tid, i32 5
|
||||
%result = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false)
|
||||
ret void
|
||||
}
|
||||
@ -895,7 +895,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset(ptr %out, ptr %ptr) #0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v3, s1
|
||||
; GFX9-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
|
||||
; GFX9-NEXT: s_endpgm
|
||||
%gep = getelementptr i64, ptr %ptr, i32 4
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i32 4
|
||||
%result = call i64 @llvm.amdgcn.atomic.dec.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false)
|
||||
store i64 %result, ptr %out
|
||||
ret void
|
||||
@ -987,7 +987,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset(ptr %ptr) nounwind {
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_endpgm
|
||||
%gep = getelementptr i64, ptr %ptr, i32 4
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i32 4
|
||||
%result = call i64 @llvm.amdgcn.atomic.dec.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false)
|
||||
ret void
|
||||
}
|
||||
@ -1054,9 +1054,9 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset_addr64(ptr %out, ptr %
|
||||
; GFX9-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
|
||||
; GFX9-NEXT: s_endpgm
|
||||
%id = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%gep.tid = getelementptr i64, ptr %ptr, i32 %id
|
||||
%out.gep = getelementptr i64, ptr %out, i32 %id
|
||||
%gep = getelementptr i64, ptr %gep.tid, i32 5
|
||||
%gep.tid = getelementptr inbounds i64, ptr %ptr, i32 %id
|
||||
%out.gep = getelementptr inbounds i64, ptr %out, i32 %id
|
||||
%gep = getelementptr inbounds i64, ptr %gep.tid, i32 5
|
||||
%result = call i64 @llvm.amdgcn.atomic.dec.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false)
|
||||
store i64 %result, ptr %out.gep
|
||||
ret void
|
||||
@ -1112,8 +1112,8 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_addr64(ptr %ptr) #0
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_endpgm
|
||||
%id = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%gep.tid = getelementptr i64, ptr %ptr, i32 %id
|
||||
%gep = getelementptr i64, ptr %gep.tid, i32 5
|
||||
%gep.tid = getelementptr inbounds i64, ptr %ptr, i32 %id
|
||||
%gep = getelementptr inbounds i64, ptr %gep.tid, i32 5
|
||||
%result = call i64 @llvm.amdgcn.atomic.dec.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -1145,7 +1145,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset(ptr %out, ptr %ptr) #0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX9-NEXT: flat_store_dword v[0:1], v2
|
||||
; GFX9-NEXT: s_endpgm
|
||||
%gep = getelementptr i32, ptr %ptr, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
|
||||
%result = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false)
|
||||
store i32 %result, ptr %out
|
||||
ret void
|
||||
@ -1231,7 +1231,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset(ptr %ptr) nounwind {
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_endpgm
|
||||
%gep = getelementptr i32, ptr %ptr, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
|
||||
%result = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false)
|
||||
ret void
|
||||
}
|
||||
@ -1295,9 +1295,9 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_addr64(ptr %out, ptr %
|
||||
; GFX9-NEXT: flat_store_dword v[0:1], v3
|
||||
; GFX9-NEXT: s_endpgm
|
||||
%id = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%gep.tid = getelementptr i32, ptr %ptr, i32 %id
|
||||
%out.gep = getelementptr i32, ptr %out, i32 %id
|
||||
%gep = getelementptr i32, ptr %gep.tid, i32 5
|
||||
%gep.tid = getelementptr inbounds i32, ptr %ptr, i32 %id
|
||||
%out.gep = getelementptr inbounds i32, ptr %out, i32 %id
|
||||
%gep = getelementptr inbounds i32, ptr %gep.tid, i32 5
|
||||
%result = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false)
|
||||
store i32 %result, ptr %out.gep
|
||||
ret void
|
||||
@ -1350,8 +1350,8 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(ptr %ptr) #0
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_endpgm
|
||||
%id = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%gep.tid = getelementptr i32, ptr %ptr, i32 %id
|
||||
%gep = getelementptr i32, ptr %gep.tid, i32 5
|
||||
%gep.tid = getelementptr inbounds i32, ptr %ptr, i32 %id
|
||||
%gep = getelementptr inbounds i32, ptr %gep.tid, i32 5
|
||||
%result = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false)
|
||||
ret void
|
||||
}
|
||||
@ -1533,7 +1533,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset(ptr %out, ptr %ptr) #0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v3, s1
|
||||
; GFX9-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
|
||||
; GFX9-NEXT: s_endpgm
|
||||
%gep = getelementptr i64, ptr %ptr, i32 4
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i32 4
|
||||
%result = call i64 @llvm.amdgcn.atomic.inc.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false)
|
||||
store i64 %result, ptr %out
|
||||
ret void
|
||||
@ -1625,7 +1625,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset(ptr %ptr) nounwind {
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_endpgm
|
||||
%gep = getelementptr i64, ptr %ptr, i32 4
|
||||
%gep = getelementptr inbounds i64, ptr %ptr, i32 4
|
||||
%result = call i64 @llvm.amdgcn.atomic.inc.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false)
|
||||
ret void
|
||||
}
|
||||
@ -1692,9 +1692,9 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(ptr %out, ptr %
|
||||
; GFX9-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
|
||||
; GFX9-NEXT: s_endpgm
|
||||
%id = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%gep.tid = getelementptr i64, ptr %ptr, i32 %id
|
||||
%out.gep = getelementptr i64, ptr %out, i32 %id
|
||||
%gep = getelementptr i64, ptr %gep.tid, i32 5
|
||||
%gep.tid = getelementptr inbounds i64, ptr %ptr, i32 %id
|
||||
%out.gep = getelementptr inbounds i64, ptr %out, i32 %id
|
||||
%gep = getelementptr inbounds i64, ptr %gep.tid, i32 5
|
||||
%result = call i64 @llvm.amdgcn.atomic.inc.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false)
|
||||
store i64 %result, ptr %out.gep
|
||||
ret void
|
||||
@ -1750,8 +1750,8 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(ptr %ptr) #0
|
||||
; GFX9-NEXT: buffer_wbinvl1_vol
|
||||
; GFX9-NEXT: s_endpgm
|
||||
%id = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%gep.tid = getelementptr i64, ptr %ptr, i32 %id
|
||||
%gep = getelementptr i64, ptr %gep.tid, i32 5
|
||||
%gep.tid = getelementptr inbounds i64, ptr %ptr, i32 %id
|
||||
%gep = getelementptr inbounds i64, ptr %gep.tid, i32 5
|
||||
%result = call i64 @llvm.amdgcn.atomic.inc.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -3486,7 +3486,7 @@ define amdgpu_kernel void @flat_agent_monotonic_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") monotonic monotonic
|
||||
ret void
|
||||
}
|
||||
@ -3772,7 +3772,7 @@ define amdgpu_kernel void @flat_agent_acquire_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") acquire monotonic
|
||||
ret void
|
||||
}
|
||||
@ -4052,7 +4052,7 @@ define amdgpu_kernel void @flat_agent_release_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") release monotonic
|
||||
ret void
|
||||
}
|
||||
@ -4365,7 +4365,7 @@ define amdgpu_kernel void @flat_agent_acq_rel_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") acq_rel monotonic
|
||||
ret void
|
||||
}
|
||||
@ -4678,7 +4678,7 @@ define amdgpu_kernel void @flat_agent_seq_cst_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") seq_cst monotonic
|
||||
ret void
|
||||
}
|
||||
@ -4964,7 +4964,7 @@ define amdgpu_kernel void @flat_agent_monotonic_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") monotonic acquire
|
||||
ret void
|
||||
}
|
||||
@ -5250,7 +5250,7 @@ define amdgpu_kernel void @flat_agent_acquire_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") acquire acquire
|
||||
ret void
|
||||
}
|
||||
@ -5563,7 +5563,7 @@ define amdgpu_kernel void @flat_agent_release_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") release acquire
|
||||
ret void
|
||||
}
|
||||
@ -5876,7 +5876,7 @@ define amdgpu_kernel void @flat_agent_acq_rel_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") acq_rel acquire
|
||||
ret void
|
||||
}
|
||||
@ -6189,7 +6189,7 @@ define amdgpu_kernel void @flat_agent_seq_cst_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") seq_cst acquire
|
||||
ret void
|
||||
}
|
||||
@ -6502,7 +6502,7 @@ define amdgpu_kernel void @flat_agent_monotonic_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") monotonic seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -6815,7 +6815,7 @@ define amdgpu_kernel void @flat_agent_acquire_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") acquire seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -7128,7 +7128,7 @@ define amdgpu_kernel void @flat_agent_release_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") release seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -7441,7 +7441,7 @@ define amdgpu_kernel void @flat_agent_acq_rel_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") acq_rel seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -7754,7 +7754,7 @@ define amdgpu_kernel void @flat_agent_seq_cst_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -8053,7 +8053,7 @@ define amdgpu_kernel void @flat_agent_monotonic_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") monotonic monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -8370,7 +8370,7 @@ define amdgpu_kernel void @flat_agent_acquire_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") acquire monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -8698,7 +8698,7 @@ define amdgpu_kernel void @flat_agent_release_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") release monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -9042,7 +9042,7 @@ define amdgpu_kernel void @flat_agent_acq_rel_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") acq_rel monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -9386,7 +9386,7 @@ define amdgpu_kernel void @flat_agent_seq_cst_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") seq_cst monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -9703,7 +9703,7 @@ define amdgpu_kernel void @flat_agent_monotonic_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") monotonic acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -10020,7 +10020,7 @@ define amdgpu_kernel void @flat_agent_acquire_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") acquire acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -10364,7 +10364,7 @@ define amdgpu_kernel void @flat_agent_release_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") release acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -10708,7 +10708,7 @@ define amdgpu_kernel void @flat_agent_acq_rel_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") acq_rel acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -11052,7 +11052,7 @@ define amdgpu_kernel void @flat_agent_seq_cst_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") seq_cst acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -11396,7 +11396,7 @@ define amdgpu_kernel void @flat_agent_monotonic_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") monotonic seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -11740,7 +11740,7 @@ define amdgpu_kernel void @flat_agent_acquire_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") acquire seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -12084,7 +12084,7 @@ define amdgpu_kernel void @flat_agent_release_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") release seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -12428,7 +12428,7 @@ define amdgpu_kernel void @flat_agent_acq_rel_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") acq_rel seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -12772,7 +12772,7 @@ define amdgpu_kernel void @flat_agent_seq_cst_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -16295,7 +16295,7 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic monotonic
|
||||
ret void
|
||||
}
|
||||
@ -16577,7 +16577,7 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire monotonic
|
||||
ret void
|
||||
}
|
||||
@ -16857,7 +16857,7 @@ define amdgpu_kernel void @flat_agent_one_as_release_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") release monotonic
|
||||
ret void
|
||||
}
|
||||
@ -17166,7 +17166,7 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel monotonic
|
||||
ret void
|
||||
}
|
||||
@ -17475,7 +17475,7 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst monotonic
|
||||
ret void
|
||||
}
|
||||
@ -17757,7 +17757,7 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic acquire
|
||||
ret void
|
||||
}
|
||||
@ -18039,7 +18039,7 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire acquire
|
||||
ret void
|
||||
}
|
||||
@ -18348,7 +18348,7 @@ define amdgpu_kernel void @flat_agent_one_as_release_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") release acquire
|
||||
ret void
|
||||
}
|
||||
@ -18657,7 +18657,7 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel acquire
|
||||
ret void
|
||||
}
|
||||
@ -18966,7 +18966,7 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst acquire
|
||||
ret void
|
||||
}
|
||||
@ -19275,7 +19275,7 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -19584,7 +19584,7 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -19893,7 +19893,7 @@ define amdgpu_kernel void @flat_agent_one_as_release_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") release seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -20202,7 +20202,7 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -20511,7 +20511,7 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -20810,7 +20810,7 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -21138,7 +21138,7 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -21466,7 +21466,7 @@ define amdgpu_kernel void @flat_agent_one_as_release_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") release monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -21821,7 +21821,7 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -22176,7 +22176,7 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -22504,7 +22504,7 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -22832,7 +22832,7 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -23187,7 +23187,7 @@ define amdgpu_kernel void @flat_agent_one_as_release_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") release acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -23542,7 +23542,7 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -23897,7 +23897,7 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -24252,7 +24252,7 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -24607,7 +24607,7 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -24962,7 +24962,7 @@ define amdgpu_kernel void @flat_agent_one_as_release_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") release seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -25317,7 +25317,7 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -25672,7 +25672,7 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
|
||||
@ -3479,7 +3479,7 @@ define amdgpu_kernel void @flat_cluster_monotonic_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") monotonic monotonic
|
||||
ret void
|
||||
}
|
||||
@ -3765,7 +3765,7 @@ define amdgpu_kernel void @flat_cluster_acquire_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") acquire monotonic
|
||||
ret void
|
||||
}
|
||||
@ -4044,7 +4044,7 @@ define amdgpu_kernel void @flat_cluster_release_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") release monotonic
|
||||
ret void
|
||||
}
|
||||
@ -4356,7 +4356,7 @@ define amdgpu_kernel void @flat_cluster_acq_rel_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") acq_rel monotonic
|
||||
ret void
|
||||
}
|
||||
@ -4668,7 +4668,7 @@ define amdgpu_kernel void @flat_cluster_seq_cst_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") seq_cst monotonic
|
||||
ret void
|
||||
}
|
||||
@ -4954,7 +4954,7 @@ define amdgpu_kernel void @flat_cluster_monotonic_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") monotonic acquire
|
||||
ret void
|
||||
}
|
||||
@ -5240,7 +5240,7 @@ define amdgpu_kernel void @flat_cluster_acquire_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") acquire acquire
|
||||
ret void
|
||||
}
|
||||
@ -5552,7 +5552,7 @@ define amdgpu_kernel void @flat_cluster_release_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") release acquire
|
||||
ret void
|
||||
}
|
||||
@ -5864,7 +5864,7 @@ define amdgpu_kernel void @flat_cluster_acq_rel_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") acq_rel acquire
|
||||
ret void
|
||||
}
|
||||
@ -6176,7 +6176,7 @@ define amdgpu_kernel void @flat_cluster_seq_cst_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") seq_cst acquire
|
||||
ret void
|
||||
}
|
||||
@ -6488,7 +6488,7 @@ define amdgpu_kernel void @flat_cluster_monotonic_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") monotonic seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -6800,7 +6800,7 @@ define amdgpu_kernel void @flat_cluster_acquire_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") acquire seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -7112,7 +7112,7 @@ define amdgpu_kernel void @flat_cluster_release_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") release seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -7424,7 +7424,7 @@ define amdgpu_kernel void @flat_cluster_acq_rel_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") acq_rel seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -7736,7 +7736,7 @@ define amdgpu_kernel void @flat_cluster_seq_cst_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") seq_cst seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -8035,7 +8035,7 @@ define amdgpu_kernel void @flat_cluster_monotonic_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") monotonic monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -8352,7 +8352,7 @@ define amdgpu_kernel void @flat_cluster_acquire_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") acquire monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -8679,7 +8679,7 @@ define amdgpu_kernel void @flat_cluster_release_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") release monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -9022,7 +9022,7 @@ define amdgpu_kernel void @flat_cluster_acq_rel_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") acq_rel monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -9365,7 +9365,7 @@ define amdgpu_kernel void @flat_cluster_seq_cst_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") seq_cst monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -9682,7 +9682,7 @@ define amdgpu_kernel void @flat_cluster_monotonic_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") monotonic acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -9999,7 +9999,7 @@ define amdgpu_kernel void @flat_cluster_acquire_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") acquire acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -10342,7 +10342,7 @@ define amdgpu_kernel void @flat_cluster_release_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") release acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -10685,7 +10685,7 @@ define amdgpu_kernel void @flat_cluster_acq_rel_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") acq_rel acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -11028,7 +11028,7 @@ define amdgpu_kernel void @flat_cluster_seq_cst_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") seq_cst acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -11371,7 +11371,7 @@ define amdgpu_kernel void @flat_cluster_monotonic_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") monotonic seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -11714,7 +11714,7 @@ define amdgpu_kernel void @flat_cluster_acquire_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") acquire seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -12057,7 +12057,7 @@ define amdgpu_kernel void @flat_cluster_release_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") release seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -12400,7 +12400,7 @@ define amdgpu_kernel void @flat_cluster_acq_rel_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") acq_rel seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -12743,7 +12743,7 @@ define amdgpu_kernel void @flat_cluster_seq_cst_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") seq_cst seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -16259,7 +16259,7 @@ define amdgpu_kernel void @flat_cluster_one_as_monotonic_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") monotonic monotonic
|
||||
ret void
|
||||
}
|
||||
@ -16541,7 +16541,7 @@ define amdgpu_kernel void @flat_cluster_one_as_acquire_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") acquire monotonic
|
||||
ret void
|
||||
}
|
||||
@ -16820,7 +16820,7 @@ define amdgpu_kernel void @flat_cluster_one_as_release_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") release monotonic
|
||||
ret void
|
||||
}
|
||||
@ -17128,7 +17128,7 @@ define amdgpu_kernel void @flat_cluster_one_as_acq_rel_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") acq_rel monotonic
|
||||
ret void
|
||||
}
|
||||
@ -17436,7 +17436,7 @@ define amdgpu_kernel void @flat_cluster_one_as_seq_cst_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") seq_cst monotonic
|
||||
ret void
|
||||
}
|
||||
@ -17718,7 +17718,7 @@ define amdgpu_kernel void @flat_cluster_one_as_monotonic_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") monotonic acquire
|
||||
ret void
|
||||
}
|
||||
@ -18000,7 +18000,7 @@ define amdgpu_kernel void @flat_cluster_one_as_acquire_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") acquire acquire
|
||||
ret void
|
||||
}
|
||||
@ -18308,7 +18308,7 @@ define amdgpu_kernel void @flat_cluster_one_as_release_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") release acquire
|
||||
ret void
|
||||
}
|
||||
@ -18616,7 +18616,7 @@ define amdgpu_kernel void @flat_cluster_one_as_acq_rel_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") acq_rel acquire
|
||||
ret void
|
||||
}
|
||||
@ -18924,7 +18924,7 @@ define amdgpu_kernel void @flat_cluster_one_as_seq_cst_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") seq_cst acquire
|
||||
ret void
|
||||
}
|
||||
@ -19232,7 +19232,7 @@ define amdgpu_kernel void @flat_cluster_one_as_monotonic_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") monotonic seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -19540,7 +19540,7 @@ define amdgpu_kernel void @flat_cluster_one_as_acquire_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") acquire seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -19848,7 +19848,7 @@ define amdgpu_kernel void @flat_cluster_one_as_release_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") release seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -20156,7 +20156,7 @@ define amdgpu_kernel void @flat_cluster_one_as_acq_rel_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") acq_rel seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -20464,7 +20464,7 @@ define amdgpu_kernel void @flat_cluster_one_as_seq_cst_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") seq_cst seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -20763,7 +20763,7 @@ define amdgpu_kernel void @flat_cluster_one_as_monotonic_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") monotonic monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -21091,7 +21091,7 @@ define amdgpu_kernel void @flat_cluster_one_as_acquire_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") acquire monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -21418,7 +21418,7 @@ define amdgpu_kernel void @flat_cluster_one_as_release_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") release monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -21772,7 +21772,7 @@ define amdgpu_kernel void @flat_cluster_one_as_acq_rel_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") acq_rel monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -22126,7 +22126,7 @@ define amdgpu_kernel void @flat_cluster_one_as_seq_cst_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") seq_cst monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -22454,7 +22454,7 @@ define amdgpu_kernel void @flat_cluster_one_as_monotonic_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") monotonic acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -22782,7 +22782,7 @@ define amdgpu_kernel void @flat_cluster_one_as_acquire_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") acquire acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -23136,7 +23136,7 @@ define amdgpu_kernel void @flat_cluster_one_as_release_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") release acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -23490,7 +23490,7 @@ define amdgpu_kernel void @flat_cluster_one_as_acq_rel_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") acq_rel acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -23844,7 +23844,7 @@ define amdgpu_kernel void @flat_cluster_one_as_seq_cst_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") seq_cst acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -24198,7 +24198,7 @@ define amdgpu_kernel void @flat_cluster_one_as_monotonic_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") monotonic seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -24552,7 +24552,7 @@ define amdgpu_kernel void @flat_cluster_one_as_acquire_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") acquire seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -24906,7 +24906,7 @@ define amdgpu_kernel void @flat_cluster_one_as_release_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") release seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -25260,7 +25260,7 @@ define amdgpu_kernel void @flat_cluster_one_as_acq_rel_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") acq_rel seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -25614,7 +25614,7 @@ define amdgpu_kernel void @flat_cluster_one_as_seq_cst_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") seq_cst seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
|
||||
@ -3094,7 +3094,7 @@ define amdgpu_kernel void @flat_singlethread_monotonic_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") monotonic monotonic
|
||||
ret void
|
||||
}
|
||||
@ -3347,7 +3347,7 @@ define amdgpu_kernel void @flat_singlethread_acquire_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") acquire monotonic
|
||||
ret void
|
||||
}
|
||||
@ -3600,7 +3600,7 @@ define amdgpu_kernel void @flat_singlethread_release_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") release monotonic
|
||||
ret void
|
||||
}
|
||||
@ -3853,7 +3853,7 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel monotonic
|
||||
ret void
|
||||
}
|
||||
@ -4106,7 +4106,7 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst monotonic
|
||||
ret void
|
||||
}
|
||||
@ -4359,7 +4359,7 @@ define amdgpu_kernel void @flat_singlethread_monotonic_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") monotonic acquire
|
||||
ret void
|
||||
}
|
||||
@ -4612,7 +4612,7 @@ define amdgpu_kernel void @flat_singlethread_acquire_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") acquire acquire
|
||||
ret void
|
||||
}
|
||||
@ -4865,7 +4865,7 @@ define amdgpu_kernel void @flat_singlethread_release_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") release acquire
|
||||
ret void
|
||||
}
|
||||
@ -5118,7 +5118,7 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel acquire
|
||||
ret void
|
||||
}
|
||||
@ -5371,7 +5371,7 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst acquire
|
||||
ret void
|
||||
}
|
||||
@ -5624,7 +5624,7 @@ define amdgpu_kernel void @flat_singlethread_monotonic_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") monotonic seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -5877,7 +5877,7 @@ define amdgpu_kernel void @flat_singlethread_acquire_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") acquire seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -6130,7 +6130,7 @@ define amdgpu_kernel void @flat_singlethread_release_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") release seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -6383,7 +6383,7 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -6636,7 +6636,7 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -6935,7 +6935,7 @@ define amdgpu_kernel void @flat_singlethread_monotonic_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") monotonic monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -7236,7 +7236,7 @@ define amdgpu_kernel void @flat_singlethread_acquire_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") acquire monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -7537,7 +7537,7 @@ define amdgpu_kernel void @flat_singlethread_release_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") release monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -7838,7 +7838,7 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -8139,7 +8139,7 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -8440,7 +8440,7 @@ define amdgpu_kernel void @flat_singlethread_monotonic_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") monotonic acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -8741,7 +8741,7 @@ define amdgpu_kernel void @flat_singlethread_acquire_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") acquire acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -9042,7 +9042,7 @@ define amdgpu_kernel void @flat_singlethread_release_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") release acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -9343,7 +9343,7 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -9644,7 +9644,7 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -9945,7 +9945,7 @@ define amdgpu_kernel void @flat_singlethread_monotonic_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") monotonic seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -10246,7 +10246,7 @@ define amdgpu_kernel void @flat_singlethread_acquire_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") acquire seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -10547,7 +10547,7 @@ define amdgpu_kernel void @flat_singlethread_release_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") release seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -10848,7 +10848,7 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -11149,7 +11149,7 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -14237,7 +14237,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic monotonic
|
||||
ret void
|
||||
}
|
||||
@ -14490,7 +14490,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire monotonic
|
||||
ret void
|
||||
}
|
||||
@ -14743,7 +14743,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release monotonic
|
||||
ret void
|
||||
}
|
||||
@ -14996,7 +14996,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel monotonic
|
||||
ret void
|
||||
}
|
||||
@ -15249,7 +15249,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst monotonic
|
||||
ret void
|
||||
}
|
||||
@ -15502,7 +15502,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic acquire
|
||||
ret void
|
||||
}
|
||||
@ -15755,7 +15755,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire acquire
|
||||
ret void
|
||||
}
|
||||
@ -16008,7 +16008,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release acquire
|
||||
ret void
|
||||
}
|
||||
@ -16261,7 +16261,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel acquire
|
||||
ret void
|
||||
}
|
||||
@ -16514,7 +16514,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst acquire
|
||||
ret void
|
||||
}
|
||||
@ -16767,7 +16767,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -17020,7 +17020,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -17273,7 +17273,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -17526,7 +17526,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -17779,7 +17779,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -18078,7 +18078,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_monotonic_ret_cmpx
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -18379,7 +18379,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_monotonic_ret_cmpxch
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -18680,7 +18680,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_monotonic_ret_cmpxch
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -18981,7 +18981,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_monotonic_ret_cmpxch
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -19282,7 +19282,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_monotonic_ret_cmpxch
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -19583,7 +19583,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_acquire_ret_cmpxch
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -19884,7 +19884,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -20185,7 +20185,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -20486,7 +20486,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -20787,7 +20787,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -21088,7 +21088,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_seq_cst_ret_cmpxch
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -21389,7 +21389,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -21690,7 +21690,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -21991,7 +21991,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -22292,7 +22292,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
|
||||
@ -3530,7 +3530,7 @@ define amdgpu_kernel void @flat_system_monotonic_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in monotonic monotonic
|
||||
ret void
|
||||
}
|
||||
@ -3818,7 +3818,7 @@ define amdgpu_kernel void @flat_system_acquire_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acquire monotonic
|
||||
ret void
|
||||
}
|
||||
@ -4102,7 +4102,7 @@ define amdgpu_kernel void @flat_system_release_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in release monotonic
|
||||
ret void
|
||||
}
|
||||
@ -4421,7 +4421,7 @@ define amdgpu_kernel void @flat_system_acq_rel_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acq_rel monotonic
|
||||
ret void
|
||||
}
|
||||
@ -4740,7 +4740,7 @@ define amdgpu_kernel void @flat_system_seq_cst_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in seq_cst monotonic
|
||||
ret void
|
||||
}
|
||||
@ -5028,7 +5028,7 @@ define amdgpu_kernel void @flat_system_monotonic_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in monotonic acquire
|
||||
ret void
|
||||
}
|
||||
@ -5316,7 +5316,7 @@ define amdgpu_kernel void @flat_system_acquire_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acquire acquire
|
||||
ret void
|
||||
}
|
||||
@ -5635,7 +5635,7 @@ define amdgpu_kernel void @flat_system_release_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in release acquire
|
||||
ret void
|
||||
}
|
||||
@ -5954,7 +5954,7 @@ define amdgpu_kernel void @flat_system_acq_rel_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acq_rel acquire
|
||||
ret void
|
||||
}
|
||||
@ -6273,7 +6273,7 @@ define amdgpu_kernel void @flat_system_seq_cst_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in seq_cst acquire
|
||||
ret void
|
||||
}
|
||||
@ -6592,7 +6592,7 @@ define amdgpu_kernel void @flat_system_monotonic_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in monotonic seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -6911,7 +6911,7 @@ define amdgpu_kernel void @flat_system_acquire_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acquire seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -7230,7 +7230,7 @@ define amdgpu_kernel void @flat_system_release_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in release seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -7549,7 +7549,7 @@ define amdgpu_kernel void @flat_system_acq_rel_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acq_rel seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -7868,7 +7868,7 @@ define amdgpu_kernel void @flat_system_seq_cst_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in seq_cst seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -8167,7 +8167,7 @@ define amdgpu_kernel void @flat_system_monotonic_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in monotonic monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -8486,7 +8486,7 @@ define amdgpu_kernel void @flat_system_acquire_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acquire monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -8818,7 +8818,7 @@ define amdgpu_kernel void @flat_system_release_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in release monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -9168,7 +9168,7 @@ define amdgpu_kernel void @flat_system_acq_rel_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acq_rel monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -9518,7 +9518,7 @@ define amdgpu_kernel void @flat_system_seq_cst_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in seq_cst monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -9837,7 +9837,7 @@ define amdgpu_kernel void @flat_system_monotonic_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in monotonic acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -10156,7 +10156,7 @@ define amdgpu_kernel void @flat_system_acquire_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acquire acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -10506,7 +10506,7 @@ define amdgpu_kernel void @flat_system_release_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in release acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -10856,7 +10856,7 @@ define amdgpu_kernel void @flat_system_acq_rel_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acq_rel acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -11206,7 +11206,7 @@ define amdgpu_kernel void @flat_system_seq_cst_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in seq_cst acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -11556,7 +11556,7 @@ define amdgpu_kernel void @flat_system_monotonic_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in monotonic seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -11906,7 +11906,7 @@ define amdgpu_kernel void @flat_system_acquire_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acquire seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -12256,7 +12256,7 @@ define amdgpu_kernel void @flat_system_release_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in release seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -12606,7 +12606,7 @@ define amdgpu_kernel void @flat_system_acq_rel_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acq_rel seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -12956,7 +12956,7 @@ define amdgpu_kernel void @flat_system_seq_cst_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in seq_cst seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -16523,7 +16523,7 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") monotonic monotonic
|
||||
ret void
|
||||
}
|
||||
@ -16807,7 +16807,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acquire monotonic
|
||||
ret void
|
||||
}
|
||||
@ -17091,7 +17091,7 @@ define amdgpu_kernel void @flat_system_one_as_release_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") release monotonic
|
||||
ret void
|
||||
}
|
||||
@ -17406,7 +17406,7 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acq_rel monotonic
|
||||
ret void
|
||||
}
|
||||
@ -17721,7 +17721,7 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") seq_cst monotonic
|
||||
ret void
|
||||
}
|
||||
@ -18005,7 +18005,7 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") monotonic acquire
|
||||
ret void
|
||||
}
|
||||
@ -18289,7 +18289,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acquire acquire
|
||||
ret void
|
||||
}
|
||||
@ -18604,7 +18604,7 @@ define amdgpu_kernel void @flat_system_one_as_release_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") release acquire
|
||||
ret void
|
||||
}
|
||||
@ -18919,7 +18919,7 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acq_rel acquire
|
||||
ret void
|
||||
}
|
||||
@ -19234,7 +19234,7 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") seq_cst acquire
|
||||
ret void
|
||||
}
|
||||
@ -19549,7 +19549,7 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") monotonic seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -19864,7 +19864,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acquire seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -20179,7 +20179,7 @@ define amdgpu_kernel void @flat_system_one_as_release_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") release seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -20494,7 +20494,7 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acq_rel seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -20809,7 +20809,7 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") seq_cst seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -21108,7 +21108,7 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") monotonic monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -21438,7 +21438,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acquire monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -21770,7 +21770,7 @@ define amdgpu_kernel void @flat_system_one_as_release_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") release monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -22131,7 +22131,7 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acq_rel monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -22492,7 +22492,7 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") seq_cst monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -22822,7 +22822,7 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") monotonic acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -23152,7 +23152,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acquire acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -23513,7 +23513,7 @@ define amdgpu_kernel void @flat_system_one_as_release_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") release acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -23874,7 +23874,7 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acq_rel acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -24235,7 +24235,7 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") seq_cst acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -24596,7 +24596,7 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") monotonic seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -24957,7 +24957,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acquire seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -25318,7 +25318,7 @@ define amdgpu_kernel void @flat_system_one_as_release_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") release seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -25679,7 +25679,7 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acq_rel seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -26040,7 +26040,7 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") seq_cst seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
|
||||
@ -3094,7 +3094,7 @@ define amdgpu_kernel void @flat_wavefront_monotonic_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") monotonic monotonic
|
||||
ret void
|
||||
}
|
||||
@ -3347,7 +3347,7 @@ define amdgpu_kernel void @flat_wavefront_acquire_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") acquire monotonic
|
||||
ret void
|
||||
}
|
||||
@ -3600,7 +3600,7 @@ define amdgpu_kernel void @flat_wavefront_release_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") release monotonic
|
||||
ret void
|
||||
}
|
||||
@ -3853,7 +3853,7 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel monotonic
|
||||
ret void
|
||||
}
|
||||
@ -4106,7 +4106,7 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst monotonic
|
||||
ret void
|
||||
}
|
||||
@ -4359,7 +4359,7 @@ define amdgpu_kernel void @flat_wavefront_monotonic_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") monotonic acquire
|
||||
ret void
|
||||
}
|
||||
@ -4612,7 +4612,7 @@ define amdgpu_kernel void @flat_wavefront_acquire_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") acquire acquire
|
||||
ret void
|
||||
}
|
||||
@ -4865,7 +4865,7 @@ define amdgpu_kernel void @flat_wavefront_release_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") release acquire
|
||||
ret void
|
||||
}
|
||||
@ -5118,7 +5118,7 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel acquire
|
||||
ret void
|
||||
}
|
||||
@ -5371,7 +5371,7 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst acquire
|
||||
ret void
|
||||
}
|
||||
@ -5624,7 +5624,7 @@ define amdgpu_kernel void @flat_wavefront_monotonic_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") monotonic seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -5877,7 +5877,7 @@ define amdgpu_kernel void @flat_wavefront_acquire_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") acquire seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -6130,7 +6130,7 @@ define amdgpu_kernel void @flat_wavefront_release_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") release seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -6383,7 +6383,7 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -6636,7 +6636,7 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -6935,7 +6935,7 @@ define amdgpu_kernel void @flat_wavefront_monotonic_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") monotonic monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -7236,7 +7236,7 @@ define amdgpu_kernel void @flat_wavefront_acquire_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") acquire monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -7537,7 +7537,7 @@ define amdgpu_kernel void @flat_wavefront_release_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") release monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -7838,7 +7838,7 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -8139,7 +8139,7 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -8440,7 +8440,7 @@ define amdgpu_kernel void @flat_wavefront_monotonic_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") monotonic acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -8741,7 +8741,7 @@ define amdgpu_kernel void @flat_wavefront_acquire_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") acquire acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -9042,7 +9042,7 @@ define amdgpu_kernel void @flat_wavefront_release_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") release acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -9343,7 +9343,7 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -9644,7 +9644,7 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -9945,7 +9945,7 @@ define amdgpu_kernel void @flat_wavefront_monotonic_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") monotonic seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -10246,7 +10246,7 @@ define amdgpu_kernel void @flat_wavefront_acquire_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") acquire seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -10547,7 +10547,7 @@ define amdgpu_kernel void @flat_wavefront_release_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") release seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -10848,7 +10848,7 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -11149,7 +11149,7 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -14237,7 +14237,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic monotonic
|
||||
ret void
|
||||
}
|
||||
@ -14490,7 +14490,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire monotonic
|
||||
ret void
|
||||
}
|
||||
@ -14743,7 +14743,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release monotonic
|
||||
ret void
|
||||
}
|
||||
@ -14996,7 +14996,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel monotonic
|
||||
ret void
|
||||
}
|
||||
@ -15249,7 +15249,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst monotonic
|
||||
ret void
|
||||
}
|
||||
@ -15502,7 +15502,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic acquire
|
||||
ret void
|
||||
}
|
||||
@ -15755,7 +15755,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire acquire
|
||||
ret void
|
||||
}
|
||||
@ -16008,7 +16008,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release acquire
|
||||
ret void
|
||||
}
|
||||
@ -16261,7 +16261,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel acquire
|
||||
ret void
|
||||
}
|
||||
@ -16514,7 +16514,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst acquire
|
||||
ret void
|
||||
}
|
||||
@ -16767,7 +16767,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -17020,7 +17020,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -17273,7 +17273,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -17526,7 +17526,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -17779,7 +17779,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -18078,7 +18078,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_monotonic_ret_cmpxchg
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -18379,7 +18379,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -18680,7 +18680,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -18981,7 +18981,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -19282,7 +19282,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -19583,7 +19583,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -19884,7 +19884,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -20185,7 +20185,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -20486,7 +20486,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -20787,7 +20787,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -21088,7 +21088,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -21389,7 +21389,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -21690,7 +21690,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_relc_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -21991,7 +21991,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
|
||||
@ -3401,7 +3401,7 @@ define amdgpu_kernel void @flat_workgroup_monotonic_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") monotonic monotonic
|
||||
ret void
|
||||
}
|
||||
@ -3674,7 +3674,7 @@ define amdgpu_kernel void @flat_workgroup_acquire_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") acquire monotonic
|
||||
ret void
|
||||
}
|
||||
@ -3951,7 +3951,7 @@ define amdgpu_kernel void @flat_workgroup_release_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") release monotonic
|
||||
ret void
|
||||
}
|
||||
@ -4250,7 +4250,7 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel monotonic
|
||||
ret void
|
||||
}
|
||||
@ -4549,7 +4549,7 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst monotonic
|
||||
ret void
|
||||
}
|
||||
@ -4822,7 +4822,7 @@ define amdgpu_kernel void @flat_workgroup_monotonic_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") monotonic acquire
|
||||
ret void
|
||||
}
|
||||
@ -5095,7 +5095,7 @@ define amdgpu_kernel void @flat_workgroup_acquire_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") acquire acquire
|
||||
ret void
|
||||
}
|
||||
@ -5394,7 +5394,7 @@ define amdgpu_kernel void @flat_workgroup_release_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") release acquire
|
||||
ret void
|
||||
}
|
||||
@ -5693,7 +5693,7 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel acquire
|
||||
ret void
|
||||
}
|
||||
@ -5992,7 +5992,7 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst acquire
|
||||
ret void
|
||||
}
|
||||
@ -6291,7 +6291,7 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -6590,7 +6590,7 @@ define amdgpu_kernel void @flat_workgroup_monotonic_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") monotonic monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -6903,7 +6903,7 @@ define amdgpu_kernel void @flat_workgroup_acquire_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") acquire monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -7228,7 +7228,7 @@ define amdgpu_kernel void @flat_workgroup_release_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") release monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -7562,7 +7562,7 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -7896,7 +7896,7 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -8209,7 +8209,7 @@ define amdgpu_kernel void @flat_workgroup_monotonic_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") monotonic acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -8522,7 +8522,7 @@ define amdgpu_kernel void @flat_workgroup_acquire_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") acquire acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -8856,7 +8856,7 @@ define amdgpu_kernel void @flat_workgroup_release_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") release acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -9190,7 +9190,7 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -9524,7 +9524,7 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -9861,7 +9861,7 @@ define amdgpu_kernel void @flat_workgroup_monotonic_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") monotonic seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -10198,7 +10198,7 @@ define amdgpu_kernel void @flat_workgroup_acquire_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") acquire seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -10532,7 +10532,7 @@ define amdgpu_kernel void @flat_workgroup_release_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") release seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -10866,7 +10866,7 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -11200,7 +11200,7 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -14536,7 +14536,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic monotonic
|
||||
ret void
|
||||
}
|
||||
@ -14800,7 +14800,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire monotonic
|
||||
ret void
|
||||
}
|
||||
@ -15073,7 +15073,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release monotonic
|
||||
ret void
|
||||
}
|
||||
@ -15360,7 +15360,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel monotonic
|
||||
ret void
|
||||
}
|
||||
@ -15647,7 +15647,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_monotonic_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst monotonic
|
||||
ret void
|
||||
}
|
||||
@ -15911,7 +15911,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic acquire
|
||||
ret void
|
||||
}
|
||||
@ -16175,7 +16175,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire acquire
|
||||
ret void
|
||||
}
|
||||
@ -16462,7 +16462,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release acquire
|
||||
ret void
|
||||
}
|
||||
@ -16749,7 +16749,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel acquire
|
||||
ret void
|
||||
}
|
||||
@ -17036,7 +17036,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_acquire_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst acquire
|
||||
ret void
|
||||
}
|
||||
@ -17320,7 +17320,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -17604,7 +17604,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -17891,7 +17891,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -18178,7 +18178,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -18465,7 +18465,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_seq_cst_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst seq_cst
|
||||
ret void
|
||||
}
|
||||
@ -18764,7 +18764,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonicmonotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -19073,7 +19073,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -19394,7 +19394,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -19726,7 +19726,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -20058,7 +20058,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst monotonic
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -20367,7 +20367,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -20676,7 +20676,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -21008,7 +21008,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -21340,7 +21340,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -21672,7 +21672,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_acquire_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst acquire
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -22001,7 +22001,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -22330,7 +22330,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -22662,7 +22662,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -22994,7 +22994,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
@ -23326,7 +23326,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg(
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %out, i32 %in, i32 %old) {
|
||||
entry:
|
||||
%gep = getelementptr i32, ptr %out, i32 4
|
||||
%gep = getelementptr inbounds i32, ptr %out, i32 4
|
||||
%val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst seq_cst
|
||||
%val0 = extractvalue { i32, i1 } %val, 0
|
||||
store i32 %val0, ptr %out, align 4
|
||||
|
||||
@ -1492,7 +1492,7 @@ define i8 @flat_inst_valu_offset_2x_neg_24bit_max(ptr %p) {
|
||||
; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1]
|
||||
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i8, ptr %p, i64 -16777215
|
||||
%gep = getelementptr inbounds i8, ptr %p, i64 -16777215
|
||||
%load = load i8, ptr %gep, align 4
|
||||
ret i8 %load
|
||||
}
|
||||
@ -1597,7 +1597,7 @@ define i8 @flat_inst_valu_offset_64bit_11bit_split0(ptr %p) {
|
||||
; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1]
|
||||
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i8, ptr %p, i64 8589936639
|
||||
%gep = getelementptr inbounds i8, ptr %p, i64 8589936639
|
||||
%load = load i8, ptr %gep, align 4
|
||||
ret i8 %load
|
||||
}
|
||||
@ -1702,7 +1702,7 @@ define i8 @flat_inst_valu_offset_64bit_11bit_split1(ptr %p) {
|
||||
; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1]
|
||||
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i8, ptr %p, i64 8589936640
|
||||
%gep = getelementptr inbounds i8, ptr %p, i64 8589936640
|
||||
%load = load i8, ptr %gep, align 4
|
||||
ret i8 %load
|
||||
}
|
||||
@ -1807,7 +1807,7 @@ define i8 @flat_inst_valu_offset_64bit_12bit_split0(ptr %p) {
|
||||
; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1]
|
||||
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i8, ptr %p, i64 8589938687
|
||||
%gep = getelementptr inbounds i8, ptr %p, i64 8589938687
|
||||
%load = load i8, ptr %gep, align 4
|
||||
ret i8 %load
|
||||
}
|
||||
@ -1903,7 +1903,7 @@ define i8 @flat_inst_valu_offset_64bit_12bit_split1(ptr %p) {
|
||||
; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1]
|
||||
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i8, ptr %p, i64 8589938688
|
||||
%gep = getelementptr inbounds i8, ptr %p, i64 8589938688
|
||||
%load = load i8, ptr %gep, align 4
|
||||
ret i8 %load
|
||||
}
|
||||
@ -2008,7 +2008,7 @@ define i8 @flat_inst_valu_offset_64bit_13bit_split0(ptr %p) {
|
||||
; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1]
|
||||
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i8, ptr %p, i64 8589942783
|
||||
%gep = getelementptr inbounds i8, ptr %p, i64 8589942783
|
||||
%load = load i8, ptr %gep, align 4
|
||||
ret i8 %load
|
||||
}
|
||||
@ -2104,7 +2104,7 @@ define i8 @flat_inst_valu_offset_64bit_13bit_split1(ptr %p) {
|
||||
; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1]
|
||||
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i8, ptr %p, i64 8589942784
|
||||
%gep = getelementptr inbounds i8, ptr %p, i64 8589942784
|
||||
%load = load i8, ptr %gep, align 4
|
||||
ret i8 %load
|
||||
}
|
||||
@ -2211,7 +2211,7 @@ define i8 @flat_inst_valu_offset_64bit_11bit_neg_high_split0(ptr %p) {
|
||||
; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1]
|
||||
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i8, ptr %p, i64 -9223372036854773761
|
||||
%gep = getelementptr inbounds i8, ptr %p, i64 -9223372036854773761
|
||||
%load = load i8, ptr %gep, align 4
|
||||
ret i8 %load
|
||||
}
|
||||
@ -2318,7 +2318,7 @@ define i8 @flat_inst_valu_offset_64bit_11bit_neg_high_split1(ptr %p) {
|
||||
; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1]
|
||||
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i8, ptr %p, i64 -9223372036854773760
|
||||
%gep = getelementptr inbounds i8, ptr %p, i64 -9223372036854773760
|
||||
%load = load i8, ptr %gep, align 4
|
||||
ret i8 %load
|
||||
}
|
||||
@ -2425,7 +2425,7 @@ define i8 @flat_inst_valu_offset_64bit_12bit_neg_high_split0(ptr %p) {
|
||||
; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1]
|
||||
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i8, ptr %p, i64 -9223372036854771713
|
||||
%gep = getelementptr inbounds i8, ptr %p, i64 -9223372036854771713
|
||||
%load = load i8, ptr %gep, align 4
|
||||
ret i8 %load
|
||||
}
|
||||
@ -2532,7 +2532,7 @@ define i8 @flat_inst_valu_offset_64bit_12bit_neg_high_split1(ptr %p) {
|
||||
; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1]
|
||||
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i8, ptr %p, i64 -9223372036854771712
|
||||
%gep = getelementptr inbounds i8, ptr %p, i64 -9223372036854771712
|
||||
%load = load i8, ptr %gep, align 4
|
||||
ret i8 %load
|
||||
}
|
||||
@ -2639,7 +2639,7 @@ define i8 @flat_inst_valu_offset_64bit_13bit_neg_high_split0(ptr %p) {
|
||||
; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1]
|
||||
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i8, ptr %p, i64 -9223372036854767617
|
||||
%gep = getelementptr inbounds i8, ptr %p, i64 -9223372036854767617
|
||||
%load = load i8, ptr %gep, align 4
|
||||
ret i8 %load
|
||||
}
|
||||
@ -2746,7 +2746,7 @@ define i8 @flat_inst_valu_offset_64bit_13bit_neg_high_split1(ptr %p) {
|
||||
; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1]
|
||||
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
%gep = getelementptr i8, ptr %p, i64 -9223372036854767616
|
||||
%gep = getelementptr inbounds i8, ptr %p, i64 -9223372036854767616
|
||||
%load = load i8, ptr %gep, align 4
|
||||
ret i8 %load
|
||||
}
|
||||
@ -4232,7 +4232,7 @@ define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_split0(ptr %p) {
|
||||
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-GISEL-NEXT: flat_store_b8 v[0:1], v0
|
||||
; GFX12-GISEL-NEXT: s_endpgm
|
||||
%gep = getelementptr i8, ptr %p, i64 8589936639
|
||||
%gep = getelementptr inbounds i8, ptr %p, i64 8589936639
|
||||
%load = load volatile i8, ptr %gep, align 1
|
||||
store i8 %load, ptr poison
|
||||
ret void
|
||||
@ -4351,7 +4351,7 @@ define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_split1(ptr %p) {
|
||||
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-GISEL-NEXT: flat_store_b8 v[0:1], v0
|
||||
; GFX12-GISEL-NEXT: s_endpgm
|
||||
%gep = getelementptr i8, ptr %p, i64 8589936640
|
||||
%gep = getelementptr inbounds i8, ptr %p, i64 8589936640
|
||||
%load = load volatile i8, ptr %gep, align 1
|
||||
store i8 %load, ptr poison
|
||||
ret void
|
||||
@ -4470,7 +4470,7 @@ define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_split0(ptr %p) {
|
||||
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-GISEL-NEXT: flat_store_b8 v[0:1], v0
|
||||
; GFX12-GISEL-NEXT: s_endpgm
|
||||
%gep = getelementptr i8, ptr %p, i64 8589938687
|
||||
%gep = getelementptr inbounds i8, ptr %p, i64 8589938687
|
||||
%load = load volatile i8, ptr %gep, align 1
|
||||
store i8 %load, ptr poison
|
||||
ret void
|
||||
@ -4590,7 +4590,7 @@ define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_split1(ptr %p) {
|
||||
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-GISEL-NEXT: flat_store_b8 v[0:1], v0
|
||||
; GFX12-GISEL-NEXT: s_endpgm
|
||||
%gep = getelementptr i8, ptr %p, i64 8589938688
|
||||
%gep = getelementptr inbounds i8, ptr %p, i64 8589938688
|
||||
%load = load volatile i8, ptr %gep, align 1
|
||||
store i8 %load, ptr poison
|
||||
ret void
|
||||
@ -4710,7 +4710,7 @@ define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_split0(ptr %p) {
|
||||
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-GISEL-NEXT: flat_store_b8 v[0:1], v0
|
||||
; GFX12-GISEL-NEXT: s_endpgm
|
||||
%gep = getelementptr i8, ptr %p, i64 8589942783
|
||||
%gep = getelementptr inbounds i8, ptr %p, i64 8589942783
|
||||
%load = load volatile i8, ptr %gep, align 1
|
||||
store i8 %load, ptr poison
|
||||
ret void
|
||||
@ -4830,7 +4830,7 @@ define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_split1(ptr %p) {
|
||||
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-GISEL-NEXT: flat_store_b8 v[0:1], v0
|
||||
; GFX12-GISEL-NEXT: s_endpgm
|
||||
%gep = getelementptr i8, ptr %p, i64 8589942784
|
||||
%gep = getelementptr inbounds i8, ptr %p, i64 8589942784
|
||||
%load = load volatile i8, ptr %gep, align 1
|
||||
store i8 %load, ptr poison
|
||||
ret void
|
||||
@ -4955,7 +4955,7 @@ define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_neg_high_split0(ptr
|
||||
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-GISEL-NEXT: flat_store_b8 v[0:1], v0
|
||||
; GFX12-GISEL-NEXT: s_endpgm
|
||||
%gep = getelementptr i8, ptr %p, i64 -9223372036854773761
|
||||
%gep = getelementptr inbounds i8, ptr %p, i64 -9223372036854773761
|
||||
%load = load volatile i8, ptr %gep, align 1
|
||||
store i8 %load, ptr poison
|
||||
ret void
|
||||
@ -5080,7 +5080,7 @@ define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_neg_high_split1(ptr
|
||||
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-GISEL-NEXT: flat_store_b8 v[0:1], v0
|
||||
; GFX12-GISEL-NEXT: s_endpgm
|
||||
%gep = getelementptr i8, ptr %p, i64 -9223372036854773760
|
||||
%gep = getelementptr inbounds i8, ptr %p, i64 -9223372036854773760
|
||||
%load = load volatile i8, ptr %gep, align 1
|
||||
store i8 %load, ptr poison
|
||||
ret void
|
||||
@ -5205,7 +5205,7 @@ define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_neg_high_split0(ptr
|
||||
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-GISEL-NEXT: flat_store_b8 v[0:1], v0
|
||||
; GFX12-GISEL-NEXT: s_endpgm
|
||||
%gep = getelementptr i8, ptr %p, i64 -9223372036854771713
|
||||
%gep = getelementptr inbounds i8, ptr %p, i64 -9223372036854771713
|
||||
%load = load volatile i8, ptr %gep, align 1
|
||||
store i8 %load, ptr poison
|
||||
ret void
|
||||
@ -5330,7 +5330,7 @@ define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_neg_high_split1(ptr
|
||||
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-GISEL-NEXT: flat_store_b8 v[0:1], v0
|
||||
; GFX12-GISEL-NEXT: s_endpgm
|
||||
%gep = getelementptr i8, ptr %p, i64 -9223372036854771712
|
||||
%gep = getelementptr inbounds i8, ptr %p, i64 -9223372036854771712
|
||||
%load = load volatile i8, ptr %gep, align 1
|
||||
store i8 %load, ptr poison
|
||||
ret void
|
||||
@ -5455,7 +5455,7 @@ define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_neg_high_split0(ptr
|
||||
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-GISEL-NEXT: flat_store_b8 v[0:1], v0
|
||||
; GFX12-GISEL-NEXT: s_endpgm
|
||||
%gep = getelementptr i8, ptr %p, i64 -9223372036854767617
|
||||
%gep = getelementptr inbounds i8, ptr %p, i64 -9223372036854767617
|
||||
%load = load volatile i8, ptr %gep, align 1
|
||||
store i8 %load, ptr poison
|
||||
ret void
|
||||
@ -5580,7 +5580,7 @@ define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_neg_high_split1(ptr
|
||||
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-GISEL-NEXT: flat_store_b8 v[0:1], v0
|
||||
; GFX12-GISEL-NEXT: s_endpgm
|
||||
%gep = getelementptr i8, ptr %p, i64 -9223372036854767616
|
||||
%gep = getelementptr inbounds i8, ptr %p, i64 -9223372036854767616
|
||||
%load = load volatile i8, ptr %gep, align 1
|
||||
store i8 %load, ptr poison
|
||||
ret void
|
||||
|
||||
@ -2658,7 +2658,7 @@ define amdgpu_kernel void @negativeoffsetnullptr(ptr %buffer) {
|
||||
; GFX11-FAKE16-NEXT: s_endpgm
|
||||
entry:
|
||||
%null = select i1 false, ptr %buffer, ptr addrspacecast (ptr addrspace(5) null to ptr)
|
||||
%gep = getelementptr i8, ptr %null, i64 -1
|
||||
%gep = getelementptr inbounds i8, ptr %null, i64 -1
|
||||
%ld = load i8, ptr %gep
|
||||
%cmp = icmp eq i8 %ld, 0
|
||||
br label %branch
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user