
Recent upstream trends have moved away from explicitly using `-verify-machineinstrs`, as it's already covered by the expensive checks. This PR removes almost all `-verify-machineinstrs` from tests in `llvm/test/CodeGen/AMDGPU/*.ll`, leaving only those tests where its removal currently causes failures.
108 lines
4.0 KiB
LLVM
108 lines
4.0 KiB
LLVM
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --enable-var-scope --check-prefix=GCN %s
|
|
|
|
; Check that vectorizer does not create slow misaligned loads
|
|
|
|
; GCN-LABEL: {{^}}ds1align1:
|
|
; GCN-COUNT-2: ds_read_u8
|
|
; GCN-COUNT-2: ds_write_b8
|
|
define amdgpu_kernel void @ds1align1(ptr addrspace(3) %in, ptr addrspace(3) %out) {
|
|
%val1 = load i8, ptr addrspace(3) %in, align 1
|
|
%gep1 = getelementptr i8, ptr addrspace(3) %in, i32 1
|
|
%val2 = load i8, ptr addrspace(3) %gep1, align 1
|
|
store i8 %val1, ptr addrspace(3) %out, align 1
|
|
%gep2 = getelementptr i8, ptr addrspace(3) %out, i32 1
|
|
store i8 %val2, ptr addrspace(3) %gep2, align 1
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}ds2align2:
|
|
; GCN-COUNT-2: ds_read_u16
|
|
; GCN-COUNT-2: ds_write_b16
|
|
define amdgpu_kernel void @ds2align2(ptr addrspace(3) %in, ptr addrspace(3) %out) {
|
|
%val1 = load i16, ptr addrspace(3) %in, align 2
|
|
%gep1 = getelementptr i16, ptr addrspace(3) %in, i32 1
|
|
%val2 = load i16, ptr addrspace(3) %gep1, align 2
|
|
store i16 %val1, ptr addrspace(3) %out, align 2
|
|
%gep2 = getelementptr i16, ptr addrspace(3) %out, i32 1
|
|
store i16 %val2, ptr addrspace(3) %gep2, align 2
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}ds4align4:
|
|
; GCN: ds_read2_b32
|
|
; GCN: ds_write2_b32
|
|
define amdgpu_kernel void @ds4align4(ptr addrspace(3) %in, ptr addrspace(3) %out) {
|
|
%val1 = load i32, ptr addrspace(3) %in, align 4
|
|
%gep1 = getelementptr i32, ptr addrspace(3) %in, i32 1
|
|
%val2 = load i32, ptr addrspace(3) %gep1, align 4
|
|
store i32 %val1, ptr addrspace(3) %out, align 4
|
|
%gep2 = getelementptr i32, ptr addrspace(3) %out, i32 1
|
|
store i32 %val2, ptr addrspace(3) %gep2, align 4
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}ds8align8:
|
|
; GCN: ds_read2_b64
|
|
; GCN: ds_write2_b64
|
|
define amdgpu_kernel void @ds8align8(ptr addrspace(3) %in, ptr addrspace(3) %out) {
|
|
%val1 = load i64, ptr addrspace(3) %in, align 8
|
|
%gep1 = getelementptr i64, ptr addrspace(3) %in, i64 1
|
|
%val2 = load i64, ptr addrspace(3) %gep1, align 8
|
|
store i64 %val1, ptr addrspace(3) %out, align 8
|
|
%gep2 = getelementptr i64, ptr addrspace(3) %out, i64 1
|
|
store i64 %val2, ptr addrspace(3) %gep2, align 8
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}ds1align2:
|
|
; GCN: ds_read_u16
|
|
; GCN: ds_write_b16
|
|
define amdgpu_kernel void @ds1align2(ptr addrspace(3) %in, ptr addrspace(3) %out) {
|
|
%val1 = load i8, ptr addrspace(3) %in, align 2
|
|
%gep1 = getelementptr i8, ptr addrspace(3) %in, i32 1
|
|
%val2 = load i8, ptr addrspace(3) %gep1, align 2
|
|
store i8 %val1, ptr addrspace(3) %out, align 2
|
|
%gep2 = getelementptr i8, ptr addrspace(3) %out, i32 1
|
|
store i8 %val2, ptr addrspace(3) %gep2, align 2
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}ds2align4:
|
|
; GCN: ds_read_b32
|
|
; GCN: ds_write_b32
|
|
define amdgpu_kernel void @ds2align4(ptr addrspace(3) %in, ptr addrspace(3) %out) {
|
|
%val1 = load i16, ptr addrspace(3) %in, align 4
|
|
%gep1 = getelementptr i16, ptr addrspace(3) %in, i32 1
|
|
%val2 = load i16, ptr addrspace(3) %gep1, align 4
|
|
store i16 %val1, ptr addrspace(3) %out, align 4
|
|
%gep2 = getelementptr i16, ptr addrspace(3) %out, i32 1
|
|
store i16 %val2, ptr addrspace(3) %gep2, align 4
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}ds4align8:
|
|
; GCN: ds_read_b64
|
|
; GCN: ds_write_b64
|
|
define amdgpu_kernel void @ds4align8(ptr addrspace(3) %in, ptr addrspace(3) %out) {
|
|
%val1 = load i32, ptr addrspace(3) %in, align 8
|
|
%gep1 = getelementptr i32, ptr addrspace(3) %in, i32 1
|
|
%val2 = load i32, ptr addrspace(3) %gep1, align 8
|
|
store i32 %val1, ptr addrspace(3) %out, align 8
|
|
%gep2 = getelementptr i32, ptr addrspace(3) %out, i32 1
|
|
store i32 %val2, ptr addrspace(3) %gep2, align 8
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}ds8align16:
|
|
; GCN: ds_read_b128
|
|
; GCN: ds_write_b128
|
|
define amdgpu_kernel void @ds8align16(ptr addrspace(3) %in, ptr addrspace(3) %out) {
|
|
%val1 = load i64, ptr addrspace(3) %in, align 16
|
|
%gep1 = getelementptr i64, ptr addrspace(3) %in, i64 1
|
|
%val2 = load i64, ptr addrspace(3) %gep1, align 16
|
|
store i64 %val1, ptr addrspace(3) %out, align 16
|
|
%gep2 = getelementptr i64, ptr addrspace(3) %out, i64 1
|
|
store i64 %val2, ptr addrspace(3) %gep2, align 16
|
|
ret void
|
|
}
|