
addr matching was the only gatekeeper for starting to select G_LOAD and G_STORE using SDAG patterns. * Introduce a complex renderer gi_addr for addr. In this patch only the existing functionality has been implemented. The renderer's name is the same as in SDAG: selectAddr. Apparently the type of GIComplexOperandMatcher doesn't matter, as RISCV also uses s32 for both 64- and 32-bit pointers. * X86SelectAddress is now used for both pattern matching and manual selection. As a result, it accumulates all the code that was previously distributed among different selection functions. * Replace getLoadStoreOp with getPtrLoadStoreOp in the Load/Store selector, as the GlobalISel matcher or emitter can't map the pointer type onto the i32/i64 types used in SDAG patterns for pointers. So the load and store selection of pointers is still manual. getLoadStoreOp is still present because it is used in G_FCONSTANT lowering, which requires extra effort to select using SDAG patterns. * Since truncating stores are not supported, we custom legalize them by matching the types of the store and the MMO. * Introduce a constant pool flag in X86AddressMode because otherwise we would need to introduce a GlobalISel copy of X86ISelAddressMode. * Also please note in the tests that GlobalISel prefers to fold memory operands immediately, compared to SDAG. The reason is that GlobalISel doesn't have target hooks in GIM_CheckIsSafeToFold. Or maybe another check on profitability is required along with the safety check, which is currently not present.
66 lines
2.4 KiB
LLVM
66 lines
2.4 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sse2 %s -o - | FileCheck %s --check-prefixes SSE2,SSE2-X64
; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sse2 -fast-isel %s -o - | FileCheck %s --check-prefixes SSE2,SSE2-X64
; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sse2 -global-isel -global-isel-abort=1 %s -o - | FileCheck %s --check-prefixes SSE2,SSE2-GISEL

;
; 128 bit vectors
;

; Returns a constant <7 x i8> vector. The checks below show the value being
; written through the pointer in %rdi, which is also copied to %rax.
;   * Under the SSE2-X64 prefix (the default and -fast-isel run lines) the
;     first four bytes are loaded from an LCPI (constant-pool) label and
;     stored as one 32-bit word, followed by a byte store at offset 6 and a
;     16-bit store at offset 4.
;   * Under the SSE2-GISEL prefix (the -global-isel run line) every element is
;     stored with its own byte-sized immediate store at offsets 0 through 6.
define <7 x i8> @test_vector_v7i8() {
; SSE2-X64-LABEL: test_vector_v7i8:
; SSE2-X64: # %bb.0:
; SSE2-X64-NEXT: movq %rdi, %rax
; SSE2-X64-NEXT: movl {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ecx
; SSE2-X64-NEXT: movl %ecx, (%rdi)
; SSE2-X64-NEXT: movb $63, 6(%rdi)
; SSE2-X64-NEXT: movw $10775, 4(%rdi) # imm = 0x2A17
; SSE2-X64-NEXT: retq
;
; SSE2-GISEL-LABEL: test_vector_v7i8:
; SSE2-GISEL: # %bb.0:
; SSE2-GISEL-NEXT: movq %rdi, %rax
; SSE2-GISEL-NEXT: movb $4, (%rdi)
; SSE2-GISEL-NEXT: movb $8, 1(%rdi)
; SSE2-GISEL-NEXT: movb $15, 2(%rdi)
; SSE2-GISEL-NEXT: movb $16, 3(%rdi)
; SSE2-GISEL-NEXT: movb $23, 4(%rdi)
; SSE2-GISEL-NEXT: movb $42, 5(%rdi)
; SSE2-GISEL-NEXT: movb $63, 6(%rdi)
; SSE2-GISEL-NEXT: retq
  ret <7 x i8> <i8 4, i8 8, i8 15, i8 16, i8 23, i8 42, i8 63>
}

; Returns a constant <16 x i8> vector. Only the shared SSE2 prefix is checked,
; so every run line at the top of the file is expected to produce the same
; output: one movaps that fills xmm0 with the sixteen byte values, then retq.
define <16 x i8> @test_vector_v16i8() {
; SSE2-LABEL: test_vector_v16i8:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps {{.*#+}} xmm0 = [4,8,15,16,23,42,63,70,92,105,123,133,157,160,174,180]
; SSE2-NEXT: retq
  ret <16 x i8> <i8 4, i8 8, i8 15, i8 16, i8 23, i8 42, i8 63, i8 70, i8 92, i8 105, i8 123, i8 133, i8 157, i8 160, i8 174, i8 180>
}

; Returns a constant <8 x i16> vector. Only the shared SSE2 prefix is checked,
; so every run line at the top of the file is expected to produce the same
; output: one movaps that fills xmm0 with the eight 16-bit values, then retq.
define <8 x i16> @test_vector_v8i16() {
; SSE2-LABEL: test_vector_v8i16:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps {{.*#+}} xmm0 = [4,15,23,63,92,123,157,174]
; SSE2-NEXT: retq
  ret <8 x i16> <i16 4, i16 15, i16 23, i16 63, i16 92, i16 123, i16 157, i16 174>
}

; Returns a constant <4 x float> vector whose first lane is undef. The undef
; lane appears as `u` in the asm comment matched below; the remaining three
; lanes appear in scientific notation. Only the shared SSE2 prefix is checked,
; so every run line is expected to produce a single movaps followed by retq.
define <4 x float> @test_vector_v4f32() {
; SSE2-LABEL: test_vector_v4f32:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps {{.*#+}} xmm0 = [u,3.6627E+5,9.86864E+5,7.0851E+4]
; SSE2-NEXT: retq
  ret <4 x float> <float undef, float 366270.0, float 986864.0, float 70851.0>
}

; Returns a constant <2 x i64> vector. Both element values are larger than
; 2^63 - 1, and the asm comment matched below shows them in the same unsigned
; decimal form. Only the shared SSE2 prefix is checked, so every run line is
; expected to produce a single movaps followed by retq.
;
; NOTE(review): the function is named v4i64 although its type is <2 x i64>.
; The name is left unchanged because the LABEL check below (and anything else
; that refers to this symbol) depends on it.
define <2 x i64> @test_vector_v4i64() {
; SSE2-LABEL: test_vector_v4i64:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps {{.*#+}} xmm0 = [9406487659005566976,9903695591611287552]
; SSE2-NEXT: retq
  ret <2 x i64> <i64 9406487659005566976, i64 9903695591611287552>
}