
This canonicalizes fneg/fabs (shuffle X, poison, mask) -> shuffle (fneg/fabs X), poison, mask. This undoes part of b331a7ebc1e02f9939d1a4a1509e7eb6cdda3d38 and a8f13dbdeb31be37ee15b5febb7cc2137bbece67, but keeps the binary shuffle case, i.e. shuffle fneg, fneg, mask. By pulling out the shuffle we bring it in line with the same canonicalization we perform on binary ops and intrinsics, which the original commit acknowledges goes in the opposite direction. However, nowadays VectorCombine is more powerful and can perform more optimizations when the shuffle is pulled out, so I think we should revisit this. In particular, we get more shuffles folded and can perform scalarization.
325 lines
20 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -S | FileCheck %s

; The result has the fewest vector elements between the result and the two operands so the negation can be moved there
define <2 x double> @test_negation_move_to_result(<6 x double> %a, <3 x double> %b) {
; CHECK-LABEL: @test_negation_move_to_result(
; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A:%.*]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1)
; CHECK-NEXT:    [[RES:%.*]] = fneg <2 x double> [[TMP1]]
; CHECK-NEXT:    ret <2 x double> [[RES]]
;
  %a.neg = fneg <6 x double> %a
  %res = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> %a.neg, <3 x double> %b, i32 2, i32 3, i32 1)
  ret <2 x double> %res
}

; The result has the fewest vector elements between the result and the two operands so the negation can be moved there
; Fast flag should be preserved
define <2 x double> @test_negation_move_to_result_with_fastflags(<6 x double> %a, <3 x double> %b) {
; CHECK-LABEL: @test_negation_move_to_result_with_fastflags(
; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A:%.*]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1)
; CHECK-NEXT:    [[RES:%.*]] = fneg fast <2 x double> [[TMP1]]
; CHECK-NEXT:    ret <2 x double> [[RES]]
;
  %a.neg = fneg <6 x double> %a
  %res = tail call fast <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> %a.neg, <3 x double> %b, i32 2, i32 3, i32 1)
  ret <2 x double> %res
}

define <2 x double> @test_negation_move_to_result_with_nnan_flag(<6 x double> %a, <3 x double> %b) {
; CHECK-LABEL: @test_negation_move_to_result_with_nnan_flag(
; CHECK-NEXT:    [[TMP1:%.*]] = call nnan <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A:%.*]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1)
; CHECK-NEXT:    [[RES:%.*]] = fneg nnan <2 x double> [[TMP1]]
; CHECK-NEXT:    ret <2 x double> [[RES]]
;
  %a.neg = fneg <6 x double> %a
  %res = tail call nnan <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> %a.neg, <3 x double> %b, i32 2, i32 3, i32 1)
  ret <2 x double> %res
}

define <2 x double> @test_negation_move_to_result_with_nsz_flag(<6 x double> %a, <3 x double> %b) {
; CHECK-LABEL: @test_negation_move_to_result_with_nsz_flag(
; CHECK-NEXT:    [[TMP1:%.*]] = call nsz <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A:%.*]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1)
; CHECK-NEXT:    [[RES:%.*]] = fneg nsz <2 x double> [[TMP1]]
; CHECK-NEXT:    ret <2 x double> [[RES]]
;
  %a.neg = fneg <6 x double> %a
  %res = tail call nsz <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> %a.neg, <3 x double> %b, i32 2, i32 3, i32 1)
  ret <2 x double> %res
}
; The fneg's own fast-math flags are not transferred to the result of the
; multiply (the CHECK lines show no flags on either instruction).
define <2 x double> @test_negation_move_to_result_with_fastflag_on_negation(<6 x double> %a, <3 x double> %b) {
; CHECK-LABEL: @test_negation_move_to_result_with_fastflag_on_negation(
; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A:%.*]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1)
; CHECK-NEXT:    [[RES:%.*]] = fneg <2 x double> [[TMP1]]
; CHECK-NEXT:    ret <2 x double> [[RES]]
;
  %a.neg = fneg fast <6 x double> %a
  %res = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> %a.neg, <3 x double> %b, i32 2, i32 3, i32 1)
  ret <2 x double> %res
}
; %b has the fewest vector elements between the result and the two operands so the negation can be moved there
define <9 x double> @test_move_negation_to_second_operand(<27 x double> %a, <3 x double> %b) {
; CHECK-LABEL: @test_move_negation_to_second_operand(
; CHECK-NEXT:    [[TMP1:%.*]] = fneg <3 x double> [[B:%.*]]
; CHECK-NEXT:    [[RES:%.*]] = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A:%.*]], <3 x double> [[TMP1]], i32 9, i32 3, i32 1)
; CHECK-NEXT:    ret <9 x double> [[RES]]
;
  %a.neg = fneg <27 x double> %a
  %res = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> %a.neg, <3 x double> %b, i32 9, i32 3, i32 1)
  ret <9 x double> %res
}

; %b has the fewest vector elements between the result and the two operands so the negation can be moved there
; Fast flag should be preserved
define <9 x double> @test_move_negation_to_second_operand_with_fast_flags(<27 x double> %a, <3 x double> %b) {
; CHECK-LABEL: @test_move_negation_to_second_operand_with_fast_flags(
; CHECK-NEXT:    [[TMP1:%.*]] = fneg <3 x double> [[B:%.*]]
; CHECK-NEXT:    [[RES:%.*]] = tail call fast <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A:%.*]], <3 x double> [[TMP1]], i32 9, i32 3, i32 1)
; CHECK-NEXT:    ret <9 x double> [[RES]]
;
  %a.neg = fneg <27 x double> %a
  %res = tail call fast <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> %a.neg, <3 x double> %b, i32 9, i32 3, i32 1)
  ret <9 x double> %res
}

; The result has the fewest vector elements between the result and the two operands so the negation can be moved there
define <2 x double> @test_negation_move_to_result_from_second_operand(<3 x double> %a, <6 x double> %b){
; CHECK-LABEL: @test_negation_move_to_result_from_second_operand(
; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x double> @llvm.matrix.multiply.v2f64.v3f64.v6f64(<3 x double> [[A:%.*]], <6 x double> [[B:%.*]], i32 1, i32 3, i32 2)
; CHECK-NEXT:    [[RES:%.*]] = fneg <2 x double> [[TMP1]]
; CHECK-NEXT:    ret <2 x double> [[RES]]
;
  %b.neg = fneg <6 x double> %b
  %res = tail call <2 x double> @llvm.matrix.multiply.v2f64.v3f64.v6f64(<3 x double> %a, <6 x double> %b.neg, i32 1, i32 3, i32 2)
  ret <2 x double> %res
}

; %a has the fewest vector elements between the result and the two operands so the negation can be moved there
define <9 x double> @test_move_negation_to_first_operand(<3 x double> %a, <27 x double> %b) {
; CHECK-LABEL: @test_move_negation_to_first_operand(
; CHECK-NEXT:    [[TMP1:%.*]] = fneg <3 x double> [[A:%.*]]
; CHECK-NEXT:    [[RES:%.*]] = tail call <9 x double> @llvm.matrix.multiply.v9f64.v3f64.v27f64(<3 x double> [[TMP1]], <27 x double> [[B:%.*]], i32 1, i32 3, i32 9)
; CHECK-NEXT:    ret <9 x double> [[RES]]
;
  %b.neg = fneg <27 x double> %b
  %res = tail call <9 x double> @llvm.matrix.multiply.v9f64.v3f64.v27f64(<3 x double> %a, <27 x double> %b.neg, i32 1, i32 3, i32 9)
  ret <9 x double> %res
}
; %a has the fewest vector elements between the result and the two operands so the negation is not moved
define <15 x double> @test_negation_not_moved(<3 x double> %a, <5 x double> %b) {
; CHECK-LABEL: @test_negation_not_moved(
; CHECK-NEXT:    [[A_NEG:%.*]] = fneg <3 x double> [[A:%.*]]
; CHECK-NEXT:    [[RES:%.*]] = tail call <15 x double> @llvm.matrix.multiply.v15f64.v3f64.v5f64(<3 x double> [[A_NEG]], <5 x double> [[B:%.*]], i32 3, i32 1, i32 5)
; CHECK-NEXT:    ret <15 x double> [[RES]]
;
  %a.neg = fneg <3 x double> %a
  %res = tail call <15 x double> @llvm.matrix.multiply.v15f64.v3f64.v5f64(<3 x double> %a.neg, <5 x double> %b, i32 3, i32 1, i32 5)
  ret <15 x double> %res
}

; %b as the fewest vector elements between the result and the two operands so the negation is not moved
define <15 x double> @test_negation_not_moved_second_operand(<5 x double> %a, <3 x double> %b) {
; CHECK-LABEL: @test_negation_not_moved_second_operand(
; CHECK-NEXT:    [[B_NEG:%.*]] = fneg <3 x double> [[B:%.*]]
; CHECK-NEXT:    [[RES:%.*]] = tail call <15 x double> @llvm.matrix.multiply.v15f64.v5f64.v3f64(<5 x double> [[A:%.*]], <3 x double> [[B_NEG]], i32 5, i32 1, i32 3)
; CHECK-NEXT:    ret <15 x double> [[RES]]
;
  %b.neg = fneg <3 x double> %b
  %res = tail call <15 x double> @llvm.matrix.multiply.v15f64.v5f64.v3f64(<5 x double> %a, <3 x double> %b.neg, i32 5, i32 1, i32 3)
  ret <15 x double> %res
}

; the negation should be moved from the result to operand %a because it has the smallest vector element count
define <15 x double> @test_negation_on_result(<3 x double> %a, <5 x double> %b) {
; CHECK-LABEL: @test_negation_on_result(
; CHECK-NEXT:    [[RES:%.*]] = tail call <15 x double> @llvm.matrix.multiply.v15f64.v3f64.v5f64(<3 x double> [[A:%.*]], <5 x double> [[B:%.*]], i32 3, i32 1, i32 5)
; CHECK-NEXT:    [[RES_2:%.*]] = fneg <15 x double> [[RES]]
; CHECK-NEXT:    ret <15 x double> [[RES_2]]
;
  %res = tail call <15 x double> @llvm.matrix.multiply.v15f64.v3f64.v5f64(<3 x double> %a, <5 x double> %b, i32 3, i32 1, i32 5)
  %res.2 = fneg <15 x double> %res
  ret <15 x double> %res.2
}
; both negations can be deleted
define <2 x double> @test_with_two_operands_negated1(<6 x double> %a, <3 x double> %b){
; CHECK-LABEL: @test_with_two_operands_negated1(
; CHECK-NEXT:    [[RES:%.*]] = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A:%.*]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1)
; CHECK-NEXT:    ret <2 x double> [[RES]]
;
  %a.neg = fneg <6 x double> %a
  %b.neg = fneg <3 x double> %b
  %res = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> %a.neg, <3 x double> %b.neg, i32 2, i32 3, i32 1)
  ret <2 x double> %res
}

; both negations will be removed
define <9 x double> @test_with_two_operands_negated2(<27 x double> %a, <3 x double> %b){
; CHECK-LABEL: @test_with_two_operands_negated2(
; CHECK-NEXT:    [[RES:%.*]] = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A:%.*]], <3 x double> [[B:%.*]], i32 9, i32 3, i32 1)
; CHECK-NEXT:    ret <9 x double> [[RES]]
;
  %a.neg = fneg <27 x double> %a
  %b.neg = fneg <3 x double> %b
  %res = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> %a.neg, <3 x double> %b.neg, i32 9, i32 3, i32 1)
  ret <9 x double> %res
}

; both negations will be removed
define <9 x double> @test_with_two_operands_negated_with_fastflags(<27 x double> %a, <3 x double> %b){
; CHECK-LABEL: @test_with_two_operands_negated_with_fastflags(
; CHECK-NEXT:    [[RES:%.*]] = tail call fast <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A:%.*]], <3 x double> [[B:%.*]], i32 9, i32 3, i32 1)
; CHECK-NEXT:    ret <9 x double> [[RES]]
;
  %a.neg = fneg <27 x double> %a
  %b.neg = fneg <3 x double> %b
  %res = tail call fast <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> %a.neg, <3 x double> %b.neg, i32 9, i32 3, i32 1)
  ret <9 x double> %res
}

; both negations should be removed
define <9 x double> @test_with_two_operands_negated2_commute(<3 x double> %a, <27 x double> %b){
; CHECK-LABEL: @test_with_two_operands_negated2_commute(
; CHECK-NEXT:    [[RES:%.*]] = call <9 x double> @llvm.matrix.multiply.v9f64.v3f64.v27f64(<3 x double> [[A:%.*]], <27 x double> [[B:%.*]], i32 1, i32 3, i32 9)
; CHECK-NEXT:    ret <9 x double> [[RES]]
;
  %a.neg = fneg <3 x double> %a
  %b.neg = fneg <27 x double> %b
  %res = call <9 x double> @llvm.matrix.multiply.v9f64.v3f64.v27f64(<3 x double> %a.neg, <27 x double> %b.neg, i32 1, i32 3, i32 9)
  ret <9 x double> %res
}
define <4 x double> @matrix_multiply_two_operands_negated_with_same_size(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @matrix_multiply_two_operands_negated_with_same_size(
; CHECK-NEXT:    [[RES:%.*]] = call <4 x double> @llvm.matrix.multiply.v4f64.v2f64.v2f64(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], i32 2, i32 1, i32 2)
; CHECK-NEXT:    ret <4 x double> [[RES]]
;
  %a.neg = fneg <2 x double> %a
  %b.neg = fneg <2 x double> %b
  %res = call <4 x double> @llvm.matrix.multiply.v4f64.v2f64.v2f64(<2 x double> %a.neg, <2 x double> %b.neg, i32 2, i32 1, i32 2)
  ret <4 x double> %res
}

define <2 x double> @matrix_multiply_two_operands_with_multiple_uses(<6 x double> %a, <3 x double> %b) {
; CHECK-LABEL: @matrix_multiply_two_operands_with_multiple_uses(
; CHECK-NEXT:    [[A_NEG:%.*]] = fneg <6 x double> [[A:%.*]]
; CHECK-NEXT:    [[RES:%.*]] = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1)
; CHECK-NEXT:    [[RES_2:%.*]] = shufflevector <6 x double> [[A_NEG]], <6 x double> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT:    [[RES_3:%.*]] = fadd <2 x double> [[RES_2]], [[RES]]
; CHECK-NEXT:    ret <2 x double> [[RES_3]]
;
  %a.neg = fneg <6 x double> %a
  %b.neg = fneg <3 x double> %b
  %res = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> %a.neg, <3 x double> %b.neg, i32 2, i32 3, i32 1)
  %res.2 = shufflevector <6 x double> %a.neg, <6 x double> undef, <2 x i32> <i32 0, i32 1>
  %res.3 = fadd <2 x double> %res.2, %res
  ret <2 x double> %res.3
}

define <9 x double> @matrix_multiply_two_operands_with_multiple_uses2(<27 x double> %a, <3 x double> %b, ptr %a_loc, ptr %b_loc){
; CHECK-LABEL: @matrix_multiply_two_operands_with_multiple_uses2(
; CHECK-NEXT:    [[A_NEG:%.*]] = fneg <27 x double> [[A:%.*]]
; CHECK-NEXT:    [[B_NEG:%.*]] = fneg <3 x double> [[B:%.*]]
; CHECK-NEXT:    [[RES:%.*]] = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A]], <3 x double> [[B]], i32 9, i32 3, i32 1)
; CHECK-NEXT:    store <27 x double> [[A_NEG]], ptr [[A_LOC:%.*]], align 256
; CHECK-NEXT:    store <3 x double> [[B_NEG]], ptr [[B_LOC:%.*]], align 32
; CHECK-NEXT:    ret <9 x double> [[RES]]
;
  %a.neg = fneg <27 x double> %a
  %b.neg = fneg <3 x double> %b
  %res = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> %a.neg, <3 x double> %b.neg, i32 9, i32 3, i32 1)
  store <27 x double> %a.neg, ptr %a_loc
  store <3 x double> %b.neg, ptr %b_loc
  ret <9 x double> %res
}
define <12 x double> @fneg_with_multiple_uses(<15 x double> %a, <20 x double> %b){
; CHECK-LABEL: @fneg_with_multiple_uses(
; CHECK-NEXT:    [[A_NEG:%.*]] = fneg <15 x double> [[A:%.*]]
; CHECK-NEXT:    [[RES:%.*]] = tail call <12 x double> @llvm.matrix.multiply.v12f64.v15f64.v20f64(<15 x double> [[A_NEG]], <20 x double> [[B:%.*]], i32 3, i32 5, i32 4)
; CHECK-NEXT:    [[RES_2:%.*]] = shufflevector <15 x double> [[A_NEG]], <15 x double> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
; CHECK-NEXT:    [[RES_3:%.*]] = fadd <12 x double> [[RES_2]], [[RES]]
; CHECK-NEXT:    ret <12 x double> [[RES_3]]
;
  %a.neg = fneg <15 x double> %a
  %res = tail call <12 x double> @llvm.matrix.multiply.v12f64.v15f64.v20f64(<15 x double> %a.neg, <20 x double> %b, i32 3, i32 5, i32 4)
  %res.2 = shufflevector <15 x double> %a.neg, <15 x double> undef, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  %res.3 = fadd <12 x double> %res.2, %res
  ret <12 x double> %res.3
}

define <12 x double> @fneg_with_multiple_uses_2(<15 x double> %a, <20 x double> %b, ptr %a_loc){
; CHECK-LABEL: @fneg_with_multiple_uses_2(
; CHECK-NEXT:    [[A_NEG:%.*]] = fneg <15 x double> [[A:%.*]]
; CHECK-NEXT:    [[RES:%.*]] = tail call <12 x double> @llvm.matrix.multiply.v12f64.v15f64.v20f64(<15 x double> [[A_NEG]], <20 x double> [[B:%.*]], i32 3, i32 5, i32 4)
; CHECK-NEXT:    store <15 x double> [[A_NEG]], ptr [[A_LOC:%.*]], align 128
; CHECK-NEXT:    ret <12 x double> [[RES]]
;
  %a.neg = fneg <15 x double> %a
  %res = tail call <12 x double> @llvm.matrix.multiply.v12f64.v15f64.v20f64(<15 x double> %a.neg, <20 x double> %b, i32 3, i32 5, i32 4)
  store <15 x double> %a.neg, ptr %a_loc
  ret <12 x double> %res
}
; negation should be moved to the second operand given it has the smallest operand count
define <72 x double> @chain_of_matrix_multiplies(<27 x double> %a, <3 x double> %b, <8 x double> %c) {
; CHECK-LABEL: @chain_of_matrix_multiplies(
; CHECK-NEXT:    [[TMP1:%.*]] = fneg <3 x double> [[B:%.*]]
; CHECK-NEXT:    [[RES:%.*]] = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A:%.*]], <3 x double> [[TMP1]], i32 9, i32 3, i32 1)
; CHECK-NEXT:    [[RES_2:%.*]] = tail call <72 x double> @llvm.matrix.multiply.v72f64.v9f64.v8f64(<9 x double> [[RES]], <8 x double> [[C:%.*]], i32 9, i32 1, i32 8)
; CHECK-NEXT:    ret <72 x double> [[RES_2]]
;
  %a.neg = fneg <27 x double> %a
  %res = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> %a.neg, <3 x double> %b, i32 9, i32 3, i32 1)
  %res.2 = tail call <72 x double> @llvm.matrix.multiply.v72f64.v9f64.v8f64(<9 x double> %res, <8 x double> %c, i32 9, i32 1, i32 8)
  ret <72 x double> %res.2
}

; first negation should be moved to %a
; second negation should be moved to the result of the second multipication
define <6 x double> @chain_of_matrix_multiplies_with_two_negations(<3 x double> %a, <5 x double> %b, <10 x double> %c) {
; CHECK-LABEL: @chain_of_matrix_multiplies_with_two_negations(
; CHECK-NEXT:    [[TMP1:%.*]] = fneg <3 x double> [[A:%.*]]
; CHECK-NEXT:    [[RES:%.*]] = tail call <15 x double> @llvm.matrix.multiply.v15f64.v3f64.v5f64(<3 x double> [[TMP1]], <5 x double> [[B:%.*]], i32 3, i32 1, i32 5)
; CHECK-NEXT:    [[TMP2:%.*]] = call <6 x double> @llvm.matrix.multiply.v6f64.v15f64.v10f64(<15 x double> [[RES]], <10 x double> [[C:%.*]], i32 3, i32 5, i32 2)
; CHECK-NEXT:    [[RES_2:%.*]] = fneg <6 x double> [[TMP2]]
; CHECK-NEXT:    ret <6 x double> [[RES_2]]
;
  %b.neg = fneg <5 x double> %b
  %res = tail call <15 x double> @llvm.matrix.multiply.v15f64.v3f64.v5f64(<3 x double> %a, <5 x double> %b.neg, i32 3, i32 1, i32 5)
  %res.neg = fneg <15 x double> %res
  %res.2 = tail call <6 x double> @llvm.matrix.multiply.v6f64.v15f64.v10f64(<15 x double> %res.neg, <10 x double> %c, i32 3, i32 5, i32 2)
  ret <6 x double> %res.2
}

; negation should be propagated to the result of the second matrix multiplication
define <6 x double> @chain_of_matrix_multiplies_propagation(<15 x double> %a, <20 x double> %b, <8 x double> %c){
; CHECK-LABEL: @chain_of_matrix_multiplies_propagation(
; CHECK-NEXT:    [[TMP1:%.*]] = call <12 x double> @llvm.matrix.multiply.v12f64.v15f64.v20f64(<15 x double> [[A:%.*]], <20 x double> [[B:%.*]], i32 3, i32 5, i32 4)
; CHECK-NEXT:    [[TMP2:%.*]] = call <6 x double> @llvm.matrix.multiply.v6f64.v12f64.v8f64(<12 x double> [[TMP1]], <8 x double> [[C:%.*]], i32 3, i32 4, i32 2)
; CHECK-NEXT:    [[RES_2:%.*]] = fneg <6 x double> [[TMP2]]
; CHECK-NEXT:    ret <6 x double> [[RES_2]]
;
  %a.neg = fneg <15 x double> %a
  %res = tail call <12 x double> @llvm.matrix.multiply.v12f64.v15f64.v20f64(<15 x double> %a.neg, <20 x double> %b, i32 3, i32 5, i32 4)
  %res.2 = tail call <6 x double> @llvm.matrix.multiply.v6f64.v12f64.v8f64(<12 x double> %res, <8 x double> %c, i32 3, i32 4, i32 2)
  ret <6 x double> %res.2
}
declare <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double>, <3 x double>, i32 immarg, i32 immarg, i32 immarg) #1
declare <4 x double> @llvm.matrix.multiply.v4f64.v2f64.v2f64(<2 x double>, <2 x double>, i32 immarg, i32 immarg, i32 immarg) #1
declare <2 x double> @llvm.matrix.multiply.v2f64.v3f64.v6f64(<3 x double>, <6 x double>, i32 immarg, i32 immarg, i32 immarg) #1
declare <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double>, <3 x double>, i32 immarg, i32 immarg, i32 immarg) #1
declare <9 x double> @llvm.matrix.multiply.v9f64.v3f64.v27f64(<3 x double>, <27 x double>, i32 immarg, i32 immarg, i32 immarg)
declare <15 x double> @llvm.matrix.multiply.v15f64.v3f64.v5f64(<3 x double>, <5 x double>, i32 immarg, i32 immarg, i32 immarg) #1
declare <15 x double> @llvm.matrix.multiply.v15f64.v5f64.v3f64(<5 x double>, <3 x double>, i32 immarg, i32 immarg, i32 immarg) #1
declare <72 x double> @llvm.matrix.multiply.v72f64.v9f64.v8f64(<9 x double>, <8 x double>, i32 immarg, i32 immarg, i32 immarg) #1
declare <12 x double> @llvm.matrix.multiply.v12f64.v15f64.v20f64(<15 x double>, <20 x double>, i32 immarg, i32 immarg, i32 immarg) #1
declare <21 x double> @llvm.matrix.multiply.v21f64.v15f64.v35f64(<15 x double>, <35 x double>, i32 immarg, i32 immarg, i32 immarg) #1
declare <6 x double> @llvm.matrix.multiply.v6f64.v15f64.v10f64(<15 x double>, <10 x double>, i32 immarg, i32 immarg, i32 immarg) #1
declare <6 x double> @llvm.matrix.multiply.v6f64.v12f64.v8f64(<12 x double>, <8 x double>, i32 immarg, i32 immarg, i32 immarg) #1