[CIR] Add proper handling for no prototype function calls (#150553)

This adds standard-conforming handling for calls to functions that were
declared in C source in the no prototype form.
This commit is contained in:
Andy Kaylor 2025-07-29 09:16:17 -07:00 committed by GitHub
parent 0a4c6522a6
commit 32779cd698
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 265 additions and 32 deletions

View File

@ -1946,6 +1946,10 @@ def CIR_FuncOp : CIR_Op<"func", [
The function linkage information is specified by `linkage`, as defined by
`GlobalLinkageKind` attribute.
The `no_proto` keyword is used to identify functions that were declared
without a prototype and, consequently, may contain calls with invalid
arguments and undefined behavior.
Example:
```mlir
@ -1964,6 +1968,7 @@ def CIR_FuncOp : CIR_Op<"func", [
let arguments = (ins SymbolNameAttr:$sym_name,
CIR_VisibilityAttr:$global_visibility,
TypeAttrOf<CIR_FuncType>:$function_type,
UnitAttr:$no_proto,
UnitAttr:$dso_local,
DefaultValuedAttr<CIR_GlobalLinkageKind,
"cir::GlobalLinkageKind::ExternalLinkage">:$linkage,
@ -2005,13 +2010,6 @@ def CIR_FuncOp : CIR_Op<"func", [
return getFunctionType().getReturnTypes();
}
// TODO(cir): this should be an operand attribute, but for now we just hard-
// wire this as a function. Will later add a $no_proto argument to this op.
bool getNoProto() {
assert(!cir::MissingFeatures::opFuncNoProto());
return false;
}
//===------------------------------------------------------------------===//
// SymbolOpInterface Methods
//===------------------------------------------------------------------===//

View File

@ -73,14 +73,16 @@ struct MissingFeatures {
// FuncOp handling
static bool opFuncOpenCLKernelMetadata() { return false; }
static bool opFuncAstDeclAttr() { return false; }
static bool opFuncCallingConv() { return false; }
static bool opFuncExtraAttrs() { return false; }
static bool opFuncNoProto() { return false; }
static bool opFuncCPUAndFeaturesAttributes() { return false; }
static bool opFuncSection() { return false; }
static bool opFuncMultipleReturnVals() { return false; }
static bool opFuncAttributesForDefinition() { return false; }
static bool opFuncCallingConv() { return false; }
static bool opFuncCPUAndFeaturesAttributes() { return false; }
static bool opFuncExceptions() { return false; }
static bool opFuncExtraAttrs() { return false; }
static bool opFuncMaybeHandleStaticInExternC() { return false; }
static bool opFuncMultipleReturnVals() { return false; }
static bool opFuncOperandBundles() { return false; }
static bool opFuncParameterAttributes() { return false; }
static bool opFuncSection() { return false; }
static bool setLLVMFunctionFEnvAttributes() { return false; }
static bool setFunctionAttributes() { return false; }
@ -96,7 +98,6 @@ struct MissingFeatures {
static bool opCallReturn() { return false; }
static bool opCallArgEvaluationOrder() { return false; }
static bool opCallCallConv() { return false; }
static bool opCallNoPrototypeFunc() { return false; }
static bool opCallMustTail() { return false; }
static bool opCallVirtual() { return false; }
static bool opCallInAlloca() { return false; }
@ -109,6 +110,7 @@ struct MissingFeatures {
static bool opCallCIRGenFuncInfoExtParamInfo() { return false; }
static bool opCallLandingPad() { return false; }
static bool opCallContinueBlock() { return false; }
static bool opCallChain() { return false; }
// CXXNewExpr
static bool exprNewNullCheck() { return false; }

View File

@ -582,6 +582,14 @@ RValue CIRGenFunction::emitCall(const CIRGenFunctionInfo &funcInfo,
cir::FuncOp directFuncOp;
if (auto fnOp = dyn_cast<cir::FuncOp>(calleePtr)) {
directFuncOp = fnOp;
} else if (auto getGlobalOp = mlir::dyn_cast<cir::GetGlobalOp>(calleePtr)) {
// FIXME(cir): This peephole optimization avoids indirect calls for
// builtins. This should be fixed in the builtin declaration instead by
// not emitting an unnecessary get_global in the first place.
// However, this is also used for no-prototype functions.
mlir::Operation *globalOp = cgm.getGlobalValue(getGlobalOp.getName());
assert(globalOp && "undefined global function");
directFuncOp = mlir::cast<cir::FuncOp>(globalOp);
} else {
[[maybe_unused]] mlir::ValueTypeRange<mlir::ResultRange> resultTypes =
calleePtr->getResultTypes();

View File

@ -116,6 +116,11 @@ public:
assert(isOrdinary());
return reinterpret_cast<mlir::Operation *>(kindOrFunctionPtr);
}
/// Overwrite the operation stored as this callee's function pointer.
/// Only valid for ordinary callees; the pointer is kept as raw integer bits in
/// the same field that otherwise holds the special-kind tag.
void setFunctionPointer(mlir::Operation *functionPtr) {
  assert(isOrdinary());
  const auto rawPointerBits = reinterpret_cast<uintptr_t>(functionPtr);
  kindOrFunctionPtr = SpecialKind(rawPointerBits);
}
};
/// Type for representing both the decl and type of parameters to a function.

View File

@ -1280,7 +1280,7 @@ RValue CIRGenFunction::getUndefRValue(QualType ty) {
}
RValue CIRGenFunction::emitCall(clang::QualType calleeTy,
const CIRGenCallee &callee,
const CIRGenCallee &origCallee,
const clang::CallExpr *e,
ReturnValueSlot returnValue) {
// Get the actual function type. The callee type will always be a pointer to
@ -1291,6 +1291,8 @@ RValue CIRGenFunction::emitCall(clang::QualType calleeTy,
calleeTy = getContext().getCanonicalType(calleeTy);
auto pointeeTy = cast<PointerType>(calleeTy)->getPointeeType();
CIRGenCallee callee = origCallee;
if (getLangOpts().CPlusPlus)
assert(!cir::MissingFeatures::sanitizers());
@ -1307,7 +1309,44 @@ RValue CIRGenFunction::emitCall(clang::QualType calleeTy,
const CIRGenFunctionInfo &funcInfo =
cgm.getTypes().arrangeFreeFunctionCall(args, fnType);
assert(!cir::MissingFeatures::opCallNoPrototypeFunc());
// C99 6.5.2.2p6:
// If the expression that denotes the called function has a type that does
// not include a prototype, [the default argument promotions are performed].
// If the number of arguments does not equal the number of parameters, the
// behavior is undefined. If the function is defined with a type that
// includes a prototype, and either the prototype ends with an ellipsis (,
// ...) or the types of the arguments after promotion are not compatible
// with the types of the parameters, the behavior is undefined. If the
// function is defined with a type that does not include a prototype, and
// the types of the arguments after promotion are not compatible with those
// of the parameters after promotion, the behavior is undefined [except in
// some trivial cases].
// That is, in the general case, we should assume that a call through an
// unprototyped function type works like a *non-variadic* call. The way we
// make this work is to cast to the exact type of the promoted arguments.
if (isa<FunctionNoProtoType>(fnType)) {
assert(!cir::MissingFeatures::opCallChain());
assert(!cir::MissingFeatures::addressSpace());
cir::FuncType calleeTy = getTypes().getFunctionType(funcInfo);
// get non-variadic function type
calleeTy = cir::FuncType::get(calleeTy.getInputs(),
calleeTy.getReturnType(), false);
auto calleePtrTy = cir::PointerType::get(calleeTy);
mlir::Operation *fn = callee.getFunctionPointer();
mlir::Value addr;
if (auto funcOp = mlir::dyn_cast<cir::FuncOp>(fn)) {
addr = builder.create<cir::GetGlobalOp>(
getLoc(e->getSourceRange()),
cir::PointerType::get(funcOp.getFunctionType()), funcOp.getSymName());
} else {
addr = fn->getResult(0);
}
fn = builder.createBitcast(addr, calleePtrTy).getDefiningOp();
callee.setFunctionPointer(fn);
}
assert(!cir::MissingFeatures::opCallFnInfoOpts());
assert(!cir::MissingFeatures::hip());
assert(!cir::MissingFeatures::opCallMustTail());

View File

@ -1103,6 +1103,60 @@ cir::GlobalLinkageKind CIRGenModule::getCIRLinkageForDeclarator(
return cir::GlobalLinkageKind::ExternalLinkage;
}
/// This function is called when we implement a function with no prototype, e.g.
/// "int foo() {}". If there are existing call uses of the old function in the
/// module, this adjusts them to call the new function directly.
///
/// This is not just a cleanup: the always_inline pass requires direct calls to
/// functions to be able to inline them. If there is a bitcast in the way, it
/// won't inline them. Instcombine normally deletes these calls, but it isn't
/// run at -O0.
///
/// \param old   The operation previously registered for the symbol. Nothing is
///              done unless it is a cir::FuncOp.
/// \param newFn The function that replaces \p old. It inherits the no_proto
///              flag of \p old, and direct calls / get_global uses of \p old
///              are retargeted or retyped to match it.
void CIRGenModule::replaceUsesOfNonProtoTypeWithRealFunction(
    mlir::Operation *old, cir::FuncOp newFn) {
  // If we're redefining a global as a function, don't transform it.
  auto oldFn = mlir::dyn_cast<cir::FuncOp>(old);
  if (!oldFn)
    return;

  // TODO(cir): this RAUW ignores the features below.
  assert(!cir::MissingFeatures::opFuncExceptions());
  assert(!cir::MissingFeatures::opFuncParameterAttributes());
  assert(!cir::MissingFeatures::opFuncOperandBundles());
  // NOTE(review): NYI is reported when the old function carries at most one
  // attribute -- confirm this is the intended direction of the comparison.
  if (oldFn->getAttrs().size() <= 1)
    errorNYI(old->getLoc(),
             "replaceUsesOfNonProtoTypeWithRealFunction: Attribute forwarding");

  // Mark new function as originated from a no-proto declaration.
  newFn.setNoProto(oldFn.getNoProto());

  // Iterate through all calls of the no-proto function.
  std::optional<mlir::SymbolTable::UseRange> symUses =
      oldFn.getSymbolUses(oldFn->getParentOp());
  // The lookup may yield no range at all; in that case there is nothing we can
  // patch, so bail out rather than dereference an empty optional.
  if (!symUses)
    return;

  for (const mlir::SymbolTable::SymbolUse &use : *symUses) {
    // Restore the builder's insertion point at the end of every iteration.
    mlir::OpBuilder::InsertionGuard guard(builder);

    if (auto noProtoCallOp = mlir::dyn_cast<cir::CallOp>(use.getUser())) {
      builder.setInsertionPoint(noProtoCallOp);

      // Patch call type with the real function type.
      cir::CallOp realCallOp = builder.createCallOp(
          noProtoCallOp.getLoc(), newFn, noProtoCallOp.getOperands());

      // Replace old no proto call with fixed call.
      noProtoCallOp.replaceAllUsesWith(realCallOp);
      noProtoCallOp.erase();
    } else if (auto getGlobalOp =
                   mlir::dyn_cast<cir::GetGlobalOp>(use.getUser())) {
      // Replace type
      getGlobalOp.getAddr().setType(
          cir::PointerType::get(newFn.getFunctionType()));
    } else {
      errorNYI(use.getUser()->getLoc(),
               "replaceUsesOfNonProtoTypeWithRealFunction: unexpected use");
    }
  }
}
cir::GlobalLinkageKind
CIRGenModule::getCIRLinkageVarDefinition(const VarDecl *vd, bool isConstant) {
assert(!isConstant && "constant variables NYI");
@ -1701,8 +1755,7 @@ cir::FuncOp CIRGenModule::getOrCreateCIRFunction(
// Lookup the entry, lazily creating it if necessary.
mlir::Operation *entry = getGlobalValue(mangledName);
if (entry) {
if (!isa<cir::FuncOp>(entry))
errorNYI(d->getSourceRange(), "getOrCreateCIRFunction: non-FuncOp");
assert(mlir::isa<cir::FuncOp>(entry));
assert(!cir::MissingFeatures::weakRefReference());
@ -1738,6 +1791,30 @@ cir::FuncOp CIRGenModule::getOrCreateCIRFunction(
invalidLoc ? theModule->getLoc() : getLoc(funcDecl->getSourceRange()),
mangledName, mlir::cast<cir::FuncType>(funcType), funcDecl);
// If we already created a function with the same mangled name (but different
// type) before, take its name and add it to the list of functions to be
// replaced with F at the end of CodeGen.
//
// This happens if there is a prototype for a function (e.g. "int f()") and
// then a definition of a different type (e.g. "int f(int x)").
if (entry) {
// Fetch a generic symbol-defining operation and its uses.
auto symbolOp = mlir::cast<mlir::SymbolOpInterface>(entry);
// This might be an implementation of a function without a prototype, in
// which case, try to do special replacement of calls which match the new
// prototype. The really key thing here is that we also potentially drop
// arguments from the call site so as to make a direct call, which makes the
// inliner happier and suppresses a number of optimizer warnings (!) about
// dropping arguments.
if (symbolOp.getSymbolUses(symbolOp->getParentOp()))
replaceUsesOfNonProtoTypeWithRealFunction(entry, funcOp);
// Obliterate no-proto declaration.
entry->erase();
}
if (d)
setFunctionAttributes(gd, funcOp, /*isIncompleteFunction=*/false, isThunk);
@ -1814,7 +1891,9 @@ CIRGenModule::createCIRFunction(mlir::Location loc, StringRef name,
func = builder.create<cir::FuncOp>(loc, name, funcType);
assert(!cir::MissingFeatures::opFuncAstDeclAttr());
assert(!cir::MissingFeatures::opFuncNoProto());
if (funcDecl && !funcDecl->hasPrototype())
func.setNoProto(true);
assert(func.isDeclaration() && "expected empty body");

View File

@ -313,6 +313,9 @@ public:
static void setInitializer(cir::GlobalOp &op, mlir::Attribute value);
void replaceUsesOfNonProtoTypeWithRealFunction(mlir::Operation *old,
cir::FuncOp newFn);
cir::FuncOp
getOrCreateCIRFunction(llvm::StringRef mangledName, mlir::Type funcType,
clang::GlobalDecl gd, bool forVTable,

View File

@ -1470,10 +1470,14 @@ ParseResult cir::FuncOp::parse(OpAsmParser &parser, OperationState &state) {
llvm::SMLoc loc = parser.getCurrentLocation();
mlir::Builder &builder = parser.getBuilder();
mlir::StringAttr noProtoNameAttr = getNoProtoAttrName(state.name);
mlir::StringAttr visNameAttr = getSymVisibilityAttrName(state.name);
mlir::StringAttr visibilityNameAttr = getGlobalVisibilityAttrName(state.name);
mlir::StringAttr dsoLocalNameAttr = getDsoLocalAttrName(state.name);
if (parser.parseOptionalKeyword(noProtoNameAttr).succeeded())
state.addAttribute(noProtoNameAttr, parser.getBuilder().getUnitAttr());
// Default to external linkage if no keyword is provided.
state.addAttribute(getLinkageAttrNameString(),
GlobalLinkageKindAttr::get(
@ -1578,6 +1582,9 @@ mlir::Region *cir::FuncOp::getCallableRegion() {
}
void cir::FuncOp::print(OpAsmPrinter &p) {
if (getNoProto())
p << " no_proto";
if (getComdat())
p << " comdat";

View File

@ -11,7 +11,7 @@ struct S {
};
void f1(struct S);
void f2() {
void f2(void) {
struct S s;
f1(s);
}
@ -28,8 +28,8 @@ void f2() {
// OGCG: %[[S:.+]] = load i64, ptr %{{.+}}, align 4
// OGCG-NEXT: call void @f1(i64 %[[S]])
struct S f3();
void f4() {
struct S f3(void);
void f4(void) {
struct S s = f3();
}
@ -38,11 +38,11 @@ void f4() {
// CIR-NEXT: cir.store align(4) %[[S]], %{{.+}} : !rec_S, !cir.ptr<!rec_S>
// LLVM-LABEL: define{{.*}} void @f4() {
// LLVM: %[[S:.+]] = call %struct.S (...) @f3()
// LLVM: %[[S:.+]] = call %struct.S @f3()
// LLVM-NEXT: store %struct.S %[[S]], ptr %{{.+}}, align 4
// OGCG-LABEL: define{{.*}} void @f4() #0 {
// OGCG: %[[S:.+]] = call i64 (...) @f3()
// OGCG: %[[S:.+]] = call i64 @f3()
// OGCG-NEXT: store i64 %[[S]], ptr %{{.+}}, align 4
struct Big {
@ -50,9 +50,9 @@ struct Big {
};
void f5(struct Big);
struct Big f6();
struct Big f6(void);
void f7() {
void f7(void) {
struct Big b;
f5(b);
}
@ -69,7 +69,7 @@ void f7() {
// OGCG: %[[B:.+]] = alloca %struct.Big, align 8
// OGCG-NEXT: call void @f5(ptr noundef byval(%struct.Big) align 8 %[[B]])
void f8() {
void f8(void) {
struct Big b = f6();
}
@ -78,14 +78,14 @@ void f8() {
// CIR: cir.store align(4) %[[B]], %{{.+}} : !rec_Big, !cir.ptr<!rec_Big>
// LLVM-LABEL: define{{.*}} void @f8() {
// LLVM: %[[B:.+]] = call %struct.Big (...) @f6()
// LLVM: %[[B:.+]] = call %struct.Big @f6()
// LLVM-NEXT: store %struct.Big %[[B]], ptr %{{.+}}, align 4
// OGCG-LABEL: define{{.*}} void @f8() #0 {
// OGCG: %[[B:.+]] = alloca %struct.Big, align 4
// OGCG-NEXT: call void (ptr, ...) @f6(ptr dead_on_unwind writable sret(%struct.Big) align 4 %[[B]])
// OGCG-NEXT: call void @f6(ptr dead_on_unwind writable sret(%struct.Big) align 4 %[[B]])
void f9() {
void f9(void) {
f1(f3());
}
@ -98,14 +98,14 @@ void f9() {
// LLVM-LABEL: define{{.*}} void @f9() {
// LLVM: %[[SLOT:.+]] = alloca %struct.S, i64 1, align 4
// LLVM-NEXT: %[[RET:.+]] = call %struct.S (...) @f3()
// LLVM-NEXT: %[[RET:.+]] = call %struct.S @f3()
// LLVM-NEXT: store %struct.S %[[RET]], ptr %[[SLOT]], align 4
// LLVM-NEXT: %[[ARG:.+]] = load %struct.S, ptr %[[SLOT]], align 4
// LLVM-NEXT: call void @f1(%struct.S %[[ARG]])
// OGCG-LABEL: define{{.*}} void @f9() #0 {
// OGCG: %[[SLOT:.+]] = alloca %struct.S, align 4
// OGCG-NEXT: %[[RET:.+]] = call i64 (...) @f3()
// OGCG-NEXT: %[[RET:.+]] = call i64 @f3()
// OGCG-NEXT: store i64 %[[RET]], ptr %[[SLOT]], align 4
// OGCG-NEXT: %[[ARG:.+]] = load i64, ptr %[[SLOT]], align 4
// OGCG-NEXT: call void @f1(i64 %[[ARG]])

View File

@ -0,0 +1,84 @@
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
// RUN: FileCheck --input-file=%t.cir %s
//===----------------------------------------------------------------------===//
// DEFINED BEHAVIOUR
//===----------------------------------------------------------------------===//
// No-proto definition followed by a correct call.
int noProto0(x) int x; { return x; }
// CHECK: cir.func no_proto dso_local @noProto0(%arg0: !s32i {{.+}}) -> !s32i
int test0(int x) {
// CHECK: cir.func dso_local @test0
return noProto0(x); // We know the definition. Should be a direct call.
// CHECK: %{{.+}} = cir.call @noProto0(%{{.+}})
}
// Declaration without prototype followed by its definition, then a correct call.
//
// Prototyped definition overrides no-proto declaration before any call is made,
// only allowing calls with proper arguments. This is the only case where the
// definition is not marked as no-proto.
int noProto1();
int noProto1(int x) { return x; }
// CHECK: cir.func dso_local @noProto1(%arg0: !s32i {{.+}}) -> !s32i
int test1(int x) {
// CHECK: cir.func dso_local @test1
return noProto1(x);
// CHECK: %{{.+}} = cir.call @noProto1(%{{[0-9]+}}) : (!s32i) -> !s32i
}
// Declaration without prototype followed by a correct call, then its definition.
//
// Call to no-proto is made before definition, so a variadic call that takes anything
// is created. Later, when the definition is found, no-proto is replaced.
int noProto2();
int test2(int x) {
return noProto2(x);
// CHECK: [[GGO:%.*]] = cir.get_global @noProto2 : !cir.ptr<!cir.func<(!s32i) -> !s32i>>
// CHECK: {{.*}} = cir.call [[GGO]](%{{[0-9]+}}) : (!cir.ptr<!cir.func<(!s32i) -> !s32i>>, !s32i) -> !s32i
}
int noProto2(int x) { return x; }
// CHECK: cir.func no_proto dso_local @noProto2(%arg0: !s32i {{.+}}) -> !s32i
// No-proto declaration without definition (any call here is "correct").
//
// Call to no-proto is made before definition, so a variadic call that takes anything
// is created. Definition is not in the translation unit, so it is left as is.
int noProto3();
// cir.func private no_proto @noProto3(...) -> !s32i
int test3(int x) {
// CHECK: cir.func dso_local @test3
return noProto3(x);
// CHECK: [[GGO:%.*]] = cir.get_global @noProto3 : !cir.ptr<!cir.func<(...) -> !s32i>>
// CHECK: [[CAST:%.*]] = cir.cast(bitcast, [[GGO]] : !cir.ptr<!cir.func<(...) -> !s32i>>), !cir.ptr<!cir.func<(!s32i) -> !s32i>>
// CHECK: {{%.*}} = cir.call [[CAST]](%{{[0-9]+}}) : (!cir.ptr<!cir.func<(!s32i) -> !s32i>>, !s32i) -> !s32i
}
//===----------------------------------------------------------------------===//
// UNDEFINED BEHAVIOUR
//
// No-proto definitions followed by incorrect calls.
//===----------------------------------------------------------------------===//
// No-proto definition followed by an incorrect call due to extra args.
int noProto4() { return 0; }
// cir.func private no_proto @noProto4() -> !s32i
int test4(int x) {
return noProto4(x); // Even if we know the definition, this should compile.
// CHECK: [[GGO:%.*]] = cir.get_global @noProto4 : !cir.ptr<!cir.func<() -> !s32i>>
// CHECK: [[CAST:%.*]] = cir.cast(bitcast, [[GGO]] : !cir.ptr<!cir.func<() -> !s32i>>), !cir.ptr<!cir.func<(!s32i) -> !s32i>>
// CHECK: {{%.*}} = cir.call [[CAST]]({{%.*}}) : (!cir.ptr<!cir.func<(!s32i) -> !s32i>>, !s32i) -> !s32i
}
// No-proto declaration followed by an incorrect call due to lack of args, then its definition.
int noProto5();
int test5(int x) {
return noProto5();
// CHECK: [[GGO:%.*]] = cir.get_global @noProto5 : !cir.ptr<!cir.func<(!s32i) -> !s32i>>
// CHECK: [[CAST:%.*]] = cir.cast(bitcast, [[GGO]] : !cir.ptr<!cir.func<(!s32i) -> !s32i>>), !cir.ptr<!cir.func<() -> !s32i>>
// CHECK: {{%.*}} = cir.call [[CAST]]() : (!cir.ptr<!cir.func<() -> !s32i>>) -> !s32i
}
int noProto5(int x) { return x; }
// CHECK: cir.func no_proto dso_local @noProto5(%arg0: !s32i {{.+}}) -> !s32i

View File

@ -14,6 +14,14 @@ cir.func @empty() {
// CHECK: cir.return
// CHECK: }
// void empty() { }
cir.func no_proto @noProto() {
cir.return
}
// CHECK: cir.func no_proto @noProto() {
// CHECK: cir.return
// CHECK: }
// void voidret() { return; }
cir.func @voidret() {
cir.return