[CIR] Upstream pointer arithmetic support (#138041)

This adds support for explicit pointer arithmetic, including unary increment and decrement of pointer values.
2025-05-01 11:11:02 -07:00 · 2025-05-01 11:11:02 -07:00 · 5209668e35
commit 5209668e35
parent 49a5dd3dac
3 changed files with 177 additions and 5 deletions
--- a/clang/lib/CIR/CodeGen/CIRGenBuilder.h
+++ b/clang/lib/CIR/CodeGen/CIRGenBuilder.h
@ -185,6 +185,14 @@ public:
  }
  bool isInt(mlir::Type i) { return mlir::isa<cir::IntType>(i); }
  //
  // Constant creation helpers
  // -------------------------
  //
  cir::ConstantOp getSInt32(int32_t c, mlir::Location loc) {
    return getConstantInt(loc, getSInt32Ty(), c);
  }
  // Creates constant nullptr for pointer type ty.
  cir::ConstantOp getNullPtr(mlir::Type ty, mlir::Location loc) {
    assert(!cir::MissingFeatures::targetCodeGenInfoGetNullPointer());
--- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
@ -388,9 +388,26 @@ public:
        // NOTE(CIR): clang calls CreateAdd but folds this to a unary op
        value = emitUnaryOp(e, kind, input, /*nsw=*/false);
      }
-    } else if (isa<PointerType>(type)) {
+    } else if (const PointerType *ptr = type->getAs<PointerType>()) {
-      cgf.cgm.errorNYI(e->getSourceRange(), "Unary inc/dec pointer");
+      QualType type = ptr->getPointeeType();
      if (cgf.getContext().getAsVariableArrayType(type)) {
        // VLA types don't have constant size.
        cgf.cgm.errorNYI(e->getSourceRange(), "Pointer arithmetic on VLA");
        return {};
      } else if (type->isFunctionType()) {
        // Arithmetic on function pointers (!) is just +-1.
        cgf.cgm.errorNYI(e->getSourceRange(),
                         "Pointer arithmetic on function pointer");
        return {};
      } else {
        // For everything else, we can just do a simple increment.
        mlir::Location loc = cgf.getLoc(e->getSourceRange());
        CIRGenBuilderTy &builder = cgf.getBuilder();
        int amount = (isInc ? 1 : -1);
        mlir::Value amt = builder.getSInt32(amount, loc);
        assert(!cir::MissingFeatures::sanitizers());
        value = builder.createPtrStride(loc, value, amt);
      }
    } else if (type->isVectorType()) {
      cgf.cgm.errorNYI(e->getSourceRange(), "Unary inc/dec vector");
      return {};
@ -1152,8 +1169,78 @@ getUnwidenedIntegerType(const ASTContext &astContext, const Expr *e) {
 static mlir::Value emitPointerArithmetic(CIRGenFunction &cgf,
                                         const BinOpInfo &op,
                                         bool isSubtraction) {
-  cgf.cgm.errorNYI(op.loc, "pointer arithmetic");
+  // Must have binary (not unary) expr here.  Unary pointer
-  return {};
+  // increment/decrement doesn't use this path.
  const BinaryOperator *expr = cast<BinaryOperator>(op.e);
  mlir::Value pointer = op.lhs;
  Expr *pointerOperand = expr->getLHS();
  mlir::Value index = op.rhs;
  Expr *indexOperand = expr->getRHS();
  // In the case of subtraction, the FE has ensured that the LHS is always the
  // pointer. However, addition can have the pointer on either side. We will
  // always have a pointer operand and an integer operand, so if the LHS wasn't
  // a pointer, we need to swap our values.
  if (!isSubtraction && !mlir::isa<cir::PointerType>(pointer.getType())) {
    std::swap(pointer, index);
    std::swap(pointerOperand, indexOperand);
  }
  assert(mlir::isa<cir::PointerType>(pointer.getType()) &&
         "Need a pointer operand");
  assert(mlir::isa<cir::IntType>(index.getType()) && "Need an integer operand");
  // Some versions of glibc and gcc use idioms (particularly in their malloc
  // routines) that add a pointer-sized integer (known to be a pointer value)
  // to a null pointer in order to cast the value back to an integer or as
  // part of a pointer alignment algorithm.  This is undefined behavior, but
  // we'd like to be able to compile programs that use it.
  //
  // Normally, we'd generate a GEP with a null-pointer base here in response
  // to that code, but it's also UB to dereference a pointer created that
  // way.  Instead (as an acknowledged hack to tolerate the idiom) we will
  // generate a direct cast of the integer value to a pointer.
  //
  // The idiom (p = nullptr + N) is not met if any of the following are true:
  //
  //   The operation is subtraction.
  //   The index is not pointer-sized.
  //   The pointer type is not byte-sized.
  //
  if (BinaryOperator::isNullPointerArithmeticExtension(
          cgf.getContext(), op.opcode, expr->getLHS(), expr->getRHS()))
    return cgf.getBuilder().createIntToPtr(index, pointer.getType());
  // Differently from LLVM codegen, ABI bits for index sizes is handled during
  // LLVM lowering.
  // If this is subtraction, negate the index.
  if (isSubtraction)
    index = cgf.getBuilder().createNeg(index);
  assert(!cir::MissingFeatures::sanitizers());
  const PointerType *pointerType =
      pointerOperand->getType()->getAs<PointerType>();
  if (!pointerType) {
    cgf.cgm.errorNYI("Objective-C:pointer arithmetic with non-pointer type");
    return nullptr;
  }
  QualType elementType = pointerType->getPointeeType();
  if (cgf.getContext().getAsVariableArrayType(elementType)) {
    cgf.cgm.errorNYI("variable array type");
    return nullptr;
  }
  if (elementType->isVoidType() || elementType->isFunctionType()) {
    cgf.cgm.errorNYI("void* or function pointer arithmetic");
    return nullptr;
  }
  assert(!cir::MissingFeatures::sanitizers());
  return cgf.getBuilder().create<cir::PtrStrideOp>(
      cgf.getLoc(op.e->getExprLoc()), pointer.getType(), pointer, index);
 }
 mlir::Value ScalarExprEmitter::emitMul(const BinOpInfo &ops) {
--- a/clang/test/CIR/CodeGen/pointers.cpp
+++ b/clang/test/CIR/CodeGen/pointers.cpp
@ -0,0 +1,77 @@
 // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
 // RUN: FileCheck --input-file=%t.cir %s
 // Should generate basic pointer arithmetics.
 void foo(int *iptr, char *cptr, unsigned ustride) {
  iptr + 2;
  // CHECK: %[[#STRIDE:]] = cir.const #cir.int<2> : !s32i
  // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s32i>, %[[#STRIDE]] : !s32i), !cir.ptr<!s32i>
  cptr + 3;
  // CHECK: %[[#STRIDE:]] = cir.const #cir.int<3> : !s32i
  // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s8i>, %[[#STRIDE]] : !s32i), !cir.ptr<!s8i>
  iptr - 2;
  // CHECK: %[[#STRIDE:]] = cir.const #cir.int<2> : !s32i
  // CHECK: %[[#NEGSTRIDE:]] = cir.unary(minus, %[[#STRIDE]]) : !s32i, !s32i
  // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s32i>, %[[#NEGSTRIDE]] : !s32i), !cir.ptr<!s32i>
  cptr - 3;
  // CHECK: %[[#STRIDE:]] = cir.const #cir.int<3> : !s32i
  // CHECK: %[[#NEGSTRIDE:]] = cir.unary(minus, %[[#STRIDE]]) : !s32i, !s32i
  // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s8i>, %[[#NEGSTRIDE]] : !s32i), !cir.ptr<!s8i>
  iptr + ustride;
  // CHECK: %[[#STRIDE:]] = cir.load %{{.+}} : !cir.ptr<!u32i>, !u32i
  // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s32i>, %[[#STRIDE]] : !u32i), !cir.ptr<!s32i>
  // Must convert unsigned stride to a signed one.
  iptr - ustride;
  // CHECK: %[[#STRIDE:]] = cir.load %{{.+}} : !cir.ptr<!u32i>, !u32i
  // CHECK: %[[#SIGNSTRIDE:]] = cir.cast(integral, %[[#STRIDE]] : !u32i), !s32i
  // CHECK: %[[#NEGSTRIDE:]] = cir.unary(minus, %[[#SIGNSTRIDE]]) : !s32i, !s32i
  // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s32i>, %[[#NEGSTRIDE]] : !s32i), !cir.ptr<!s32i>
  4 + iptr;
  // CHECK: %[[#STRIDE:]] = cir.const #cir.int<4> : !s32i
  // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s32i>, %[[#STRIDE]] : !s32i), !cir.ptr<!s32i>
  iptr++;
  // CHECK: %[[#STRIDE:]] = cir.const #cir.int<1> : !s32i
  // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s32i>, %[[#STRIDE]] : !s32i), !cir.ptr<!s32i>
  iptr--;
  // CHECK: %[[#STRIDE:]] = cir.const #cir.int<-1> : !s32i
  // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s32i>, %[[#STRIDE]] : !s32i), !cir.ptr<!s32i>
 }
 void testPointerSubscriptAccess(int *ptr) {
 // CHECK: testPointerSubscriptAccess
  ptr[1];
  // CHECK: %[[#STRIDE:]] = cir.const #cir.int<1> : !s32i
  // CHECK: %[[#PTR:]] = cir.load %{{.+}} : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
  // CHECK: cir.ptr_stride(%[[#PTR]] : !cir.ptr<!s32i>, %[[#STRIDE]] : !s32i), !cir.ptr<!s32i>
 }
 void testPointerMultiDimSubscriptAccess(int **ptr) {
 // CHECK: testPointerMultiDimSubscriptAccess
  ptr[1][2];
  // CHECK: %[[#STRIDE2:]] = cir.const #cir.int<2> : !s32i
  // CHECK: %[[#STRIDE1:]] = cir.const #cir.int<1> : !s32i
  // CHECK: %[[#PTR1:]] = cir.load %{{.+}} : !cir.ptr<!cir.ptr<!cir.ptr<!s32i>>>, !cir.ptr<!cir.ptr<!s32i>>
  // CHECK: %[[#PTR2:]] = cir.ptr_stride(%[[#PTR1]] : !cir.ptr<!cir.ptr<!s32i>>, %[[#STRIDE1]] : !s32i), !cir.ptr<!cir.ptr<!s32i>>
  // CHECK: %[[#PTR3:]] = cir.load %[[#PTR2]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
  // CHECK: cir.ptr_stride(%[[#PTR3]] : !cir.ptr<!s32i>, %[[#STRIDE2]] : !s32i), !cir.ptr<!s32i>
 }
 // This test is meant to verify code that handles the 'p = nullptr + n' idiom
 // used by some versions of glibc and gcc.  This is undefined behavior but
 // it is intended there to act like a conversion from a pointer-sized integer
 // to a pointer, and we would like to tolerate that.
 #define NULLPTRINT ((int*)0)
 // This should get the inttoptr instruction.
 int *testGnuNullPtrArithmetic(unsigned n) {
 // CHECK: testGnuNullPtrArithmetic
  return NULLPTRINT + n;
  // CHECK: %[[NULLPTR:.*]] = cir.const #cir.ptr<null> : !cir.ptr<!s32i>
  // CHECK: %[[N:.*]] = cir.load %{{.*}} : !cir.ptr<!u32i>, !u32i
  // CHECK: %[[RESULT:.*]] = cir.ptr_stride(%[[NULLPTR]] : !cir.ptr<!s32i>, %[[N]] : !u32i), !cir.ptr<!s32i>
 }