llvm-project/mlir/lib/Bindings/Python/DialectQuant.cpp

//===- DialectQuant.cpp - 'quant' dialect submodule -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <cstdint>
#include <vector>

#include "mlir-c/BuiltinAttributes.h"
#include "mlir-c/Dialect/Quant.h"
#include "mlir-c/IR.h"
#include "mlir/Bindings/Python/Nanobind.h"
#include "mlir/Bindings/Python/NanobindAdaptors.h"

namespace nb = nanobind;
using namespace llvm;
using namespace mlir;
using namespace mlir::python::nanobind_adaptors;

static void populateDialectQuantSubmodule(const nb::module_ &m) {
  //===-------------------------------------------------------------------===//
  // QuantizedType
  //===-------------------------------------------------------------------===//

  auto quantizedType =
      mlir_type_subclass(m, "QuantizedType", mlirTypeIsAQuantizedType);
  quantizedType.def_staticmethod(
      "default_minimum_for_integer",
      [](bool isSigned, unsigned integralWidth) {
        return mlirQuantizedTypeGetDefaultMinimumForInteger(isSigned,
                                                            integralWidth);
      },
      "Default minimum value for the integer with the specified signedness and "
      "bit width.",
      nb::arg("is_signed"), nb::arg("integral_width"));
  quantizedType.def_staticmethod(
      "default_maximum_for_integer",
      [](bool isSigned, unsigned integralWidth) {
        return mlirQuantizedTypeGetDefaultMaximumForInteger(isSigned,
                                                            integralWidth);
      },
      "Default maximum value for the integer with the specified signedness and "
      "bit width.",
      nb::arg("is_signed"), nb::arg("integral_width"));
  quantizedType.def_property_readonly(
      "expressed_type",
      [](MlirType type) { return mlirQuantizedTypeGetExpressedType(type); },
      "Type expressed by this quantized type.");
  quantizedType.def_property_readonly(
      "flags", [](MlirType type) { return mlirQuantizedTypeGetFlags(type); },
      "Flags of this quantized type (named accessors should be preferred to "
      "this)");
  quantizedType.def_property_readonly(
      "is_signed",
      [](MlirType type) { return mlirQuantizedTypeIsSigned(type); },
      "Signedness of this quantized type.");
  quantizedType.def_property_readonly(
      "storage_type",
      [](MlirType type) { return mlirQuantizedTypeGetStorageType(type); },
      "Storage type backing this quantized type.");
  quantizedType.def_property_readonly(
      "storage_type_min",
      [](MlirType type) { return mlirQuantizedTypeGetStorageTypeMin(type); },
      "The minimum value held by the storage type of this quantized type.");
  quantizedType.def_property_readonly(
      "storage_type_max",
      [](MlirType type) { return mlirQuantizedTypeGetStorageTypeMax(type); },
      "The maximum value held by the storage type of this quantized type.");
  quantizedType.def_property_readonly(
      "storage_type_integral_width",
      [](MlirType type) {
        return mlirQuantizedTypeGetStorageTypeIntegralWidth(type);
      },
      "The bitwidth of the storage type of this quantized type.");
  quantizedType.def(
      "is_compatible_expressed_type",
      [](MlirType type, MlirType candidate) {
        return mlirQuantizedTypeIsCompatibleExpressedType(type, candidate);
      },
      "Checks whether the candidate type can be expressed by this quantized "
      "type.",
      nb::arg("candidate"));
  quantizedType.def_property_readonly(
      "quantized_element_type",
      [](MlirType type) {
        return mlirQuantizedTypeGetQuantizedElementType(type);
      },
      "Element type of this quantized type expressed as quantized type.");
  quantizedType.def(
      "cast_from_storage_type",
      [](MlirType type, MlirType candidate) {
        MlirType castResult =
            mlirQuantizedTypeCastFromStorageType(type, candidate);
        if (!mlirTypeIsNull(castResult))
          return castResult;
        throw nb::type_error("Invalid cast.");
      },
      "Casts from a type based on the storage type of this quantized type to a "
      "corresponding type based on the quantized type. Raises TypeError if the "
      "cast is not valid.",
      nb::arg("candidate"));
  quantizedType.def_staticmethod(
      "cast_to_storage_type",
      [](MlirType type) {
        MlirType castResult = mlirQuantizedTypeCastToStorageType(type);
        if (!mlirTypeIsNull(castResult))
          return castResult;
        throw nb::type_error("Invalid cast.");
      },
      "Casts from a type based on a quantized type to a corresponding type "
      "based on the storage type of this quantized type. Raises TypeError if "
      "the cast is not valid.",
      nb::arg("type"));
  quantizedType.def(
      "cast_from_expressed_type",
      [](MlirType type, MlirType candidate) {
        MlirType castResult =
            mlirQuantizedTypeCastFromExpressedType(type, candidate);
        if (!mlirTypeIsNull(castResult))
          return castResult;
        throw nb::type_error("Invalid cast.");
      },
      "Casts from a type based on the expressed type of this quantized type to "
      "a corresponding type based on the quantized type. Raises TypeError if "
      "the cast is not valid.",
      nb::arg("candidate"));
  quantizedType.def_staticmethod(
      "cast_to_expressed_type",
      [](MlirType type) {
        MlirType castResult = mlirQuantizedTypeCastToExpressedType(type);
        if (!mlirTypeIsNull(castResult))
          return castResult;
        throw nb::type_error("Invalid cast.");
      },
      "Casts from a type based on a quantized type to a corresponding type "
      "based on the expressed type of this quantized type. Raises TypeError if "
      "the cast is not valid.",
      nb::arg("type"));
  quantizedType.def(
      "cast_expressed_to_storage_type",
      [](MlirType type, MlirType candidate) {
        MlirType castResult =
            mlirQuantizedTypeCastExpressedToStorageType(type, candidate);
        if (!mlirTypeIsNull(castResult))
          return castResult;
        throw nb::type_error("Invalid cast.");
      },
      "Casts from a type based on the expressed type of this quantized type to "
      "a corresponding type based on the storage type. Raises TypeError if the "
      "cast is not valid.",
      nb::arg("candidate"));

  quantizedType.get_class().attr("FLAG_SIGNED") =
      mlirQuantizedTypeGetSignedFlag();

  //===-------------------------------------------------------------------===//
  // AnyQuantizedType
  //===-------------------------------------------------------------------===//

  auto anyQuantizedType =
      mlir_type_subclass(m, "AnyQuantizedType", mlirTypeIsAAnyQuantizedType,
                         quantizedType.get_class());
  anyQuantizedType.def_classmethod(
      "get",
      [](nb::object cls, unsigned flags, MlirType storageType,
         MlirType expressedType, int64_t storageTypeMin,
         int64_t storageTypeMax) {
        return cls(mlirAnyQuantizedTypeGet(flags, storageType, expressedType,
                                           storageTypeMin, storageTypeMax));
      },
      "Gets an instance of AnyQuantizedType in the same context as the "
      "provided storage type.",
      nb::arg("cls"), nb::arg("flags"), nb::arg("storage_type"),
      nb::arg("expressed_type"), nb::arg("storage_type_min"),
      nb::arg("storage_type_max"));

  //===-------------------------------------------------------------------===//
  // UniformQuantizedType
  //===-------------------------------------------------------------------===//

  auto uniformQuantizedType = mlir_type_subclass(
      m, "UniformQuantizedType", mlirTypeIsAUniformQuantizedType,
      quantizedType.get_class());
  uniformQuantizedType.def_classmethod(
      "get",
      [](nb::object cls, unsigned flags, MlirType storageType,
         MlirType expressedType, double scale, int64_t zeroPoint,
         int64_t storageTypeMin, int64_t storageTypeMax) {
        return cls(mlirUniformQuantizedTypeGet(flags, storageType,
                                               expressedType, scale, zeroPoint,
                                               storageTypeMin, storageTypeMax));
      },
      "Gets an instance of UniformQuantizedType in the same context as the "
      "provided storage type.",
      nb::arg("cls"), nb::arg("flags"), nb::arg("storage_type"),
      nb::arg("expressed_type"), nb::arg("scale"), nb::arg("zero_point"),
      nb::arg("storage_type_min"), nb::arg("storage_type_max"));
  uniformQuantizedType.def_property_readonly(
      "scale",
      [](MlirType type) { return mlirUniformQuantizedTypeGetScale(type); },
      "The scale designates the difference between the real values "
      "corresponding to consecutive quantized values differing by 1.");
  uniformQuantizedType.def_property_readonly(
      "zero_point",
      [](MlirType type) { return mlirUniformQuantizedTypeGetZeroPoint(type); },
      "The storage value corresponding to the real value 0 in the affine "
      "equation.");
  uniformQuantizedType.def_property_readonly(
      "is_fixed_point",
      [](MlirType type) { return mlirUniformQuantizedTypeIsFixedPoint(type); },
      "Fixed point values are real numbers divided by a scale.");

  //===-------------------------------------------------------------------===//
  // UniformQuantizedPerAxisType
  //===-------------------------------------------------------------------===//
  auto uniformQuantizedPerAxisType = mlir_type_subclass(
      m, "UniformQuantizedPerAxisType", mlirTypeIsAUniformQuantizedPerAxisType,
      quantizedType.get_class());
  uniformQuantizedPerAxisType.def_classmethod(
      "get",
      [](nb::object cls, unsigned flags, MlirType storageType,
         MlirType expressedType, std::vector<double> scales,
         std::vector<int64_t> zeroPoints, int32_t quantizedDimension,
         int64_t storageTypeMin, int64_t storageTypeMax) {
        if (scales.size() != zeroPoints.size())
          throw nb::value_error(
              "Mismatching number of scales and zero points.");
        auto nDims = static_cast<intptr_t>(scales.size());
        return cls(mlirUniformQuantizedPerAxisTypeGet(
            flags, storageType, expressedType, nDims, scales.data(),
            zeroPoints.data(), quantizedDimension, storageTypeMin,
            storageTypeMax));
      },
      "Gets an instance of UniformQuantizedPerAxisType in the same context as "
      "the provided storage type.",
      nb::arg("cls"), nb::arg("flags"), nb::arg("storage_type"),
      nb::arg("expressed_type"), nb::arg("scales"), nb::arg("zero_points"),
      nb::arg("quantized_dimension"), nb::arg("storage_type_min"),
      nb::arg("storage_type_max"));
  uniformQuantizedPerAxisType.def_property_readonly(
      "scales",
      [](MlirType type) {
        intptr_t nDim = mlirUniformQuantizedPerAxisTypeGetNumDims(type);
        std::vector<double> scales;
        scales.reserve(nDim);
        for (intptr_t i = 0; i < nDim; ++i) {
          double scale = mlirUniformQuantizedPerAxisTypeGetScale(type, i);
          scales.push_back(scale);
        }
        return scales;
      },
      "The scales designate the difference between the real values "
      "corresponding to consecutive quantized values differing by 1. The ith "
      "scale corresponds to the ith slice in the quantized_dimension.");
  uniformQuantizedPerAxisType.def_property_readonly(
      "zero_points",
      [](MlirType type) {
        intptr_t nDim = mlirUniformQuantizedPerAxisTypeGetNumDims(type);
        std::vector<int64_t> zeroPoints;
        zeroPoints.reserve(nDim);
        for (intptr_t i = 0; i < nDim; ++i) {
          int64_t zeroPoint =
              mlirUniformQuantizedPerAxisTypeGetZeroPoint(type, i);
          zeroPoints.push_back(zeroPoint);
        }
        return zeroPoints;
      },
      "the storage values corresponding to the real value 0 in the affine "
      "equation. The ith zero point corresponds to the ith slice in the "
      "quantized_dimension.");
  uniformQuantizedPerAxisType.def_property_readonly(
      "quantized_dimension",
      [](MlirType type) {
        return mlirUniformQuantizedPerAxisTypeGetQuantizedDimension(type);
      },
      "Specifies the dimension of the shape that the scales and zero points "
      "correspond to.");
  uniformQuantizedPerAxisType.def_property_readonly(
      "is_fixed_point",
      [](MlirType type) {
        return mlirUniformQuantizedPerAxisTypeIsFixedPoint(type);
      },
      "Fixed point values are real numbers divided by a scale.");

  //===-------------------------------------------------------------------===//
  // UniformQuantizedSubChannelType
  //===-------------------------------------------------------------------===//
  auto uniformQuantizedSubChannelType = mlir_type_subclass(
      m, "UniformQuantizedSubChannelType",
      mlirTypeIsAUniformQuantizedSubChannelType, quantizedType.get_class());
  uniformQuantizedSubChannelType.def_classmethod(
      "get",
      [](nb::object cls, unsigned flags, MlirType storageType,
         MlirType expressedType, MlirAttribute scales, MlirAttribute zeroPoints,
         std::vector<int32_t> quantizedDimensions,
         std::vector<int64_t> blockSizes, int64_t storageTypeMin,
         int64_t storageTypeMax) {
        return cls(mlirUniformQuantizedSubChannelTypeGet(
            flags, storageType, expressedType, scales, zeroPoints,
            static_cast<intptr_t>(blockSizes.size()),
            quantizedDimensions.data(), blockSizes.data(), storageTypeMin,
            storageTypeMax));
      },
      "Gets an instance of UniformQuantizedSubChannel in the same context as "
      "the provided storage type.",
      nb::arg("cls"), nb::arg("flags"), nb::arg("storage_type"),
      nb::arg("expressed_type"), nb::arg("scales"), nb::arg("zero_points"),
      nb::arg("quantized_dimensions"), nb::arg("block_sizes"),
      nb::arg("storage_type_min"), nb::arg("storage_type_max"));
  uniformQuantizedSubChannelType.def_property_readonly(
      "quantized_dimensions",
      [](MlirType type) {
        intptr_t nDim =
            mlirUniformQuantizedSubChannelTypeGetNumBlockSizes(type);
        std::vector<int32_t> quantizedDimensions;
        quantizedDimensions.reserve(nDim);
        for (intptr_t i = 0; i < nDim; ++i) {
          quantizedDimensions.push_back(
              mlirUniformQuantizedSubChannelTypeGetQuantizedDimension(type, i));
        }
        return quantizedDimensions;
      },
      "Gets the quantized dimensions. Each element in the returned list "
      "represents an axis of the quantized data tensor that has a specified "
      "block size. The order of elements corresponds to the order of block "
      "sizes returned by 'block_sizes' method. It means that the data tensor "
      "is quantized along the i-th dimension in the returned list using the "
      "i-th block size from block_sizes method.");
  uniformQuantizedSubChannelType.def_property_readonly(
      "block_sizes",
      [](MlirType type) {
        intptr_t nDim =
            mlirUniformQuantizedSubChannelTypeGetNumBlockSizes(type);
        std::vector<int64_t> blockSizes;
        blockSizes.reserve(nDim);
        for (intptr_t i = 0; i < nDim; ++i) {
          blockSizes.push_back(
              mlirUniformQuantizedSubChannelTypeGetBlockSize(type, i));
        }
        return blockSizes;
      },
      "Gets the block sizes for the quantized dimensions. The i-th element in "
      "the returned list corresponds to the block size for the i-th dimension "
      "in the list returned by quantized_dimensions method.");
  uniformQuantizedSubChannelType.def_property_readonly(
      "scales",
      [](MlirType type) -> MlirAttribute {
        return mlirUniformQuantizedSubChannelTypeGetScales(type);
      },
      "The scales of the quantized type.");
  uniformQuantizedSubChannelType.def_property_readonly(
      "zero_points",
      [](MlirType type) -> MlirAttribute {
        return mlirUniformQuantizedSubChannelTypeGetZeroPoints(type);
      },
      "The zero points of the quantized type.");

  //===-------------------------------------------------------------------===//
  // CalibratedQuantizedType
  //===-------------------------------------------------------------------===//

  auto calibratedQuantizedType = mlir_type_subclass(
      m, "CalibratedQuantizedType", mlirTypeIsACalibratedQuantizedType,
      quantizedType.get_class());
  calibratedQuantizedType.def_classmethod(
      "get",
      [](nb::object cls, MlirType expressedType, double min, double max) {
        return cls(mlirCalibratedQuantizedTypeGet(expressedType, min, max));
      },
      "Gets an instance of CalibratedQuantizedType in the same context as the "
      "provided expressed type.",
      nb::arg("cls"), nb::arg("expressed_type"), nb::arg("min"),
      nb::arg("max"));
  calibratedQuantizedType.def_property_readonly("min", [](MlirType type) {
    return mlirCalibratedQuantizedTypeGetMin(type);
  });
  calibratedQuantizedType.def_property_readonly("max", [](MlirType type) {
    return mlirCalibratedQuantizedTypeGetMax(type);
  });
}

NB_MODULE(_mlirDialectsQuant, m) {
  m.doc() = "MLIR Quantization dialect";

  populateDialectQuantSubmodule(m);
}