danieldk HF Staff committed on
Commit
84cf777
·
verified ·
1 Parent(s): ea80dd0

Build uploaded using `kernels`.

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ build/torch210-metal-aarch64-darwin/_bitsandbytes_mps_c31f916.abi3.so filter=lfs diff=lfs merge=lfs -text
37
+ build/torch29-metal-aarch64-darwin/_bitsandbytes_mps_c31f916.abi3.so filter=lfs diff=lfs merge=lfs -text
build/torch210-metal-aarch64-darwin/__init__.py ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Optional, Tuple
2
+
3
+ import torch
4
+
5
+ from ._ops import ops
6
+
7
+ # Quant type constants (match bitsandbytes DataType_t)
8
+ FP4 = 1
9
+ NF4 = 2
10
+
11
+
12
def quantize_4bit(
    input: torch.Tensor,
    blocksize: int = 64,
    quant_type: int = NF4,
) -> Tuple[torch.Tensor, torch.Tensor]:
    """Quantize a tensor to packed 4-bit codes with one scale per block.

    Args:
        input: Tensor on the MPS device (float16, bfloat16, or float32).
        blocksize: Elements per quantization block (64 or 128).
        quant_type: Codebook selector — FP4 (1) or NF4 (2).

    Returns:
        A ``(packed, absmax)`` pair: ``packed`` is a uint8 tensor holding two
        4-bit codes per byte (numel/2 entries), and ``absmax`` is a float32
        tensor of per-block maximum absolute values.
    """
    packed_and_scales = ops.bnb_quantize_4bit(input, blocksize, quant_type)
    return packed_and_scales
30
+
31
+
32
def dequantize_4bit(
    packed: torch.Tensor,
    absmax: torch.Tensor,
    blocksize: int = 64,
    quant_type: int = NF4,
    numel: int = -1,
    output_dtype: torch.dtype = torch.float16,
) -> torch.Tensor:
    """Reconstruct a tensor from packed 4-bit codes and per-block scales.

    Args:
        packed: uint8 tensor holding two 4-bit codes per byte.
        absmax: float32 tensor of per-block maximum absolute values.
        blocksize: Elements per quantization block (64 or 128).
        quant_type: Codebook selector — FP4 (1) or NF4 (2).
        numel: Element count of the original tensor; any negative value
            means "infer as ``packed.numel() * 2``".
        output_dtype: Scalar type of the dequantized result.

    Returns:
        The dequantized tensor.
    """
    # Two 4-bit codes live in each packed byte, hence the factor of 2.
    total_elements = packed.numel() * 2 if numel < 0 else numel
    return ops.bnb_dequantize_4bit(
        packed, absmax, blocksize, quant_type, total_elements, output_dtype
    )
59
+
60
+
61
def gemv_4bit(
    x: torch.Tensor,
    w: torch.Tensor,
    absmax: torch.Tensor,
    output_features: int,
    blocksize: int = 64,
    quant_type: int = NF4,
) -> torch.Tensor:
    """Fused matrix-vector product against 4-bit quantized weights.

    Evaluates ``y = dequant(W) @ x`` where ``W`` is blockwise NF4/FP4
    quantized; the dequantization happens inside the kernel.

    Args:
        x: Input vector [..., K] on the MPS device.
        w: Packed weight matrix [N, K/2] (uint8) on the MPS device.
        absmax: Per-block scales [N, ceil(K/blocksize)] (float32).
        output_features: Number of output features (N).
        blocksize: Quantization block size (64 or 128).
        quant_type: Codebook selector — FP4 (1) or NF4 (2).

    Returns:
        Output tensor [..., N].
    """
    result = ops.bnb_gemv_4bit(x, w, absmax, blocksize, quant_type, output_features)
    return result
85
+
86
+
87
def gemm_4bit(
    x: torch.Tensor,
    w: torch.Tensor,
    absmax: torch.Tensor,
    output_features: int,
    blocksize: int = 64,
    quant_type: int = NF4,
) -> torch.Tensor:
    """Fused matrix-matrix product against 4-bit quantized, transposed weights.

    Evaluates ``Y = X @ dequant(W).T`` where ``W`` is blockwise NF4/FP4
    quantized; the dequantization happens inside the kernel.

    Args:
        x: Input matrix [..., M, K] on the MPS device.
        w: Packed weight matrix [N, K/2] (uint8) on the MPS device.
        absmax: Per-block scales [N, ceil(K/blocksize)] (float32).
        output_features: Number of output features (N).
        blocksize: Quantization block size (64 or 128).
        quant_type: Codebook selector — FP4 (1) or NF4 (2).

    Returns:
        Output tensor [..., M, N].
    """
    result = ops.bnb_gemm_4bit(x, w, absmax, blocksize, quant_type, output_features)
    return result
111
+
112
+
113
def linear_4bit(
    x: torch.Tensor,
    w: torch.Tensor,
    absmax: torch.Tensor,
    output_features: int,
    blocksize: int = 64,
    quant_type: int = NF4,
    bias: Optional[torch.Tensor] = None,
) -> torch.Tensor:
    """Apply a 4-bit quantized linear layer, auto-selecting GEMV or GEMM.

    A 1-D input, or an input whose second-to-last dimension is 1, is routed
    through the fused matrix-vector kernel; every other shape goes through
    the GEMM kernel.

    Args:
        x: Input tensor on the MPS device.
        w: Packed weight [N, K/2] (uint8).
        absmax: Per-block scales [N, ceil(K/blocksize)] (float32).
        output_features: N.
        blocksize: 64 or 128.
        quant_type: FP4 (1) or NF4 (2).
        bias: Optional bias [N].

    Returns:
        Output tensor.
    """
    if x.dim() == 1:
        # Pure vector input: run GEMV, then drop the leading unit dimension.
        y = gemv_4bit(
            x.view(x.size(-1)), w, absmax, output_features, blocksize, quant_type
        )
        y = y.squeeze(0)
    elif x.dim() >= 2 and x.size(-2) == 1:
        # Single-row matrix input: flatten the row dim for GEMV, restore after.
        y = gemv_4bit(
            x.squeeze(-2), w, absmax, output_features, blocksize, quant_type
        )
        y = y.unsqueeze(-2)
    else:
        y = gemm_4bit(x, w, absmax, output_features, blocksize, quant_type)

    return y if bias is None else y + bias
158
+
159
# Public API of this build; mirrors the functions defined above.
__all__ = [
    "quantize_4bit",
    "dequantize_4bit",
    "gemv_4bit",
    "gemm_4bit",
    "linear_4bit",
]
build/torch210-metal-aarch64-darwin/_bitsandbytes_mps_c31f916.abi3.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03e84d69f0649570e560d12267b9e1ef8cd187f8bb13a737f0a28d40af567259
3
+ size 845120
build/torch210-metal-aarch64-darwin/_ops.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from . import _bitsandbytes_mps_c31f916
3
+ ops = torch.ops._bitsandbytes_mps_c31f916
4
+
5
def add_op_namespace_prefix(op_name: str):
    """Qualify *op_name* with this extension's torch op namespace."""
    return "_bitsandbytes_mps_c31f916::" + op_name
build/torch210-metal-aarch64-darwin/bitsandbytes_mps/__init__.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ctypes
2
+ import sys
3
+
4
+ import importlib
5
+ from pathlib import Path
6
+ from types import ModuleType
7
+
8
def _import_from_path(file_path: Path) -> ModuleType:
    """Load and execute a Python source file as a uniquely-named module.

    We cannot use the file's own name as the module name: once registered in
    ``sys.modules`` it would shadow other imports of the same name. Instead
    the module name is the hex-encoded hash of the absolute path, which is
    unique per location.

    Args:
        file_path: Path of the Python source file to load.

    Returns:
        The executed module object (also registered in ``sys.modules``).

    Raises:
        ImportError: If no spec or module can be created for the file.
    """
    # Bug fix: a bare `import importlib` does not guarantee that the
    # `importlib.util` submodule attribute is set; import it explicitly so
    # the calls below cannot fail with AttributeError.
    import importlib.util

    path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
    module_name = path_hash
    spec = importlib.util.spec_from_file_location(module_name, file_path)
    if spec is None:
        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
    module = importlib.util.module_from_spec(spec)
    if module is None:
        raise ImportError(f"Cannot load module {module_name} from spec")
    sys.modules[module_name] = module
    spec.loader.exec_module(module)  # type: ignore
    return module
24
+
25
+
26
+ globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
build/torch210-metal-aarch64-darwin/metadata.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "python-depends": []
3
+ }
build/torch29-metal-aarch64-darwin/__init__.py ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Optional, Tuple
2
+
3
+ import torch
4
+
5
+ from ._ops import ops
6
+
7
+ # Quant type constants (match bitsandbytes DataType_t)
8
+ FP4 = 1
9
+ NF4 = 2
10
+
11
+
12
def quantize_4bit(
    input: torch.Tensor,
    blocksize: int = 64,
    quant_type: int = NF4,
) -> Tuple[torch.Tensor, torch.Tensor]:
    """Quantize a tensor to packed 4-bit codes with one scale per block.

    Args:
        input: Tensor on the MPS device (float16, bfloat16, or float32).
        blocksize: Elements per quantization block (64 or 128).
        quant_type: Codebook selector — FP4 (1) or NF4 (2).

    Returns:
        A ``(packed, absmax)`` pair: ``packed`` is a uint8 tensor holding two
        4-bit codes per byte (numel/2 entries), and ``absmax`` is a float32
        tensor of per-block maximum absolute values.
    """
    packed_and_scales = ops.bnb_quantize_4bit(input, blocksize, quant_type)
    return packed_and_scales
30
+
31
+
32
def dequantize_4bit(
    packed: torch.Tensor,
    absmax: torch.Tensor,
    blocksize: int = 64,
    quant_type: int = NF4,
    numel: int = -1,
    output_dtype: torch.dtype = torch.float16,
) -> torch.Tensor:
    """Reconstruct a tensor from packed 4-bit codes and per-block scales.

    Args:
        packed: uint8 tensor holding two 4-bit codes per byte.
        absmax: float32 tensor of per-block maximum absolute values.
        blocksize: Elements per quantization block (64 or 128).
        quant_type: Codebook selector — FP4 (1) or NF4 (2).
        numel: Element count of the original tensor; any negative value
            means "infer as ``packed.numel() * 2``".
        output_dtype: Scalar type of the dequantized result.

    Returns:
        The dequantized tensor.
    """
    # Two 4-bit codes live in each packed byte, hence the factor of 2.
    total_elements = packed.numel() * 2 if numel < 0 else numel
    return ops.bnb_dequantize_4bit(
        packed, absmax, blocksize, quant_type, total_elements, output_dtype
    )
59
+
60
+
61
def gemv_4bit(
    x: torch.Tensor,
    w: torch.Tensor,
    absmax: torch.Tensor,
    output_features: int,
    blocksize: int = 64,
    quant_type: int = NF4,
) -> torch.Tensor:
    """Fused matrix-vector product against 4-bit quantized weights.

    Evaluates ``y = dequant(W) @ x`` where ``W`` is blockwise NF4/FP4
    quantized; the dequantization happens inside the kernel.

    Args:
        x: Input vector [..., K] on the MPS device.
        w: Packed weight matrix [N, K/2] (uint8) on the MPS device.
        absmax: Per-block scales [N, ceil(K/blocksize)] (float32).
        output_features: Number of output features (N).
        blocksize: Quantization block size (64 or 128).
        quant_type: Codebook selector — FP4 (1) or NF4 (2).

    Returns:
        Output tensor [..., N].
    """
    result = ops.bnb_gemv_4bit(x, w, absmax, blocksize, quant_type, output_features)
    return result
85
+
86
+
87
def gemm_4bit(
    x: torch.Tensor,
    w: torch.Tensor,
    absmax: torch.Tensor,
    output_features: int,
    blocksize: int = 64,
    quant_type: int = NF4,
) -> torch.Tensor:
    """Fused matrix-matrix product against 4-bit quantized, transposed weights.

    Evaluates ``Y = X @ dequant(W).T`` where ``W`` is blockwise NF4/FP4
    quantized; the dequantization happens inside the kernel.

    Args:
        x: Input matrix [..., M, K] on the MPS device.
        w: Packed weight matrix [N, K/2] (uint8) on the MPS device.
        absmax: Per-block scales [N, ceil(K/blocksize)] (float32).
        output_features: Number of output features (N).
        blocksize: Quantization block size (64 or 128).
        quant_type: Codebook selector — FP4 (1) or NF4 (2).

    Returns:
        Output tensor [..., M, N].
    """
    result = ops.bnb_gemm_4bit(x, w, absmax, blocksize, quant_type, output_features)
    return result
111
+
112
+
113
def linear_4bit(
    x: torch.Tensor,
    w: torch.Tensor,
    absmax: torch.Tensor,
    output_features: int,
    blocksize: int = 64,
    quant_type: int = NF4,
    bias: Optional[torch.Tensor] = None,
) -> torch.Tensor:
    """Apply a 4-bit quantized linear layer, auto-selecting GEMV or GEMM.

    A 1-D input, or an input whose second-to-last dimension is 1, is routed
    through the fused matrix-vector kernel; every other shape goes through
    the GEMM kernel.

    Args:
        x: Input tensor on the MPS device.
        w: Packed weight [N, K/2] (uint8).
        absmax: Per-block scales [N, ceil(K/blocksize)] (float32).
        output_features: N.
        blocksize: 64 or 128.
        quant_type: FP4 (1) or NF4 (2).
        bias: Optional bias [N].

    Returns:
        Output tensor.
    """
    if x.dim() == 1:
        # Pure vector input: run GEMV, then drop the leading unit dimension.
        y = gemv_4bit(
            x.view(x.size(-1)), w, absmax, output_features, blocksize, quant_type
        )
        y = y.squeeze(0)
    elif x.dim() >= 2 and x.size(-2) == 1:
        # Single-row matrix input: flatten the row dim for GEMV, restore after.
        y = gemv_4bit(
            x.squeeze(-2), w, absmax, output_features, blocksize, quant_type
        )
        y = y.unsqueeze(-2)
    else:
        y = gemm_4bit(x, w, absmax, output_features, blocksize, quant_type)

    return y if bias is None else y + bias
158
+
159
# Public API of this build; mirrors the functions defined above.
__all__ = [
    "quantize_4bit",
    "dequantize_4bit",
    "gemv_4bit",
    "gemm_4bit",
    "linear_4bit",
]
build/torch29-metal-aarch64-darwin/_bitsandbytes_mps_c31f916.abi3.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2ab4506b4b2d6581d5a13e3824b8df1a49da98ee95166e3b85f059f51256e41
3
+ size 844464
build/torch29-metal-aarch64-darwin/_ops.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from . import _bitsandbytes_mps_c31f916
3
+ ops = torch.ops._bitsandbytes_mps_c31f916
4
+
5
def add_op_namespace_prefix(op_name: str):
    """Qualify *op_name* with this extension's torch op namespace."""
    return "_bitsandbytes_mps_c31f916::" + op_name
build/torch29-metal-aarch64-darwin/bitsandbytes_mps/__init__.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ctypes
2
+ import sys
3
+
4
+ import importlib
5
+ from pathlib import Path
6
+ from types import ModuleType
7
+
8
def _import_from_path(file_path: Path) -> ModuleType:
    """Load and execute a Python source file as a uniquely-named module.

    We cannot use the file's own name as the module name: once registered in
    ``sys.modules`` it would shadow other imports of the same name. Instead
    the module name is the hex-encoded hash of the absolute path, which is
    unique per location.

    Args:
        file_path: Path of the Python source file to load.

    Returns:
        The executed module object (also registered in ``sys.modules``).

    Raises:
        ImportError: If no spec or module can be created for the file.
    """
    # Bug fix: a bare `import importlib` does not guarantee that the
    # `importlib.util` submodule attribute is set; import it explicitly so
    # the calls below cannot fail with AttributeError.
    import importlib.util

    path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
    module_name = path_hash
    spec = importlib.util.spec_from_file_location(module_name, file_path)
    if spec is None:
        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
    module = importlib.util.module_from_spec(spec)
    if module is None:
        raise ImportError(f"Cannot load module {module_name} from spec")
    sys.modules[module_name] = module
    spec.loader.exec_module(module)  # type: ignore
    return module
+
25
+
26
+ globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
build/torch29-metal-aarch64-darwin/metadata.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "python-depends": []
3
+ }