Module Ir.Backend_intf

The interface types for backends

User-facing backend API.

type buffer_loc = {
  1. pool_id : Base.int;
  2. offset : Base.int;
}

A backend-agnostic, deterministic per-device buffer location: a pool_id into the device's backend-private pool_id -> 'base pool table, plus a byte offset within that pool. The concrete backend pointer (Metal.Buffer.t / CUdeviceptr / void*) lives only in that private table -- it never appears in any type of this shared interface -- so buffer_loc (pure integers) is stable across runs, diffable, and meaningful in logs and .expected files. Phase-1 policy is one pool per tnode at offset = 0, byte-for-byte equivalent to per-tnode allocation. An alias (future work) is the parent's { pool_id; offset = offset + delta }.

val buffer_loc_of_sexp : Sexplib0.Sexp.t -> buffer_loc
val sexp_of_buffer_loc : buffer_loc -> Sexplib0.Sexp.t
val compare_buffer_loc : buffer_loc -> buffer_loc -> Base.int
val equal_buffer_loc : buffer_loc -> buffer_loc -> Base.bool
type ctx_buffers = buffer_loc Base.Map.M(Ir.Tnode).t
val sexp_of_ctx_buffers : ctx_buffers -> Sexplib0.Sexp.t
module type Slab_alloc = sig ... end

The backend slab allocator, replacing the per-tnode Alloc_buffer interface. The shared allocator seam (see Backends) mints deterministic per-device pool_ids and calls these int-in / int-out primitives; the backend keeps the pool_id -> 'base table private. The pool_id -> 'base resolution (then base + offset) stays inside the backend.

type merge_buffer_use =
  1. | No
  2. | Copy
val sexp_of_merge_buffer_use : merge_buffer_use -> Sexplib0.Sexp.t
type kparam_source =
  1. | Log_file_name
  2. | Merge_buffer
  3. | Kparam_ptr of Tnode.t
  4. | Kparam_pool_slab of Base.int
    (*

    gh-ocannl-344: the i-th pool base-pointer parameter of a pooled kernel (Metal). A fixed number of these is emitted; at link time the backend binds slab i to the pool assigned index i (or a duplicate of an in-use pool for the unused tail). Lets a kernel reach hundreds of tensor nodes through a handful of bound pools, staying under Metal's ~31 binding limit.

    *)
  5. | Kparam_pool_slots of Tnode.t Base.list
    (*

    gh-ocannl-344: the per-routine slot table accompanying Kparam_pool_slab. For the k-th tnode in this list the backend writes (pool_index, byte_offset); the shader reads it to form the typed pointer by casting (pools at pool_index) + byte_offset. Emitted only by pooled (Metal) codegen; per-tnode pointer backends (C, CUDA) never produce it.

    *)
  6. | Static_idx of Indexing.static_symbol
val sexp_of_kparam_source : kparam_source -> Sexplib0.Sexp.t
type 'context routine = {
  1. context : 'context;
  2. schedule : Task.t;
  3. bindings : Indexing.lowered_bindings;
  4. name : Base.string;
  5. inputs : Base.Set.M(Ir.Tnode).t;
    (*

    The materialized read-only and read-before-write (within the routine) non-constant nodes. They are inputs in a broad sense, as they could be recurrent nodes or parameters.

    *)
  6. merge_buffer_input : Tnode.t Base.option;
    (*

    Similar to inputs, for the merge buffer.

    *)
  7. outputs : Base.Set.M(Ir.Tnode).t;
    (*

    All the materialized nodes written-to by the routine.

    *)
}
val sexp_of_routine : 'context. ('context -> Sexplib0.Sexp.t) -> 'context routine -> Sexplib0.Sexp.t
module type Device_config_common = sig ... end
module type Device_config = sig ... end
type ('dev, 'runner, 'event) device = {
  1. dev : 'dev;
  2. ordinal : Base.int;
    (*

    The number of the represented backend's device, in the range from 0 to the number of the backend's devices - 1.

    *)
  3. device_id : Base.int;
    (*

    A unique identifier among all device instances of all backends. Note that multiple device_id values (distinct device instances) might refer to the same physical device.

    *)
  4. runner : 'runner;
  5. merge_buffer : buffer_loc Base.option Base.ref;
    (*

    The merge buffer's reserved single-tenant pool location, or None if not yet allocated. The slab can be reused (grown in place) for nodes that fit.

    *)
  6. mutable merge_buffer_capacity : Base.int;
    (*

    Byte capacity of the reserved merge-buffer pool; drives the grow decision.

    *)
  7. updating_for : 'event Base.Hashtbl.M(Ir.Tnode).t;
    (*

    The completion event for the most recent update of (i.e. write to) a node via this device.

    *)
  8. mutable updating_for_merge_buffer : (Tnode.t * 'event Base.option) Base.option;
    (*

    The tensor node that was most recently scheduled to be in the device's merge buffer. See also updating_for.

    *)
  9. constant_buffer_cache : buffer_loc Base.Hashtbl.M(Ir.Tnode).t;
    (*

    Per-device cache for read-only/constant buffer allocations.

    *)
  10. mutable next_pool_id : Base.int;
    (*

    Deterministic per-device pool-id counter, advanced by the shared allocator seam in tnode iteration order. Pool id 0 is reserved for the merge buffer; tnode pools start at 1.

    *)
}

A device folds in the (formerly per-stream) single compute runner and its buffer/event tracking: with one compute stream per device, the surviving runner / merge_buffer / updating_for / updating_for_merge_buffer fields live on the device. The updating_for writer-event tracking and Backend.device_to_device coherence are preserved (relocated here), now for cross-device coherence, and are forward-compatible with a future fixed-role prefetch/transfer runner.

val sexp_of_device : 'a -> 'b -> 'c -> ('d, 'e, 'f) device -> Sexplib0.Sexp.t
val equal_device : ('a, 'b, 'c) device -> ('d, 'e, 'f) device -> bool
val merge_buffer_pool_id : int

Pool id 0 on every device is reserved for the (single-tenant) merge buffer.

type ('dev, 'runner, 'event, 'optimize_ctx) context = {
  1. device : ('dev, 'runner, 'event) device;
  2. parent : ('dev, 'runner, 'event, 'optimize_ctx) context Base.option;
  3. ctx_buffers : ctx_buffers;
    (*

    This map contains the deterministic buffer locations used in this context or an ancestor context.

    *)
  4. finalized : Utils.atomic_bool;
  5. optimize_ctx : 'optimize_ctx;
  6. merge_buffer_node : Tnode.t Base.option;
    (*

    The tensor node that a Backend.device_to_device transfer with into_merge_buffer:Copy placed (or will place) into this context's device's merge buffer. It is a static, immutably-chained fact carried producer -> consumer: linking a consumer whose code expects a merge-buffer node verifies it against this field at link time. A transfer with into_merge_buffer:No does not touch the merge buffer and inherits the parent's value.

    *)
}
val sexp_of_context : 'dev 'runner 'event 'optimize_ctx. ('dev -> Sexplib0.Sexp.t) -> ('runner -> Sexplib0.Sexp.t) -> ('event -> Sexplib0.Sexp.t) -> ('optimize_ctx -> Sexplib0.Sexp.t) -> ('dev, 'runner, 'event, 'optimize_ctx) context -> Sexplib0.Sexp.t
module type Device_types = sig ... end
module type Device = sig ... end
module type Backend_common = sig ... end

Parts shared by assignments-level backend interfaces.

module type Backend_device_common = sig ... end

Parts shared by both assignments-level and lowered-level backend interfaces providing streams and devices, both user-facing and implementation-facing. Does not include: compilation and linking (different for assignments-level and lowered-level); copying and tensor-node-level synchronization (copying is different for user-facing and implementation-facing APIs, synchronization is provided by a component outside of backend implementations).

module type With_buffer_retrieval_and_syncing = sig ... end
module type Backend = sig ... end