Backend (arrayjit.Ir.Backend)

include Backend_common

include Buffer

type buffer_ptr

include sig ... end

type code

val sexp_of_code : code -> Sexplib0.Sexp.t

type code_batch

val sexp_of_code_batch : code_batch -> Sexplib0.Sexp.t

type optimize_ctx

val get_optimize_ctx : code -> optimize_ctx

val get_optimize_ctx_batch : code_batch -> optimize_ctx

val compile : 
  optimize_ctx ->
  ?name:Base.string ->
  Indexing.unit_bindings ->
  Assignments.comp ->
  code

name is used to derive names for compilation artifacts. If omitted, it's derived via Assignments.get_name_exn.

val compile_batch : 
  optimize_ctx ->
  ?names:Base.string Base.array ->
  ?occupancy:(name:Base.string -> src_n:Base.int -> Base.bool) ->
  Indexing.unit_bindings ->
  Assignments.comp Base.array ->
  code_batch

compile_batch vs. compile is mostly about improving the compile time and debugging convenience by generating fewer files -- ideally it does not affect execution, but there can be backend-specific differences. Only array entries for which occupancy returns true are included. names are used to derive names for compilation artifacts. If omitted, they're derived via Assignments.get_name_exn.

include Backend_device_common
  with type buffer_ptr := buffer_ptr
   and type optimize_ctx := optimize_ctx

include Device
  with type buffer_ptr := buffer_ptr
  with type optimize_ctx := optimize_ctx

include Device_types
  with type buffer_ptr := buffer_ptr
  with type optimize_ctx := optimize_ctx

include Device_config
  with type buffer_ptr := buffer_ptr
  with type optimize_ctx := optimize_ctx

include Device_config_common with type buffer_ptr := buffer_ptr

include Buffer with type buffer_ptr := buffer_ptr

include sig ... end

type dev

Interface to a device driver.

val sexp_of_dev : dev -> Sexplib0.Sexp.t

type runner

Interface to a stream driver.

val sexp_of_runner : runner -> Sexplib0.Sexp.t

type event

An event tracks if a stream finished computing past a particular point in its schedule. These values are used internally for scheduling across streams of the backend, and can be used for explicit scheduling.

val sexp_of_event : event -> Sexplib0.Sexp.t

val name : Base.string

val sexp_of_optimize_ctx : optimize_ctx -> Sexplib0.Sexp.t

val empty_optimize_ctx : Base.unit -> optimize_ctx

type nonrec device = (buffer_ptr, dev, runner, event) device

val sexp_of_device : device -> Sexplib0.Sexp.t

type nonrec stream = (buffer_ptr, dev, runner, event) stream

val sexp_of_stream : stream -> Sexplib0.Sexp.t

type nonrec context = (buffer_ptr, stream, optimize_ctx) context

val sexp_of_context : context -> Sexplib0.Sexp.t

include Alloc_buffer
  with type buffer_ptr := buffer_ptr
   and type stream := stream

include Buffer with type buffer_ptr := buffer_ptr

val sexp_of_buffer_ptr : buffer_ptr -> Sexplib0.Sexp.t

include sig ... end

type nonrec buffer = buffer_ptr buffer

val sexp_of_buffer : buffer -> Sexplib0.Sexp.t

type nonrec ctx_arrays = buffer_ptr ctx_arrays

val sexp_of_ctx_arrays : ctx_arrays -> Sexplib0.Sexp.t

val alloc_buffer : 
  ?old_buffer:buffer ->
  size_in_bytes:Base.int ->
  stream ->
  buffer

val alloc_array : Ops.prec -> dims:Base.int Base.array -> stream -> buffer_ptr

val alloc_zeros : Ops.prec -> dims:Base.int Base.array -> stream -> buffer_ptr

val free_buffer : (stream -> buffer_ptr -> Base.unit) Base.option

val make_device : dev -> ordinal:Base.int -> device

val make_stream : device -> runner -> stream

val make_context : 
  ?ctx_arrays:ctx_arrays ->
  ?optimize_ctx:optimize_ctx ->
  stream ->
  context

Returns a context without a parent.

val make_child : 
  ?ctx_arrays:ctx_arrays ->
  ?optimize_ctx:optimize_ctx ->
  context ->
  context

Returns a context with the same Backend_intf.context.stream as the given context. Backend_intf.context.ctx_arrays and Backend_intf.context.optimize_ctx, if omitted, are also taken from the given context. The given context becomes the Backend_intf.context.parent of the returned context.

val get_name : stream -> Base.string

val sync : event -> Base.unit

Blocks till the event completes, if it's not done already.

It is rarely needed to call sync explicitly, because it should always be called internally when necessary, in particular before extracting values from host.

val is_done : event -> Base.bool

Whether the event completed.

val will_wait_for : context -> event -> Base.unit

Schedules waiting for the given event on the context's stream.

NOTE: it should rarely be needed to call will_wait_for explicitly, because it should always be called internally when necessary.

val static_properties : Base.Sexp.t

Returns a sexp description of the properties of all devices.

val get_used_memory : device -> Base.int

Returns (an upper bound of) the memory used for arrays, in bytes.

val get_global_debug_info : Base.unit -> Base.Sexp.t

Global debug information; backend-specific, and might evolve independently for each backend.

val get_debug_info : stream -> Base.Sexp.t

Per-stream debug information; backend-specific, and might evolve independently for each backend.

val await : stream -> Base.unit

Blocks till the stream becomes idle, i.e. synchronizes the stream.

val all_work : stream -> event

Returns the event indicating whether all of the currently running or scheduled computations on the stream have completed.

val is_idle : stream -> Base.bool

Whether the stream is currently waiting for work.

val get_device : ordinal:Base.int -> device

val num_devices : Base.unit -> Base.int

val suggested_num_streams : device -> Base.int

The optimal number of streams for the given device to follow the Backend_intf.config strategy.

val new_stream : device -> stream

val link : context -> code -> context routine

Returns the routine for the code's procedure, in a new context derived from the given context.

val link_batch : 
  context ->
  code_batch ->
  context * context routine Base.option Base.array

Returns the routines for the procedures included in the code batch. The returned context is downstream of all the returned routines.

include With_buffer_retrieval_and_syncing
  with type device := device
   and type context := context
   and type event := event

val from_host : context -> Tnode.t -> Base.bool

If the tensor node is both hosted and in-context, schedules a copy(^) from host to context and returns true, otherwise returns false.

^ On unified memory devices, the copy is not scheduled if the source and destination are the same buffer (note that this depends on the memory mode of the tensor node).

val init_from_host : context -> Tnode.t -> context

Schedules a copy from host to context: a variant of from_host that requires the input context to not contain the tensor node, and outputs the context with the tensor node.

val to_host : context -> Tnode.t -> Base.bool

If the tensor node is both hosted and in-context, schedules a copy(^) from context to host and returns true, otherwise returns false.

^ On unified memory devices, the copy is not scheduled if the source and destination are the same buffer (note that this depends on the memory mode of the tensor node).

val device_to_device : 
  Tnode.t ->
  into_merge_buffer:merge_buffer_use ->
  dst:context ->
  src:context ->
  Base.bool

device_to_device tn ~into_merge_buffer ~dst ~src proceeds as follows:

If the node is absent from the src context and either it is present in the dst context or into_merge_buffer is different from No: raises an error.
If the node is absent from dst and into_merge_buffer=No: returns false.
Schedules waiting for writing into the tensor node on src to finish, if any.
If into_merge_buffer=No: schedules a copy of the tensor node from src to dst and updates the writer event for the node.
If into_merge_buffer is different from No: sets on dst the merge buffer source to the given node.
If into_merge_buffer=Streaming_for task, remembers the buffer pointer of the source node to use for streaming, runs task -- intended to be the routine making use of the merge buffer, and initializes the merge buffer's streaming event.
If into_merge_buffer=Copy, schedules copying from src to the merge buffer of dst's stream, and updates the writer event for the merge buffer.

val init_from_device : Tnode.t -> dst:context -> src:context -> context

Schedules a copy from src to dst: a variant of device_to_device with into_merge_buffer=No that requires the input src context to not contain the tensor node, and outputs the dst context with the tensor node.

val sync_device : device -> Base.unit

Synchronizes all the streams on a device, and cleans up (removes) all associated events.