Backend_intf.Backendinclude Backend_commonval sexp_of_code : code -> Sexplib0.Sexp.tval sexp_of_code_batch : code_batch -> Sexplib0.Sexp.tval get_optimize_ctx : code -> optimize_ctxval get_optimize_ctx_batch : code_batch -> optimize_ctxval compile :
optimize_ctx ->
?name:Base.string ->
Indexing.unit_bindings ->
Assignments.comp ->
code
name is used to derive names for compilation artifacts. If omitted, it's derived via Assignments.get_name_exn.
val compile_batch :
optimize_ctx ->
?names:Base.string Base.array ->
?occupancy:(name:Base.string -> src_n:Base.int -> Base.bool) ->
Indexing.unit_bindings ->
Assignments.comp Base.array ->
code_batch
compile_batch vs. compile is mostly about improving the compile time and debugging convenience by generating fewer files -- ideally it does not affect execution, but there can be backend-specific differences. Only array entries for which occupancy returns true are included. names are used to derive names for compilation artifacts. If omitted, they're derived via Assignments.get_name_exn.
include Backend_device_common
with type buffer_ptr := buffer_ptr
and type optimize_ctx := optimize_ctxinclude Device
with type buffer_ptr := buffer_ptr
with type optimize_ctx := optimize_ctxinclude Device_types
with type buffer_ptr := buffer_ptr
with type optimize_ctx := optimize_ctxinclude Device_config
with type buffer_ptr := buffer_ptr
with type optimize_ctx := optimize_ctxinclude Device_config_common with type buffer_ptr := buffer_ptrinclude Buffer with type buffer_ptr := buffer_ptrinclude sig ... endval sexp_of_dev : dev -> Sexplib0.Sexp.tval sexp_of_runner : runner -> Sexplib0.Sexp.tAn event tracks if a stream finished computing past a particular point in its schedule. These values are used internally for scheduling across streams of the backend, and can be used for explicit scheduling.
val sexp_of_event : event -> Sexplib0.Sexp.tval sexp_of_optimize_ctx : optimize_ctx -> Sexplib0.Sexp.tval empty_optimize_ctx : Base.unit -> optimize_ctxtype nonrec device = (buffer_ptr, dev, runner, event) deviceval sexp_of_device : device -> Sexplib0.Sexp.ttype nonrec stream = (buffer_ptr, dev, runner, event) streamval sexp_of_stream : stream -> Sexplib0.Sexp.ttype nonrec context = (buffer_ptr, stream, optimize_ctx) contextval sexp_of_context : context -> Sexplib0.Sexp.tinclude Alloc_buffer
with type buffer_ptr := buffer_ptr
and type stream := streaminclude Buffer with type buffer_ptr := buffer_ptrval sexp_of_buffer_ptr : buffer_ptr -> Sexplib0.Sexp.tinclude sig ... endtype nonrec buffer = buffer_ptr bufferval sexp_of_buffer : buffer -> Sexplib0.Sexp.ttype nonrec ctx_arrays = buffer_ptr ctx_arraysval sexp_of_ctx_arrays : ctx_arrays -> Sexplib0.Sexp.tval alloc_array : Ops.prec -> dims:Base.int Base.array -> stream -> buffer_ptrval alloc_zeros : Ops.prec -> dims:Base.int Base.array -> stream -> buffer_ptrval free_buffer : (stream -> buffer_ptr -> Base.unit) Base.optionval make_context :
?ctx_arrays:ctx_arrays ->
?optimize_ctx:optimize_ctx ->
stream ->
contextReturns a context without a parent.
val make_child :
?ctx_arrays:ctx_arrays ->
?optimize_ctx:optimize_ctx ->
context ->
context
Returns a context with the same Backend_intf.context.stream as the given context, and with the given context's Backend_intf.context.ctx_arrays and Backend_intf.context.optimize_ctx where these are omitted. The given context is also the Backend_intf.context.parent of the returned context.
val get_name : stream -> Base.stringval sync : event -> Base.unitBlocks till the event completes, if it's not done already.
It is rarely needed to call sync explicitly, because it should always be called internally when necessary, in particular before extracting values from host.
val is_done : event -> Base.boolWhether the event completed.
Schedules waiting for the given event on the context's stream.
NOTE: it should rarely be needed to call will_wait_for explicitly, because it should always be called internally when necessary.
val get_used_memory : device -> Base.intReturns (an upper bound of) the memory used for arrays, in bytes.
Global debug information; backend-specific and might evolve independently across the backends.
val get_debug_info : stream -> Base.Sexp.tPer-stream debug information; backend-specific and might evolve independently across the backends.
val await : stream -> Base.unitBlocks till the stream becomes idle, i.e. synchronizes the stream.
Returns the event indicating if any currently running or scheduled computations on the stream have completed.
val is_idle : stream -> Base.boolWhether the stream is currently waiting for work.
val get_device : ordinal:Base.int -> deviceval suggested_num_streams : device -> Base.intThe optimal number of streams for the given device to follow the Backend_intf.config strategy.
Returns the routine for the code's procedure, in a new context derived from the given context.
val link_batch :
context ->
code_batch ->
context * context routine Base.option Base.arrayReturns the routines for the procedures included in the code batch. The returned context is downstream of all the returned routines.
include With_buffer_retrieval_and_syncing
with type device := device
and type context := context
and type event := eventIf the tensor node is both hosted and in-context, schedules a copy(^) from host to context and returns true, otherwise returns false.
^ On unified memory devices, the copy is not scheduled if the source and destination are the same buffer (note that this depends on the memory mode of the tensor node).
Schedules a copy from host to context: a variant of from_host that requires the input context to not contain the tensor node, and outputs the context with the tensor node.
If the tensor node is both hosted and in-context, schedules a copy(^) from context to host and returns true, otherwise returns false.
^ On unified memory devices, the copy is not scheduled if the source and destination are the same buffer (note that this depends on the memory mode of the tensor node).
val device_to_device :
Tnode.t ->
into_merge_buffer:merge_buffer_use ->
dst:context ->
src:context ->
Base.booldevice_to_device tn ~into_merge_buffer ~dst ~src proceeds as follows:
- If the tensor node is absent from the src context and either it is present in the dst context or into_merge_buffer is different from No: raises an error.
- If the tensor node is absent from dst and into_merge_buffer=No: returns false.
- Schedules waiting for writing into the tensor node on src to finish, if any.
- If into_merge_buffer=No: schedules a copy of the tensor node from src to dst and updates the writer event for the node.
- If into_merge_buffer is different from No: sets on dst the merge buffer source to the given node.
- If into_merge_buffer=Streaming_for task, remembers the buffer pointer of the source node to use for streaming, runs task -- intended to be the routine making use of the merge buffer, and initializes the merge buffer's streaming event.
- If into_merge_buffer=Copy, schedules copying from src to the merge buffer of dst's stream, and updates the writer event for the merge buffer.

Schedules a copy from src to dst: a variant of device_to_device with into_merge_buffer=No that requires the input src context to not contain the tensor node, and outputs the dst context with the tensor node.
val sync_device : device -> Base.unitSynchronizes all the streams on a device, and cleans up (removes) all associated events.