mirror of
https://github.com/pykeio/ort
synced 2026-04-25 16:34:55 +02:00
refactor!: flatten session module
This commit is contained in:
@@ -77,7 +77,7 @@
|
||||
//! though.
|
||||
//!
|
||||
//! ## Limitations
|
||||
//! - [`OutputSelector`](ort::session::run_options::OutputSelector) is not currently implemented.
|
||||
//! - [`OutputSelector`](ort::session::OutputSelector) is not currently implemented.
|
||||
//! - [`IoBinding`](ort::io_binding) is not supported by ONNX Runtime on the web.
|
||||
|
||||
#![deny(clippy::panic, clippy::panicking_unwrap)]
|
||||
|
||||
@@ -239,7 +239,7 @@ impl CUDA {
|
||||
/// - Models with control flow operators (like `If`, `Loop`, or `Scan`) are not supported.
|
||||
/// - Input/output shapes cannot change across inference calls.
|
||||
/// - The address of inputs/outputs cannot change across inference calls, so
|
||||
/// [`IoBinding`](crate::io_binding::IoBinding) must be used.
|
||||
/// [`IoBinding`](crate::session::IoBinding) must be used.
|
||||
/// - `Session`s using CUDA graphs are technically not `Send` or `Sync`.
|
||||
///
|
||||
/// Consult the [ONNX Runtime documentation on CUDA graphs](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#using-cuda-graphs-preview) for more information.
|
||||
|
||||
@@ -25,13 +25,11 @@ pub mod __private {
|
||||
#[macro_use]
|
||||
pub(crate) mod private;
|
||||
|
||||
pub mod adapter;
|
||||
pub mod compiler;
|
||||
pub mod editor;
|
||||
pub mod environment;
|
||||
pub mod ep;
|
||||
pub mod error;
|
||||
pub mod io_binding;
|
||||
pub mod logging;
|
||||
pub mod memory;
|
||||
pub mod operator;
|
||||
|
||||
@@ -1,67 +1,3 @@
|
||||
//! An input adapter, allowing for loading many static inputs from disk at once.
|
||||
//!
|
||||
//! [`Adapter`] essentially acts as a collection of predefined inputs allocated on a specific device that can easily be
|
||||
//! swapped out between session runs via [`RunOptions::add_adapter`]. With slight modifications to the session
|
||||
//! graph, [`Adapter`]s can be used as low-rank adapters (LoRAs) or as containers of style embeddings.
|
||||
//!
|
||||
//! # Example
|
||||
//! An adapter can be created in Python with the `AdapterFormat` class:
|
||||
//! ```python
|
||||
//! import numpy as np
|
||||
//! import onnxruntime as ort
|
||||
//!
|
||||
//! param_a = ort.OrtValue.ortvalue_from_numpy(np.array([[3], [4], [5], [6]], dtype=np.float32))
|
||||
//! param_b = ort.OrtValue.ortvalue_from_numpy(np.array([[7, 8, 9, 10]], dtype=np.float32))
|
||||
//!
|
||||
//! adapter = ort.AdapterFormat()
|
||||
//! adapter.set_parameters({
|
||||
//! 'lora_param_a': param_a,
|
||||
//! 'lora_param_b': param_b
|
||||
//! })
|
||||
//! adapter.export_adapter('tests/data/adapter.orl')
|
||||
//! ```
|
||||
//!
|
||||
//! Then, in Rust:
|
||||
//! ```
|
||||
//! # use ort::{adapter::Adapter, session::{run_options::RunOptions, Session}, value::Tensor};
|
||||
//! # fn main() -> ort::Result<()> {
|
||||
//! let mut model = Session::builder()?.commit_from_file("tests/data/lora_model.onnx")?;
|
||||
//! let lora = Adapter::from_file("tests/data/adapter.orl", None)?;
|
||||
//!
|
||||
//! let mut run_options = RunOptions::new()?;
|
||||
//! run_options.add_adapter(&lora)?;
|
||||
//!
|
||||
//! let outputs =
|
||||
//! model.run_with_options(ort::inputs![Tensor::<f32>::from_array(([4, 4], vec![1.0; 16]))?], &run_options)?;
|
||||
//! # Ok(())
|
||||
//! # }
|
||||
//! ```
|
||||
//!
|
||||
//! Using [`Adapter`] is identical to, but more convenient than:
|
||||
//! ```
|
||||
//! # use ort::{adapter::Adapter, session::{run_options::RunOptions, Session}, value::Tensor};
|
||||
//! # fn main() -> ort::Result<()> {
|
||||
//! let mut model = Session::builder()?.commit_from_file("tests/data/lora_model.onnx")?;
|
||||
//!
|
||||
//! // Load our parameters from disk somehow
|
||||
//! let param_a = Tensor::<f32>::from_array(([4, 1], vec![3., 4., 5., 6.]))?;
|
||||
//! let param_b = Tensor::<f32>::from_array(([1, 4], vec![7., 8., 9., 10.]))?;
|
||||
//!
|
||||
//! let outputs = model.run(ort::inputs![
|
||||
//! "input" => Tensor::<f32>::from_array(([4, 4], vec![1.0; 16]))?,
|
||||
//! // Adapter parameters are just inputs.
|
||||
//! "lora_param_a" => param_a.view(),
|
||||
//! "lora_param_b" => param_b.view()
|
||||
//! ])?;
|
||||
//! # Ok(())
|
||||
//! # }
|
||||
//! ```
|
||||
//!
|
||||
//! [`Adapter`] also lets us copy the parameters to a GPU at load time, so that they don't need to be copied on each
|
||||
//! session run.
|
||||
//!
|
||||
//! [`RunOptions::add_adapter`]: crate::session::run_options::RunOptions::add_adapter
|
||||
|
||||
use alloc::sync::Arc;
|
||||
use core::ptr::{self, NonNull};
|
||||
#[cfg(feature = "std")]
|
||||
@@ -91,7 +27,67 @@ impl Drop for AdapterInner {
|
||||
|
||||
/// An input adapter, allowing for loading many static inputs from disk at once.
|
||||
///
|
||||
/// See the [module-level documentation][self] for more information.
|
||||
/// [`Adapter`] essentially acts as a collection of predefined inputs allocated on a specific device that can easily be
|
||||
/// swapped out between session runs via [`RunOptions::add_adapter`]. With slight modifications to the session
|
||||
/// graph, [`Adapter`]s can be used as low-rank adapters (LoRAs) or as containers of style embeddings.
|
||||
///
|
||||
/// # Example
|
||||
/// An adapter can be created in Python with the `AdapterFormat` class:
|
||||
/// ```python
|
||||
/// import numpy as np
|
||||
/// import onnxruntime as ort
|
||||
///
|
||||
/// param_a = ort.OrtValue.ortvalue_from_numpy(np.array([[3], [4], [5], [6]], dtype=np.float32))
|
||||
/// param_b = ort.OrtValue.ortvalue_from_numpy(np.array([[7, 8, 9, 10]], dtype=np.float32))
|
||||
///
|
||||
/// adapter = ort.AdapterFormat()
|
||||
/// adapter.set_parameters({
|
||||
/// 'lora_param_a': param_a,
|
||||
/// 'lora_param_b': param_b
|
||||
/// })
|
||||
/// adapter.export_adapter('tests/data/adapter.orl')
|
||||
/// ```
|
||||
///
|
||||
/// Then, in Rust:
|
||||
/// ```
|
||||
/// # use ort::{session::{Adapter, RunOptions, Session}, value::Tensor};
|
||||
/// # fn main() -> ort::Result<()> {
|
||||
/// let mut model = Session::builder()?.commit_from_file("tests/data/lora_model.onnx")?;
|
||||
/// let lora = Adapter::from_file("tests/data/adapter.orl", None)?;
|
||||
///
|
||||
/// let mut run_options = RunOptions::new()?;
|
||||
/// run_options.add_adapter(&lora)?;
|
||||
///
|
||||
/// let outputs =
|
||||
/// model.run_with_options(ort::inputs![Tensor::<f32>::from_array(([4, 4], vec![1.0; 16]))?], &run_options)?;
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
///
|
||||
/// Using [`Adapter`] is identical to, but more convenient than:
|
||||
/// ```
|
||||
/// # use ort::{session::{Adapter, RunOptions, Session}, value::Tensor};
|
||||
/// # fn main() -> ort::Result<()> {
|
||||
/// let mut model = Session::builder()?.commit_from_file("tests/data/lora_model.onnx")?;
|
||||
///
|
||||
/// // Load our parameters from disk somehow
|
||||
/// let param_a = Tensor::<f32>::from_array(([4, 1], vec![3., 4., 5., 6.]))?;
|
||||
/// let param_b = Tensor::<f32>::from_array(([1, 4], vec![7., 8., 9., 10.]))?;
|
||||
///
|
||||
/// let outputs = model.run(ort::inputs![
|
||||
/// "input" => Tensor::<f32>::from_array(([4, 4], vec![1.0; 16]))?,
|
||||
/// // Adapter parameters are just inputs.
|
||||
/// "lora_param_a" => param_a.view(),
|
||||
/// "lora_param_b" => param_b.view()
|
||||
/// ])?;
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
///
|
||||
/// [`Adapter`] also lets us copy the parameters to a GPU at load time, so that they don't need to be copied on each
|
||||
/// session run.
|
||||
///
|
||||
/// [`RunOptions::add_adapter`]: crate::session::RunOptions::add_adapter
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Adapter {
|
||||
pub(crate) inner: Arc<AdapterInner>
|
||||
@@ -105,10 +101,9 @@ impl Adapter {
|
||||
///
|
||||
/// ```
|
||||
/// # use ort::{
|
||||
/// # adapter::Adapter,
|
||||
/// # ep,
|
||||
/// # memory::DeviceType,
|
||||
/// # session::{run_options::RunOptions, Session},
|
||||
/// # session::{Adapter, RunOptions, Session},
|
||||
/// # value::Tensor
|
||||
/// # };
|
||||
/// # fn main() -> ort::Result<()> {
|
||||
@@ -151,10 +146,9 @@ impl Adapter {
|
||||
///
|
||||
/// ```
|
||||
/// # use ort::{
|
||||
/// # adapter::Adapter,
|
||||
/// # ep,
|
||||
/// # memory::DeviceType,
|
||||
/// # session::{run_options::RunOptions, Session},
|
||||
/// # session::{Adapter, RunOptions, Session},
|
||||
/// # value::Tensor
|
||||
/// # };
|
||||
/// # fn main() -> ort::Result<()> {
|
||||
@@ -14,7 +14,7 @@ use smallvec::SmallVec;
|
||||
|
||||
use crate::{
|
||||
error::Result,
|
||||
session::{SessionOutputs, SharedSessionInner, run_options::UntypedRunOptions},
|
||||
session::{SessionOutputs, SharedSessionInner, UntypedRunOptions},
|
||||
util::{STACK_SESSION_INPUTS, STACK_SESSION_OUTPUTS},
|
||||
value::{Value, ValueInner}
|
||||
};
|
||||
|
||||
@@ -1,81 +1,3 @@
|
||||
//! Enables binding of session inputs and/or outputs to pre-allocated memory.
|
||||
//!
|
||||
//! [`IoBinding`] minimizes copies between a device (like a GPU) and the host (CPU) by allowing you to bind a
|
||||
//! certain input/output to a pre-allocated value on a specific device.
|
||||
//!
|
||||
//! [`IoBinding`] is most suitable for:
|
||||
//! - An ensemble of models in which the output from one model is the input to another and does not need to pass through
|
||||
//! the CPU to perform additional processing.
|
||||
//! - Situations where an output should stay on a device (e.g. to perform additional hardware-accelerated processing).
|
||||
//! - Models that accept an input that does not change for multiple subsequent runs (like the conditional embedding for
|
||||
//! a diffusion model).
|
||||
//!
|
||||
//! [`IoBinding`] will not provide any meaningful benefit for:
|
||||
//! - Models where every input changes with each invocation, such as a causal language model or object recognition
|
||||
//! model.
|
||||
//! - Pipelines that go straight from CPU -> GPU -> CPU.
|
||||
//!
|
||||
//! # Example
|
||||
//! A diffusion model which takes a text condition input.
|
||||
//!
|
||||
//! ```no_run
|
||||
//! # use ort::{
|
||||
//! # ep,
|
||||
//! # io_binding::IoBinding,
|
||||
//! # memory::{Allocator, AllocatorType, AllocationDevice, MemoryInfo, MemoryType},
|
||||
//! # session::Session,
|
||||
//! # value::Tensor
|
||||
//! # };
|
||||
//! # fn main() -> ort::Result<()> {
|
||||
//! let mut text_encoder = Session::builder()?
|
||||
//! .with_execution_providers([ep::CUDA::default().build()])?
|
||||
//! .commit_from_file("text_encoder.onnx")?;
|
||||
//! let mut unet = Session::builder()?
|
||||
//! .with_execution_providers([ep::CUDA::default().build()])?
|
||||
//! .commit_from_file("unet.onnx")?;
|
||||
//!
|
||||
//! let text_condition = text_encoder
|
||||
//! .run(ort::inputs![Tensor::<i64>::from_array((
|
||||
//! vec![27],
|
||||
//! vec![
|
||||
//! 23763, 15460, 473, 68, 312, 265, 17463, 4098, 304, 1077, 283, 198, 7676, 5976, 272, 285, 3609, 435,
|
||||
//! 21680, 321, 265, 300, 1689, 64, 285, 4763, 64
|
||||
//! ]
|
||||
//! ))?])?
|
||||
//! .remove("output0")
|
||||
//! .unwrap();
|
||||
//!
|
||||
//! let input_allocator = Allocator::new(
|
||||
//! &unet,
|
||||
//! MemoryInfo::new(AllocationDevice::CUDA_PINNED, 0, AllocatorType::Device, MemoryType::CPUInput)?
|
||||
//! )?;
|
||||
//! let mut latents = Tensor::<f32>::new(&input_allocator, [1_usize, 4, 64, 64])?;
|
||||
//!
|
||||
//! let mut io_binding = unet.create_binding()?;
|
||||
//! io_binding.bind_input("condition", &text_condition)?;
|
||||
//!
|
||||
//! let output_allocator = Allocator::new(
|
||||
//! &unet,
|
||||
//! MemoryInfo::new(AllocationDevice::CUDA_PINNED, 0, AllocatorType::Device, MemoryType::CPUOutput)?
|
||||
//! )?;
|
||||
//! io_binding.bind_output("noise_pred", Tensor::<f32>::new(&output_allocator, [1_usize, 4, 64, 64])?)?;
|
||||
//!
|
||||
//! for _ in 0..20 {
|
||||
//! io_binding.bind_input("latents", &latents)?;
|
||||
//! let noise_pred = unet.run_binding(&io_binding)?.remove("noise_pred").unwrap();
|
||||
//!
|
||||
//! let mut latents = latents.extract_array_mut();
|
||||
//! latents += &noise_pred.try_extract_array::<f32>()?;
|
||||
//! }
|
||||
//! # Ok(())
|
||||
//! # }
|
||||
//! ```
|
||||
//!
|
||||
//! [`IoBinding`] may provide a decent speedup in this example since the `condition` tensor is unchanging between runs.
|
||||
//! If we were to use normal session inference, the `condition` tensor would be needlessly copied with each invocation
|
||||
//! of `unet.run()`, and this copying can come with significant latency & overhead. With [`IoBinding`], the `condition`
|
||||
//! tensor is only copied to the device once instead of 20 times.
|
||||
|
||||
use alloc::{string::String, sync::Arc};
|
||||
use core::{
|
||||
fmt::Debug,
|
||||
@@ -94,9 +16,80 @@ use crate::{
|
||||
|
||||
/// Enables binding of session inputs and/or outputs to pre-allocated memory.
|
||||
///
|
||||
/// An `IoBinding` can be created from a [`Session`] with [`Session::create_binding`].
|
||||
/// [`IoBinding`] minimizes copies between a device (like a GPU) and the host (CPU) by allowing you to bind a
|
||||
/// certain input/output to a pre-allocated value on a specific device.
|
||||
///
|
||||
/// See the [module-level documentation][self] for more information.
|
||||
/// [`IoBinding`] is most suitable for:
|
||||
/// - An ensemble of models in which the output from one model is the input to another and does not need to pass through
|
||||
/// the CPU to perform additional processing.
|
||||
/// - Situations where an output should stay on a device (e.g. to perform additional hardware-accelerated processing).
|
||||
/// - Models that accept an input that does not change for multiple subsequent runs (like the conditional embedding for
|
||||
/// a diffusion model).
|
||||
///
|
||||
/// [`IoBinding`] will not provide any meaningful benefit for:
|
||||
/// - Models where every input changes with each invocation, such as a causal language model or object recognition
|
||||
/// model.
|
||||
/// - Pipelines that go straight from CPU -> GPU -> CPU.
|
||||
///
|
||||
/// # Example
|
||||
/// A diffusion model which takes a text condition input.
|
||||
///
|
||||
/// ```no_run
|
||||
/// # use ort::{
|
||||
/// # ep,
|
||||
/// # memory::{Allocator, AllocatorType, AllocationDevice, MemoryInfo, MemoryType},
|
||||
/// # session::{Session, IoBinding},
|
||||
/// # value::Tensor
|
||||
/// # };
|
||||
/// # fn main() -> ort::Result<()> {
|
||||
/// let mut text_encoder = Session::builder()?
|
||||
/// .with_execution_providers([ep::CUDA::default().build()])?
|
||||
/// .commit_from_file("text_encoder.onnx")?;
|
||||
/// let mut unet = Session::builder()?
|
||||
/// .with_execution_providers([ep::CUDA::default().build()])?
|
||||
/// .commit_from_file("unet.onnx")?;
|
||||
///
|
||||
/// let text_condition = text_encoder
|
||||
/// .run(ort::inputs![Tensor::<i64>::from_array((
|
||||
/// vec![27],
|
||||
/// vec![
|
||||
/// 23763, 15460, 473, 68, 312, 265, 17463, 4098, 304, 1077, 283, 198, 7676, 5976, 272, 285, 3609, 435,
|
||||
/// 21680, 321, 265, 300, 1689, 64, 285, 4763, 64
|
||||
/// ]
|
||||
/// ))?])?
|
||||
/// .remove("output0")
|
||||
/// .unwrap();
|
||||
///
|
||||
/// let input_allocator = Allocator::new(
|
||||
/// &unet,
|
||||
/// MemoryInfo::new(AllocationDevice::CUDA_PINNED, 0, AllocatorType::Device, MemoryType::CPUInput)?
|
||||
/// )?;
|
||||
/// let mut latents = Tensor::<f32>::new(&input_allocator, [1_usize, 4, 64, 64])?;
|
||||
///
|
||||
/// let mut io_binding = unet.create_binding()?;
|
||||
/// io_binding.bind_input("condition", &text_condition)?;
|
||||
///
|
||||
/// let output_allocator = Allocator::new(
|
||||
/// &unet,
|
||||
/// MemoryInfo::new(AllocationDevice::CUDA_PINNED, 0, AllocatorType::Device, MemoryType::CPUOutput)?
|
||||
/// )?;
|
||||
/// io_binding.bind_output("noise_pred", Tensor::<f32>::new(&output_allocator, [1_usize, 4, 64, 64])?)?;
|
||||
///
|
||||
/// for _ in 0..20 {
|
||||
/// io_binding.bind_input("latents", &latents)?;
|
||||
/// let noise_pred = unet.run_binding(&io_binding)?.remove("noise_pred").unwrap();
|
||||
///
|
||||
/// let mut latents = latents.extract_array_mut();
|
||||
/// latents += &noise_pred.try_extract_array::<f32>()?;
|
||||
/// }
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
///
|
||||
/// [`IoBinding`] may provide a decent speedup in this example since the `condition` tensor is unchanging between runs.
|
||||
/// If we were to use normal session inference, the `condition` tensor would be needlessly copied with each invocation
|
||||
/// of `unet.run()`, and this copying can come with significant latency & overhead. With [`IoBinding`], the `condition`
|
||||
/// tensor is only copied to the device once instead of 20 times.
|
||||
#[derive(Debug)]
|
||||
pub struct IoBinding {
|
||||
ptr: NonNull<ort_sys::OrtIoBinding>,
|
||||
@@ -35,30 +35,34 @@ use crate::{
|
||||
AsPointer,
|
||||
environment::Environment,
|
||||
error::{Error, ErrorCode, Result, status_to_result},
|
||||
io_binding::IoBinding,
|
||||
memory::Allocator,
|
||||
ortsys,
|
||||
util::{AllocatedString, STACK_SESSION_INPUTS, STACK_SESSION_OUTPUTS, with_cstr, with_cstr_ptr_array},
|
||||
value::{DynValue, Outlet, Value, ValueType}
|
||||
};
|
||||
|
||||
mod adapter;
|
||||
#[cfg(all(feature = "std", not(target_arch = "wasm32")))]
|
||||
mod r#async;
|
||||
pub mod builder;
|
||||
pub mod input;
|
||||
pub mod metadata;
|
||||
pub mod output;
|
||||
pub mod run_options;
|
||||
mod input;
|
||||
mod io_binding;
|
||||
mod metadata;
|
||||
mod output;
|
||||
mod run_options;
|
||||
#[cfg(all(feature = "std", not(target_arch = "wasm32")))]
|
||||
pub use self::r#async::InferenceFut;
|
||||
#[cfg(all(feature = "std", not(target_arch = "wasm32")))]
|
||||
use self::r#async::{AsyncInferenceContext, InferenceFutInner};
|
||||
use self::{builder::SessionBuilder, metadata::ModelMetadata, run_options::UntypedRunOptions};
|
||||
pub use self::{
|
||||
adapter::Adapter,
|
||||
input::{SessionInputValue, SessionInputs},
|
||||
io_binding::IoBinding,
|
||||
metadata::ModelMetadata,
|
||||
output::SessionOutputs,
|
||||
run_options::{HasSelectedOutputs, NoSelectedOutputs, RunOptions, SelectedOutputMarker}
|
||||
run_options::{HasSelectedOutputs, NoSelectedOutputs, OutputSelector, RunOptions, SelectedOutputMarker}
|
||||
};
|
||||
use self::{builder::SessionBuilder, run_options::UntypedRunOptions};
|
||||
|
||||
/// Holds onto an [`ort_sys::OrtSession`] pointer and its associated allocator.
|
||||
///
|
||||
@@ -194,7 +198,7 @@ impl Session {
|
||||
///
|
||||
/// ```
|
||||
/// # use std::sync::Arc;
|
||||
/// # use ort::{session::{run_options::RunOptions, Session}, value::{Value, ValueType, TensorRef, TensorElementType}};
|
||||
/// # use ort::{session::{RunOptions, Session}, value::{Value, ValueType, TensorRef, TensorElementType}};
|
||||
/// # fn main() -> ort::Result<()> {
|
||||
/// let mut session = Session::builder()?.commit_from_file("tests/data/upsample.onnx")?;
|
||||
/// let input = ndarray::Array4::<f32>::zeros((1, 64, 64, 3));
|
||||
@@ -223,7 +227,7 @@ impl Session {
|
||||
/// ```no_run
|
||||
/// # // no_run because upsample.onnx is too simple of a model for the termination signal to be reliable enough
|
||||
/// # use std::sync::Arc;
|
||||
/// # use ort::{session::{Session, run_options::RunOptions}, value::{Value, ValueType, TensorRef, TensorElementType}};
|
||||
/// # use ort::{session::{Session, RunOptions}, value::{Value, ValueType, TensorRef, TensorElementType}};
|
||||
/// # fn main() -> ort::Result<()> {
|
||||
/// # let mut session = Session::builder()?.commit_from_file("tests/data/upsample.onnx")?;
|
||||
/// # let input = Value::from_array(ndarray::Array4::<f32>::zeros((1, 64, 64, 3)))?;
|
||||
@@ -385,7 +389,7 @@ impl Session {
|
||||
///
|
||||
/// ```
|
||||
/// # use std::sync::Arc;
|
||||
/// # use ort::{session::{Session, run_options::RunOptions}, value::{Value, ValueType, TensorRef, TensorElementType}};
|
||||
/// # use ort::{session::{Session, RunOptions}, value::{Value, ValueType, TensorRef, TensorElementType}};
|
||||
/// # fn main() -> ort::Result<()> { tokio::runtime::Builder::new_current_thread().enable_all().build().unwrap().block_on(async {
|
||||
/// let mut session = Session::builder()?.with_intra_threads(2)?.commit_from_file("tests/data/upsample.onnx")?;
|
||||
/// let input = ndarray::Array4::<f32>::zeros((1, 64, 64, 3));
|
||||
@@ -501,7 +505,7 @@ impl Session {
|
||||
///
|
||||
/// ```
|
||||
/// # use std::sync::Arc;
|
||||
/// # use ort::{session::{Session, run_options::RunOptions}, value::{Value, ValueType, TensorRef, TensorElementType}};
|
||||
/// # use ort::{session::{Session, RunOptions}, value::{Value, ValueType, TensorRef, TensorElementType}};
|
||||
/// # fn main() -> ort::Result<()> { tokio::runtime::Builder::new_current_thread().enable_all().build().unwrap().block_on(async {
|
||||
/// let mut session = Session::builder()?.with_intra_threads(2)?.commit_from_file("tests/data/upsample.onnx")?;
|
||||
/// let input = ndarray::Array4::<f32>::zeros((1, 64, 64, 3));
|
||||
@@ -614,7 +618,7 @@ impl Session {
|
||||
///
|
||||
/// ```
|
||||
/// # use std::sync::Arc;
|
||||
/// # use ort::{session::{run_options::RunOptions, Session, WorkloadType}, value::{Value, ValueType, TensorRef, TensorElementType}};
|
||||
/// # use ort::{session::{RunOptions, Session, WorkloadType}, value::{Value, ValueType, TensorRef, TensorElementType}};
|
||||
/// # fn main() -> ort::Result<()> {
|
||||
/// let mut session = Session::builder()?.commit_from_file("tests/data/upsample.onnx")?;
|
||||
/// session.set_workload_type(WorkloadType::Efficient)?;
|
||||
|
||||
@@ -10,11 +10,13 @@ use smallvec::SmallVec;
|
||||
|
||||
use crate::{
|
||||
AsPointer,
|
||||
adapter::{Adapter, AdapterInner},
|
||||
error::Result,
|
||||
logging::LogLevel,
|
||||
ortsys,
|
||||
session::Outlet,
|
||||
session::{
|
||||
Outlet,
|
||||
adapter::{Adapter, AdapterInner}
|
||||
},
|
||||
util::{MiniMap, STACK_SESSION_OUTPUTS, with_cstr},
|
||||
value::{DynValue, Value, ValueTypeMarker}
|
||||
};
|
||||
@@ -23,7 +25,7 @@ use crate::{
|
||||
///
|
||||
/// ```
|
||||
/// # use std::sync::Arc;
|
||||
/// # use ort::{session::{Session, run_options::{RunOptions, OutputSelector}}, memory::Allocator, value::Tensor};
|
||||
/// # use ort::{session::{Session, RunOptions, OutputSelector}, memory::Allocator, value::Tensor};
|
||||
/// # fn main() -> ort::Result<()> {
|
||||
/// let mut session = Session::builder()?.commit_from_file("tests/data/upsample.onnx")?;
|
||||
/// let input = Tensor::<f32>::new(&Allocator::default(), [1_usize, 64, 64, 3])?;
|
||||
@@ -102,7 +104,7 @@ impl OutputSelector {
|
||||
///
|
||||
/// ```
|
||||
/// # use std::sync::Arc;
|
||||
/// # use ort::{session::{Session, run_options::{RunOptions, OutputSelector}}, memory::Allocator, value::Tensor};
|
||||
/// # use ort::{session::{Session, RunOptions, OutputSelector}, memory::Allocator, value::Tensor};
|
||||
/// # fn main() -> ort::Result<()> {
|
||||
/// let mut session = Session::builder()?.commit_from_file("tests/data/upsample.onnx")?;
|
||||
/// let input = Tensor::<f32>::new(&Allocator::default(), [1_usize, 64, 64, 3])?;
|
||||
@@ -227,7 +229,7 @@ impl<O: SelectedOutputMarker> RunOptions<O> {
|
||||
///
|
||||
/// ```
|
||||
/// # use std::sync::Arc;
|
||||
/// # use ort::{session::{Session, run_options::{RunOptions, OutputSelector}}, memory::Allocator, value::Tensor};
|
||||
/// # use ort::{session::{Session, RunOptions, OutputSelector}, memory::Allocator, value::Tensor};
|
||||
/// # fn main() -> ort::Result<()> {
|
||||
/// let mut session = Session::builder()?.commit_from_file("tests/data/upsample.onnx")?;
|
||||
/// let input = Tensor::<f32>::new(&Allocator::default(), [1_usize, 64, 64, 3])?;
|
||||
@@ -282,7 +284,7 @@ impl<O: SelectedOutputMarker> RunOptions<O> {
|
||||
/// ```no_run
|
||||
/// # // no_run because upsample.onnx is too simple of a model for the termination signal to be reliable enough
|
||||
/// # use std::sync::Arc;
|
||||
/// # use ort::{session::{Session, run_options::{RunOptions, OutputSelector}}, value::Value};
|
||||
/// # use ort::{session::{Session, RunOptions, OutputSelector}, value::Value};
|
||||
/// # fn main() -> ort::Result<()> {
|
||||
/// # let mut session = Session::builder()?.commit_from_file("tests/data/upsample.onnx")?;
|
||||
/// # let input = Value::from_array(ndarray::Array4::<f32>::zeros((1, 64, 64, 3)))?;
|
||||
@@ -310,7 +312,7 @@ impl<O: SelectedOutputMarker> RunOptions<O> {
|
||||
///
|
||||
/// ```no_run
|
||||
/// # use std::sync::Arc;
|
||||
/// # use ort::{session::{Session, run_options::{RunOptions, OutputSelector}}, value::Value};
|
||||
/// # use ort::{session::{Session, RunOptions, OutputSelector}, value::Value};
|
||||
/// # fn main() -> ort::Result<()> {
|
||||
/// # let mut session = Session::builder()?.commit_from_file("tests/data/upsample.onnx")?;
|
||||
/// # let input = Value::from_array(ndarray::Array4::<f32>::zeros((1, 64, 64, 3)))?;
|
||||
@@ -339,7 +341,7 @@ impl<O: SelectedOutputMarker> RunOptions<O> {
|
||||
/// like CUDA:
|
||||
/// ```no_run
|
||||
/// # use std::sync::Arc;
|
||||
/// # use ort::session::run_options::RunOptions;
|
||||
/// # use ort::session::RunOptions;
|
||||
/// # fn main() -> ort::Result<()> {
|
||||
/// let mut run_options = RunOptions::new()?;
|
||||
/// run_options.add_config_entry("gpu_graph_id", "1")?;
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use std::path::PathBuf;
|
||||
|
||||
use super::{DataLoader, TrainerCallbacks};
|
||||
use crate::session::input::SessionInputs;
|
||||
use crate::session::SessionInputs;
|
||||
|
||||
pub enum EvaluationStrategy {
|
||||
None,
|
||||
|
||||
@@ -3,7 +3,7 @@ use std::path::Path;
|
||||
use super::TrainingArguments;
|
||||
use crate::{
|
||||
error::Result,
|
||||
session::input::SessionInputs,
|
||||
session::SessionInputs,
|
||||
training::{Checkpoint, Optimizer, Trainer}
|
||||
};
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use alloc::collections::VecDeque;
|
||||
use std::fs;
|
||||
|
||||
use crate::{error::Result, session::input::SessionInputs, training::Trainer};
|
||||
use crate::{error::Result, session::SessionInputs, training::Trainer};
|
||||
|
||||
mod dataloader;
|
||||
pub use self::dataloader::{DataLoader, IterableDataLoader, iterable_data_loader};
|
||||
|
||||
@@ -7,9 +7,8 @@ use core::ops::{Deref, DerefMut};
|
||||
use super::DefiniteTensorValueTypeMarker;
|
||||
use crate::{
|
||||
Error, OnceLock, Result, ep,
|
||||
io_binding::IoBinding,
|
||||
memory::{AllocationDevice, Allocator, AllocatorType, MemoryInfo, MemoryType},
|
||||
session::{NoSelectedOutputs, RunOptions, Session, builder::GraphOptimizationLevel},
|
||||
session::{IoBinding, NoSelectedOutputs, RunOptions, Session, builder::GraphOptimizationLevel},
|
||||
util::{MiniMap, Mutex, MutexGuard},
|
||||
value::{DynTensor, Value}
|
||||
};
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
use ort::{
|
||||
adapter::Adapter,
|
||||
ep,
|
||||
memory::{AllocationDevice, Allocator, AllocatorType, MemoryInfo, MemoryType},
|
||||
operator::{
|
||||
@@ -7,7 +6,7 @@ use ort::{
|
||||
io::{OperatorInput, OperatorOutput},
|
||||
kernel::{Kernel, KernelAttributes, KernelContext}
|
||||
},
|
||||
session::{RunOptions, Session},
|
||||
session::{Adapter, RunOptions, Session},
|
||||
value::{Tensor, TensorElementType}
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user