refactor!: flatten session module

Author: Carson M.
Date: 2026-01-15 02:18:17 -06:00
parent 8329375b75
commit 2650caa43c
13 changed files with 170 additions and 181 deletions
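
This commit flattens the `session` module: `adapter` and `io_binding` move under `session`, the `input`, `metadata`, `output`, and `run_options` submodules become private, and their types are re-exported from `ort::session` directly. Pieced together from the hunks below, the import migration looks roughly like this (the grouping of the `use` items is illustrative, not taken from the commit):

```rust
// Before this commit: session-related types were spread across separate
// public modules and submodules.
use ort::{
    adapter::Adapter,
    io_binding::IoBinding,
    session::{
        Session,
        input::SessionInputs,
        run_options::{OutputSelector, RunOptions}
    }
};

// After this commit: everything is re-exported from `ort::session`.
use ort::session::{Adapter, IoBinding, OutputSelector, RunOptions, Session, SessionInputs};
```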

View File

@@ -77,7 +77,7 @@
//! though.
//!
//! ## Limitations
//! - [`OutputSelector`](ort::session::run_options::OutputSelector) is not currently implemented.
//! - [`OutputSelector`](ort::session::OutputSelector) is not currently implemented.
//! - [`IoBinding`](ort::io_binding) is not supported by ONNX Runtime on the web.
#![deny(clippy::panic, clippy::panicking_unwrap)]

View File

@@ -239,7 +239,7 @@ impl CUDA {
/// - Models with control flow operators (like `If`, `Loop`, or `Scan`) are not supported.
/// - Input/output shapes cannot change across inference calls.
/// - The address of inputs/outputs cannot change across inference calls, so
/// [`IoBinding`](crate::io_binding::IoBinding) must be used.
/// [`IoBinding`](crate::session::IoBinding) must be used.
/// - `Session`s using CUDA graphs are technically not `Send` or `Sync`.
///
/// Consult the [ONNX Runtime documentation on CUDA graphs](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#using-cuda-graphs-preview) for more information.
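
The hunk above states the constraints without showing usage, so here is a minimal sketch of the bind-once, run-many pattern they imply, reusing the `IoBinding` flow shown elsewhere in this commit. The model path, input/output names, and shapes are hypothetical, and enabling graph capture on the CUDA EP itself (plus selecting a graph via the `gpu_graph_id` run-option entry that appears later in this diff) is omitted:

```rust
use ort::{ep, memory::Allocator, session::Session, value::Tensor};

fn main() -> ort::Result<()> {
    let mut session = Session::builder()?
        .with_execution_providers([ep::CUDA::default().build()])?
        .commit_from_file("model.onnx")?; // hypothetical model

    // Input/output shapes must not change across runs.
    let mut input = Tensor::<f32>::from_array(([1_usize, 3, 224, 224], vec![0.0; 3 * 224 * 224]))?;
    let output = Tensor::<f32>::new(&Allocator::default(), [1_usize, 1000])?;

    // Bind input & output once so their addresses never change between runs.
    let mut binding = session.create_binding()?;
    binding.bind_input("x", &input)?;
    binding.bind_output("y", output)?;

    for step in 0..10 {
        // New data is written into the same buffer; the address stays fixed.
        input.extract_array_mut().fill(step as f32);
        let _outputs = session.run_binding(&binding)?;
    }
    Ok(())
}
```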

View File

@@ -25,13 +25,11 @@ pub mod __private {
#[macro_use]
pub(crate) mod private;
pub mod adapter;
pub mod compiler;
pub mod editor;
pub mod environment;
pub mod ep;
pub mod error;
pub mod io_binding;
pub mod logging;
pub mod memory;
pub mod operator;

View File

@@ -1,67 +1,3 @@
//! An input adapter, allowing for loading many static inputs from disk at once.
//!
//! [`Adapter`] essentially acts as a collection of predefined inputs allocated on a specific device that can easily be
//! swapped out between session runs via [`RunOptions::add_adapter`]. With slight modifications to the session
//! graph, [`Adapter`]s can be used as low-rank adapters (LoRAs) or as containers of style embeddings.
//!
//! # Example
//! An adapter can be created in Python with the `AdapterFormat` class:
//! ```python
//! import numpy as np
//! import onnxruntime as ort
//!
//! param_a = ort.OrtValue.ortvalue_from_numpy(np.array([[3], [4], [5], [6]], dtype=np.float32))
//! param_b = ort.OrtValue.ortvalue_from_numpy(np.array([[7, 8, 9, 10]], dtype=np.float32))
//!
//! adapter = ort.AdapterFormat()
//! adapter.set_parameters({
//! 'lora_param_a': param_a,
//! 'lora_param_b': param_b
//! })
//! adapter.export_adapter('tests/data/adapter.orl')
//! ```
//!
//! Then, in Rust:
//! ```
//! # use ort::{adapter::Adapter, session::{run_options::RunOptions, Session}, value::Tensor};
//! # fn main() -> ort::Result<()> {
//! let mut model = Session::builder()?.commit_from_file("tests/data/lora_model.onnx")?;
//! let lora = Adapter::from_file("tests/data/adapter.orl", None)?;
//!
//! let mut run_options = RunOptions::new()?;
//! run_options.add_adapter(&lora)?;
//!
//! let outputs =
//! model.run_with_options(ort::inputs![Tensor::<f32>::from_array(([4, 4], vec![1.0; 16]))?], &run_options)?;
//! # Ok(())
//! # }
//! ```
//!
//! Using [`Adapter`] is identical to, but more convenient than:
//! ```
//! # use ort::{adapter::Adapter, session::{run_options::RunOptions, Session}, value::Tensor};
//! # fn main() -> ort::Result<()> {
//! let mut model = Session::builder()?.commit_from_file("tests/data/lora_model.onnx")?;
//!
//! // Load our parameters from disk somehow
//! let param_a = Tensor::<f32>::from_array(([4, 1], vec![3., 4., 5., 6.]))?;
//! let param_b = Tensor::<f32>::from_array(([1, 4], vec![7., 8., 9., 10.]))?;
//!
//! let outputs = model.run(ort::inputs![
//! "input" => Tensor::<f32>::from_array(([4, 4], vec![1.0; 16]))?,
//! // Adapter parameters are just inputs.
//! "lora_param_a" => param_a.view(),
//! "lora_param_b" => param_b.view()
//! ])?;
//! # Ok(())
//! # }
//! ```
//!
//! [`Adapter`] also lets us copy the parameters to a GPU at load time, so that they don't need to be copied on each
//! session run.
//!
//! [`RunOptions::add_adapter`]: crate::session::run_options::RunOptions::add_adapter
use alloc::sync::Arc;
use core::ptr::{self, NonNull};
#[cfg(feature = "std")]
@@ -91,7 +27,67 @@ impl Drop for AdapterInner {
/// An input adapter, allowing for loading many static inputs from disk at once.
///
/// See the [module-level documentation][self] for more information.
/// [`Adapter`] essentially acts as a collection of predefined inputs allocated on a specific device that can easily be
/// swapped out between session runs via [`RunOptions::add_adapter`]. With slight modifications to the session
/// graph, [`Adapter`]s can be used as low-rank adapters (LoRAs) or as containers of style embeddings.
///
/// # Example
/// An adapter can be created in Python with the `AdapterFormat` class:
/// ```python
/// import numpy as np
/// import onnxruntime as ort
///
/// param_a = ort.OrtValue.ortvalue_from_numpy(np.array([[3], [4], [5], [6]], dtype=np.float32))
/// param_b = ort.OrtValue.ortvalue_from_numpy(np.array([[7, 8, 9, 10]], dtype=np.float32))
///
/// adapter = ort.AdapterFormat()
/// adapter.set_parameters({
/// 'lora_param_a': param_a,
/// 'lora_param_b': param_b
/// })
/// adapter.export_adapter('tests/data/adapter.orl')
/// ```
///
/// Then, in Rust:
/// ```
/// # use ort::{session::{Adapter, RunOptions, Session}, value::Tensor};
/// # fn main() -> ort::Result<()> {
/// let mut model = Session::builder()?.commit_from_file("tests/data/lora_model.onnx")?;
/// let lora = Adapter::from_file("tests/data/adapter.orl", None)?;
///
/// let mut run_options = RunOptions::new()?;
/// run_options.add_adapter(&lora)?;
///
/// let outputs =
/// model.run_with_options(ort::inputs![Tensor::<f32>::from_array(([4, 4], vec![1.0; 16]))?], &run_options)?;
/// # Ok(())
/// # }
/// ```
///
/// Using [`Adapter`] is identical to, but more convenient than:
/// ```
/// # use ort::{session::{Adapter, RunOptions, Session}, value::Tensor};
/// # fn main() -> ort::Result<()> {
/// let mut model = Session::builder()?.commit_from_file("tests/data/lora_model.onnx")?;
///
/// // Load our parameters from disk somehow
/// let param_a = Tensor::<f32>::from_array(([4, 1], vec![3., 4., 5., 6.]))?;
/// let param_b = Tensor::<f32>::from_array(([1, 4], vec![7., 8., 9., 10.]))?;
///
/// let outputs = model.run(ort::inputs![
/// "input" => Tensor::<f32>::from_array(([4, 4], vec![1.0; 16]))?,
/// // Adapter parameters are just inputs.
/// "lora_param_a" => param_a.view(),
/// "lora_param_b" => param_b.view()
/// ])?;
/// # Ok(())
/// # }
/// ```
///
/// [`Adapter`] also lets us copy the parameters to a GPU at load time, so that they don't need to be copied on each
/// session run.
///
/// [`RunOptions::add_adapter`]: crate::session::RunOptions::add_adapter
#[derive(Debug, Clone)]
pub struct Adapter {
pub(crate) inner: Arc<AdapterInner>
@@ -105,10 +101,9 @@ impl Adapter {
///
/// ```
/// # use ort::{
/// # adapter::Adapter,
/// # ep,
/// # memory::DeviceType,
/// # session::{run_options::RunOptions, Session},
/// # session::{Adapter, RunOptions, Session},
/// # value::Tensor
/// # };
/// # fn main() -> ort::Result<()> {
@@ -151,10 +146,9 @@ impl Adapter {
///
/// ```
/// # use ort::{
/// # adapter::Adapter,
/// # ep,
/// # memory::DeviceType,
/// # session::{run_options::RunOptions, Session},
/// # session::{Adapter, RunOptions, Session},
/// # value::Tensor
/// # };
/// # fn main() -> ort::Result<()> {

View File

@@ -14,7 +14,7 @@ use smallvec::SmallVec;
use crate::{
error::Result,
session::{SessionOutputs, SharedSessionInner, run_options::UntypedRunOptions},
session::{SessionOutputs, SharedSessionInner, UntypedRunOptions},
util::{STACK_SESSION_INPUTS, STACK_SESSION_OUTPUTS},
value::{Value, ValueInner}
};

View File

@@ -1,81 +1,3 @@
//! Enables binding of session inputs and/or outputs to pre-allocated memory.
//!
//! [`IoBinding`] minimizes copies between a device (like a GPU) and the host (CPU) by allowing you to bind a
//! certain input/output to a pre-allocated value on a specific device.
//!
//! [`IoBinding`] is most suitable for:
//! - An ensemble of models in which the output from one model is the input to another and does not need to pass through
//! the CPU to perform additional processing.
//! - Situations where an output should stay on a device (e.g. to perform additional hardware-accelerated processing).
//! - Models that accept an input that does not change for multiple subsequent runs (like the conditional embedding for
//! a diffusion model).
//!
//! [`IoBinding`] will not provide any meaningful benefit for:
//! - Models where every input changes with each invocation, such as a causal language model or object recognition
//! model.
//! - Pipelines that go straight from CPU -> GPU -> CPU.
//!
//! # Example
//! A diffusion model which takes a text condition input.
//!
//! ```no_run
//! # use ort::{
//! # ep,
//! # io_binding::IoBinding,
//! # memory::{Allocator, AllocatorType, AllocationDevice, MemoryInfo, MemoryType},
//! # session::Session,
//! # value::Tensor
//! # };
//! # fn main() -> ort::Result<()> {
//! let mut text_encoder = Session::builder()?
//! .with_execution_providers([ep::CUDA::default().build()])?
//! .commit_from_file("text_encoder.onnx")?;
//! let mut unet = Session::builder()?
//! .with_execution_providers([ep::CUDA::default().build()])?
//! .commit_from_file("unet.onnx")?;
//!
//! let text_condition = text_encoder
//! .run(ort::inputs![Tensor::<i64>::from_array((
//! vec![27],
//! vec![
//! 23763, 15460, 473, 68, 312, 265, 17463, 4098, 304, 1077, 283, 198, 7676, 5976, 272, 285, 3609, 435,
//! 21680, 321, 265, 300, 1689, 64, 285, 4763, 64
//! ]
//! ))?])?
//! .remove("output0")
//! .unwrap();
//!
//! let input_allocator = Allocator::new(
//! &unet,
//! MemoryInfo::new(AllocationDevice::CUDA_PINNED, 0, AllocatorType::Device, MemoryType::CPUInput)?
//! )?;
//! let mut latents = Tensor::<f32>::new(&input_allocator, [1_usize, 4, 64, 64])?;
//!
//! let mut io_binding = unet.create_binding()?;
//! io_binding.bind_input("condition", &text_condition)?;
//!
//! let output_allocator = Allocator::new(
//! &unet,
//! MemoryInfo::new(AllocationDevice::CUDA_PINNED, 0, AllocatorType::Device, MemoryType::CPUOutput)?
//! )?;
//! io_binding.bind_output("noise_pred", Tensor::<f32>::new(&output_allocator, [1_usize, 4, 64, 64])?)?;
//!
//! for _ in 0..20 {
//! io_binding.bind_input("latents", &latents)?;
//! let noise_pred = unet.run_binding(&io_binding)?.remove("noise_pred").unwrap();
//!
//! let mut latents = latents.extract_array_mut();
//! latents += &noise_pred.try_extract_array::<f32>()?;
//! }
//! # Ok(())
//! # }
//! ```
//!
//! [`IoBinding`] may provide a decent speedup in this example since the `condition` tensor is unchanging between runs.
//! If we were to use normal session inference, the `condition` tensor would be needlessly copied with each invocation
//! of `unet.run()`, and this copying can come with significant latency & overhead. With [`IoBinding`], the `condition`
//! tensor is only copied to the device once instead of 20 times.
use alloc::{string::String, sync::Arc};
use core::{
fmt::Debug,
@@ -94,9 +16,80 @@ use crate::{
/// Enables binding of session inputs and/or outputs to pre-allocated memory.
///
/// An `IoBinding` can be created from a [`Session`] with [`Session::create_binding`].
/// [`IoBinding`] minimizes copies between a device (like a GPU) and the host (CPU) by allowing you to bind a
/// certain input/output to a pre-allocated value on a specific device.
///
/// See the [module-level documentation][self] for more information.
/// [`IoBinding`] is most suitable for:
/// - An ensemble of models in which the output from one model is the input to another and does not need to pass through
/// the CPU to perform additional processing.
/// - Situations where an output should stay on a device (e.g. to perform additional hardware-accelerated processing).
/// - Models that accept an input that does not change for multiple subsequent runs (like the conditional embedding for
/// a diffusion model).
///
/// [`IoBinding`] will not provide any meaningful benefit for:
/// - Models where every input changes with each invocation, such as a causal language model or object recognition
/// model.
/// - Pipelines that go straight from CPU -> GPU -> CPU.
///
/// # Example
/// A diffusion model which takes a text condition input.
///
/// ```no_run
/// # use ort::{
/// # ep,
/// # memory::{Allocator, AllocatorType, AllocationDevice, MemoryInfo, MemoryType},
/// # session::{Session, IoBinding},
/// # value::Tensor
/// # };
/// # fn main() -> ort::Result<()> {
/// let mut text_encoder = Session::builder()?
/// .with_execution_providers([ep::CUDA::default().build()])?
/// .commit_from_file("text_encoder.onnx")?;
/// let mut unet = Session::builder()?
/// .with_execution_providers([ep::CUDA::default().build()])?
/// .commit_from_file("unet.onnx")?;
///
/// let text_condition = text_encoder
/// .run(ort::inputs![Tensor::<i64>::from_array((
/// vec![27],
/// vec![
/// 23763, 15460, 473, 68, 312, 265, 17463, 4098, 304, 1077, 283, 198, 7676, 5976, 272, 285, 3609, 435,
/// 21680, 321, 265, 300, 1689, 64, 285, 4763, 64
/// ]
/// ))?])?
/// .remove("output0")
/// .unwrap();
///
/// let input_allocator = Allocator::new(
/// &unet,
/// MemoryInfo::new(AllocationDevice::CUDA_PINNED, 0, AllocatorType::Device, MemoryType::CPUInput)?
/// )?;
/// let mut latents = Tensor::<f32>::new(&input_allocator, [1_usize, 4, 64, 64])?;
///
/// let mut io_binding = unet.create_binding()?;
/// io_binding.bind_input("condition", &text_condition)?;
///
/// let output_allocator = Allocator::new(
/// &unet,
/// MemoryInfo::new(AllocationDevice::CUDA_PINNED, 0, AllocatorType::Device, MemoryType::CPUOutput)?
/// )?;
/// io_binding.bind_output("noise_pred", Tensor::<f32>::new(&output_allocator, [1_usize, 4, 64, 64])?)?;
///
/// for _ in 0..20 {
/// io_binding.bind_input("latents", &latents)?;
/// let noise_pred = unet.run_binding(&io_binding)?.remove("noise_pred").unwrap();
///
/// let mut latents = latents.extract_array_mut();
/// latents += &noise_pred.try_extract_array::<f32>()?;
/// }
/// # Ok(())
/// # }
/// ```
///
/// [`IoBinding`] may provide a decent speedup in this example since the `condition` tensor is unchanging between runs.
/// If we were to use normal session inference, the `condition` tensor would be needlessly copied with each invocation
/// of `unet.run()`, and this copying can come with significant latency & overhead. With [`IoBinding`], the `condition`
/// tensor is only copied to the device once instead of 20 times.
#[derive(Debug)]
pub struct IoBinding {
ptr: NonNull<ort_sys::OrtIoBinding>,

View File

@@ -35,30 +35,34 @@ use crate::{
AsPointer,
environment::Environment,
error::{Error, ErrorCode, Result, status_to_result},
io_binding::IoBinding,
memory::Allocator,
ortsys,
util::{AllocatedString, STACK_SESSION_INPUTS, STACK_SESSION_OUTPUTS, with_cstr, with_cstr_ptr_array},
value::{DynValue, Outlet, Value, ValueType}
};
mod adapter;
#[cfg(all(feature = "std", not(target_arch = "wasm32")))]
mod r#async;
pub mod builder;
pub mod input;
pub mod metadata;
pub mod output;
pub mod run_options;
mod input;
mod io_binding;
mod metadata;
mod output;
mod run_options;
#[cfg(all(feature = "std", not(target_arch = "wasm32")))]
pub use self::r#async::InferenceFut;
#[cfg(all(feature = "std", not(target_arch = "wasm32")))]
use self::r#async::{AsyncInferenceContext, InferenceFutInner};
use self::{builder::SessionBuilder, metadata::ModelMetadata, run_options::UntypedRunOptions};
pub use self::{
adapter::Adapter,
input::{SessionInputValue, SessionInputs},
io_binding::IoBinding,
metadata::ModelMetadata,
output::SessionOutputs,
run_options::{HasSelectedOutputs, NoSelectedOutputs, RunOptions, SelectedOutputMarker}
run_options::{HasSelectedOutputs, NoSelectedOutputs, OutputSelector, RunOptions, SelectedOutputMarker}
};
use self::{builder::SessionBuilder, run_options::UntypedRunOptions};
/// Holds onto an [`ort_sys::OrtSession`] pointer and its associated allocator.
///
@@ -194,7 +198,7 @@ impl Session {
///
/// ```
/// # use std::sync::Arc;
/// # use ort::{session::{run_options::RunOptions, Session}, value::{Value, ValueType, TensorRef, TensorElementType}};
/// # use ort::{session::{RunOptions, Session}, value::{Value, ValueType, TensorRef, TensorElementType}};
/// # fn main() -> ort::Result<()> {
/// let mut session = Session::builder()?.commit_from_file("tests/data/upsample.onnx")?;
/// let input = ndarray::Array4::<f32>::zeros((1, 64, 64, 3));
@@ -223,7 +227,7 @@ impl Session {
/// ```no_run
/// # // no_run because upsample.onnx is too simple of a model for the termination signal to be reliable enough
/// # use std::sync::Arc;
/// # use ort::{session::{Session, run_options::RunOptions}, value::{Value, ValueType, TensorRef, TensorElementType}};
/// # use ort::{session::{Session, RunOptions}, value::{Value, ValueType, TensorRef, TensorElementType}};
/// # fn main() -> ort::Result<()> {
/// # let mut session = Session::builder()?.commit_from_file("tests/data/upsample.onnx")?;
/// # let input = Value::from_array(ndarray::Array4::<f32>::zeros((1, 64, 64, 3)))?;
@@ -385,7 +389,7 @@ impl Session {
///
/// ```
/// # use std::sync::Arc;
/// # use ort::{session::{Session, run_options::RunOptions}, value::{Value, ValueType, TensorRef, TensorElementType}};
/// # use ort::{session::{Session, RunOptions}, value::{Value, ValueType, TensorRef, TensorElementType}};
/// # fn main() -> ort::Result<()> { tokio::runtime::Builder::new_current_thread().enable_all().build().unwrap().block_on(async {
/// let mut session = Session::builder()?.with_intra_threads(2)?.commit_from_file("tests/data/upsample.onnx")?;
/// let input = ndarray::Array4::<f32>::zeros((1, 64, 64, 3));
@@ -501,7 +505,7 @@ impl Session {
///
/// ```
/// # use std::sync::Arc;
/// # use ort::{session::{Session, run_options::RunOptions}, value::{Value, ValueType, TensorRef, TensorElementType}};
/// # use ort::{session::{Session, RunOptions}, value::{Value, ValueType, TensorRef, TensorElementType}};
/// # fn main() -> ort::Result<()> { tokio::runtime::Builder::new_current_thread().enable_all().build().unwrap().block_on(async {
/// let mut session = Session::builder()?.with_intra_threads(2)?.commit_from_file("tests/data/upsample.onnx")?;
/// let input = ndarray::Array4::<f32>::zeros((1, 64, 64, 3));
@@ -614,7 +618,7 @@ impl Session {
///
/// ```
/// # use std::sync::Arc;
/// # use ort::{session::{run_options::RunOptions, Session, WorkloadType}, value::{Value, ValueType, TensorRef, TensorElementType}};
/// # use ort::{session::{RunOptions, Session, WorkloadType}, value::{Value, ValueType, TensorRef, TensorElementType}};
/// # fn main() -> ort::Result<()> {
/// let mut session = Session::builder()?.commit_from_file("tests/data/upsample.onnx")?;
/// session.set_workload_type(WorkloadType::Efficient)?;

View File

@@ -10,11 +10,13 @@ use smallvec::SmallVec;
use crate::{
AsPointer,
adapter::{Adapter, AdapterInner},
error::Result,
logging::LogLevel,
ortsys,
session::Outlet,
session::{
Outlet,
adapter::{Adapter, AdapterInner}
},
util::{MiniMap, STACK_SESSION_OUTPUTS, with_cstr},
value::{DynValue, Value, ValueTypeMarker}
};
@@ -23,7 +25,7 @@ use crate::{
///
/// ```
/// # use std::sync::Arc;
/// # use ort::{session::{Session, run_options::{RunOptions, OutputSelector}}, memory::Allocator, value::Tensor};
/// # use ort::{session::{Session, RunOptions, OutputSelector}, memory::Allocator, value::Tensor};
/// # fn main() -> ort::Result<()> {
/// let mut session = Session::builder()?.commit_from_file("tests/data/upsample.onnx")?;
/// let input = Tensor::<f32>::new(&Allocator::default(), [1_usize, 64, 64, 3])?;
@@ -102,7 +104,7 @@ impl OutputSelector {
///
/// ```
/// # use std::sync::Arc;
/// # use ort::{session::{Session, run_options::{RunOptions, OutputSelector}}, memory::Allocator, value::Tensor};
/// # use ort::{session::{Session, RunOptions, OutputSelector}, memory::Allocator, value::Tensor};
/// # fn main() -> ort::Result<()> {
/// let mut session = Session::builder()?.commit_from_file("tests/data/upsample.onnx")?;
/// let input = Tensor::<f32>::new(&Allocator::default(), [1_usize, 64, 64, 3])?;
@@ -227,7 +229,7 @@ impl<O: SelectedOutputMarker> RunOptions<O> {
///
/// ```
/// # use std::sync::Arc;
/// # use ort::{session::{Session, run_options::{RunOptions, OutputSelector}}, memory::Allocator, value::Tensor};
/// # use ort::{session::{Session, RunOptions, OutputSelector}, memory::Allocator, value::Tensor};
/// # fn main() -> ort::Result<()> {
/// let mut session = Session::builder()?.commit_from_file("tests/data/upsample.onnx")?;
/// let input = Tensor::<f32>::new(&Allocator::default(), [1_usize, 64, 64, 3])?;
@@ -282,7 +284,7 @@ impl<O: SelectedOutputMarker> RunOptions<O> {
/// ```no_run
/// # // no_run because upsample.onnx is too simple of a model for the termination signal to be reliable enough
/// # use std::sync::Arc;
/// # use ort::{session::{Session, run_options::{RunOptions, OutputSelector}}, value::Value};
/// # use ort::{session::{Session, RunOptions, OutputSelector}, value::Value};
/// # fn main() -> ort::Result<()> {
/// # let mut session = Session::builder()?.commit_from_file("tests/data/upsample.onnx")?;
/// # let input = Value::from_array(ndarray::Array4::<f32>::zeros((1, 64, 64, 3)))?;
@@ -310,7 +312,7 @@ impl<O: SelectedOutputMarker> RunOptions<O> {
///
/// ```no_run
/// # use std::sync::Arc;
/// # use ort::{session::{Session, run_options::{RunOptions, OutputSelector}}, value::Value};
/// # use ort::{session::{Session, RunOptions, OutputSelector}, value::Value};
/// # fn main() -> ort::Result<()> {
/// # let mut session = Session::builder()?.commit_from_file("tests/data/upsample.onnx")?;
/// # let input = Value::from_array(ndarray::Array4::<f32>::zeros((1, 64, 64, 3)))?;
@@ -339,7 +341,7 @@ impl<O: SelectedOutputMarker> RunOptions<O> {
/// like CUDA:
/// ```no_run
/// # use std::sync::Arc;
/// # use ort::session::run_options::RunOptions;
/// # use ort::session::RunOptions;
/// # fn main() -> ort::Result<()> {
/// let mut run_options = RunOptions::new()?;
/// run_options.add_config_entry("gpu_graph_id", "1")?;

View File

@@ -1,7 +1,7 @@
use std::path::PathBuf;
use super::{DataLoader, TrainerCallbacks};
use crate::session::input::SessionInputs;
use crate::session::SessionInputs;
pub enum EvaluationStrategy {
None,

View File

@@ -3,7 +3,7 @@ use std::path::Path;
use super::TrainingArguments;
use crate::{
error::Result,
session::input::SessionInputs,
session::SessionInputs,
training::{Checkpoint, Optimizer, Trainer}
};

View File

@@ -1,7 +1,7 @@
use alloc::collections::VecDeque;
use std::fs;
use crate::{error::Result, session::input::SessionInputs, training::Trainer};
use crate::{error::Result, session::SessionInputs, training::Trainer};
mod dataloader;
pub use self::dataloader::{DataLoader, IterableDataLoader, iterable_data_loader};

View File

@@ -7,9 +7,8 @@ use core::ops::{Deref, DerefMut};
use super::DefiniteTensorValueTypeMarker;
use crate::{
Error, OnceLock, Result, ep,
io_binding::IoBinding,
memory::{AllocationDevice, Allocator, AllocatorType, MemoryInfo, MemoryType},
session::{NoSelectedOutputs, RunOptions, Session, builder::GraphOptimizationLevel},
session::{IoBinding, NoSelectedOutputs, RunOptions, Session, builder::GraphOptimizationLevel},
util::{MiniMap, Mutex, MutexGuard},
value::{DynTensor, Value}
};

View File

@@ -1,5 +1,4 @@
use ort::{
adapter::Adapter,
ep,
memory::{AllocationDevice, Allocator, AllocatorType, MemoryInfo, MemoryType},
operator::{
@@ -7,7 +6,7 @@ use ort::{
io::{OperatorInput, OperatorOutput},
kernel::{Kernel, KernelAttributes, KernelContext}
},
session::{RunOptions, Session},
session::{Adapter, RunOptions, Session},
value::{Tensor, TensorElementType}
};