refactor!: flatten session module

Author: Carson M.
Date: 2026-01-15 02:18:17 -06:00
parent 8329375b75
commit 2650caa43c
13 changed files with 170 additions and 181 deletions
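
This commit flattens the `session` module: `adapter` and `io_binding` move under `session`, the `input`, `metadata`, `output`, and `run_options` submodules become private, and their types are re-exported from `ort::session` directly. Pieced together from the hunks below, the import migration looks roughly like this (the grouping of the `use` items is illustrative, not taken from the commit):

```rust
// Before this commit: session-related types were spread across separate
// public modules and submodules.
use ort::{
    adapter::Adapter,
    io_binding::IoBinding,
    session::{
        Session,
        input::SessionInputs,
        run_options::{OutputSelector, RunOptions}
    }
};

// After this commit: everything is re-exported from `ort::session`.
use ort::session::{Adapter, IoBinding, OutputSelector, RunOptions, Session, SessionInputs};
```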

View File

@@ -77,7 +77,7 @@
//! though.
//!
//! ## Limitations
//! - [`OutputSelector`](ort::session::run_options::OutputSelector) is not currently implemented.
//! - [`OutputSelector`](ort::session::OutputSelector) is not currently implemented.
//! - [`IoBinding`](ort::io_binding) is not supported by ONNX Runtime on the web.
#![deny(clippy::panic, clippy::panicking_unwrap)]

View File

@@ -239,7 +239,7 @@ impl CUDA {
/// - Models with control flow operators (like `If`, `Loop`, or `Scan`) are not supported.
/// - Input/output shapes cannot change across inference calls.
/// - The address of inputs/outputs cannot change across inference calls, so
/// [`IoBinding`](crate::io_binding::IoBinding) must be used.
/// [`IoBinding`](crate::session::IoBinding) must be used.
/// - `Session`s using CUDA graphs are technically not `Send` or `Sync`.
///
/// Consult the [ONNX Runtime documentation on CUDA graphs](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#using-cuda-graphs-preview) for more information.
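
The hunk above states the constraints without showing usage, so here is a minimal sketch of the bind-once, run-many pattern they imply, reusing the `IoBinding` flow shown elsewhere in this commit. The model path, input/output names, and shapes are hypothetical, and enabling graph capture on the CUDA EP itself (plus selecting a graph via the `gpu_graph_id` run-option entry that appears later in this diff) is omitted:

```rust
use ort::{ep, memory::Allocator, session::Session, value::Tensor};

fn main() -> ort::Result<()> {
    let mut session = Session::builder()?
        .with_execution_providers([ep::CUDA::default().build()])?
        .commit_from_file("model.onnx")?; // hypothetical model

    // Input/output shapes must not change across runs.
    let mut input = Tensor::<f32>::from_array(([1_usize, 3, 224, 224], vec![0.0; 3 * 224 * 224]))?;
    let output = Tensor::<f32>::new(&Allocator::default(), [1_usize, 1000])?;

    // Bind input & output once so their addresses never change between runs.
    let mut binding = session.create_binding()?;
    binding.bind_input("x", &input)?;
    binding.bind_output("y", output)?;

    for step in 0..10 {
        // New data is written into the same buffer; the address stays fixed.
        input.extract_array_mut().fill(step as f32);
        let _outputs = session.run_binding(&binding)?;
    }
    Ok(())
}
```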

View File

@@ -25,13 +25,11 @@ pub mod __private {
#[macro_use]
pub(crate) mod private;
pub mod adapter;
pub mod compiler;
pub mod editor;
pub mod environment;
pub mod ep;
pub mod error;
pub mod io_binding;
pub mod logging;
pub mod memory;
pub mod operator;

View File

@@ -1,67 +1,3 @@
//! An input adapter, allowing for loading many static inputs from disk at once.
//!
//! [`Adapter`] essentially acts as a collection of predefined inputs allocated on a specific device that can easily be
//! swapped out between session runs via [`RunOptions::add_adapter`]. With slight modifications to the session
//! graph, [`Adapter`]s can be used as low-rank adapters (LoRAs) or as containers of style embeddings.
//!
//! # Example
//! An adapter can be created in Python with the `AdapterFormat` class:
//! ```python
//! import numpy as np
//! import onnxruntime as ort
//!
//! param_a = ort.OrtValue.ortvalue_from_numpy(np.array([[3], [4], [5], [6]], dtype=np.float32))
//! param_b = ort.OrtValue.ortvalue_from_numpy(np.array([[7, 8, 9, 10]], dtype=np.float32))
//!
//! adapter = ort.AdapterFormat()
//! adapter.set_parameters({
//! 'lora_param_a': param_a,
//! 'lora_param_b': param_b
//! })
//! adapter.export_adapter('tests/data/adapter.orl')
//! ```
//!
//! Then, in Rust:
//! ```
//! # use ort::{adapter::Adapter, session::{run_options::RunOptions, Session}, value::Tensor};
//! # fn main() -> ort::Result<()> {
//! let mut model = Session::builder()?.commit_from_file("tests/data/lora_model.onnx")?;
//! let lora = Adapter::from_file("tests/data/adapter.orl", None)?;
//!
//! let mut run_options = RunOptions::new()?;
//! run_options.add_adapter(&lora)?;
//!
//! let outputs =
//! model.run_with_options(ort::inputs![Tensor::<f32>::from_array(([4, 4], vec![1.0; 16]))?], &run_options)?;
//! # Ok(())
//! # }
//! ```
//!
//! Using [`Adapter`] is identical to, but more convenient than:
//! ```
//! # use ort::{adapter::Adapter, session::{run_options::RunOptions, Session}, value::Tensor};
//! # fn main() -> ort::Result<()> {
//! let mut model = Session::builder()?.commit_from_file("tests/data/lora_model.onnx")?;
//!
//! // Load our parameters from disk somehow
//! let param_a = Tensor::<f32>::from_array(([4, 1], vec![3., 4., 5., 6.]))?;
//! let param_b = Tensor::<f32>::from_array(([1, 4], vec![7., 8., 9., 10.]))?;
//!
//! let outputs = model.run(ort::inputs![
//! "input" => Tensor::<f32>::from_array(([4, 4], vec![1.0; 16]))?,
//! // Adapter parameters are just inputs.
//! "lora_param_a" => param_a.view(),
//! "lora_param_b" => param_b.view()
//! ])?;
//! # Ok(())
//! # }
//! ```
//!
//! [`Adapter`] also lets us copy the parameters to a GPU at load time, so that they don't need to be copied on each
//! session run.
//!
//! [`RunOptions::add_adapter`]: crate::session::run_options::RunOptions::add_adapter
use alloc::sync::Arc;
use core::ptr::{self, NonNull};
#[cfg(feature = "std")]
@@ -91,7 +27,67 @@ impl Drop for AdapterInner {
/// An input adapter, allowing for loading many static inputs from disk at once.
///
/// See the [module-level documentation][self] for more information.
/// [`Adapter`] essentially acts as a collection of predefined inputs allocated on a specific device that can easily be
/// swapped out between session runs via [`RunOptions::add_adapter`]. With slight modifications to the session
/// graph, [`Adapter`]s can be used as low-rank adapters (LoRAs) or as containers of style embeddings.
///
/// # Example
/// An adapter can be created in Python with the `AdapterFormat` class:
/// ```python
/// import numpy as np
/// import onnxruntime as ort
///
/// param_a = ort.OrtValue.ortvalue_from_numpy(np.array([[3], [4], [5], [6]], dtype=np.float32))
/// param_b = ort.OrtValue.ortvalue_from_numpy(np.array([[7, 8, 9, 10]], dtype=np.float32))
///
/// adapter = ort.AdapterFormat()
/// adapter.set_parameters({
/// 'lora_param_a': param_a,
/// 'lora_param_b': param_b
/// })
/// adapter.export_adapter('tests/data/adapter.orl')
/// ```
///
/// Then, in Rust:
/// ```
/// # use ort::{session::{Adapter, RunOptions, Session}, value::Tensor};
/// # fn main() -> ort::Result<()> {
/// let mut model = Session::builder()?.commit_from_file("tests/data/lora_model.onnx")?;
/// let lora = Adapter::from_file("tests/data/adapter.orl", None)?;
///
/// let mut run_options = RunOptions::new()?;
/// run_options.add_adapter(&lora)?;
///
/// let outputs =
/// model.run_with_options(ort::inputs![Tensor::<f32>::from_array(([4, 4], vec![1.0; 16]))?], &run_options)?;
/// # Ok(())
/// # }
/// ```
///
/// Using [`Adapter`] is identical to, but more convenient than:
/// ```
/// # use ort::{session::{Adapter, RunOptions, Session}, value::Tensor};
/// # fn main() -> ort::Result<()> {
/// let mut model = Session::builder()?.commit_from_file("tests/data/lora_model.onnx")?;
///
/// // Load our parameters from disk somehow
/// let param_a = Tensor::<f32>::from_array(([4, 1], vec![3., 4., 5., 6.]))?;
/// let param_b = Tensor::<f32>::from_array(([1, 4], vec![7., 8., 9., 10.]))?;
///
/// let outputs = model.run(ort::inputs![
/// "input" => Tensor::<f32>::from_array(([4, 4], vec![1.0; 16]))?,
/// // Adapter parameters are just inputs.
/// "lora_param_a" => param_a.view(),
/// "lora_param_b" => param_b.view()
/// ])?;
/// # Ok(())
/// # }
/// ```
///
/// [`Adapter`] also lets us copy the parameters to a GPU at load time, so that they don't need to be copied on each
/// session run.
///
/// [`RunOptions::add_adapter`]: crate::session::RunOptions::add_adapter
#[derive(Debug, Clone)]
pub struct Adapter {
pub(crate) inner: Arc<AdapterInner>
@@ -105,10 +101,9 @@ impl Adapter {
///
/// ```
/// # use ort::{
/// # adapter::Adapter,
/// # ep,
/// # memory::DeviceType,
/// # session::{run_options::RunOptions, Session},
/// # session::{Adapter, RunOptions, Session},
/// # value::Tensor
/// # };
/// # fn main() -> ort::Result<()> {
@@ -151,10 +146,9 @@ impl Adapter {
///
/// ```
/// # use ort::{
/// # adapter::Adapter,
/// # ep,
/// # memory::DeviceType,
/// # session::{run_options::RunOptions, Session},
/// # session::{Adapter, RunOptions, Session},
/// # value::Tensor
/// # };
/// # fn main() -> ort::Result<()> {

View File

@@ -14,7 +14,7 @@ use smallvec::SmallVec;
use crate::{
error::Result,
session::{SessionOutputs, SharedSessionInner, run_options::UntypedRunOptions},
session::{SessionOutputs, SharedSessionInner, UntypedRunOptions},
util::{STACK_SESSION_INPUTS, STACK_SESSION_OUTPUTS},
value::{Value, ValueInner}
};

View File

@@ -1,81 +1,3 @@
//! Enables binding of session inputs and/or outputs to pre-allocated memory.
//!
//! [`IoBinding`] minimizes copies between a device (like a GPU) and the host (CPU) by allowing you to bind a
//! certain input/output to a pre-allocated value on a specific device.
//!
//! [`IoBinding`] is most suitable for:
//! - An ensemble of models in which the output from one model is the input to another and does not need to pass through
//! the CPU to perform additional processing.
//! - Situations where an output should stay on a device (e.g. to perform additional hardware-accelerated processing).
//! - Models that accept an input that does not change for multiple subsequent runs (like the conditional embedding for
//! a diffusion model).
//!
//! [`IoBinding`] will not provide any meaningful benefit for:
//! - Models where every input changes with each invocation, such as a causal language model or object recognition
//! model.
//! - Pipelines that go straight from CPU -> GPU -> CPU.
//!
//! # Example
//! A diffusion model which takes a text condition input.
//!
//! ```no_run
//! # use ort::{
//! # ep,
//! # io_binding::IoBinding,
//! # memory::{Allocator, AllocatorType, AllocationDevice, MemoryInfo, MemoryType},
//! # session::Session,
//! # value::Tensor
//! # };
//! # fn main() -> ort::Result<()> {
//! let mut text_encoder = Session::builder()?
//! .with_execution_providers([ep::CUDA::default().build()])?
//! .commit_from_file("text_encoder.onnx")?;
//! let mut unet = Session::builder()?
//! .with_execution_providers([ep::CUDA::default().build()])?
//! .commit_from_file("unet.onnx")?;
//!
//! let text_condition = text_encoder
//! .run(ort::inputs![Tensor::<i64>::from_array((
//! vec![27],
//! vec![
//! 23763, 15460, 473, 68, 312, 265, 17463, 4098, 304, 1077, 283, 198, 7676, 5976, 272, 285, 3609, 435,
//! 21680, 321, 265, 300, 1689, 64, 285, 4763, 64
//! ]
//! ))?])?
//! .remove("output0")
//! .unwrap();
//!
//! let input_allocator = Allocator::new(
//! &unet,
//! MemoryInfo::new(AllocationDevice::CUDA_PINNED, 0, AllocatorType::Device, MemoryType::CPUInput)?
//! )?;
//! let mut latents = Tensor::<f32>::new(&input_allocator, [1_usize, 4, 64, 64])?;
//!
//! let mut io_binding = unet.create_binding()?;
//! io_binding.bind_input("condition", &text_condition)?;
//!
//! let output_allocator = Allocator::new(
//! &unet,
//! MemoryInfo::new(AllocationDevice::CUDA_PINNED, 0, AllocatorType::Device, MemoryType::CPUOutput)?
//! )?;
//! io_binding.bind_output("noise_pred", Tensor::<f32>::new(&output_allocator, [1_usize, 4, 64, 64])?)?;
//!
//! for _ in 0..20 {
//! io_binding.bind_input("latents", &latents)?;
//! let noise_pred = unet.run_binding(&io_binding)?.remove("noise_pred").unwrap();
//!
//! let mut latents = latents.extract_array_mut();
//! latents += &noise_pred.try_extract_array::<f32>()?;
//! }
//! # Ok(())
//! # }
//! ```
//!
//! [`IoBinding`] may provide a decent speedup in this example since the `condition` tensor is unchanging between runs.
//! If we were to use normal session inference, the `condition` tensor would be needlessly copied with each invocation
//! of `unet.run()`, and this copying can come with significant latency & overhead. With [`IoBinding`], the `condition`
//! tensor is only copied to the device once instead of 20 times.
use alloc::{string::String, sync::Arc};
use core::{
fmt::Debug,
@@ -94,9 +16,80 @@ use crate::{
/// Enables binding of session inputs and/or outputs to pre-allocated memory.
///
/// An `IoBinding` can be created from a [`Session`] with [`Session::create_binding`].
/// [`IoBinding`] minimizes copies between a device (like a GPU) and the host (CPU) by allowing you to bind a
/// certain input/output to a pre-allocated value on a specific device.
///
/// See the [module-level documentation][self] for more information.
/// [`IoBinding`] is most suitable for:
/// - An ensemble of models in which the output from one model is the input to another and does not need to pass through
/// the CPU to perform additional processing.
/// - Situations where an output should stay on a device (e.g. to perform additional hardware-accelerated processing).
/// - Models that accept an input that does not change for multiple subsequent runs (like the conditional embedding for
/// a diffusion model).
///
/// [`IoBinding`] will not provide any meaningful benefit for:
/// - Models where every input changes with each invocation, such as a causal language model or object recognition
/// model.
/// - Pipelines that go straight from CPU -> GPU -> CPU.
///
/// # Example
/// A diffusion model which takes a text condition input.
///
/// ```no_run
/// # use ort::{
/// # ep,
/// # memory::{Allocator, AllocatorType, AllocationDevice, MemoryInfo, MemoryType},
/// # session::{Session, IoBinding},
/// # value::Tensor
/// # };
/// # fn main() -> ort::Result<()> {
/// let mut text_encoder = Session::builder()?
/// .with_execution_providers([ep::CUDA::default().build()])?
/// .commit_from_file("text_encoder.onnx")?;
/// let mut unet = Session::builder()?
/// .with_execution_providers([ep::CUDA::default().build()])?
/// .commit_from_file("unet.onnx")?;
///
/// let text_condition = text_encoder
/// .run(ort::inputs![Tensor::<i64>::from_array((
/// vec![27],
/// vec![
/// 23763, 15460, 473, 68, 312, 265, 17463, 4098, 304, 1077, 283, 198, 7676, 5976, 272, 285, 3609, 435,
/// 21680, 321, 265, 300, 1689, 64, 285, 4763, 64
/// ]
/// ))?])?
/// .remove("output0")
/// .unwrap();
///
/// let input_allocator = Allocator::new(
/// &unet,
/// MemoryInfo::new(AllocationDevice::CUDA_PINNED, 0, AllocatorType::Device, MemoryType::CPUInput)?
/// )?;
/// let mut latents = Tensor::<f32>::new(&input_allocator, [1_usize, 4, 64, 64])?;
///
/// let mut io_binding = unet.create_binding()?;
/// io_binding.bind_input("condition", &text_condition)?;
///
/// let output_allocator = Allocator::new(
/// &unet,
/// MemoryInfo::new(AllocationDevice::CUDA_PINNED, 0, AllocatorType::Device, MemoryType::CPUOutput)?
/// )?;
/// io_binding.bind_output("noise_pred", Tensor::<f32>::new(&output_allocator, [1_usize, 4, 64, 64])?)?;
///
/// for _ in 0..20 {
/// io_binding.bind_input("latents", &latents)?;
/// let noise_pred = unet.run_binding(&io_binding)?.remove("noise_pred").unwrap();
///
/// let mut latents = latents.extract_array_mut();
/// latents += &noise_pred.try_extract_array::<f32>()?;
/// }
/// # Ok(())
/// # }
/// ```
///
/// [`IoBinding`] may provide a decent speedup in this example since the `condition` tensor is unchanging between runs.
/// If we were to use normal session inference, the `condition` tensor would be needlessly copied with each invocation
/// of `unet.run()`, and this copying can come with significant latency & overhead. With [`IoBinding`], the `condition`
/// tensor is only copied to the device once instead of 20 times.
#[derive(Debug)]
pub struct IoBinding {
ptr: NonNull<ort_sys::OrtIoBinding>,

View File

@@ -35,30 +35,34 @@ use crate::{
AsPointer,
environment::Environment,
error::{Error, ErrorCode, Result, status_to_result},
io_binding::IoBinding,
memory::Allocator,
ortsys,
util::{AllocatedString, STACK_SESSION_INPUTS, STACK_SESSION_OUTPUTS, with_cstr, with_cstr_ptr_array},
value::{DynValue, Outlet, Value, ValueType}
};
mod adapter;
#[cfg(all(feature = "std", not(target_arch = "wasm32")))]
mod r#async;
pub mod builder;
pub mod input;
pub mod metadata;
pub mod output;
pub mod run_options;
mod input;
mod io_binding;
mod metadata;
mod output;
mod run_options;
#[cfg(all(feature = "std", not(target_arch = "wasm32")))]
pub use self::r#async::InferenceFut;
#[cfg(all(feature = "std", not(target_arch = "wasm32")))]
use self::r#async::{AsyncInferenceContext, InferenceFutInner};
use self::{builder::SessionBuilder, metadata::ModelMetadata, run_options::UntypedRunOptions};
pub use self::{
adapter::Adapter,
input::{SessionInputValue, SessionInputs},
io_binding::IoBinding,
metadata::ModelMetadata,
output::SessionOutputs,
run_options::{HasSelectedOutputs, NoSelectedOutputs, RunOptions, SelectedOutputMarker}
run_options::{HasSelectedOutputs, NoSelectedOutputs, OutputSelector, RunOptions, SelectedOutputMarker}
};
use self::{builder::SessionBuilder, run_options::UntypedRunOptions};
/// Holds onto an [`ort_sys::OrtSession`] pointer and its associated allocator.
///
@@ -194,7 +198,7 @@ impl Session {
///
/// ```
/// # use std::sync::Arc;
/// # use ort::{session::{run_options::RunOptions, Session}, value::{Value, ValueType, TensorRef, TensorElementType}};
/// # use ort::{session::{RunOptions, Session}, value::{Value, ValueType, TensorRef, TensorElementType}};
/// # fn main() -> ort::Result<()> {
/// let mut session = Session::builder()?.commit_from_file("tests/data/upsample.onnx")?;
/// let input = ndarray::Array4::<f32>::zeros((1, 64, 64, 3));
@@ -223,7 +227,7 @@ impl Session {
/// ```no_run
/// # // no_run because upsample.onnx is too simple of a model for the termination signal to be reliable enough
/// # use std::sync::Arc;
/// # use ort::{session::{Session, run_options::RunOptions}, value::{Value, ValueType, TensorRef, TensorElementType}};
/// # use ort::{session::{Session, RunOptions}, value::{Value, ValueType, TensorRef, TensorElementType}};
/// # fn main() -> ort::Result<()> {
/// # let mut session = Session::builder()?.commit_from_file("tests/data/upsample.onnx")?;
/// # let input = Value::from_array(ndarray::Array4::<f32>::zeros((1, 64, 64, 3)))?;
@@ -385,7 +389,7 @@ impl Session {
///
/// ```
/// # use std::sync::Arc;
/// # use ort::{session::{Session, run_options::RunOptions}, value::{Value, ValueType, TensorRef, TensorElementType}};
/// # use ort::{session::{Session, RunOptions}, value::{Value, ValueType, TensorRef, TensorElementType}};
/// # fn main() -> ort::Result<()> { tokio::runtime::Builder::new_current_thread().enable_all().build().unwrap().block_on(async {
/// let mut session = Session::builder()?.with_intra_threads(2)?.commit_from_file("tests/data/upsample.onnx")?;
/// let input = ndarray::Array4::<f32>::zeros((1, 64, 64, 3));
@@ -501,7 +505,7 @@ impl Session {
///
/// ```
/// # use std::sync::Arc;
/// # use ort::{session::{Session, run_options::RunOptions}, value::{Value, ValueType, TensorRef, TensorElementType}};
/// # use ort::{session::{Session, RunOptions}, value::{Value, ValueType, TensorRef, TensorElementType}};
/// # fn main() -> ort::Result<()> { tokio::runtime::Builder::new_current_thread().enable_all().build().unwrap().block_on(async {
/// let mut session = Session::builder()?.with_intra_threads(2)?.commit_from_file("tests/data/upsample.onnx")?;
/// let input = ndarray::Array4::<f32>::zeros((1, 64, 64, 3));
@@ -614,7 +618,7 @@ impl Session {
///
/// ```
/// # use std::sync::Arc;
/// # use ort::{session::{run_options::RunOptions, Session, WorkloadType}, value::{Value, ValueType, TensorRef, TensorElementType}};
/// # use ort::{session::{RunOptions, Session, WorkloadType}, value::{Value, ValueType, TensorRef, TensorElementType}};
/// # fn main() -> ort::Result<()> {
/// let mut session = Session::builder()?.commit_from_file("tests/data/upsample.onnx")?;
/// session.set_workload_type(WorkloadType::Efficient)?;

View File

@@ -10,11 +10,13 @@ use smallvec::SmallVec;
use crate::{
AsPointer,
adapter::{Adapter, AdapterInner},
error::Result,
logging::LogLevel,
ortsys,
session::Outlet,
session::{
Outlet,
adapter::{Adapter, AdapterInner}
},
util::{MiniMap, STACK_SESSION_OUTPUTS, with_cstr},
value::{DynValue, Value, ValueTypeMarker}
};
@@ -23,7 +25,7 @@ use crate::{
///
/// ```
/// # use std::sync::Arc;
/// # use ort::{session::{Session, run_options::{RunOptions, OutputSelector}}, memory::Allocator, value::Tensor};
/// # use ort::{session::{Session, RunOptions, OutputSelector}, memory::Allocator, value::Tensor};
/// # fn main() -> ort::Result<()> {
/// let mut session = Session::builder()?.commit_from_file("tests/data/upsample.onnx")?;
/// let input = Tensor::<f32>::new(&Allocator::default(), [1_usize, 64, 64, 3])?;
@@ -102,7 +104,7 @@ impl OutputSelector {
///
/// ```
/// # use std::sync::Arc;
/// # use ort::{session::{Session, run_options::{RunOptions, OutputSelector}}, memory::Allocator, value::Tensor};
/// # use ort::{session::{Session, RunOptions, OutputSelector}, memory::Allocator, value::Tensor};
/// # fn main() -> ort::Result<()> {
/// let mut session = Session::builder()?.commit_from_file("tests/data/upsample.onnx")?;
/// let input = Tensor::<f32>::new(&Allocator::default(), [1_usize, 64, 64, 3])?;
@@ -227,7 +229,7 @@ impl<O: SelectedOutputMarker> RunOptions<O> {
///
/// ```
/// # use std::sync::Arc;
/// # use ort::{session::{Session, run_options::{RunOptions, OutputSelector}}, memory::Allocator, value::Tensor};
/// # use ort::{session::{Session, RunOptions, OutputSelector}, memory::Allocator, value::Tensor};
/// # fn main() -> ort::Result<()> {
/// let mut session = Session::builder()?.commit_from_file("tests/data/upsample.onnx")?;
/// let input = Tensor::<f32>::new(&Allocator::default(), [1_usize, 64, 64, 3])?;
@@ -282,7 +284,7 @@ impl<O: SelectedOutputMarker> RunOptions<O> {
/// ```no_run
/// # // no_run because upsample.onnx is too simple of a model for the termination signal to be reliable enough
/// # use std::sync::Arc;
/// # use ort::{session::{Session, run_options::{RunOptions, OutputSelector}}, value::Value};
/// # use ort::{session::{Session, RunOptions, OutputSelector}, value::Value};
/// # fn main() -> ort::Result<()> {
/// # let mut session = Session::builder()?.commit_from_file("tests/data/upsample.onnx")?;
/// # let input = Value::from_array(ndarray::Array4::<f32>::zeros((1, 64, 64, 3)))?;
@@ -310,7 +312,7 @@ impl<O: SelectedOutputMarker> RunOptions<O> {
///
/// ```no_run
/// # use std::sync::Arc;
/// # use ort::{session::{Session, run_options::{RunOptions, OutputSelector}}, value::Value};
/// # use ort::{session::{Session, RunOptions, OutputSelector}, value::Value};
/// # fn main() -> ort::Result<()> {
/// # let mut session = Session::builder()?.commit_from_file("tests/data/upsample.onnx")?;
/// # let input = Value::from_array(ndarray::Array4::<f32>::zeros((1, 64, 64, 3)))?;
@@ -339,7 +341,7 @@ impl<O: SelectedOutputMarker> RunOptions<O> {
/// like CUDA:
/// ```no_run
/// # use std::sync::Arc;
/// # use ort::session::run_options::RunOptions;
/// # use ort::session::RunOptions;
/// # fn main() -> ort::Result<()> {
/// let mut run_options = RunOptions::new()?;
/// run_options.add_config_entry("gpu_graph_id", "1")?;

View File

@@ -1,7 +1,7 @@
use std::path::PathBuf;
use super::{DataLoader, TrainerCallbacks};
use crate::session::input::SessionInputs;
use crate::session::SessionInputs;
pub enum EvaluationStrategy {
None,

View File

@@ -3,7 +3,7 @@ use std::path::Path;
use super::TrainingArguments;
use crate::{
error::Result,
session::input::SessionInputs,
session::SessionInputs,
training::{Checkpoint, Optimizer, Trainer}
};

View File

@@ -1,7 +1,7 @@
use alloc::collections::VecDeque;
use std::fs;
use crate::{error::Result, session::input::SessionInputs, training::Trainer};
use crate::{error::Result, session::SessionInputs, training::Trainer};
mod dataloader;
pub use self::dataloader::{DataLoader, IterableDataLoader, iterable_data_loader};

View File

@@ -7,9 +7,8 @@ use core::ops::{Deref, DerefMut};
use super::DefiniteTensorValueTypeMarker;
use crate::{
Error, OnceLock, Result, ep,
io_binding::IoBinding,
memory::{AllocationDevice, Allocator, AllocatorType, MemoryInfo, MemoryType},
session::{NoSelectedOutputs, RunOptions, Session, builder::GraphOptimizationLevel},
session::{IoBinding, NoSelectedOutputs, RunOptions, Session, builder::GraphOptimizationLevel},
util::{MiniMap, Mutex, MutexGuard},
value::{DynTensor, Value}
};

View File

@@ -1,5 +1,4 @@
use ort::{
adapter::Adapter,
ep,
memory::{AllocationDevice, Allocator, AllocatorType, MemoryInfo, MemoryType},
operator::{
@@ -7,7 +6,7 @@ use ort::{
io::{OperatorInput, OperatorOutput},
kernel::{Kernel, KernelAttributes, KernelContext}
},
session::{RunOptions, Session},
session::{Adapter, RunOptions, Session},
value::{Tensor, TensorElementType}
};