refactor!: undo The Flattening

2026-04-25 16:34:55 +02:00 · 2024-11-12 22:19:15 -06:00
parent 17fe990bdf
commit d4f82fc50e
67 changed files with 521 additions and 343 deletions
--- a/examples/async-gpt2-api/examples/async-gpt2-api.rs
+++ b/examples/async-gpt2-api/examples/async-gpt2-api.rs
@@ -11,7 +11,11 @@ use axum::{
 };
 use futures::Stream;
 use ndarray::{Array1, ArrayViewD, Axis, array, concatenate, s};
-use ort::{CUDAExecutionProvider, GraphOptimizationLevel, Session, inputs};
+use ort::{
+	execution_providers::CUDAExecutionProvider,
+	inputs,
+	session::{Session, builder::GraphOptimizationLevel}
+};
 use rand::Rng;
 use tokenizers::Tokenizer;
 use tokio::net::TcpListener;
--- a/examples/cudarc/src/main.rs
+++ b/examples/cudarc/src/main.rs
@@ -1,10 +1,15 @@
 use std::{ops::Mul, path::Path};

-use cudarc::driver::{sys::CUdeviceptr, CudaDevice, DevicePtr, DevicePtrMut};
-use image::{imageops::FilterType, GenericImageView, ImageBuffer, Rgba};
+use cudarc::driver::{CudaDevice, DevicePtr, DevicePtrMut, sys::CUdeviceptr};
+use image::{GenericImageView, ImageBuffer, Rgba, imageops::FilterType};
 use ndarray::Array;
-use ort::{AllocationDevice, AllocatorType, CUDAExecutionProvider, ExecutionProvider, MemoryInfo, MemoryType, Session, TensorRefMut};
-use show_image::{event, AsImageView, WindowOptions};
+use ort::{
+	execution_providers::{CUDAExecutionProvider, ExecutionProvider},
+	memory::{AllocationDevice, AllocatorType, MemoryInfo, MemoryType},
+	session::Session,
+	value::TensorRefMut
+};
+use show_image::{AsImageView, WindowOptions, event};

 #[show_image::main]
 fn main() -> anyhow::Result<()> {
@@ -66,13 +71,10 @@ fn main() -> anyhow::Result<()> {
 	let window = show_image::context()
 		.run_function_wait(move |context| -> Result<_, String> {
 			let mut window = context
-				.create_window(
-					"ort + modnet",
-					WindowOptions {
-						size: Some([img_width, img_height]),
-						..WindowOptions::default()
-					}
-				)
+				.create_window("ort + modnet", WindowOptions {
+					size: Some([img_width, img_height]),
+					..WindowOptions::default()
+				})
 				.map_err(|e| e.to_string())?;
 			window.set_image("photo", &output.as_image_view().map_err(|e| e.to_string())?);
 			Ok(window.proxy())
--- a/examples/custom-ops/examples/custom-ops.rs
+++ b/examples/custom-ops/examples/custom-ops.rs
@@ -1,5 +1,13 @@
 use ndarray::Array2;
-use ort::{Kernel, KernelAttributes, KernelContext, Operator, OperatorDomain, OperatorInput, OperatorOutput, Session, TensorElementType};
+use ort::{
+	operator::{
+		Operator, OperatorDomain,
+		io::{OperatorInput, OperatorOutput},
+		kernel::{Kernel, KernelAttributes, KernelContext}
+	},
+	session::Session,
+	tensor::TensorElementType
+};

 struct CustomOpOne;
 struct CustomOpOneKernel;
--- a/examples/gpt2/examples/gpt2-no-ndarray.rs
+++ b/examples/gpt2/examples/gpt2-no-ndarray.rs
@@ -4,7 +4,11 @@ use std::{
 	sync::Arc
 };

-use ort::{CUDAExecutionProvider, GraphOptimizationLevel, Session, inputs};
+use ort::{
+	execution_providers::CUDAExecutionProvider,
+	inputs,
+	session::{Session, builder::GraphOptimizationLevel}
+};
 use rand::Rng;
 use tokenizers::Tokenizer;

--- a/examples/gpt2/examples/gpt2.rs
+++ b/examples/gpt2/examples/gpt2.rs
@@ -4,7 +4,11 @@ use std::{
 };

 use ndarray::{Array1, ArrayViewD, Axis, array, concatenate, s};
-use ort::{CUDAExecutionProvider, GraphOptimizationLevel, Session, inputs};
+use ort::{
+	execution_providers::CUDAExecutionProvider,
+	inputs,
+	session::{Session, builder::GraphOptimizationLevel}
+};
 use rand::Rng;
 use tokenizers::Tokenizer;

--- a/examples/model-info/examples/model-info.rs
+++ b/examples/model-info/examples/model-info.rs
@@ -1,6 +1,6 @@
 use std::{env, process};

-use ort::{Session, TensorElementType, ValueType};
+use ort::{session::Session, tensor::TensorElementType, value::ValueType};

 fn display_element_type(t: TensorElementType) -> &'static str {
 	match t {
--- a/examples/modnet/examples/modnet.rs
+++ b/examples/modnet/examples/modnet.rs
@@ -4,7 +4,7 @@ use std::{ops::Mul, path::Path};

 use image::{GenericImageView, ImageBuffer, Rgba, imageops::FilterType};
 use ndarray::Array;
-use ort::{CUDAExecutionProvider, Session, inputs};
+use ort::{execution_providers::CUDAExecutionProvider, inputs, session::Session};
 use show_image::{AsImageView, WindowOptions, event};

 #[show_image::main]
--- a/examples/phi-3-vision/src/image_process.rs
+++ b/examples/phi-3-vision/src/image_process.rs
@@ -18,7 +18,7 @@ pub const NUM_CROPS: usize = 1;
 pub const _NUM_IMG_TOKENS: usize = 144;

 const OPENAI_CLIP_MEAN: [f32; 3] = [0.48145466, 0.4578275, 0.40821073];
-const OPENAI_CLIP_STD: [f32; 3] = [0.26862954, 0.26130258, 0.27577711];
+const OPENAI_CLIP_STD: [f32; 3] = [0.26862954, 0.2613026, 0.2757771];

 pub struct Phi3VImageProcessor {
 	num_crops: usize,
--- a/examples/phi-3-vision/src/main.rs
+++ b/examples/phi-3-vision/src/main.rs
@@ -4,12 +4,12 @@ use std::{path::Path, time::Instant};
 use anyhow::Result;
 use image::DynamicImage;
 use ndarray::{Array, Array2, Array3, Array4, ArrayView, Ix3, Ix4, s};
-use ort::{Session, Tensor};
+use ort::{session::Session, value::Tensor};
 use tokenizers::Tokenizer;

-const VISION_MODEL_NAME: &'static str = "phi-3-v-128k-instruct-vision.onnx";
-const TEXT_EMBEDDING_MODEL_NAME: &'static str = "phi-3-v-128k-instruct-text-embedding.onnx";
-const GENERATION_MODEL_NAME: &'static str = "phi-3-v-128k-instruct-text.onnx";
+const VISION_MODEL_NAME: &str = "phi-3-v-128k-instruct-vision.onnx";
+const TEXT_EMBEDDING_MODEL_NAME: &str = "phi-3-v-128k-instruct-text-embedding.onnx";
+const GENERATION_MODEL_NAME: &str = "phi-3-v-128k-instruct-text.onnx";

 const MAX_LENGTH: usize = 1000; // max length of the generated text
 const EOS_TOKEN_ID: i64 = 32007; // <|end|>
@@ -37,8 +37,7 @@ fn get_image_embedding(vision_model: &Session, img: &Option<DynamicImage>) -> Re
 		]?;
 		let outputs = vision_model.run(model_inputs)?;
 		let predictions_view: ArrayView<f32, _> = outputs["visual_features"].try_extract_tensor::<f32>()?;
-		let predictions = predictions_view.into_dimensionality::<Ix3>()?.to_owned();
-		predictions
+		predictions_view.into_dimensionality::<Ix3>()?.to_owned()
 	} else {
 		Array::zeros((1, 0, 0))
 	};
@@ -71,7 +70,7 @@ fn merge_text_and_image_embeddings(
 	// Insert visual features
 	combined_embeds
 		.slice_mut(s![.., image_token_position..(image_token_position + visual_features.shape()[1]), ..])
-		.assign(&visual_features);
+		.assign(visual_features);

 	// Copy the remaining text embeddings
 	combined_embeds
@@ -109,13 +108,13 @@ pub async fn generate_text(
 	text: &str
 ) -> Result<()> {
 	let (inputs_embeds, mut attention_mask) = {
-		let visual_features = get_image_embedding(&vision_model, &image)?;
-		let prompt = format_chat_template(&image, text);
+		let visual_features = get_image_embedding(vision_model, image)?;
+		let prompt = format_chat_template(image, text);
 		let encoding = tokenizer.encode(prompt, true).map_err(|e| anyhow::anyhow!("Error encoding: {:?}", e))?;

 		let input_ids: Vec<i64> = encoding.get_ids().iter().map(|&id| id as i64).collect();
 		let input_ids: Array2<i64> = Array2::from_shape_vec((1, input_ids.len()), input_ids)?;
-		let mut inputs_embeds: Array3<f32> = get_text_embedding(&text_embedding_model, &input_ids)?;
+		let mut inputs_embeds: Array3<f32> = get_text_embedding(text_embedding_model, &input_ids)?;

 		let attention_mask: Vec<i64> = encoding.get_attention_mask().iter().map(|&mask| mask as i64).collect();
 		let mut attention_mask: Array2<i64> = Array2::from_shape_vec((1, attention_mask.len()), attention_mask)?;
@@ -190,7 +189,7 @@ pub async fn generate_text(

 		// Update current_embeds, attention_mask, and past_key_values for the next iteration
 		let new_token_id = Array2::from_elem((1, 1), next_token_id);
-		next_inputs_embeds = get_text_embedding(&text_embedding_model, &new_token_id)?;
+		next_inputs_embeds = get_text_embedding(text_embedding_model, &new_token_id)?;
 		attention_mask = Array2::ones((1, attention_mask.shape()[1] + 1));
 		for i in 0..32 {
 			past_key_values[i * 2] = model_outputs[format!("present.{}.key", i)]
@@ -213,15 +212,9 @@ async fn main() -> Result<()> {

 	let data_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("data");
 	let tokenizer = Tokenizer::from_file(data_dir.join("tokenizer.json")).map_err(|e| anyhow::anyhow!("Error loading tokenizer: {:?}", e))?;
-	let vision_model = Session::builder()?
-		.with_execution_providers([ort::CPUExecutionProvider::default().build()])?
-		.commit_from_file(data_dir.join(VISION_MODEL_NAME))?;
-	let text_embedding_model = Session::builder()?
-		.with_execution_providers([ort::CPUExecutionProvider::default().build()])?
-		.commit_from_file(data_dir.join(TEXT_EMBEDDING_MODEL_NAME))?;
-	let generation_model = Session::builder()?
-		.with_execution_providers([ort::CPUExecutionProvider::default().build()])?
-		.commit_from_file(data_dir.join(GENERATION_MODEL_NAME))?;
+	let vision_model = Session::builder()?.commit_from_file(data_dir.join(VISION_MODEL_NAME))?;
+	let text_embedding_model = Session::builder()?.commit_from_file(data_dir.join(TEXT_EMBEDDING_MODEL_NAME))?;
+	let generation_model = Session::builder()?.commit_from_file(data_dir.join(GENERATION_MODEL_NAME))?;

 	// Generate text from text
 	let image: Option<DynamicImage> = None;
--- a/examples/sentence-transformers/examples/semantic-similarity.rs
+++ b/examples/sentence-transformers/examples/semantic-similarity.rs
@@ -1,7 +1,11 @@
 use std::path::Path;

 use ndarray::{Array2, Axis, Ix2};
-use ort::{CUDAExecutionProvider, Error, GraphOptimizationLevel, Session};
+use ort::{
+	Error,
+	execution_providers::CUDAExecutionProvider,
+	session::{Session, builder::GraphOptimizationLevel}
+};
 use tokenizers::Tokenizer;

 /// Example usage of a text embedding model like Sentence Transformers' `all-mini-lm-l6` model for semantic textual
--- a/examples/training/examples/train-clm-simple.rs
+++ b/examples/training/examples/train-clm-simple.rs
@@ -6,7 +6,12 @@ use std::{

 use kdam::BarExt;
 use ndarray::{Array1, Array2, ArrayViewD, Axis, concatenate, s};
-use ort::{Allocator, CUDAExecutionProvider, CheckpointStrategy, Session, SessionBuilder, Trainer, TrainerCallbacks, TrainingArguments};
+use ort::{
+	execution_providers::CUDAExecutionProvider,
+	memory::Allocator,
+	session::{Session, builder::SessionBuilder},
+	training::{CheckpointStrategy, Trainer, TrainerCallbacks, TrainerControl, TrainerState, TrainingArguments}
+};
 use rand::RngCore;
 use tokenizers::Tokenizer;

@@ -26,7 +31,7 @@ impl LoggerCallback {
 }

 impl TrainerCallbacks for LoggerCallback {
-	fn train_step(&mut self, train_loss: f32, state: &ort::TrainerState, _: &mut ort::TrainerControl<'_>) -> ort::Result<()> {
+	fn train_step(&mut self, train_loss: f32, state: &TrainerState, _: &mut TrainerControl<'_>) -> ort::Result<()> {
 		self.progress_bar.total = state.max_steps;
 		self.progress_bar.set_postfix(format!("loss={train_loss:.3}"));
 		let _ = self.progress_bar.update_to(state.iter_step);
--- a/examples/training/examples/train-clm.rs
+++ b/examples/training/examples/train-clm.rs
@@ -6,7 +6,12 @@ use std::{

 use kdam::BarExt;
 use ndarray::{Array1, Array2, ArrayViewD, Axis, concatenate, s};
-use ort::{Allocator, CUDAExecutionProvider, Checkpoint, Session, SessionBuilder, Trainer};
+use ort::{
+	execution_providers::CUDAExecutionProvider,
+	memory::Allocator,
+	session::{Session, builder::SessionBuilder},
+	training::{Checkpoint, Trainer}
+};
 use rand::RngCore;
 use tokenizers::Tokenizer;

--- a/examples/yolov8/examples/yolov8.rs
+++ b/examples/yolov8/examples/yolov8.rs
@@ -4,7 +4,11 @@ use std::path::Path;

 use image::{GenericImageView, imageops::FilterType};
 use ndarray::{Array, Axis, s};
-use ort::{CUDAExecutionProvider, Session, SessionOutputs, inputs};
+use ort::{
+	execution_providers::CUDAExecutionProvider,
+	inputs,
+	session::{Session, SessionOutputs}
+};
 use raqote::{DrawOptions, DrawTarget, LineJoin, PathBuilder, SolidSource, Source, StrokeStyle};
 use show_image::{AsImageView, WindowOptions, event};