mirror of
https://github.com/pykeio/ort
synced 2026-04-25 16:34:55 +02:00
refactor!: undo The Flattening
This commit is contained in:
@@ -11,7 +11,11 @@ use axum::{
|
||||
};
|
||||
use futures::Stream;
|
||||
use ndarray::{Array1, ArrayViewD, Axis, array, concatenate, s};
|
||||
use ort::{CUDAExecutionProvider, GraphOptimizationLevel, Session, inputs};
|
||||
use ort::{
|
||||
execution_providers::CUDAExecutionProvider,
|
||||
inputs,
|
||||
session::{Session, builder::GraphOptimizationLevel}
|
||||
};
|
||||
use rand::Rng;
|
||||
use tokenizers::Tokenizer;
|
||||
use tokio::net::TcpListener;
|
||||
|
||||
@@ -1,10 +1,15 @@
|
||||
use std::{ops::Mul, path::Path};
|
||||
|
||||
use cudarc::driver::{sys::CUdeviceptr, CudaDevice, DevicePtr, DevicePtrMut};
|
||||
use image::{imageops::FilterType, GenericImageView, ImageBuffer, Rgba};
|
||||
use cudarc::driver::{CudaDevice, DevicePtr, DevicePtrMut, sys::CUdeviceptr};
|
||||
use image::{GenericImageView, ImageBuffer, Rgba, imageops::FilterType};
|
||||
use ndarray::Array;
|
||||
use ort::{AllocationDevice, AllocatorType, CUDAExecutionProvider, ExecutionProvider, MemoryInfo, MemoryType, Session, TensorRefMut};
|
||||
use show_image::{event, AsImageView, WindowOptions};
|
||||
use ort::{
|
||||
execution_providers::{CUDAExecutionProvider, ExecutionProvider},
|
||||
memory::{AllocationDevice, AllocatorType, MemoryInfo, MemoryType},
|
||||
session::Session,
|
||||
value::TensorRefMut
|
||||
};
|
||||
use show_image::{AsImageView, WindowOptions, event};
|
||||
|
||||
#[show_image::main]
|
||||
fn main() -> anyhow::Result<()> {
|
||||
@@ -66,13 +71,10 @@ fn main() -> anyhow::Result<()> {
|
||||
let window = show_image::context()
|
||||
.run_function_wait(move |context| -> Result<_, String> {
|
||||
let mut window = context
|
||||
.create_window(
|
||||
"ort + modnet",
|
||||
WindowOptions {
|
||||
size: Some([img_width, img_height]),
|
||||
..WindowOptions::default()
|
||||
}
|
||||
)
|
||||
.create_window("ort + modnet", WindowOptions {
|
||||
size: Some([img_width, img_height]),
|
||||
..WindowOptions::default()
|
||||
})
|
||||
.map_err(|e| e.to_string())?;
|
||||
window.set_image("photo", &output.as_image_view().map_err(|e| e.to_string())?);
|
||||
Ok(window.proxy())
|
||||
|
||||
@@ -1,5 +1,13 @@
|
||||
use ndarray::Array2;
|
||||
use ort::{Kernel, KernelAttributes, KernelContext, Operator, OperatorDomain, OperatorInput, OperatorOutput, Session, TensorElementType};
|
||||
use ort::{
|
||||
operator::{
|
||||
Operator, OperatorDomain,
|
||||
io::{OperatorInput, OperatorOutput},
|
||||
kernel::{Kernel, KernelAttributes, KernelContext}
|
||||
},
|
||||
session::Session,
|
||||
tensor::TensorElementType
|
||||
};
|
||||
|
||||
struct CustomOpOne;
|
||||
struct CustomOpOneKernel;
|
||||
|
||||
@@ -4,7 +4,11 @@ use std::{
|
||||
sync::Arc
|
||||
};
|
||||
|
||||
use ort::{CUDAExecutionProvider, GraphOptimizationLevel, Session, inputs};
|
||||
use ort::{
|
||||
execution_providers::CUDAExecutionProvider,
|
||||
inputs,
|
||||
session::{Session, builder::GraphOptimizationLevel}
|
||||
};
|
||||
use rand::Rng;
|
||||
use tokenizers::Tokenizer;
|
||||
|
||||
|
||||
@@ -4,7 +4,11 @@ use std::{
|
||||
};
|
||||
|
||||
use ndarray::{Array1, ArrayViewD, Axis, array, concatenate, s};
|
||||
use ort::{CUDAExecutionProvider, GraphOptimizationLevel, Session, inputs};
|
||||
use ort::{
|
||||
execution_providers::CUDAExecutionProvider,
|
||||
inputs,
|
||||
session::{Session, builder::GraphOptimizationLevel}
|
||||
};
|
||||
use rand::Rng;
|
||||
use tokenizers::Tokenizer;
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use std::{env, process};
|
||||
|
||||
use ort::{Session, TensorElementType, ValueType};
|
||||
use ort::{session::Session, tensor::TensorElementType, value::ValueType};
|
||||
|
||||
fn display_element_type(t: TensorElementType) -> &'static str {
|
||||
match t {
|
||||
|
||||
@@ -4,7 +4,7 @@ use std::{ops::Mul, path::Path};
|
||||
|
||||
use image::{GenericImageView, ImageBuffer, Rgba, imageops::FilterType};
|
||||
use ndarray::Array;
|
||||
use ort::{CUDAExecutionProvider, Session, inputs};
|
||||
use ort::{execution_providers::CUDAExecutionProvider, inputs, session::Session};
|
||||
use show_image::{AsImageView, WindowOptions, event};
|
||||
|
||||
#[show_image::main]
|
||||
|
||||
@@ -18,7 +18,7 @@ pub const NUM_CROPS: usize = 1;
|
||||
pub const _NUM_IMG_TOKENS: usize = 144;
|
||||
|
||||
const OPENAI_CLIP_MEAN: [f32; 3] = [0.48145466, 0.4578275, 0.40821073];
|
||||
const OPENAI_CLIP_STD: [f32; 3] = [0.26862954, 0.26130258, 0.27577711];
|
||||
const OPENAI_CLIP_STD: [f32; 3] = [0.26862954, 0.2613026, 0.2757771];
|
||||
|
||||
pub struct Phi3VImageProcessor {
|
||||
num_crops: usize,
|
||||
|
||||
@@ -4,12 +4,12 @@ use std::{path::Path, time::Instant};
|
||||
use anyhow::Result;
|
||||
use image::DynamicImage;
|
||||
use ndarray::{Array, Array2, Array3, Array4, ArrayView, Ix3, Ix4, s};
|
||||
use ort::{Session, Tensor};
|
||||
use ort::{session::Session, value::Tensor};
|
||||
use tokenizers::Tokenizer;
|
||||
|
||||
const VISION_MODEL_NAME: &'static str = "phi-3-v-128k-instruct-vision.onnx";
|
||||
const TEXT_EMBEDDING_MODEL_NAME: &'static str = "phi-3-v-128k-instruct-text-embedding.onnx";
|
||||
const GENERATION_MODEL_NAME: &'static str = "phi-3-v-128k-instruct-text.onnx";
|
||||
const VISION_MODEL_NAME: &str = "phi-3-v-128k-instruct-vision.onnx";
|
||||
const TEXT_EMBEDDING_MODEL_NAME: &str = "phi-3-v-128k-instruct-text-embedding.onnx";
|
||||
const GENERATION_MODEL_NAME: &str = "phi-3-v-128k-instruct-text.onnx";
|
||||
|
||||
const MAX_LENGTH: usize = 1000; // max length of the generated text
|
||||
const EOS_TOKEN_ID: i64 = 32007; // <|end|>
|
||||
@@ -37,8 +37,7 @@ fn get_image_embedding(vision_model: &Session, img: &Option<DynamicImage>) -> Re
|
||||
]?;
|
||||
let outputs = vision_model.run(model_inputs)?;
|
||||
let predictions_view: ArrayView<f32, _> = outputs["visual_features"].try_extract_tensor::<f32>()?;
|
||||
let predictions = predictions_view.into_dimensionality::<Ix3>()?.to_owned();
|
||||
predictions
|
||||
predictions_view.into_dimensionality::<Ix3>()?.to_owned()
|
||||
} else {
|
||||
Array::zeros((1, 0, 0))
|
||||
};
|
||||
@@ -71,7 +70,7 @@ fn merge_text_and_image_embeddings(
|
||||
// Insert visual features
|
||||
combined_embeds
|
||||
.slice_mut(s![.., image_token_position..(image_token_position + visual_features.shape()[1]), ..])
|
||||
.assign(&visual_features);
|
||||
.assign(visual_features);
|
||||
|
||||
// Copy the remaining text embeddings
|
||||
combined_embeds
|
||||
@@ -109,13 +108,13 @@ pub async fn generate_text(
|
||||
text: &str
|
||||
) -> Result<()> {
|
||||
let (inputs_embeds, mut attention_mask) = {
|
||||
let visual_features = get_image_embedding(&vision_model, &image)?;
|
||||
let prompt = format_chat_template(&image, text);
|
||||
let visual_features = get_image_embedding(vision_model, image)?;
|
||||
let prompt = format_chat_template(image, text);
|
||||
let encoding = tokenizer.encode(prompt, true).map_err(|e| anyhow::anyhow!("Error encoding: {:?}", e))?;
|
||||
|
||||
let input_ids: Vec<i64> = encoding.get_ids().iter().map(|&id| id as i64).collect();
|
||||
let input_ids: Array2<i64> = Array2::from_shape_vec((1, input_ids.len()), input_ids)?;
|
||||
let mut inputs_embeds: Array3<f32> = get_text_embedding(&text_embedding_model, &input_ids)?;
|
||||
let mut inputs_embeds: Array3<f32> = get_text_embedding(text_embedding_model, &input_ids)?;
|
||||
|
||||
let attention_mask: Vec<i64> = encoding.get_attention_mask().iter().map(|&mask| mask as i64).collect();
|
||||
let mut attention_mask: Array2<i64> = Array2::from_shape_vec((1, attention_mask.len()), attention_mask)?;
|
||||
@@ -190,7 +189,7 @@ pub async fn generate_text(
|
||||
|
||||
// Update current_embeds, attention_mask, and past_key_values for the next iteration
|
||||
let new_token_id = Array2::from_elem((1, 1), next_token_id);
|
||||
next_inputs_embeds = get_text_embedding(&text_embedding_model, &new_token_id)?;
|
||||
next_inputs_embeds = get_text_embedding(text_embedding_model, &new_token_id)?;
|
||||
attention_mask = Array2::ones((1, attention_mask.shape()[1] + 1));
|
||||
for i in 0..32 {
|
||||
past_key_values[i * 2] = model_outputs[format!("present.{}.key", i)]
|
||||
@@ -213,15 +212,9 @@ async fn main() -> Result<()> {
|
||||
|
||||
let data_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("data");
|
||||
let tokenizer = Tokenizer::from_file(data_dir.join("tokenizer.json")).map_err(|e| anyhow::anyhow!("Error loading tokenizer: {:?}", e))?;
|
||||
let vision_model = Session::builder()?
|
||||
.with_execution_providers([ort::CPUExecutionProvider::default().build()])?
|
||||
.commit_from_file(data_dir.join(VISION_MODEL_NAME))?;
|
||||
let text_embedding_model = Session::builder()?
|
||||
.with_execution_providers([ort::CPUExecutionProvider::default().build()])?
|
||||
.commit_from_file(data_dir.join(TEXT_EMBEDDING_MODEL_NAME))?;
|
||||
let generation_model = Session::builder()?
|
||||
.with_execution_providers([ort::CPUExecutionProvider::default().build()])?
|
||||
.commit_from_file(data_dir.join(GENERATION_MODEL_NAME))?;
|
||||
let vision_model = Session::builder()?.commit_from_file(data_dir.join(VISION_MODEL_NAME))?;
|
||||
let text_embedding_model = Session::builder()?.commit_from_file(data_dir.join(TEXT_EMBEDDING_MODEL_NAME))?;
|
||||
let generation_model = Session::builder()?.commit_from_file(data_dir.join(GENERATION_MODEL_NAME))?;
|
||||
|
||||
// Generate text from text
|
||||
let image: Option<DynamicImage> = None;
|
||||
|
||||
@@ -1,7 +1,11 @@
|
||||
use std::path::Path;
|
||||
|
||||
use ndarray::{Array2, Axis, Ix2};
|
||||
use ort::{CUDAExecutionProvider, Error, GraphOptimizationLevel, Session};
|
||||
use ort::{
|
||||
Error,
|
||||
execution_providers::CUDAExecutionProvider,
|
||||
session::{Session, builder::GraphOptimizationLevel}
|
||||
};
|
||||
use tokenizers::Tokenizer;
|
||||
|
||||
/// Example usage of a text embedding model like Sentence Transformers' `all-mini-lm-l6` model for semantic textual
|
||||
|
||||
@@ -6,7 +6,12 @@ use std::{
|
||||
|
||||
use kdam::BarExt;
|
||||
use ndarray::{Array1, Array2, ArrayViewD, Axis, concatenate, s};
|
||||
use ort::{Allocator, CUDAExecutionProvider, CheckpointStrategy, Session, SessionBuilder, Trainer, TrainerCallbacks, TrainingArguments};
|
||||
use ort::{
|
||||
execution_providers::CUDAExecutionProvider,
|
||||
memory::Allocator,
|
||||
session::{Session, builder::SessionBuilder},
|
||||
training::{CheckpointStrategy, Trainer, TrainerCallbacks, TrainerControl, TrainerState, TrainingArguments}
|
||||
};
|
||||
use rand::RngCore;
|
||||
use tokenizers::Tokenizer;
|
||||
|
||||
@@ -26,7 +31,7 @@ impl LoggerCallback {
|
||||
}
|
||||
|
||||
impl TrainerCallbacks for LoggerCallback {
|
||||
fn train_step(&mut self, train_loss: f32, state: &ort::TrainerState, _: &mut ort::TrainerControl<'_>) -> ort::Result<()> {
|
||||
fn train_step(&mut self, train_loss: f32, state: &TrainerState, _: &mut TrainerControl<'_>) -> ort::Result<()> {
|
||||
self.progress_bar.total = state.max_steps;
|
||||
self.progress_bar.set_postfix(format!("loss={train_loss:.3}"));
|
||||
let _ = self.progress_bar.update_to(state.iter_step);
|
||||
|
||||
@@ -6,7 +6,12 @@ use std::{
|
||||
|
||||
use kdam::BarExt;
|
||||
use ndarray::{Array1, Array2, ArrayViewD, Axis, concatenate, s};
|
||||
use ort::{Allocator, CUDAExecutionProvider, Checkpoint, Session, SessionBuilder, Trainer};
|
||||
use ort::{
|
||||
execution_providers::CUDAExecutionProvider,
|
||||
memory::Allocator,
|
||||
session::{Session, builder::SessionBuilder},
|
||||
training::{Checkpoint, Trainer}
|
||||
};
|
||||
use rand::RngCore;
|
||||
use tokenizers::Tokenizer;
|
||||
|
||||
|
||||
@@ -4,7 +4,11 @@ use std::path::Path;
|
||||
|
||||
use image::{GenericImageView, imageops::FilterType};
|
||||
use ndarray::{Array, Axis, s};
|
||||
use ort::{CUDAExecutionProvider, Session, SessionOutputs, inputs};
|
||||
use ort::{
|
||||
execution_providers::CUDAExecutionProvider,
|
||||
inputs,
|
||||
session::{Session, SessionOutputs}
|
||||
};
|
||||
use raqote::{DrawOptions, DrawTarget, LineJoin, PathBuilder, SolidSource, Source, StrokeStyle};
|
||||
use show_image::{AsImageView, WindowOptions, event};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user