mirror of
https://github.com/pykeio/ort
synced 2026-04-25 16:34:55 +02:00
chore(*): format code
This commit is contained in:
@@ -1,17 +1,17 @@
|
||||
use std::{path::Path, sync::Arc};
|
||||
|
||||
use axum::{
|
||||
Router,
|
||||
extract::{FromRef, State},
|
||||
response::{
|
||||
sse::{Event, KeepAlive},
|
||||
Sse
|
||||
Sse,
|
||||
sse::{Event, KeepAlive}
|
||||
},
|
||||
routing::post,
|
||||
Router
|
||||
routing::post
|
||||
};
|
||||
use futures::Stream;
|
||||
use ndarray::{array, concatenate, s, Array1, ArrayViewD, Axis};
|
||||
use ort::{inputs, CUDAExecutionProvider, GraphOptimizationLevel, Session};
|
||||
use ndarray::{Array1, ArrayViewD, Axis, array, concatenate, s};
|
||||
use ort::{CUDAExecutionProvider, GraphOptimizationLevel, Session, inputs};
|
||||
use rand::Rng;
|
||||
use tokenizers::Tokenizer;
|
||||
use tokio::net::TcpListener;
|
||||
|
||||
@@ -4,7 +4,7 @@ use std::{
|
||||
sync::Arc
|
||||
};
|
||||
|
||||
use ort::{inputs, CUDAExecutionProvider, GraphOptimizationLevel, Session};
|
||||
use ort::{CUDAExecutionProvider, GraphOptimizationLevel, Session, inputs};
|
||||
use rand::Rng;
|
||||
use tokenizers::Tokenizer;
|
||||
|
||||
|
||||
@@ -3,8 +3,8 @@ use std::{
|
||||
path::Path
|
||||
};
|
||||
|
||||
use ndarray::{array, concatenate, s, Array1, ArrayViewD, Axis};
|
||||
use ort::{inputs, CUDAExecutionProvider, GraphOptimizationLevel, Session};
|
||||
use ndarray::{Array1, ArrayViewD, Axis, array, concatenate, s};
|
||||
use ort::{CUDAExecutionProvider, GraphOptimizationLevel, Session, inputs};
|
||||
use rand::Rng;
|
||||
use tokenizers::Tokenizer;
|
||||
|
||||
|
||||
@@ -2,10 +2,10 @@
|
||||
|
||||
use std::{ops::Mul, path::Path};
|
||||
|
||||
use image::{imageops::FilterType, GenericImageView, ImageBuffer, Rgba};
|
||||
use image::{GenericImageView, ImageBuffer, Rgba, imageops::FilterType};
|
||||
use ndarray::Array;
|
||||
use ort::{inputs, CUDAExecutionProvider, Session};
|
||||
use show_image::{event, AsImageView, WindowOptions};
|
||||
use ort::{CUDAExecutionProvider, Session, inputs};
|
||||
use show_image::{AsImageView, WindowOptions, event};
|
||||
|
||||
#[show_image::main]
|
||||
fn main() -> ort::Result<()> {
|
||||
@@ -57,13 +57,10 @@ fn main() -> ort::Result<()> {
|
||||
let window = show_image::context()
|
||||
.run_function_wait(move |context| -> Result<_, String> {
|
||||
let mut window = context
|
||||
.create_window(
|
||||
"ort + modnet",
|
||||
WindowOptions {
|
||||
size: Some([img_width, img_height]),
|
||||
..WindowOptions::default()
|
||||
}
|
||||
)
|
||||
.create_window("ort + modnet", WindowOptions {
|
||||
size: Some([img_width, img_height]),
|
||||
..WindowOptions::default()
|
||||
})
|
||||
.map_err(|e| e.to_string())?;
|
||||
window.set_image("photo", &output.as_image_view().map_err(|e| e.to_string())?);
|
||||
Ok(window.proxy())
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
//! to be used with the Phi-3 vision model, adapting the original Python code to Rust.
|
||||
use anyhow::Result;
|
||||
use image::{DynamicImage, GenericImageView, ImageBuffer};
|
||||
use ndarray::{s, Array2, Array4, Array5, Axis};
|
||||
use ndarray::{Array2, Array4, Array5, Axis, s};
|
||||
|
||||
/// see https://huggingface.co/microsoft/Phi-3-vision-128k-instruct-onnx-cpu/blob/main/cpu-int4-rtn-block-32-acc-level-4/processor_config.json
|
||||
/// NOTE: The default setting in processor_config.json is num_crops = 16,
|
||||
@@ -24,7 +24,7 @@ pub struct Phi3VImageProcessor {
|
||||
num_crops: usize,
|
||||
image_mean: Vec<f32>,
|
||||
image_std: Vec<f32>,
|
||||
do_convert_rgb: bool,
|
||||
do_convert_rgb: bool
|
||||
}
|
||||
|
||||
impl Phi3VImageProcessor {
|
||||
@@ -33,7 +33,7 @@ impl Phi3VImageProcessor {
|
||||
num_crops: NUM_CROPS,
|
||||
image_mean: OPENAI_CLIP_MEAN.to_vec(),
|
||||
image_std: OPENAI_CLIP_STD.to_vec(),
|
||||
do_convert_rgb: true,
|
||||
do_convert_rgb: true
|
||||
}
|
||||
}
|
||||
|
||||
@@ -72,7 +72,7 @@ impl Phi3VImageProcessor {
|
||||
Ok(BatchFeature {
|
||||
pixel_values,
|
||||
image_sizes,
|
||||
num_img_tokens: vec![num_img_tokens as i64],
|
||||
num_img_tokens: vec![num_img_tokens as i64]
|
||||
})
|
||||
}
|
||||
|
||||
@@ -99,11 +99,7 @@ impl Phi3VImageProcessor {
|
||||
let resized = image.resize_exact(new_width, new_height, image::imageops::FilterType::Lanczos3);
|
||||
let padded = self.padding_336(&resized);
|
||||
|
||||
if transposed {
|
||||
padded.rotate90()
|
||||
} else {
|
||||
padded
|
||||
}
|
||||
if transposed { padded.rotate90() } else { padded }
|
||||
}
|
||||
|
||||
fn padding_336(&self, image: &DynamicImage) -> DynamicImage {
|
||||
@@ -188,5 +184,5 @@ impl Phi3VImageProcessor {
|
||||
pub struct BatchFeature {
|
||||
pub pixel_values: Array5<f32>,
|
||||
pub image_sizes: Array2<i64>,
|
||||
pub num_img_tokens: Vec<i64>,
|
||||
pub num_img_tokens: Vec<i64>
|
||||
}
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
mod image_process;
|
||||
use std::{path::Path, time::Instant};
|
||||
|
||||
use anyhow::Result;
|
||||
use image::DynamicImage;
|
||||
use ndarray::{s, Array, Array2, Array3, Array4, ArrayView, Ix3, Ix4};
|
||||
use ndarray::{Array, Array2, Array3, Array4, ArrayView, Ix3, Ix4, s};
|
||||
use ort::{Session, Tensor};
|
||||
use std::path::Path;
|
||||
use std::time::Instant;
|
||||
use tokenizers::Tokenizer;
|
||||
|
||||
const VISION_MODEL_NAME: &'static str = "phi-3-v-128k-instruct-vision.onnx";
|
||||
@@ -59,7 +59,7 @@ fn merge_text_and_image_embeddings(
|
||||
inputs_embeds: &Array3<f32>,
|
||||
attention_mask: &Array2<i64>,
|
||||
visual_features: &Array3<f32>,
|
||||
image_token_position: usize,
|
||||
image_token_position: usize
|
||||
) -> (Array3<f32>, Array2<i64>) {
|
||||
let mut combined_embeds = Array3::zeros((1, inputs_embeds.shape()[1] + visual_features.shape()[1], inputs_embeds.shape()[2]));
|
||||
|
||||
@@ -96,7 +96,7 @@ fn merge_text_and_image_embeddings(
|
||||
fn format_chat_template(img: &Option<DynamicImage>, txt: &str) -> String {
|
||||
match img {
|
||||
Some(_) => format!("<s><|user|>\n<|image_1|>\n{txt}<|end|>\n<|assistant|>\n", txt = txt),
|
||||
None => format!("<s><|user|>\n{txt}<|end|>\n<|assistant|>\n", txt = txt),
|
||||
None => format!("<s><|user|>\n{txt}<|end|>\n<|assistant|>\n", txt = txt)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -106,7 +106,7 @@ pub async fn generate_text(
|
||||
text_embedding_model: &Session,
|
||||
generation_model: &Session,
|
||||
image: &Option<DynamicImage>,
|
||||
text: &str,
|
||||
text: &str
|
||||
) -> Result<()> {
|
||||
let (mut inputs_embeds, mut attention_mask) = {
|
||||
let visual_features = get_image_embedding(&vision_model, &image)?;
|
||||
@@ -161,9 +161,11 @@ pub async fn generate_text(
|
||||
//
|
||||
// The current implementation uses a simple greedy decoding strategy:
|
||||
// - We select the token with the highest probability (argmax) from the logits.
|
||||
// - This approach always chooses the most likely next token, which can lead to deterministic and potentially repetitive outputs.
|
||||
// - This approach always chooses the most likely next token, which can lead to deterministic and potentially repetitive
|
||||
// outputs.
|
||||
//
|
||||
// Note: More advanced sampling strategies (e.g., temperature scaling, top-k, top-p sampling) are not implemented in the current version.
|
||||
// Note: More advanced sampling strategies (e.g., temperature scaling, top-k, top-p sampling) are not implemented in the
|
||||
// current version.
|
||||
//
|
||||
// The selected token ID will be in the range [0, VOCAB_SIZE - 1].
|
||||
let logits: ArrayView<f32, _> = model_outputs["logits"].try_extract_tensor::<f32>()?.into_dimensionality::<Ix3>()?;
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use std::path::Path;
|
||||
|
||||
use ndarray::{s, Array1, Array2, Axis, Ix2};
|
||||
use ndarray::{Array1, Array2, Axis, Ix2, s};
|
||||
use ort::{CUDAExecutionProvider, Error, GraphOptimizationLevel, Session};
|
||||
use tokenizers::Tokenizer;
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ use std::{
|
||||
};
|
||||
|
||||
use kdam::BarExt;
|
||||
use ndarray::{concatenate, s, Array1, Array2, ArrayViewD, Axis};
|
||||
use ndarray::{Array1, Array2, ArrayViewD, Axis, concatenate, s};
|
||||
use ort::{Allocator, CUDAExecutionProvider, CheckpointStrategy, Session, SessionBuilder, Trainer, TrainerCallbacks, TrainingArguments};
|
||||
use rand::RngCore;
|
||||
use tokenizers::Tokenizer;
|
||||
|
||||
@@ -5,7 +5,7 @@ use std::{
|
||||
};
|
||||
|
||||
use kdam::BarExt;
|
||||
use ndarray::{concatenate, s, Array1, Array2, ArrayViewD, Axis};
|
||||
use ndarray::{Array1, Array2, ArrayViewD, Axis, concatenate, s};
|
||||
use ort::{Allocator, CUDAExecutionProvider, Checkpoint, Session, SessionBuilder, Trainer};
|
||||
use rand::RngCore;
|
||||
use tokenizers::Tokenizer;
|
||||
|
||||
@@ -2,11 +2,11 @@
|
||||
|
||||
use std::path::Path;
|
||||
|
||||
use image::{imageops::FilterType, GenericImageView};
|
||||
use ndarray::{s, Array, Axis};
|
||||
use ort::{inputs, CUDAExecutionProvider, Session, SessionOutputs};
|
||||
use image::{GenericImageView, imageops::FilterType};
|
||||
use ndarray::{Array, Axis, s};
|
||||
use ort::{CUDAExecutionProvider, Session, SessionOutputs, inputs};
|
||||
use raqote::{DrawOptions, DrawTarget, LineJoin, PathBuilder, SolidSource, Source, StrokeStyle};
|
||||
use show_image::{event, AsImageView, WindowOptions};
|
||||
use show_image::{AsImageView, WindowOptions, event};
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
struct BoundingBox {
|
||||
@@ -137,13 +137,10 @@ fn main() -> ort::Result<()> {
|
||||
let window = show_image::context()
|
||||
.run_function_wait(move |context| -> Result<_, String> {
|
||||
let mut window = context
|
||||
.create_window(
|
||||
"ort + YOLOv8",
|
||||
WindowOptions {
|
||||
size: Some([img_width, img_height]),
|
||||
..WindowOptions::default()
|
||||
}
|
||||
)
|
||||
.create_window("ort + YOLOv8", WindowOptions {
|
||||
size: Some([img_width, img_height]),
|
||||
..WindowOptions::default()
|
||||
})
|
||||
.map_err(|e| e.to_string())?;
|
||||
window.set_image("baseball", &original_img.as_image_view().map_err(|e| e.to_string())?);
|
||||
window.set_overlay("yolo", &overlay.as_image_view().map_err(|e| e.to_string())?, true);
|
||||
|
||||
Reference in New Issue
Block a user