chore(*): format code

This commit is contained in:
Carson M.
2024-10-14 00:41:21 -05:00
parent ae7b594d7f
commit cdd6be7a66
45 changed files with 177 additions and 217 deletions

View File

@@ -1,17 +1,17 @@
use std::{path::Path, sync::Arc};
use axum::{
Router,
extract::{FromRef, State},
response::{
sse::{Event, KeepAlive},
Sse
Sse,
sse::{Event, KeepAlive}
},
routing::post,
Router
routing::post
};
use futures::Stream;
use ndarray::{array, concatenate, s, Array1, ArrayViewD, Axis};
use ort::{inputs, CUDAExecutionProvider, GraphOptimizationLevel, Session};
use ndarray::{Array1, ArrayViewD, Axis, array, concatenate, s};
use ort::{CUDAExecutionProvider, GraphOptimizationLevel, Session, inputs};
use rand::Rng;
use tokenizers::Tokenizer;
use tokio::net::TcpListener;

View File

@@ -4,7 +4,7 @@ use std::{
sync::Arc
};
use ort::{inputs, CUDAExecutionProvider, GraphOptimizationLevel, Session};
use ort::{CUDAExecutionProvider, GraphOptimizationLevel, Session, inputs};
use rand::Rng;
use tokenizers::Tokenizer;

View File

@@ -3,8 +3,8 @@ use std::{
path::Path
};
use ndarray::{array, concatenate, s, Array1, ArrayViewD, Axis};
use ort::{inputs, CUDAExecutionProvider, GraphOptimizationLevel, Session};
use ndarray::{Array1, ArrayViewD, Axis, array, concatenate, s};
use ort::{CUDAExecutionProvider, GraphOptimizationLevel, Session, inputs};
use rand::Rng;
use tokenizers::Tokenizer;

View File

@@ -2,10 +2,10 @@
use std::{ops::Mul, path::Path};
use image::{imageops::FilterType, GenericImageView, ImageBuffer, Rgba};
use image::{GenericImageView, ImageBuffer, Rgba, imageops::FilterType};
use ndarray::Array;
use ort::{inputs, CUDAExecutionProvider, Session};
use show_image::{event, AsImageView, WindowOptions};
use ort::{CUDAExecutionProvider, Session, inputs};
use show_image::{AsImageView, WindowOptions, event};
#[show_image::main]
fn main() -> ort::Result<()> {
@@ -57,13 +57,10 @@ fn main() -> ort::Result<()> {
let window = show_image::context()
.run_function_wait(move |context| -> Result<_, String> {
let mut window = context
.create_window(
"ort + modnet",
WindowOptions {
size: Some([img_width, img_height]),
..WindowOptions::default()
}
)
.create_window("ort + modnet", WindowOptions {
size: Some([img_width, img_height]),
..WindowOptions::default()
})
.map_err(|e| e.to_string())?;
window.set_image("photo", &output.as_image_view().map_err(|e| e.to_string())?);
Ok(window.proxy())

View File

@@ -9,7 +9,7 @@
//! to be used with the Phi-3 vision model, adapting the original Python code to Rust.
use anyhow::Result;
use image::{DynamicImage, GenericImageView, ImageBuffer};
use ndarray::{s, Array2, Array4, Array5, Axis};
use ndarray::{Array2, Array4, Array5, Axis, s};
/// see https://huggingface.co/microsoft/Phi-3-vision-128k-instruct-onnx-cpu/blob/main/cpu-int4-rtn-block-32-acc-level-4/processor_config.json
/// NOTE: The default setting in processor_config.json is num_crops = 16,
@@ -24,7 +24,7 @@ pub struct Phi3VImageProcessor {
num_crops: usize,
image_mean: Vec<f32>,
image_std: Vec<f32>,
do_convert_rgb: bool,
do_convert_rgb: bool
}
impl Phi3VImageProcessor {
@@ -33,7 +33,7 @@ impl Phi3VImageProcessor {
num_crops: NUM_CROPS,
image_mean: OPENAI_CLIP_MEAN.to_vec(),
image_std: OPENAI_CLIP_STD.to_vec(),
do_convert_rgb: true,
do_convert_rgb: true
}
}
@@ -72,7 +72,7 @@ impl Phi3VImageProcessor {
Ok(BatchFeature {
pixel_values,
image_sizes,
num_img_tokens: vec![num_img_tokens as i64],
num_img_tokens: vec![num_img_tokens as i64]
})
}
@@ -99,11 +99,7 @@ impl Phi3VImageProcessor {
let resized = image.resize_exact(new_width, new_height, image::imageops::FilterType::Lanczos3);
let padded = self.padding_336(&resized);
if transposed {
padded.rotate90()
} else {
padded
}
if transposed { padded.rotate90() } else { padded }
}
fn padding_336(&self, image: &DynamicImage) -> DynamicImage {
@@ -188,5 +184,5 @@ impl Phi3VImageProcessor {
pub struct BatchFeature {
pub pixel_values: Array5<f32>,
pub image_sizes: Array2<i64>,
pub num_img_tokens: Vec<i64>,
pub num_img_tokens: Vec<i64>
}

View File

@@ -1,10 +1,10 @@
mod image_process;
use std::{path::Path, time::Instant};
use anyhow::Result;
use image::DynamicImage;
use ndarray::{s, Array, Array2, Array3, Array4, ArrayView, Ix3, Ix4};
use ndarray::{Array, Array2, Array3, Array4, ArrayView, Ix3, Ix4, s};
use ort::{Session, Tensor};
use std::path::Path;
use std::time::Instant;
use tokenizers::Tokenizer;
/// File name of the ONNX vision model.
// References in `const` items are implicitly `'static`, so the lifetime is not spelled out.
const VISION_MODEL_NAME: &str = "phi-3-v-128k-instruct-vision.onnx";
@@ -59,7 +59,7 @@ fn merge_text_and_image_embeddings(
inputs_embeds: &Array3<f32>,
attention_mask: &Array2<i64>,
visual_features: &Array3<f32>,
image_token_position: usize,
image_token_position: usize
) -> (Array3<f32>, Array2<i64>) {
let mut combined_embeds = Array3::zeros((1, inputs_embeds.shape()[1] + visual_features.shape()[1], inputs_embeds.shape()[2]));
@@ -96,7 +96,7 @@ fn merge_text_and_image_embeddings(
/// Builds the prompt string for a single user turn followed by the assistant header.
///
/// When `img` is `Some`, the `<|image_1|>` placeholder line is inserted before the user
/// text; when it is `None`, the prompt contains the text only. `txt` is embedded verbatim.
fn format_chat_template(img: &Option<DynamicImage>, txt: &str) -> String {
    match img {
        // Only the presence of an image matters here; its contents are never inspected.
        Some(_) => format!("<s><|user|>\n<|image_1|>\n{txt}<|end|>\n<|assistant|>\n"),
        None => format!("<s><|user|>\n{txt}<|end|>\n<|assistant|>\n")
    }
}
@@ -106,7 +106,7 @@ pub async fn generate_text(
text_embedding_model: &Session,
generation_model: &Session,
image: &Option<DynamicImage>,
text: &str,
text: &str
) -> Result<()> {
let (mut inputs_embeds, mut attention_mask) = {
let visual_features = get_image_embedding(&vision_model, &image)?;
@@ -161,9 +161,11 @@ pub async fn generate_text(
//
// The current implementation uses a simple greedy decoding strategy:
// - We select the token with the highest probability (argmax) from the logits.
// - This approach always chooses the most likely next token, which can lead to deterministic and potentially repetitive outputs.
// - This approach always chooses the most likely next token, which can lead to deterministic and potentially repetitive
// outputs.
//
// Note: More advanced sampling strategies (e.g., temperature scaling, top-k, top-p sampling) are not implemented in the current version.
// Note: More advanced sampling strategies (e.g., temperature scaling, top-k, top-p sampling) are not implemented in the
// current version.
//
// The selected token ID will be in the range [0, VOCAB_SIZE - 1].
let logits: ArrayView<f32, _> = model_outputs["logits"].try_extract_tensor::<f32>()?.into_dimensionality::<Ix3>()?;

View File

@@ -1,6 +1,6 @@
use std::path::Path;
use ndarray::{s, Array1, Array2, Axis, Ix2};
use ndarray::{Array1, Array2, Axis, Ix2, s};
use ort::{CUDAExecutionProvider, Error, GraphOptimizationLevel, Session};
use tokenizers::Tokenizer;

View File

@@ -5,7 +5,7 @@ use std::{
};
use kdam::BarExt;
use ndarray::{concatenate, s, Array1, Array2, ArrayViewD, Axis};
use ndarray::{Array1, Array2, ArrayViewD, Axis, concatenate, s};
use ort::{Allocator, CUDAExecutionProvider, CheckpointStrategy, Session, SessionBuilder, Trainer, TrainerCallbacks, TrainingArguments};
use rand::RngCore;
use tokenizers::Tokenizer;

View File

@@ -5,7 +5,7 @@ use std::{
};
use kdam::BarExt;
use ndarray::{concatenate, s, Array1, Array2, ArrayViewD, Axis};
use ndarray::{Array1, Array2, ArrayViewD, Axis, concatenate, s};
use ort::{Allocator, CUDAExecutionProvider, Checkpoint, Session, SessionBuilder, Trainer};
use rand::RngCore;
use tokenizers::Tokenizer;

View File

@@ -2,11 +2,11 @@
use std::path::Path;
use image::{imageops::FilterType, GenericImageView};
use ndarray::{s, Array, Axis};
use ort::{inputs, CUDAExecutionProvider, Session, SessionOutputs};
use image::{GenericImageView, imageops::FilterType};
use ndarray::{Array, Axis, s};
use ort::{CUDAExecutionProvider, Session, SessionOutputs, inputs};
use raqote::{DrawOptions, DrawTarget, LineJoin, PathBuilder, SolidSource, Source, StrokeStyle};
use show_image::{event, AsImageView, WindowOptions};
use show_image::{AsImageView, WindowOptions, event};
#[derive(Debug, Clone, Copy)]
struct BoundingBox {
@@ -137,13 +137,10 @@ fn main() -> ort::Result<()> {
let window = show_image::context()
.run_function_wait(move |context| -> Result<_, String> {
let mut window = context
.create_window(
"ort + YOLOv8",
WindowOptions {
size: Some([img_width, img_height]),
..WindowOptions::default()
}
)
.create_window("ort + YOLOv8", WindowOptions {
size: Some([img_width, img_height]),
..WindowOptions::default()
})
.map_err(|e| e.to_string())?;
window.set_image("baseball", &original_img.as_image_view().map_err(|e| e.to_string())?);
window.set_overlay("yolo", &overlay.as_image_view().map_err(|e| e.to_string())?, true);