mirror of
https://github.com/pykeio/ort
synced 2026-04-25 16:34:55 +02:00
refactor!: allow zero-copy from_array for array views with TensorRef
This has all sorts of fun breaking changes:

- `ort::inputs!` no longer yields an `ort::Result<...>` (thank God)
- `Tensor::from_array` now only accepts owned data.
- Introduce `TensorRef::from_array_view` and `TensorRefMut::from_array_view_mut`.
- `TryFrom<A>` is no longer implemented for `Tensor<T>` for any variants.

This opens the door to new optimizations on top of fixing a few unsoundness issues.

TODO: update docs
This commit is contained in:
@@ -18,7 +18,7 @@ tokio = { version = "1.36", features = [ "full" ] }
|
||||
tokio-stream = "0.1"
|
||||
tower-http = { version = "0.5", features = ["fs", "trace"] }
|
||||
anyhow = "1.0"
|
||||
async-stream = "0.3"
|
||||
async-stream-lite = "0.2"
|
||||
|
||||
[features]
|
||||
load-dynamic = [ "ort/load-dynamic" ]
|
||||
|
||||
@@ -10,11 +10,10 @@ use axum::{
|
||||
routing::post
|
||||
};
|
||||
use futures::Stream;
|
||||
use ndarray::{Array1, ArrayViewD, Axis, array, concatenate, s};
|
||||
use ort::{
|
||||
execution_providers::CUDAExecutionProvider,
|
||||
inputs,
|
||||
session::{Session, builder::GraphOptimizationLevel}
|
||||
session::{Session, builder::GraphOptimizationLevel},
|
||||
value::TensorRef
|
||||
};
|
||||
use rand::Rng;
|
||||
use tokenizers::Tokenizer;
|
||||
@@ -64,37 +63,31 @@ struct AppState {
|
||||
tokenizer: Arc<Tokenizer>
|
||||
}
|
||||
|
||||
fn generate_stream(tokenizer: Arc<Tokenizer>, session: Arc<Session>, tokens: Vec<i64>, gen_tokens: usize) -> impl Stream<Item = ort::Result<Event>> + Send {
|
||||
async_stream::try_stream! {
|
||||
let mut tokens = Array1::from_iter(tokens.iter().cloned());
|
||||
|
||||
fn generate_stream(tokenizer: Arc<Tokenizer>, session: Arc<Session>, mut tokens: Vec<i64>, gen_tokens: usize) -> impl Stream<Item = ort::Result<Event>> + Send {
|
||||
async_stream_lite::try_async_stream(|yielder| async move {
|
||||
for _ in 0..gen_tokens {
|
||||
let array = tokens.view().insert_axis(Axis(0)).insert_axis(Axis(1));
|
||||
let outputs = session.run_async(inputs![array]?)?.await?;
|
||||
let generated_tokens: ArrayViewD<f32> = outputs["output1"].try_extract_tensor()?;
|
||||
let input = TensorRef::from_array_view((vec![1, 1, tokens.len() as i64], tokens.as_slice()))?;
|
||||
let outputs = session.run_async(ort::inputs![input])?.await?;
|
||||
let (dim, probabilities) = outputs["output1"].try_extract_raw_tensor()?;
|
||||
|
||||
// Collect and sort logits
|
||||
let probabilities = &mut generated_tokens
|
||||
.slice(s![0, 0, -1, ..])
|
||||
.insert_axis(Axis(0))
|
||||
.to_owned()
|
||||
.iter()
|
||||
.cloned()
|
||||
.enumerate()
|
||||
.collect::<Vec<_>>();
|
||||
let (seq_len, vocab_size) = (dim[2] as usize, dim[3] as usize);
|
||||
let mut probabilities: Vec<(usize, f32)> = probabilities[(seq_len - 1) * vocab_size..].iter().copied().enumerate().collect();
|
||||
probabilities.sort_unstable_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Less));
|
||||
|
||||
// Sample using top-k sampling
|
||||
let token = {
|
||||
let mut rng = rand::thread_rng();
|
||||
probabilities[rng.gen_range(0..=5)].0
|
||||
probabilities[rng.gen_range(0..=5)].0 as i64
|
||||
};
|
||||
tokens = concatenate![Axis(0), tokens, array![token.try_into().unwrap()]];
|
||||
tokens.push(token);
|
||||
|
||||
let token_str = tokenizer.decode(&[token as _], true).unwrap();
|
||||
yield Event::default().data(token_str);
|
||||
yielder.r#yield(Event::default().data(token_str)).await;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
})
|
||||
}
|
||||
|
||||
impl FromRef<AppState> for Arc<Session> {
|
||||
|
||||
@@ -45,7 +45,7 @@ fn main() -> anyhow::Result<()> {
|
||||
)
|
||||
.unwrap()
|
||||
};
|
||||
let outputs = model.run([tensor.into()])?;
|
||||
let outputs = model.run(ort::inputs![tensor])?;
|
||||
|
||||
let output = outputs["output"].try_extract_tensor::<f32>()?;
|
||||
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
use ndarray::Array2;
|
||||
use ort::{
|
||||
operator::{
|
||||
Operator, OperatorDomain,
|
||||
@@ -6,7 +5,8 @@ use ort::{
|
||||
kernel::{Kernel, KernelAttributes, KernelContext}
|
||||
},
|
||||
session::Session,
|
||||
tensor::TensorElementType
|
||||
tensor::TensorElementType,
|
||||
value::Tensor
|
||||
};
|
||||
|
||||
struct CustomOpOne;
|
||||
@@ -78,7 +78,16 @@ fn main() -> ort::Result<()> {
|
||||
.with_operators(OperatorDomain::new("test.customop")?.add(CustomOpOne)?.add(CustomOpTwo)?)?
|
||||
.commit_from_file("tests/data/custom_op_test.onnx")?;
|
||||
|
||||
let values = session.run(ort::inputs![Array2::<f32>::zeros((3, 5)), Array2::<f32>::ones((3, 5))]?)?;
|
||||
let allocator = session.allocator();
|
||||
let value1 = Tensor::<f32>::new(allocator, [3, 5])?;
|
||||
let mut value2 = Tensor::<f32>::new(allocator, [3, 5])?;
|
||||
{
|
||||
let (_, data) = value2.extract_raw_tensor_mut();
|
||||
for datum in data {
|
||||
*datum = 1.;
|
||||
}
|
||||
}
|
||||
let values = session.run(ort::inputs![&value1, &value2])?;
|
||||
println!("{:?}", values[0].try_extract_tensor::<i32>()?);
|
||||
|
||||
Ok(())
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
use std::{
|
||||
io::{self, Write},
|
||||
path::Path,
|
||||
sync::Arc
|
||||
path::Path
|
||||
};
|
||||
|
||||
use ort::{
|
||||
execution_providers::CUDAExecutionProvider,
|
||||
inputs,
|
||||
session::{Session, builder::GraphOptimizationLevel}
|
||||
session::{Session, builder::GraphOptimizationLevel},
|
||||
value::TensorRef
|
||||
};
|
||||
use rand::Rng;
|
||||
use tokenizers::Tokenizer;
|
||||
@@ -33,7 +33,7 @@ fn main() -> ort::Result<()> {
|
||||
.with_execution_providers([CUDAExecutionProvider::default().build()])
|
||||
.commit()?;
|
||||
|
||||
let mut stdout = io::stdout();
|
||||
let mut stdout: io::Stdout = io::stdout();
|
||||
let mut rng = rand::thread_rng();
|
||||
|
||||
// Load our model
|
||||
@@ -45,7 +45,7 @@ fn main() -> ort::Result<()> {
|
||||
// Load the tokenizer and encode the prompt into a sequence of tokens.
|
||||
let tokenizer = Tokenizer::from_file(Path::new(env!("CARGO_MANIFEST_DIR")).join("data").join("tokenizer.json")).unwrap();
|
||||
let tokens = tokenizer.encode(PROMPT, false).unwrap();
|
||||
let mut tokens = Arc::new(tokens.get_ids().iter().map(|i| *i as i64).collect::<Vec<_>>().into_boxed_slice());
|
||||
let mut tokens = tokens.get_ids().iter().map(|i| *i as i64).collect::<Vec<_>>();
|
||||
|
||||
print!("{PROMPT}");
|
||||
stdout.flush().unwrap();
|
||||
@@ -53,8 +53,8 @@ fn main() -> ort::Result<()> {
|
||||
for _ in 0..GEN_TOKENS {
|
||||
// Raw tensor construction takes a tuple of (dimensions, data).
|
||||
// The model expects our input to have shape [B, _, S]
|
||||
let input = (vec![1, 1, tokens.len() as i64], Arc::clone(&tokens));
|
||||
let outputs = session.run(inputs![input]?)?;
|
||||
let input = TensorRef::from_array_view((vec![1, 1, tokens.len() as i64], tokens.as_slice()))?;
|
||||
let outputs = session.run(inputs![input])?;
|
||||
let (dim, mut probabilities) = outputs["output1"].try_extract_raw_tensor()?;
|
||||
|
||||
// The output tensor will have shape [B, _, S + 1, V]
|
||||
@@ -70,9 +70,7 @@ fn main() -> ort::Result<()> {
|
||||
let token = probabilities[rng.gen_range(0..=TOP_K)].0 as i64;
|
||||
|
||||
// Add our generated token to the input sequence
|
||||
let mut vec = tokens.to_vec();
|
||||
vec.push(token);
|
||||
*Arc::make_mut(&mut tokens) = vec.into_boxed_slice();
|
||||
tokens.push(token);
|
||||
|
||||
let token_str = tokenizer.decode(&[token as u32], true).unwrap();
|
||||
print!("{}", token_str);
|
||||
|
||||
@@ -7,7 +7,8 @@ use ndarray::{Array1, ArrayViewD, Axis, array, concatenate, s};
|
||||
use ort::{
|
||||
execution_providers::CUDAExecutionProvider,
|
||||
inputs,
|
||||
session::{Session, builder::GraphOptimizationLevel}
|
||||
session::{Session, builder::GraphOptimizationLevel},
|
||||
value::TensorRef
|
||||
};
|
||||
use rand::Rng;
|
||||
use tokenizers::Tokenizer;
|
||||
@@ -54,7 +55,7 @@ fn main() -> ort::Result<()> {
|
||||
|
||||
for _ in 0..GEN_TOKENS {
|
||||
let array = tokens.view().insert_axis(Axis(0)).insert_axis(Axis(1));
|
||||
let outputs = session.run(inputs![array]?)?;
|
||||
let outputs = session.run(inputs![TensorRef::from_array_view(array)?])?;
|
||||
let generated_tokens: ArrayViewD<f32> = outputs["output1"].try_extract_tensor()?;
|
||||
|
||||
// Collect and sort logits
|
||||
|
||||
@@ -4,7 +4,7 @@ use std::{ops::Mul, path::Path};
|
||||
|
||||
use image::{GenericImageView, ImageBuffer, Rgba, imageops::FilterType};
|
||||
use ndarray::Array;
|
||||
use ort::{execution_providers::CUDAExecutionProvider, inputs, session::Session};
|
||||
use ort::{execution_providers::CUDAExecutionProvider, inputs, session::Session, value::TensorRef};
|
||||
use show_image::{AsImageView, WindowOptions, event};
|
||||
|
||||
#[show_image::main]
|
||||
@@ -31,7 +31,7 @@ fn main() -> ort::Result<()> {
|
||||
input[[0, 2, y, x]] = (b as f32 - 127.5) / 127.5;
|
||||
}
|
||||
|
||||
let outputs = model.run(inputs!["input" => input.view()]?)?;
|
||||
let outputs = model.run(inputs!["input" => TensorRef::from_array_view(input.view())?])?;
|
||||
|
||||
let output = outputs["output"].try_extract_tensor::<f32>()?;
|
||||
|
||||
|
||||
@@ -4,7 +4,10 @@ use std::{path::Path, time::Instant};
|
||||
use anyhow::Result;
|
||||
use image::DynamicImage;
|
||||
use ndarray::{Array, Array2, Array3, Array4, ArrayView, Ix3, Ix4, s};
|
||||
use ort::{session::Session, value::Tensor};
|
||||
use ort::{
|
||||
session::Session,
|
||||
value::{Tensor, TensorRef}
|
||||
};
|
||||
use tokenizers::Tokenizer;
|
||||
|
||||
const VISION_MODEL_NAME: &str = "phi-3-v-128k-instruct-vision.onnx";
|
||||
@@ -31,11 +34,10 @@ fn get_image_embedding(vision_model: &Session, img: &Option<DynamicImage>) -> Re
|
||||
pixel_values = result.pixel_values.shape(),
|
||||
image_sizes = result.image_sizes.shape(),
|
||||
);
|
||||
let model_inputs = ort::inputs![
|
||||
"pixel_values" => result.pixel_values,
|
||||
"image_sizes" => result.image_sizes,
|
||||
]?;
|
||||
let outputs = vision_model.run(model_inputs)?;
|
||||
let outputs = vision_model.run(ort::inputs![
|
||||
"pixel_values" => Tensor::from_array(result.pixel_values)?,
|
||||
"image_sizes" => Tensor::from_array(result.image_sizes)?,
|
||||
])?;
|
||||
let predictions_view: ArrayView<f32, _> = outputs["visual_features"].try_extract_tensor::<f32>()?;
|
||||
predictions_view.into_dimensionality::<Ix3>()?.to_owned()
|
||||
} else {
|
||||
@@ -45,10 +47,9 @@ fn get_image_embedding(vision_model: &Session, img: &Option<DynamicImage>) -> Re
|
||||
}
|
||||
|
||||
fn get_text_embedding(text_embedding_model: &Session, input_ids: &Array2<i64>) -> Result<Array3<f32>> {
|
||||
let model_inputs = ort::inputs![
|
||||
"input_ids" => input_ids.to_owned(),
|
||||
]?;
|
||||
let outputs = text_embedding_model.run(model_inputs)?;
|
||||
let outputs = text_embedding_model.run(ort::inputs![
|
||||
"input_ids" => TensorRef::from_array_view(input_ids)?,
|
||||
])?;
|
||||
let inputs_embeds_view: ArrayView<f32, _> = outputs["inputs_embeds"].try_extract_tensor::<f32>()?;
|
||||
let inputs_embeds = inputs_embeds_view.into_dimensionality::<Ix3>()?.to_owned();
|
||||
Ok(inputs_embeds)
|
||||
@@ -144,12 +145,12 @@ pub async fn generate_text(
|
||||
// Prepare model inputs
|
||||
let model_inputs = {
|
||||
let mut model_inputs = ort::inputs![
|
||||
"inputs_embeds" => next_inputs_embeds.clone(),
|
||||
"attention_mask" => attention_mask.clone(),
|
||||
]?;
|
||||
"inputs_embeds" => TensorRef::from_array_view(&next_inputs_embeds)?,
|
||||
"attention_mask" => TensorRef::from_array_view(&attention_mask)?,
|
||||
];
|
||||
for i in 0..32 {
|
||||
model_inputs.push((format!("past_key_values.{}.key", i).into(), Tensor::from_array(past_key_values[i * 2].view())?.into()));
|
||||
model_inputs.push((format!("past_key_values.{}.value", i).into(), Tensor::from_array(past_key_values[i * 2 + 1].view())?.into()));
|
||||
model_inputs.push((format!("past_key_values.{}.key", i).into(), TensorRef::from_array_view(&past_key_values[i * 2])?.into()));
|
||||
model_inputs.push((format!("past_key_values.{}.value", i).into(), TensorRef::from_array_view(&past_key_values[i * 2 + 1])?.into()));
|
||||
}
|
||||
model_inputs
|
||||
};
|
||||
|
||||
@@ -1,10 +1,11 @@
|
||||
use std::path::Path;
|
||||
|
||||
use ndarray::{Array2, Axis, Ix2};
|
||||
use ndarray::{Axis, Ix2};
|
||||
use ort::{
|
||||
Error,
|
||||
execution_providers::CUDAExecutionProvider,
|
||||
session::{Session, builder::GraphOptimizationLevel}
|
||||
session::{Session, builder::GraphOptimizationLevel},
|
||||
value::TensorRef
|
||||
};
|
||||
use tokenizers::Tokenizer;
|
||||
|
||||
@@ -45,11 +46,11 @@ fn main() -> ort::Result<()> {
|
||||
let mask: Vec<i64> = encodings.iter().flat_map(|e| e.get_attention_mask().iter().map(|i| *i as i64)).collect();
|
||||
|
||||
// Convert our flattened arrays into 2-dimensional tensors of shape [N, L].
|
||||
let a_ids = Array2::from_shape_vec([inputs.len(), padded_token_length], ids).unwrap();
|
||||
let a_mask = Array2::from_shape_vec([inputs.len(), padded_token_length], mask).unwrap();
|
||||
let a_ids = TensorRef::from_array_view(([inputs.len(), padded_token_length], &*ids))?;
|
||||
let a_mask = TensorRef::from_array_view(([inputs.len(), padded_token_length], &*mask))?;
|
||||
|
||||
// Run the model.
|
||||
let outputs = session.run(ort::inputs![a_ids, a_mask]?)?;
|
||||
let outputs = session.run(ort::inputs![a_ids, a_mask])?;
|
||||
|
||||
// Extract our embeddings tensor and convert it to a strongly-typed 2-dimensional array.
|
||||
let embeddings = outputs[1].try_extract_tensor::<f32>()?.into_dimensionality::<Ix2>().unwrap();
|
||||
|
||||
@@ -5,12 +5,12 @@ use std::{
|
||||
};
|
||||
|
||||
use kdam::BarExt;
|
||||
use ndarray::{Array1, Array2, ArrayViewD, Axis, concatenate, s};
|
||||
use ort::{
|
||||
execution_providers::CUDAExecutionProvider,
|
||||
memory::Allocator,
|
||||
session::{Session, builder::SessionBuilder},
|
||||
training::{CheckpointStrategy, Trainer, TrainerCallbacks, TrainerControl, TrainerState, TrainingArguments}
|
||||
training::{CheckpointStrategy, Trainer, TrainerCallbacks, TrainerControl, TrainerState, TrainingArguments},
|
||||
value::{Tensor, TensorRef}
|
||||
};
|
||||
use rand::RngCore;
|
||||
use tokenizers::Tokenizer;
|
||||
@@ -94,10 +94,10 @@ fn main() -> ort::Result<()> {
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
Ok((
|
||||
ort::inputs![Array2::<i64>::from_shape_vec([BATCH_SIZE, SEQUENCE_LENGTH], input_buffer.iter().map(|c| *c as i64).collect()).unwrap()]?,
|
||||
ort::inputs![Array1::<i64>::from_shape_vec([BATCH_SIZE * SEQUENCE_LENGTH], label_buffer.iter().map(|c| *c as i64).collect()).unwrap()]?
|
||||
))
|
||||
let inputs = Tensor::from_array(([BATCH_SIZE, SEQUENCE_LENGTH], input_buffer.iter().map(|c| *c as i64).collect::<Vec<i64>>()))?;
|
||||
let labels = Tensor::from_array(([BATCH_SIZE * SEQUENCE_LENGTH], label_buffer.iter().map(|c| *c as i64).collect::<Vec<i64>>()))?;
|
||||
|
||||
Ok((ort::inputs![inputs], ort::inputs![labels]))
|
||||
};
|
||||
|
||||
trainer.train(
|
||||
@@ -115,26 +115,19 @@ fn main() -> ort::Result<()> {
|
||||
let mut stdout = std::io::stdout();
|
||||
|
||||
let tokens = tokenizer.encode("<|endoftext|>", false).unwrap();
|
||||
let tokens = tokens.get_ids().iter().map(|i| *i as i64).collect::<Vec<_>>();
|
||||
|
||||
let mut tokens = Array1::from_iter(tokens.iter().cloned());
|
||||
let mut tokens = tokens.get_ids().iter().map(|i| *i as i64).collect::<Vec<_>>();
|
||||
|
||||
for _ in 0..50 {
|
||||
let array = tokens.view().insert_axis(Axis(0));
|
||||
let outputs = session.run(ort::inputs![array]?)?;
|
||||
let generated_tokens: ArrayViewD<f32> = outputs["probs"].try_extract_tensor()?;
|
||||
let input = TensorRef::from_array_view((vec![1, 1, tokens.len() as i64], tokens.as_slice()))?;
|
||||
let outputs = session.run(ort::inputs![input])?;
|
||||
let (dim, probabilities) = outputs["probs"].try_extract_raw_tensor()?;
|
||||
|
||||
let probabilities = &mut generated_tokens
|
||||
.slice(s![-1, ..])
|
||||
.to_owned()
|
||||
.iter()
|
||||
.cloned()
|
||||
.enumerate()
|
||||
.collect::<Vec<_>>();
|
||||
let (seq_len, vocab_size) = (dim[2] as usize, dim[3] as usize);
|
||||
let mut probabilities: Vec<(usize, f32)> = probabilities[(seq_len - 1) * vocab_size..].iter().copied().enumerate().collect();
|
||||
probabilities.sort_unstable_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Less));
|
||||
|
||||
let token = probabilities[0].0;
|
||||
tokens = concatenate![Axis(0), tokens, ndarray::array![token.try_into().unwrap()]];
|
||||
let token = probabilities[0].0 as i64;
|
||||
tokens.push(token);
|
||||
|
||||
let token_str = tokenizer.decode(&[token as _], false).unwrap();
|
||||
print!("{}", token_str);
|
||||
|
||||
@@ -5,12 +5,12 @@ use std::{
|
||||
};
|
||||
|
||||
use kdam::BarExt;
|
||||
use ndarray::{Array1, Array2, ArrayViewD, Axis, concatenate, s};
|
||||
use ort::{
|
||||
execution_providers::CUDAExecutionProvider,
|
||||
memory::Allocator,
|
||||
session::{Session, builder::SessionBuilder},
|
||||
training::{Checkpoint, Trainer}
|
||||
training::{Checkpoint, Trainer},
|
||||
value::{Tensor, TensorRef}
|
||||
};
|
||||
use rand::RngCore;
|
||||
use tokenizers::Tokenizer;
|
||||
@@ -83,10 +83,10 @@ fn main() -> ort::Result<()> {
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
let inputs = Array2::<i64>::from_shape_vec([BATCH_SIZE, SEQUENCE_LENGTH], input_buffer.iter().map(|c| *c as i64).collect()).unwrap();
|
||||
let labels = Array1::<i64>::from_shape_vec([BATCH_SIZE * SEQUENCE_LENGTH], label_buffer.iter().map(|c| *c as i64).collect()).unwrap();
|
||||
let inputs = Tensor::from_array(([BATCH_SIZE, SEQUENCE_LENGTH], input_buffer.iter().map(|c| *c as i64).collect::<Vec<i64>>()))?;
|
||||
let labels = Tensor::from_array(([BATCH_SIZE * SEQUENCE_LENGTH], label_buffer.iter().map(|c| *c as i64).collect::<Vec<i64>>()))?;
|
||||
|
||||
let outputs = trainer.step(ort::inputs![inputs.view()]?, ort::inputs![labels.view()]?)?;
|
||||
let outputs = trainer.step(ort::inputs![inputs], ort::inputs![labels])?;
|
||||
let loss = outputs[0].try_extract_scalar::<f32>()?;
|
||||
pb.set_postfix(format!("loss={loss:.3}"));
|
||||
pb.update(1).unwrap();
|
||||
@@ -107,26 +107,19 @@ fn main() -> ort::Result<()> {
|
||||
let mut stdout = std::io::stdout();
|
||||
|
||||
let tokens = tokenizer.encode("<|endoftext|>", false).unwrap();
|
||||
let tokens = tokens.get_ids().iter().map(|i| *i as i64).collect::<Vec<_>>();
|
||||
|
||||
let mut tokens = Array1::from_iter(tokens.iter().cloned());
|
||||
let mut tokens = tokens.get_ids().iter().map(|i| *i as i64).collect::<Vec<_>>();
|
||||
|
||||
for _ in 0..50 {
|
||||
let array = tokens.view().insert_axis(Axis(0));
|
||||
let outputs = session.run(ort::inputs![array]?)?;
|
||||
let generated_tokens: ArrayViewD<f32> = outputs["probs"].try_extract_tensor()?;
|
||||
let input = TensorRef::from_array_view((vec![1, 1, tokens.len() as i64], tokens.as_slice()))?;
|
||||
let outputs = session.run(ort::inputs![input])?;
|
||||
let (dim, probabilities) = outputs["probs"].try_extract_raw_tensor()?;
|
||||
|
||||
let probabilities = &mut generated_tokens
|
||||
.slice(s![-1, ..])
|
||||
.to_owned()
|
||||
.iter()
|
||||
.cloned()
|
||||
.enumerate()
|
||||
.collect::<Vec<_>>();
|
||||
let (seq_len, vocab_size) = (dim[2] as usize, dim[3] as usize);
|
||||
let mut probabilities: Vec<(usize, f32)> = probabilities[(seq_len - 1) * vocab_size..].iter().copied().enumerate().collect();
|
||||
probabilities.sort_unstable_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Less));
|
||||
|
||||
let token = probabilities[0].0;
|
||||
tokens = concatenate![Axis(0), tokens, ndarray::array![token.try_into().unwrap()]];
|
||||
let token = probabilities[0].0 as i64;
|
||||
tokens.push(token);
|
||||
|
||||
let token_str = tokenizer.decode(&[token as _], false).unwrap();
|
||||
print!("{}", token_str);
|
||||
|
||||
@@ -7,7 +7,8 @@ use ndarray::{Array, Axis, s};
|
||||
use ort::{
|
||||
execution_providers::CUDAExecutionProvider,
|
||||
inputs,
|
||||
session::{Session, SessionOutputs}
|
||||
session::{Session, SessionOutputs},
|
||||
value::TensorRef
|
||||
};
|
||||
use raqote::{DrawOptions, DrawTarget, LineJoin, PathBuilder, SolidSource, Source, StrokeStyle};
|
||||
use show_image::{AsImageView, WindowOptions, event};
|
||||
@@ -66,7 +67,7 @@ fn main() -> ort::Result<()> {
|
||||
let model = Session::builder()?.commit_from_url(YOLOV8M_URL)?;
|
||||
|
||||
// Run YOLOv8 inference
|
||||
let outputs: SessionOutputs = model.run(inputs!["images" => input.view()]?)?;
|
||||
let outputs: SessionOutputs = model.run(inputs!["images" => TensorRef::from_array_view(&input)?])?;
|
||||
let output = outputs["output0"].try_extract_tensor::<f32>()?.t().into_owned();
|
||||
|
||||
let mut boxes = Vec::new();
|
||||
|
||||
Reference in New Issue
Block a user