examples: refactor all-mini-lm-l6 for semantic similarity

This commit is contained in:
Carson M.
2024-07-22 19:53:23 -05:00
parent 1a10b11b10
commit c5538f26d0
6 changed files with 65 additions and 45 deletions

View File

@@ -1,41 +0,0 @@
use std::path::Path;
use ndarray::{Array1, Axis};
use ort::{CUDAExecutionProvider, GraphOptimizationLevel, Session};
use tokenizers::Tokenizer;
/// Generates a sentence embedding with the `all-mini-lm-l6` model.
///
/// `all-mini-lm-l6` is a sentence-transformers model: it maps sentences & paragraphs into a
/// 384-dimensional dense vector space, usable for tasks like clustering or semantic search.
fn main() -> ort::Result<()> {
	// Set up `tracing` so debug output emitted by `ort` is visible.
	tracing_subscriber::fmt::init();

	// Build the global ONNX Runtime environment; CUDA execution providers are enabled for every
	// session created in this process afterwards.
	ort::init()
		.with_name("all-Mini-LM-L6")
		.with_execution_providers([CUDAExecutionProvider::default().build()])
		.commit()?;

	// Create the inference session (the model is fetched from the URL on first use).
	let session = Session::builder()?
		.with_optimization_level(GraphOptimizationLevel::Level1)?
		.with_intra_threads(1)?
		.commit_from_url("https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2/resolve/main/onnx/model.onnx")?;

	// Load the tokenizer from the crate's `data` directory and encode the input text.
	let tokenizer = Tokenizer::from_file(Path::new(env!("CARGO_MANIFEST_DIR")).join("data").join("tokenizer.json")).unwrap();
	let encoding = tokenizer.encode("test", false)?;

	// Widen token IDs & attention mask to i64, then lift each 1-D array into a [1, L] view
	// (batch dimension of 1) as the model expects.
	let ids: Vec<i64> = encoding.get_ids().iter().map(|&i| i as i64).collect();
	let mask: Vec<i64> = encoding.get_attention_mask().iter().map(|&i| i as i64).collect();
	let ids = Array1::from_vec(ids);
	let mask = Array1::from_vec(mask);
	let input_ids = ids.view().insert_axis(Axis(0));
	let attention_mask = mask.view().insert_axis(Axis(0));

	// Run inference and print the second output tensor.
	// NOTE(review): assumes output index 1 holds the embedding — confirm against the model's
	// output ordering.
	let outputs = session.run(ort::inputs![input_ids, attention_mask]?)?;
	let tensor = outputs[1].try_extract_tensor::<f32>();
	println!("{:?}", tensor);

	Ok(())
}

View File

@@ -1,6 +1,6 @@
[package]
publish = false
name = "example-all-mini-lm-l6"
name = "sentence-transformers"
version = "0.0.0"
edition = "2021"

View File

@@ -0,0 +1,61 @@
use std::path::Path;
use ndarray::{s, Array1, Array2, Axis, Ix2};
use ort::{CUDAExecutionProvider, GraphOptimizationLevel, Session};
use tokenizers::Tokenizer;
/// Example usage of a text embedding model like Sentence Transformers' `all-mini-lm-l6` model for semantic textual similarity.
///
/// Text embedding models map sentences & paragraphs to an n-dimensional dense vector space, which can then be used for
/// tasks like clustering or semantic search.
fn main() -> ort::Result<()> {
	// Initialize tracing to receive debug messages from `ort`
	tracing_subscriber::fmt::init();

	// Create the ONNX Runtime environment, enabling CUDA execution providers for all sessions created in this process.
	ort::init()
		.with_name("sbert")
		.with_execution_providers([CUDAExecutionProvider::default().build()])
		.commit()?;

	// Load our model (downloaded from the URL on first use).
	let session = Session::builder()?
		.with_optimization_level(GraphOptimizationLevel::Level1)?
		.with_intra_threads(1)?
		.commit_from_url("https://parcel.pyke.io/v2/cdn/assetdelivery/ortrsv2/ex_models/all-MiniLM-L6-v2.onnx")?;

	// Load the tokenizer and encode the text.
	let tokenizer = Tokenizer::from_file(Path::new(env!("CARGO_MANIFEST_DIR")).join("data").join("tokenizer.json")).unwrap();

	let inputs = vec!["The weather outside is lovely.", "It's so sunny outside!", "She drove to the stadium."];

	// Encode our input strings. `encode_batch` will pad each input to be the same length.
	let encodings = tokenizer.encode_batch(inputs.clone(), false)?;

	// Get the padded length of each encoding.
	let padded_token_length = encodings[0].len();

	// Get our token IDs & mask as a flattened array.
	let ids: Vec<i64> = encodings.iter().flat_map(|e| e.get_ids().iter().map(|i| *i as i64)).collect();
	let mask: Vec<i64> = encodings.iter().flat_map(|e| e.get_attention_mask().iter().map(|i| *i as i64)).collect();

	// Convert our flattened arrays into 2-dimensional tensors of shape [N, L].
	let a_ids = Array2::from_shape_vec([inputs.len(), padded_token_length], ids).unwrap();
	let a_mask = Array2::from_shape_vec([inputs.len(), padded_token_length], mask).unwrap();

	// Run the model.
	let outputs = session.run(ort::inputs![a_ids, a_mask]?)?;

	// Extract our embeddings tensor and convert it to a strongly-typed 2-dimensional array.
	// NOTE(review): assumes output index 1 is the [N, 384] sentence embedding — confirm against the model's output ordering.
	let embeddings = outputs[1].try_extract_tensor::<f32>()?.into_dimensionality::<Ix2>().unwrap();

	println!("Similarity for '{}'", inputs[0]);
	let query = embeddings.index_axis(Axis(0), 0);
	// Precompute the query vector's L2 norm once; it is loop-invariant.
	let query_norm: f32 = query.iter().map(|a| a * a).sum::<f32>().sqrt();
	for (embedding, sentence) in embeddings.axis_iter(Axis(0)).zip(inputs.iter()).skip(1) {
		// Calculate cosine similarity against the 'query' sentence: dot(q, e) / (‖q‖·‖e‖).
		// Normalizing by the norms is required for a true cosine similarity — a raw dot product
		// is only equivalent when the model's outputs are already L2-normalized, which this
		// example does not assume.
		let dot_product: f32 = query.iter().zip(embedding.iter()).map(|(a, b)| a * b).sum();
		let embedding_norm: f32 = embedding.iter().map(|b| b * b).sum::<f32>().sqrt();
		let cosine = dot_product / (query_norm * embedding_norm);
		println!("\t'{}': {:.1}%", sentence, cosine * 100.);
	}

	Ok(())
}