Path: blob/main/crates/wasi-nn/src/backend/openvino.rs
//! Implements a `wasi-nn` [`BackendInner`] using OpenVINO.

use super::{
    BackendError, BackendExecutionContext, BackendFromDir, BackendGraph, BackendInner, Id,
    NamedTensor, read,
};
use crate::wit::{ExecutionTarget, GraphEncoding, Tensor, TensorType};
use crate::{ExecutionContext, Graph};
use openvino::{DeviceType, ElementType, InferenceError, SetupError, Shape, Tensor as OvTensor};
use std::path::Path;
use std::sync::{Arc, Mutex};

#[derive(Default)]
pub struct OpenvinoBackend(Option<openvino::Core>);
unsafe impl Send for OpenvinoBackend {}
unsafe impl Sync for OpenvinoBackend {}

impl BackendInner for OpenvinoBackend {
    fn encoding(&self) -> GraphEncoding {
        GraphEncoding::Openvino
    }

    fn load(&mut self, builders: &[&[u8]], target: ExecutionTarget) -> Result<Graph, BackendError> {
        if builders.len() != 2 {
            return Err(BackendError::InvalidNumberOfBuilders(2, builders.len()));
        }
        // Construct the context if none is present; this is done lazily (i.e.
        // upon actually loading a model) because it may fail to find and load
        // the OpenVINO libraries. The laziness limits the extent of the error
        // only to wasi-nn users, not all WASI users.
        if self.0.is_none() {
            self.0.replace(openvino::Core::new()?);
        }
        // Read the guest array.
        let xml = builders[0];
        let weights = builders[1];

        // Construct a new tensor for the model weights.
        let shape = Shape::new(&[1, weights.len() as i64])?;
        let mut weights_tensor = OvTensor::new(ElementType::U8, &shape)?;
        let buffer = weights_tensor.get_raw_data_mut()?;
        buffer.copy_from_slice(&weights);

        // Construct OpenVINO graph structures: `model` contains the graph
        // structure, `compiled_model` can perform inference.
        let core = self
            .0
            .as_mut()
            .expect("openvino::Core was previously constructed");
        let model = core.read_model_from_buffer(&xml, Some(&weights_tensor))?;
        let compiled_model = core.compile_model(&model, target.into())?;
        let box_: Box<dyn BackendGraph> =
            Box::new(OpenvinoGraph(Arc::new(Mutex::new(compiled_model))));
        Ok(box_.into())
    }

    fn as_dir_loadable(&mut self) -> Option<&mut dyn BackendFromDir> {
        Some(self)
    }
}

impl BackendFromDir for OpenvinoBackend {
    fn load_from_dir(
        &mut self,
        path: &Path,
        target: ExecutionTarget,
    ) -> Result<Graph, BackendError> {
        let model = read(&path.join("model.xml"))?;
        let weights = read(&path.join("model.bin"))?;
        self.load(&[&model, &weights], target)
    }
}

struct OpenvinoGraph(Arc<Mutex<openvino::CompiledModel>>);

unsafe impl Send for OpenvinoGraph {}
unsafe impl Sync for OpenvinoGraph {}

impl BackendGraph for OpenvinoGraph {
    fn init_execution_context(&self) -> Result<ExecutionContext, BackendError> {
        let mut compiled_model = self.0.lock().unwrap();
        let infer_request = compiled_model.create_infer_request()?;
        let box_: Box<dyn BackendExecutionContext> =
            Box::new(OpenvinoExecutionContext(infer_request, self.0.clone()));
        Ok(box_.into())
    }
}
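// A minimal end-to-end sketch of how a host might drive this backend, using a
// hypothetical model directory; it assumes the OpenVINO shared libraries are
// discoverable at runtime (the lazy `openvino::Core` construction in `load`
// above is what surfaces that error) and that `Graph` and `ExecutionContext`
// deref to their backend traits, as elsewhere in this crate:
//
//     let mut backend = OpenvinoBackend::default();
//     let graph = backend.load_from_dir(Path::new("fixtures/mobilenet"), ExecutionTarget::Cpu)?;
//     let mut context = graph.init_execution_context()?;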
struct OpenvinoExecutionContext(openvino::InferRequest, Arc<Mutex<openvino::CompiledModel>>);

impl BackendExecutionContext for OpenvinoExecutionContext {
    fn set_input(&mut self, id: Id, tensor: &Tensor) -> Result<(), BackendError> {
        // Construct the tensor.
        let precision = tensor.ty.into();
        let dimensions = tensor
            .dimensions
            .iter()
            .map(|&d| d as i64)
            .collect::<Vec<_>>();
        let shape = Shape::new(&dimensions)?;
        let mut new_tensor = OvTensor::new(precision, &shape)?;
        let buffer = new_tensor.get_raw_data_mut()?;
        buffer.copy_from_slice(&tensor.data);
        // Assign the tensor to the request.
        match id {
            Id::Index(i) => self.0.set_input_tensor_by_index(i as usize, &new_tensor)?,
            Id::Name(name) => self.0.set_tensor(&name, &new_tensor)?,
        };
        Ok(())
    }

    fn compute(
        &mut self,
        inputs: Option<Vec<NamedTensor>>,
    ) -> Result<Option<Vec<NamedTensor>>, BackendError> {
        match inputs {
            // WIT path: named inputs are passed to `compute` directly.
            Some(inputs) => {
                // Process all named inputs.
                for input in &inputs {
                    let precision = input.tensor.ty.into();
                    let dimensions = input
                        .tensor
                        .dimensions
                        .iter()
                        .map(|&d| d as i64)
                        .collect::<Vec<_>>();
                    let shape = Shape::new(&dimensions)?;
                    let mut new_tensor = OvTensor::new(precision, &shape)?;
                    let buffer = new_tensor.get_raw_data_mut()?;
                    buffer.copy_from_slice(&input.tensor.data);

                    self.0.set_tensor(&input.name, &new_tensor)?;
                }

                // Run inference.
                self.0.infer()?;

                // Get all outputs.
                let compiled_model = self.1.lock().unwrap();
                let output_count = compiled_model.get_output_size()?;

                let mut output_tensors = Vec::new();
                for i in 0..output_count {
                    let output_tensor = self.0.get_output_tensor_by_index(i)?;

                    let dimensions = output_tensor
                        .get_shape()?
                        .get_dimensions()
                        .iter()
                        .map(|&dim| dim as u32)
                        .collect::<Vec<u32>>();

                    let ty = output_tensor.get_element_type()?.try_into()?;
                    let data = output_tensor.get_raw_data()?.to_vec();

                    // Currently the OpenVINO backend returns only the output
                    // index, not the output tensor name.
                    output_tensors.push(NamedTensor {
                        name: format!("{i}"),
                        tensor: Tensor {
                            dimensions,
                            ty,
                            data,
                        },
                    });
                }
                Ok(Some(output_tensors))
            }

            // WITX path: inputs were already assigned via `set_input`.
            None => {
                self.0.infer()?;
                Ok(None)
            }
        }
    }

    fn get_output(&mut self, id: Id) -> Result<Tensor, BackendError> {
        let output_tensor = match id {
            Id::Index(i) => self.0.get_output_tensor_by_index(i as usize)?,
            Id::Name(name) => self.0.get_tensor(&name)?,
        };
        let dimensions = output_tensor
            .get_shape()?
            .get_dimensions()
            .iter()
            .map(|&dim| dim as u32)
            .collect::<Vec<u32>>();
        let ty = output_tensor.get_element_type()?.try_into()?;
        let data = output_tensor.get_raw_data()?.to_vec();
        Ok(Tensor {
            dimensions,
            ty,
            data,
        })
    }
}

impl From<InferenceError> for BackendError {
    fn from(e: InferenceError) -> Self {
        BackendError::BackendAccess(anyhow::Error::new(e))
    }
}

impl From<SetupError> for BackendError {
    fn from(e: SetupError) -> Self {
        BackendError::BackendAccess(anyhow::Error::new(e))
    }
}
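// Sketch of the named-tensor ("WIT") compute path implemented above, with a
// hypothetical input name and shape; `data` must be exactly the byte length
// implied by `dimensions` and `ty` (1x3x224x224 f32 = 602,112 bytes here) or
// the `copy_from_slice` above will panic:
//
//     let outputs = context.compute(Some(vec![NamedTensor {
//         name: "input0".to_string(),
//         tensor: Tensor {
//             dimensions: vec![1, 3, 224, 224],
//             ty: TensorType::Fp32,
//             data: vec![0u8; 1 * 3 * 224 * 224 * 4],
//         },
//     }]))?;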
/// Return the OpenVINO device type for the `ExecutionTarget` enum provided by
/// wasi-nn.
impl From<ExecutionTarget> for DeviceType<'static> {
    fn from(target: ExecutionTarget) -> Self {
        match target {
            ExecutionTarget::Cpu => DeviceType::CPU,
            ExecutionTarget::Gpu => DeviceType::GPU,
            ExecutionTarget::Tpu => {
                unimplemented!("OpenVINO does not support TPU execution targets")
            }
        }
    }
}

/// Return OpenVINO's precision type for the `TensorType` enum provided by
/// wasi-nn.
impl From<TensorType> for ElementType {
    fn from(tensor_type: TensorType) -> Self {
        match tensor_type {
            TensorType::Fp16 => ElementType::F16,
            TensorType::Fp32 => ElementType::F32,
            TensorType::Fp64 => ElementType::F64,
            TensorType::U8 => ElementType::U8,
            TensorType::I32 => ElementType::I32,
            TensorType::I64 => ElementType::I64,
            TensorType::Bf16 => ElementType::Bf16,
        }
    }
}

/// Return the `TensorType` enum provided by wasi-nn for OpenVINO's precision
/// type.
impl TryFrom<ElementType> for TensorType {
    type Error = BackendError;
    fn try_from(element_type: ElementType) -> Result<Self, Self::Error> {
        match element_type {
            ElementType::F16 => Ok(TensorType::Fp16),
            ElementType::F32 => Ok(TensorType::Fp32),
            ElementType::F64 => Ok(TensorType::Fp64),
            ElementType::U8 => Ok(TensorType::U8),
            ElementType::I32 => Ok(TensorType::I32),
            ElementType::I64 => Ok(TensorType::I64),
            ElementType::Bf16 => Ok(TensorType::Bf16),
            _ => Err(BackendError::UnsupportedTensorType(
                element_type.to_string(),
            )),
        }
    }
}
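// A sketch of a round-trip check for the conversions above: every wasi-nn
// `TensorType` maps to an OpenVINO `ElementType` and back to the same
// wasi-nn type. This assumes the generated `TensorType` derives `Copy`,
// `PartialEq`, and `Debug` (as wit-bindgen enums typically do) and that
// `BackendError` implements `Debug`.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn tensor_type_round_trips_through_element_type() {
        for ty in [
            TensorType::Fp16,
            TensorType::Fp32,
            TensorType::Fp64,
            TensorType::U8,
            TensorType::I32,
            TensorType::I64,
            TensorType::Bf16,
        ] {
            // Convert to OpenVINO's element type, then back again.
            let element: ElementType = ty.into();
            assert_eq!(TensorType::try_from(element).unwrap(), ty);
        }
    }
}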