// Path: blob/main/devices/src/virtio/video/decoder/backend/mod.rs (5394 views)
// Copyright 2020 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

//! This module implements the interface that actual decoder devices need to
//! implement in order to provide video decoding capability to the guest.

use base::AsRawDescriptor;

use crate::virtio::video::decoder::Capability;
use crate::virtio::video::error::VideoError;
use crate::virtio::video::error::VideoResult;
use crate::virtio::video::format::Format;
use crate::virtio::video::format::Rect;
use crate::virtio::video::resource::GuestResource;
use crate::virtio::video::resource::GuestResourceHandle;

#[cfg(feature = "ffmpeg")]
pub mod ffmpeg;

#[cfg(feature = "vaapi")]
pub mod vaapi;
#[cfg(feature = "libvda")]
pub mod vda;

/// Contains the device's state for one playback session, i.e. one stream.
pub trait DecoderSession {
    /// Tell how many output buffers will be used for this session and which format they will carry.
    /// This method must be called after a `ProvidePictureBuffers` event is emitted, and before the
    /// first call to `use_output_buffer()`.
    fn set_output_parameters(&mut self, buffer_count: usize, format: Format) -> VideoResult<()>;

    /// Decode the compressed stream contained in [`offset`..`offset`+`bytes_used`] of the shared
    /// memory in the input `resource`.
    ///
    /// `resource_id` is the ID of the input resource. It will be signaled using the
    /// `NotifyEndOfBitstreamBuffer` once the input resource is not used anymore.
    ///
    /// `timestamp` is a timestamp that will be copied into the frames decoded from that input
    /// stream. Units are effectively free and provided by the input stream.
    ///
    /// The device takes ownership of `resource` and is responsible for closing it once it is not
    /// used anymore.
    ///
    /// The device will emit a `NotifyEndOfBitstreamBuffer` event with the `resource_id` value after
    /// the input buffer has been entirely processed.
    ///
    /// The device will emit a `PictureReady` event with the `timestamp` value for each picture
    /// produced from that input buffer.
    fn decode(
        &mut self,
        resource_id: u32,
        timestamp: u64,
        resource: GuestResourceHandle,
        offset: u32,
        bytes_used: u32,
    ) -> VideoResult<()>;

    /// Flush the decoder device, i.e. finish processing all queued decode requests and emit frames
    /// for them.
    ///
    /// The device will emit a `FlushCompleted` event once the flush is done.
    fn flush(&mut self) -> VideoResult<()>;

    /// Reset the decoder device, i.e. cancel all pending decoding requests.
    ///
    /// The device will emit a `ResetCompleted` event once the reset is done.
    fn reset(&mut self) -> VideoResult<()>;

    /// Immediately release all buffers passed using `use_output_buffer()` and
    /// `reuse_output_buffer()`.
    fn clear_output_buffers(&mut self) -> VideoResult<()>;

    /// Returns the event pipe on which the availability of events will be signaled. Note that the
    /// returned value is borrowed and only valid as long as the session is alive.
    fn event_pipe(&self) -> &dyn AsRawDescriptor;

    /// Ask the device to use `resource` to store decoded frames according to its layout.
    /// `picture_buffer_id` is the ID of the picture that will be reproduced in `PictureReady`
    /// events using this buffer.
    ///
    /// The device takes ownership of `resource` and is responsible for closing it once the buffer
    /// is not used anymore (either when the session is closed, or a new set of buffers is provided
    /// for the session).
    ///
    /// The device will emit a `PictureReady` event with the `picture_buffer_id` field set to the
    /// same value as the argument of the same name when a frame has been decoded into that buffer.
    fn use_output_buffer(
        &mut self,
        picture_buffer_id: i32,
        resource: GuestResource,
    ) -> VideoResult<()>;

    /// Ask the device to reuse an output buffer previously passed to
    /// `use_output_buffer` and that has previously been returned to the decoder
    /// in a `PictureReady` event.
    ///
    /// The device will emit a `PictureReady` event with the `picture_buffer_id`
    /// field set to the same value as the argument of the same name when a
    /// frame has been decoded into that buffer.
    fn reuse_output_buffer(&mut self, picture_buffer_id: i32) -> VideoResult<()>;

    /// Blocking call to read a single event from the event pipe.
    fn read_event(&mut self) -> VideoResult<DecoderEvent>;
}

/// Blanket implementation so that anything that can be borrowed as a `dyn DecoderSession`
/// (e.g. `Box<dyn DecoderSession>`) is itself usable as a `DecoderSession`. Each method
/// simply forwards to the borrowed trait object.
impl<S: AsMut<dyn DecoderSession> + AsRef<dyn DecoderSession> + ?Sized> DecoderSession for S {
    fn set_output_parameters(&mut self, buffer_count: usize, format: Format) -> VideoResult<()> {
        self.as_mut().set_output_parameters(buffer_count, format)
    }

    fn decode(
        &mut self,
        resource_id: u32,
        timestamp: u64,
        resource: GuestResourceHandle,
        offset: u32,
        bytes_used: u32,
    ) -> VideoResult<()> {
        self.as_mut()
            .decode(resource_id, timestamp, resource, offset, bytes_used)
    }

    fn flush(&mut self) -> VideoResult<()> {
        self.as_mut().flush()
    }

    fn reset(&mut self) -> VideoResult<()> {
        self.as_mut().reset()
    }

    fn clear_output_buffers(&mut self) -> VideoResult<()> {
        self.as_mut().clear_output_buffers()
    }

    fn event_pipe(&self) -> &dyn AsRawDescriptor {
        self.as_ref().event_pipe()
    }

    fn use_output_buffer(
        &mut self,
        picture_buffer_id: i32,
        resource: GuestResource,
    ) -> VideoResult<()> {
        self.as_mut().use_output_buffer(picture_buffer_id, resource)
    }

    fn reuse_output_buffer(&mut self, picture_buffer_id: i32) -> VideoResult<()> {
        self.as_mut().reuse_output_buffer(picture_buffer_id)
    }

    fn read_event(&mut self) -> VideoResult<DecoderEvent> {
        self.as_mut().read_event()
    }
}

pub trait DecoderBackend: Send {
    type Session: DecoderSession;

    /// Return the decoding capabilities for this backend instance.
    fn get_capabilities(&self) -> Capability;

    /// Create a new decoding session for the passed `format`.
    fn new_session(&mut self, format: Format) -> VideoResult<Self::Session>;

    /// Turn this backend into a trait object, allowing the same decoder to operate on a set of
    /// different backends.
    fn into_trait_object(self) -> Box<dyn DecoderBackend<Session = Box<dyn DecoderSession>>>
    where
        Self: Sized + 'static,
    {
        Box::new(GenericDecoderBackend(self)) as Box<dyn DecoderBackend<Session = _>>
    }
}

/// Type that changes the `Session` associated type to `Box<dyn DecoderSession>`, allowing us to
/// use trait objects for backends.
struct GenericDecoderBackend<S: DecoderBackend>(pub S);

impl<S> DecoderBackend for GenericDecoderBackend<S>
where
    S: DecoderBackend,
    <S as DecoderBackend>::Session: 'static,
{
    type Session = Box<dyn DecoderSession>;

    fn get_capabilities(&self) -> Capability {
        self.0.get_capabilities()
    }

    fn new_session(&mut self, format: Format) -> VideoResult<Self::Session> {
        // Box the concrete session so all backends expose the same session type.
        self.0
            .new_session(format)
            .map(|s| Box::new(s) as Box<dyn DecoderSession>)
    }
}

/// Forward the backend trait through a `Box`, including boxed trait objects (`S: ?Sized`),
/// so `Box<dyn DecoderBackend<...>>` is itself a `DecoderBackend`.
impl<S> DecoderBackend for Box<S>
where
    S: ?Sized,
    S: DecoderBackend,
{
    type Session = S::Session;

    fn get_capabilities(&self) -> Capability {
        self.as_ref().get_capabilities()
    }

    fn new_session(&mut self, format: Format) -> VideoResult<Self::Session> {
        self.as_mut().new_session(format)
    }
}

#[derive(Debug)]
pub enum DecoderEvent {
    /// Emitted when the device knows the buffer format it will need to decode frames, and how many
    /// buffers it will need. The decoder is supposed to call `set_output_parameters()` to confirm
    /// the pixel format and actual number of buffers used, and provide buffers of the requested
    /// dimensions using `use_output_buffer()`.
    ProvidePictureBuffers {
        min_num_buffers: u32,
        width: i32,
        height: i32,
        visible_rect: Rect,
    },
    /// Emitted when the decoder is done decoding a picture. `picture_buffer_id`
    /// corresponds to the argument of the same name passed to `use_output_buffer()`
    /// or `reuse_output_buffer()`. `timestamp` corresponds to the argument of
    /// the same name passed to `decode()` and can be used to match decoded frames
    /// to the input buffer they were produced from.
    PictureReady {
        picture_buffer_id: i32,
        timestamp: u64,
    },
    /// Emitted when an input buffer passed to `decode()` is not used by the
    /// device anymore and can be reused by the decoder. The parameter corresponds
    /// to the `resource_id` argument passed to `decode()`.
    NotifyEndOfBitstreamBuffer(u32),
    /// Emitted when a decoding error has occurred.
    NotifyError(VideoError),
    /// Emitted after `flush()` has been called to signal that the flush is completed.
    FlushCompleted(VideoResult<()>),
    /// Emitted after `reset()` has been called to signal that the reset is completed.
    ResetCompleted(VideoResult<()>),
}

#[cfg(test)]
/// Shared functions that can be used to test individual backends.
mod tests {
    use std::time::Duration;

    use base::MappedRegion;
    use base::MemoryMappingBuilder;
    use base::SharedMemory;
    use base::WaitContext;

    use super::*;
    use crate::virtio::video::format::FramePlane;
    use crate::virtio::video::resource::GuestMemArea;
    use crate::virtio::video::resource::GuestMemHandle;
    use crate::virtio::video::resource::VirtioObjectHandle;

    // Test video stream and its properties.
    const H264_STREAM: &[u8] = include_bytes!("test-25fps.h264");
    const H264_STREAM_WIDTH: i32 = 320;
    const H264_STREAM_HEIGHT: i32 = 240;
    const H264_STREAM_NUM_FRAMES: usize = 250;
    const H264_STREAM_CRCS: &str = include_str!("test-25fps.crc");

    /// Splits a H.264 annex B stream into chunks that are all guaranteed to contain a full frame
    /// worth of data.
    ///
    /// This is a pretty naive implementation that is only guaranteed to work with our test stream.
    /// We are not using `AVCodecParser` because it seems to modify the decoding context, which
    /// would result in testing conditions that diverge more from our real use case where parsing
    /// has already been done.
    struct H264NalIterator<'a> {
        // Full annex B stream being iterated over.
        stream: &'a [u8],
        // Current read position in `stream`; equals `stream.len()` when exhausted.
        pos: usize,
    }

    impl<'a> H264NalIterator<'a> {
        fn new(stream: &'a [u8]) -> Self {
            Self { stream, pos: 0 }
        }

        /// Returns the position of the start of the next frame in the stream.
        fn next_frame_pos(&self) -> Option<usize> {
            const H264_START_CODE: [u8; 4] = [0x0, 0x0, 0x0, 0x1];
            // Search starts at `pos + 1` so the start code at the current position is skipped.
            self.stream[self.pos + 1..]
                .windows(H264_START_CODE.len())
                .position(|window| window == H264_START_CODE)
                .map(|pos| self.pos + pos + 1)
        }

        /// Returns whether `slice` contains frame data, i.e. a header where the NAL unit type is
        /// 0x1 or 0x5.
        fn contains_frame(slice: &[u8]) -> bool {
            // Skip the leading start code (first 4 bytes) and look for an inner
            // 3-byte start code followed by a slice NAL unit type.
            slice[4..].windows(4).any(|window| {
                window[0..3] == [0x0, 0x0, 0x1]
                    && (window[3] & 0x1f == 0x5 || window[3] & 0x1f == 0x1)
            })
        }
    }

    impl<'a> Iterator for H264NalIterator<'a> {
        type Item = &'a [u8];

        fn next(&mut self) -> Option<Self::Item> {
            match self.pos {
                cur_pos if cur_pos == self.stream.len() => None,
                cur_pos => loop {
                    self.pos = self.next_frame_pos().unwrap_or(self.stream.len());
                    let slice = &self.stream[cur_pos..self.pos];

                    // Keep advancing as long as we don't have frame data in our slice.
                    if Self::contains_frame(slice) || self.pos == self.stream.len() {
                        return Some(slice);
                    }
                },
            }
        }
    }

    // Build a virtio object handle from a linear memory area. This is useful to emulate the
    // scenario where we are decoding from or into virtio objects.
    #[allow(dead_code)]
    pub fn build_object_handle(mem: &SharedMemory) -> GuestResourceHandle {
        GuestResourceHandle::VirtioObject(VirtioObjectHandle {
            desc: base::clone_descriptor(mem).unwrap(),
            modifier: 0,
        })
    }

    // Build a guest memory handle from a linear memory area. This is useful to emulate the
    // scenario where we are decoding from or into guest memory.
    #[allow(dead_code)]
    pub fn build_guest_mem_handle(mem: &SharedMemory) -> GuestResourceHandle {
        GuestResourceHandle::GuestPages(GuestMemHandle {
            desc: base::clone_descriptor(mem).unwrap(),
            // A single area covering the whole shared memory region.
            mem_areas: vec![GuestMemArea {
                offset: 0,
                length: mem.size() as usize,
            }],
        })
    }

    /// Full decoding test of a H.264 video, checking that the flow of events is happening as
    /// expected.
    pub fn decode_h264_generic<D, I, O>(
        decoder: &mut D,
        input_resource_builder: I,
        output_resource_builder: O,
    ) where
        D: DecoderBackend,
        I: Fn(&SharedMemory) -> GuestResourceHandle,
        O: Fn(&SharedMemory) -> GuestResourceHandle,
    {
        const NUM_OUTPUT_BUFFERS: usize = 4;
        const INPUT_BUF_SIZE: usize = 0x4000;
        // NV12 frame size: full-resolution Y plane plus half-height interleaved UV plane.
        const OUTPUT_BUFFER_SIZE: usize =
            (H264_STREAM_WIDTH * (H264_STREAM_HEIGHT + H264_STREAM_HEIGHT / 2)) as usize;
        let mut session = decoder
            .new_session(Format::H264)
            .expect("failed to create H264 decoding session.");
        let wait_ctx = WaitContext::new().expect("Failed to create wait context");
        wait_ctx
            .add(session.event_pipe(), 0u8)
            .expect("Failed to add event pipe to wait context");
        // Output buffers suitable for receiving NV12 frames for our stream.
        let output_buffers = (0..NUM_OUTPUT_BUFFERS)
            .map(|i| {
                SharedMemory::new(
                    format!("video-output-buffer-{i}"),
                    OUTPUT_BUFFER_SIZE as u64,
                )
                .unwrap()
            })
            .collect::<Vec<_>>();
        let input_shm = SharedMemory::new("video-input-buffer", INPUT_BUF_SIZE as u64).unwrap();
        let input_mapping = MemoryMappingBuilder::new(input_shm.size() as usize)
            .from_shared_memory(&input_shm)
            .build()
            .unwrap();

        let mut decoded_frames_count = 0usize;
        let mut expected_frames_crcs = H264_STREAM_CRCS.lines();

        // Handles a single `PictureReady` event: checks the frame's CRC against the next
        // expected one, recycles the buffer, and bumps the decoded-frame counter.
        let mut on_frame_decoded = |session: &mut D::Session, picture_buffer_id: i32| {
            // Verify that the CRC of the decoded frame matches the expected one.
            let mapping = MemoryMappingBuilder::new(OUTPUT_BUFFER_SIZE)
                .from_shared_memory(&output_buffers[picture_buffer_id as usize])
                .build()
                .unwrap();
            let mut frame_data = vec![0u8; mapping.size()];
            assert_eq!(
                mapping.read_slice(&mut frame_data, 0).unwrap(),
                mapping.size()
            );

            let mut hasher = crc32fast::Hasher::new();
            hasher.update(&frame_data);
            let frame_crc = hasher.finalize();
            assert_eq!(
                format!("{frame_crc:08x}"),
                expected_frames_crcs
                    .next()
                    .expect("No CRC for decoded frame")
            );

            // We can recycle the frame now.
            session.reuse_output_buffer(picture_buffer_id).unwrap();
            decoded_frames_count += 1;
        };

        // Simple value by which we will multiply the frame number to obtain a fake timestamp.
        const TIMESTAMP_FOR_INPUT_ID_FACTOR: u64 = 1_000_000;
        for (input_id, slice) in H264NalIterator::new(H264_STREAM).enumerate() {
            let buffer_handle = input_resource_builder(&input_shm);
            input_mapping
                .write_slice(slice, 0)
                .expect("Failed to write stream data into input buffer.");
            session
                .decode(
                    input_id as u32,
                    input_id as u64 * TIMESTAMP_FOR_INPUT_ID_FACTOR,
                    buffer_handle,
                    0,
                    slice.len() as u32,
                )
                .expect("Call to decode() failed.");

            // Get all the events resulting from this submission.
            let mut events = Vec::new();
            while !wait_ctx.wait_timeout(Duration::ZERO).unwrap().is_empty() {
                events.push(session.read_event().unwrap());
            }

            // Our bitstream buffer should have been returned.
            let event_idx = events
                .iter()
                .position(|event| {
                    let input_id = input_id as u32;
                    matches!(event, DecoderEvent::NotifyEndOfBitstreamBuffer(index) if *index == input_id)
                })
                .unwrap();
            events.remove(event_idx);

            // After sending the first buffer we should get the initial resolution change event and
            // can provide the frames to decode into.
            if input_id == 0 {
                let event_idx = events
                    .iter()
                    .position(|event| {
                        matches!(
                            event,
                            DecoderEvent::ProvidePictureBuffers {
                                width: H264_STREAM_WIDTH,
                                height: H264_STREAM_HEIGHT,
                                visible_rect: Rect {
                                    left: 0,
                                    top: 0,
                                    right: H264_STREAM_WIDTH,
                                    bottom: H264_STREAM_HEIGHT,
                                },
                                ..
                            }
                        )
                    })
                    .unwrap();
                events.remove(event_idx);

                let out_format = Format::NV12;

                session
                    .set_output_parameters(NUM_OUTPUT_BUFFERS, out_format)
                    .unwrap();

                // Pass the buffers we will decode into.
                for (picture_buffer_id, buffer) in output_buffers.iter().enumerate() {
                    session
                        .use_output_buffer(
                            picture_buffer_id as i32,
                            GuestResource {
                                handle: output_resource_builder(buffer),
                                // NV12 layout: Y plane followed by an interleaved UV plane,
                                // both with a stride of the full frame width.
                                planes: vec![
                                    FramePlane {
                                        offset: 0,
                                        stride: H264_STREAM_WIDTH as usize,
                                        size: (H264_STREAM_WIDTH * H264_STREAM_HEIGHT) as usize,
                                    },
                                    FramePlane {
                                        offset: (H264_STREAM_WIDTH * H264_STREAM_HEIGHT) as usize,
                                        stride: H264_STREAM_WIDTH as usize,
                                        size: (H264_STREAM_WIDTH * H264_STREAM_HEIGHT) as usize,
                                    },
                                ],
                                width: H264_STREAM_WIDTH as _,
                                height: H264_STREAM_HEIGHT as _,
                                format: out_format,
                                guest_cpu_mappable: false,
                            },
                        )
                        .unwrap();
                }
            }

            // If we have remaining events, they must be decoded frames. Get them and recycle them.
            for event in events {
                match event {
                    DecoderEvent::PictureReady {
                        picture_buffer_id, ..
                    } => on_frame_decoded(&mut session, picture_buffer_id),
                    e => panic!("Unexpected event: {e:?}"),
                }
            }
        }

        session.flush().unwrap();

        // Keep getting frames until the final event, which should be `FlushCompleted`.
        let mut received_flush_completed = false;
        while !wait_ctx.wait_timeout(Duration::ZERO).unwrap().is_empty() {
            match session.read_event().unwrap() {
                DecoderEvent::PictureReady {
                    picture_buffer_id, ..
                } => on_frame_decoded(&mut session, picture_buffer_id),
                DecoderEvent::FlushCompleted(Ok(())) => {
                    received_flush_completed = true;
                    break;
                }
                e => panic!("Unexpected event: {e:?}"),
            }
        }

        // Confirm that we got the FlushCompleted event.
        assert!(received_flush_completed);

        // We should have read all the events for that session.
        assert_eq!(wait_ctx.wait_timeout(Duration::ZERO).unwrap().len(), 0);

        // We should not be expecting any more frames.
        assert_eq!(expected_frames_crcs.next(), None);

        // Check that we decoded the expected number of frames.
        assert_eq!(decoded_frames_count, H264_STREAM_NUM_FRAMES);
    }
}