// SPDX-License-Identifier: GPL-2.012use core::{3cmp,4mem,5sync::atomic::{6fence,7Ordering, //8}, //9};1011use kernel::{12device,13dma::{14CoherentAllocation,15DmaAddress, //16},17dma_write,18io::poll::read_poll_timeout,19prelude::*,20sync::aref::ARef,21time::Delta,22transmute::{23AsBytes,24FromBytes, //25},26};2728use crate::{29driver::Bar0,30gsp::{31fw::{32GspMsgElement,33MsgFunction,34MsgqRxHeader,35MsgqTxHeader, //36},37PteArray,38GSP_PAGE_SHIFT,39GSP_PAGE_SIZE, //40},41num,42regs,43sbuffer::SBufferIter, //44};4546/// Trait implemented by types representing a command to send to the GSP.47///48/// The main purpose of this trait is to provide [`Cmdq::send_command`] with the information it49/// needs to send a given command.50///51/// [`CommandToGsp::init`] in particular is responsible for initializing the command directly52/// into the space reserved for it in the command queue buffer.53///54/// Some commands may be followed by a variable-length payload. For these, the55/// [`CommandToGsp::variable_payload_len`] and [`CommandToGsp::init_variable_payload`] need to be56/// defined as well.57pub(crate) trait CommandToGsp {58/// Function identifying this command to the GSP.59const FUNCTION: MsgFunction;6061/// Type generated by [`CommandToGsp::init`], to be written into the command queue buffer.62type Command: FromBytes + AsBytes;6364/// Error type returned by [`CommandToGsp::init`].65type InitError;6667/// In-place command initializer responsible for filling the command in the command queue68/// buffer.69fn init(&self) -> impl Init<Self::Command, Self::InitError>;7071/// Size of the variable-length payload following the command structure generated by72/// [`CommandToGsp::init`].73///74/// Most commands don't have a variable-length payload, so this is zero by default.75fn variable_payload_len(&self) -> usize {76077}7879/// Method initializing the variable-length payload.80///81/// The command buffer is circular, which means that we may need to jump back to its beginning82/// while in the middle of a command. For this reason, the variable-length payload is83/// initialized using a [`SBufferIter`].84///85/// This method will receive a buffer of the length returned by86/// [`CommandToGsp::variable_payload_len`], and must write every single byte of it. Leaving87/// unwritten space will lead to an error.88///89/// Most commands don't have a variable-length payload, so this does nothing by default.90fn init_variable_payload(91&self,92_dst: &mut SBufferIter<core::array::IntoIter<&mut [u8], 2>>,93) -> Result {94Ok(())95}96}9798/// Trait representing messages received from the GSP.99///100/// This trait tells [`Cmdq::receive_msg`] how it can receive a given type of message.101pub(crate) trait MessageFromGsp: Sized {102/// Function identifying this message from the GSP.103const FUNCTION: MsgFunction;104105/// Error type returned by [`MessageFromGsp::read`].106type InitError;107108/// Type containing the raw message to be read from the message queue.109type Message: FromBytes;110111/// Method reading the message from the message queue and returning it.112///113/// From a `Self::Message` and a [`SBufferIter`], constructs an instance of `Self` and returns114/// it.115fn read(116msg: &Self::Message,117sbuffer: &mut SBufferIter<core::array::IntoIter<&[u8], 2>>,118) -> Result<Self, Self::InitError>;119}120121/// Number of GSP pages making the [`Msgq`].122pub(crate) const MSGQ_NUM_PAGES: u32 = 0x3f;123124/// Circular buffer of a [`Msgq`].125///126/// This area of memory is to be shared between the driver and the GSP to exchange commands or127/// messages.128#[repr(C, align(0x1000))]129#[derive(Debug)]130struct MsgqData {131data: [[u8; GSP_PAGE_SIZE]; num::u32_as_usize(MSGQ_NUM_PAGES)],132}133134// Annoyingly we are forced to use a literal to specify the alignment of135// `MsgqData`, so check that it corresponds to the actual GSP page size here.136static_assert!(align_of::<MsgqData>() == GSP_PAGE_SIZE);137138/// Unidirectional message queue.139///140/// Contains the data for a message queue, that either the driver or GSP writes to.141///142/// Note that while the write pointer of `tx` corresponds to the `msgq` of the same instance, the143/// read pointer of `rx` actually refers to the `Msgq` owned by the other side.144/// This design ensures that only the driver or GSP ever writes to a given instance of this struct.145#[repr(C)]146// There is no struct defined for this in the open-gpu-kernel-source headers.147// Instead it is defined by code in `GspMsgQueuesInit()`.148struct Msgq {149/// Header for sending messages, including the write pointer.150tx: MsgqTxHeader,151/// Header for receiving messages, including the read pointer.152rx: MsgqRxHeader,153/// The message queue proper.154msgq: MsgqData,155}156157/// Structure shared between the driver and the GSP and containing the command and message queues.158#[repr(C)]159struct GspMem {160/// Self-mapping page table entries.161ptes: PteArray<{ GSP_PAGE_SIZE / size_of::<u64>() }>,162/// CPU queue: the driver writes commands here, and the GSP reads them. It also contains the163/// write and read pointers that the CPU updates.164///165/// This member is read-only for the GSP.166cpuq: Msgq,167/// GSP queue: the GSP writes messages here, and the driver reads them. It also contains the168/// write and read pointers that the GSP updates.169///170/// This member is read-only for the driver.171gspq: Msgq,172}173174// SAFETY: These structs don't meet the no-padding requirements of AsBytes but175// that is not a problem because they are not used outside the kernel.176unsafe impl AsBytes for GspMem {}177178// SAFETY: These structs don't meet the no-padding requirements of FromBytes but179// that is not a problem because they are not used outside the kernel.180unsafe impl FromBytes for GspMem {}181182/// Wrapper around [`GspMem`] to share it with the GPU using a [`CoherentAllocation`].183///184/// This provides the low-level functionality to communicate with the GSP, including allocation of185/// queue space to write messages to and management of read/write pointers.186///187/// This is shared with the GSP, with clear ownership rules regarding the command queues:188///189/// * The driver owns (i.e. can write to) the part of the CPU message queue between the CPU write190/// pointer and the GSP read pointer. This region is returned by [`Self::driver_write_area`].191/// * The driver owns (i.e. can read from) the part of the GSP message queue between the CPU read192/// pointer and the GSP write pointer. This region is returned by [`Self::driver_read_area`].193struct DmaGspMem(CoherentAllocation<GspMem>);194195impl DmaGspMem {196/// Allocate a new instance and map it for `dev`.197fn new(dev: &device::Device<device::Bound>) -> Result<Self> {198const MSGQ_SIZE: u32 = num::usize_into_u32::<{ size_of::<Msgq>() }>();199const RX_HDR_OFF: u32 = num::usize_into_u32::<{ mem::offset_of!(Msgq, rx) }>();200201let gsp_mem =202CoherentAllocation::<GspMem>::alloc_coherent(dev, 1, GFP_KERNEL | __GFP_ZERO)?;203dma_write!(gsp_mem[0].ptes = PteArray::new(gsp_mem.dma_handle())?)?;204dma_write!(gsp_mem[0].cpuq.tx = MsgqTxHeader::new(MSGQ_SIZE, RX_HDR_OFF, MSGQ_NUM_PAGES))?;205dma_write!(gsp_mem[0].cpuq.rx = MsgqRxHeader::new())?;206207Ok(Self(gsp_mem))208}209210/// Returns the region of the CPU message queue that the driver is currently allowed to write211/// to.212///213/// As the message queue is a circular buffer, the region may be discontiguous in memory. In214/// that case the second slice will have a non-zero length.215fn driver_write_area(&mut self) -> (&mut [[u8; GSP_PAGE_SIZE]], &mut [[u8; GSP_PAGE_SIZE]]) {216let tx = self.cpu_write_ptr() as usize;217let rx = self.gsp_read_ptr() as usize;218219// SAFETY:220// - The `CoherentAllocation` contains exactly one object.221// - We will only access the driver-owned part of the shared memory.222// - Per the safety statement of the function, no concurrent access will be performed.223let gsp_mem = &mut unsafe { self.0.as_slice_mut(0, 1) }.unwrap()[0];224// PANIC: per the invariant of `cpu_write_ptr`, `tx` is `<= MSGQ_NUM_PAGES`.225let (before_tx, after_tx) = gsp_mem.cpuq.msgq.data.split_at_mut(tx);226227if rx <= tx {228// The area from `tx` up to the end of the ring, and from the beginning of the ring up229// to `rx`, minus one unit, belongs to the driver.230if rx == 0 {231let last = after_tx.len() - 1;232(&mut after_tx[..last], &mut before_tx[0..0])233} else {234(after_tx, &mut before_tx[..rx])235}236} else {237// The area from `tx` to `rx`, minus one unit, belongs to the driver.238//239// PANIC: per the invariants of `cpu_write_ptr` and `gsp_read_ptr`, `rx` and `tx` are240// `<= MSGQ_NUM_PAGES`, and the test above ensured that `rx > tx`.241(after_tx.split_at_mut(rx - tx).0, &mut before_tx[0..0])242}243}244245/// Returns the region of the GSP message queue that the driver is currently allowed to read246/// from.247///248/// As the message queue is a circular buffer, the region may be discontiguous in memory. In249/// that case the second slice will have a non-zero length.250fn driver_read_area(&self) -> (&[[u8; GSP_PAGE_SIZE]], &[[u8; GSP_PAGE_SIZE]]) {251let tx = self.gsp_write_ptr() as usize;252let rx = self.cpu_read_ptr() as usize;253254// SAFETY:255// - The `CoherentAllocation` contains exactly one object.256// - We will only access the driver-owned part of the shared memory.257// - Per the safety statement of the function, no concurrent access will be performed.258let gsp_mem = &unsafe { self.0.as_slice(0, 1) }.unwrap()[0];259// PANIC: per the invariant of `cpu_read_ptr`, `xx` is `<= MSGQ_NUM_PAGES`.260let (before_rx, after_rx) = gsp_mem.gspq.msgq.data.split_at(rx);261262match tx.cmp(&rx) {263cmp::Ordering::Equal => (&after_rx[0..0], &after_rx[0..0]),264cmp::Ordering::Greater => (&after_rx[..tx], &before_rx[0..0]),265cmp::Ordering::Less => (after_rx, &before_rx[..tx]),266}267}268269/// Allocates a region on the command queue that is large enough to send a command of `size`270/// bytes.271///272/// This returns a [`GspCommand`] ready to be written to by the caller.273///274/// # Errors275///276/// - `EAGAIN` if the driver area is too small to hold the requested command.277/// - `EIO` if the command header is not properly aligned.278fn allocate_command(&mut self, size: usize) -> Result<GspCommand<'_>> {279// Get the current writable area as an array of bytes.280let (slice_1, slice_2) = {281let (slice_1, slice_2) = self.driver_write_area();282283#[allow(clippy::incompatible_msrv)]284(slice_1.as_flattened_mut(), slice_2.as_flattened_mut())285};286287// If the GSP is still processing previous messages the shared region288// may be full in which case we will have to retry once the GSP has289// processed the existing commands.290if size_of::<GspMsgElement>() + size > slice_1.len() + slice_2.len() {291return Err(EAGAIN);292}293294// Extract area for the `GspMsgElement`.295let (header, slice_1) = GspMsgElement::from_bytes_mut_prefix(slice_1).ok_or(EIO)?;296297// Create the contents area.298let (slice_1, slice_2) = if slice_1.len() > size {299// Contents fits entirely in `slice_1`.300(&mut slice_1[..size], &mut slice_2[0..0])301} else {302// Need all of `slice_1` and some of `slice_2`.303let slice_2_len = size - slice_1.len();304(slice_1, &mut slice_2[..slice_2_len])305};306307Ok(GspCommand {308header,309contents: (slice_1, slice_2),310})311}312313// Returns the index of the memory page the GSP will write the next message to.314//315// # Invariants316//317// - The returned value is between `0` and `MSGQ_NUM_PAGES`.318fn gsp_write_ptr(&self) -> u32 {319let gsp_mem = self.0.start_ptr();320321// SAFETY:322// - The 'CoherentAllocation' contains at least one object.323// - By the invariants of `CoherentAllocation` the pointer is valid.324(unsafe { (*gsp_mem).gspq.tx.write_ptr() } % MSGQ_NUM_PAGES)325}326327// Returns the index of the memory page the GSP will read the next command from.328//329// # Invariants330//331// - The returned value is between `0` and `MSGQ_NUM_PAGES`.332fn gsp_read_ptr(&self) -> u32 {333let gsp_mem = self.0.start_ptr();334335// SAFETY:336// - The 'CoherentAllocation' contains at least one object.337// - By the invariants of `CoherentAllocation` the pointer is valid.338(unsafe { (*gsp_mem).gspq.rx.read_ptr() } % MSGQ_NUM_PAGES)339}340341// Returns the index of the memory page the CPU can read the next message from.342//343// # Invariants344//345// - The returned value is between `0` and `MSGQ_NUM_PAGES`.346fn cpu_read_ptr(&self) -> u32 {347let gsp_mem = self.0.start_ptr();348349// SAFETY:350// - The ['CoherentAllocation'] contains at least one object.351// - By the invariants of CoherentAllocation the pointer is valid.352(unsafe { (*gsp_mem).cpuq.rx.read_ptr() } % MSGQ_NUM_PAGES)353}354355// Informs the GSP that it can send `elem_count` new pages into the message queue.356fn advance_cpu_read_ptr(&mut self, elem_count: u32) {357let rptr = self.cpu_read_ptr().wrapping_add(elem_count) % MSGQ_NUM_PAGES;358359// Ensure read pointer is properly ordered.360fence(Ordering::SeqCst);361362let gsp_mem = self.0.start_ptr_mut();363364// SAFETY:365// - The 'CoherentAllocation' contains at least one object.366// - By the invariants of `CoherentAllocation` the pointer is valid.367unsafe { (*gsp_mem).cpuq.rx.set_read_ptr(rptr) };368}369370// Returns the index of the memory page the CPU can write the next command to.371//372// # Invariants373//374// - The returned value is between `0` and `MSGQ_NUM_PAGES`.375fn cpu_write_ptr(&self) -> u32 {376let gsp_mem = self.0.start_ptr();377378// SAFETY:379// - The 'CoherentAllocation' contains at least one object.380// - By the invariants of `CoherentAllocation` the pointer is valid.381(unsafe { (*gsp_mem).cpuq.tx.write_ptr() } % MSGQ_NUM_PAGES)382}383384// Informs the GSP that it can process `elem_count` new pages from the command queue.385fn advance_cpu_write_ptr(&mut self, elem_count: u32) {386let wptr = self.cpu_write_ptr().wrapping_add(elem_count) & MSGQ_NUM_PAGES;387let gsp_mem = self.0.start_ptr_mut();388389// SAFETY:390// - The 'CoherentAllocation' contains at least one object.391// - By the invariants of `CoherentAllocation` the pointer is valid.392unsafe { (*gsp_mem).cpuq.tx.set_write_ptr(wptr) };393394// Ensure all command data is visible before triggering the GSP read.395fence(Ordering::SeqCst);396}397}398399/// A command ready to be sent on the command queue.400///401/// This is the type returned by [`DmaGspMem::allocate_command`].402struct GspCommand<'a> {403// Writable reference to the header of the command.404header: &'a mut GspMsgElement,405// Writable slices to the contents of the command. The second slice is zero unless the command406// loops over the command queue.407contents: (&'a mut [u8], &'a mut [u8]),408}409410/// A message ready to be processed from the message queue.411///412/// This is the type returned by [`Cmdq::wait_for_msg`].413struct GspMessage<'a> {414// Reference to the header of the message.415header: &'a GspMsgElement,416// Slices to the contents of the message. The second slice is zero unless the message loops417// over the message queue.418contents: (&'a [u8], &'a [u8]),419}420421/// GSP command queue.422///423/// Provides the ability to send commands and receive messages from the GSP using a shared memory424/// area.425pub(crate) struct Cmdq {426/// Device this command queue belongs to.427dev: ARef<device::Device>,428/// Current command sequence number.429seq: u32,430/// Memory area shared with the GSP for communicating commands and messages.431gsp_mem: DmaGspMem,432}433434impl Cmdq {435/// Offset of the data after the PTEs.436const POST_PTE_OFFSET: usize = core::mem::offset_of!(GspMem, cpuq);437438/// Offset of command queue ring buffer.439pub(crate) const CMDQ_OFFSET: usize = core::mem::offset_of!(GspMem, cpuq)440+ core::mem::offset_of!(Msgq, msgq)441- Self::POST_PTE_OFFSET;442443/// Offset of message queue ring buffer.444pub(crate) const STATQ_OFFSET: usize = core::mem::offset_of!(GspMem, gspq)445+ core::mem::offset_of!(Msgq, msgq)446- Self::POST_PTE_OFFSET;447448/// Number of page table entries for the GSP shared region.449pub(crate) const NUM_PTES: usize = size_of::<GspMem>() >> GSP_PAGE_SHIFT;450451/// Creates a new command queue for `dev`.452pub(crate) fn new(dev: &device::Device<device::Bound>) -> Result<Cmdq> {453let gsp_mem = DmaGspMem::new(dev)?;454455Ok(Cmdq {456dev: dev.into(),457seq: 0,458gsp_mem,459})460}461462/// Computes the checksum for the message pointed to by `it`.463///464/// A message is made of several parts, so `it` is an iterator over byte slices representing465/// these parts.466fn calculate_checksum<T: Iterator<Item = u8>>(it: T) -> u32 {467let sum64 = it468.enumerate()469.map(|(idx, byte)| (((idx % 8) * 8) as u32, byte))470.fold(0, |acc, (rol, byte)| acc ^ u64::from(byte).rotate_left(rol));471472((sum64 >> 32) as u32) ^ (sum64 as u32)473}474475/// Notifies the GSP that we have updated the command queue pointers.476fn notify_gsp(bar: &Bar0) {477regs::NV_PGSP_QUEUE_HEAD::default()478.set_address(0)479.write(bar);480}481482/// Sends `command` to the GSP.483///484/// # Errors485///486/// - `EAGAIN` if there was not enough space in the command queue to send the command.487/// - `EIO` if the variable payload requested by the command has not been entirely488/// written to by its [`CommandToGsp::init_variable_payload`] method.489///490/// Error codes returned by the command initializers are propagated as-is.491pub(crate) fn send_command<M>(&mut self, bar: &Bar0, command: M) -> Result492where493M: CommandToGsp,494// This allows all error types, including `Infallible`, to be used for `M::InitError`.495Error: From<M::InitError>,496{497let command_size = size_of::<M::Command>() + command.variable_payload_len();498let dst = self.gsp_mem.allocate_command(command_size)?;499500// Extract area for the command itself.501let (cmd, payload_1) = M::Command::from_bytes_mut_prefix(dst.contents.0).ok_or(EIO)?;502503// Fill the header and command in-place.504let msg_element = GspMsgElement::init(self.seq, command_size, M::FUNCTION);505// SAFETY: `msg_header` and `cmd` are valid references, and not touched if the initializer506// fails.507unsafe {508msg_element.__init(core::ptr::from_mut(dst.header))?;509command.init().__init(core::ptr::from_mut(cmd))?;510}511512// Fill the variable-length payload.513if command_size > size_of::<M::Command>() {514let mut sbuffer =515SBufferIter::new_writer([&mut payload_1[..], &mut dst.contents.1[..]]);516command.init_variable_payload(&mut sbuffer)?;517518if !sbuffer.is_empty() {519return Err(EIO);520}521}522523// Compute checksum now that the whole message is ready.524dst.header525.set_checksum(Cmdq::calculate_checksum(SBufferIter::new_reader([526dst.header.as_bytes(),527dst.contents.0,528dst.contents.1,529])));530531dev_dbg!(532&self.dev,533"GSP RPC: send: seq# {}, function={}, length=0x{:x}\n",534self.seq,535M::FUNCTION,536dst.header.length(),537);538539// All set - update the write pointer and inform the GSP of the new command.540let elem_count = dst.header.element_count();541self.seq += 1;542self.gsp_mem.advance_cpu_write_ptr(elem_count);543Cmdq::notify_gsp(bar);544545Ok(())546}547548/// Wait for a message to become available on the message queue.549///550/// This works purely at the transport layer and does not interpret or validate the message551/// beyond the advertised length in its [`GspMsgElement`].552///553/// This method returns:554///555/// - A reference to the [`GspMsgElement`] of the message,556/// - Two byte slices with the contents of the message. The second slice is empty unless the557/// message loops across the message queue.558///559/// # Errors560///561/// - `ETIMEDOUT` if `timeout` has elapsed before any message becomes available.562/// - `EIO` if there was some inconsistency (e.g. message shorter than advertised) on the563/// message queue.564///565/// Error codes returned by the message constructor are propagated as-is.566fn wait_for_msg(&self, timeout: Delta) -> Result<GspMessage<'_>> {567// Wait for a message to arrive from the GSP.568let (slice_1, slice_2) = read_poll_timeout(569|| Ok(self.gsp_mem.driver_read_area()),570|driver_area| !driver_area.0.is_empty(),571Delta::from_millis(1),572timeout,573)574.map(|(slice_1, slice_2)| {575#[allow(clippy::incompatible_msrv)]576(slice_1.as_flattened(), slice_2.as_flattened())577})?;578579// Extract the `GspMsgElement`.580let (header, slice_1) = GspMsgElement::from_bytes_prefix(slice_1).ok_or(EIO)?;581582dev_dbg!(583self.dev,584"GSP RPC: receive: seq# {}, function={:?}, length=0x{:x}\n",585header.sequence(),586header.function(),587header.length(),588);589590// Check that the driver read area is large enough for the message.591if slice_1.len() + slice_2.len() < header.length() {592return Err(EIO);593}594595// Cut the message slices down to the actual length of the message.596let (slice_1, slice_2) = if slice_1.len() > header.length() {597// PANIC: we checked above that `slice_1` is at least as long as `msg_header.length()`.598(slice_1.split_at(header.length()).0, &slice_2[0..0])599} else {600(601slice_1,602// PANIC: we checked above that `slice_1.len() + slice_2.len()` is at least as603// large as `msg_header.length()`.604slice_2.split_at(header.length() - slice_1.len()).0,605)606};607608// Validate checksum.609if Cmdq::calculate_checksum(SBufferIter::new_reader([610header.as_bytes(),611slice_1,612slice_2,613])) != 0614{615dev_err!(616self.dev,617"GSP RPC: receive: Call {} - bad checksum",618header.sequence()619);620return Err(EIO);621}622623Ok(GspMessage {624header,625contents: (slice_1, slice_2),626})627}628629/// Receive a message from the GSP.630///631/// `init` is a closure tasked with processing the message. It receives a reference to the632/// message in the message queue, and a [`SBufferIter`] pointing to its variable-length633/// payload, if any.634///635/// The expected message is specified using the `M` generic parameter. If the pending message636/// is different, `EAGAIN` is returned and the unexpected message is dropped.637///638/// This design is by no means final, but it is simple and will let us go through GSP639/// initialization.640///641/// # Errors642///643/// - `ETIMEDOUT` if `timeout` has elapsed before any message becomes available.644/// - `EIO` if there was some inconsistency (e.g. message shorter than advertised) on the645/// message queue.646/// - `EINVAL` if the function of the message was unrecognized.647pub(crate) fn receive_msg<M: MessageFromGsp>(&mut self, timeout: Delta) -> Result<M>648where649// This allows all error types, including `Infallible`, to be used for `M::InitError`.650Error: From<M::InitError>,651{652let message = self.wait_for_msg(timeout)?;653let function = message.header.function().map_err(|_| EINVAL)?;654655// Extract the message. Store the result as we want to advance the read pointer even in656// case of failure.657let result = if function == M::FUNCTION {658let (cmd, contents_1) = M::Message::from_bytes_prefix(message.contents.0).ok_or(EIO)?;659let mut sbuffer = SBufferIter::new_reader([contents_1, message.contents.1]);660661M::read(cmd, &mut sbuffer).map_err(|e| e.into())662} else {663Err(ERANGE)664};665666// Advance the read pointer past this message.667self.gsp_mem.advance_cpu_read_ptr(u32::try_from(668message.header.length().div_ceil(GSP_PAGE_SIZE),669)?);670671result672}673674/// Returns the DMA handle of the command queue's shared memory region.675pub(crate) fn dma_handle(&self) -> DmaAddress {676self.gsp_mem.0.dma_handle()677}678}679680681