// Copyright 2017 The ChromiumOS Authors1// Use of this source code is governed by a BSD-style license that can be2// found in the LICENSE file.34use std::cmp::min;5use std::fs::File;6use std::marker::PhantomData;7use std::mem::MaybeUninit;8use std::ptr::null_mut;9use std::time::Duration;1011use libc::c_int;12use libc::epoll_create1;13use libc::epoll_ctl;14use libc::epoll_event;15use libc::epoll_wait;16use libc::ENOENT;17use libc::EPOLLHUP;18use libc::EPOLLIN;19use libc::EPOLLOUT;20use libc::EPOLLRDHUP;21use libc::EPOLL_CLOEXEC;22use libc::EPOLL_CTL_ADD;23use libc::EPOLL_CTL_DEL;24use libc::EPOLL_CTL_MOD;25use smallvec::SmallVec;2627use super::errno_result;28use super::Result;29use crate::handle_eintr_errno;30use crate::AsRawDescriptor;31use crate::EventToken;32use crate::EventType;33use crate::FromRawDescriptor;34use crate::RawDescriptor;35use crate::TriggeredEvent;3637const EVENT_CONTEXT_MAX_EVENTS: usize = 16;3839impl From<EventType> for u32 {40fn from(et: EventType) -> u32 {41let v = match et {42EventType::None => 0,43EventType::Read => EPOLLIN,44EventType::Write => EPOLLOUT,45EventType::ReadWrite => EPOLLIN | EPOLLOUT,46};47v as u3248}49}5051/// Used to poll multiple objects that have file descriptors.52///53/// See [`crate::WaitContext`] for an example that uses the cross-platform wrapper.54pub struct EventContext<T> {55epoll_ctx: File,56// Needed to satisfy usage of T57tokens: PhantomData<[T]>,58}5960impl<T: EventToken> EventContext<T> {61/// Creates a new `EventContext`.62pub fn new() -> Result<EventContext<T>> {63// SAFETY:64// Safe because we check the return value.65let epoll_fd = unsafe { epoll_create1(EPOLL_CLOEXEC) };66if epoll_fd < 0 {67return errno_result();68}69Ok(EventContext {70// SAFETY:71// Safe because epoll_fd is valid.72epoll_ctx: unsafe { File::from_raw_descriptor(epoll_fd) },73tokens: PhantomData,74})75}7677/// Creates a new `EventContext` and adds the slice of `fd` and `token` tuples to the new78/// context.79///80/// This is equivalent to calling `new` followed by `add_many`. If there is an error, this will81/// return the error instead of the new context.82pub fn build_with(fd_tokens: &[(&dyn AsRawDescriptor, T)]) -> Result<EventContext<T>> {83let ctx = EventContext::new()?;84ctx.add_many(fd_tokens)?;85Ok(ctx)86}8788/// Adds the given slice of `fd` and `token` tuples to this context.89///90/// This is equivalent to calling `add` with each `fd` and `token`. If there are any errors,91/// this method will stop adding `fd`s and return the first error, leaving this context in a92/// undefined state.93pub fn add_many(&self, fd_tokens: &[(&dyn AsRawDescriptor, T)]) -> Result<()> {94for (fd, token) in fd_tokens {95self.add(*fd, T::from_raw_token(token.as_raw_token()))?;96}97Ok(())98}99100/// Adds the given `fd` to this context and associates the given `token` with the `fd`'s101/// readable events.102///103/// A `fd` can only be added once and does not need to be kept open. If the `fd` is dropped and104/// there were no duplicated file descriptors (i.e. adding the same descriptor with a different105/// FD number) added to this context, events will not be reported by `wait` anymore.106pub fn add(&self, fd: &dyn AsRawDescriptor, token: T) -> Result<()> {107self.add_for_event(fd, EventType::Read, token)108}109110/// Adds the given `descriptor` to this context, watching for the specified events and111/// associates the given 'token' with those events.112///113/// A `descriptor` can only be added once and does not need to be kept open. If the `descriptor`114/// is dropped and there were no duplicated file descriptors (i.e. adding the same descriptor115/// with a different FD number) added to this context, events will not be reported by `wait`116/// anymore.117pub fn add_for_event(118&self,119descriptor: &dyn AsRawDescriptor,120event_type: EventType,121token: T,122) -> Result<()> {123let mut evt = epoll_event {124events: event_type.into(),125u64: token.as_raw_token(),126};127// SAFETY:128// Safe because we give a valid epoll FD and FD to watch, as well as a valid epoll_event129// structure. Then we check the return value.130let ret = unsafe {131epoll_ctl(132self.epoll_ctx.as_raw_descriptor(),133EPOLL_CTL_ADD,134descriptor.as_raw_descriptor(),135&mut evt,136)137};138if ret < 0 {139return errno_result();140};141Ok(())142}143144/// If `fd` was previously added to this context, the watched events will be replaced with145/// `event_type` and the token associated with it will be replaced with the given `token`.146pub fn modify(&self, fd: &dyn AsRawDescriptor, event_type: EventType, token: T) -> Result<()> {147let mut evt = epoll_event {148events: event_type.into(),149u64: token.as_raw_token(),150};151// SAFETY:152// Safe because we give a valid epoll FD and FD to modify, as well as a valid epoll_event153// structure. Then we check the return value.154let ret = unsafe {155epoll_ctl(156self.epoll_ctx.as_raw_descriptor(),157EPOLL_CTL_MOD,158fd.as_raw_descriptor(),159&mut evt,160)161};162if ret < 0 {163return errno_result();164};165Ok(())166}167168/// Deletes the given `fd` from this context. If the `fd` is not being polled by this context,169/// the call is silently dropped without errors.170///171/// If an `fd`'s token shows up in the list of hangup events, it should be removed using this172/// method or by closing/dropping (if and only if the fd was never dup()'d/fork()'d) the `fd`.173/// Failure to do so will cause the `wait` method to always return immediately, causing ~100%174/// CPU load.175pub fn delete(&self, fd: &dyn AsRawDescriptor) -> Result<()> {176// SAFETY:177// Safe because we give a valid epoll FD and FD to stop watching. Then we check the return178// value.179let ret = unsafe {180epoll_ctl(181self.epoll_ctx.as_raw_descriptor(),182EPOLL_CTL_DEL,183fd.as_raw_descriptor(),184null_mut(),185)186};187// If epoll_ctl returns ENOENT it means the fd is not part of the current polling set so188// there is nothing to delete.189if ret < 0 && ret != ENOENT {190return errno_result();191};192Ok(())193}194195/// Waits for any events to occur in FDs that were previously added to this context.196///197/// The events are level-triggered, meaning that if any events are unhandled (i.e. not reading198/// for readable events and not closing for hungup events), subsequent calls to `wait` will199/// return immediately. The consequence of not handling an event perpetually while calling200/// `wait` is that the callers loop will degenerated to busy loop polling, pinning a CPU to201/// ~100% usage.202pub fn wait(&self) -> Result<SmallVec<[TriggeredEvent<T>; 16]>> {203self.wait_timeout(Duration::new(i64::MAX as u64, 0))204}205206/// Like `wait` except will only block for a maximum of the given `timeout`.207///208/// This may return earlier than `timeout` with zero events if the duration indicated exceeds209/// system limits.210pub fn wait_timeout(&self, timeout: Duration) -> Result<SmallVec<[TriggeredEvent<T>; 16]>> {211let mut epoll_events: [MaybeUninit<epoll_event>; EVENT_CONTEXT_MAX_EVENTS] =212// SAFETY:213// `MaybeUnint<T>` has the same layout as plain `T` (`epoll_event` in our case).214// We submit an uninitialized array to the `epoll_wait` system call, which returns how many215// elements it initialized, and then we convert only the initialized `MaybeUnint` values216// into `epoll_event` structures after the call.217unsafe { MaybeUninit::uninit().assume_init() };218219let timeout_millis = if timeout.as_secs() as i64 == i64::MAX {220// We make the convenient assumption that 2^63 seconds is an effectively unbounded time221// frame. This is meant to mesh with `wait` calling us with no timeout.222-1223} else {224// In cases where we the number of milliseconds would overflow an i32, we substitute the225// maximum timeout which is ~24.8 days.226let millis = timeout227.as_secs()228.checked_mul(1_000)229.and_then(|ms| ms.checked_add(u64::from(timeout.subsec_nanos()) / 1_000_000))230.unwrap_or(i32::MAX as u64);231min(i32::MAX as u64, millis) as i32232};233let ret = {234let max_events = epoll_events.len() as c_int;235// SAFETY:236// Safe because we give an epoll context and a properly sized epoll_events array237// pointer, which we trust the kernel to fill in properly. The `transmute` is safe,238// since `MaybeUnint<T>` has the same layout as `T`, and the `epoll_wait` syscall will239// initialize as many elements of the `epoll_events` array as it returns.240unsafe {241handle_eintr_errno!(epoll_wait(242self.epoll_ctx.as_raw_descriptor(),243std::mem::transmute(&mut epoll_events[0]),244max_events,245timeout_millis246))247}248};249if ret < 0 {250return errno_result();251}252let count = ret as usize;253254let events = epoll_events[0..count]255.iter()256.map(|e| {257// SAFETY:258// Converting `MaybeUninit<epoll_event>` into `epoll_event` is safe here, since we259// are only iterating over elements that the `epoll_wait` system call initialized.260let e = unsafe { e.assume_init() };261TriggeredEvent {262token: T::from_raw_token(e.u64),263is_readable: e.events & (EPOLLIN as u32) != 0,264is_writable: e.events & (EPOLLOUT as u32) != 0,265is_hungup: e.events & ((EPOLLHUP | EPOLLRDHUP) as u32) != 0,266}267})268.collect();269Ok(events)270}271}272273impl<T: EventToken> AsRawDescriptor for EventContext<T> {274fn as_raw_descriptor(&self) -> RawDescriptor {275self.epoll_ctx.as_raw_descriptor()276}277}278279#[cfg(test)]280mod tests {281use std::time::Instant;282283use base_event_token_derive::EventToken;284285use super::*;286use crate::Event;287288#[test]289fn event_context() {290let evt1 = Event::new().unwrap();291let evt2 = Event::new().unwrap();292evt1.signal().unwrap();293evt2.signal().unwrap();294let ctx: EventContext<u32> = EventContext::build_with(&[(&evt1, 1), (&evt2, 2)]).unwrap();295296let mut evt_count = 0;297while evt_count < 2 {298for event in ctx.wait().unwrap().iter().filter(|e| e.is_readable) {299evt_count += 1;300match event.token {3011 => {302evt1.wait().unwrap();303ctx.delete(&evt1).unwrap();304}3052 => {306evt2.wait().unwrap();307ctx.delete(&evt2).unwrap();308}309_ => panic!("unexpected token"),310};311}312}313assert_eq!(evt_count, 2);314}315316#[test]317fn event_context_overflow() {318const EVT_COUNT: usize = EVENT_CONTEXT_MAX_EVENTS * 2 + 1;319let ctx: EventContext<usize> = EventContext::new().unwrap();320let mut evts = Vec::with_capacity(EVT_COUNT);321for i in 0..EVT_COUNT {322let evt = Event::new().unwrap();323evt.signal().unwrap();324ctx.add(&evt, i).unwrap();325evts.push(evt);326}327let mut evt_count = 0;328while evt_count < EVT_COUNT {329for event in ctx.wait().unwrap().iter().filter(|e| e.is_readable) {330evts[event.token].wait().unwrap();331evt_count += 1;332}333}334}335336#[test]337fn event_context_timeout() {338let ctx: EventContext<u32> = EventContext::new().unwrap();339let dur = Duration::from_millis(10);340let start_inst = Instant::now();341ctx.wait_timeout(dur).unwrap();342assert!(start_inst.elapsed() >= dur);343}344345#[test]346#[allow(dead_code)]347fn event_token_derive() {348#[derive(EventToken)]349enum EmptyToken {}350351#[derive(PartialEq, Debug, EventToken)]352enum Token {353Alpha,354Beta,355// comments356Gamma(u32),357Delta { index: usize },358Omega,359}360361assert_eq!(362Token::from_raw_token(Token::Alpha.as_raw_token()),363Token::Alpha364);365assert_eq!(366Token::from_raw_token(Token::Beta.as_raw_token()),367Token::Beta368);369assert_eq!(370Token::from_raw_token(Token::Gamma(55).as_raw_token()),371Token::Gamma(55)372);373assert_eq!(374Token::from_raw_token(Token::Delta { index: 100 }.as_raw_token()),375Token::Delta { index: 100 }376);377assert_eq!(378Token::from_raw_token(Token::Omega.as_raw_token()),379Token::Omega380);381}382}383384385