1
0
Fork 0
forked from wry/wry

io: use io_uring for all io

There should no longer be any

- read
- write
- connect
- sendmsg
- recvmsg
- accept

calls in the codebase. Previously we were using a mix of io_uring and
these calls which had some negative effects: Since we were using the old
system calls, we had to set the file descriptors to non-blocking. But
our io_uring code did not handle EAGAIN. This led to programs sometimes
being killed when the wayland IO was actually blocking.

Now all file descriptors are set to blocking, but io_uring makes it
non-blocking from our perspective. The one exception is evdev files
because they are read via libinput and libinput uses the old system
calls.
This commit is contained in:
Julian Orth 2022-12-31 17:55:58 +01:00
parent 2db0ee8995
commit 9812a02f87
55 changed files with 900 additions and 672 deletions

View file

@ -2,17 +2,25 @@ use {
crate::utils::{numcell::NumCell, ptr_ext::PtrExt},
std::{
alloc::Layout,
cmp,
collections::Bound,
fmt::Arguments,
io::{self, Write},
marker::PhantomData,
mem,
ops::{Deref, DerefMut, Range, RangeBounds},
ptr::NonNull,
slice,
},
uapi::Pod,
};
const METADATA_SIZE: u32 = 8;
const METADATA_ALIGN: usize = 4;
const SIZE_OFF: u32 = 0;
const RC_OFF: u32 = 4;
const RC_OFF_INV: u32 = METADATA_SIZE - RC_OFF;
const SIZE_OFF_INV: u32 = METADATA_SIZE - SIZE_OFF;
pub struct Buf {
storage: NonNull<u8>,
@ -118,9 +126,55 @@ impl Buf {
self.len32() as _
}
/// Reads the `u32` size field stored `SIZE_OFF_INV` bytes in front of `storage`.
fn size32(&self) -> u32 {
    // SAFETY: assumes the allocation carries a metadata header directly before
    // `storage` (presumably set up by the constructor, which is not visible
    // here) -- TODO confirm.
    unsafe {
        let size_field = self.storage.as_ptr().sub(SIZE_OFF_INV as _).cast::<u32>();
        *size_field.deref()
    }
}
/// Returns the stored size minus `METADATA_SIZE`, i.e. the number of bytes
/// left over once the metadata header is subtracted.
pub fn cap32(&self) -> u32 {
    let total = self.size32();
    total - METADATA_SIZE
}
/// Returns a raw pointer to the first byte of this buffer's range, i.e.
/// `storage` advanced by `range.start`.
pub fn as_ptr(&self) -> *mut u8 {
    let base = self.storage.as_ptr();
    // SAFETY: assumes `range.start` lies within the allocation behind
    // `storage` -- the invariant is maintained elsewhere; TODO confirm.
    unsafe { base.add(self.range.start as _) }
}
/// Formats `args` into the start of this buffer and returns a slice of the
/// buffer covering exactly the bytes that were written.
///
/// Any error reported by the underlying `io::Write` implementation for
/// `&mut [u8]` is propagated unchanged.
pub fn write_fmt(&mut self, args: Arguments) -> Result<Self, io::Error> {
    let total = self.len();
    // Writing through `&mut [u8]` advances the slice, so whatever is left
    // afterwards is the unwritten tail.
    let mut remaining = self.deref_mut();
    Write::write_fmt(&mut remaining, args)?;
    let written = total - remaining.len();
    Ok(self.slice(..written))
}
/// Consumes this buffer and returns one over the same storage whose range
/// spans `0..cap32()`.
pub fn into_full(self) -> Self {
    let storage = self.storage;
    let cap = self.cap32();
    // `self` is forgotten so that its `Drop` impl does not run; the storage
    // is taken over by the returned value instead.
    mem::forget(self);
    Self {
        storage,
        range: 0..cap,
    }
}
/// Views the buffer's range as a shared byte slice without going through
/// the `Deref` impl.
fn as_slice(&self) -> &[u8] {
    let (start, len) = (self.as_ptr(), self.len());
    // SAFETY: assumes `as_ptr()`/`len()` describe a valid region of the
    // backing allocation -- TODO confirm against the constructor.
    unsafe { slice::from_raw_parts(start, len) }
}
/// Views the buffer's range as a mutable byte slice without going through
/// the `DerefMut` impl.
fn as_slice_mut(&mut self) -> &mut [u8] {
    let (start, len) = (self.as_ptr(), self.len());
    // SAFETY: assumes `as_ptr()`/`len()` describe a valid region of the
    // backing allocation -- TODO confirm against the constructor.
    unsafe { slice::from_raw_parts_mut(start, len) }
}
}
impl Default for Buf {
fn default() -> Self {
Self::new(0)
}
}
impl Deref for Buf {
@ -128,14 +182,14 @@ impl Deref for Buf {
fn deref(&self) -> &Self::Target {
self.assert_unique();
unsafe { slice::from_raw_parts(self.as_ptr(), self.len()) }
self.as_slice()
}
}
impl DerefMut for Buf {
fn deref_mut(&mut self) -> &mut Self::Target {
self.assert_unique();
unsafe { slice::from_raw_parts_mut(self.as_ptr(), self.len()) }
self.as_slice_mut()
}
}
@ -153,3 +207,122 @@ impl Drop for Buf {
}
}
}
/// A growable byte buffer backed by a `Buf`.
///
/// Bytes are appended at offset `len`; when the backing `Buf` is too small a
/// larger one is allocated and the first `len` bytes are copied over.
pub struct DynamicBuf {
// Backing storage.
buf: Buf,
// Number of bytes appended so far.
len: usize,
}
impl DynamicBuf {
pub fn new() -> Self {
Self {
buf: Buf::new(0),
len: 0,
}
}
pub fn from_buf(buf: Buf) -> Self {
buf.assert_unique();
Self {
buf: buf.into_full(),
len: 0,
}
}
pub fn unwrap(mut self) -> Buf {
self.buf.slice(..self.len)
}
pub fn len(&self) -> usize {
self.len
}
pub fn reserve(&mut self, n: usize) {
if self.buf.len() - self.len < n {
let cap = self.len.checked_add(n).unwrap();
let cap = cmp::max(self.buf.len() * 2, cap);
let mut new = Buf::new(cap);
new[..self.len].copy_from_slice(&self.buf[..self.len]);
self.buf = new;
}
}
pub fn extend_from_slice(&mut self, buf: &[u8]) {
self.reserve(buf.len());
self.buf.as_slice_mut()[self.len..self.len + buf.len()].copy_from_slice(buf);
self.len += buf.len();
}
pub fn push(&mut self, b: u8) {
self.extend_from_slice(&[b]);
}
pub fn clear(&mut self) {
self.len = 0;
}
pub fn borrow(&mut self) -> BorrowedBuf<'_> {
BorrowedBuf {
buf: self.buf.slice(..self.len),
_phantom: Default::default(),
}
}
}
/// A `Buf` handed out by `DynamicBuf::borrow`.
///
/// The lifetime `'a` ties this value to a mutable borrow of the originating
/// `DynamicBuf`, so the `DynamicBuf` cannot be used while this value exists.
pub struct BorrowedBuf<'a> {
// The borrowed view of the dynamic buffer's contents.
pub buf: Buf,
// Carries the `&'a mut DynamicBuf` borrow without storing a reference.
_phantom: PhantomData<&'a mut DynamicBuf>,
}
impl Drop for BorrowedBuf<'_> {
    /// Panics unless the reference count of the borrowed `Buf` is exactly 2
    /// at the moment the guard is dropped.
    // NOTE(review): the two references are presumably this guard and the
    // originating `DynamicBuf` -- confirm against `Buf::slice`/`rc`.
    fn drop(&mut self) {
        let refs = self.buf.rc().get();
        assert_eq!(refs, 2);
    }
}
/// `io::Write` adapter: every write appends to the buffer and reports the
/// whole input as written.
impl Write for DynamicBuf {
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        let written = buf.len();
        self.extend_from_slice(buf);
        Ok(written)
    }

    /// Nothing is buffered beyond the in-memory storage, so this is a no-op.
    fn flush(&mut self) -> io::Result<()> {
        Ok(())
    }
}
/// Dereferences to the bytes that have been appended so far.
///
/// Only the first `len` bytes are exposed. The backing `Buf` may be larger
/// than that (spare capacity left by `reserve`, or stale contents after
/// `clear`/`from_buf`), and those bytes are not part of the logical contents.
/// This keeps the slice consistent with `len()`, `unwrap()` and `borrow()`,
/// all of which are bounded by `len`.
impl Deref for DynamicBuf {
    type Target = [u8];

    fn deref(&self) -> &Self::Target {
        &self.buf.as_slice()[..self.len]
    }
}
/// Mutably dereferences to the bytes that have been appended so far.
///
/// Only the first `len` bytes are exposed; spare capacity in the backing
/// `Buf` is not part of the logical contents and can only be filled through
/// `extend_from_slice`/`push`/`write`, which also advance `len`.
impl DerefMut for DynamicBuf {
    fn deref_mut(&mut self) -> &mut Self::Target {
        let written = self.len;
        &mut self.buf.as_slice_mut()[..written]
    }
}
/// A `Buf` that is allocated with `size_of::<T>()` bytes and whose contents
/// can be read back as a `T`.
pub struct TypedBuf<T: Pod> {
// Backing storage for one `T`.
buf: Buf,
// Records the element type without storing a value of it.
_phantom: PhantomData<T>,
}
impl<T: Pod> TypedBuf<T> {
pub fn new() -> Self {
Self {
buf: Buf::new(mem::size_of::<T>()),
_phantom: Default::default(),
}
}
pub fn buf(&mut self) -> Buf {
self.buf.clone()
}
pub fn t(&self) -> T {
uapi::pod_read(&self.buf[..]).unwrap()
}
}

View file

@ -1,7 +1,4 @@
use {
crate::{io_uring::IoUringError, utils::oserror::OsError},
thiserror::Error,
};
use {crate::io_uring::IoUringError, thiserror::Error};
pub use {
buf_in::BufFdIn,
buf_out::{BufFdOut, OutBuffer, OutBufferSwapchain},
@ -17,7 +14,7 @@ mod parser;
#[derive(Debug, Error)]
pub enum BufFdError {
#[error("An IO error occurred")]
Io(#[source] OsError),
Io(#[source] IoUringError),
#[error("An io-uring error occurred")]
Ring(#[from] IoUringError),
#[error("The peer did not send a file descriptor")]
@ -31,5 +28,4 @@ pub enum BufFdError {
}
const BUF_SIZE: usize = 4096;
const CMSG_BUF_SIZE: usize = 4096;
const MAX_IN_FD: usize = 32;

View file

@ -15,7 +15,7 @@ pub struct BufFdIn {
fd: Rc<OwnedFd>,
ring: Rc<IoUring>,
in_fd: VecDeque<OwnedFd>,
in_fd: VecDeque<Rc<OwnedFd>>,
in_buf: Buf,
in_left: usize,
@ -86,7 +86,7 @@ impl BufFdIn {
Ok(())
}
pub fn get_fd(&mut self) -> Result<OwnedFd, BufFdError> {
pub fn get_fd(&mut self) -> Result<Rc<OwnedFd>, BufFdError> {
match self.in_fd.pop_front() {
Some(f) => Ok(f),
None => Err(BufFdError::NoFd),

View file

@ -4,17 +4,16 @@ use {
time::Time,
utils::{
buf::Buf,
buffd::{BufFdError, BUF_SIZE, CMSG_BUF_SIZE},
buffd::{BufFdError, BUF_SIZE},
oserror::OsError,
},
},
std::{
collections::VecDeque,
mem::{self, MaybeUninit},
mem::{self},
rc::Rc,
slice,
},
uapi::{c, Errno, OwnedFd},
uapi::{c, OwnedFd},
};
pub(super) const OUT_BUF_SIZE: usize = 2 * BUF_SIZE;
@ -80,8 +79,6 @@ impl OutBufferSwapchain {
pub struct BufFdOut {
fd: Rc<OwnedFd>,
ring: Rc<IoUring>,
cmsg_buf: Box<[MaybeUninit<u8>; CMSG_BUF_SIZE]>,
fd_ids: Vec<i32>,
}
impl BufFdOut {
@ -89,8 +86,6 @@ impl BufFdOut {
Self {
fd: fd.clone(),
ring: ring.clone(),
cmsg_buf: Box::new([MaybeUninit::uninit(); CMSG_BUF_SIZE]),
fd_ids: vec![],
}
}
@ -131,7 +126,7 @@ impl BufFdOut {
buf = buffer.buf.slice(buffer.meta.read_pos..next_pos);
}
}
match self.ring.sendmsg(&self.fd, buf, fds, timeout).await {
match self.ring.sendmsg_one(&self.fd, buf, fds, timeout).await {
Ok(n) => {
buffer.meta.read_pos += n;
Ok(())
@ -144,59 +139,23 @@ impl BufFdOut {
pub async fn flush2(
&mut self,
buf: &[u8],
fds: &mut Vec<Rc<OwnedFd>>,
mut buf: Buf,
mut fds: Vec<Rc<OwnedFd>>,
) -> Result<(), BufFdError> {
let mut read_pos = 0;
while read_pos < buf.len() {
if self.flush_sync2(&mut read_pos, buf, fds)? {
self.ring.writable(&self.fd).await?;
let res = self
.ring
.sendmsg_one(&self.fd, buf.slice(read_pos..), mem::take(&mut fds), None)
.await;
match res {
Ok(n) => read_pos += n,
Err(IoUringError::OsError(OsError(c::ECONNRESET))) => {
return Err(BufFdError::Closed)
}
Err(e) => return Err(BufFdError::Io(e)),
}
}
Ok(())
}
fn flush_sync2(
&mut self,
read_pos: &mut usize,
buf: &[u8],
fds: &mut Vec<Rc<OwnedFd>>,
) -> Result<bool, BufFdError> {
let mut cmsg_len = 0;
let mut fds_opt = None;
if fds.len() > 0 {
self.fd_ids.clear();
self.fd_ids.extend(fds.iter().map(|f| f.raw()));
let hdr = c::cmsghdr {
cmsg_len: 0,
cmsg_level: c::SOL_SOCKET,
cmsg_type: c::SCM_RIGHTS,
};
let mut cmsg_buf = &mut self.cmsg_buf[..];
cmsg_len = uapi::cmsg_write(&mut cmsg_buf, hdr, &self.fd_ids[..]).unwrap();
fds_opt = Some(fds);
}
while *read_pos < buf.len() {
let buf = &buf[*read_pos..];
let hdr = uapi::Msghdr {
iov: slice::from_ref(&buf),
control: Some(&self.cmsg_buf[..cmsg_len]),
name: uapi::sockaddr_none_ref(),
};
let bytes_sent =
match uapi::sendmsg(self.fd.raw(), &hdr, c::MSG_DONTWAIT | c::MSG_NOSIGNAL) {
Ok(b) => {
if let Some(fds) = fds_opt.take() {
fds.clear();
}
b
}
Err(Errno(c::EAGAIN)) => return Ok(true),
Err(Errno(c::ECONNRESET)) => return Err(BufFdError::Closed),
Err(e) => return Err(BufFdError::Io(e.into())),
};
*read_pos += bytes_sent;
}
Ok(false)
}
}

View file

@ -97,7 +97,7 @@ impl<'a, 'b> MsgParser<'a, 'b> {
pub fn fd(&mut self) -> Result<Rc<OwnedFd>, MsgParserError> {
match self.buf.get_fd() {
Ok(fd) => Ok(Rc::new(fd)),
Ok(fd) => Ok(fd),
_ => Err(MsgParserError::MissingFd),
}
}

View file

@ -2,40 +2,33 @@ use {
crate::{
io_uring::{IoUring, IoUringError},
utils::{
oserror::OsError,
buf::{Buf, DynamicBuf},
queue::AsyncQueue,
stack::Stack,
vec_ext::{UninitVecExt, VecExt},
vecstorage::VecStorage,
},
},
std::{
collections::VecDeque,
mem::{self, MaybeUninit},
ptr::NonNull,
mem::{self},
rc::Rc,
},
thiserror::Error,
uapi::{c, Errno, MaybeUninitSliceExt, Msghdr, MsghdrMut, OwnedFd},
uapi::{c, OwnedFd},
};
#[derive(Debug, Error)]
pub enum BufIoError {
#[error("Could not write to the socket")]
FlushError(#[source] OsError),
FlushError(#[source] IoUringError),
#[error("Could not read from the socket")]
ReadError(#[source] OsError),
#[error("Cannot wait for fd to become writable")]
Writable(#[source] IoUringError),
#[error("Cannot wait for fd to become readable")]
Readable(#[source] IoUringError),
ReadError(#[source] IoUringError),
#[error("The socket is closed")]
Closed,
}
pub struct BufIoMessage {
pub fds: Vec<Rc<OwnedFd>>,
pub buf: Vec<u8>,
pub buf: Buf,
}
struct MessageOffset {
@ -46,27 +39,24 @@ struct MessageOffset {
pub struct BufIo {
fd: Rc<OwnedFd>,
ring: Rc<IoUring>,
bufs: Stack<Vec<u8>>,
bufs: Stack<Buf>,
outgoing: AsyncQueue<BufIoMessage>,
}
pub struct BufIoIncoming {
bufio: Rc<BufIo>,
buf: Box<[MaybeUninit<u8>; 4096]>,
buf: Buf,
buf_start: usize,
buf_end: usize,
pub fds: VecDeque<Rc<OwnedFd>>,
cmsg: Box<[MaybeUninit<u8>; 256]>,
}
struct Outgoing {
bufio: Rc<BufIo>,
msgs: VecDeque<MessageOffset>,
cmsg: Vec<MaybeUninit<u8>>,
fds: Vec<c::c_int>,
iovecs: VecStorage<NonNull<[u8]>>,
bufs: Vec<Buf>,
}
impl BufIo {
@ -83,14 +73,9 @@ impl BufIo {
let _ = uapi::shutdown(self.fd.raw(), c::SHUT_RDWR);
}
pub fn buf(&self) -> Vec<u8> {
let mut buf = self.bufs.pop().unwrap_or_default();
buf.clear();
buf
}
pub fn add_buf(&self, buf: Vec<u8>) {
self.bufs.push(buf);
pub fn buf(&self) -> DynamicBuf {
let buf = self.bufs.pop().unwrap_or_default();
DynamicBuf::from_buf(buf)
}
pub fn send(&self, msg: BufIoMessage) {
@ -101,9 +86,7 @@ impl BufIo {
let mut outgoing = Outgoing {
bufio: self,
msgs: Default::default(),
cmsg: vec![],
fds: vec![],
iovecs: Default::default(),
bufs: vec![],
};
outgoing.run().await
}
@ -111,11 +94,10 @@ impl BufIo {
pub fn incoming(self: &Rc<Self>) -> BufIoIncoming {
BufIoIncoming {
bufio: self.clone(),
buf: Box::new([MaybeUninit::uninit(); 4096]),
buf: Buf::new(4096),
buf_start: 0,
buf_end: 0,
fds: Default::default(),
cmsg: Box::new([MaybeUninit::uninit(); 256]),
}
}
}
@ -128,72 +110,42 @@ impl BufIoIncoming {
) -> Result<(), BufIoError> {
while n > 0 {
if self.buf_start == self.buf_end {
while let Err(e) = self.recvmsg() {
if e.0 != c::EAGAIN {
return Err(BufIoError::ReadError(e.into()));
}
if let Err(e) = self.bufio.ring.readable(&self.bufio.fd).await {
return Err(BufIoError::Readable(e));
}
self.buf_start = 0;
self.buf_end = 0;
let res = self
.bufio
.ring
.recvmsg(&self.bufio.fd, &mut [self.buf.clone()], &mut self.fds)
.await;
match res {
Ok(n) => self.buf_end = n,
Err(e) => return Err(BufIoError::ReadError(e)),
}
if self.buf_start == self.buf_end {
return Err(BufIoError::Closed);
}
}
let read = n.min(self.buf_end - self.buf_start);
let buf_start = self.buf_start % self.buf.len();
unsafe {
buf.extend_from_slice(
self.buf[buf_start..buf_start + read].slice_assume_init_ref(),
);
}
let buf_start = self.buf_start;
buf.extend_from_slice(&self.buf[buf_start..buf_start + read]);
n -= read;
self.buf_start += read;
}
Ok(())
}
fn recvmsg(&mut self) -> Result<(), Errno> {
self.buf_start = 0;
self.buf_end = 0;
let mut iov = [&mut self.buf[..]];
let mut hdr = MsghdrMut {
iov: &mut iov[..],
control: Some(&mut self.cmsg[..]),
name: uapi::sockaddr_none_mut(),
flags: 0,
};
let (ivec, _, mut cmsg) =
uapi::recvmsg(self.bufio.fd.raw(), &mut hdr, c::MSG_CMSG_CLOEXEC)?;
self.buf_end += ivec.len();
while cmsg.len() > 0 {
let (_, hdr, body) = uapi::cmsg_read(&mut cmsg)?;
if hdr.cmsg_level == c::SOL_SOCKET && hdr.cmsg_type == c::SCM_RIGHTS {
for fd in uapi::pod_iter(body)? {
self.fds.push_back(Rc::new(OwnedFd::new(fd)));
}
}
}
Ok(())
}
}
impl Outgoing {
async fn run(&mut self) -> Result<(), BufIoError> {
loop {
self.bufio.outgoing.non_empty().await;
while let Err(e) = self.try_flush() {
if e != Errno(c::EAGAIN) {
return Err(BufIoError::FlushError(e.into()));
}
if let Err(e) = self.bufio.ring.writable(&self.bufio.fd).await {
return Err(BufIoError::Writable(e));
}
if let Err(e) = self.try_flush().await {
return Err(BufIoError::FlushError(e));
}
}
}
fn try_flush(&mut self) -> Result<(), Errno> {
async fn try_flush(&mut self) -> Result<(), IoUringError> {
loop {
while let Some(msg) = self.bufio.outgoing.try_pop() {
self.msgs.push_back(MessageOffset { msg, offset: 0 });
@ -201,40 +153,23 @@ impl Outgoing {
if self.msgs.is_empty() {
return Ok(());
}
let mut iovecs = self.iovecs.take_as();
let mut fds = &[][..];
let mut fds = Vec::new();
for msg in &mut self.msgs {
if msg.msg.fds.len() > 0 {
if fds.len() > 0 || iovecs.len() > 0 {
if fds.len() > 0 || self.bufs.len() > 0 {
break;
}
fds = &msg.msg.fds;
fds = mem::take(&mut msg.msg.fds);
}
iovecs.push(&msg.msg.buf[msg.offset..]);
self.bufs.push(msg.msg.buf.slice(msg.offset..));
}
self.cmsg.clear();
if fds.len() > 0 {
self.fds.clear();
self.fds.extend(fds.iter().map(|f| f.raw()));
let cmsg_space = uapi::cmsg_space(fds.len() * mem::size_of::<c::c_int>());
self.cmsg.reserve(cmsg_space);
let (_, mut spare) = self.cmsg.split_at_spare_mut_bytes_ext();
let hdr = c::cmsghdr {
cmsg_len: 0,
cmsg_level: c::SOL_SOCKET,
cmsg_type: c::SCM_RIGHTS,
};
let len = uapi::cmsg_write(&mut spare, hdr, &self.fds[..]).unwrap();
self.cmsg.set_len_safe(len);
}
let msg = Msghdr {
iov: &iovecs[..],
control: Some(&self.cmsg[..]),
name: uapi::sockaddr_none_ref(),
};
let mut n = uapi::sendmsg(self.bufio.fd.raw(), &msg, c::MSG_DONTWAIT)?;
drop(iovecs);
self.msgs[0].msg.fds.clear();
let res = self
.bufio
.ring
.sendmsg(&self.bufio.fd, &mut self.bufs, fds, None)
.await;
self.bufs.clear();
let mut n = res?;
while n > 0 {
let len = self.msgs[0].msg.buf.len() - self.msgs[0].offset;
if n < len {

View file

@ -1,9 +1,9 @@
use {
crate::{
io_uring::{IoUring, IoUringError},
utils::oserror::OsError,
utils::{buf::TypedBuf, oserror::OsError},
},
std::{rc::Rc, time::Duration},
std::{cell::RefCell, rc::Rc, time::Duration},
thiserror::Error,
uapi::{c, OwnedFd},
};
@ -13,7 +13,7 @@ pub enum TimerError {
#[error("Could not create a timer")]
CreateTimer(#[source] OsError),
#[error("Could not read from a timer")]
TimerReadError(#[source] OsError),
TimerReadError(#[source] IoUringError),
#[error("Could not set a timer")]
SetTimer(#[source] OsError),
#[error("The io-uring returned an error")]
@ -23,24 +23,28 @@ pub enum TimerError {
#[derive(Clone)]
pub struct TimerFd {
fd: Rc<OwnedFd>,
buf: Rc<RefCell<TypedBuf<u64>>>,
}
impl TimerFd {
pub fn new(clock_id: c::c_int) -> Result<Self, TimerError> {
let fd = match uapi::timerfd_create(clock_id, c::TFD_CLOEXEC | c::TFD_NONBLOCK) {
let fd = match uapi::timerfd_create(clock_id, c::TFD_CLOEXEC) {
Ok(fd) => Rc::new(fd),
Err(e) => return Err(TimerError::CreateTimer(e.into())),
};
Ok(Self { fd })
Ok(Self {
fd,
buf: Rc::new(RefCell::new(TypedBuf::new())),
})
}
#[allow(clippy::await_holding_refcell_ref)]
pub async fn expired(&self, ring: &IoUring) -> Result<u64, TimerError> {
ring.readable(&self.fd).await?;
let mut buf = 0u64;
if let Err(e) = uapi::read(self.fd.raw(), &mut buf) {
return Err(TimerError::TimerReadError(e.into()));
let mut buf = self.buf.borrow_mut();
if let Err(e) = ring.read(&self.fd, buf.buf()).await {
return Err(TimerError::TimerReadError(e));
}
Ok(buf)
Ok(buf.t())
}
pub fn program(