1
0
Fork 0
forked from wry/wry

io-uring: add sendmsg

This commit is contained in:
Julian Orth 2022-05-13 17:37:20 +02:00
parent 9416efeabe
commit e4f97287bc
17 changed files with 493 additions and 191 deletions

155
src/utils/buf.rs Normal file
View file

@ -0,0 +1,155 @@
use {
crate::utils::{numcell::NumCell, ptr_ext::PtrExt},
std::{
alloc::Layout,
collections::Bound,
ops::{Deref, DerefMut, Range, RangeBounds},
ptr::NonNull,
slice,
},
};
// Every allocation backing a `Buf` starts with a small header that is
// followed directly by the payload bytes:
//
//   offset 0: u32 total allocation size (header + payload)
//   offset 4: u32 reference count
//   offset 8: payload

/// Size in bytes of the header that precedes the payload.
const METADATA_SIZE: u32 = 8;
/// Alignment requested for the whole allocation.
const METADATA_ALIGN: usize = 4;
/// Offset of the reference count, measured from the start of the allocation.
const RC_OFF: u32 = 4;
/// Distance from the start of the payload back to the reference count.
const RC_OFF_INV: u32 = METADATA_SIZE - RC_OFF;
/// A handle to a window of a reference-counted, heap-allocated byte buffer.
///
/// Several handles can refer to the same allocation; the allocation is freed
/// when the last handle is dropped.
pub struct Buf {
// Pointer to the first payload byte, i.e. just past the allocation header.
storage: NonNull<u8>,
// Byte range of the payload, relative to `storage`, that this handle covers.
range: Range<u32>,
}
impl Buf {
    /// Allocates the backing storage for `len` payload bytes, writes the
    /// header (total size, reference count of 1) and returns the sole handle
    /// covering the whole payload.
    ///
    /// With `zeroed == false` the payload bytes are left uninitialized and the
    /// caller must fill them before exposing them as a slice.
    ///
    /// # Panics
    ///
    /// Panics if `len` plus the header does not fit into a `u32`.
    fn allocate(len: usize, zeroed: bool) -> Buf {
        assert!(len <= (u32::MAX - METADATA_SIZE) as usize);
        let len = len as u32;
        let size = len + METADATA_SIZE;
        let layout = Layout::from_size_align(size as _, METADATA_ALIGN).unwrap();
        // SAFETY: `layout` has a non-zero size because it always includes the header.
        let ptr = unsafe {
            if zeroed {
                std::alloc::alloc_zeroed(layout)
            } else {
                std::alloc::alloc(layout)
            }
        };
        if ptr.is_null() {
            std::alloc::handle_alloc_error(layout);
        }
        // SAFETY: `ptr` is non-null, aligned to `METADATA_ALIGN` (the alignment
        // of `u32`) and valid for `size >= METADATA_SIZE` bytes, so both header
        // words and the start of the payload are inside the allocation.
        unsafe {
            ptr.cast::<u32>().write(size);
            ptr.add(RC_OFF as _).cast::<u32>().write(1);
            Buf {
                storage: NonNull::new_unchecked(ptr.add(METADATA_SIZE as _)),
                range: Range { start: 0, end: len },
            }
        }
    }

    /// Creates a new buffer holding a copy of `vec`.
    ///
    /// # Panics
    ///
    /// Panics if `vec` is too long for its length plus the header to fit into
    /// a `u32`.
    pub fn from_slice(vec: &[u8]) -> Buf {
        let buf = Self::allocate(vec.len(), false);
        // Copy through raw pointers: the payload is still uninitialized, so no
        // `&mut [u8]` may be formed over it before it has been written.
        // SAFETY: the payload is valid for `vec.len()` bytes and, being a fresh
        // allocation, cannot overlap `vec`.
        unsafe {
            std::ptr::copy_nonoverlapping(vec.as_ptr(), buf.storage.as_ptr(), vec.len());
        }
        buf
    }

    /// Creates a new zero-filled buffer of `len` bytes.
    ///
    /// # Panics
    ///
    /// Panics if `len` plus the header does not fit into a `u32`.
    pub fn new(len: usize) -> Buf {
        Self::allocate(len, true)
    }

    /// Returns another handle to the same bytes and increments the shared
    /// reference count.
    ///
    /// While more than one handle exists, dereferencing any of them to a
    /// slice panics (see `assert_unique`).
    pub fn clone(&mut self) -> Buf {
        self.rc().fetch_add(1);
        Buf {
            storage: self.storage,
            range: self.range.clone(),
        }
    }

    /// Returns a new handle covering the sub-range `range` of this handle and
    /// increments the shared reference count. `range` is interpreted relative
    /// to the start of this handle.
    ///
    /// # Panics
    ///
    /// Panics if the start of the range is after its end, if the end is past
    /// `self.len()`, or if a bound overflows `usize`.
    pub fn slice(&mut self, range: impl RangeBounds<usize>) -> Buf {
        let len = self.len();
        let start = match range.start_bound() {
            Bound::Included(&n) => n,
            Bound::Excluded(&n) => n.checked_add(1).expect("slice start overflows usize"),
            Bound::Unbounded => 0,
        };
        let end = match range.end_bound() {
            Bound::Included(&n) => n.checked_add(1).expect("slice end overflows usize"),
            Bound::Excluded(&n) => n,
            Bound::Unbounded => len,
        };
        // Validate while the bounds are still `usize`. Narrowing to `u32` first
        // could truncate an out-of-range bound into one that looks valid.
        assert!(start <= end);
        assert!(end <= len);
        // Lossless: start <= end <= len, and len originates from a `u32`.
        self.slice_(start as u32, end as u32)
    }

    /// `u32` implementation of [`Buf::slice`]; `start` and `end` are relative
    /// to the start of this handle.
    fn slice_(&mut self, start: u32, end: u32) -> Buf {
        assert!(start <= end);
        assert!(end <= self.len32());
        self.rc().fetch_add(1);
        Buf {
            storage: self.storage,
            range: Range {
                start: self.range.start + start,
                end: self.range.start + end,
            },
        }
    }

    /// Returns the reference count stored in the allocation header.
    fn rc(&self) -> &NumCell<u32> {
        // SAFETY: the count lives `RC_OFF_INV` bytes before the payload, inside
        // the allocation, and was initialized when the buffer was created.
        // NOTE(review): this assumes `NumCell<u32>` has the same layout as
        // `u32`; its definition is outside this file.
        unsafe {
            self.storage
                .as_ptr()
                .sub(RC_OFF_INV as _)
                .cast::<NumCell<u32>>()
                .deref()
        }
    }

    /// Panics unless this handle is the only one referring to the allocation.
    fn assert_unique(&self) {
        assert_eq!(self.rc().get(), 1);
    }

    /// Number of bytes covered by this handle, as a `u32`.
    pub fn len32(&self) -> u32 {
        self.range.end - self.range.start
    }

    /// Number of bytes covered by this handle.
    pub fn len(&self) -> usize {
        self.len32() as _
    }

    /// Raw pointer to the first byte covered by this handle.
    ///
    /// Unlike dereferencing to a slice, this does not check that the handle
    /// is unique.
    pub fn as_ptr(&self) -> *mut u8 {
        // SAFETY: `range.start` never exceeds the payload length, so the
        // result stays within (or one past the end of) the allocation.
        unsafe { self.storage.as_ptr().add(self.range.start as _) }
    }
}
/// Read access to the bytes covered by the handle.
///
/// Panics if any other handle to the same allocation exists.
impl Deref for Buf {
    type Target = [u8];

    fn deref(&self) -> &Self::Target {
        self.assert_unique();
        let (data, len) = (self.as_ptr(), self.len());
        // SAFETY: `data` points at `len` bytes inside the live allocation and,
        // as asserted above, no other handle refers to them.
        unsafe { slice::from_raw_parts(data, len) }
    }
}
/// Write access to the bytes covered by the handle.
///
/// Panics if any other handle to the same allocation exists.
impl DerefMut for Buf {
    fn deref_mut(&mut self) -> &mut Self::Target {
        self.assert_unique();
        let (data, len) = (self.as_ptr(), self.len());
        // SAFETY: `data` points at `len` bytes inside the live allocation and,
        // as asserted above, this is the only handle referring to them.
        unsafe { slice::from_raw_parts_mut(data, len) }
    }
}
/// Releases this handle's reference and frees the allocation once the last
/// handle is gone.
impl Drop for Buf {
    fn drop(&mut self) {
        // Only the handle that observes a count of 1 frees the memory.
        // NOTE(review): relies on `fetch_sub` returning the value before the
        // decrement — confirm against `NumCell`.
        if self.rc().fetch_sub(1) != 1 {
            return;
        }
        // SAFETY: the allocation begins `METADATA_SIZE` bytes before the
        // payload and its first word holds the size it was allocated with, so
        // the layout rebuilt here matches the one used for allocation.
        unsafe {
            let base = self.storage.as_ptr().sub(METADATA_SIZE as usize).cast::<u32>();
            let total = *base as usize;
            std::alloc::dealloc(
                base as *mut u8,
                Layout::from_size_align_unchecked(total, METADATA_ALIGN),
            );
        }
    }
}

View file

@ -1,10 +1,13 @@
use {
crate::{
io_uring::IoUring,
utils::buffd::{BufFdError, BUF_SIZE, CMSG_BUF_SIZE},
wheel::{Wheel, WheelTimeoutFuture},
io_uring::{IoUring, IoUringError},
time::Time,
utils::{
buf::Buf,
buffd::{BufFdError, BUF_SIZE, CMSG_BUF_SIZE},
oserror::OsError,
},
},
futures_util::{future::Fuse, select, FutureExt},
std::{
collections::VecDeque,
mem::{self, MaybeUninit},
@ -21,37 +24,33 @@ pub(super) struct MsgFds {
pub(super) fds: Vec<Rc<OwnedFd>>,
}
pub struct OutBuffer {
pub(super) struct OutBufferMeta {
pub(super) read_pos: usize,
pub(super) write_pos: usize,
pub(super) buf: *mut [MaybeUninit<u8>; OUT_BUF_SIZE],
pub(super) fds: VecDeque<MsgFds>,
}
pub struct OutBuffer {
pub(super) meta: OutBufferMeta,
pub(super) buf: Buf,
}
impl Default for OutBuffer {
fn default() -> Self {
Self {
read_pos: 0,
write_pos: 0,
buf: Box::into_raw(Box::new([MaybeUninit::<u32>::uninit(); OUT_BUF_SIZE / 4])) as _,
fds: Default::default(),
meta: OutBufferMeta {
read_pos: 0,
write_pos: 0,
fds: Default::default(),
},
buf: Buf::new(OUT_BUF_SIZE),
}
}
}
impl OutBuffer {
pub fn write(&mut self, bytes: &[MaybeUninit<u8>]) {
if bytes.len() > OUT_BUF_SIZE - self.write_pos {
panic!("Out buffer overflow");
}
unsafe {
(*self.buf)[self.write_pos..self.write_pos + bytes.len()].copy_from_slice(bytes);
}
self.write_pos += bytes.len();
}
pub fn is_full(&self) -> bool {
self.write_pos > BUF_SIZE
self.meta.write_pos > BUF_SIZE
}
}
@ -70,7 +69,7 @@ impl OutBufferSwapchain {
}
pub fn commit(&mut self) {
if self.cur.write_pos > 0 {
if self.cur.meta.write_pos > 0 {
let new = self.free.pop().unwrap_or_default();
let old = mem::replace(&mut self.cur, new);
self.pending.push_back(old);
@ -81,103 +80,66 @@ impl OutBufferSwapchain {
pub struct BufFdOut {
fd: Rc<OwnedFd>,
ring: Rc<IoUring>,
wheel: Rc<Wheel>,
cmsg_buf: Box<[MaybeUninit<u8>; CMSG_BUF_SIZE]>,
fd_ids: Vec<i32>,
}
impl BufFdOut {
pub fn new(fd: &Rc<OwnedFd>, ring: &Rc<IoUring>, wheel: &Rc<Wheel>) -> Self {
pub fn new(fd: &Rc<OwnedFd>, ring: &Rc<IoUring>) -> Self {
Self {
fd: fd.clone(),
ring: ring.clone(),
wheel: wheel.clone(),
cmsg_buf: Box::new([MaybeUninit::uninit(); CMSG_BUF_SIZE]),
fd_ids: vec![],
}
}
pub async fn flush(
&mut self,
buf: &mut OutBuffer,
timeout: &mut Option<Fuse<WheelTimeoutFuture>>,
) -> Result<(), BufFdError> {
while buf.read_pos < buf.write_pos {
if self.flush_sync(buf)? {
if timeout.is_none() {
*timeout = Some(self.wheel.timeout(5000).fuse());
}
select! {
_ = timeout.as_mut().unwrap() => {
return Err(BufFdError::Timeout);
},
res = self.ring.writable(&self.fd).fuse() => {
res?;
},
}
}
pub async fn flush(&mut self, buf: &mut OutBuffer, timeout: Time) -> Result<(), BufFdError> {
while buf.meta.read_pos < buf.meta.write_pos {
self.flush_buffer(buf, Some(timeout)).await?;
}
buf.read_pos = 0;
buf.write_pos = 0;
buf.meta.read_pos = 0;
buf.meta.write_pos = 0;
Ok(())
}
pub async fn flush_no_timeout(&mut self, buf: &mut OutBuffer) -> Result<(), BufFdError> {
while buf.read_pos < buf.write_pos {
if self.flush_sync(buf)? {
let _ = self.ring.writable(&self.fd).await?;
}
while buf.meta.read_pos < buf.meta.write_pos {
self.flush_buffer(buf, None).await?;
}
buf.read_pos = 0;
buf.write_pos = 0;
buf.meta.read_pos = 0;
buf.meta.write_pos = 0;
Ok(())
}
fn flush_sync(&mut self, buffer: &mut OutBuffer) -> Result<bool, BufFdError> {
while buffer.read_pos < buffer.write_pos {
let mut buf = unsafe { &(*buffer.buf)[buffer.read_pos..buffer.write_pos] };
let mut cmsg_len = 0;
let mut fds_opt = None;
{
let mut f = buffer.fds.front().map(|f| f.pos);
if f == Some(buffer.read_pos) {
let fds = buffer.fds.pop_front().unwrap();
self.fd_ids.clear();
self.fd_ids.extend(fds.fds.iter().map(|f| f.raw()));
let hdr = c::cmsghdr {
cmsg_len: 0,
cmsg_level: c::SOL_SOCKET,
cmsg_type: c::SCM_RIGHTS,
};
let mut cmsg_buf = &mut self.cmsg_buf[..];
cmsg_len = uapi::cmsg_write(&mut cmsg_buf, hdr, &self.fd_ids[..]).unwrap();
fds_opt = Some(fds);
f = buffer.fds.front().map(|f| f.pos)
}
if let Some(next_pos) = f {
buf = &buf[..next_pos - buffer.read_pos];
}
async fn flush_buffer(
&mut self,
buffer: &mut OutBuffer,
timeout: Option<Time>,
) -> Result<(), BufFdError> {
let mut buf = buffer
.buf
.slice(buffer.meta.read_pos..buffer.meta.write_pos);
let mut fds = vec![];
{
let mut f = buffer.meta.fds.front().map(|f| f.pos);
if f == Some(buffer.meta.read_pos) {
fds = buffer.meta.fds.pop_front().unwrap().fds;
f = buffer.meta.fds.front().map(|f| f.pos)
}
if let Some(next_pos) = f {
buf = buffer.buf.slice(buffer.meta.read_pos..next_pos);
}
let hdr = uapi::Msghdr {
iov: slice::from_ref(&buf),
control: Some(&self.cmsg_buf[..cmsg_len]),
name: uapi::sockaddr_none_ref(),
};
let bytes_sent =
match uapi::sendmsg(self.fd.raw(), &hdr, c::MSG_DONTWAIT | c::MSG_NOSIGNAL) {
Ok(b) => b,
Err(Errno(c::EAGAIN)) => {
if let Some(fds) = fds_opt {
buffer.fds.push_front(fds);
}
return Ok(true);
}
Err(Errno(c::ECONNRESET)) => return Err(BufFdError::Closed),
Err(e) => return Err(BufFdError::Io(e.into())),
};
buffer.read_pos += bytes_sent;
}
Ok(false)
match self.ring.sendmsg(&self.fd, buf, fds, timeout).await {
Ok(n) => {
buffer.meta.read_pos += n;
Ok(())
}
Err(IoUringError::OsError(OsError(c::ECONNRESET))) => return Err(BufFdError::Closed),
Err(IoUringError::OsError(OsError(c::ETIME))) => return Err(BufFdError::Timeout),
Err(e) => return Err(BufFdError::Ring(e)),
}
}
pub async fn flush2(
@ -238,11 +200,3 @@ impl BufFdOut {
Ok(false)
}
}
impl Drop for OutBuffer {
fn drop(&mut self) {
unsafe {
Box::from_raw(self.buf as *mut [MaybeUninit<u32>; OUT_BUF_SIZE / 4]);
}
}
}

View file

@ -2,14 +2,15 @@ use {
crate::{
fixed::Fixed,
object::ObjectId,
utils::buffd::buf_out::{MsgFds, OutBuffer},
utils::buffd::buf_out::{MsgFds, OutBuffer, OutBufferMeta, OUT_BUF_SIZE},
},
std::{mem, mem::MaybeUninit, rc::Rc},
uapi::OwnedFd,
std::{mem, rc::Rc},
uapi::{OwnedFd, Packed},
};
pub struct MsgFormatter<'a> {
buf: &'a mut OutBuffer,
buf: &'a mut [u8],
meta: &'a mut OutBufferMeta,
pos: usize,
fds: &'a mut Vec<Rc<OwnedFd>>,
}
@ -17,24 +18,33 @@ pub struct MsgFormatter<'a> {
impl<'a> MsgFormatter<'a> {
pub fn new(buf: &'a mut OutBuffer, fds: &'a mut Vec<Rc<OwnedFd>>) -> Self {
Self {
pos: buf.write_pos,
buf,
pos: buf.meta.write_pos,
buf: &mut buf.buf[..],
fds,
meta: &mut buf.meta,
}
}
/// Appends `bytes` to the output buffer at the current write position and
/// advances the write position by `bytes.len()`.
///
/// # Panics
///
/// Panics with "Out buffer overflow" if fewer than `bytes.len()` bytes remain
/// before `OUT_BUF_SIZE`.
fn write(&mut self, bytes: &[u8]) {
// Check the remaining capacity before touching the buffer.
if bytes.len() > OUT_BUF_SIZE - self.meta.write_pos {
panic!("Out buffer overflow");
}
self.buf[self.meta.write_pos..self.meta.write_pos + bytes.len()].copy_from_slice(bytes);
self.meta.write_pos += bytes.len();
}
pub fn int(&mut self, int: i32) -> &mut Self {
self.buf.write(uapi::as_maybe_uninit_bytes(&int));
self.write(uapi::as_bytes(&int));
self
}
pub fn uint(&mut self, int: u32) -> &mut Self {
self.buf.write(uapi::as_maybe_uninit_bytes(&int));
self.write(uapi::as_bytes(&int));
self
}
pub fn fixed(&mut self, fixed: Fixed) -> &mut Self {
self.buf.write(uapi::as_maybe_uninit_bytes(&fixed));
self.write(uapi::as_bytes(&fixed.0));
self
}
@ -50,9 +60,9 @@ impl<'a> MsgFormatter<'a> {
let len = s.len() + 1;
let cap = (len + 3) & !3;
self.uint(len as u32);
self.buf.write(uapi::as_maybe_uninit_bytes(s));
let none = [MaybeUninit::new(0); 4];
self.buf.write(&none[..cap - len + 1]);
self.write(uapi::as_bytes(s));
let none = [0; 4];
self.write(&none[..cap - len + 1]);
self
}
@ -71,46 +81,43 @@ impl<'a> MsgFormatter<'a> {
#[allow(dead_code)]
pub fn array<F: FnOnce(&mut MsgFormatter<'_>)>(&mut self, f: F) -> &mut Self {
let pos = self.buf.write_pos;
let pos = self.meta.write_pos;
self.uint(0);
let len = {
let mut fmt = MsgFormatter {
buf: self.buf,
meta: self.meta,
pos,
fds: self.fds,
};
f(&mut fmt);
let len = self.buf.write_pos - pos - 4;
let none = [MaybeUninit::new(0); 4];
self.buf
.write(&none[..self.buf.write_pos.wrapping_neg() & 3]);
let len = self.meta.write_pos - pos - 4;
let none = [0; 4];
self.write(&none[..self.meta.write_pos.wrapping_neg() & 3]);
len as u32
};
unsafe {
(*self.buf.buf)[pos..pos + 4].copy_from_slice(uapi::as_maybe_uninit_bytes(&len));
}
self.buf[pos..pos + 4].copy_from_slice(uapi::as_bytes(&len));
self
}
pub fn binary<T: ?Sized>(&mut self, t: &T) -> &mut Self {
pub fn binary<T: ?Sized + Packed>(&mut self, t: &T) -> &mut Self {
self.uint(mem::size_of_val(t) as u32);
self.buf.write(uapi::as_maybe_uninit_bytes(t));
let none = [MaybeUninit::new(0); 4];
self.buf
.write(&none[..self.buf.write_pos.wrapping_neg() & 3]);
self.write(uapi::as_bytes(t));
let none = [0; 4];
self.write(&none[..self.meta.write_pos.wrapping_neg() & 3]);
self
}
pub fn write_len(self) {
assert!(self.buf.write_pos - self.pos >= 8);
assert!(self.meta.write_pos - self.pos >= 8);
assert_eq!(self.pos % 4, 0);
unsafe {
let second_ptr = (self.buf.buf as *mut u8).add(self.pos + 4) as *mut u32;
let len = ((self.buf.write_pos - self.pos) as u32) << 16;
let second_ptr = self.buf.as_ptr().add(self.pos + 4) as *mut u32;
let len = ((self.meta.write_pos - self.pos) as u32) << 16;
*second_ptr |= len;
}
if self.fds.len() > 0 {
self.buf.fds.push_back(MsgFds {
self.meta.fds.push_back(MsgFds {
pos: self.pos,
fds: mem::take(self.fds),
})