io-uring: add sendmsg
This commit is contained in:
parent
9416efeabe
commit
e4f97287bc
17 changed files with 493 additions and 191 deletions
155
src/utils/buf.rs
Normal file
155
src/utils/buf.rs
Normal file
|
|
@ -0,0 +1,155 @@
|
|||
use {
|
||||
crate::utils::{numcell::NumCell, ptr_ext::PtrExt},
|
||||
std::{
|
||||
alloc::Layout,
|
||||
collections::Bound,
|
||||
ops::{Deref, DerefMut, Range, RangeBounds},
|
||||
ptr::NonNull,
|
||||
slice,
|
||||
},
|
||||
};
|
||||
|
||||
// Layout of every allocation made by this module: an 8-byte header followed by the payload.
//   bytes 0..4  total allocation size (header + payload), read back when freeing
//   bytes 4..8  reference count, starts at 1
//
// Size in bytes of the header that precedes the payload.
const METADATA_SIZE: u32 = 8;
|
||||
// Alignment requested for the whole allocation; the header is accessed as `u32`s.
const METADATA_ALIGN: usize = 4;
|
||||
// Offset of the reference count from the start of the allocation.
const RC_OFF: u32 = 4;
|
||||
// Distance from the first payload byte back to the reference count.
const RC_OFF_INV: u32 = METADATA_SIZE - RC_OFF;
|
||||
|
||||
/// A handle to a reference-counted, heap-allocated byte buffer.
///
/// Several handles may share one allocation (see `clone` and `slice`); the allocation is freed
/// when the last handle is dropped. Dereferencing to a byte slice asserts that the handle is
/// the only one referring to the allocation.
pub struct Buf {
|
||||
/// Points at the first payload byte, directly after the metadata header.
storage: NonNull<u8>,
|
||||
/// The part of the payload, as byte offsets from `storage`, visible through this handle.
range: Range<u32>,
|
||||
}
|
||||
|
||||
impl Buf {
|
||||
pub fn from_slice(vec: &[u8]) -> Buf {
|
||||
let len = vec.len();
|
||||
assert!(len <= (u32::MAX - METADATA_SIZE) as usize);
|
||||
let len = len as u32;
|
||||
let size = len + METADATA_SIZE;
|
||||
let layout = Layout::from_size_align(size as _, METADATA_ALIGN).unwrap();
|
||||
let ptr = unsafe { std::alloc::alloc(layout) };
|
||||
if ptr.is_null() {
|
||||
std::alloc::handle_alloc_error(layout);
|
||||
}
|
||||
unsafe {
|
||||
*ptr.cast::<u32>() = size;
|
||||
*ptr.add(RC_OFF as _).cast::<u32>() = 1;
|
||||
let mut buf = Buf {
|
||||
storage: NonNull::new_unchecked(ptr.add(METADATA_SIZE as _)),
|
||||
range: Range { start: 0, end: len },
|
||||
};
|
||||
buf[..].copy_from_slice(vec);
|
||||
buf
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new(len: usize) -> Buf {
|
||||
assert!(len <= (u32::MAX - METADATA_SIZE) as usize);
|
||||
let len = len as u32;
|
||||
let size = len + METADATA_SIZE;
|
||||
let layout = Layout::from_size_align(size as _, METADATA_ALIGN).unwrap();
|
||||
let ptr = unsafe { std::alloc::alloc_zeroed(layout) };
|
||||
if ptr.is_null() {
|
||||
std::alloc::handle_alloc_error(layout);
|
||||
}
|
||||
unsafe {
|
||||
*ptr.cast::<u32>() = size;
|
||||
*ptr.add(RC_OFF as _).cast::<u32>() = 1;
|
||||
Buf {
|
||||
storage: NonNull::new_unchecked(ptr.add(METADATA_SIZE as _)),
|
||||
range: Range { start: 0, end: len },
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn clone(&mut self) -> Buf {
|
||||
self.rc().fetch_add(1);
|
||||
Buf {
|
||||
storage: self.storage,
|
||||
range: self.range.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn slice(&mut self, range: impl RangeBounds<usize>) -> Buf {
|
||||
let start = match range.start_bound() {
|
||||
Bound::Included(&n) => n,
|
||||
Bound::Excluded(&n) => n.wrapping_add(1),
|
||||
Bound::Unbounded => 0,
|
||||
};
|
||||
let end = match range.end_bound() {
|
||||
Bound::Included(&n) => n.wrapping_add(1),
|
||||
Bound::Excluded(&n) => n,
|
||||
Bound::Unbounded => self.len(),
|
||||
};
|
||||
self.slice_(start as _, end as _)
|
||||
}
|
||||
|
||||
fn slice_(&mut self, start: u32, end: u32) -> Buf {
|
||||
assert!(start <= end);
|
||||
assert!(end <= self.len32());
|
||||
self.rc().fetch_add(1);
|
||||
Buf {
|
||||
storage: self.storage,
|
||||
range: Range {
|
||||
start: self.range.start + start,
|
||||
end: self.range.start + end,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn rc(&self) -> &NumCell<u32> {
|
||||
unsafe {
|
||||
self.storage
|
||||
.as_ptr()
|
||||
.sub(RC_OFF_INV as _)
|
||||
.cast::<NumCell<u32>>()
|
||||
.deref()
|
||||
}
|
||||
}
|
||||
|
||||
fn assert_unique(&self) {
|
||||
assert_eq!(self.rc().get(), 1);
|
||||
}
|
||||
|
||||
pub fn len32(&self) -> u32 {
|
||||
self.range.end - self.range.start
|
||||
}
|
||||
|
||||
pub fn len(&self) -> usize {
|
||||
self.len32() as _
|
||||
}
|
||||
|
||||
pub fn as_ptr(&self) -> *mut u8 {
|
||||
unsafe { self.storage.as_ptr().add(self.range.start as _) }
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for Buf {
|
||||
type Target = [u8];
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
self.assert_unique();
|
||||
unsafe { slice::from_raw_parts(self.as_ptr(), self.len()) }
|
||||
}
|
||||
}
|
||||
|
||||
impl DerefMut for Buf {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
self.assert_unique();
|
||||
unsafe { slice::from_raw_parts_mut(self.as_ptr(), self.len()) }
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for Buf {
|
||||
fn drop(&mut self) {
|
||||
unsafe {
|
||||
let prev = self.rc().fetch_sub(1);
|
||||
if prev != 1 {
|
||||
return;
|
||||
}
|
||||
let ptr = self.storage.as_ptr().sub(METADATA_SIZE as _).cast::<u32>();
|
||||
let size = *ptr as _;
|
||||
let layout = Layout::from_size_align_unchecked(size, METADATA_ALIGN);
|
||||
std::alloc::dealloc(ptr as _, layout);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,10 +1,13 @@
|
|||
use {
|
||||
crate::{
|
||||
io_uring::IoUring,
|
||||
utils::buffd::{BufFdError, BUF_SIZE, CMSG_BUF_SIZE},
|
||||
wheel::{Wheel, WheelTimeoutFuture},
|
||||
io_uring::{IoUring, IoUringError},
|
||||
time::Time,
|
||||
utils::{
|
||||
buf::Buf,
|
||||
buffd::{BufFdError, BUF_SIZE, CMSG_BUF_SIZE},
|
||||
oserror::OsError,
|
||||
},
|
||||
},
|
||||
futures_util::{future::Fuse, select, FutureExt},
|
||||
std::{
|
||||
collections::VecDeque,
|
||||
mem::{self, MaybeUninit},
|
||||
|
|
@ -21,37 +24,33 @@ pub(super) struct MsgFds {
|
|||
pub(super) fds: Vec<Rc<OwnedFd>>,
|
||||
}
|
||||
|
||||
pub struct OutBuffer {
|
||||
pub(super) struct OutBufferMeta {
|
||||
pub(super) read_pos: usize,
|
||||
pub(super) write_pos: usize,
|
||||
pub(super) buf: *mut [MaybeUninit<u8>; OUT_BUF_SIZE],
|
||||
pub(super) fds: VecDeque<MsgFds>,
|
||||
}
|
||||
|
||||
pub struct OutBuffer {
|
||||
pub(super) meta: OutBufferMeta,
|
||||
pub(super) buf: Buf,
|
||||
}
|
||||
|
||||
impl Default for OutBuffer {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
read_pos: 0,
|
||||
write_pos: 0,
|
||||
buf: Box::into_raw(Box::new([MaybeUninit::<u32>::uninit(); OUT_BUF_SIZE / 4])) as _,
|
||||
fds: Default::default(),
|
||||
meta: OutBufferMeta {
|
||||
read_pos: 0,
|
||||
write_pos: 0,
|
||||
fds: Default::default(),
|
||||
},
|
||||
buf: Buf::new(OUT_BUF_SIZE),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl OutBuffer {
|
||||
pub fn write(&mut self, bytes: &[MaybeUninit<u8>]) {
|
||||
if bytes.len() > OUT_BUF_SIZE - self.write_pos {
|
||||
panic!("Out buffer overflow");
|
||||
}
|
||||
unsafe {
|
||||
(*self.buf)[self.write_pos..self.write_pos + bytes.len()].copy_from_slice(bytes);
|
||||
}
|
||||
self.write_pos += bytes.len();
|
||||
}
|
||||
|
||||
pub fn is_full(&self) -> bool {
|
||||
self.write_pos > BUF_SIZE
|
||||
self.meta.write_pos > BUF_SIZE
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -70,7 +69,7 @@ impl OutBufferSwapchain {
|
|||
}
|
||||
|
||||
pub fn commit(&mut self) {
|
||||
if self.cur.write_pos > 0 {
|
||||
if self.cur.meta.write_pos > 0 {
|
||||
let new = self.free.pop().unwrap_or_default();
|
||||
let old = mem::replace(&mut self.cur, new);
|
||||
self.pending.push_back(old);
|
||||
|
|
@ -81,103 +80,66 @@ impl OutBufferSwapchain {
|
|||
pub struct BufFdOut {
|
||||
fd: Rc<OwnedFd>,
|
||||
ring: Rc<IoUring>,
|
||||
wheel: Rc<Wheel>,
|
||||
cmsg_buf: Box<[MaybeUninit<u8>; CMSG_BUF_SIZE]>,
|
||||
fd_ids: Vec<i32>,
|
||||
}
|
||||
|
||||
impl BufFdOut {
|
||||
pub fn new(fd: &Rc<OwnedFd>, ring: &Rc<IoUring>, wheel: &Rc<Wheel>) -> Self {
|
||||
pub fn new(fd: &Rc<OwnedFd>, ring: &Rc<IoUring>) -> Self {
|
||||
Self {
|
||||
fd: fd.clone(),
|
||||
ring: ring.clone(),
|
||||
wheel: wheel.clone(),
|
||||
cmsg_buf: Box::new([MaybeUninit::uninit(); CMSG_BUF_SIZE]),
|
||||
fd_ids: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn flush(
|
||||
&mut self,
|
||||
buf: &mut OutBuffer,
|
||||
timeout: &mut Option<Fuse<WheelTimeoutFuture>>,
|
||||
) -> Result<(), BufFdError> {
|
||||
while buf.read_pos < buf.write_pos {
|
||||
if self.flush_sync(buf)? {
|
||||
if timeout.is_none() {
|
||||
*timeout = Some(self.wheel.timeout(5000).fuse());
|
||||
}
|
||||
select! {
|
||||
_ = timeout.as_mut().unwrap() => {
|
||||
return Err(BufFdError::Timeout);
|
||||
},
|
||||
res = self.ring.writable(&self.fd).fuse() => {
|
||||
res?;
|
||||
},
|
||||
}
|
||||
}
|
||||
pub async fn flush(&mut self, buf: &mut OutBuffer, timeout: Time) -> Result<(), BufFdError> {
|
||||
while buf.meta.read_pos < buf.meta.write_pos {
|
||||
self.flush_buffer(buf, Some(timeout)).await?;
|
||||
}
|
||||
buf.read_pos = 0;
|
||||
buf.write_pos = 0;
|
||||
buf.meta.read_pos = 0;
|
||||
buf.meta.write_pos = 0;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn flush_no_timeout(&mut self, buf: &mut OutBuffer) -> Result<(), BufFdError> {
|
||||
while buf.read_pos < buf.write_pos {
|
||||
if self.flush_sync(buf)? {
|
||||
let _ = self.ring.writable(&self.fd).await?;
|
||||
}
|
||||
while buf.meta.read_pos < buf.meta.write_pos {
|
||||
self.flush_buffer(buf, None).await?;
|
||||
}
|
||||
buf.read_pos = 0;
|
||||
buf.write_pos = 0;
|
||||
buf.meta.read_pos = 0;
|
||||
buf.meta.write_pos = 0;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn flush_sync(&mut self, buffer: &mut OutBuffer) -> Result<bool, BufFdError> {
|
||||
while buffer.read_pos < buffer.write_pos {
|
||||
let mut buf = unsafe { &(*buffer.buf)[buffer.read_pos..buffer.write_pos] };
|
||||
let mut cmsg_len = 0;
|
||||
let mut fds_opt = None;
|
||||
{
|
||||
let mut f = buffer.fds.front().map(|f| f.pos);
|
||||
if f == Some(buffer.read_pos) {
|
||||
let fds = buffer.fds.pop_front().unwrap();
|
||||
self.fd_ids.clear();
|
||||
self.fd_ids.extend(fds.fds.iter().map(|f| f.raw()));
|
||||
let hdr = c::cmsghdr {
|
||||
cmsg_len: 0,
|
||||
cmsg_level: c::SOL_SOCKET,
|
||||
cmsg_type: c::SCM_RIGHTS,
|
||||
};
|
||||
let mut cmsg_buf = &mut self.cmsg_buf[..];
|
||||
cmsg_len = uapi::cmsg_write(&mut cmsg_buf, hdr, &self.fd_ids[..]).unwrap();
|
||||
fds_opt = Some(fds);
|
||||
f = buffer.fds.front().map(|f| f.pos)
|
||||
}
|
||||
if let Some(next_pos) = f {
|
||||
buf = &buf[..next_pos - buffer.read_pos];
|
||||
}
|
||||
async fn flush_buffer(
|
||||
&mut self,
|
||||
buffer: &mut OutBuffer,
|
||||
timeout: Option<Time>,
|
||||
) -> Result<(), BufFdError> {
|
||||
let mut buf = buffer
|
||||
.buf
|
||||
.slice(buffer.meta.read_pos..buffer.meta.write_pos);
|
||||
let mut fds = vec![];
|
||||
{
|
||||
let mut f = buffer.meta.fds.front().map(|f| f.pos);
|
||||
if f == Some(buffer.meta.read_pos) {
|
||||
fds = buffer.meta.fds.pop_front().unwrap().fds;
|
||||
f = buffer.meta.fds.front().map(|f| f.pos)
|
||||
}
|
||||
if let Some(next_pos) = f {
|
||||
buf = buffer.buf.slice(buffer.meta.read_pos..next_pos);
|
||||
}
|
||||
let hdr = uapi::Msghdr {
|
||||
iov: slice::from_ref(&buf),
|
||||
control: Some(&self.cmsg_buf[..cmsg_len]),
|
||||
name: uapi::sockaddr_none_ref(),
|
||||
};
|
||||
let bytes_sent =
|
||||
match uapi::sendmsg(self.fd.raw(), &hdr, c::MSG_DONTWAIT | c::MSG_NOSIGNAL) {
|
||||
Ok(b) => b,
|
||||
Err(Errno(c::EAGAIN)) => {
|
||||
if let Some(fds) = fds_opt {
|
||||
buffer.fds.push_front(fds);
|
||||
}
|
||||
return Ok(true);
|
||||
}
|
||||
Err(Errno(c::ECONNRESET)) => return Err(BufFdError::Closed),
|
||||
Err(e) => return Err(BufFdError::Io(e.into())),
|
||||
};
|
||||
buffer.read_pos += bytes_sent;
|
||||
}
|
||||
Ok(false)
|
||||
match self.ring.sendmsg(&self.fd, buf, fds, timeout).await {
|
||||
Ok(n) => {
|
||||
buffer.meta.read_pos += n;
|
||||
Ok(())
|
||||
}
|
||||
Err(IoUringError::OsError(OsError(c::ECONNRESET))) => return Err(BufFdError::Closed),
|
||||
Err(IoUringError::OsError(OsError(c::ETIME))) => return Err(BufFdError::Timeout),
|
||||
Err(e) => return Err(BufFdError::Ring(e)),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn flush2(
|
||||
|
|
@ -238,11 +200,3 @@ impl BufFdOut {
|
|||
Ok(false)
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for OutBuffer {
|
||||
fn drop(&mut self) {
|
||||
unsafe {
|
||||
Box::from_raw(self.buf as *mut [MaybeUninit<u32>; OUT_BUF_SIZE / 4]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,14 +2,15 @@ use {
|
|||
crate::{
|
||||
fixed::Fixed,
|
||||
object::ObjectId,
|
||||
utils::buffd::buf_out::{MsgFds, OutBuffer},
|
||||
utils::buffd::buf_out::{MsgFds, OutBuffer, OutBufferMeta, OUT_BUF_SIZE},
|
||||
},
|
||||
std::{mem, mem::MaybeUninit, rc::Rc},
|
||||
uapi::OwnedFd,
|
||||
std::{mem, rc::Rc},
|
||||
uapi::{OwnedFd, Packed},
|
||||
};
|
||||
|
||||
pub struct MsgFormatter<'a> {
|
||||
buf: &'a mut OutBuffer,
|
||||
buf: &'a mut [u8],
|
||||
meta: &'a mut OutBufferMeta,
|
||||
pos: usize,
|
||||
fds: &'a mut Vec<Rc<OwnedFd>>,
|
||||
}
|
||||
|
|
@ -17,24 +18,33 @@ pub struct MsgFormatter<'a> {
|
|||
impl<'a> MsgFormatter<'a> {
|
||||
pub fn new(buf: &'a mut OutBuffer, fds: &'a mut Vec<Rc<OwnedFd>>) -> Self {
|
||||
Self {
|
||||
pos: buf.write_pos,
|
||||
buf,
|
||||
pos: buf.meta.write_pos,
|
||||
buf: &mut buf.buf[..],
|
||||
fds,
|
||||
meta: &mut buf.meta,
|
||||
}
|
||||
}
|
||||
|
||||
fn write(&mut self, bytes: &[u8]) {
|
||||
if bytes.len() > OUT_BUF_SIZE - self.meta.write_pos {
|
||||
panic!("Out buffer overflow");
|
||||
}
|
||||
self.buf[self.meta.write_pos..self.meta.write_pos + bytes.len()].copy_from_slice(bytes);
|
||||
self.meta.write_pos += bytes.len();
|
||||
}
|
||||
|
||||
pub fn int(&mut self, int: i32) -> &mut Self {
|
||||
self.buf.write(uapi::as_maybe_uninit_bytes(&int));
|
||||
self.write(uapi::as_bytes(&int));
|
||||
self
|
||||
}
|
||||
|
||||
pub fn uint(&mut self, int: u32) -> &mut Self {
|
||||
self.buf.write(uapi::as_maybe_uninit_bytes(&int));
|
||||
self.write(uapi::as_bytes(&int));
|
||||
self
|
||||
}
|
||||
|
||||
pub fn fixed(&mut self, fixed: Fixed) -> &mut Self {
|
||||
self.buf.write(uapi::as_maybe_uninit_bytes(&fixed));
|
||||
self.write(uapi::as_bytes(&fixed.0));
|
||||
self
|
||||
}
|
||||
|
||||
|
|
@ -50,9 +60,9 @@ impl<'a> MsgFormatter<'a> {
|
|||
let len = s.len() + 1;
|
||||
let cap = (len + 3) & !3;
|
||||
self.uint(len as u32);
|
||||
self.buf.write(uapi::as_maybe_uninit_bytes(s));
|
||||
let none = [MaybeUninit::new(0); 4];
|
||||
self.buf.write(&none[..cap - len + 1]);
|
||||
self.write(uapi::as_bytes(s));
|
||||
let none = [0; 4];
|
||||
self.write(&none[..cap - len + 1]);
|
||||
self
|
||||
}
|
||||
|
||||
|
|
@ -71,46 +81,43 @@ impl<'a> MsgFormatter<'a> {
|
|||
|
||||
#[allow(dead_code)]
|
||||
pub fn array<F: FnOnce(&mut MsgFormatter<'_>)>(&mut self, f: F) -> &mut Self {
|
||||
let pos = self.buf.write_pos;
|
||||
let pos = self.meta.write_pos;
|
||||
self.uint(0);
|
||||
let len = {
|
||||
let mut fmt = MsgFormatter {
|
||||
buf: self.buf,
|
||||
meta: self.meta,
|
||||
pos,
|
||||
fds: self.fds,
|
||||
};
|
||||
f(&mut fmt);
|
||||
let len = self.buf.write_pos - pos - 4;
|
||||
let none = [MaybeUninit::new(0); 4];
|
||||
self.buf
|
||||
.write(&none[..self.buf.write_pos.wrapping_neg() & 3]);
|
||||
let len = self.meta.write_pos - pos - 4;
|
||||
let none = [0; 4];
|
||||
self.write(&none[..self.meta.write_pos.wrapping_neg() & 3]);
|
||||
len as u32
|
||||
};
|
||||
unsafe {
|
||||
(*self.buf.buf)[pos..pos + 4].copy_from_slice(uapi::as_maybe_uninit_bytes(&len));
|
||||
}
|
||||
self.buf[pos..pos + 4].copy_from_slice(uapi::as_bytes(&len));
|
||||
self
|
||||
}
|
||||
|
||||
pub fn binary<T: ?Sized>(&mut self, t: &T) -> &mut Self {
|
||||
pub fn binary<T: ?Sized + Packed>(&mut self, t: &T) -> &mut Self {
|
||||
self.uint(mem::size_of_val(t) as u32);
|
||||
self.buf.write(uapi::as_maybe_uninit_bytes(t));
|
||||
let none = [MaybeUninit::new(0); 4];
|
||||
self.buf
|
||||
.write(&none[..self.buf.write_pos.wrapping_neg() & 3]);
|
||||
self.write(uapi::as_bytes(t));
|
||||
let none = [0; 4];
|
||||
self.write(&none[..self.meta.write_pos.wrapping_neg() & 3]);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn write_len(self) {
|
||||
assert!(self.buf.write_pos - self.pos >= 8);
|
||||
assert!(self.meta.write_pos - self.pos >= 8);
|
||||
assert_eq!(self.pos % 4, 0);
|
||||
unsafe {
|
||||
let second_ptr = (self.buf.buf as *mut u8).add(self.pos + 4) as *mut u32;
|
||||
let len = ((self.buf.write_pos - self.pos) as u32) << 16;
|
||||
let second_ptr = self.buf.as_ptr().add(self.pos + 4) as *mut u32;
|
||||
let len = ((self.meta.write_pos - self.pos) as u32) << 16;
|
||||
*second_ptr |= len;
|
||||
}
|
||||
if self.fds.len() > 0 {
|
||||
self.buf.fds.push_back(MsgFds {
|
||||
self.meta.fds.push_back(MsgFds {
|
||||
pos: self.pos,
|
||||
fds: mem::take(self.fds),
|
||||
})
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue