1
0
Fork 0
forked from wry/wry

vulkan: add an async allocator

This commit is contained in:
Julian Orth 2024-09-07 17:38:12 +02:00
parent fe8238421f
commit 37fb45df00
6 changed files with 335 additions and 72 deletions

View file

@ -13,7 +13,7 @@ use {
#[derive(Debug, Error)]
#[error(transparent)]
pub struct AllocatorError(#[from] pub Box<dyn Error>);
pub struct AllocatorError(#[from] pub Box<dyn Error + Send>);
bitflags! {
BufferUsage: u32;

View file

@ -290,7 +290,6 @@ impl CpuWorker {
})
}
#[expect(dead_code)]
pub fn submit(&self, job: Box<dyn CpuJob>) -> PendingJob {
let mut job = NonNull::from(Box::leak(job));
let id = self.data.next.next();

View file

@ -642,7 +642,7 @@ pub struct GfxFormat {
#[derive(Error)]
#[error(transparent)]
pub struct GfxError(pub Box<dyn Error>);
pub struct GfxError(pub Box<dyn Error + Send>);
impl Debug for GfxError {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {

View file

@ -1,26 +1,55 @@
use {
crate::{
gfx_apis::vulkan::{device::VulkanDevice, instance::API_VERSION, VulkanError},
cpu_worker::{AsyncCpuWork, CpuJob, CpuWork, CpuWorker},
gfx_apis::vulkan::{
device::VulkanDevice, instance::API_VERSION, renderer::VulkanRenderer, VulkanError,
},
utils::{numcell::NumCell, ptr_ext::MutPtrExt},
},
ash::vk::{DeviceMemory, DeviceSize, MemoryRequirements},
ash::{
vk::{DeviceMemory, DeviceSize, MemoryRequirements},
Device,
},
gpu_alloc::{Config, GpuAllocator, MemoryBlock, MemoryPropertyFlags, Request, UsageFlags},
gpu_alloc_ash::AshMemoryDevice,
parking_lot::Mutex,
std::{
cell::{Cell, UnsafeCell},
rc::Rc,
sync::Arc,
},
};
pub struct VulkanAllocator {
pub(super) device: Rc<VulkanDevice>,
pub(super) non_coherent_atom_mask: u64,
/// Allocator storage in which the `GpuAllocator` sits behind an `Arc<Mutex<..>>`.
///
/// The `Arc` is cloned into `AllocWork` and `FreeWork`, which lock the mutex
/// from their `run` methods.
pub struct SyncAllocatorStorage {
/// The shared, mutex-protected allocator.
allocator: Arc<Mutex<GpuAllocator<DeviceMemory>>>,
/// The device whose `device` handle is passed to the allocator calls.
device: Rc<VulkanDevice>,
}
/// Allocator storage in which the `GpuAllocator` is held in an `UnsafeCell`
/// without any locking.
pub struct UnsyncAllocatorStorage {
/// The allocator; mutable access is obtained through the cell's raw pointer.
allocator: UnsafeCell<GpuAllocator<DeviceMemory>>,
/// The device whose `device` handle is passed to the allocator calls.
device: Rc<VulkanDevice>,
}
/// An allocator that is generic over how the underlying `GpuAllocator` is stored.
///
/// `T` is either `UnsyncAllocatorStorage` or `SyncAllocatorStorage`; see the
/// `VulkanAllocator` and `VulkanThreadedAllocator` aliases.
pub struct VulkanAllocatorType<T> {
/// The storage holding the `GpuAllocator` and the device.
storage: T,
/// Initialized to `non_coherent_atom_size - 1` when the allocator is created.
non_coherent_atom_mask: u64,
/// Running sum of block sizes: increased when an allocation is committed and
/// decreased when it is freed.
total: NumCell<u64>,
}
/// Allocator backed by `UnsyncAllocatorStorage` (an `UnsafeCell`, no locking).
pub type VulkanAllocator = VulkanAllocatorType<UnsyncAllocatorStorage>;
/// Allocator backed by `SyncAllocatorStorage` (an `Arc<Mutex<..>>`), used by the
/// asynchronous allocation and free jobs.
pub type VulkanThreadedAllocator = VulkanAllocatorType<SyncAllocatorStorage>;
/// Records which allocator a `VulkanAllocation` came from, so that dropping the
/// allocation can return the memory through the matching path.
enum AllocatorType {
/// Allocated from the unsynchronized allocator.
Local(Rc<VulkanAllocator>),
/// Allocated from the threaded allocator. The renderer and CPU worker are kept
/// so that a free job can be submitted when the allocation is dropped.
Threaded {
allocator: Rc<VulkanThreadedAllocator>,
renderer: Rc<VulkanRenderer>,
cpu: Rc<CpuWorker>,
},
}
pub struct VulkanAllocation {
pub(super) allocator: Rc<VulkanAllocator>,
allocator: AllocatorType,
pub(super) memory: DeviceMemory,
pub(super) offset: DeviceSize,
pub(super) mem: Option<*mut u8>,
@ -29,26 +58,65 @@ pub struct VulkanAllocation {
block: Cell<Option<MemoryBlock<DeviceMemory>>>,
}
impl VulkanAllocation {
    /// Releases this allocation's memory block immediately on the calling thread.
    ///
    /// Subtracts `self.size` from `allocator.total`, takes the block out of
    /// `self.block` and hands it to `do_free` together with the mapping pointer.
    ///
    /// # Panics
    ///
    /// Panics if the block has already been taken out of `self.block`.
    ///
    /// # Safety
    ///
    /// Forwards to the unsafe `do_free`; `gpu` and `device` are presumably
    /// required to be the allocator and device the block was allocated from
    /// (TODO confirm against gpu-alloc's `dealloc` contract).
    unsafe fn free_locally<T>(
        &self,
        allocator: &VulkanAllocatorType<T>,
        device: &VulkanDevice,
        gpu: &mut GpuAllocator<DeviceMemory>,
    ) {
        allocator.total.fetch_sub(self.size);
        do_free(
            gpu,
            &device.device,
            self.block.take().unwrap(),
            self.mem,
        );
    }
}
impl Drop for VulkanAllocation {
fn drop(&mut self) {
unsafe {
self.allocator.total.fetch_sub(self.size);
let mut block = self.block.take().unwrap();
if let Some(_ptr) = self.mem {
// log::info!("free = {:?} - {:?} ({})", ptr, ptr.add(block.size() as usize), block.size());
block.unmap(AshMemoryDevice::wrap(&self.allocator.device.device));
match &self.allocator {
AllocatorType::Local(a) => self.free_locally(a, &a.storage.device, a.allocator()),
AllocatorType::Threaded {
allocator,
renderer,
cpu,
} => {
if renderer.defunct.get() {
self.free_locally(
allocator,
&allocator.storage.device,
&mut allocator.storage.allocator.lock(),
);
} else {
let id = renderer.allocate_point();
let job = FreeJob {
id,
renderer: renderer.clone(),
allocator: allocator.clone(),
size: self.size,
work: FreeWork {
device: allocator.storage.device.device.clone(),
allocator: allocator.storage.allocator.clone(),
allocation: Some(UnsafeAllocation {
block: self.block.take().unwrap(),
ptr: self.mem,
}),
},
};
let pending = cpu.submit(Box::new(job));
renderer.pending_cpu_jobs.set(id, pending);
}
}
}
self.allocator
.allocator
.get()
.deref_mut()
.dealloc(AshMemoryDevice::wrap(&self.allocator.device.device), block);
}
}
}
impl VulkanDevice {
pub fn create_allocator(self: &Rc<Self>) -> Result<Rc<VulkanAllocator>, VulkanError> {
fn create_allocator_<T>(
self: &Rc<Self>,
map: impl FnOnce(GpuAllocator<DeviceMemory>) -> T,
) -> Result<Rc<VulkanAllocatorType<T>>, VulkanError> {
let config = Config::i_am_prototyping();
let props = unsafe {
gpu_alloc_ash::device_properties(
@ -61,53 +129,40 @@ impl VulkanDevice {
props.buffer_device_address = false;
let non_coherent_atom_size = props.non_coherent_atom_size;
let allocator = GpuAllocator::new(config, props);
Ok(Rc::new(VulkanAllocator {
device: self.clone(),
Ok(Rc::new(VulkanAllocatorType {
non_coherent_atom_mask: non_coherent_atom_size - 1,
allocator: UnsafeCell::new(allocator),
storage: map(allocator),
total: Default::default(),
}))
}
}
impl VulkanAllocator {
fn allocator(&self) -> &mut GpuAllocator<DeviceMemory> {
unsafe { self.allocator.get().deref_mut() }
pub fn create_allocator(self: &Rc<Self>) -> Result<Rc<VulkanAllocator>, VulkanError> {
self.create_allocator_(|a| UnsyncAllocatorStorage {
allocator: UnsafeCell::new(a),
device: self.clone(),
})
}
pub fn alloc(
pub fn create_threaded_allocator(
self: &Rc<Self>,
req: &MemoryRequirements,
usage: UsageFlags,
map: bool,
) -> Result<VulkanAllocation, VulkanError> {
let request = Request {
size: req.size,
align_mask: req.alignment - 1,
usage,
memory_types: req.memory_type_bits,
};
let block = unsafe {
self.allocator()
.alloc(AshMemoryDevice::wrap(&self.device.device), request)
};
let mut block = block.map_err(VulkanError::AllocateMemory2)?;
let ptr = match map {
true => {
let ptr = unsafe {
block.map(
AshMemoryDevice::wrap(&self.device.device),
0,
block.size() as usize,
)
};
Some(ptr.map_err(VulkanError::MapMemory)?.as_ptr())
}
false => None,
};
) -> Result<Rc<VulkanThreadedAllocator>, VulkanError> {
self.create_allocator_(|a| SyncAllocatorStorage {
allocator: Arc::new(Mutex::new(a)),
device: self.clone(),
})
}
}
impl<T> VulkanAllocatorType<T> {
fn commit_allocation(
self: &Rc<Self>,
ua: UnsafeAllocation,
allocator: AllocatorType,
) -> VulkanAllocation {
let UnsafeAllocation { block, ptr } = ua;
self.total.fetch_add(block.size());
Ok(VulkanAllocation {
allocator: self.clone(),
VulkanAllocation {
allocator,
memory: *block.memory(),
offset: block.offset(),
mem: ptr,
@ -117,17 +172,218 @@ impl VulkanAllocator {
false => Some(self.non_coherent_atom_mask),
},
block: Cell::new(Some(block)),
})
}
}
// NOTE(review): this impl reads `self.allocator` and `self.device` directly, which
// matches a struct that owns those fields itself rather than `VulkanAllocatorType`
// (where they live under `storage`). Presumably this is the pre-change version of
// the cleanup logic; confirm before relying on it.
/// Runs the allocator's `cleanup` against the device when the allocator is dropped.
impl Drop for VulkanAllocator {
fn drop(&mut self) {
unsafe {
self.allocator
.get()
.deref_mut()
.cleanup(AshMemoryDevice::wrap(&self.device.device));
}
}
}
impl VulkanAllocator {
    /// Returns a mutable reference to the `GpuAllocator` stored in the `UnsafeCell`.
    fn allocator(&self) -> &mut GpuAllocator<DeviceMemory> {
        let raw = self.storage.allocator.get();
        // NOTE(review): hands out `&mut` from `&self`; soundness relies on callers
        // never holding two of these references at once — confirm.
        unsafe { raw.deref_mut() }
    }

    /// Synchronously allocates memory matching `req`, optionally mapping it.
    ///
    /// The allocation is performed by `do_alloc` and then registered with this
    /// allocator through `commit_allocation`, tagged as `AllocatorType::Local`.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `do_alloc`.
    pub fn alloc(
        self: &Rc<Self>,
        req: &MemoryRequirements,
        usage: UsageFlags,
        map: bool,
    ) -> Result<VulkanAllocation, VulkanError> {
        let device = &self.storage.device.device;
        let ua = do_alloc(self.allocator(), device, req, usage, map)?;
        let owner = AllocatorType::Local(Rc::clone(self));
        Ok(self.commit_allocation(ua, owner))
    }
}
impl VulkanThreadedAllocator {
#[expect(dead_code)]
pub fn async_alloc(
self: &Rc<Self>,
renderer: &Rc<VulkanRenderer>,
cpu: &Rc<CpuWorker>,
req: MemoryRequirements,
usage: UsageFlags,
map: bool,
cb: impl FnOnce(Result<VulkanAllocation, VulkanError>) + 'static,
) -> Result<(), VulkanError> {
renderer.check_defunct()?;
let id = renderer.allocate_point();
let job = AllocJob {
id,
renderer: renderer.clone(),
cpu: cpu.clone(),
allocator: self.clone(),
cb: Some(cb),
work: AllocWork {
req,
usage,
map,
device: self.storage.device.device.clone(),
allocator: self.storage.allocator.clone(),
res: None,
},
};
let pending = cpu.submit(Box::new(job));
renderer.pending_cpu_jobs.set(id, pending);
Ok(())
}
}
/// CPU job that performs an allocation via `AllocWork` and delivers the result
/// to a callback of type `T` when the job completes.
struct AllocJob<T> {
/// Key under which the pending job is stored in `renderer.pending_cpu_jobs`.
id: u64,
renderer: Rc<VulkanRenderer>,
/// Stored in the resulting allocation's `AllocatorType::Threaded` tag.
cpu: Rc<CpuWorker>,
allocator: Rc<VulkanThreadedAllocator>,
/// The completion callback; taken out exactly once in `completed`.
cb: Option<T>,
/// The work item that performs the allocation.
work: AllocWork,
}
/// The work half of an `AllocJob`: the request parameters, the shared handles
/// needed to perform the allocation, and a slot for the result.
struct AllocWork {
req: MemoryRequirements,
usage: UsageFlags,
/// Whether the allocated block should also be mapped.
map: bool,
device: Arc<Device>,
allocator: Arc<Mutex<GpuAllocator<DeviceMemory>>>,
/// Filled in by `run`; taken out by `AllocJob::completed`.
res: Option<Result<UnsafeAllocation, VulkanError>>,
}
impl CpuWork for AllocWork {
    /// Locks the shared allocator, performs the allocation and stores the outcome
    /// in `self.res`. Never schedules follow-up async work.
    fn run(&mut self) -> Option<Box<dyn AsyncCpuWork>> {
        let outcome = {
            // Keep the guard scoped to the allocation call only.
            let mut gpu = self.allocator.lock();
            do_alloc(&mut gpu, &self.device, &self.req, self.usage, self.map)
        };
        self.res = Some(outcome);
        None
    }
}
impl<T> CpuJob for AllocJob<T>
where
T: FnOnce(Result<VulkanAllocation, VulkanError>),
{
fn work(&mut self) -> &mut dyn CpuWork {
&mut self.work
}
fn completed(mut self: Box<Self>) {
self.renderer.pending_cpu_jobs.remove(&self.id);
let res = self.work.res.take().unwrap().map(|ua| {
self.allocator.commit_allocation(
ua,
AllocatorType::Threaded {
allocator: self.allocator.clone(),
renderer: self.renderer.clone(),
cpu: self.cpu.clone(),
},
)
});
self.cb.take().unwrap()(res);
}
}
/// CPU job that frees an allocation via `FreeWork` and updates the allocator's
/// `total` counter when the job completes.
struct FreeJob {
/// Key under which the pending job is stored in `renderer.pending_cpu_jobs`.
id: u64,
renderer: Rc<VulkanRenderer>,
allocator: Rc<VulkanThreadedAllocator>,
/// Amount subtracted from `allocator.total` in `completed`.
size: u64,
/// The work item that performs the free.
work: FreeWork,
}
/// The work half of a `FreeJob`: the shared handles and the allocation to release.
struct FreeWork {
device: Arc<Device>,
allocator: Arc<Mutex<GpuAllocator<DeviceMemory>>>,
/// The allocation to free; taken out by `run`.
allocation: Option<UnsafeAllocation>,
}
impl CpuWork for FreeWork {
fn run(&mut self) -> Option<Box<dyn AsyncCpuWork>> {
let ua = self.allocation.take().unwrap();
unsafe {
do_free(&mut self.allocator.lock(), &self.device, ua.block, ua.ptr);
}
None
}
}
impl CpuJob for FreeJob {
    /// Exposes the free work item to the CPU worker.
    fn work(&mut self) -> &mut dyn CpuWork {
        &mut self.work
    }

    /// Unregisters the job from the renderer and subtracts the freed size from
    /// the allocator's running total.
    fn completed(self: Box<Self>) {
        let job = &*self;
        job.renderer.pending_cpu_jobs.remove(&job.id);
        job.allocator.total.fetch_sub(job.size);
    }
}
/// A raw allocation result: the memory block plus the mapping pointer, if the
/// block was mapped. Not yet registered with any allocator's bookkeeping.
pub struct UnsafeAllocation {
/// The block returned by the `GpuAllocator`.
block: MemoryBlock<DeviceMemory>,
/// Pointer returned by mapping the block, or `None` when mapping was not requested.
ptr: Option<*mut u8>,
}
// The raw `*mut u8` field suppresses the automatic `Send` impl, so it is asserted
// manually to let the value travel inside `AllocWork` / `FreeWork`.
// NOTE(review): presumably sound because the pointer refers to mapped device
// memory that is not tied to a particular thread — confirm.
unsafe impl Send for UnsafeAllocation {}
/// Allocates a memory block satisfying `req` from `allocator` and optionally maps it.
///
/// When `map` is `true` the whole block (offset `0`, length `block.size()`) is
/// mapped and the resulting pointer is returned in `UnsafeAllocation::ptr`;
/// otherwise `ptr` is `None`.
///
/// # Errors
///
/// * `VulkanError::AllocateMemory2` if the allocator cannot satisfy the request.
/// * `VulkanError::MapMemory` if mapping fails. In that case the freshly allocated
///   block is handed back to the allocator before returning, so it is not leaked.
fn do_alloc(
    allocator: &mut GpuAllocator<DeviceMemory>,
    device: &Device,
    req: &MemoryRequirements,
    usage: UsageFlags,
    map: bool,
) -> Result<UnsafeAllocation, VulkanError> {
    let request = Request {
        size: req.size,
        // assumes `req.alignment` is a non-zero power of two — TODO confirm
        align_mask: req.alignment - 1,
        usage,
        memory_types: req.memory_type_bits,
    };
    let device = AshMemoryDevice::wrap(device);
    let block = unsafe { allocator.alloc(device, request) };
    let mut block = block.map_err(VulkanError::AllocateMemory2)?;
    let ptr = if map {
        match unsafe { block.map(device, 0, block.size() as usize) } {
            Ok(ptr) => Some(ptr.as_ptr()),
            Err(e) => {
                // Previously the block was simply dropped on this path, leaking the
                // allocation. Return it to the allocator it was just taken from.
                // NOTE(review): assumes a failed `map` leaves the block unmapped, so
                // no `unmap` is needed before `dealloc` — confirm against gpu-alloc.
                unsafe { allocator.dealloc(device, block) };
                return Err(VulkanError::MapMemory(e));
            }
        }
    } else {
        None
    };
    Ok(UnsafeAllocation { block, ptr })
}
/// Returns `block` to `gpu`, unmapping it first when a mapping pointer is present.
///
/// # Safety
///
/// Forwards to the unsafe `unmap` and `dealloc` calls of gpu-alloc; `gpu` and
/// `device` are presumably required to be the ones `block` was allocated with,
/// and `ptr` must be `Some` only if the block is currently mapped (TODO confirm).
unsafe fn do_free(
    gpu: &mut GpuAllocator<DeviceMemory>,
    device: &Device,
    mut block: MemoryBlock<DeviceMemory>,
    ptr: Option<*mut u8>,
) {
    let mem_device = AshMemoryDevice::wrap(device);
    // Only the presence of the pointer matters; its value is not used.
    if ptr.is_some() {
        block.unmap(mem_device);
    }
    gpu.dealloc(mem_device, block);
}
impl Drop for UnsyncAllocatorStorage {
    /// Runs the allocator's `cleanup` against the device when the storage is dropped.
    fn drop(&mut self) {
        // `&mut self` gives exclusive access, so the cell is reached via `get_mut`.
        let gpu = self.allocator.get_mut();
        unsafe {
            gpu.cleanup(AshMemoryDevice::wrap(&self.device.device));
        }
    }
}
impl Drop for SyncAllocatorStorage {
    /// Locks the shared allocator and runs its `cleanup` against the device when
    /// the storage is dropped.
    fn drop(&mut self) {
        let mut gpu = self.allocator.lock();
        unsafe {
            gpu.cleanup(AshMemoryDevice::wrap(&self.device.device));
        }
    }
}

View file

@ -42,6 +42,7 @@ use {
std::{
ffi::{CStr, CString},
rc::Rc,
sync::Arc,
},
uapi::Ustr,
};
@ -52,7 +53,7 @@ pub struct VulkanDevice {
pub(super) gbm: Rc<GbmDevice>,
pub(super) sync_ctx: Rc<SyncObjCtx>,
pub(super) instance: Rc<VulkanInstance>,
pub(super) device: Device,
pub(super) device: Arc<Device>,
pub(super) external_memory_fd: external_memory_fd::Device,
pub(super) external_semaphore_fd: external_semaphore_fd::Device,
pub(super) external_fence_fd: external_fence_fd::Device,
@ -292,7 +293,7 @@ impl VulkanInstance {
sync_ctx: Rc::new(SyncObjCtx::new(gbm.drm.fd())),
gbm: Rc::new(gbm),
instance: self.clone(),
device,
device: Arc::new(device),
external_memory_fd,
external_semaphore_fd,
external_fence_fd,

View file

@ -1,13 +1,14 @@
use {
crate::{
async_engine::{AsyncEngine, SpawnedFuture},
cpu_worker::PendingJob,
format::{Format, XRGB8888},
gfx_api::{
AcquireSync, BufferResv, BufferResvUser, GfxApiOpt, GfxFormat, GfxFramebuffer,
GfxTexture, GfxWriteModifier, ReleaseSync, SyncFile,
},
gfx_apis::vulkan::{
allocator::VulkanAllocator,
allocator::{VulkanAllocator, VulkanThreadedAllocator},
command::{VulkanCommandBuffer, VulkanCommandPool},
descriptor::VulkanDescriptorSetLayout,
device::VulkanDevice,
@ -79,6 +80,9 @@ pub struct VulkanRenderer {
pub(super) tex_frag_mult_alpha_shader: Rc<VulkanShader>,
pub(super) tex_descriptor_set_layout: Rc<VulkanDescriptorSetLayout>,
pub(super) defunct: Cell<bool>,
pub(super) pending_cpu_jobs: CopyHashMap<u64, PendingJob>,
#[expect(dead_code)]
pub(super) shm_allocator: Rc<VulkanThreadedAllocator>,
}
pub(super) struct UsedTexture {
@ -173,6 +177,7 @@ impl VulkanDevice {
})
.collect();
let allocator = self.create_allocator()?;
let shm_allocator = self.create_threaded_allocator()?;
let render = Rc::new(VulkanRenderer {
formats: Rc::new(formats),
device: self.clone(),
@ -197,6 +202,8 @@ impl VulkanDevice {
tex_frag_mult_alpha_shader,
tex_descriptor_set_layout,
defunct: Cell::new(false),
pending_cpu_jobs: Default::default(),
shm_allocator,
});
render.get_or_create_pipelines(XRGB8888.vk_format)?;
Ok(render)