diff --git a/src/allocator.rs b/src/allocator.rs index 6354bdb7..f836a471 100644 --- a/src/allocator.rs +++ b/src/allocator.rs @@ -13,7 +13,7 @@ use { #[derive(Debug, Error)] #[error(transparent)] -pub struct AllocatorError(#[from] pub Box); +pub struct AllocatorError(#[from] pub Box); bitflags! { BufferUsage: u32; diff --git a/src/cpu_worker.rs b/src/cpu_worker.rs index d3d6adca..3875401f 100644 --- a/src/cpu_worker.rs +++ b/src/cpu_worker.rs @@ -290,7 +290,6 @@ impl CpuWorker { }) } - #[expect(dead_code)] pub fn submit(&self, job: Box) -> PendingJob { let mut job = NonNull::from(Box::leak(job)); let id = self.data.next.next(); diff --git a/src/gfx_api.rs b/src/gfx_api.rs index a3556206..5ca68519 100644 --- a/src/gfx_api.rs +++ b/src/gfx_api.rs @@ -642,7 +642,7 @@ pub struct GfxFormat { #[derive(Error)] #[error(transparent)] -pub struct GfxError(pub Box); +pub struct GfxError(pub Box); impl Debug for GfxError { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { diff --git a/src/gfx_apis/vulkan/allocator.rs b/src/gfx_apis/vulkan/allocator.rs index 4f2568e4..77ffced0 100644 --- a/src/gfx_apis/vulkan/allocator.rs +++ b/src/gfx_apis/vulkan/allocator.rs @@ -1,26 +1,55 @@ use { crate::{ - gfx_apis::vulkan::{device::VulkanDevice, instance::API_VERSION, VulkanError}, + cpu_worker::{AsyncCpuWork, CpuJob, CpuWork, CpuWorker}, + gfx_apis::vulkan::{ + device::VulkanDevice, instance::API_VERSION, renderer::VulkanRenderer, VulkanError, + }, utils::{numcell::NumCell, ptr_ext::MutPtrExt}, }, - ash::vk::{DeviceMemory, DeviceSize, MemoryRequirements}, + ash::{ + vk::{DeviceMemory, DeviceSize, MemoryRequirements}, + Device, + }, gpu_alloc::{Config, GpuAllocator, MemoryBlock, MemoryPropertyFlags, Request, UsageFlags}, gpu_alloc_ash::AshMemoryDevice, + parking_lot::Mutex, std::{ cell::{Cell, UnsafeCell}, rc::Rc, + sync::Arc, }, }; -pub struct VulkanAllocator { - pub(super) device: Rc, - pub(super) non_coherent_atom_mask: u64, +pub struct SyncAllocatorStorage { + allocator: Arc>>, + device: Rc, +} + +pub struct UnsyncAllocatorStorage { allocator: UnsafeCell>, + device: Rc, +} + +pub struct VulkanAllocatorType { + storage: T, + non_coherent_atom_mask: u64, total: NumCell, } +pub type VulkanAllocator = VulkanAllocatorType; +pub type VulkanThreadedAllocator = VulkanAllocatorType; + +enum AllocatorType { + Local(Rc), + Threaded { + allocator: Rc, + renderer: Rc, + cpu: Rc, + }, +} + pub struct VulkanAllocation { - pub(super) allocator: Rc, + allocator: AllocatorType, pub(super) memory: DeviceMemory, pub(super) offset: DeviceSize, pub(super) mem: Option<*mut u8>, @@ -29,26 +58,65 @@ pub struct VulkanAllocation { block: Cell>>, } +impl VulkanAllocation { + unsafe fn free_locally( + &self, + allocator: &VulkanAllocatorType, + device: &VulkanDevice, + gpu: &mut GpuAllocator, + ) { + allocator.total.fetch_sub(self.size); + let block = self.block.take().unwrap(); + do_free(gpu, &device.device, block, self.mem); + } +} + impl Drop for VulkanAllocation { fn drop(&mut self) { unsafe { - self.allocator.total.fetch_sub(self.size); - let mut block = self.block.take().unwrap(); - if let Some(_ptr) = self.mem { - // log::info!("free = {:?} - {:?} ({})", ptr, ptr.add(block.size() as usize), block.size()); - block.unmap(AshMemoryDevice::wrap(&self.allocator.device.device)); + match &self.allocator { + AllocatorType::Local(a) => self.free_locally(a, &a.storage.device, a.allocator()), + AllocatorType::Threaded { + allocator, + renderer, + cpu, + } => { + if renderer.defunct.get() { + self.free_locally( + allocator, + &allocator.storage.device, + &mut allocator.storage.allocator.lock(), + ); + } else { + let id = renderer.allocate_point(); + let job = FreeJob { + id, + renderer: renderer.clone(), + allocator: allocator.clone(), + size: self.size, + work: FreeWork { + device: allocator.storage.device.device.clone(), + allocator: allocator.storage.allocator.clone(), + allocation: Some(UnsafeAllocation { + block: self.block.take().unwrap(), + ptr: self.mem, + }), + }, + }; + let pending = cpu.submit(Box::new(job)); + renderer.pending_cpu_jobs.set(id, pending); + } + } } - self.allocator - .allocator - .get() - .deref_mut() - .dealloc(AshMemoryDevice::wrap(&self.allocator.device.device), block); } } } impl VulkanDevice { - pub fn create_allocator(self: &Rc) -> Result, VulkanError> { + fn create_allocator_( + self: &Rc, + map: impl FnOnce(GpuAllocator) -> T, + ) -> Result>, VulkanError> { let config = Config::i_am_prototyping(); let props = unsafe { gpu_alloc_ash::device_properties( @@ -61,53 +129,40 @@ impl VulkanDevice { props.buffer_device_address = false; let non_coherent_atom_size = props.non_coherent_atom_size; let allocator = GpuAllocator::new(config, props); - Ok(Rc::new(VulkanAllocator { - device: self.clone(), + Ok(Rc::new(VulkanAllocatorType { non_coherent_atom_mask: non_coherent_atom_size - 1, - allocator: UnsafeCell::new(allocator), + storage: map(allocator), total: Default::default(), })) } -} -impl VulkanAllocator { - fn allocator(&self) -> &mut GpuAllocator { - unsafe { self.allocator.get().deref_mut() } + pub fn create_allocator(self: &Rc) -> Result, VulkanError> { + self.create_allocator_(|a| UnsyncAllocatorStorage { + allocator: UnsafeCell::new(a), + device: self.clone(), + }) } - pub fn alloc( + pub fn create_threaded_allocator( self: &Rc, - req: &MemoryRequirements, - usage: UsageFlags, - map: bool, - ) -> Result { - let request = Request { - size: req.size, - align_mask: req.alignment - 1, - usage, - memory_types: req.memory_type_bits, - }; - let block = unsafe { - self.allocator() - .alloc(AshMemoryDevice::wrap(&self.device.device), request) - }; - let mut block = block.map_err(VulkanError::AllocateMemory2)?; - let ptr = match map { - true => { - let ptr = unsafe { - block.map( - AshMemoryDevice::wrap(&self.device.device), - 0, - block.size() as usize, - ) - }; - Some(ptr.map_err(VulkanError::MapMemory)?.as_ptr()) - } - false => None, - }; + ) -> Result, VulkanError> { + self.create_allocator_(|a| SyncAllocatorStorage { + allocator: Arc::new(Mutex::new(a)), + device: self.clone(), + }) + } +} + +impl VulkanAllocatorType { + fn commit_allocation( + self: &Rc, + ua: UnsafeAllocation, + allocator: AllocatorType, + ) -> VulkanAllocation { + let UnsafeAllocation { block, ptr } = ua; self.total.fetch_add(block.size()); - Ok(VulkanAllocation { - allocator: self.clone(), + VulkanAllocation { + allocator, memory: *block.memory(), offset: block.offset(), mem: ptr, @@ -117,17 +172,218 @@ impl VulkanAllocator { false => Some(self.non_coherent_atom_mask), }, block: Cell::new(Some(block)), - }) - } -} - -impl Drop for VulkanAllocator { - fn drop(&mut self) { - unsafe { - self.allocator - .get() - .deref_mut() - .cleanup(AshMemoryDevice::wrap(&self.device.device)); + } + } +} + +impl VulkanAllocator { + fn allocator(&self) -> &mut GpuAllocator { + unsafe { self.storage.allocator.get().deref_mut() } + } + + pub fn alloc( + self: &Rc, + req: &MemoryRequirements, + usage: UsageFlags, + map: bool, + ) -> Result { + let ua = do_alloc( + self.allocator(), + &self.storage.device.device, + req, + usage, + map, + )?; + Ok(self.commit_allocation(ua, AllocatorType::Local(self.clone()))) + } +} + +impl VulkanThreadedAllocator { + #[expect(dead_code)] + pub fn async_alloc( + self: &Rc, + renderer: &Rc, + cpu: &Rc, + req: MemoryRequirements, + usage: UsageFlags, + map: bool, + cb: impl FnOnce(Result) + 'static, + ) -> Result<(), VulkanError> { + renderer.check_defunct()?; + let id = renderer.allocate_point(); + let job = AllocJob { + id, + renderer: renderer.clone(), + cpu: cpu.clone(), + allocator: self.clone(), + cb: Some(cb), + work: AllocWork { + req, + usage, + map, + device: self.storage.device.device.clone(), + allocator: self.storage.allocator.clone(), + res: None, + }, + }; + let pending = cpu.submit(Box::new(job)); + renderer.pending_cpu_jobs.set(id, pending); + Ok(()) + } +} + +struct AllocJob { + id: u64, + renderer: Rc, + cpu: Rc, + allocator: Rc, + cb: Option, + work: AllocWork, +} + +struct AllocWork { + req: MemoryRequirements, + usage: UsageFlags, + map: bool, + device: Arc, + allocator: Arc>>, + res: Option>, +} + +impl CpuWork for AllocWork { + fn run(&mut self) -> Option> { + let r = do_alloc( + &mut self.allocator.lock(), + &self.device, + &self.req, + self.usage, + self.map, + ); + self.res = Some(r); + None + } +} + +impl CpuJob for AllocJob +where + T: FnOnce(Result), +{ + fn work(&mut self) -> &mut dyn CpuWork { + &mut self.work + } + + fn completed(mut self: Box) { + self.renderer.pending_cpu_jobs.remove(&self.id); + let res = self.work.res.take().unwrap().map(|ua| { + self.allocator.commit_allocation( + ua, + AllocatorType::Threaded { + allocator: self.allocator.clone(), + renderer: self.renderer.clone(), + cpu: self.cpu.clone(), + }, + ) + }); + self.cb.take().unwrap()(res); + } +} + +struct FreeJob { + id: u64, + renderer: Rc, + allocator: Rc, + size: u64, + work: FreeWork, +} + +struct FreeWork { + device: Arc, + allocator: Arc>>, + allocation: Option, +} + +impl CpuWork for FreeWork { + fn run(&mut self) -> Option> { + let ua = self.allocation.take().unwrap(); + unsafe { + do_free(&mut self.allocator.lock(), &self.device, ua.block, ua.ptr); + } + None + } +} + +impl CpuJob for FreeJob { + fn work(&mut self) -> &mut dyn CpuWork { + &mut self.work + } + + fn completed(self: Box) { + self.renderer.pending_cpu_jobs.remove(&self.id); + self.allocator.total.fetch_sub(self.size); + } +} + +pub struct UnsafeAllocation { + block: MemoryBlock, + ptr: Option<*mut u8>, +} + +unsafe impl Send for UnsafeAllocation {} + +fn do_alloc( + allocator: &mut GpuAllocator, + device: &Device, + req: &MemoryRequirements, + usage: UsageFlags, + map: bool, +) -> Result { + let request = Request { + size: req.size, + align_mask: req.alignment - 1, + usage, + memory_types: req.memory_type_bits, + }; + let device = AshMemoryDevice::wrap(device); + let block = unsafe { allocator.alloc(device, request) }; + let mut block = block.map_err(VulkanError::AllocateMemory2)?; + let ptr = match map { + true => { + let ptr = unsafe { block.map(device, 0, block.size() as usize) }; + Some(ptr.map_err(VulkanError::MapMemory)?.as_ptr()) + } + false => None, + }; + Ok(UnsafeAllocation { block, ptr }) +} + +unsafe fn do_free( + gpu: &mut GpuAllocator, + device: &Device, + mut block: MemoryBlock, + ptr: Option<*mut u8>, +) { + let device = AshMemoryDevice::wrap(device); + if let Some(_ptr) = ptr { + // log::info!("free = {:?} - {:?} ({})", ptr, ptr.add(block.size() as usize), block.size()); + block.unmap(device); + } + gpu.dealloc(device, block); +} + +impl Drop for UnsyncAllocatorStorage { + fn drop(&mut self) { + let device = AshMemoryDevice::wrap(&self.device.device); + unsafe { + self.allocator.get_mut().cleanup(device); + } + } +} + +impl Drop for SyncAllocatorStorage { + fn drop(&mut self) { + let device = AshMemoryDevice::wrap(&self.device.device); + unsafe { + self.allocator.lock().cleanup(device); } } } diff --git a/src/gfx_apis/vulkan/device.rs b/src/gfx_apis/vulkan/device.rs index 00f3b67b..8792a8ac 100644 --- a/src/gfx_apis/vulkan/device.rs +++ b/src/gfx_apis/vulkan/device.rs @@ -42,6 +42,7 @@ use { std::{ ffi::{CStr, CString}, rc::Rc, + sync::Arc, }, uapi::Ustr, }; @@ -52,7 +53,7 @@ pub struct VulkanDevice { pub(super) gbm: Rc, pub(super) sync_ctx: Rc, pub(super) instance: Rc, - pub(super) device: Device, + pub(super) device: Arc, pub(super) external_memory_fd: external_memory_fd::Device, pub(super) external_semaphore_fd: external_semaphore_fd::Device, pub(super) external_fence_fd: external_fence_fd::Device, @@ -292,7 +293,7 @@ impl VulkanInstance { sync_ctx: Rc::new(SyncObjCtx::new(gbm.drm.fd())), gbm: Rc::new(gbm), instance: self.clone(), - device, + device: Arc::new(device), external_memory_fd, external_semaphore_fd, external_fence_fd, diff --git a/src/gfx_apis/vulkan/renderer.rs b/src/gfx_apis/vulkan/renderer.rs index 6613b9ae..05982da7 100644 --- a/src/gfx_apis/vulkan/renderer.rs +++ b/src/gfx_apis/vulkan/renderer.rs @@ -1,13 +1,14 @@ use { crate::{ async_engine::{AsyncEngine, SpawnedFuture}, + cpu_worker::PendingJob, format::{Format, XRGB8888}, gfx_api::{ AcquireSync, BufferResv, BufferResvUser, GfxApiOpt, GfxFormat, GfxFramebuffer, GfxTexture, GfxWriteModifier, ReleaseSync, SyncFile, }, gfx_apis::vulkan::{ - allocator::VulkanAllocator, + allocator::{VulkanAllocator, VulkanThreadedAllocator}, command::{VulkanCommandBuffer, VulkanCommandPool}, descriptor::VulkanDescriptorSetLayout, device::VulkanDevice, @@ -79,6 +80,9 @@ pub struct VulkanRenderer { pub(super) tex_frag_mult_alpha_shader: Rc, pub(super) tex_descriptor_set_layout: Rc, pub(super) defunct: Cell, + pub(super) pending_cpu_jobs: CopyHashMap, + #[expect(dead_code)] + pub(super) shm_allocator: Rc, } pub(super) struct UsedTexture { @@ -173,6 +177,7 @@ impl VulkanDevice { }) .collect(); let allocator = self.create_allocator()?; + let shm_allocator = self.create_threaded_allocator()?; let render = Rc::new(VulkanRenderer { formats: Rc::new(formats), device: self.clone(), @@ -197,6 +202,8 @@ impl VulkanDevice { tex_frag_mult_alpha_shader, tex_descriptor_set_layout, defunct: Cell::new(false), + pending_cpu_jobs: Default::default(), + shm_allocator, }); render.get_or_create_pipelines(XRGB8888.vk_format)?; Ok(render)