From 156785d7c8c5aaa594d83d8c722efa8207ebc5e4 Mon Sep 17 00:00:00 2001 From: Julian Orth Date: Sun, 6 Oct 2024 14:07:16 +0200 Subject: [PATCH] vulkan: implement async shm downloads --- src/gfx_apis/vulkan.rs | 2 + src/gfx_apis/vulkan/renderer.rs | 132 ++++++++++++++++++++----------- src/gfx_apis/vulkan/shm_image.rs | 96 ++++++++++++++++------ src/gfx_apis/vulkan/transfer.rs | 114 +++++++++++++++++++++++--- 4 files changed, 260 insertions(+), 84 deletions(-) diff --git a/src/gfx_apis/vulkan.rs b/src/gfx_apis/vulkan.rs index acaa83dd..fd8287c1 100644 --- a/src/gfx_apis/vulkan.rs +++ b/src/gfx_apis/vulkan.rs @@ -213,6 +213,8 @@ pub enum VulkanError { StagingBufferNoDownload, #[error("Image contents are undefined")] UndefinedContents, + #[error("The framebuffer is being used by the transfer queue")] + BusyInTransfer, } impl From for GfxError { diff --git a/src/gfx_apis/vulkan/renderer.rs b/src/gfx_apis/vulkan/renderer.rs index 10592eba..cc225064 100644 --- a/src/gfx_apis/vulkan/renderer.rs +++ b/src/gfx_apis/vulkan/renderer.rs @@ -337,38 +337,69 @@ impl VulkanRenderer { } } - fn initial_barriers(&self, buf: CommandBuffer, fb: &VulkanImage) { + fn initial_barriers(&self, buf: CommandBuffer, fb: &VulkanImage) -> Result<(), VulkanError> { zone!("initial_barriers"); let mut memory = self.memory.borrow_mut(); let memory = &mut *memory; memory.image_barriers.clear(); - let mut fb_image_memory_barrier = image_barrier() - .image(fb.image) - .new_layout(ImageLayout::COLOR_ATTACHMENT_OPTIMAL) - .dst_access_mask( - AccessFlags2::COLOR_ATTACHMENT_WRITE | AccessFlags2::COLOR_ATTACHMENT_READ, - ) - .dst_stage_mask(PipelineStageFlags2::COLOR_ATTACHMENT_OUTPUT); - if fb.bridge.is_some() { - fb_image_memory_barrier = fb_image_memory_barrier - .src_access_mask(AccessFlags2::TRANSFER_READ) - .src_stage_mask(PipelineStageFlags2::TRANSFER) - .old_layout(if fb.is_undefined.get() { - ImageLayout::UNDEFINED - } else { - ImageLayout::TRANSFER_SRC_OPTIMAL - }); - } else { - fb_image_memory_barrier = fb_image_memory_barrier - .src_queue_family_index(QUEUE_FAMILY_FOREIGN_EXT) - .dst_queue_family_index(self.device.graphics_queue_idx) - .old_layout(if fb.is_undefined.get() { - ImageLayout::UNDEFINED - } else { - ImageLayout::GENERAL - }); + let mut need_fb_barrier = true; + if let VulkanImageMemory::Internal(..) = &fb.ty { + need_fb_barrier = fb.is_undefined.get() + || (self.device.distinct_transfer_queue_family_idx.is_some() + && fb.queue_state.get().acquire(QueueFamily::Gfx) + != QueueTransfer::Unnecessary); + } + if need_fb_barrier { + let mut fb_image_memory_barrier = image_barrier() + .image(fb.image) + .new_layout(ImageLayout::COLOR_ATTACHMENT_OPTIMAL) + .dst_access_mask( + AccessFlags2::COLOR_ATTACHMENT_WRITE | AccessFlags2::COLOR_ATTACHMENT_READ, + ) + .dst_stage_mask(PipelineStageFlags2::COLOR_ATTACHMENT_OUTPUT); + if fb.bridge.is_some() { + fb_image_memory_barrier = fb_image_memory_barrier + .src_access_mask(AccessFlags2::TRANSFER_READ) + .src_stage_mask(PipelineStageFlags2::TRANSFER) + .old_layout(if fb.is_undefined.get() { + ImageLayout::UNDEFINED + } else { + ImageLayout::TRANSFER_SRC_OPTIMAL + }); + } else if let VulkanImageMemory::Internal(..) = &fb.ty { + let mut queue_transfer = QueueTransfer::Unnecessary; + if self.device.distinct_transfer_queue_family_idx.is_some() { + queue_transfer = fb.queue_state.get().acquire(QueueFamily::Gfx); + } + match queue_transfer { + QueueTransfer::Unnecessary => { + fb_image_memory_barrier = + fb_image_memory_barrier.old_layout(ImageLayout::UNDEFINED); + } + QueueTransfer::Possible => { + if let Some(transfer_queue_idx) = + self.device.distinct_transfer_queue_family_idx + { + fb_image_memory_barrier = fb_image_memory_barrier + .src_queue_family_index(transfer_queue_idx) + .dst_queue_family_index(self.device.graphics_queue_idx) + .old_layout(ImageLayout::TRANSFER_SRC_OPTIMAL); + } + } + QueueTransfer::Impossible => return Err(VulkanError::BusyInTransfer), + } + } else { + fb_image_memory_barrier = fb_image_memory_barrier + .src_queue_family_index(QUEUE_FAMILY_FOREIGN_EXT) + .dst_queue_family_index(self.device.graphics_queue_idx) + .old_layout(if fb.is_undefined.get() { + ImageLayout::UNDEFINED + } else { + ImageLayout::GENERAL + }); + } + memory.image_barriers.push(fb_image_memory_barrier); } - memory.image_barriers.push(fb_image_memory_barrier); for img in &memory.dmabuf_sample { let image_memory_barrier = image_barrier() .src_queue_family_index(QUEUE_FAMILY_FOREIGN_EXT) @@ -397,6 +428,7 @@ impl VulkanRenderer { unsafe { self.device.device.cmd_pipeline_barrier2(buf, &dep_info); } + Ok(()) } fn begin_rendering(&self, buf: CommandBuffer, fb: &VulkanImage, clear: Option<&Color>) { @@ -643,26 +675,28 @@ impl VulkanRenderer { let mut memory = self.memory.borrow_mut(); let memory = &mut *memory; memory.image_barriers.clear(); - let mut fb_image_memory_barrier = image_barrier() - .src_queue_family_index(self.device.graphics_queue_idx) - .dst_queue_family_index(QUEUE_FAMILY_FOREIGN_EXT) - .new_layout(ImageLayout::GENERAL); - if let Some(bridge) = &fb.bridge { - fb_image_memory_barrier = fb_image_memory_barrier - .image(bridge.dmabuf_image) - .old_layout(ImageLayout::TRANSFER_DST_OPTIMAL) - .src_access_mask(AccessFlags2::TRANSFER_WRITE) - .src_stage_mask(PipelineStageFlags2::TRANSFER); - } else { - fb_image_memory_barrier = fb_image_memory_barrier - .image(fb.image) - .old_layout(ImageLayout::COLOR_ATTACHMENT_OPTIMAL) - .src_access_mask( - AccessFlags2::COLOR_ATTACHMENT_WRITE | AccessFlags2::COLOR_ATTACHMENT_READ, - ) - .src_stage_mask(PipelineStageFlags2::COLOR_ATTACHMENT_OUTPUT); + if let VulkanImageMemory::DmaBuf(..) = fb.ty { + let mut fb_image_memory_barrier = image_barrier() + .src_queue_family_index(self.device.graphics_queue_idx) + .dst_queue_family_index(QUEUE_FAMILY_FOREIGN_EXT) + .new_layout(ImageLayout::GENERAL); + if let Some(bridge) = &fb.bridge { + fb_image_memory_barrier = fb_image_memory_barrier + .image(bridge.dmabuf_image) + .old_layout(ImageLayout::TRANSFER_DST_OPTIMAL) + .src_access_mask(AccessFlags2::TRANSFER_WRITE) + .src_stage_mask(PipelineStageFlags2::TRANSFER); + } else { + fb_image_memory_barrier = fb_image_memory_barrier + .image(fb.image) + .old_layout(ImageLayout::COLOR_ATTACHMENT_OPTIMAL) + .src_access_mask( + AccessFlags2::COLOR_ATTACHMENT_WRITE | AccessFlags2::COLOR_ATTACHMENT_READ, + ) + .src_stage_mask(PipelineStageFlags2::COLOR_ATTACHMENT_OUTPUT); + } + memory.image_barriers.push(fb_image_memory_barrier); } - memory.image_barriers.push(fb_image_memory_barrier); for img in &memory.dmabuf_sample { let image_memory_barrier = image_barrier() .src_queue_family_index(self.device.graphics_queue_idx) @@ -833,6 +867,10 @@ impl VulkanRenderer { fn store_layouts(&self, fb: &VulkanImage) { fb.is_undefined.set(false); + fb.contents_are_undefined.set(false); + fb.queue_state.set(QueueState::Acquired { + family: QueueFamily::Gfx, + }); let memory = self.memory.borrow(); for img in &*memory.queue_transfer { img.queue_state.set(QueueState::Acquired { @@ -1101,7 +1139,7 @@ impl VulkanRenderer { let buf = self.gfx_command_buffers.allocate()?; self.collect_memory(opts); self.begin_command_buffer(buf.buffer)?; - self.initial_barriers(buf.buffer, fb); + self.initial_barriers(buf.buffer, fb)?; self.begin_rendering(buf.buffer, fb, clear); self.set_viewport(buf.buffer, fb); self.record_draws(buf.buffer, fb, opts)?; diff --git a/src/gfx_apis/vulkan/shm_image.rs b/src/gfx_apis/vulkan/shm_image.rs index a342a453..86155e2b 100644 --- a/src/gfx_apis/vulkan/shm_image.rs +++ b/src/gfx_apis/vulkan/shm_image.rs @@ -10,7 +10,7 @@ use { image::{QueueFamily, QueueState, VulkanImage, VulkanImageMemory}, renderer::{image_barrier, VulkanRenderer}, staging::VulkanStagingBuffer, - transfer::VulkanShmImageAsyncData, + transfer::{TransferType, VulkanShmImageAsyncData}, VulkanError, }, rect::Rect, @@ -19,10 +19,10 @@ use { ash::vk::{ AccessFlags2, BufferImageCopy2, BufferMemoryBarrier2, CommandBufferBeginInfo, CommandBufferSubmitInfo, CommandBufferUsageFlags, CopyBufferToImageInfo2, - DependencyInfoKHR, DeviceSize, Extent3D, ImageAspectFlags, ImageCreateInfo, ImageLayout, - ImageSubresourceLayers, ImageSubresourceRange, ImageTiling, ImageType, ImageUsageFlags, - ImageViewCreateInfo, ImageViewType, Offset3D, PipelineStageFlags2, SampleCountFlags, - SharingMode, SubmitInfo2, + CopyImageToBufferInfo2, DependencyInfoKHR, DeviceSize, Extent3D, ImageAspectFlags, + ImageCreateInfo, ImageLayout, ImageSubresourceLayers, ImageSubresourceRange, ImageTiling, + ImageType, ImageUsageFlags, ImageViewCreateInfo, ImageViewType, Offset3D, + PipelineStageFlags2, SampleCountFlags, SharingMode, SubmitInfo2, }, gpu_alloc::UsageFlags, isnt::std_1::primitive::IsntSliceExt, @@ -137,7 +137,7 @@ impl VulkanShmImage { } })?; let Some((cmd, fence, sync_file, point)) = - self.submit_buffer_to_image_copy(img, &staging, cpy, false)? + self.submit_buffer_image_copy(img, &staging, cpy, false, TransferType::Upload)? else { return Ok(()); }; @@ -149,12 +149,13 @@ impl VulkanShmImage { Ok(()) } - pub(super) fn submit_buffer_to_image_copy( + pub(super) fn submit_buffer_image_copy( &self, img: &Rc, staging: &VulkanStagingBuffer, regions: &[BufferImageCopy2], use_transfer_queue: bool, + tt: TransferType, ) -> Result, Rc, SyncFile, u64)>, VulkanError> { let memory_barrier = |sam, ssm, dam, dsm| { @@ -182,18 +183,30 @@ impl VulkanShmImage { .old_layout(if img.is_undefined.get() { ImageLayout::UNDEFINED } else { - ImageLayout::SHADER_READ_ONLY_OPTIMAL + match tt { + TransferType::Upload => ImageLayout::SHADER_READ_ONLY_OPTIMAL, + TransferType::Download => ImageLayout::COLOR_ATTACHMENT_OPTIMAL, + } }) - .new_layout(ImageLayout::TRANSFER_DST_OPTIMAL); + .new_layout(match tt { + TransferType::Upload => ImageLayout::TRANSFER_DST_OPTIMAL, + TransferType::Download => ImageLayout::TRANSFER_SRC_OPTIMAL, + }); if transfer_queue_family_idx == img.renderer.device.graphics_queue_idx { initial_image_barrier = initial_image_barrier .src_access_mask(AccessFlags2::SHADER_SAMPLED_READ) .src_stage_mask(PipelineStageFlags2::FRAGMENT_SHADER) } let initial_buffer_barrier = memory_barrier( - AccessFlags2::HOST_WRITE, + match tt { + TransferType::Upload => AccessFlags2::HOST_WRITE, + TransferType::Download => AccessFlags2::HOST_READ, + }, PipelineStageFlags2::HOST, - AccessFlags2::TRANSFER_READ, + match tt { + TransferType::Upload => AccessFlags2::TRANSFER_READ, + TransferType::Download => AccessFlags2::TRANSFER_WRITE, + }, PipelineStageFlags2::TRANSFER, ); let initial_dep_info = DependencyInfoKHR::default() @@ -203,29 +216,42 @@ impl VulkanShmImage { .image(img.image) .src_queue_family_index(transfer_queue_family_idx) .dst_queue_family_index(img.renderer.device.graphics_queue_idx) - .src_access_mask(AccessFlags2::TRANSFER_WRITE) + .src_access_mask(match tt { + TransferType::Upload => AccessFlags2::TRANSFER_WRITE, + TransferType::Download => AccessFlags2::TRANSFER_READ, + }) .src_stage_mask(PipelineStageFlags2::TRANSFER) - .old_layout(ImageLayout::TRANSFER_DST_OPTIMAL) - .new_layout(ImageLayout::SHADER_READ_ONLY_OPTIMAL); + .old_layout(match tt { + TransferType::Upload => ImageLayout::TRANSFER_DST_OPTIMAL, + TransferType::Download => ImageLayout::TRANSFER_SRC_OPTIMAL, + }) + .new_layout(match tt { + TransferType::Upload => ImageLayout::SHADER_READ_ONLY_OPTIMAL, + TransferType::Download => ImageLayout::COLOR_ATTACHMENT_OPTIMAL, + }); if transfer_queue_family_idx == img.renderer.device.graphics_queue_idx { final_image_barrier = final_image_barrier - .dst_access_mask(AccessFlags2::SHADER_SAMPLED_READ) + .dst_access_mask(match tt { + TransferType::Upload => AccessFlags2::SHADER_SAMPLED_READ, + TransferType::Download => AccessFlags2::COLOR_ATTACHMENT_WRITE, + }) .dst_stage_mask(PipelineStageFlags2::FRAGMENT_SHADER); } let final_buffer_barrier = memory_barrier( - AccessFlags2::TRANSFER_READ, + match tt { + TransferType::Upload => AccessFlags2::TRANSFER_READ, + TransferType::Download => AccessFlags2::TRANSFER_WRITE, + }, PipelineStageFlags2::TRANSFER, - AccessFlags2::HOST_WRITE, + match tt { + TransferType::Upload => AccessFlags2::HOST_WRITE, + TransferType::Download => AccessFlags2::HOST_READ, + }, PipelineStageFlags2::HOST, ); let final_dep_info = DependencyInfoKHR::default() .buffer_memory_barriers(slice::from_ref(&final_buffer_barrier)) .image_memory_barriers(slice::from_ref(&final_image_barrier)); - let cpy_info = CopyBufferToImageInfo2::default() - .src_buffer(staging.buffer) - .dst_image(img.image) - .dst_image_layout(ImageLayout::TRANSFER_DST_OPTIMAL) - .regions(regions); let cmd = match &img.renderer.transfer_command_buffers { Some(b) if use_transfer_queue => b.allocate()?, _ => img.renderer.gfx_command_buffers.allocate()?, @@ -241,7 +267,24 @@ impl VulkanShmImage { dev.begin_command_buffer(cmd.buffer, &begin_info) .map_err(VulkanError::BeginCommandBuffer)?; dev.cmd_pipeline_barrier2(cmd.buffer, &initial_dep_info); - dev.cmd_copy_buffer_to_image2(cmd.buffer, &cpy_info); + match tt { + TransferType::Upload => { + let cpy_info = CopyBufferToImageInfo2::default() + .src_buffer(staging.buffer) + .dst_image(img.image) + .dst_image_layout(ImageLayout::TRANSFER_DST_OPTIMAL) + .regions(regions); + dev.cmd_copy_buffer_to_image2(cmd.buffer, &cpy_info); + } + TransferType::Download => { + let cpy_info = CopyImageToBufferInfo2::default() + .dst_buffer(staging.buffer) + .src_image(img.image) + .src_image_layout(ImageLayout::TRANSFER_SRC_OPTIMAL) + .regions(regions); + dev.cmd_copy_image_to_buffer2(cmd.buffer, &cpy_info); + } + } dev.cmd_pipeline_barrier2(cmd.buffer, &final_dep_info); dev.end_command_buffer(cmd.buffer) .map_err(VulkanError::EndCommandBuffer)?; @@ -255,8 +298,10 @@ impl VulkanShmImage { ) .map_err(VulkanError::Submit)?; } - img.is_undefined.set(false); - img.contents_are_undefined.set(false); + if tt == TransferType::Upload { + img.is_undefined.set(false); + img.contents_are_undefined.set(false); + } let release_sync_file = match release_fence.export_sync_file() { Ok(s) => s, Err(e) => { @@ -381,6 +426,7 @@ impl VulkanRenderer { io_job: Default::default(), copy_job: Default::default(), staging: Default::default(), + client_mem: Default::default(), callback: Default::default(), callback_id: Cell::new(0), regions: Default::default(), diff --git a/src/gfx_apis/vulkan/transfer.rs b/src/gfx_apis/vulkan/transfer.rs index 901e7cf0..9062df13 100644 --- a/src/gfx_apis/vulkan/transfer.rs +++ b/src/gfx_apis/vulkan/transfer.rs @@ -41,6 +41,7 @@ pub struct VulkanShmImageAsyncData { pub(super) io_job: Cell>>, pub(super) copy_job: Cell>>, pub(super) staging: CloneCell>>, + pub(super) client_mem: CloneCell>>, pub(super) callback: Cell>>, pub(super) callback_id: Cell, pub(super) regions: RefCell>>, @@ -53,6 +54,7 @@ impl VulkanShmImageAsyncData { fn complete(&self, result: Result<(), VulkanError>) { self.busy.set(false); self.staging.take().unwrap().busy.set(false); + self.client_mem.take(); if let Some(cb) = self.callback.take() { cb.completed(result.map_err(|e| e.into())); } @@ -122,6 +124,7 @@ impl VulkanShmImage { data.data_copied.set(false); staging.busy.set(true); data.staging.set(Some(staging.clone())); + data.client_mem.set(Some(client_mem.clone())); if img.contents_are_undefined.get() { if tt == TransferType::Download { return Err(VulkanError::UndefinedContents); @@ -183,7 +186,9 @@ impl VulkanShmImage { return match tt { TransferType::Upload => self .async_transfer_initiate_host_copy(img, data, &staging, copies, client_mem, tt), - TransferType::Download => unreachable!(), + TransferType::Download => { + self.async_download_copy_image_to_buffer(img, &staging, copies) + } }; } @@ -324,7 +329,9 @@ impl VulkanShmImage { TransferType::Upload => { self.async_transfer_initiate_host_copy(img, data, &staging, copies, client_mem, tt) } - TransferType::Download => unreachable!(), + TransferType::Download => { + self.async_download_copy_image_to_buffer(img, &staging, copies) + } } } @@ -339,6 +346,9 @@ impl VulkanShmImage { ) -> Result<(), VulkanError> { img.renderer.check_defunct()?; + if tt == TransferType::Download { + staging.download(|_, _| ())?; + } let id = img.renderer.allocate_point(); let pending; match client_mem.safe_access() { @@ -440,14 +450,64 @@ impl VulkanShmImage { let staging = data.staging.get().unwrap().staging.get().unwrap(); staging.upload(|_, _| ())?; let Some((cmd, fence, sync_file, point)) = - self.submit_buffer_to_image_copy(img, &staging, regions, true)? + self.submit_buffer_image_copy(img, &staging, regions, true, TransferType::Upload)? else { return Ok(()); }; img.queue_state.set(QueueState::Releasing); let future = img.renderer.eng.spawn( "await async upload", - await_async_transfer_release_to_gfx(point, img.clone(), cmd, fence, sync_file), + await_async_transfer_release_to_gfx( + point, + img.clone(), + cmd, + fence, + sync_file, + TransferType::Upload, + ), + ); + img.renderer.pending_submits.set(point, future); + Ok(()) + } + + fn async_download_copy_image_to_buffer( + &self, + img: &Rc, + staging: &VulkanStagingBuffer, + copies: &[BufferImageCopy2], + ) -> Result<(), VulkanError> { + if img.queue_state.get().acquire(QueueFamily::Transfer) == QueueTransfer::Impossible { + return Ok(()); + } + img.renderer.check_defunct()?; + let Some((cmd, fence, sync_file, point)) = + self.submit_buffer_image_copy(img, &staging, copies, true, TransferType::Download)? + else { + img.queue_state.set(QueueState::Released { + to: QueueFamily::Gfx, + }); + let data = self.async_data.as_ref().unwrap(); + let client_mem = data.client_mem.get().unwrap(); + return self.async_transfer_initiate_host_copy( + &img, + data, + &staging, + copies, + &client_mem, + TransferType::Download, + ); + }; + img.queue_state.set(QueueState::Releasing); + let future = img.renderer.eng.spawn( + "await async image to buffer copy", + await_async_transfer_release_to_gfx( + point, + img.clone(), + cmd, + fence, + sync_file, + TransferType::Download, + ), ); img.renderer.pending_submits.set(point, future); Ok(()) @@ -518,12 +578,14 @@ fn complete_async_host_copy( data.complete(Err(VulkanError::AsyncCopyToStaging(e))); } data.data_copied.set(true); - let res = match tt { - TransferType::Upload => shm.async_upload_copy_buffer_to_image(img, data), - TransferType::Download => unreachable!(), - }; - if let Err(e) = res { - data.complete(Err(e)); + match tt { + TransferType::Upload => { + let res = shm.async_upload_copy_buffer_to_image(img, data); + if let Err(e) = res { + data.complete(Err(e)); + } + } + TransferType::Download => data.complete(Ok(())), } } @@ -556,7 +618,13 @@ async fn await_gfx_queue_release( let data = shm.async_data.as_ref().unwrap(); let res = match tt { TransferType::Upload => shm.async_upload_copy_buffer_to_image(&img, data), - TransferType::Download => unreachable!(), + TransferType::Download => match data.staging.get().unwrap().staging.get() { + Some(staging) => { + let copies = &*data.regions.borrow(); + shm.async_download_copy_image_to_buffer(&img, &staging, copies) + } + None => Ok(()), + }, }; if let Err(e) = res { data.complete(Err(e)); @@ -569,6 +637,7 @@ pub async fn await_async_transfer_release_to_gfx( buf: Rc, _fence: Rc, sync_file: SyncFile, + tt: TransferType, ) { let res = img.renderer.ring.readable(&sync_file.0).await; if let Err(e) = res { @@ -590,5 +659,26 @@ pub async fn await_async_transfer_release_to_gfx( unreachable!(); }; let data = shm.async_data.as_ref().unwrap(); - data.complete(Ok(())); + match tt { + TransferType::Upload => { + data.complete(Ok(())); + } + TransferType::Download => { + let data = shm.async_data.as_ref().unwrap(); + let staging = data.staging.get().unwrap().staging.get().unwrap(); + let client_mem = data.client_mem.get().unwrap(); + let copies = &*data.regions.borrow(); + let res = shm.async_transfer_initiate_host_copy( + &img, + data, + &staging, + copies, + &client_mem, + tt, + ); + if let Err(e) = res { + data.complete(Err(e)); + } + } + } }