diff --git a/src/gfx_apis/vulkan/bo_allocator.rs b/src/gfx_apis/vulkan/bo_allocator.rs index f89da55b..f9ae381d 100644 --- a/src/gfx_apis/vulkan/bo_allocator.rs +++ b/src/gfx_apis/vulkan/bo_allocator.rs @@ -86,8 +86,8 @@ impl VulkanDevice { drm: &Drm, ) -> Result { let allocator = self.create_allocator()?; - let pool = self.create_command_pool()?; - let command_buffer = pool.allocate_buffer()?; + let pool = self.create_command_pool(self.graphics_queue_idx)?; + let command_buffer = pool.allocate()?; let drm = drm.dup_render().map_err(VulkanError::DupDrm)?; Ok(VulkanBoAllocator { data: Rc::new(VulkanBoAllocatorData { diff --git a/src/gfx_apis/vulkan/command.rs b/src/gfx_apis/vulkan/command.rs index e3f737a5..c3edceb0 100644 --- a/src/gfx_apis/vulkan/command.rs +++ b/src/gfx_apis/vulkan/command.rs @@ -1,5 +1,5 @@ use { - crate::gfx_apis::vulkan::{device::VulkanDevice, VulkanError}, + crate::gfx_apis::vulkan::{device::VulkanDevice, renderer::CachedCommandBuffers, VulkanError}, ash::vk::{ CommandBuffer, CommandBufferAllocateInfo, CommandBufferLevel, CommandPool, CommandPoolCreateFlags, CommandPoolCreateInfo, @@ -53,17 +53,24 @@ impl VulkanCommandPool { } impl VulkanDevice { - pub fn create_command_pool(self: &Rc) -> Result, VulkanError> { + pub fn create_command_pool( + self: &Rc, + queue: u32, + ) -> Result { let info = CommandPoolCreateInfo::default() - .queue_family_index(self.graphics_queue_idx) + .queue_family_index(queue) .flags( CommandPoolCreateFlags::TRANSIENT | CommandPoolCreateFlags::RESET_COMMAND_BUFFER, ); let pool = unsafe { self.device.create_command_pool(&info, None) }; let pool = pool.map_err(VulkanError::AllocateCommandPool)?; - Ok(Rc::new(VulkanCommandPool { - device: self.clone(), - pool, - })) + Ok(CachedCommandBuffers { + pool: Rc::new(VulkanCommandPool { + device: self.clone(), + pool, + }), + buffers: Default::default(), + total_buffers: Default::default(), + }) } } diff --git a/src/gfx_apis/vulkan/device.rs b/src/gfx_apis/vulkan/device.rs index 
8792a8ac..5ff1d796 100644 --- a/src/gfx_apis/vulkan/device.rs +++ b/src/gfx_apis/vulkan/device.rs @@ -63,6 +63,9 @@ pub struct VulkanDevice { pub(super) memory_types: ArrayVec, pub(super) graphics_queue: Queue, pub(super) graphics_queue_idx: u32, + pub(super) transfer_queue: Option, + pub(super) distinct_transfer_queue_family_idx: Option, + pub(super) transfer_granularity_mask: (u32, u32), } impl Drop for VulkanDevice { @@ -185,16 +188,59 @@ impl VulkanInstance { Err(VulkanError::NoDeviceFound(dev)) } - fn find_graphics_queue(&self, phy_dev: PhysicalDevice) -> Result { + fn find_queues( + &self, + phy_dev: PhysicalDevice, + ) -> Result<(u32, Option<(u32, u32, u32)>), VulkanError> { let props = unsafe { self.instance .get_physical_device_queue_family_properties(phy_dev) }; - props + let gfx_queue = props .iter() .position(|p| p.queue_flags.contains(QueueFlags::GRAPHICS)) - .map(|v| v as _) - .ok_or(VulkanError::NoGraphicsQueue) + .ok_or(VulkanError::NoGraphicsQueue)?; + let transfer_queue = 'transfer: { + let mut transfer_only = None; + let mut compute_only = None; + let mut separate_gfx = None; + for (idx, props) in props.iter().enumerate() { + if idx == gfx_queue { + continue; + } + let g = &props.min_image_transfer_granularity; + if g.width == 0 || g.height == 0 { + continue; + } + let f = props.queue_flags; + use QueueFlags as F; + if !f.intersects(F::GRAPHICS | F::COMPUTE) && f.intersects(F::TRANSFER) { + transfer_only = Some(idx); + } else if !f.intersects(F::GRAPHICS) && f.intersects(F::COMPUTE) { + compute_only = Some(idx); + } else if f.intersects(F::GRAPHICS) { + separate_gfx = Some(idx); + } + } + if let Some(idx) = transfer_only.or(compute_only).or(separate_gfx) { + break 'transfer Some(idx); + } + if props[gfx_queue].queue_count > 1 { + break 'transfer Some(gfx_queue); + } + None + }; + let mut width_mask = 0; + let mut height_mask = 0; + if let Some(idx) = transfer_queue { + let g = &props[idx].min_image_transfer_granularity; + width_mask = 
g.width.wrapping_sub(1); + height_mask = g.height.wrapping_sub(1); + } + Ok(( + gfx_queue as _, + transfer_queue.map(|v| (v as _, width_mask, height_mask)), + )) } fn supports_semaphore_import(&self, phy_dev: PhysicalDevice) -> bool { @@ -224,7 +270,15 @@ impl VulkanInstance { return Err(VulkanError::MissingDeviceExtension(ext)); } } - let graphics_queue_idx = self.find_graphics_queue(phy_dev)?; + let (graphics_queue_family_idx, transfer_queue_family) = self.find_queues(phy_dev)?; + let mut distinct_transfer_queue_family_idx = None; + let mut transfer_granularity_mask = (0, 0); + if let Some((idx, width_mask, height_mask)) = transfer_queue_family { + if idx != graphics_queue_family_idx { + distinct_transfer_queue_family_idx = Some(idx); + } + transfer_granularity_mask = (width_mask, height_mask); + } if !self.supports_semaphore_import(phy_dev) { return Err(VulkanError::SyncobjImport); } @@ -238,14 +292,32 @@ impl VulkanInstance { PhysicalDeviceSynchronization2Features::default().synchronization2(true); let mut dynamic_rendering_features = PhysicalDeviceDynamicRenderingFeatures::default().dynamic_rendering(true); - let queue_create_info = DeviceQueueCreateInfo::default() - .queue_family_index(graphics_queue_idx) - .queue_priorities(&[1.0]); + /* Vulkan requires every queue family index in the create infos to be unique, so a */ + /* transfer queue from the graphics family is requested as a second queue of that family. */ + let shares_graphics_family = + transfer_queue_family.is_some() && distinct_transfer_queue_family_idx.is_none(); + let graphics_queue_priorities: &[f32] = match shares_graphics_family { + true => &[1.0, 1.0], + false => &[1.0], + }; + let mut queue_create_infos = ArrayVec::<_, 2>::new(); + queue_create_infos.push( + DeviceQueueCreateInfo::default() + .queue_family_index(graphics_queue_family_idx) + .queue_priorities(graphics_queue_priorities), + ); + if let Some(tq) = distinct_transfer_queue_family_idx { + queue_create_infos.push( + DeviceQueueCreateInfo::default() + .queue_family_index(tq) + .queue_priorities(&[1.0]), + ); + } let device_create_info = DeviceCreateInfo::default() .push_next(&mut semaphore_features) .push_next(&mut synchronization2_features) .push_next(&mut dynamic_rendering_features) - .queue_create_infos(std::slice::from_ref(&queue_create_info)) + .queue_create_infos(&queue_create_infos) .enabled_extension_names(&enabled_extensions); let device = unsafe { 
self.instance @@ -286,7 +350,14 @@ impl VulkanInstance { .iter() .copied() .collect(); - let graphics_queue = unsafe { device.get_device_queue(graphics_queue_idx, 0) }; + let graphics_queue = unsafe { device.get_device_queue(graphics_queue_family_idx, 0) }; + let transfer_queue = transfer_queue_family.map(|(family_idx, _, _)| { + let queue_idx = match family_idx == graphics_queue_family_idx { + true => 1, + false => 0, + }; + unsafe { device.get_device_queue(family_idx, queue_idx) } + }); Ok(Rc::new(VulkanDevice { physical_device: phy_dev, render_node, @@ -302,7 +373,10 @@ impl VulkanInstance { formats, memory_types, graphics_queue, - graphics_queue_idx, + graphics_queue_idx: graphics_queue_family_idx, + transfer_queue, + distinct_transfer_queue_family_idx, + transfer_granularity_mask, })) } } diff --git a/src/gfx_apis/vulkan/image.rs b/src/gfx_apis/vulkan/image.rs index 6b365bba..aa02a734 100644 --- a/src/gfx_apis/vulkan/image.rs +++ b/src/gfx_apis/vulkan/image.rs @@ -58,10 +58,41 @@ pub struct VulkanImage { pub(super) image: Image, pub(super) is_undefined: Cell, pub(super) contents_are_undefined: Cell, + pub(super) queue_state: Cell, pub(super) ty: VulkanImageMemory, pub(super) bridge: Option, } +#[derive(Copy, Clone, Eq, PartialEq, Debug)] +pub enum QueueState { + Acquired { family: QueueFamily }, + Releasing, + Released { to: QueueFamily }, +} + +#[derive(Copy, Clone, Eq, PartialEq, Debug)] +pub enum QueueFamily { + Gfx, + Transfer, +} + +impl QueueState { + pub fn acquire(self, new: QueueFamily) -> QueueTransfer { + match self { + QueueState::Acquired { family } if family == new => QueueTransfer::Unnecessary, + QueueState::Released { to } if to == new => QueueTransfer::Possible, + _ => QueueTransfer::Impossible, + } + } +} + +#[derive(Copy, Clone, Eq, PartialEq, Debug)] +pub enum QueueTransfer { + Unnecessary, + Possible, + Impossible, +} + pub enum VulkanImageMemory { DmaBuf(VulkanDmaBufImage), Internal(VulkanShmImage), @@ -384,6 +415,9 @@ impl 
VulkanDmaBufImageTemplate { format: self.dmabuf.format, is_undefined: Cell::new(true), contents_are_undefined: Cell::new(false), + queue_state: Cell::new(QueueState::Acquired { + family: QueueFamily::Gfx, + }), bridge, })) } diff --git a/src/gfx_apis/vulkan/renderer.rs b/src/gfx_apis/vulkan/renderer.rs index e4612a07..10592eba 100644 --- a/src/gfx_apis/vulkan/renderer.rs +++ b/src/gfx_apis/vulkan/renderer.rs @@ -13,7 +13,7 @@ use { descriptor::VulkanDescriptorSetLayout, device::VulkanDevice, fence::VulkanFence, - image::{VulkanImage, VulkanImageMemory}, + image::{QueueFamily, QueueState, QueueTransfer, VulkanImage, VulkanImageMemory}, pipeline::{PipelineCreateInfo, VulkanPipeline}, semaphore::VulkanSemaphore, shaders::{ @@ -60,13 +60,12 @@ pub struct VulkanRenderer { pub(super) formats: Rc>, pub(super) device: Rc, pub(super) pipelines: CopyHashMap>, - pub(super) command_pool: Rc, - pub(super) command_buffers: Stack>, + pub(super) gfx_command_buffers: CachedCommandBuffers, + pub(super) transfer_command_buffers: Option, pub(super) wait_semaphores: Stack>, - pub(super) total_buffers: NumCell, pub(super) memory: RefCell, pub(super) pending_frames: CopyHashMap>, - pub(super) pending_uploads: CopyHashMap>, + pub(super) pending_submits: CopyHashMap>, pub(super) allocator: Rc, pub(super) last_point: NumCell, pub(super) buffer_resv_user: BufferResvUser, @@ -84,6 +83,26 @@ pub struct VulkanRenderer { pub(super) shm_allocator: Rc, } +pub(super) struct CachedCommandBuffers { + pub(super) pool: Rc, + pub(super) buffers: Stack>, + pub(super) total_buffers: NumCell, +} + +impl CachedCommandBuffers { + pub(super) fn allocate(&self) -> Result, VulkanError> { + zone!("allocate_command_buffer"); + let buf = match self.buffers.pop() { + Some(b) => b, + _ => { + self.total_buffers.fetch_add(1); + self.pool.allocate_buffer()? 
+ } + }; + Ok(buf) + } +} + pub(super) struct UsedTexture { tex: Rc, resv: Option>, @@ -105,7 +124,8 @@ pub(super) enum TexSourceType { #[derive(Default)] pub(super) struct Memory { - sample: Vec>, + dmabuf_sample: Vec>, + queue_transfer: Vec>, textures: Vec, image_barriers: Vec>, wait_semaphores: Vec>, @@ -143,7 +163,11 @@ impl VulkanDevice { let tex_frag_shader = self.create_shader(TEX_FRAG)?; let tex_frag_mult_opaque_shader = self.create_shader(TEX_FRAG_MULT_OPAQUE)?; let tex_frag_mult_alpha_shader = self.create_shader(TEX_FRAG_MULT_ALPHA)?; - let command_pool = self.create_command_pool()?; + let gfx_command_buffers = self.create_command_pool(self.graphics_queue_idx)?; + let transfer_command_buffers = self + .distinct_transfer_queue_family_idx + .map(|idx| self.create_command_pool(idx)) + .transpose()?; let formats: AHashMap = self .formats .iter() @@ -181,13 +205,12 @@ impl VulkanDevice { formats: Rc::new(formats), device: self.clone(), pipelines: Default::default(), - command_pool, - command_buffers: Default::default(), + gfx_command_buffers, + transfer_command_buffers, wait_semaphores: Default::default(), - total_buffers: Default::default(), memory: Default::default(), pending_frames: Default::default(), - pending_uploads: Default::default(), + pending_submits: Default::default(), allocator, last_point: Default::default(), buffer_resv_user: Default::default(), @@ -276,15 +299,21 @@ impl VulkanRenderer { fn collect_memory(&self, opts: &[GfxApiOpt]) { zone!("collect_memory"); let mut memory = self.memory.borrow_mut(); - memory.sample.clear(); + memory.dmabuf_sample.clear(); + memory.queue_transfer.clear(); for cmd in opts { if let GfxApiOpt::CopyTexture(c) = cmd { let tex = c.tex.clone().into_vk(&self.device.device); if tex.contents_are_undefined.get() { continue; } + match tex.queue_state.get().acquire(QueueFamily::Gfx) { + QueueTransfer::Unnecessary => {} + QueueTransfer::Possible => memory.queue_transfer.push(tex.clone()), + QueueTransfer::Impossible => 
continue, + } if let VulkanImageMemory::DmaBuf(_) = &tex.ty { - memory.sample.push(tex.clone()) + memory.dmabuf_sample.push(tex.clone()) } memory.textures.push(UsedTexture { tex, @@ -340,7 +369,7 @@ impl VulkanRenderer { }); } memory.image_barriers.push(fb_image_memory_barrier); - for img in &memory.sample { + for img in &memory.dmabuf_sample { let image_memory_barrier = image_barrier() .src_queue_family_index(QUEUE_FAMILY_FOREIGN_EXT) .dst_queue_family_index(self.device.graphics_queue_idx) @@ -351,6 +380,19 @@ impl VulkanRenderer { .dst_stage_mask(PipelineStageFlags2::FRAGMENT_SHADER); memory.image_barriers.push(image_memory_barrier); } + if let Some(family_idx) = self.device.distinct_transfer_queue_family_idx { + for img in &memory.queue_transfer { + let image_memory_barrier = image_barrier() + .src_queue_family_index(family_idx) + .dst_queue_family_index(self.device.graphics_queue_idx) + .image(img.image) + .dst_access_mask(AccessFlags2::SHADER_SAMPLED_READ) + .dst_stage_mask(PipelineStageFlags2::FRAGMENT_SHADER) + .old_layout(ImageLayout::TRANSFER_DST_OPTIMAL) + .new_layout(ImageLayout::SHADER_READ_ONLY_OPTIMAL); + memory.image_barriers.push(image_memory_barrier); + } + } let dep_info = DependencyInfoKHR::default().image_memory_barriers(&memory.image_barriers); unsafe { self.device.device.cmd_pipeline_barrier2(buf, &dep_info); @@ -471,6 +513,11 @@ impl VulkanRenderer { log::warn!("Ignoring undefined texture"); continue; } + if tex.queue_state.get().acquire(QueueFamily::Gfx) == QueueTransfer::Impossible + { + log::warn!("Ignoring texture owned by different queue"); + continue; + } let copy_type = match c.alpha.is_some() { true => TexCopyType::Multiply, false => TexCopyType::Identity, @@ -616,7 +663,7 @@ impl VulkanRenderer { .src_stage_mask(PipelineStageFlags2::COLOR_ATTACHMENT_OUTPUT); } memory.image_barriers.push(fb_image_memory_barrier); - for img in &memory.sample { + for img in &memory.dmabuf_sample { let image_memory_barrier = image_barrier() 
.src_queue_family_index(self.device.graphics_queue_idx) .dst_queue_family_index(QUEUE_FAMILY_FOREIGN_EXT) @@ -732,6 +779,8 @@ impl VulkanRenderer { } } }; + let attach_async_shm_sync_file = self.device.transfer_queue.is_some() + && self.device.distinct_transfer_queue_family_idx.is_none(); for texture in &mut memory.textures { import( &texture.tex, @@ -739,6 +788,13 @@ impl VulkanRenderer { texture.resv.take(), DMA_BUF_SYNC_READ, ); + if attach_async_shm_sync_file { + if let VulkanImageMemory::Internal(shm) = &texture.tex.ty { + if let Some(data) = &shm.async_data { + data.last_sample.set(Some(sync_file.clone())); + } + } + } } import(fb, fb_release_sync, None, DMA_BUF_SYNC_WRITE); } @@ -777,6 +833,12 @@ impl VulkanRenderer { fn store_layouts(&self, fb: &VulkanImage) { fb.is_undefined.set(false); + let memory = self.memory.borrow(); + for img in &*memory.queue_transfer { + img.queue_state.set(QueueState::Acquired { + family: QueueFamily::Gfx, + }); + } } fn create_pending_frame(self: &Rc, buf: Rc) { @@ -932,7 +994,7 @@ impl VulkanRenderer { final_barriers = final_barriers.image_memory_barriers(slice::from_ref(&final_tex_barrier)); } - let buf = self.allocate_command_buffer()?; + let buf = self.gfx_command_buffers.allocate()?; let mut semaphores = vec![]; let mut semaphore_infos = vec![]; if let VulkanImageMemory::DmaBuf(buf) = &tex.ty { @@ -986,7 +1048,7 @@ impl VulkanRenderer { .map_err(VulkanError::Submit)?; } self.block(); - self.command_buffers.push(buf); + self.gfx_command_buffers.buffers.push(buf); for semaphore in semaphores { self.wait_semaphores.push(semaphore); } @@ -1009,7 +1071,8 @@ impl VulkanRenderer { let sync_file = { let mut memory = self.memory.borrow_mut(); memory.textures.clear(); - memory.sample.clear(); + memory.dmabuf_sample.clear(); + memory.queue_transfer.clear(); memory.wait_semaphores.clear(); memory.release_fence.take(); memory.release_sync_file.take() @@ -1017,18 +1080,6 @@ impl VulkanRenderer { res.map(|_| sync_file) } - pub(super) fn 
allocate_command_buffer(&self) -> Result, VulkanError> { - zone!("allocate_command_buffer"); - let buf = match self.command_buffers.pop() { - Some(b) => b, - _ => { - self.total_buffers.fetch_add(1); - self.command_pool.allocate_buffer()? - } - }; - Ok(buf) - } - fn allocate_semaphore(&self) -> Result, VulkanError> { zone!("allocate_semaphore"); let semaphore = match self.wait_semaphores.pop() { @@ -1047,7 +1098,7 @@ impl VulkanRenderer { clear: Option<&Color>, ) -> Result<(), VulkanError> { self.check_defunct()?; - let buf = self.allocate_command_buffer()?; + let buf = self.gfx_command_buffers.allocate()?; self.collect_memory(opts); self.begin_command_buffer(buf.buffer)?; self.initial_barriers(buf.buffer, fb); @@ -1078,7 +1129,7 @@ impl VulkanRenderer { pub fn on_drop(&self) { self.defunct.set(true); let mut pending_frames = self.pending_frames.lock(); - let mut pending_uploads = self.pending_uploads.lock(); + let mut pending_uploads = self.pending_submits.lock(); if pending_frames.is_not_empty() || pending_uploads.is_not_empty() { log::warn!("Context dropped with pending frames."); self.block(); @@ -1164,7 +1215,7 @@ async fn await_release( frame.renderer.block(); } if let Some(buf) = frame.cmd.take() { - frame.renderer.command_buffers.push(buf); + frame.renderer.gfx_command_buffers.buffers.push(buf); } for wait_semaphore in frame.wait_semaphores.take() { frame.renderer.wait_semaphores.push(wait_semaphore); diff --git a/src/gfx_apis/vulkan/shm_image.rs b/src/gfx_apis/vulkan/shm_image.rs index e5dc272e..7dcbb199 100644 --- a/src/gfx_apis/vulkan/shm_image.rs +++ b/src/gfx_apis/vulkan/shm_image.rs @@ -14,7 +14,7 @@ use { allocator::VulkanAllocation, command::VulkanCommandBuffer, fence::VulkanFence, - image::{VulkanImage, VulkanImageMemory}, + image::{QueueFamily, QueueState, QueueTransfer, VulkanImage, VulkanImageMemory}, renderer::{image_barrier, VulkanRenderer}, staging::VulkanStagingBuffer, VulkanError, @@ -22,9 +22,10 @@ use { rect::{Rect, Region}, 
utils::{clonecell::CloneCell, errorfmt::ErrorFmt, on_drop::OnDrop}, }, + arrayvec::ArrayVec, ash::vk::{ AccessFlags2, BufferImageCopy2, BufferMemoryBarrier2, CommandBufferBeginInfo, - CommandBufferSubmitInfo, CommandBufferUsageFlags, CopyBufferToImageInfo2, + CommandBufferSubmitInfo, CommandBufferUsageFlags, CopyBufferToImageInfo2, DependencyInfo, DependencyInfoKHR, DeviceSize, Extent3D, ImageAspectFlags, ImageCreateInfo, ImageLayout, ImageSubresourceLayers, ImageSubresourceRange, ImageTiling, ImageType, ImageUsageFlags, ImageViewCreateInfo, ImageViewType, Offset3D, PipelineStageFlags2, SampleCountFlags, @@ -58,6 +59,8 @@ pub struct VulkanShmImageAsyncData { pub(super) callback_id: Cell, pub(super) regions: RefCell>>, pub(super) cpu: Rc, + pub(super) last_sample: Cell>, + pub(super) data_copied: Cell, } impl VulkanShmImage { @@ -160,7 +163,7 @@ impl VulkanShmImage { } })?; let Some((cmd, fence, sync_file, point)) = - self.submit_buffer_to_image_copy(img, &staging, cpy)? + self.submit_buffer_to_image_copy(img, &staging, cpy, false)? 
else { return Ok(()); }; @@ -168,7 +171,7 @@ impl VulkanShmImage { "await upload", await_upload(point, img.clone(), cmd, sync_file, fence, staging), ); - img.renderer.pending_uploads.set(point, future); + img.renderer.pending_submits.set(point, future); Ok(()) } @@ -177,6 +180,7 @@ impl VulkanShmImage { img: &Rc, staging: &VulkanStagingBuffer, regions: &[BufferImageCopy2], + use_transfer_queue: bool, ) -> Result, Rc, SyncFile, u64)>, VulkanError> { let memory_barrier = |sam, ssm, dam, dsm| { @@ -189,18 +193,29 @@ impl VulkanShmImage { .dst_access_mask(dam) .dst_stage_mask(dsm) }; - let initial_image_barrier = image_barrier() + let mut transfer_queue_family_idx = img.renderer.device.graphics_queue_idx; + if use_transfer_queue { + if let Some(idx) = img.renderer.device.distinct_transfer_queue_family_idx { + transfer_queue_family_idx = idx; + } + } + let mut initial_image_barrier = image_barrier() .image(img.image) - .src_access_mask(AccessFlags2::SHADER_SAMPLED_READ) - .src_stage_mask(PipelineStageFlags2::FRAGMENT_SHADER) + .src_queue_family_index(img.renderer.device.graphics_queue_idx) + .dst_queue_family_index(transfer_queue_family_idx) + .dst_access_mask(AccessFlags2::TRANSFER_WRITE) + .dst_stage_mask(PipelineStageFlags2::TRANSFER) .old_layout(if img.is_undefined.get() { ImageLayout::UNDEFINED } else { ImageLayout::SHADER_READ_ONLY_OPTIMAL }) - .new_layout(ImageLayout::TRANSFER_DST_OPTIMAL) - .dst_access_mask(AccessFlags2::TRANSFER_WRITE) - .dst_stage_mask(PipelineStageFlags2::TRANSFER); + .new_layout(ImageLayout::TRANSFER_DST_OPTIMAL); + if transfer_queue_family_idx == img.renderer.device.graphics_queue_idx { + initial_image_barrier = initial_image_barrier + .src_access_mask(AccessFlags2::SHADER_SAMPLED_READ) + .src_stage_mask(PipelineStageFlags2::FRAGMENT_SHADER) + } let initial_buffer_barrier = memory_barrier( AccessFlags2::HOST_WRITE, PipelineStageFlags2::HOST, @@ -210,14 +225,19 @@ impl VulkanShmImage { let initial_dep_info = DependencyInfoKHR::default() 
.buffer_memory_barriers(slice::from_ref(&initial_buffer_barrier)) .image_memory_barriers(slice::from_ref(&initial_image_barrier)); - let final_image_barrier = image_barrier() + let mut final_image_barrier = image_barrier() .image(img.image) + .src_queue_family_index(transfer_queue_family_idx) + .dst_queue_family_index(img.renderer.device.graphics_queue_idx) .src_access_mask(AccessFlags2::TRANSFER_WRITE) .src_stage_mask(PipelineStageFlags2::TRANSFER) .old_layout(ImageLayout::TRANSFER_DST_OPTIMAL) - .new_layout(ImageLayout::SHADER_READ_ONLY_OPTIMAL) - .dst_access_mask(AccessFlags2::SHADER_SAMPLED_READ) - .dst_stage_mask(PipelineStageFlags2::FRAGMENT_SHADER); + .new_layout(ImageLayout::SHADER_READ_ONLY_OPTIMAL); + if transfer_queue_family_idx == img.renderer.device.graphics_queue_idx { + final_image_barrier = final_image_barrier + .dst_access_mask(AccessFlags2::SHADER_SAMPLED_READ) + .dst_stage_mask(PipelineStageFlags2::FRAGMENT_SHADER); + } let final_buffer_barrier = memory_barrier( AccessFlags2::TRANSFER_READ, PipelineStageFlags2::TRANSFER, @@ -232,7 +252,10 @@ impl VulkanShmImage { .dst_image(img.image) .dst_image_layout(ImageLayout::TRANSFER_DST_OPTIMAL) .regions(regions); - let cmd = img.renderer.allocate_command_buffer()?; + let cmd = match &img.renderer.transfer_command_buffers { + Some(b) if use_transfer_queue => b.allocate()?, + _ => img.renderer.gfx_command_buffers.allocate()?, + }; let dev = &img.renderer.device.device; let command_buffer_info = CommandBufferSubmitInfo::default().command_buffer(cmd.buffer); let submit_info = @@ -249,7 +272,10 @@ impl VulkanShmImage { dev.end_command_buffer(cmd.buffer) .map_err(VulkanError::EndCommandBuffer)?; dev.queue_submit2( - img.renderer.device.graphics_queue, + match img.renderer.device.transfer_queue { + Some(q) if use_transfer_queue => q, + _ => img.renderer.device.graphics_queue, + }, slice::from_ref(&submit_info), release_fence.fence, ) @@ -286,8 +312,8 @@ async fn await_upload( ); img.renderer.block(); } - 
img.renderer.command_buffers.push(buf); - img.renderer.pending_uploads.remove(&id); + img.renderer.gfx_command_buffers.buffers.push(buf); + img.renderer.pending_submits.remove(&id); } impl VulkanShmImageAsyncData { @@ -334,8 +360,9 @@ impl VulkanShmImage { return Err(VulkanError::InvalidBufferSize); } data.busy.set(true); + data.data_copied.set(false); if img.contents_are_undefined.get() { - damage = Region::new2(Rect::new_sized(0, 0, img.width as _, img.height as _).unwrap()) + damage = Region::new2(Rect::new_sized(0, 0, img.width as _, img.height as _).unwrap()); } let copies = &mut *data.regions.borrow_mut(); @@ -361,13 +388,18 @@ impl VulkanShmImage { .buffer_row_length(img.stride / self.shm_info.bpp); copies.push(copy); }; + let (width_mask, height_mask) = img.renderer.device.transfer_granularity_mask; + let width_mask = width_mask as i32; + let height_mask = height_mask as i32; for damage in damage.rects() { - let Some(damage) = Rect::new( - damage.x1().max(0), - damage.y1().max(0), - damage.x2().min(img.width as i32), - damage.y2().min(img.height as i32), - ) else { + if damage.x2() < 0 || damage.y2() < 0 { + continue; + } + let x1 = damage.x1().max(0) & !width_mask; + let y1 = damage.y1().max(0) & !height_mask; + let x2 = ((damage.x2() + width_mask) & !width_mask).min(img.width as i32); + let y2 = ((damage.y2() + height_mask) & !height_mask).min(img.height as i32); + let Some(damage) = Rect::new(x1, y1, x2, y2) else { continue; }; if damage.is_empty() { @@ -381,6 +413,8 @@ impl VulkanShmImage { ); } + self.async_release_from_gfx_queue(img, data)?; + if let Some(staging) = data.staging.get() { return self.async_upload_initiate_copy(img, data, &staging, copies, client_mem); } @@ -404,6 +438,95 @@ impl VulkanShmImage { ) } + fn async_release_from_gfx_queue( + &self, + img: &Rc, + data: &VulkanShmImageAsyncData, + ) -> Result<(), VulkanError> { + img.renderer.check_defunct()?; + let Some(transfer_queue_idx) = 
img.renderer.device.distinct_transfer_queue_family_idx + else { + let Some(sync_file) = data.last_sample.take() else { + img.queue_state.set(QueueState::Released { + to: QueueFamily::Transfer, + }); + return Ok(()); + }; + let id = img.renderer.allocate_point(); + let pending = img.renderer.eng.spawn( + "await_transfer_to_transfer", + await_gfx_queue_release(id, img.clone(), None, None, sync_file), + ); + img.renderer.pending_submits.set(id, pending); + img.queue_state.set(QueueState::Releasing); + return Ok(()); + }; + let mut barriers = ArrayVec::<_, 2>::new(); + match img.queue_state.get() { + QueueState::Acquired { family } => { + assert_eq!(family, QueueFamily::Gfx); + } + QueueState::Releasing => { + unreachable!(); + } + QueueState::Released { to } => { + assert_eq!(to, QueueFamily::Gfx); + let barrier = image_barrier() + .image(img.image) + .src_queue_family_index(transfer_queue_idx) + .dst_queue_family_index(img.renderer.device.graphics_queue_idx) + .dst_stage_mask(PipelineStageFlags2::ALL_COMMANDS) + .old_layout(ImageLayout::TRANSFER_DST_OPTIMAL) + .new_layout(ImageLayout::SHADER_READ_ONLY_OPTIMAL); + barriers.push(barrier); + } + } + let barrier = image_barrier() + .image(img.image) + .src_queue_family_index(img.renderer.device.graphics_queue_idx) + .dst_queue_family_index(transfer_queue_idx) + .src_access_mask(AccessFlags2::SHADER_SAMPLED_READ) + .src_stage_mask(PipelineStageFlags2::ALL_COMMANDS) + .old_layout(if img.is_undefined.get() { + ImageLayout::UNDEFINED + } else { + ImageLayout::SHADER_READ_ONLY_OPTIMAL + }) + .new_layout(ImageLayout::TRANSFER_DST_OPTIMAL); + barriers.push(barrier); + let dep_info = DependencyInfo::default().image_memory_barriers(&barriers); + let release_fence = img.renderer.device.create_fence()?; + let dev = &img.renderer.device.device; + let begin_info = + CommandBufferBeginInfo::default().flags(CommandBufferUsageFlags::ONE_TIME_SUBMIT); + let cmd = img.renderer.gfx_command_buffers.allocate()?; + let command_buffer_info = 
CommandBufferSubmitInfo::default().command_buffer(cmd.buffer); + let submit_info = + SubmitInfo2::default().command_buffer_infos(slice::from_ref(&command_buffer_info)); + unsafe { + dev.begin_command_buffer(cmd.buffer, &begin_info) + .map_err(VulkanError::BeginCommandBuffer)?; + dev.cmd_pipeline_barrier2(cmd.buffer, &dep_info); + dev.end_command_buffer(cmd.buffer) + .map_err(VulkanError::EndCommandBuffer)?; + dev.queue_submit2( + img.renderer.device.graphics_queue, + slice::from_ref(&submit_info), + release_fence.fence, + ) + .map_err(VulkanError::Submit)?; + } + let sync_file = release_fence.export_sync_file()?; + let id = img.renderer.allocate_point(); + let pending = img.renderer.eng.spawn( + "await_transfer_to_transfer", + await_gfx_queue_release(id, img.clone(), Some(cmd), Some(release_fence), sync_file), + ); + img.renderer.pending_submits.set(id, pending); + img.queue_state.set(QueueState::Releasing); + Ok(()) + } + fn async_upload_after_allocation( &self, img: &Rc, @@ -501,25 +624,28 @@ impl VulkanShmImage { &self, img: &Rc, data: &VulkanShmImageAsyncData, - res: Result<(), ReadWriteJobError>, ) -> Result<(), VulkanError> { - if let Err(e) = res { - return Err(VulkanError::AsyncCopyToStaging(e)); + if !data.data_copied.get() { + return Ok(()); + } + if img.queue_state.get().acquire(QueueFamily::Transfer) == QueueTransfer::Impossible { + return Ok(()); } img.renderer.check_defunct()?; let regions = &*data.regions.borrow(); let staging = data.staging.get().unwrap(); staging.upload(|_, _| ())?; let Some((cmd, fence, sync_file, point)) = - self.submit_buffer_to_image_copy(img, &staging, regions)? + self.submit_buffer_to_image_copy(img, &staging, regions, true)? 
else { return Ok(()); }; + img.queue_state.set(QueueState::Releasing); let future = img.renderer.eng.spawn( "await async upload", await_async_upload(point, img.clone(), cmd, fence, sync_file), ); - img.renderer.pending_uploads.set(point, future); + img.renderer.pending_submits.set(point, future); Ok(()) } } @@ -577,7 +703,42 @@ fn complete_async_upload( }; let data = shm.async_data.as_ref().unwrap(); store(data); - if let Err(e) = shm.async_upload_copy_buffer_to_image(img, data, res) { + if let Err(e) = res { + data.complete(Err(VulkanError::AsyncCopyToStaging(e))); + } + data.data_copied.set(true); + if let Err(e) = shm.async_upload_copy_buffer_to_image(img, data) { + data.complete(Err(e)); + } +} + +async fn await_gfx_queue_release( + id: u64, + img: Rc, + buf: Option>, + _fence: Option>, + sync_file: SyncFile, +) { + let res = img.renderer.ring.readable(&sync_file.0).await; + if let Err(e) = res { + log::error!( + "Could not wait for sync file to become readable: {}", + ErrorFmt(e) + ); + img.renderer.block(); + } + if let Some(buf) = buf { + img.renderer.gfx_command_buffers.buffers.push(buf); + } + img.renderer.pending_submits.remove(&id); + img.queue_state.set(QueueState::Released { + to: QueueFamily::Transfer, + }); + let VulkanImageMemory::Internal(shm) = &img.ty else { + unreachable!(); + }; + let data = shm.async_data.as_ref().unwrap(); + if let Err(e) = shm.async_upload_copy_buffer_to_image(&img, data) { data.complete(Err(e)); } } @@ -597,8 +758,14 @@ async fn await_async_upload( ); img.renderer.block(); } - img.renderer.command_buffers.push(buf); - img.renderer.pending_uploads.remove(&id); + match &img.renderer.transfer_command_buffers { + Some(b) => b.buffers.push(buf), + None => img.renderer.gfx_command_buffers.buffers.push(buf), + } + img.queue_state.set(QueueState::Released { + to: QueueFamily::Gfx, + }); + img.renderer.pending_submits.remove(&id); let VulkanImageMemory::Internal(shm) = &img.ty else { unreachable!(); }; @@ -701,6 +868,8 @@ impl 
VulkanRenderer { callback_id: Cell::new(0), regions: Default::default(), cpu: cpu.clone(), + last_sample: Default::default(), + data_copied: Default::default(), }); } let shm = VulkanShmImage { @@ -722,6 +891,9 @@ impl VulkanRenderer { image, is_undefined: Cell::new(true), contents_are_undefined: Cell::new(true), + queue_state: Cell::new(QueueState::Acquired { + family: QueueFamily::Gfx, + }), ty: VulkanImageMemory::Internal(shm), bridge: None, });