diff --git a/src/backends/metal/present.rs b/src/backends/metal/present.rs index 6a5b7fbb..e02e3b67 100644 --- a/src/backends/metal/present.rs +++ b/src/backends/metal/present.rs @@ -9,7 +9,7 @@ use { }, gfx_api::{ create_render_pass, AcquireSync, BufferResv, GfxApiOpt, GfxRenderPass, GfxTexture, - SyncFile, + ReleaseSync, SyncFile, }, theme::Color, time::Time, @@ -43,7 +43,8 @@ pub struct DirectScanoutCache { pub struct DirectScanoutData { tex: Rc, acquire_sync: AcquireSync, - _resv: Option>, + release_sync: ReleaseSync, + resv: Option>, fb: Rc, dma_buf_id: DmaBufId, position: DirectScanoutPosition, @@ -618,7 +619,8 @@ impl MetalConnector { return buffer.fb.as_ref().map(|fb| DirectScanoutData { tex: buffer.tex.upgrade().unwrap(), acquire_sync: ct.acquire_sync.clone(), - _resv: ct.buffer_resv.clone(), + release_sync: ct.release_sync, + resv: ct.buffer_resv.clone(), fb: fb.clone(), dma_buf_id: dmabuf.id, position, @@ -643,7 +645,8 @@ impl MetalConnector { Ok(fb) => Some(DirectScanoutData { tex: ct.tex.clone(), acquire_sync: ct.acquire_sync.clone(), - _resv: ct.buffer_resv.clone(), + release_sync: ct.release_sync, + resv: ct.buffer_resv.clone(), fb: Rc::new(fb), dma_buf_id: dmabuf.id, position, @@ -708,7 +711,7 @@ impl MetalConnector { None => { let sf = buffer .render_fb() - .perform_render_pass(pass) + .perform_render_pass(AcquireSync::Unnecessary, ReleaseSync::Explicit, pass) .map_err(MetalError::RenderFrame)?; sync_file = buffer.copy_to_dev(sf)?; fb = buffer.drm.clone(); @@ -748,11 +751,23 @@ impl MetalConnector { let render_hardware_cursor = self.cursor_enabled.get(); match &fb.direct_scanout_data { None => { - output.perform_screencopies(&fb.tex, render_hardware_cursor, 0, 0, None); + output.perform_screencopies( + &fb.tex, + None, + &AcquireSync::Unnecessary, + ReleaseSync::None, + render_hardware_cursor, + 0, + 0, + None, + ); } Some(dsd) => { output.perform_screencopies( &dsd.tex, + dsd.resv.as_ref(), + &dsd.acquire_sync, + dsd.release_sync, render_hardware_cursor, dsd.position.crtc_x, dsd.position.crtc_y, diff --git a/src/backends/metal/video.rs b/src/backends/metal/video.rs index d1eb10a7..12d9d147 100644 --- a/src/backends/metal/video.rs +++ b/src/backends/metal/video.rs @@ -2474,7 +2474,9 @@ impl MetalBackend { Ok(fb) => fb, Err(e) => return Err(MetalError::ImportFb(e)), }; - dev_fb.clear().map_err(MetalError::Clear)?; + dev_fb + .clear(AcquireSync::Unnecessary, ReleaseSync::None) + .map_err(MetalError::Clear)?; let (dev_tex, render_tex, render_fb, render_bo) = if dev.id == render_ctx.dev_id { let render_tex = match dev_img.to_texture() { Ok(fb) => fb, @@ -2526,7 +2528,9 @@ impl MetalBackend { Ok(fb) => fb, Err(e) => return Err(MetalError::ImportFb(e)), }; - render_fb.clear().map_err(MetalError::Clear)?; + render_fb + .clear(AcquireSync::Unnecessary, ReleaseSync::None) + .map_err(MetalError::Clear)?; let render_tex = match render_img.to_texture() { Ok(fb) => fb, Err(e) => return Err(MetalError::ImportTexture(e)), @@ -2797,9 +2801,17 @@ impl RenderBuffer { let Some(tex) = &self.dev_tex else { return Ok(sync_file); }; - let acquire_point = AcquireSync::from_sync_file(sync_file); self.dev_fb - .copy_texture(tex, acquire_point, ReleaseSync::Implicit, 0, 0) + .copy_texture( + AcquireSync::Unnecessary, + ReleaseSync::Explicit, + tex, + None, + AcquireSync::from_sync_file(sync_file), + ReleaseSync::None, + 0, + 0, + ) .map_err(MetalError::CopyToOutput) } } diff --git a/src/backends/x.rs b/src/backends/x.rs index d19f999f..21438f97 100644 --- a/src/backends/x.rs +++ b/src/backends/x.rs @@ -10,7 +10,7 @@ use { }, fixed::Fixed, format::XRGB8888, - gfx_api::{GfxContext, GfxError, GfxFramebuffer, GfxTexture}, + gfx_api::{AcquireSync, GfxContext, GfxError, GfxFramebuffer, GfxTexture, ReleaseSync}, ifs::wl_output::OutputId, state::State, utils::{ @@ -750,9 +750,14 @@ impl XBackend { image.last_serial.set(serial); if let Some(node) = self.state.root.outputs.get(&output.id) { - let res = self - .state - .present_output(&node, &image.fb.get(), &image.tex.get(), true); + let res = self.state.present_output( + &node, + &image.fb.get(), + AcquireSync::Implicit, + ReleaseSync::Implicit, + &image.tex.get(), + true, + ); if let Err(e) = res { log::error!("Could not render screen: {}", ErrorFmt(e)); return; diff --git a/src/cursor_user.rs b/src/cursor_user.rs index 44313422..7b6f99a3 100644 --- a/src/cursor_user.rs +++ b/src/cursor_user.rs @@ -3,6 +3,7 @@ use { backend::HardwareCursorUpdate, cursor::{Cursor, KnownCursor, DEFAULT_CURSOR_SIZE}, fixed::Fixed, + gfx_api::{AcquireSync, ReleaseSync}, rect::Rect, scale::Scale, state::State, @@ -497,8 +498,14 @@ impl CursorUser { } if render { let buffer = hc.get_buffer(); - let res = - buffer.render_hardware_cursor(cursor.deref(), &self.group.state, scale, transform); + let res = buffer.render_hardware_cursor( + AcquireSync::Unnecessary, + ReleaseSync::Explicit, + cursor.deref(), + &self.group.state, + scale, + transform, + ); match res { Ok(sync_file) => { hc.set_sync_file(sync_file); diff --git a/src/gfx_api.rs b/src/gfx_api.rs index 612930a1..9fe14ef3 100644 --- a/src/gfx_api.rs +++ b/src/gfx_api.rs @@ -208,13 +208,13 @@ pub enum AcquireSync { impl AcquireSync { pub fn from_sync_file(sync_file: Option) -> Self { match sync_file { - None => Self::Implicit, + None => Self::Unnecessary, Some(sync_file) => Self::SyncFile { sync_file }, } } } -#[derive(Copy, Clone, Eq, PartialEq)] +#[derive(Copy, Clone, Eq, PartialEq, Debug)] pub enum ReleaseSync { None, Implicit, @@ -260,6 +260,8 @@ pub trait GfxFramebuffer: Debug { fn render( &self, + acquire_sync: AcquireSync, + release_sync: ReleaseSync, ops: &[GfxApiOpt], clear: Option<&Color>, ) -> Result, GfxError>; @@ -279,12 +281,24 @@ pub trait GfxFramebuffer: Debug { } impl dyn GfxFramebuffer { - pub fn clear(&self) -> Result, GfxError> { - self.clear_with(0.0, 0.0, 0.0, 0.0) + pub fn clear( + &self, + acquire_sync: AcquireSync, + release_sync: ReleaseSync, + ) -> Result, GfxError> { + self.clear_with(acquire_sync, release_sync, 0.0, 0.0, 0.0, 0.0) } - pub fn clear_with(&self, r: f32, g: f32, b: f32, a: f32) -> Result, GfxError> { - self.render(&[], Some(&Color { r, g, b, a })) + pub fn clear_with( + &self, + acquire_sync: AcquireSync, + release_sync: ReleaseSync, + r: f32, + g: f32, + b: f32, + a: f32, + ) -> Result, GfxError> { + self.render(acquire_sync, release_sync, &[], Some(&Color { r, g, b, a })) } pub fn logical_size(&self, transform: Transform) -> (i32, i32) { @@ -302,7 +316,10 @@ impl dyn GfxFramebuffer { pub fn copy_texture( &self, + fb_acquire_sync: AcquireSync, + fb_release_sync: ReleaseSync, texture: &Rc, + resv: Option<&Rc>, acquire_sync: AcquireSync, release_sync: ReleaseSync, x: i32, @@ -320,16 +337,18 @@ impl dyn GfxFramebuffer { None, scale, None, - None, + resv.cloned(), acquire_sync, release_sync, ); let clear = self.format().has_alpha.then_some(&Color::TRANSPARENT); - self.render(&ops, clear) + self.render(fb_acquire_sync, fb_release_sync, &ops, clear) } pub fn render_custom( &self, + acquire_sync: AcquireSync, + release_sync: ReleaseSync, scale: Scale, clear: Option<&Color>, f: &mut dyn FnMut(&mut RendererBase), @@ -337,7 +356,7 @@ impl dyn GfxFramebuffer { let mut ops = vec![]; let mut renderer = self.renderer_base(&mut ops, scale, Transform::None); f(&mut renderer); - self.render(&ops, clear) + self.render(acquire_sync, release_sync, &ops, clear) } pub fn create_render_pass( @@ -366,12 +385,19 @@ impl dyn GfxFramebuffer { ) } - pub fn perform_render_pass(&self, pass: &GfxRenderPass) -> Result, GfxError> { - self.render(&pass.ops, pass.clear.as_ref()) + pub fn perform_render_pass( + &self, + acquire_sync: AcquireSync, + release_sync: ReleaseSync, + pass: &GfxRenderPass, + ) -> Result, GfxError> { + self.render(acquire_sync, release_sync, &pass.ops, pass.clear.as_ref()) } pub fn render_output( &self, + acquire_sync: AcquireSync, + release_sync: ReleaseSync, node: &OutputNode, state: &State, cursor_rect: Option, @@ -379,6 +405,8 @@ impl dyn GfxFramebuffer { render_hardware_cursor: bool, ) -> Result, GfxError> { self.render_node( + acquire_sync, + release_sync, node, state, cursor_rect, @@ -392,6 +420,8 @@ impl dyn GfxFramebuffer { pub fn render_node( &self, + acquire_sync: AcquireSync, + release_sync: ReleaseSync, node: &dyn Node, state: &State, cursor_rect: Option, @@ -412,11 +442,13 @@ impl dyn GfxFramebuffer { transform, None, ); - self.perform_render_pass(&pass) + self.perform_render_pass(acquire_sync, release_sync, &pass) } pub fn render_hardware_cursor( &self, + acquire_sync: AcquireSync, + release_sync: ReleaseSync, cursor: &dyn Cursor, state: &State, scale: Scale, @@ -433,7 +465,7 @@ impl dyn GfxFramebuffer { }, }; cursor.render_hardware_cursor(&mut renderer); - self.render(&ops, Some(&Color::TRANSPARENT)) + self.render(acquire_sync, release_sync, &ops, Some(&Color::TRANSPARENT)) } } diff --git a/src/gfx_apis/gl.rs b/src/gfx_apis/gl.rs index 22d2a511..3ef1e19f 100644 --- a/src/gfx_apis/gl.rs +++ b/src/gfx_apis/gl.rs @@ -73,6 +73,7 @@ use { ReleaseSync, SyncFile, }, gfx_apis::gl::{ + egl::image::EglImage, gl::texture::image_target, renderer::{ context::{GlRenderContext, TexCopyType, TexSourceType}, @@ -328,7 +329,7 @@ fn render_texture(ctx: &GlRenderContext, tex: &CopyTexture) { assert!(rc_eq(&ctx.ctx, &texture.ctx.ctx)); let gles = ctx.ctx.dpy.gles; unsafe { - handle_explicit_sync(ctx, texture, &tex.acquire_sync); + handle_explicit_sync(ctx, texture.gl.img.as_ref(), &tex.acquire_sync); (gles.glActiveTexture)(GL_TEXTURE0); @@ -395,7 +396,7 @@ fn render_texture(ctx: &GlRenderContext, tex: &CopyTexture) { } } -fn handle_explicit_sync(ctx: &GlRenderContext, texture: &Texture, sync: &AcquireSync) { +fn handle_explicit_sync(ctx: &GlRenderContext, img: Option<&Rc>, sync: &AcquireSync) { let sync_file = match sync { AcquireSync::None | AcquireSync::Implicit | AcquireSync::Unnecessary => return, AcquireSync::SyncFile { sync_file } => sync_file, @@ -417,7 +418,7 @@ fn handle_explicit_sync(ctx: &GlRenderContext, texture: &Texture, sync: &Acquire }; sync.wait(); } else { - if let Some(img) = &texture.gl.img { + if let Some(img) = img { if let Err(e) = img.dmabuf.import_sync_file(DMA_BUF_SYNC_READ, &sync_file) { log::error!("Could not import sync file into dmabuf: {}", ErrorFmt(e)); } diff --git a/src/gfx_apis/gl/renderer/framebuffer.rs b/src/gfx_apis/gl/renderer/framebuffer.rs index 67fe9b63..33071aa9 100644 --- a/src/gfx_apis/gl/renderer/framebuffer.rs +++ b/src/gfx_apis/gl/renderer/framebuffer.rs @@ -1,12 +1,13 @@ use { crate::{ format::Format, - gfx_api::{GfxApiOpt, GfxError, GfxFramebuffer, SyncFile}, + gfx_api::{AcquireSync, GfxApiOpt, GfxError, GfxFramebuffer, ReleaseSync, SyncFile}, gfx_apis::gl::{ gl::{ frame_buffer::GlFrameBuffer, sys::{GL_COLOR_BUFFER_BIT, GL_FRAMEBUFFER}, }, + handle_explicit_sync, renderer::context::GlRenderContext, run_ops, sys::{GL_ONE, GL_ONE_MINUS_SRC_ALPHA}, @@ -69,11 +70,13 @@ impl Framebuffer { pub fn render( &self, + acquire_sync: AcquireSync, ops: &[GfxApiOpt], clear: Option<&Color>, ) -> Result, RenderError> { let gles = self.ctx.ctx.dpy.gles; self.ctx.ctx.with_current(|| { + handle_explicit_sync(&self.ctx, self.gl.rb._img.as_ref(), &acquire_sync); unsafe { (gles.glBindFramebuffer)(GL_FRAMEBUFFER, self.gl.fbo); (gles.glViewport)(0, 0, self.gl.width, self.gl.height); @@ -101,10 +104,12 @@ impl GfxFramebuffer for Framebuffer { fn render( &self, + acquire_sync: AcquireSync, + _release_sync: ReleaseSync, ops: &[GfxApiOpt], clear: Option<&Color>, ) -> Result, GfxError> { - self.render(ops, clear).map_err(|e| e.into()) + self.render(acquire_sync, ops, clear).map_err(|e| e.into()) } fn copy_to_shm( diff --git a/src/gfx_apis/vulkan/bo_allocator.rs b/src/gfx_apis/vulkan/bo_allocator.rs index f89da55b..f9ae381d 100644 --- a/src/gfx_apis/vulkan/bo_allocator.rs +++ b/src/gfx_apis/vulkan/bo_allocator.rs @@ -86,8 +86,8 @@ impl VulkanDevice { drm: &Drm, ) -> Result { let allocator = self.create_allocator()?; - let pool = self.create_command_pool()?; - let command_buffer = pool.allocate_buffer()?; + let pool = self.create_command_pool(self.graphics_queue_idx)?; + let command_buffer = pool.allocate()?; let drm = drm.dup_render().map_err(VulkanError::DupDrm)?; Ok(VulkanBoAllocator { data: Rc::new(VulkanBoAllocatorData { diff --git a/src/gfx_apis/vulkan/command.rs b/src/gfx_apis/vulkan/command.rs index e3f737a5..c3edceb0 100644 --- a/src/gfx_apis/vulkan/command.rs +++ b/src/gfx_apis/vulkan/command.rs @@ -1,5 +1,5 @@ use { - crate::gfx_apis::vulkan::{device::VulkanDevice, VulkanError}, + crate::gfx_apis::vulkan::{device::VulkanDevice, renderer::CachedCommandBuffers, VulkanError}, ash::vk::{ CommandBuffer, CommandBufferAllocateInfo, CommandBufferLevel, CommandPool, CommandPoolCreateFlags, CommandPoolCreateInfo, @@ -53,17 +53,24 @@ impl VulkanCommandPool { } impl VulkanDevice { - pub fn create_command_pool(self: &Rc) -> Result, VulkanError> { + pub fn create_command_pool( + self: &Rc, + queue: u32, + ) -> Result { let info = CommandPoolCreateInfo::default() - .queue_family_index(self.graphics_queue_idx) + .queue_family_index(queue) .flags( CommandPoolCreateFlags::TRANSIENT | CommandPoolCreateFlags::RESET_COMMAND_BUFFER, ); let pool = unsafe { self.device.create_command_pool(&info, None) }; let pool = pool.map_err(VulkanError::AllocateCommandPool)?; - Ok(Rc::new(VulkanCommandPool { - device: self.clone(), - pool, - })) + Ok(CachedCommandBuffers { + pool: Rc::new(VulkanCommandPool { + device: self.clone(), + pool, + }), + buffers: Default::default(), + total_buffers: Default::default(), + }) } } diff --git a/src/gfx_apis/vulkan/device.rs b/src/gfx_apis/vulkan/device.rs index 8792a8ac..5ff1d796 100644 --- a/src/gfx_apis/vulkan/device.rs +++ b/src/gfx_apis/vulkan/device.rs @@ -63,6 +63,9 @@ pub struct VulkanDevice { pub(super) memory_types: ArrayVec, pub(super) graphics_queue: Queue, pub(super) graphics_queue_idx: u32, + pub(super) transfer_queue: Option, + pub(super) distinct_transfer_queue_family_idx: Option, + pub(super) transfer_granularity_mask: (u32, u32), } impl Drop for VulkanDevice { @@ -185,16 +188,59 @@ impl VulkanInstance { Err(VulkanError::NoDeviceFound(dev)) } - fn find_graphics_queue(&self, phy_dev: PhysicalDevice) -> Result { + fn find_queues( + &self, + phy_dev: PhysicalDevice, + ) -> Result<(u32, Option<(u32, u32, u32)>), VulkanError> { let props = unsafe { self.instance .get_physical_device_queue_family_properties(phy_dev) }; - props + let gfx_queue = props .iter() .position(|p| p.queue_flags.contains(QueueFlags::GRAPHICS)) - .map(|v| v as _) - .ok_or(VulkanError::NoGraphicsQueue) + .ok_or(VulkanError::NoGraphicsQueue)?; + let transfer_queue = 'transfer: { + let mut transfer_only = None; + let mut compute_only = None; + let mut separate_gfx = None; + for (idx, props) in props.iter().enumerate() { + if idx == gfx_queue { + continue; + } + let g = &props.min_image_transfer_granularity; + if g.width == 0 || g.height == 0 { + continue; + } + let f = props.queue_flags; + use QueueFlags as F; + if !f.intersects(F::GRAPHICS | F::COMPUTE) && f.intersects(F::TRANSFER) { + transfer_only = Some(idx); + } else if !f.intersects(F::GRAPHICS) && f.intersects(F::COMPUTE) { + compute_only = Some(idx); + } else if f.intersects(F::GRAPHICS) { + separate_gfx = Some(idx); + } + } + if let Some(idx) = transfer_only.or(compute_only).or(separate_gfx) { + break 'transfer Some(idx); + } + if props[gfx_queue].queue_count > 1 { + break 'transfer Some(gfx_queue); + } + None + }; + let mut width_mask = 0; + let mut height_mask = 0; + if let Some(idx) = transfer_queue { + let g = &props[idx].min_image_transfer_granularity; + width_mask = g.width.wrapping_sub(1); + height_mask = g.height.wrapping_sub(1); + } + Ok(( + gfx_queue as _, + transfer_queue.map(|v| (v as _, width_mask, height_mask)), + )) } fn supports_semaphore_import(&self, phy_dev: PhysicalDevice) -> bool { @@ -224,7 +270,15 @@ impl VulkanInstance { return Err(VulkanError::MissingDeviceExtension(ext)); } } - let graphics_queue_idx = self.find_graphics_queue(phy_dev)?; + let (graphics_queue_family_idx, transfer_queue_family) = self.find_queues(phy_dev)?; + let mut distinct_transfer_queue_family_idx = None; + let mut transfer_granularity_mask = (0, 0); + if let Some((idx, width_mask, height_mask)) = transfer_queue_family { + if idx != graphics_queue_family_idx { + distinct_transfer_queue_family_idx = Some(idx); + } + transfer_granularity_mask = (width_mask, height_mask); + } if !self.supports_semaphore_import(phy_dev) { return Err(VulkanError::SyncobjImport); } @@ -238,14 +292,24 @@ impl VulkanInstance { PhysicalDeviceSynchronization2Features::default().synchronization2(true); let mut dynamic_rendering_features = PhysicalDeviceDynamicRenderingFeatures::default().dynamic_rendering(true); - let queue_create_info = DeviceQueueCreateInfo::default() - .queue_family_index(graphics_queue_idx) - .queue_priorities(&[1.0]); + let mut queue_create_infos = ArrayVec::<_, 2>::new(); + queue_create_infos.push( + DeviceQueueCreateInfo::default() + .queue_family_index(graphics_queue_family_idx) + .queue_priorities(&[1.0]), + ); + if let Some((tq, _, _)) = transfer_queue_family { + queue_create_infos.push( + DeviceQueueCreateInfo::default() + .queue_family_index(tq) + .queue_priorities(&[1.0]), + ); + } let device_create_info = DeviceCreateInfo::default() .push_next(&mut semaphore_features) .push_next(&mut synchronization2_features) .push_next(&mut dynamic_rendering_features) - .queue_create_infos(std::slice::from_ref(&queue_create_info)) + .queue_create_infos(&queue_create_infos) .enabled_extension_names(&enabled_extensions); let device = unsafe { self.instance @@ -286,7 +350,14 @@ impl VulkanInstance { .iter() .copied() .collect(); - let graphics_queue = unsafe { device.get_device_queue(graphics_queue_idx, 0) }; + let graphics_queue = unsafe { device.get_device_queue(graphics_queue_family_idx, 0) }; + let transfer_queue = transfer_queue_family.map(|(family_idx, _, _)| { + let queue_idx = match family_idx == graphics_queue_family_idx { + true => 1, + false => 0, + }; + unsafe { device.get_device_queue(family_idx, queue_idx) } + }); Ok(Rc::new(VulkanDevice { physical_device: phy_dev, render_node, @@ -302,7 +373,10 @@ impl VulkanInstance { formats, memory_types, graphics_queue, - graphics_queue_idx, + graphics_queue_idx: graphics_queue_family_idx, + transfer_queue, + distinct_transfer_queue_family_idx, + transfer_granularity_mask, })) } } diff --git a/src/gfx_apis/vulkan/image.rs b/src/gfx_apis/vulkan/image.rs index a29a16f0..aa02a734 100644 --- a/src/gfx_apis/vulkan/image.rs +++ b/src/gfx_apis/vulkan/image.rs @@ -3,9 +3,9 @@ use { clientmem::ClientMemOffset, format::Format, gfx_api::{ - AsyncShmGfxTexture, AsyncShmGfxTextureCallback, AsyncShmGfxTextureUploadCancellable, - GfxApiOpt, GfxError, GfxFramebuffer, GfxImage, GfxTexture, PendingShmUpload, - ShmGfxTexture, SyncFile, + AcquireSync, AsyncShmGfxTexture, AsyncShmGfxTextureCallback, + AsyncShmGfxTextureUploadCancellable, GfxApiOpt, GfxError, GfxFramebuffer, GfxImage, + GfxTexture, PendingShmUpload, ReleaseSync, ShmGfxTexture, SyncFile, }, gfx_apis::vulkan::{ allocator::VulkanAllocation, device::VulkanDevice, format::VulkanModifierLimits, @@ -58,10 +58,41 @@ pub struct VulkanImage { pub(super) image: Image, pub(super) is_undefined: Cell, pub(super) contents_are_undefined: Cell, + pub(super) queue_state: Cell, pub(super) ty: VulkanImageMemory, pub(super) bridge: Option, } +#[derive(Copy, Clone, Eq, PartialEq, Debug)] +pub enum QueueState { + Acquired { family: QueueFamily }, + Releasing, + Released { to: QueueFamily }, +} + +#[derive(Copy, Clone, Eq, PartialEq, Debug)] +pub enum QueueFamily { + Gfx, + Transfer, +} + +impl QueueState { + pub fn acquire(self, new: QueueFamily) -> QueueTransfer { + match self { + QueueState::Acquired { family } if family == new => QueueTransfer::Unnecessary, + QueueState::Released { to } if to == new => QueueTransfer::Possible, + _ => QueueTransfer::Impossible, + } + } +} + +#[derive(Copy, Clone, Eq, PartialEq, Debug)] +pub enum QueueTransfer { + Unnecessary, + Possible, + Impossible, +} + pub enum VulkanImageMemory { DmaBuf(VulkanDmaBufImage), Internal(VulkanShmImage), @@ -384,6 +415,9 @@ impl VulkanDmaBufImageTemplate { format: self.dmabuf.format, is_undefined: Cell::new(true), contents_are_undefined: Cell::new(false), + queue_state: Cell::new(QueueState::Acquired { + family: QueueFamily::Gfx, + }), bridge, })) } @@ -465,11 +499,13 @@ impl GfxFramebuffer for VulkanImage { fn render( &self, + acquire_sync: AcquireSync, + release_sync: ReleaseSync, ops: &[GfxApiOpt], clear: Option<&Color>, ) -> Result, GfxError> { self.renderer - .execute(self, ops, clear) + .execute(self, acquire_sync, release_sync, ops, clear) .map_err(|e| e.into()) } diff --git a/src/gfx_apis/vulkan/renderer.rs b/src/gfx_apis/vulkan/renderer.rs index 969cb448..10592eba 100644 --- a/src/gfx_apis/vulkan/renderer.rs +++ b/src/gfx_apis/vulkan/renderer.rs @@ -13,7 +13,7 @@ use { descriptor::VulkanDescriptorSetLayout, device::VulkanDevice, fence::VulkanFence, - image::{VulkanImage, VulkanImageMemory}, + image::{QueueFamily, QueueState, QueueTransfer, VulkanImage, VulkanImageMemory}, pipeline::{PipelineCreateInfo, VulkanPipeline}, semaphore::VulkanSemaphore, shaders::{ @@ -60,13 +60,12 @@ pub struct VulkanRenderer { pub(super) formats: Rc>, pub(super) device: Rc, pub(super) pipelines: CopyHashMap>, - pub(super) command_pool: Rc, - pub(super) command_buffers: Stack>, + pub(super) gfx_command_buffers: CachedCommandBuffers, + pub(super) transfer_command_buffers: Option, pub(super) wait_semaphores: Stack>, - pub(super) total_buffers: NumCell, pub(super) memory: RefCell, pub(super) pending_frames: CopyHashMap>, - pub(super) pending_uploads: CopyHashMap>, + pub(super) pending_submits: CopyHashMap>, pub(super) allocator: Rc, pub(super) last_point: NumCell, pub(super) buffer_resv_user: BufferResvUser, @@ -84,6 +83,26 @@ pub struct VulkanRenderer { pub(super) shm_allocator: Rc, } +pub(super) struct CachedCommandBuffers { + pub(super) pool: Rc, + pub(super) buffers: Stack>, + pub(super) total_buffers: NumCell, +} + +impl CachedCommandBuffers { + pub(super) fn allocate(&self) -> Result, VulkanError> { + zone!("allocate_command_buffer"); + let buf = match self.buffers.pop() { + Some(b) => b, + _ => { + self.total_buffers.fetch_add(1); + self.pool.allocate_buffer()? + } + }; + Ok(buf) + } +} + pub(super) struct UsedTexture { tex: Rc, resv: Option>, @@ -105,7 +124,8 @@ pub(super) enum TexSourceType { #[derive(Default)] pub(super) struct Memory { - sample: Vec>, + dmabuf_sample: Vec>, + queue_transfer: Vec>, textures: Vec, image_barriers: Vec>, wait_semaphores: Vec>, @@ -143,7 +163,11 @@ impl VulkanDevice { let tex_frag_shader = self.create_shader(TEX_FRAG)?; let tex_frag_mult_opaque_shader = self.create_shader(TEX_FRAG_MULT_OPAQUE)?; let tex_frag_mult_alpha_shader = self.create_shader(TEX_FRAG_MULT_ALPHA)?; - let command_pool = self.create_command_pool()?; + let gfx_command_buffers = self.create_command_pool(self.graphics_queue_idx)?; + let transfer_command_buffers = self + .distinct_transfer_queue_family_idx + .map(|idx| self.create_command_pool(idx)) + .transpose()?; let formats: AHashMap = self .formats .iter() @@ -181,13 +205,12 @@ impl VulkanDevice { formats: Rc::new(formats), device: self.clone(), pipelines: Default::default(), - command_pool, - command_buffers: Default::default(), + gfx_command_buffers, + transfer_command_buffers, wait_semaphores: Default::default(), - total_buffers: Default::default(), memory: Default::default(), pending_frames: Default::default(), - pending_uploads: Default::default(), + pending_submits: Default::default(), allocator, last_point: Default::default(), buffer_resv_user: Default::default(), @@ -276,15 +299,21 @@ impl VulkanRenderer { fn collect_memory(&self, opts: &[GfxApiOpt]) { zone!("collect_memory"); let mut memory = self.memory.borrow_mut(); - memory.sample.clear(); + memory.dmabuf_sample.clear(); + memory.queue_transfer.clear(); for cmd in opts { if let GfxApiOpt::CopyTexture(c) = cmd { let tex = c.tex.clone().into_vk(&self.device.device); if tex.contents_are_undefined.get() { continue; } + match tex.queue_state.get().acquire(QueueFamily::Gfx) { + QueueTransfer::Unnecessary => {} + QueueTransfer::Possible => memory.queue_transfer.push(tex.clone()), + QueueTransfer::Impossible => continue, + } if let VulkanImageMemory::DmaBuf(_) = &tex.ty { - memory.sample.push(tex.clone()) + memory.dmabuf_sample.push(tex.clone()) } memory.textures.push(UsedTexture { tex, @@ -340,7 +369,7 @@ impl VulkanRenderer { }); } memory.image_barriers.push(fb_image_memory_barrier); - for img in &memory.sample { + for img in &memory.dmabuf_sample { let image_memory_barrier = image_barrier() .src_queue_family_index(QUEUE_FAMILY_FOREIGN_EXT) .dst_queue_family_index(self.device.graphics_queue_idx) @@ -351,6 +380,19 @@ impl VulkanRenderer { .dst_stage_mask(PipelineStageFlags2::FRAGMENT_SHADER); memory.image_barriers.push(image_memory_barrier); } + if let Some(family_idx) = self.device.distinct_transfer_queue_family_idx { + for img in &memory.queue_transfer { + let image_memory_barrier = image_barrier() + .src_queue_family_index(family_idx) + .dst_queue_family_index(self.device.graphics_queue_idx) + .image(img.image) + .dst_access_mask(AccessFlags2::SHADER_SAMPLED_READ) + .dst_stage_mask(PipelineStageFlags2::FRAGMENT_SHADER) + .old_layout(ImageLayout::TRANSFER_DST_OPTIMAL) + .new_layout(ImageLayout::SHADER_READ_ONLY_OPTIMAL); + memory.image_barriers.push(image_memory_barrier); + } + } let dep_info = DependencyInfoKHR::default().image_memory_barriers(&memory.image_barriers); unsafe { self.device.device.cmd_pipeline_barrier2(buf, &dep_info); @@ -471,6 +513,11 @@ impl VulkanRenderer { log::warn!("Ignoring undefined texture"); continue; } + if tex.queue_state.get().acquire(QueueFamily::Gfx) == QueueTransfer::Impossible + { + log::warn!("Ignoring texture owned by different queue"); + continue; + } let copy_type = match c.alpha.is_some() { true => TexCopyType::Multiply, false => TexCopyType::Identity, @@ -616,7 +663,7 @@ impl VulkanRenderer { .src_stage_mask(PipelineStageFlags2::COLOR_ATTACHMENT_OUTPUT); } memory.image_barriers.push(fb_image_memory_barrier); - for img in &memory.sample { + for img in &memory.dmabuf_sample { let image_memory_barrier = image_barrier() .src_queue_family_index(self.device.graphics_queue_idx) .dst_queue_family_index(QUEUE_FAMILY_FOREIGN_EXT) @@ -643,7 +690,11 @@ impl VulkanRenderer { } } - fn create_wait_semaphores(&self, fb: &VulkanImage) -> Result<(), VulkanError> { + fn create_wait_semaphores( + &self, + fb: &VulkanImage, + fb_acquire_sync: &AcquireSync, + ) -> Result<(), VulkanError> { zone!("create_wait_semaphores"); let mut memory = self.memory.borrow_mut(); let memory = &mut *memory; @@ -699,13 +750,13 @@ impl VulkanRenderer { &mut memory.wait_semaphore_infos, &mut memory.wait_semaphores, fb, - &AcquireSync::Implicit, + fb_acquire_sync, DMA_BUF_SYNC_WRITE, )?; Ok(()) } - fn import_release_semaphore(&self, fb: &VulkanImage) { + fn import_release_semaphore(&self, fb: &VulkanImage, fb_release_sync: ReleaseSync) { zone!("import_release_semaphore"); let memory = &mut *self.memory.borrow_mut(); let sync_file = match memory.release_sync_file.as_ref() { @@ -728,6 +779,8 @@ impl VulkanRenderer { } } }; + let attach_async_shm_sync_file = self.device.transfer_queue.is_some() + && self.device.distinct_transfer_queue_family_idx.is_none(); for texture in &mut memory.textures { import( &texture.tex, @@ -735,8 +788,15 @@ impl VulkanRenderer { texture.resv.take(), DMA_BUF_SYNC_READ, ); + if attach_async_shm_sync_file { + if let VulkanImageMemory::Internal(shm) = &texture.tex.ty { + if let Some(data) = &shm.async_data { + data.last_sample.set(Some(sync_file.clone())); + } + } + } } - import(fb, ReleaseSync::Implicit, None, DMA_BUF_SYNC_WRITE); + import(fb, fb_release_sync, None, DMA_BUF_SYNC_WRITE); } fn submit(&self, buf: CommandBuffer) -> Result<(), VulkanError> { @@ -773,6 +833,12 @@ impl VulkanRenderer { fn store_layouts(&self, fb: &VulkanImage) { fb.is_undefined.set(false); + let memory = self.memory.borrow(); + for img in &*memory.queue_transfer { + img.queue_state.set(QueueState::Acquired { + family: QueueFamily::Gfx, + }); + } } fn create_pending_frame(self: &Rc, buf: Rc) { @@ -838,7 +904,10 @@ impl VulkanRenderer { )?; (&*tmp_tex as &dyn GfxFramebuffer) .copy_texture( + AcquireSync::None, + ReleaseSync::None, &(tex.clone() as _), + None, AcquireSync::None, ReleaseSync::None, x, @@ -925,7 +994,7 @@ impl VulkanRenderer { final_barriers = final_barriers.image_memory_barriers(slice::from_ref(&final_tex_barrier)); } - let buf = self.allocate_command_buffer()?; + let buf = self.gfx_command_buffers.allocate()?; let mut semaphores = vec![]; let mut semaphore_infos = vec![]; if let VulkanImageMemory::DmaBuf(buf) = &tex.ty { @@ -979,7 +1048,7 @@ impl VulkanRenderer { .map_err(VulkanError::Submit)?; } self.block(); - self.command_buffers.push(buf); + self.gfx_command_buffers.buffers.push(buf); for semaphore in semaphores { self.wait_semaphores.push(semaphore); } @@ -992,15 +1061,18 @@ impl VulkanRenderer { pub fn execute( self: &Rc, fb: &VulkanImage, + fb_acquire_sync: AcquireSync, + fb_release_sync: ReleaseSync, opts: &[GfxApiOpt], clear: Option<&Color>, ) -> Result, VulkanError> { zone!("execute"); - let res = self.try_execute(fb, opts, clear); + let res = self.try_execute(fb, fb_acquire_sync, fb_release_sync, opts, clear); let sync_file = { let mut memory = self.memory.borrow_mut(); memory.textures.clear(); - memory.sample.clear(); + memory.dmabuf_sample.clear(); + memory.queue_transfer.clear(); memory.wait_semaphores.clear(); memory.release_fence.take(); memory.release_sync_file.take() @@ -1008,18 +1080,6 @@ impl VulkanRenderer { res.map(|_| sync_file) } - pub(super) fn allocate_command_buffer(&self) -> Result, VulkanError> { - zone!("allocate_command_buffer"); - let buf = match self.command_buffers.pop() { - Some(b) => b, - _ => { - self.total_buffers.fetch_add(1); - self.command_pool.allocate_buffer()? - } - }; - Ok(buf) - } - fn allocate_semaphore(&self) -> Result, VulkanError> { zone!("allocate_semaphore"); let semaphore = match self.wait_semaphores.pop() { @@ -1032,11 +1092,13 @@ impl VulkanRenderer { fn try_execute( self: &Rc, fb: &VulkanImage, + fb_acquire_sync: AcquireSync, + fb_release_sync: ReleaseSync, opts: &[GfxApiOpt], clear: Option<&Color>, ) -> Result<(), VulkanError> { self.check_defunct()?; - let buf = self.allocate_command_buffer()?; + let buf = self.gfx_command_buffers.allocate()?; self.collect_memory(opts); self.begin_command_buffer(buf.buffer)?; self.initial_barriers(buf.buffer, fb); @@ -1047,9 +1109,9 @@ impl VulkanRenderer { self.copy_bridge_to_dmabuf(buf.buffer, fb); self.final_barriers(buf.buffer, fb); self.end_command_buffer(buf.buffer)?; - self.create_wait_semaphores(fb)?; + self.create_wait_semaphores(fb, &fb_acquire_sync)?; self.submit(buf.buffer)?; - self.import_release_semaphore(fb); + self.import_release_semaphore(fb, fb_release_sync); self.store_layouts(fb); self.create_pending_frame(buf); Ok(()) @@ -1067,7 +1129,7 @@ impl VulkanRenderer { pub fn on_drop(&self) { self.defunct.set(true); let mut pending_frames = self.pending_frames.lock(); - let mut pending_uploads = self.pending_uploads.lock(); + let mut pending_uploads = self.pending_submits.lock(); if pending_frames.is_not_empty() || pending_uploads.is_not_empty() { log::warn!("Context dropped with pending frames."); self.block(); @@ -1153,7 +1215,7 @@ async fn await_release( frame.renderer.block(); } if let Some(buf) = frame.cmd.take() { - frame.renderer.command_buffers.push(buf); + frame.renderer.gfx_command_buffers.buffers.push(buf); } for wait_semaphore in frame.wait_semaphores.take() { frame.renderer.wait_semaphores.push(wait_semaphore); diff --git a/src/gfx_apis/vulkan/shm_image.rs b/src/gfx_apis/vulkan/shm_image.rs index e5dc272e..7dcbb199 100644 --- a/src/gfx_apis/vulkan/shm_image.rs +++ b/src/gfx_apis/vulkan/shm_image.rs @@ -14,7 +14,7 @@ use { allocator::VulkanAllocation, command::VulkanCommandBuffer, fence::VulkanFence, - image::{VulkanImage, VulkanImageMemory}, + image::{QueueFamily, QueueState, QueueTransfer, VulkanImage, VulkanImageMemory}, renderer::{image_barrier, VulkanRenderer}, staging::VulkanStagingBuffer, VulkanError, @@ -22,9 +22,10 @@ use { rect::{Rect, Region}, utils::{clonecell::CloneCell, errorfmt::ErrorFmt, on_drop::OnDrop}, }, + arrayvec::ArrayVec, ash::vk::{ AccessFlags2, BufferImageCopy2, BufferMemoryBarrier2, CommandBufferBeginInfo, - CommandBufferSubmitInfo, CommandBufferUsageFlags, CopyBufferToImageInfo2, + CommandBufferSubmitInfo, CommandBufferUsageFlags, CopyBufferToImageInfo2, DependencyInfo, DependencyInfoKHR, DeviceSize, Extent3D, ImageAspectFlags, ImageCreateInfo, ImageLayout, ImageSubresourceLayers, ImageSubresourceRange, ImageTiling, ImageType, ImageUsageFlags, ImageViewCreateInfo, ImageViewType, Offset3D, PipelineStageFlags2, SampleCountFlags, @@ -58,6 +59,8 @@ pub struct VulkanShmImageAsyncData { pub(super) callback_id: Cell, pub(super) regions: RefCell>>, pub(super) cpu: Rc, + pub(super) last_sample: Cell>, + pub(super) data_copied: Cell, } impl VulkanShmImage { @@ -160,7 +163,7 @@ impl VulkanShmImage { } })?; let Some((cmd, fence, sync_file, point)) = - self.submit_buffer_to_image_copy(img, &staging, cpy)? + self.submit_buffer_to_image_copy(img, &staging, cpy, false)? else { return Ok(()); }; @@ -168,7 +171,7 @@ impl VulkanShmImage { "await upload", await_upload(point, img.clone(), cmd, sync_file, fence, staging), ); - img.renderer.pending_uploads.set(point, future); + img.renderer.pending_submits.set(point, future); Ok(()) } @@ -177,6 +180,7 @@ impl VulkanShmImage { img: &Rc, staging: &VulkanStagingBuffer, regions: &[BufferImageCopy2], + use_transfer_queue: bool, ) -> Result, Rc, SyncFile, u64)>, VulkanError> { let memory_barrier = |sam, ssm, dam, dsm| { @@ -189,18 +193,29 @@ impl VulkanShmImage { .dst_access_mask(dam) .dst_stage_mask(dsm) }; - let initial_image_barrier = image_barrier() + let mut transfer_queue_family_idx = img.renderer.device.graphics_queue_idx; + if use_transfer_queue { + if let Some(idx) = img.renderer.device.distinct_transfer_queue_family_idx { + transfer_queue_family_idx = idx; + } + } + let mut initial_image_barrier = image_barrier() .image(img.image) - .src_access_mask(AccessFlags2::SHADER_SAMPLED_READ) - .src_stage_mask(PipelineStageFlags2::FRAGMENT_SHADER) + .src_queue_family_index(img.renderer.device.graphics_queue_idx) + .dst_queue_family_index(transfer_queue_family_idx) + .dst_access_mask(AccessFlags2::TRANSFER_WRITE) + .dst_stage_mask(PipelineStageFlags2::TRANSFER) .old_layout(if img.is_undefined.get() { ImageLayout::UNDEFINED } else { ImageLayout::SHADER_READ_ONLY_OPTIMAL }) - .new_layout(ImageLayout::TRANSFER_DST_OPTIMAL) - .dst_access_mask(AccessFlags2::TRANSFER_WRITE) - .dst_stage_mask(PipelineStageFlags2::TRANSFER); + .new_layout(ImageLayout::TRANSFER_DST_OPTIMAL); + if transfer_queue_family_idx == img.renderer.device.graphics_queue_idx { + initial_image_barrier = initial_image_barrier + .src_access_mask(AccessFlags2::SHADER_SAMPLED_READ) + .src_stage_mask(PipelineStageFlags2::FRAGMENT_SHADER) + } let initial_buffer_barrier = memory_barrier( AccessFlags2::HOST_WRITE, PipelineStageFlags2::HOST, @@ -210,14 +225,19 @@ impl VulkanShmImage { let initial_dep_info = DependencyInfoKHR::default() .buffer_memory_barriers(slice::from_ref(&initial_buffer_barrier)) .image_memory_barriers(slice::from_ref(&initial_image_barrier)); - let final_image_barrier = image_barrier() + let mut final_image_barrier = image_barrier() .image(img.image) + .src_queue_family_index(transfer_queue_family_idx) + .dst_queue_family_index(img.renderer.device.graphics_queue_idx) .src_access_mask(AccessFlags2::TRANSFER_WRITE) .src_stage_mask(PipelineStageFlags2::TRANSFER) .old_layout(ImageLayout::TRANSFER_DST_OPTIMAL) - .new_layout(ImageLayout::SHADER_READ_ONLY_OPTIMAL) - .dst_access_mask(AccessFlags2::SHADER_SAMPLED_READ) - .dst_stage_mask(PipelineStageFlags2::FRAGMENT_SHADER); + .new_layout(ImageLayout::SHADER_READ_ONLY_OPTIMAL); + if transfer_queue_family_idx == img.renderer.device.graphics_queue_idx { + final_image_barrier = final_image_barrier + .dst_access_mask(AccessFlags2::SHADER_SAMPLED_READ) + .dst_stage_mask(PipelineStageFlags2::FRAGMENT_SHADER); + } let final_buffer_barrier = memory_barrier( AccessFlags2::TRANSFER_READ, PipelineStageFlags2::TRANSFER, @@ -232,7 +252,10 @@ impl VulkanShmImage { .dst_image(img.image) .dst_image_layout(ImageLayout::TRANSFER_DST_OPTIMAL) .regions(regions); - let cmd = img.renderer.allocate_command_buffer()?; + let cmd = match &img.renderer.transfer_command_buffers { + Some(b) if use_transfer_queue => b.allocate()?, + _ => img.renderer.gfx_command_buffers.allocate()?, + }; let dev = &img.renderer.device.device; let command_buffer_info = CommandBufferSubmitInfo::default().command_buffer(cmd.buffer); let submit_info = @@ -249,7 +272,10 @@ impl VulkanShmImage { dev.end_command_buffer(cmd.buffer) .map_err(VulkanError::EndCommandBuffer)?; dev.queue_submit2( - img.renderer.device.graphics_queue, + match img.renderer.device.transfer_queue { + Some(q) if use_transfer_queue => q, + _ => img.renderer.device.graphics_queue, + }, slice::from_ref(&submit_info), release_fence.fence, ) @@ -286,8 +312,8 @@ async fn await_upload( ); img.renderer.block(); } - img.renderer.command_buffers.push(buf); - img.renderer.pending_uploads.remove(&id); + img.renderer.gfx_command_buffers.buffers.push(buf); + img.renderer.pending_submits.remove(&id); } impl VulkanShmImageAsyncData { @@ -334,8 +360,9 @@ impl VulkanShmImage { return Err(VulkanError::InvalidBufferSize); } data.busy.set(true); + data.data_copied.set(false); if img.contents_are_undefined.get() { - damage = Region::new2(Rect::new_sized(0, 0, img.width as _, img.height as _).unwrap()) + damage = Region::new2(Rect::new_sized(0, 0, img.width as _, img.height as _).unwrap()); } let copies = &mut *data.regions.borrow_mut(); @@ -361,13 +388,18 @@ impl VulkanShmImage { .buffer_row_length(img.stride / self.shm_info.bpp); copies.push(copy); }; + let (width_mask, height_mask) = img.renderer.device.transfer_granularity_mask; + let width_mask = width_mask as i32; + let height_mask = height_mask as i32; for damage in damage.rects() { - let Some(damage) = Rect::new( - damage.x1().max(0), - damage.y1().max(0), - damage.x2().min(img.width as i32), - damage.y2().min(img.height as i32), - ) else { + if damage.x2() < 0 || damage.y2() < 0 { + continue; + } + let x1 = damage.x1().max(0) & !width_mask; + let y1 = damage.y1().max(0) & !height_mask; + let x2 = ((damage.x2() + width_mask) & !width_mask).min(img.width as i32); + let y2 = ((damage.y2() + height_mask) & !height_mask).min(img.height as i32); + let Some(damage) = Rect::new(x1, y1, x2, y2) else { continue; }; if damage.is_empty() { @@ -381,6 +413,8 @@ impl VulkanShmImage { ); } + self.async_release_from_gfx_queue(img, data)?; + if let Some(staging) = data.staging.get() { return self.async_upload_initiate_copy(img, data, &staging, copies, client_mem); } @@ -404,6 +438,95 @@ impl VulkanShmImage { ) } + fn async_release_from_gfx_queue( + &self, + img: &Rc, + data: &VulkanShmImageAsyncData, + ) -> Result<(), VulkanError> { + img.renderer.check_defunct()?; + let Some(transfer_queue_idx) = img.renderer.device.distinct_transfer_queue_family_idx + else { + let Some(sync_file) = data.last_sample.take() else { + img.queue_state.set(QueueState::Released { + to: QueueFamily::Transfer, + }); + return Ok(()); + }; + let id = img.renderer.allocate_point(); + let pending = img.renderer.eng.spawn( + "await_transfer_to_transfer", + await_gfx_queue_release(id, img.clone(), None, None, sync_file), + ); + img.renderer.pending_submits.set(id, pending); + img.queue_state.set(QueueState::Releasing); + return Ok(()); + }; + let mut barriers = ArrayVec::<_, 2>::new(); + match img.queue_state.get() { + QueueState::Acquired { family } => { + assert_eq!(family, QueueFamily::Gfx); + } + QueueState::Releasing => { + unreachable!(); + } + QueueState::Released { to } => { + assert_eq!(to, QueueFamily::Gfx); + let barrier = image_barrier() + .image(img.image) + .src_queue_family_index(transfer_queue_idx) + .dst_queue_family_index(img.renderer.device.graphics_queue_idx) + .dst_stage_mask(PipelineStageFlags2::ALL_COMMANDS) + .old_layout(ImageLayout::TRANSFER_DST_OPTIMAL) + .new_layout(ImageLayout::SHADER_READ_ONLY_OPTIMAL); + barriers.push(barrier); + } + } + let barrier = image_barrier() + .image(img.image) + .src_queue_family_index(img.renderer.device.graphics_queue_idx) + .dst_queue_family_index(transfer_queue_idx) + .src_access_mask(AccessFlags2::SHADER_SAMPLED_READ) + .src_stage_mask(PipelineStageFlags2::ALL_COMMANDS) + .old_layout(if img.is_undefined.get() { + ImageLayout::UNDEFINED + } else { + ImageLayout::SHADER_READ_ONLY_OPTIMAL + }) + .new_layout(ImageLayout::TRANSFER_DST_OPTIMAL); + barriers.push(barrier); + let dep_info = DependencyInfo::default().image_memory_barriers(&barriers); + let release_fence = img.renderer.device.create_fence()?; + let dev = &img.renderer.device.device; + let begin_info = + CommandBufferBeginInfo::default().flags(CommandBufferUsageFlags::ONE_TIME_SUBMIT); + let cmd = img.renderer.gfx_command_buffers.allocate()?; + let command_buffer_info = CommandBufferSubmitInfo::default().command_buffer(cmd.buffer); + let submit_info = + SubmitInfo2::default().command_buffer_infos(slice::from_ref(&command_buffer_info)); + unsafe { + dev.begin_command_buffer(cmd.buffer, &begin_info) + .map_err(VulkanError::BeginCommandBuffer)?; + dev.cmd_pipeline_barrier2(cmd.buffer, &dep_info); + dev.end_command_buffer(cmd.buffer) + .map_err(VulkanError::EndCommandBuffer)?; + dev.queue_submit2( + img.renderer.device.graphics_queue, + slice::from_ref(&submit_info), + release_fence.fence, + ) + .map_err(VulkanError::Submit)?; + } + let sync_file = release_fence.export_sync_file()?; + let id = img.renderer.allocate_point(); + let pending = img.renderer.eng.spawn( + "await_transfer_to_transfer", + await_gfx_queue_release(id, img.clone(), Some(cmd), Some(release_fence), sync_file), + ); + img.renderer.pending_submits.set(id, pending); + img.queue_state.set(QueueState::Releasing); + Ok(()) + } + fn async_upload_after_allocation( &self, img: &Rc, @@ -501,25 +624,28 @@ impl VulkanShmImage { &self, img: &Rc, data: &VulkanShmImageAsyncData, - res: Result<(), ReadWriteJobError>, ) -> Result<(), VulkanError> { - if let Err(e) = res { - return Err(VulkanError::AsyncCopyToStaging(e)); + if !data.data_copied.get() { + return Ok(()); + } + if img.queue_state.get().acquire(QueueFamily::Transfer) == QueueTransfer::Impossible { + return Ok(()); } img.renderer.check_defunct()?; let regions = &*data.regions.borrow(); let staging = data.staging.get().unwrap(); staging.upload(|_, _| ())?; let Some((cmd, fence, sync_file, point)) = - self.submit_buffer_to_image_copy(img, &staging, regions)? + self.submit_buffer_to_image_copy(img, &staging, regions, true)? else { return Ok(()); }; + img.queue_state.set(QueueState::Releasing); let future = img.renderer.eng.spawn( "await async upload", await_async_upload(point, img.clone(), cmd, fence, sync_file), ); - img.renderer.pending_uploads.set(point, future); + img.renderer.pending_submits.set(point, future); Ok(()) } } @@ -577,7 +703,42 @@ fn complete_async_upload( }; let data = shm.async_data.as_ref().unwrap(); store(data); - if let Err(e) = shm.async_upload_copy_buffer_to_image(img, data, res) { + if let Err(e) = res { + data.complete(Err(VulkanError::AsyncCopyToStaging(e))); + } + data.data_copied.set(true); + if let Err(e) = shm.async_upload_copy_buffer_to_image(img, data) { + data.complete(Err(e)); + } +} + +async fn await_gfx_queue_release( + id: u64, + img: Rc, + buf: Option>, + _fence: Option>, + sync_file: SyncFile, +) { + let res = img.renderer.ring.readable(&sync_file.0).await; + if let Err(e) = res { + log::error!( + "Could not wait for sync file to become readable: {}", + ErrorFmt(e) + ); + img.renderer.block(); + } + if let Some(buf) = buf { + img.renderer.gfx_command_buffers.buffers.push(buf); + } + img.renderer.pending_submits.remove(&id); + img.queue_state.set(QueueState::Released { + to: QueueFamily::Transfer, + }); + let VulkanImageMemory::Internal(shm) = &img.ty else { + unreachable!(); + }; + let data = shm.async_data.as_ref().unwrap(); + if let Err(e) = shm.async_upload_copy_buffer_to_image(&img, data) { data.complete(Err(e)); } } @@ -597,8 +758,14 @@ async fn await_async_upload( ); img.renderer.block(); } - img.renderer.command_buffers.push(buf); - img.renderer.pending_uploads.remove(&id); + match &img.renderer.transfer_command_buffers { + Some(b) => b.buffers.push(buf), + None => img.renderer.gfx_command_buffers.buffers.push(buf), + } + img.queue_state.set(QueueState::Released { + to: QueueFamily::Gfx, + }); + img.renderer.pending_submits.remove(&id); let VulkanImageMemory::Internal(shm) = &img.ty else { unreachable!(); }; @@ -701,6 +868,8 @@ impl VulkanRenderer { callback_id: Cell::new(0), regions: Default::default(), cpu: cpu.clone(), + last_sample: Default::default(), + data_copied: Default::default(), }); } let shm = VulkanShmImage { @@ -722,6 +891,9 @@ impl VulkanRenderer { image, is_undefined: Cell::new(true), contents_are_undefined: Cell::new(true), + queue_state: Cell::new(QueueState::Acquired { + family: QueueFamily::Gfx, + }), ty: VulkanImageMemory::Internal(shm), bridge: None, }); diff --git a/src/ifs/jay_screencast.rs b/src/ifs/jay_screencast.rs index 8e90ce53..d63a71bb 100644 --- a/src/ifs/jay_screencast.rs +++ b/src/ifs/jay_screencast.rs @@ -3,7 +3,9 @@ use { allocator::{AllocatorError, BufferObject, BO_USE_LINEAR, BO_USE_RENDERING}, client::{Client, ClientError}, format::XRGB8888, - gfx_api::{GfxContext, GfxError, GfxFramebuffer, GfxTexture}, + gfx_api::{ + AcquireSync, BufferResv, GfxContext, GfxError, GfxFramebuffer, GfxTexture, ReleaseSync, + }, ifs::{jay_output::JayOutput, jay_toplevel::JayToplevel, wl_buffer::WlBufferStorage}, leaks::Tracker, object::{Object, Version}, @@ -189,6 +191,8 @@ impl JayScreencast { for (idx, buffer) in buffer.deref_mut().iter_mut().enumerate() { if buffer.free { let res = buffer.fb.render_node( + AcquireSync::Implicit, + ReleaseSync::Implicit, tl.tl_as_node(), &self.client.state, Some(tl.node_absolute_position()), @@ -298,6 +302,9 @@ impl JayScreencast { &self, on: &OutputNode, texture: &Rc, + resv: Option<&Rc>, + acquire_sync: &AcquireSync, + release_sync: ReleaseSync, render_hardware_cursors: bool, x_off: i32, y_off: i32, @@ -320,7 +327,12 @@ impl JayScreencast { if buffer.free { let res = self.client.state.perform_screencopy( texture, + resv, + acquire_sync, + release_sync, &buffer.fb, + AcquireSync::Implicit, + ReleaseSync::Implicit, on.global.pos.get(), render_hardware_cursors, x_off, diff --git a/src/it/test_gfx_api.rs b/src/it/test_gfx_api.rs index 784b39c8..90157cfa 100644 --- a/src/it/test_gfx_api.rs +++ b/src/it/test_gfx_api.rs @@ -5,9 +5,10 @@ use { cpu_worker::CpuWorker, format::{Format, ARGB8888, XRGB8888}, gfx_api::{ - AsyncShmGfxTexture, AsyncShmGfxTextureCallback, CopyTexture, FillRect, FramebufferRect, - GfxApiOpt, GfxContext, GfxError, GfxFormat, GfxFramebuffer, GfxImage, GfxTexture, - GfxWriteModifier, PendingShmUpload, ResetStatus, ShmGfxTexture, SyncFile, + AcquireSync, AsyncShmGfxTexture, AsyncShmGfxTextureCallback, CopyTexture, FillRect, + FramebufferRect, GfxApiOpt, GfxContext, GfxError, GfxFormat, GfxFramebuffer, GfxImage, + GfxTexture, GfxWriteModifier, PendingShmUpload, ReleaseSync, ResetStatus, + ShmGfxTexture, SyncFile, }, rect::{Rect, Region}, theme::Color, @@ -413,6 +414,8 @@ impl GfxFramebuffer for TestGfxFb { fn render( &self, + _acquire_sync: AcquireSync, + _release_sync: ReleaseSync, ops: &[GfxApiOpt], clear: Option<&Color>, ) -> Result, GfxError> { diff --git a/src/portal/ptr_gui.rs b/src/portal/ptr_gui.rs index 5f65e332..23ba3ff6 100644 --- a/src/portal/ptr_gui.rs +++ b/src/portal/ptr_gui.rs @@ -635,13 +635,17 @@ impl WindowData { return; }; - let res = buf - .fb - .render_custom(self.scale.get(), Some(&Color::from_gray(0)), &mut |r| { + let res = buf.fb.render_custom( + AcquireSync::Implicit, + ReleaseSync::Implicit, + self.scale.get(), + Some(&Color::from_gray(0)), + &mut |r| { if let Some(content) = self.content.get() { content.render_at(r, 0.0, 0.0) } - }); + }, + ); if let Err(e) = res { log::error!("Could not render frame: {}", ErrorFmt(e)); return; diff --git a/src/screenshoter.rs b/src/screenshoter.rs index eebf9da9..076daf1f 100644 --- a/src/screenshoter.rs +++ b/src/screenshoter.rs @@ -2,7 +2,7 @@ use { crate::{ allocator::{AllocatorError, BufferObject, BufferUsage, BO_USE_RENDERING}, format::XRGB8888, - gfx_api::{needs_render_usage, GfxError}, + gfx_api::{needs_render_usage, AcquireSync, GfxError, ReleaseSync}, scale::Scale, state::State, video::drm::DrmError, @@ -77,6 +77,8 @@ pub fn take_screenshot( )?; let fb = ctx.clone().dmabuf_fb(bo.dmabuf())?; fb.render_node( + AcquireSync::Unnecessary, + ReleaseSync::Implicit, state.root.deref(), state, Some(state.root.extents.get()), diff --git a/src/state.rs b/src/state.rs index fe5f8abf..83f7fff7 100644 --- a/src/state.rs +++ b/src/state.rs @@ -27,8 +27,8 @@ use { forker::ForkerProxy, format::Format, gfx_api::{ - AcquireSync, GfxContext, GfxError, GfxFramebuffer, GfxTexture, ReleaseSync, SampleRect, - SyncFile, + AcquireSync, BufferResv, GfxContext, GfxError, GfxFramebuffer, GfxTexture, ReleaseSync, + SampleRect, SyncFile, }, gfx_apis::create_gfx_context, globals::{Globals, GlobalsError, RemovableWaylandGlobal, WaylandGlobal}, @@ -900,10 +900,14 @@ impl State { &self, output: &OutputNode, fb: &Rc, + acquire_sync: AcquireSync, + release_sync: ReleaseSync, tex: &Rc, render_hw_cursor: bool, ) -> Result, GfxError> { let sync_file = fb.render_output( + acquire_sync, + release_sync, output, self, Some(output.global.pos.get()), @@ -911,14 +915,28 @@ impl State { render_hw_cursor, )?; output.latched(); - output.perform_screencopies(tex, !render_hw_cursor, 0, 0, None); + output.perform_screencopies( + tex, + None, + &AcquireSync::Unnecessary, + ReleaseSync::None, + !render_hw_cursor, + 0, + 0, + None, + ); Ok(sync_file) } pub fn perform_screencopy( &self, src: &Rc, + resv: Option<&Rc>, + acquire_sync: &AcquireSync, + release_sync: ReleaseSync, target: &Rc, + target_acquire_sync: AcquireSync, + target_release_sync: ReleaseSync, position: Rect, render_hardware_cursors: bool, x_off: i32, @@ -947,9 +965,9 @@ impl State { size, Scale::from_int(1), None, - None, - AcquireSync::None, - ReleaseSync::Implicit, + resv.cloned(), + acquire_sync.clone(), + release_sync, ); if render_hardware_cursors { if let Some(cursor_user_group) = self.cursor_user_group_hardware_cursor.get() { @@ -963,7 +981,12 @@ impl State { } } } - target.render(&ops, Some(&Color::SOLID_BLACK)) + target.render( + target_acquire_sync, + target_release_sync, + &ops, + Some(&Color::SOLID_BLACK), + ) } fn have_hardware_cursor(&self) -> bool { @@ -980,6 +1003,7 @@ impl State { pub fn perform_shm_screencopy( &self, src: &Rc, + acquire_sync: &AcquireSync, position: Rect, x_off: i32, y_off: i32, @@ -1011,7 +1035,12 @@ impl State { .map_err(ShmScreencopyError::CreateTemporaryFb)?; self.perform_screencopy( src, + None, + acquire_sync, + ReleaseSync::None, &fb, + AcquireSync::Unnecessary, + ReleaseSync::None, position, true, x_off - capture.rect.x1(), diff --git a/src/tracy/tracy_impl.rs b/src/tracy/tracy_impl.rs index 4828ace7..cdb91160 100644 --- a/src/tracy/tracy_impl.rs +++ b/src/tracy/tracy_impl.rs @@ -28,7 +28,7 @@ struct ZoneNameData { unsafe impl Sync for ZoneNameData {} unsafe impl Send for ZoneNameData {} -static CACHE: LazyLock>> = LazyLock::new(|| Default::default()); +static CACHE: LazyLock>> = LazyLock::new(Default::default); impl ZoneName { pub fn __get(name: &str) -> Self { @@ -120,8 +120,7 @@ pub struct FrameName { name: &'static CString, } -static FRAME_CACHE: LazyLock>> = - LazyLock::new(|| Default::default()); +static FRAME_CACHE: LazyLock>> = LazyLock::new(Default::default); impl FrameName { pub fn get(name: &str) -> Self { @@ -177,7 +176,6 @@ impl Drop for RenderingFrame { } #[no_mangle] -#[allow(static_mut_refs)] unsafe extern "C" fn ___tracy_demangle( mangled: *const std::ffi::c_char, ) -> *const std::ffi::c_char { @@ -191,7 +189,7 @@ unsafe extern "C" fn ___tracy_demangle( let demangled = rustc_demangle::demangle(mangled); static mut BUF: Vec = Vec::new(); BUF.clear(); - if let Err(_) = write!(BUF, "{demangled:#}\0") { + if write!(BUF, "{demangled:#}\0").is_err() { return ptr::null(); } BUF.as_ptr().cast() diff --git a/src/tree/output.rs b/src/tree/output.rs index 2f774406..7f764883 100644 --- a/src/tree/output.rs +++ b/src/tree/output.rs @@ -4,7 +4,7 @@ use { client::ClientId, cursor::KnownCursor, fixed::Fixed, - gfx_api::GfxTexture, + gfx_api::{AcquireSync, BufferResv, GfxTexture, ReleaseSync}, ifs::{ jay_output::JayOutput, jay_screencast::JayScreencast, @@ -186,6 +186,9 @@ impl OutputNode { pub fn perform_screencopies( &self, tex: &Rc, + resv: Option<&Rc>, + acquire_sync: &AcquireSync, + release_sync: ReleaseSync, render_hardware_cursor: bool, x_off: i32, y_off: i32, @@ -196,15 +199,37 @@ impl OutputNode { return; } } - self.perform_wlr_screencopies(tex, render_hardware_cursor, x_off, y_off, size); + self.perform_wlr_screencopies( + tex, + resv, + acquire_sync, + release_sync, + render_hardware_cursor, + x_off, + y_off, + size, + ); for sc in self.screencasts.lock().values() { - sc.copy_texture(self, tex, render_hardware_cursor, x_off, y_off, size); + sc.copy_texture( + self, + tex, + resv, + acquire_sync, + release_sync, + render_hardware_cursor, + x_off, + y_off, + size, + ); } } pub fn perform_wlr_screencopies( &self, tex: &Rc, + resv: Option<&Rc>, + acquire_sync: &AcquireSync, + release_sync: ReleaseSync, render_hardware_cursors: bool, x_off: i32, y_off: i32, @@ -232,6 +257,7 @@ impl OutputNode { WlBufferStorage::Shm { mem, stride } => { let res = self.state.perform_shm_screencopy( tex, + acquire_sync, self.global.pos.get(), x_off, y_off, @@ -259,7 +285,12 @@ impl OutputNode { }; let res = self.state.perform_screencopy( tex, + resv, + acquire_sync, + release_sync, &fb, + AcquireSync::Implicit, + ReleaseSync::Implicit, self.global.pos.get(), render_hardware_cursors, x_off - capture.rect.x1(),