From 676961808b8ddb84a8215c873927a57970ac3e88 Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Fri, 8 May 2026 10:33:52 +0100 Subject: [PATCH 01/17] perf(enc-ffmpeg): skip BufferedResampler when conversion is identity --- .../src/audio/buffered_resampler.rs | 45 ++++++++++++++----- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/crates/enc-ffmpeg/src/audio/buffered_resampler.rs b/crates/enc-ffmpeg/src/audio/buffered_resampler.rs index d350920856..a72acbabe1 100644 --- a/crates/enc-ffmpeg/src/audio/buffered_resampler.rs +++ b/crates/enc-ffmpeg/src/audio/buffered_resampler.rs @@ -9,7 +9,8 @@ use ffmpeg::software::resampling; /// for if the requested frame size is larger than the latest buffered frame, /// ensuring that the resulting frame's PTS is always accurate. pub struct BufferedResampler { - resampler: ffmpeg::software::resampling::Context, + resampler: Option, + output: AudioInfo, buffer: VecDeque<(ffmpeg::frame::Audio, i64)>, sample_index: usize, // used to account for cases where pts is rounded down instead of up @@ -18,13 +19,21 @@ pub struct BufferedResampler { impl BufferedResampler { pub fn new(from: AudioInfo, to: AudioInfo) -> Result { - let resampler = ffmpeg::software::resampler( - (from.sample_format, from.channel_layout(), from.sample_rate), - (to.sample_format, to.channel_layout(), to.sample_rate), - )?; + let needs_resampler = from.sample_format != to.sample_format + || from.sample_rate != to.sample_rate + || from.channel_layout() != to.channel_layout(); + let resampler = if needs_resampler { + Some(ffmpeg::software::resampler( + (from.sample_format, from.channel_layout(), from.sample_rate), + (to.sample_format, to.channel_layout(), to.sample_rate), + )?) + } else { + None + }; Ok(Self { resampler, + output: to, buffer: VecDeque::new(), sample_index: 0, min_next_pts: None, @@ -52,7 +61,14 @@ impl BufferedResampler { } pub fn output(&self) -> resampling::context::Definition { - *self.resampler.output() + match &self.resampler { + Some(resampler) => *resampler.output(), + None => resampling::context::Definition { + format: self.output.sample_format, + channel_layout: self.output.channel_layout(), + rate: self.output.sample_rate, + }, + } } pub fn add_frame(&mut self, mut frame: ffmpeg::frame::Audio) { @@ -64,9 +80,16 @@ impl BufferedResampler { let pts = frame.pts().unwrap(); + let Some(resampler) = self.resampler.as_mut() else { + let next_pts = pts + frame.samples() as i64; + self.buffer.push_back((frame, pts)); + self.min_next_pts = Some(next_pts); + return; + }; + let mut resampled_frame = ffmpeg::frame::Audio::empty(); - self.resampler.run(&frame, &mut resampled_frame).unwrap(); + resampler.run(&frame, &mut resampled_frame).unwrap(); let resampled_pts = (pts as f64 * (resampled_frame.rate() as f64 / frame.rate() as f64)) as i64; @@ -75,13 +98,13 @@ impl BufferedResampler { self.buffer.push_back((resampled_frame, resampled_pts)); - while self.resampler.delay().is_some() { + while resampler.delay().is_some() { let mut resampled_frame = ffmpeg::frame::Audio::new( - self.resampler.output().format, + resampler.output().format, 0, - self.resampler.output().channel_layout, + resampler.output().channel_layout, ); - self.resampler.flush(&mut resampled_frame).unwrap(); + resampler.flush(&mut resampled_frame).unwrap(); let samples = resampled_frame.samples(); if samples == 0 { break; From 0efa98f0da8260f7ff8fc50044108d8943684725 Mon Sep 17 00:00:00 2001 From: Richie McIlroy 
<33632126+richiemcilroy@users.noreply.github.com> Date: Fri, 8 May 2026 10:33:54 +0100 Subject: [PATCH 02/17] perf(rendering): add rgba_to_nv12_fast CPU conversion --- crates/rendering/src/cpu_yuv.rs | 379 ++++++++++++++++++++++++++++++++ 1 file changed, 379 insertions(+) diff --git a/crates/rendering/src/cpu_yuv.rs b/crates/rendering/src/cpu_yuv.rs index 0207e46020..8179748dd0 100644 --- a/crates/rendering/src/cpu_yuv.rs +++ b/crates/rendering/src/cpu_yuv.rs @@ -32,6 +32,242 @@ impl ConversionProgress { } } +#[derive(Clone, Copy)] +pub struct RgbaToNv12Config { + pub width: u32, + pub height: u32, + pub rgba_stride: u32, + pub y_stride: u32, + pub uv_stride: u32, +} + +impl RgbaToNv12Config { + fn as_usize(self) -> RgbaToNv12ConfigUsize { + RgbaToNv12ConfigUsize { + width: self.width as usize, + height: self.height as usize, + rgba_stride: self.rgba_stride as usize, + y_stride: self.y_stride as usize, + uv_stride: self.uv_stride as usize, + } + } +} + +#[derive(Clone, Copy)] +struct RgbaToNv12ConfigUsize { + width: usize, + height: usize, + rgba_stride: usize, + y_stride: usize, + uv_stride: usize, +} + +impl RgbaToNv12ConfigUsize { + fn y_plane_size(self) -> Option { + self.y_stride.checked_mul(self.height) + } + + fn uv_height(self) -> usize { + self.height / 2 + } + + fn uv_plane_size(self) -> Option { + self.uv_stride.checked_mul(self.uv_height()) + } + + fn source_required_len(self) -> Option { + if self.height == 0 { + return Some(0); + } + + self.rgba_stride + .checked_mul(self.height - 1)? + .checked_add(self.width.checked_mul(4)?) + } + + fn output_required_len(self) -> Option { + self.y_plane_size()?.checked_add(self.uv_plane_size()?) + } + + fn is_valid_for(self, rgba: &[u8], output: &[u8]) -> bool { + if self.width == 0 || self.height == 0 { + return false; + } + + let Some(source_required) = self.source_required_len() else { + return false; + }; + let Some(output_required) = self.output_required_len() else { + return false; + }; + + self.rgba_stride >= self.width * 4 + && self.y_stride >= self.width + && self.uv_stride >= self.width + && rgba.len() >= source_required + && output.len() >= output_required + } +} + +const RGBA_TO_NV12_PARALLEL_THRESHOLD_PIXELS: usize = 1920 * 1080; + +pub fn rgba_to_nv12_fast(rgba: &[u8], output: &mut [u8], config: RgbaToNv12Config) -> bool { + let config = config.as_usize(); + if !config.is_valid_for(rgba, output) { + return false; + } + + let y_plane_size = config.y_plane_size().unwrap(); + let uv_plane_size = config.uv_plane_size().unwrap(); + let (y_plane, rest) = output.split_at_mut(y_plane_size); + let uv_plane = &mut rest[..uv_plane_size]; + + if config.width * config.height >= RGBA_TO_NV12_PARALLEL_THRESHOLD_PIXELS { + rgba_to_nv12_y_parallel(rgba, y_plane, config); + rgba_to_nv12_uv_parallel(rgba, uv_plane, config); + } else { + rgba_to_nv12_y_scalar(rgba, y_plane, config); + rgba_to_nv12_uv_scalar(rgba, uv_plane, config); + } + + true +} + +fn rgba_to_nv12_y_scalar(rgba: &[u8], y_plane: &mut [u8], config: RgbaToNv12ConfigUsize) { + for row in 0..config.height { + let src_start = row * config.rgba_stride; + let src_row = &rgba[src_start..src_start + config.width * 4]; + let y_start = row * config.y_stride; + let y_row = &mut y_plane[y_start..y_start + config.width]; + + for (col, y) in y_row.iter_mut().enumerate() { + let src = col * 4; + *y = rgb_to_y(src_row[src], src_row[src + 1], src_row[src + 2]); + } + } +} + +fn rgba_to_nv12_uv_scalar(rgba: &[u8], uv_plane: &mut [u8], config: RgbaToNv12ConfigUsize) { + for row in 
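// Each chroma row consumes two RGBA rows: NV12 is 4:2:0, so U and V are
// averaged over a 2x2 pixel block (see average_2x2 below).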
0..config.uv_height() { + let src_start0 = row * 2 * config.rgba_stride; + let src_start1 = (row * 2 + 1) * config.rgba_stride; + let src_row0 = &rgba[src_start0..src_start0 + config.width * 4]; + let src_row1 = &rgba[src_start1..src_start1 + config.width * 4]; + let uv_start = row * config.uv_stride; + let uv_row = &mut uv_plane[uv_start..uv_start + config.width]; + + for (col, uv) in uv_row + .chunks_exact_mut(2) + .take(config.width / 2) + .enumerate() + { + let src = col * 8; + let r = average_2x2( + src_row0[src], + src_row0[src + 4], + src_row1[src], + src_row1[src + 4], + ); + let g = average_2x2( + src_row0[src + 1], + src_row0[src + 5], + src_row1[src + 1], + src_row1[src + 5], + ); + let b = average_2x2( + src_row0[src + 2], + src_row0[src + 6], + src_row1[src + 2], + src_row1[src + 6], + ); + let (u, v) = rgb_to_uv(r, g, b); + uv[0] = u; + uv[1] = v; + } + } +} + +fn rgba_to_nv12_y_parallel(rgba: &[u8], y_plane: &mut [u8], config: RgbaToNv12ConfigUsize) { + use rayon::prelude::*; + + y_plane + .par_chunks_mut(config.y_stride) + .take(config.height) + .enumerate() + .for_each(|(row, y_row)| { + let src_start = row * config.rgba_stride; + let src_row = &rgba[src_start..src_start + config.width * 4]; + for (col, y) in y_row.iter_mut().take(config.width).enumerate() { + let src = col * 4; + *y = rgb_to_y(src_row[src], src_row[src + 1], src_row[src + 2]); + } + }); +} + +fn rgba_to_nv12_uv_parallel(rgba: &[u8], uv_plane: &mut [u8], config: RgbaToNv12ConfigUsize) { + use rayon::prelude::*; + + uv_plane + .par_chunks_mut(config.uv_stride) + .take(config.uv_height()) + .enumerate() + .for_each(|(row, uv_row)| { + let src_start0 = row * 2 * config.rgba_stride; + let src_start1 = (row * 2 + 1) * config.rgba_stride; + let src_row0 = &rgba[src_start0..src_start0 + config.width * 4]; + let src_row1 = &rgba[src_start1..src_start1 + config.width * 4]; + + for (col, uv) in uv_row + .chunks_exact_mut(2) + .take(config.width / 2) + .enumerate() + { + let src = col * 8; + let r = average_2x2( + src_row0[src], + src_row0[src + 4], + src_row1[src], + src_row1[src + 4], + ); + let g = average_2x2( + src_row0[src + 1], + src_row0[src + 5], + src_row1[src + 1], + src_row1[src + 5], + ); + let b = average_2x2( + src_row0[src + 2], + src_row0[src + 6], + src_row1[src + 2], + src_row1[src + 6], + ); + let (u, v) = rgb_to_uv(r, g, b); + uv[0] = u; + uv[1] = v; + } + }); +} + +#[inline(always)] +fn average_2x2(a: u8, b: u8, c: u8, d: u8) -> i32 { + (i32::from(a) + i32::from(b) + i32::from(c) + i32::from(d) + 2) / 4 +} + +#[inline(always)] +fn rgb_to_y(r: u8, g: u8, b: u8) -> u8 { + let r = i32::from(r); + let g = i32::from(g); + let b = i32::from(b); + (16 + ((65 * r + 129 * g + 25 * b + 128) >> 8)).clamp(16, 235) as u8 +} + +#[inline(always)] +fn rgb_to_uv(r: i32, g: i32, b: i32) -> (u8, u8) { + let u = (128 + ((-38 * r - 74 * g + 112 * b + 128) >> 8)).clamp(16, 240) as u8; + let v = (128 + ((112 * r - 94 * g - 18 * b + 128) >> 8)).clamp(16, 240) as u8; + (u, v) +} + pub fn nv12_to_rgba( y_data: &[u8], uv_data: &[u8], @@ -919,6 +1155,149 @@ fn clamp_u8(val: i32) -> u8 { #[cfg(test)] mod tests { use super::*; + use std::{hint::black_box, time::Instant}; + + fn rgba_to_nv12_baseline(rgba: &[u8], output: &mut [u8], config: RgbaToNv12Config) { + let width = config.width as usize; + let height = config.height as usize; + let rgba_stride = config.rgba_stride as usize; + let y_stride = config.y_stride as usize; + let uv_stride = config.uv_stride as usize; + let y_plane_size = y_stride * height; + + for row in 
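// BT.601 limited-range luma, as used throughout this file:
// Y = 16 + ((65*R + 129*G + 25*B + 128) >> 8), clamped to [16, 235].
// e.g. mid-gray (128, 128, 128): 219*128 + 128 = 28160, >>8 = 110, Y = 126.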
0..height { + let src_row = &rgba[row * rgba_stride..row * rgba_stride + width * 4]; + let y_row = &mut output[row * y_stride..row * y_stride + width]; + for (col, y) in y_row.iter_mut().enumerate() { + let r = i32::from(src_row[col * 4]); + let g = i32::from(src_row[col * 4 + 1]); + let b = i32::from(src_row[col * 4 + 2]); + *y = (16 + ((65 * r + 129 * g + 25 * b + 128) >> 8)).clamp(16, 235) as u8; + } + } + + for row in 0..(height / 2) { + let src_row0 = &rgba[row * 2 * rgba_stride..row * 2 * rgba_stride + width * 4]; + let src_row1 = + &rgba[(row * 2 + 1) * rgba_stride..(row * 2 + 1) * rgba_stride + width * 4]; + let uv_row = + &mut output[y_plane_size + row * uv_stride..y_plane_size + row * uv_stride + width]; + for (col, uv) in uv_row.chunks_exact_mut(2).take(width / 2).enumerate() { + let src = col * 8; + let r = (i32::from(src_row0[src]) + + i32::from(src_row0[src + 4]) + + i32::from(src_row1[src]) + + i32::from(src_row1[src + 4]) + + 2) + / 4; + let g = (i32::from(src_row0[src + 1]) + + i32::from(src_row0[src + 5]) + + i32::from(src_row1[src + 1]) + + i32::from(src_row1[src + 5]) + + 2) + / 4; + let b = (i32::from(src_row0[src + 2]) + + i32::from(src_row0[src + 6]) + + i32::from(src_row1[src + 2]) + + i32::from(src_row1[src + 6]) + + 2) + / 4; + uv[0] = (128 + ((-38 * r - 74 * g + 112 * b + 128) >> 8)).clamp(16, 240) as u8; + uv[1] = (128 + ((112 * r - 94 * g - 18 * b + 128) >> 8)).clamp(16, 240) as u8; + } + } + } + + #[test] + fn test_rgba_to_nv12_fast_matches_baseline() { + let config = RgbaToNv12Config { + width: 32, + height: 16, + rgba_stride: 144, + y_stride: 40, + uv_stride: 40, + }; + let rgba = (0..config.rgba_stride * config.height) + .map(|i| ((i * 37 + 19) % 251) as u8) + .collect::>(); + let output_len = + (config.y_stride * config.height + config.uv_stride * (config.height / 2)) as usize; + let mut baseline = vec![0u8; output_len]; + let mut fast = vec![0u8; output_len]; + + rgba_to_nv12_baseline(&rgba, &mut baseline, config); + assert!(rgba_to_nv12_fast(&rgba, &mut fast, config)); + + assert_eq!(baseline, fast); + } + + #[test] + fn test_rgba_to_nv12_fast_parallel_matches_baseline() { + let config = RgbaToNv12Config { + width: 1920, + height: 1080, + rgba_stride: 1920 * 4, + y_stride: 1920, + uv_stride: 1920, + }; + let rgba = (0..config.rgba_stride * config.height) + .map(|i| ((i * 17 + 43) % 251) as u8) + .collect::>(); + let output_len = + (config.y_stride * config.height + config.uv_stride * (config.height / 2)) as usize; + let mut baseline = vec![0u8; output_len]; + let mut fast = vec![0u8; output_len]; + + rgba_to_nv12_baseline(&rgba, &mut baseline, config); + assert!(rgba_to_nv12_fast(&rgba, &mut fast, config)); + + assert_eq!(baseline, fast); + } + + #[test] + #[ignore] + fn benchmark_rgba_to_nv12_fast_4k() { + let config = RgbaToNv12Config { + width: 3840, + height: 2160, + rgba_stride: 3840 * 4, + y_stride: 3840, + uv_stride: 3840, + }; + let rgba = (0..config.rgba_stride * config.height) + .map(|i| ((i * 23 + 29) % 251) as u8) + .collect::>(); + let output_len = + (config.y_stride * config.height + config.uv_stride * (config.height / 2)) as usize; + let mut baseline = vec![0u8; output_len]; + let mut fast = vec![0u8; output_len]; + let iterations = 20usize; + + let baseline_start = Instant::now(); + for _ in 0..iterations { + rgba_to_nv12_baseline(black_box(&rgba), black_box(&mut baseline), config); + } + let baseline_elapsed = baseline_start.elapsed(); + + let fast_start = Instant::now(); + for _ in 0..iterations { + assert!(rgba_to_nv12_fast( + 
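// std::hint::black_box keeps the optimizer from proving the inputs
// constant and hoisting or eliding the conversion being timed.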
black_box(&rgba), + black_box(&mut fast), + config + )); + } + let fast_elapsed = fast_start.elapsed(); + + assert_eq!(baseline, fast); + + println!( + "{{\"baseline_ms\":{},\"optimized_ms\":{},\"speedup\":{:.3}}}", + baseline_elapsed.as_millis(), + fast_elapsed.as_millis(), + baseline_elapsed.as_secs_f64() / fast_elapsed.as_secs_f64() + ); + } #[test] fn test_nv12_basic_conversion() { From 7496ff11f7f39b3a6b49d4a039be68babf9176d9 Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Fri, 8 May 2026 10:33:57 +0100 Subject: [PATCH 03/17] perf(rendering): parallelize segment meta load and add first_camera_duration --- crates/rendering/src/project_recordings.rs | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/crates/rendering/src/project_recordings.rs b/crates/rendering/src/project_recordings.rs index e0b9985b3e..6cc5ff396f 100644 --- a/crates/rendering/src/project_recordings.rs +++ b/crates/rendering/src/project_recordings.rs @@ -4,6 +4,7 @@ use std::{ }; use cap_project::{AudioMeta, StudioRecordingMeta, VideoMeta}; +use rayon::prelude::*; use serde::Serialize; use specta::Type; @@ -147,8 +148,9 @@ impl ProjectRecordingsMeta { } StudioRecordingMeta::MultipleSegments { inner, .. } => inner .segments - .iter() - .map(|s| { + .par_iter() + .enumerate() + .map(|(i, s)| { let has_start_times = RefCell::new(None); let ensure_start_time = |time: Option| { @@ -204,9 +206,8 @@ impl ProjectRecordingsMeta { .map_err(|e| format!("mic / {e}"))?, system_audio, }) + .map_err(|e| format!("segment {i} / {e}")) }) - .enumerate() - .map(|(i, v)| v.map_err(|e| format!("segment {i} / {e}"))) .collect::>()?, }; @@ -217,6 +218,10 @@ impl ProjectRecordingsMeta { self.segments.iter().map(|s| s.duration()).sum() } + pub fn first_camera_duration(&self) -> Option { + self.segments.first()?.camera.map(|camera| camera.duration) + } + pub fn get_source_duration(&self, path: &PathBuf) -> Result { Video::new(path, 0.0).map(|v| v.duration) } From ea44510beac69894249bfa184aa5d95ff1d9ab84 Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Fri, 8 May 2026 10:33:57 +0100 Subject: [PATCH 04/17] test(rendering): verify lazy zoom precompute matches full precompute --- .../rendering/src/zoom_focus_interpolation.rs | 71 +++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/crates/rendering/src/zoom_focus_interpolation.rs b/crates/rendering/src/zoom_focus_interpolation.rs index cf133f160e..92d5d85cda 100644 --- a/crates/rendering/src/zoom_focus_interpolation.rs +++ b/crates/rendering/src/zoom_focus_interpolation.rs @@ -11,6 +11,77 @@ struct ZoomFocusPrecomputeSim { last_integrated_ms: f64, } +#[cfg(test)] +mod tests { + use super::*; + use cap_project::{CursorMoveEvent, ZoomMode}; + + fn move_event(time_ms: f64, x: f64, y: f64) -> CursorMoveEvent { + CursorMoveEvent { + active_modifiers: Vec::new(), + cursor_id: "default".to_string(), + time_ms, + x, + y, + } + } + + fn zoom_segment() -> ZoomSegment { + ZoomSegment { + start: 0.0, + end: 4.0, + amount: 2.0, + mode: ZoomMode::Auto, + glide_direction: Default::default(), + glide_speed: 0.5, + instant_animation: false, + edge_snap_ratio: 0.25, + } + } + + #[test] + fn lazy_precompute_matches_full_precompute() { + let cursor_events = CursorEvents { + clicks: Vec::new(), + moves: vec![ + move_event(0.0, 0.1, 0.2), + move_event(180.0, 0.2, 0.3), + move_event(460.0, 0.7, 0.4), + move_event(1200.0, 0.8, 0.8), + move_event(2400.0, 0.35, 0.65), + 
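// Cursor samples cover 0-3.8s of the 0-4s zoom segment; interpolate()
// is then compared at times both inside and past the segment (up to 5.0s).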
move_event(3800.0, 0.6, 0.2), + ], + }; + let zoom_segments = vec![zoom_segment()]; + let mut fully_precomputed = ZoomFocusInterpolator::new( + &cursor_events, + None, + ClickSpringConfig::default(), + ScreenMovementSpring::default(), + 5.0, + &zoom_segments, + ); + fully_precomputed.precompute(); + + let mut lazy = ZoomFocusInterpolator::new( + &cursor_events, + None, + ClickSpringConfig::default(), + ScreenMovementSpring::default(), + 5.0, + &zoom_segments, + ); + + for time_secs in [0.0_f32, 0.016, 0.25, 0.5, 1.0, 2.5, 4.0, 5.0] { + lazy.ensure_precomputed_until(time_secs); + let expected = fully_precomputed.interpolate(time_secs); + let actual = lazy.interpolate(time_secs); + assert!((expected.coord.x - actual.coord.x).abs() < 1e-9); + assert!((expected.coord.y - actual.coord.y).abs() < 1e-9); + } + } +} + const SAMPLE_INTERVAL_MS: f64 = 8.0; const CLUSTER_WIDTH_RATIO: f64 = 0.5; const CLUSTER_HEIGHT_RATIO: f64 = 0.7; From cc7684c363505389a2141e6f28e83399e0a293f6 Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Fri, 8 May 2026 10:33:57 +0100 Subject: [PATCH 05/17] perf(rendering): cache clip offsets and accelerate NV12 readback conversion --- crates/rendering/src/lib.rs | 157 ++++++++++++++++-------------------- 1 file changed, 70 insertions(+), 87 deletions(-) diff --git a/crates/rendering/src/lib.rs b/crates/rendering/src/lib.rs index a87a27e61f..5947bb1179 100644 --- a/crates/rendering/src/lib.rs +++ b/crates/rendering/src/lib.rs @@ -316,15 +316,8 @@ impl RecordingSegmentDecoders { Ok(Some(camera)) }; - #[cfg(target_os = "windows")] let (screen, camera) = tokio::try_join!(screen_future, camera_future)?; - #[cfg(not(target_os = "windows"))] - let screen = screen_future.await?; - - #[cfg(not(target_os = "windows"))] - let camera = camera_future.await?; - Ok(Self { screen, camera, @@ -491,6 +484,21 @@ pub struct RenderSegment { pub render_display: bool, } +fn clip_offsets_by_index(project: &ProjectConfiguration) -> Vec { + let Some(max_index) = project.clips.iter().map(|clip| clip.index as usize).max() else { + return Vec::new(); + }; + let mut offsets = vec![ClipOffsets::default(); max_index + 1]; + for clip in &project.clips { + offsets[clip.index as usize] = clip.offsets; + } + offsets +} + +fn clip_offsets_for_index(offsets: &[ClipOffsets], index: u32) -> ClipOffsets { + offsets.get(index as usize).copied().unwrap_or_default() +} + #[allow(clippy::too_many_arguments)] pub async fn render_video_to_channel( constants: &RenderVideoConstants, @@ -579,6 +587,7 @@ pub async fn render_video_to_channel( const MAX_CONSECUTIVE_FAILURES: u32 = 200; let mut prefetched_decode: Option<(u32, f64, usize, Option)> = None; + let cached_clip_offsets = clip_offsets_by_index(project); loop { if frame_number >= total_frames { @@ -591,10 +600,7 @@ pub async fn render_video_to_channel( break; }; - let clip_config = project - .clips - .iter() - .find(|v| v.index == segment.recording_clip); + let clip_offsets = clip_offsets_for_index(&cached_clip_offsets, segment.recording_clip); let current_frame_number = { let prev = frame_number; @@ -618,7 +624,7 @@ pub async fn render_video_to_channel( segment_time, needs_camera, render_segment.render_display, - clip_config.map(|v| v.offsets).unwrap_or_default(), + clip_offsets, current_frame_number, is_initial_frame, fps, @@ -631,7 +637,7 @@ pub async fn render_video_to_channel( segment_time, needs_camera, render_segment.render_display, - clip_config.map(|v| v.offsets).unwrap_or_default(), + clip_offsets, 
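// clip_offsets comes from the Vec cache built once before the render loop,
// replacing the previous per-frame linear scan over project.clips.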
current_frame_number, is_initial_frame, fps, @@ -667,10 +673,8 @@ pub async fn render_video_to_channel( let next_clip_index = next_segment.recording_clip as usize; next_prefetch_meta = Some((next_seg_time, next_clip_index)); let next_render_segment = &render_segments[next_clip_index]; - let next_clip_config = project - .clips - .iter() - .find(|v| v.index == next_segment.recording_clip); + let next_clip_offsets = + clip_offsets_for_index(&cached_clip_offsets, next_segment.recording_clip); let next_is_initial = last_successful_frame.is_none(); Some(decode_segment_frames_with_retry( @@ -678,7 +682,7 @@ pub async fn render_video_to_channel( next_seg_time, needs_camera, next_render_segment.render_display, - next_clip_config.map(|v| v.offsets).unwrap_or_default(), + next_clip_offsets, next_frame_number, next_is_initial, fps, @@ -902,9 +906,6 @@ pub async fn render_video_to_channel_nv12( ) }) .collect(); - for interp in &mut zoom_focus_interpolators { - interp.ensure_precomputed_until(duration as f32 + 1.0); - } let zoom_focus_interpolators_construct_ms = zoom_build_start.elapsed().as_millis() as u64; let mut frame_number = 0; @@ -938,6 +939,7 @@ pub async fn render_video_to_channel_nv12( const MAX_CONSECUTIVE_FAILURES: u32 = 200; let mut prefetched_decode: Option<(u32, f64, usize, Option)> = None; + let cached_clip_offsets = clip_offsets_by_index(project); let mut channel_frames_sent = 0u32; let mut stopped_after_frame_limit = false; @@ -955,10 +957,7 @@ pub async fn render_video_to_channel_nv12( break; }; - let clip_config = project - .clips - .iter() - .find(|v| v.index == segment.recording_clip); + let clip_offsets = clip_offsets_for_index(&cached_clip_offsets, segment.recording_clip); let current_frame_number = { let prev = frame_number; @@ -985,7 +984,7 @@ pub async fn render_video_to_channel_nv12( segment_time, needs_camera, render_segment.render_display, - clip_config.map(|v| v.offsets).unwrap_or_default(), + clip_offsets, current_frame_number, is_initial_frame, fps, @@ -998,7 +997,7 @@ pub async fn render_video_to_channel_nv12( segment_time, needs_camera, render_segment.render_display, - clip_config.map(|v| v.offsets).unwrap_or_default(), + clip_offsets, current_frame_number, is_initial_frame, fps, @@ -1035,10 +1034,8 @@ pub async fn render_video_to_channel_nv12( let next_clip_index = next_segment.recording_clip as usize; next_prefetch_meta = Some((next_seg_time, next_clip_index)); let next_render_segment = &render_segments[next_clip_index]; - let next_clip_config = project - .clips - .iter() - .find(|v| v.index == next_segment.recording_clip); + let next_clip_offsets = + clip_offsets_for_index(&cached_clip_offsets, next_segment.recording_clip); let next_is_initial = last_successful_frame.is_none(); Some(decode_segment_frames_with_retry( @@ -1046,7 +1043,7 @@ pub async fn render_video_to_channel_nv12( next_seg_time, needs_camera, next_render_segment.render_display, - next_clip_config.map(|v| v.offsets).unwrap_or_default(), + next_clip_offsets, next_frame_number, next_is_initial, fps, @@ -1443,25 +1440,22 @@ async fn decode_segment_frames_with_retry( pub fn get_duration( recordings: &ProjectRecordingsMeta, - recording_meta: &RecordingMeta, - meta: &StudioRecordingMeta, + _recording_meta: &RecordingMeta, + _meta: &StudioRecordingMeta, project: &ProjectConfiguration, ) -> f64 { let mut max_duration = recordings.duration(); - if let Some(camera_path) = meta.camera_path() - && let Ok(camera_duration) = - recordings.get_source_duration(&recording_meta.path(&camera_path)) - { - 
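// Old code: probed the camera file on disk just to read its duration;
// first_camera_duration() now reads it from the already-parsed segment meta.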
println!("Camera recording duration: {camera_duration}"); + if let Some(camera_duration) = recordings.first_camera_duration() { + tracing::debug!(camera_duration, "Camera recording duration"); max_duration = max_duration.max(camera_duration); - println!("New max duration after camera check: {max_duration}"); + tracing::debug!(max_duration, "Updated max duration after camera check"); } if let Some(timeline) = &project.timeline { timeline.duration() } else { - println!("No timeline found, using max_duration: {max_duration}"); + tracing::debug!(max_duration, "No timeline found, using media max duration"); max_duration } } @@ -2957,6 +2951,27 @@ mod tests { } } + #[test] + fn clip_offsets_cache_handles_sparse_indexes() { + let project = ProjectConfiguration { + clips: vec![cap_project::ClipConfiguration { + index: 3, + offsets: ClipOffsets { + camera: 1.0, + mic: 2.0, + system_audio: 3.0, + }, + }], + ..Default::default() + }; + + let offsets = clip_offsets_by_index(&project); + + assert_eq!(clip_offsets_for_index(&offsets, 0).camera, 0.0); + assert_eq!(clip_offsets_for_index(&offsets, 3).mic, 2.0); + assert_eq!(clip_offsets_for_index(&offsets, 9).system_audio, 0.0); + } + #[test] fn auto_aspect_ratio_preserves_source_ratio_with_padding() { let options = render_options(1920, 1080); @@ -3241,52 +3256,20 @@ impl<'a> FrameRenderer<'a> { let uv_plane_size = uv_stride * (height as usize / 2); nv12_buf.resize(y_plane_size + uv_plane_size, 0); - let src_data = &rgba_frame.data; - let src_stride = padded_bytes_per_row as usize; - - for row in 0..height as usize { - let src_row = &src_data[row * src_stride..row * src_stride + width as usize * 4]; - let y_row = &mut nv12_buf[row * y_stride..(row + 1) * y_stride]; - for col in 0..width as usize { - let r = src_row[col * 4] as i32; - let g = src_row[col * 4 + 1] as i32; - let b = src_row[col * 4 + 2] as i32; - y_row[col] = ((16 + ((65 * r + 129 * g + 25 * b + 128) >> 8)) as u8).clamp(16, 235); - } - } - - let uv_offset = y_plane_size; - for row in 0..(height as usize / 2) { - let src_row0 = - &src_data[row * 2 * src_stride..row * 2 * src_stride + width as usize * 4]; - let src_row1 = &src_data - [(row * 2 + 1) * src_stride..(row * 2 + 1) * src_stride + width as usize * 4]; - let uv_row = - &mut nv12_buf[uv_offset + row * uv_stride..uv_offset + (row + 1) * uv_stride]; - for col in 0..(width as usize / 2) { - let r = (src_row0[col * 8] as i32 - + src_row0[col * 8 + 4] as i32 - + src_row1[col * 8] as i32 - + src_row1[col * 8 + 4] as i32 - + 2) - / 4; - let g = (src_row0[col * 8 + 1] as i32 - + src_row0[col * 8 + 5] as i32 - + src_row1[col * 8 + 1] as i32 - + src_row1[col * 8 + 5] as i32 - + 2) - / 4; - let b = (src_row0[col * 8 + 2] as i32 - + src_row0[col * 8 + 6] as i32 - + src_row1[col * 8 + 2] as i32 - + src_row1[col * 8 + 6] as i32 - + 2) - / 4; - uv_row[col * 2] = - ((128 + ((-38 * r - 74 * g + 112 * b + 128) >> 8)) as u8).clamp(16, 240); - uv_row[col * 2 + 1] = - ((128 + ((112 * r - 94 * g - 18 * b + 128) >> 8)) as u8).clamp(16, 240); - } + if !cpu_yuv::rgba_to_nv12_fast( + &rgba_frame.data, + &mut nv12_buf, + cpu_yuv::RgbaToNv12Config { + width, + height, + rgba_stride: padded_bytes_per_row, + y_stride: width, + uv_stride: width, + }, + ) { + return Err(RenderingError::ImageLoadError( + "Failed to convert RGBA frame to NV12".to_string(), + )); } Ok(Some(frame_pipeline::Nv12RenderedFrame { From 5f5d2a3afbab23a2d3ed41c6ddb6455c957bca89 Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Fri, 8 May 2026 
10:34:01 +0100 Subject: [PATCH 06/17] fix(rendering): order readback wait after submit in finish_encoder paths --- crates/rendering/src/frame_pipeline.rs | 55 +++++++++++++++++++------- 1 file changed, 41 insertions(+), 14 deletions(-) diff --git a/crates/rendering/src/frame_pipeline.rs b/crates/rendering/src/frame_pipeline.rs index d861737902..0ce8674477 100644 --- a/crates/rendering/src/frame_pipeline.rs +++ b/crates/rendering/src/frame_pipeline.rs @@ -952,11 +952,7 @@ pub async fn finish_encoder( uniforms: &ProjectUniforms, encoder: wgpu::CommandEncoder, ) -> Result, RenderingError> { - let previous_frame = if let Some(prev) = session.pipelined_readback.take_pending() { - Some(prev.wait(device).await?) - } else { - None - }; + let previous_pending = session.pipelined_readback.take_pending(); session.pipelined_readback.perform_resize_if_needed(device); @@ -966,9 +962,30 @@ pub async fn finish_encoder( &session.textures.1 }; - session + let submit_result = session .pipelined_readback - .submit_readback(device, queue, texture, uniforms, encoder)?; + .submit_readback(device, queue, texture, uniforms, encoder); + + if let Err(error) = submit_result { + if let Some(prev) = previous_pending { + let _ = prev.wait(device).await; + } + return Err(error); + } + + let previous_frame = if let Some(prev) = previous_pending { + match prev.wait(device).await { + Ok(frame) => Some(frame), + Err(error) => { + if let Some(current) = session.pipelined_readback.take_pending() { + let _ = current.cancel(); + } + return Err(error); + } + } + } else { + None + }; Ok(previous_frame) } @@ -984,12 +1001,7 @@ pub async fn finish_encoder_nv12_pooled( ) -> Result, RenderingError> { let width = uniforms.output_size.0; let height = uniforms.output_size.1; - - let previous_frame = if let Some(prev) = nv12_converter.take_pending() { - Some(prev.wait_with_pool(device, buffer_pool).await?) 
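// Old ordering: the previous frame's readback was awaited before this
// frame's work was submitted, serializing CPU and GPU; the rewrite submits
// first, then waits, cancelling the in-flight readback if the wait fails.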
- } else { - None - }; + let previous_pending = nv12_converter.take_pending(); let texture = if session.current_is_left { &session.textures.0 @@ -1012,9 +1024,24 @@ pub async fn finish_encoder_nv12_pooled( queue.submit(std::iter::once(encoder.finish())); nv12_converter.start_readback(); + let previous_frame = if let Some(prev) = previous_pending { + match prev.wait_with_pool(device, buffer_pool).await { + Ok(frame) => Some(frame), + Err(error) => { + if let Some(current) = nv12_converter.take_pending() { + let _ = current.cancel(); + } + return Err(error); + } + } + } else { + None + }; + Ok(previous_frame) - } else if let Some(prev_frame) = previous_frame { + } else if let Some(prev) = previous_pending { queue.submit(std::iter::once(encoder.finish())); + let prev_frame = prev.wait_with_pool(device, buffer_pool).await?; Ok(Some(prev_frame)) } else { let rgba_frame = finish_encoder(session, device, queue, uniforms, encoder).await?; From 963db8048eb8efc3fdc32a2e1209805290288629 Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Fri, 8 May 2026 10:34:03 +0100 Subject: [PATCH 07/17] fix(audio): handle resampler flush loops and propagate decode EOF errors --- crates/audio/src/audio_data.rs | 83 +++++++--------------------------- 1 file changed, 17 insertions(+), 66 deletions(-) diff --git a/crates/audio/src/audio_data.rs b/crates/audio/src/audio_data.rs index 587d62cd75..6212853720 100644 --- a/crates/audio/src/audio_data.rs +++ b/crates/audio/src/audio_data.rs @@ -1,22 +1,28 @@ -use ffmpeg::{ - ChannelLayout, codec as avcodec, - format::{self as avformat}, -}; +use ffmpeg::{ChannelLayout, codec as avcodec, format as avformat, frame}; use std::path::Path; use crate::cast_bytes_to_f32_slice; -// F32 Packed 48kHz audio pub struct AudioData { samples: Vec, channels: u16, } +fn append_frame_samples(frame: &frame::Audio, samples: &mut Vec) { + let slice = &frame.data(0)[0..frame.samples() * 4 * frame.channels() as usize]; + samples.extend(unsafe { cast_bytes_to_f32_slice(slice) }); +} + impl AudioData { pub const SAMPLE_FORMAT: avformat::Sample = avformat::Sample::F32(avformat::sample::Type::Packed); pub const SAMPLE_RATE: u32 = 48_000; + #[cfg(test)] + pub(crate) fn from_samples(samples: Vec, channels: u16) -> Self { + Self { samples, channels } + } + pub fn from_file(path: impl AsRef) -> Result { fn inner(path: &Path) -> Result { let mut input_ctx = @@ -53,7 +59,6 @@ impl AudioData { let mut decoded_frame = ffmpeg::frame::Audio::empty(); let mut resampled_frame = ffmpeg::frame::Audio::empty(); - // let mut resampled_frames = 0; let mut samples: Vec = vec![]; for (stream, packet) in input_ctx.packets() { @@ -66,74 +71,22 @@ impl AudioData { .map_err(|e| format!("Send Packet / {e}"))?; while decoder.receive_frame(&mut decoded_frame).is_ok() { - let resample_delay = resampler + resampler .run(&decoded_frame, &mut resampled_frame) .map_err(|e| format!("Run Resampler / {e:?}"))?; - let slice = &resampled_frame.data(0) - [0..resampled_frame.samples() * 4 * resampled_frame.channels() as usize]; - samples.extend(unsafe { cast_bytes_to_f32_slice(slice) }); - - if resample_delay.is_some() { - loop { - let resample_delay = resampler - .flush(&mut resampled_frame) - .map_err(|e| format!("Flush Resampler / {e}"))?; - - let slice = &resampled_frame.data(0)[0..resampled_frame.samples() - * 4 - * resampled_frame.channels() as usize]; - samples.extend(unsafe { cast_bytes_to_f32_slice(slice) }); - - if resample_delay.is_none() { - break; - } - } - } - } - - loop { 
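// Old code: a second flush loop ran after every packet, duplicating the
// per-frame drain; the rewrite drains the resampler once after send_eof.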
- let resample_delay = resampler - .flush(&mut resampled_frame) - .map_err(|e| format!("Flush Resampler / {e}"))?; - - let slice = &resampled_frame.data(0) - [0..resampled_frame.samples() * 4 * resampled_frame.channels() as usize]; - samples.extend(unsafe { cast_bytes_to_f32_slice(slice) }); - - if resample_delay.is_none() { - break; - } + append_frame_samples(&resampled_frame, &mut samples); } } - decoder.send_eof().unwrap(); + decoder.send_eof().map_err(|e| format!("Send EOF / {e}"))?; while decoder.receive_frame(&mut decoded_frame).is_ok() { - let resample_delay = resampler + resampler .run(&decoded_frame, &mut resampled_frame) .map_err(|e| format!("Run Resampler / {e}"))?; - let slice = &resampled_frame.data(0) - [0..resampled_frame.samples() * 4 * resampled_frame.channels() as usize]; - samples.extend(unsafe { cast_bytes_to_f32_slice(slice) }); - - if resample_delay.is_some() { - loop { - let resample_delay = resampler - .flush(&mut resampled_frame) - .map_err(|e| format!("Flush Resampler / {e}"))?; - - let slice = &resampled_frame.data(0)[0..resampled_frame.samples() - * 4 - * resampled_frame.channels() as usize]; - samples.extend(unsafe { cast_bytes_to_f32_slice(slice) }); - - if resample_delay.is_none() { - break; - } - } - } + append_frame_samples(&resampled_frame, &mut samples); } loop { @@ -141,9 +94,7 @@ impl AudioData { .flush(&mut resampled_frame) .map_err(|e| format!("Flush Resampler / {e}"))?; - let slice = &resampled_frame.data(0) - [0..resampled_frame.samples() * 4 * resampled_frame.channels() as usize]; - samples.extend(unsafe { cast_bytes_to_f32_slice(slice) }); + append_frame_samples(&resampled_frame, &mut samples); if resample_delay.is_none() { break; From fec7de5b300fe8c6d4c093ee9e1cc73a551a3946 Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Fri, 8 May 2026 10:34:03 +0100 Subject: [PATCH 08/17] perf(audio): batch stereo mix using prepared linear gains --- crates/audio/src/renderer.rs | 442 +++++++++++++++++++++++++++++++---- 1 file changed, 401 insertions(+), 41 deletions(-) diff --git a/crates/audio/src/renderer.rs b/crates/audio/src/renderer.rs index 8cee672706..6e40a12faa 100644 --- a/crates/audio/src/renderer.rs +++ b/crates/audio/src/renderer.rs @@ -1,14 +1,16 @@ use crate::AudioData; +#[derive(Clone, Copy)] pub enum StereoMode { Stereo, MonoL, MonoR, } +#[derive(Clone, Copy)] pub struct AudioRendererTrack<'a> { pub data: &'a AudioData, - pub gain: f32, + pub linear_gain: f32, pub stereo_mode: StereoMode, pub offset: isize, } @@ -19,10 +21,20 @@ pub fn render_audio( samples: usize, out_offset: usize, out: &mut [f32], +) -> usize { + render_audio_from_tracks(tracks.iter().copied(), offset, samples, out_offset, out) +} + +pub fn render_audio_from_tracks<'a>( + tracks: impl Iterator> + Clone, + offset: usize, + samples: usize, + out_offset: usize, + out: &mut [f32], ) -> usize { let samples = samples.min( tracks - .iter() + .clone() .filter_map(|t| { let track_samples = t.data.samples().len() / t.data.channels() as usize; let available = track_samples as isize - offset as isize - t.offset; @@ -36,67 +48,415 @@ pub fn render_audio( .unwrap_or(0), ); - for i in 0..samples { - let mut left = 0.0; - let mut right = 0.0; + out[out_offset..out_offset + samples * 2].fill(0.0); - for track in tracks { - let i = i.wrapping_add_signed(track.offset); - - let data = track.data; - let gain = gain_for_db(track.gain); + for track in tracks { + if track.linear_gain == 0.0 { + continue; + } - if gain == f32::NEG_INFINITY { - 
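// Old code: dB -> linear conversion ran per sample per track; tracks now
// carry linear_gain precomputed via linear_gain_for_db, with 0.0 meaning mute.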
continue; - } + let data = track.data; + let data_samples = data.samples(); + let channels = data.channels() as usize; + let track_samples = data_samples.len() / channels; + let valid_range = valid_output_range(track_samples, offset, track.offset, samples); + if valid_range.is_empty() { + continue; + } + let source_start = (offset as isize + valid_range.start as isize + track.offset) as usize; + let output_start = valid_range.start; + let output_end = valid_range.end; - if data.channels() == 1 { - if let Some(sample) = data.samples().get(offset + i) { - left += sample * 0.707 * gain; - right += sample * 0.707 * gain; - } - } else if data.channels() == 2 { - let base_idx = offset * 2 + i * 2; - let Some(l_sample) = data.samples().get(base_idx) else { - continue; - }; - let Some(r_sample) = data.samples().get(base_idx + 1) else { - continue; - }; + let gain = track.linear_gain; - match track.stereo_mode { - StereoMode::Stereo => { - left += l_sample * gain; - right += r_sample * gain; + if channels == 1 { + for (src_i, i) in (source_start..).zip(output_start..output_end) { + let sample = data_samples[src_i] * 0.707 * gain; + out[out_offset + i * 2] += sample; + out[out_offset + i * 2 + 1] += sample; + } + } else if channels == 2 { + match track.stereo_mode { + StereoMode::Stereo => { + let mut base_idx = source_start * 2; + for i in output_start..output_end { + let l_sample = data_samples[base_idx]; + let r_sample = data_samples[base_idx + 1]; + out[out_offset + i * 2] += l_sample * gain; + out[out_offset + i * 2 + 1] += r_sample * gain; + base_idx += 2; } - StereoMode::MonoL => { - left += l_sample * gain; - right += l_sample * gain; + } + StereoMode::MonoL => { + let mut base_idx = source_start * 2; + for i in output_start..output_end { + let l_sample = data_samples[base_idx]; + out[out_offset + i * 2] += l_sample * gain; + out[out_offset + i * 2 + 1] += l_sample * gain; + base_idx += 2; } - StereoMode::MonoR => { - left += r_sample * gain; - right += r_sample * gain; + } + StereoMode::MonoR => { + let mut base_idx = source_start * 2; + for i in output_start..output_end { + let r_sample = data_samples[base_idx + 1]; + out[out_offset + i * 2] += r_sample * gain; + out[out_offset + i * 2 + 1] += r_sample * gain; + base_idx += 2; } } } } + } + for i in 0..samples { + let left_index = out_offset + i * 2; + let right_index = left_index + 1; + let left = out[left_index]; + let right = out[right_index]; let l = left.clamp(-1.0, 1.0); let r = right.clamp(-1.0, 1.0); - out[out_offset + i * 2] = l; - out[out_offset + i * 2 + 1] = r; + out[left_index] = l; + out[right_index] = r; } samples } -fn gain_for_db(db: f32) -> f32 { +fn valid_output_range( + track_samples: usize, + offset: usize, + track_offset: isize, + samples: usize, +) -> std::ops::Range { + let start = if track_offset < 0 { + track_offset.unsigned_abs().saturating_sub(offset) + } else { + 0 + }; + let end = track_samples as isize - offset as isize - track_offset; + let end = if end <= 0 { + 0 + } else { + (end as usize).min(samples) + }; + + start.min(end)..end +} + +pub fn linear_gain_for_db(db: f32) -> f32 { match db { - // Fully mute when at minimum - v if v <= -30.0 => f32::NEG_INFINITY, + v if v <= -30.0 => 0.0, v => db_to_linear(v), } } fn db_to_linear(db: f32) -> f32 { 10.0_f32.powf(db / 20.0) } + +#[cfg(test)] +mod tests { + use super::*; + use std::{hint::black_box, time::Instant}; + + struct BaselineAudioRendererTrack<'a> { + data: &'a AudioData, + gain_db: f32, + stereo_mode: StereoMode, + offset: isize, + } + + fn 
assert_close(actual: f32, expected: f32) { + assert!((actual - expected).abs() < 0.000_01); + } + + fn render_audio_baseline( + tracks: &[BaselineAudioRendererTrack], + offset: usize, + samples: usize, + out_offset: usize, + out: &mut [f32], + ) -> usize { + let samples = samples.min( + tracks + .iter() + .filter_map(|t| { + let track_samples = t.data.samples().len() / t.data.channels() as usize; + let available = track_samples as isize - offset as isize - t.offset; + if available > 0 { + Some(available as usize) + } else { + None + } + }) + .max() + .unwrap_or(0), + ); + + for i in 0..samples { + let mut left = 0.0; + let mut right = 0.0; + + for track in tracks { + let i = i.wrapping_add_signed(track.offset); + + let data = track.data; + let gain = linear_gain_for_db(track.gain_db); + + if gain == 0.0 { + continue; + } + + if data.channels() == 1 { + if let Some(sample) = data.samples().get(offset + i) { + left += sample * 0.707 * gain; + right += sample * 0.707 * gain; + } + } else if data.channels() == 2 { + let base_idx = offset * 2 + i * 2; + let Some(l_sample) = data.samples().get(base_idx) else { + continue; + }; + let Some(r_sample) = data.samples().get(base_idx + 1) else { + continue; + }; + + match track.stereo_mode { + StereoMode::Stereo => { + left += l_sample * gain; + right += r_sample * gain; + } + StereoMode::MonoL => { + left += l_sample * gain; + right += l_sample * gain; + } + StereoMode::MonoR => { + left += r_sample * gain; + right += r_sample * gain; + } + } + } + } + + let l = left.clamp(-1.0, 1.0); + let r = right.clamp(-1.0, 1.0); + out[out_offset + i * 2] = l; + out[out_offset + i * 2 + 1] = r; + } + + samples + } + + #[test] + fn render_audio_mixes_stereo_and_mono_tracks() { + let stereo = AudioData::from_samples(vec![0.5, -0.5, 0.25, -0.25], 2); + let mono = AudioData::from_samples(vec![1.0, 0.5], 1); + let tracks = [ + AudioRendererTrack { + data: &stereo, + linear_gain: linear_gain_for_db(0.0), + stereo_mode: StereoMode::Stereo, + offset: 0, + }, + AudioRendererTrack { + data: &mono, + linear_gain: linear_gain_for_db(-6.0), + stereo_mode: StereoMode::Stereo, + offset: 0, + }, + ]; + let mut out = vec![0.0; 4]; + + let rendered = render_audio(&tracks, 0, 2, 0, &mut out); + let mono_gain = 10.0_f32.powf(-6.0 / 20.0) * 0.707; + + assert_eq!(rendered, 2); + assert_close(out[0], 0.5 + mono_gain); + assert_close(out[1], -0.5 + mono_gain); + assert_close(out[2], 0.25 + 0.5 * mono_gain); + assert_close(out[3], -0.25 + 0.5 * mono_gain); + } + + #[test] + fn render_audio_writes_silence_for_muted_tracks() { + let mono = AudioData::from_samples(vec![1.0, 0.5], 1); + let tracks = [AudioRendererTrack { + data: &mono, + linear_gain: 0.0, + stereo_mode: StereoMode::Stereo, + offset: 0, + }]; + let mut out = vec![1.0; 4]; + + let rendered = render_audio(&tracks, 0, 2, 0, &mut out); + + assert_eq!(rendered, 2); + assert_eq!(out, vec![0.0; 4]); + } + + #[test] + fn render_audio_matches_baseline_for_offsets_and_stereo_modes() { + let stereo = AudioData::from_samples( + (0..128) + .map(|i| ((i % 29) as f32 / 28.0) * 2.0 - 1.0) + .collect(), + 2, + ); + let mono = AudioData::from_samples( + (0..64) + .map(|i| ((i % 17) as f32 / 16.0) * 2.0 - 1.0) + .collect(), + 1, + ); + + for stereo_mode in [StereoMode::Stereo, StereoMode::MonoL, StereoMode::MonoR] { + for (offset, stereo_offset, mono_offset, samples) in + [(0, 0, 0, 24), (8, 6, 3, 24), (30, 5, 9, 40)] + { + let baseline_tracks = [ + BaselineAudioRendererTrack { + data: &stereo, + gain_db: -4.0, + stereo_mode, + offset: 
stereo_offset, + }, + BaselineAudioRendererTrack { + data: &mono, + gain_db: -9.0, + stereo_mode, + offset: mono_offset, + }, + ]; + let optimized_tracks = [ + AudioRendererTrack { + data: &stereo, + linear_gain: linear_gain_for_db(-4.0), + stereo_mode, + offset: stereo_offset, + }, + AudioRendererTrack { + data: &mono, + linear_gain: linear_gain_for_db(-9.0), + stereo_mode, + offset: mono_offset, + }, + ]; + let mut baseline_out = vec![9.0; samples * 2]; + let mut optimized_out = vec![9.0; samples * 2]; + + let baseline_rendered = + render_audio_baseline(&baseline_tracks, offset, samples, 0, &mut baseline_out); + let optimized_rendered = + render_audio(&optimized_tracks, offset, samples, 0, &mut optimized_out); + + assert_eq!(baseline_rendered, optimized_rendered); + assert_eq!( + &baseline_out[..baseline_rendered * 2], + &optimized_out[..optimized_rendered * 2] + ); + } + } + } + + #[test] + fn render_audio_silences_negative_track_preroll() { + let mono = AudioData::from_samples(vec![1.0, 0.5, -0.5, -1.0], 1); + let tracks = [AudioRendererTrack { + data: &mono, + linear_gain: linear_gain_for_db(0.0), + stereo_mode: StereoMode::Stereo, + offset: -4, + }]; + let mut out = vec![8.0; 12]; + + let rendered = render_audio(&tracks, 0, 6, 0, &mut out); + + assert_eq!(rendered, 6); + assert_eq!(&out[..8], &[0.0; 8]); + assert_close(out[8], 0.707); + assert_close(out[9], 0.707); + assert_close(out[10], 0.5 * 0.707); + assert_close(out[11], 0.5 * 0.707); + } + + #[test] + #[ignore] + fn benchmark_render_audio_prepared_gain() { + let frame_samples = 1600usize; + let iterations = 1000usize; + let stereo_samples = (0..frame_samples * 2) + .map(|i| ((i % 97) as f32 / 96.0) * 2.0 - 1.0) + .collect::>(); + let mono_samples = (0..frame_samples) + .map(|i| ((i % 53) as f32 / 52.0) * 2.0 - 1.0) + .collect::>(); + let stereo = AudioData::from_samples(stereo_samples, 2); + let mono = AudioData::from_samples(mono_samples, 1); + let baseline_tracks = [ + BaselineAudioRendererTrack { + data: &stereo, + gain_db: -4.0, + stereo_mode: StereoMode::Stereo, + offset: 0, + }, + BaselineAudioRendererTrack { + data: &mono, + gain_db: -9.0, + stereo_mode: StereoMode::Stereo, + offset: 0, + }, + ]; + let optimized_tracks = [ + AudioRendererTrack { + data: &stereo, + linear_gain: linear_gain_for_db(-4.0), + stereo_mode: StereoMode::Stereo, + offset: 0, + }, + AudioRendererTrack { + data: &mono, + linear_gain: linear_gain_for_db(-9.0), + stereo_mode: StereoMode::Stereo, + offset: 0, + }, + ]; + let mut baseline_out = vec![0.0; frame_samples * 2]; + let mut optimized_out = vec![0.0; frame_samples * 2]; + + let baseline_start = Instant::now(); + let mut baseline_rendered = 0usize; + for _ in 0..iterations { + baseline_rendered += render_audio_baseline( + &baseline_tracks, + 0, + frame_samples, + 0, + black_box(&mut baseline_out), + ); + } + let baseline_elapsed = baseline_start.elapsed(); + + let optimized_start = Instant::now(); + let mut optimized_rendered = 0usize; + for _ in 0..iterations { + optimized_rendered += render_audio( + &optimized_tracks, + 0, + frame_samples, + 0, + black_box(&mut optimized_out), + ); + } + let optimized_elapsed = optimized_start.elapsed(); + + assert_eq!(baseline_rendered, optimized_rendered); + assert_eq!(baseline_out, optimized_out); + + println!( + "{{\"baseline_ms\":{},\"optimized_ms\":{},\"speedup\":{:.3}}}", + baseline_elapsed.as_millis(), + optimized_elapsed.as_millis(), + baseline_elapsed.as_secs_f64() / optimized_elapsed.as_secs_f64() + ); + } +} From 
3dab4a40e5c03204d2869cac94766f7b3caf8ea1 Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Fri, 8 May 2026 10:34:04 +0100 Subject: [PATCH 09/17] perf(editor): render audio in-place with cached clip offsets --- crates/editor/src/audio.rs | 316 ++++++++++++++++++++++++++++++------- 1 file changed, 257 insertions(+), 59 deletions(-) diff --git a/crates/editor/src/audio.rs b/crates/editor/src/audio.rs index d4b7d03cfd..97146464bb 100644 --- a/crates/editor/src/audio.rs +++ b/crates/editor/src/audio.rs @@ -1,6 +1,4 @@ -use cap_audio::{ - AudioData, AudioRendererTrack, FromSampleBytes, StereoMode, cast_f32_slice_to_bytes, -}; +use cap_audio::{AudioData, AudioRendererTrack, FromSampleBytes, StereoMode, linear_gain_for_db}; use cap_media::MediaError; use cap_media_info::AudioInfo; use cap_project::{AudioConfiguration, ClipOffsets, ProjectConfiguration, TimelineConfiguration}; @@ -18,6 +16,10 @@ use tracing::info; pub struct AudioRenderer { data: Vec, cursor: AudioRendererCursor, + clip_offsets_by_index: Vec, + timeline_segment_index: usize, + timeline_segment_start_samples: usize, + timeline_segment_start_secs: f64, // sum of `frame.samples()` that have elapsed // this * channel count = cursor elapsed_samples: usize, @@ -83,6 +85,11 @@ struct TimelineCursor<'a> { segment: &'a cap_project::TimelineSegment, } +fn f32_slice_from_audio_frame(frame: &mut FFAudio, len: usize) -> &mut [f32] { + let bytes = &mut frame.data_mut(0)[..len * f32::BYTE_SIZE]; + unsafe { std::slice::from_raw_parts_mut(bytes.as_mut_ptr() as *mut f32, len) } +} + impl AudioRenderer { pub const SAMPLE_FORMAT: avformat::Sample = AudioData::SAMPLE_FORMAT; pub const SAMPLE_RATE: u32 = AudioData::SAMPLE_RATE; @@ -100,12 +107,43 @@ impl AudioRenderer { samples: 0, timescale: 1.0, }, + clip_offsets_by_index: Vec::new(), + timeline_segment_index: 0, + timeline_segment_start_samples: 0, + timeline_segment_start_secs: 0.0, elapsed_samples: 0, } } + pub fn new_with_project(data: Vec, project: &ProjectConfiguration) -> Self { + let mut renderer = Self::new(data); + renderer.update_clip_offsets(project); + renderer + } + + pub fn update_clip_offsets(&mut self, project: &ProjectConfiguration) { + let Some(max_index) = project.clips.iter().map(|clip| clip.index as usize).max() else { + self.clip_offsets_by_index.clear(); + return; + }; + + self.clip_offsets_by_index + .resize(max_index + 1, ClipOffsets::default()); + + for offsets in &mut self.clip_offsets_by_index { + *offsets = ClipOffsets::default(); + } + + for clip in &project.clips { + self.clip_offsets_by_index[clip.index as usize] = clip.offsets; + } + } + pub fn set_playhead(&mut self, playhead: f64, project: &ProjectConfiguration) { self.elapsed_samples = self.playhead_to_samples(playhead); + self.timeline_segment_index = 0; + self.timeline_segment_start_samples = 0; + self.timeline_segment_start_secs = 0.0; self.cursor = match project.get_segment_time(playhead) { Some((segment_time, segment)) => AudioRendererCursor { @@ -121,6 +159,20 @@ impl AudioRenderer { }; } + fn clip_offsets_for_project(&self, project: &ProjectConfiguration) -> ClipOffsets { + self.clip_offsets_by_index + .get(self.cursor.clip_index as usize) + .copied() + .or_else(|| { + project + .clips + .iter() + .find(|c| c.index == self.cursor.clip_index) + .map(|c| c.offsets) + }) + .unwrap_or_default() + } + fn playhead_to_samples(&self, playhead: f64) -> usize { (playhead * AudioData::SAMPLE_RATE as f64) as usize } @@ -134,16 +186,19 @@ impl AudioRenderer { 
requested_samples: usize, project: &ProjectConfiguration, ) -> Option { - self.render_frame_raw(requested_samples, project) - .map(move |(samples, data)| { - let mut raw_frame = - FFAudio::new(AudioData::SAMPLE_FORMAT, samples, ChannelLayout::STEREO); - raw_frame.set_rate(AudioData::SAMPLE_RATE); - raw_frame.data_mut(0)[0..data.len() * f32::BYTE_SIZE] - .copy_from_slice(unsafe { cast_f32_slice_to_bytes(&data) }); - - raw_frame - }) + let mut raw_frame = FFAudio::new( + AudioData::SAMPLE_FORMAT, + requested_samples, + ChannelLayout::STEREO, + ); + raw_frame.set_rate(AudioData::SAMPLE_RATE); + let sample_len = requested_samples * Self::CHANNELS as usize; + let samples = { + let out = f32_slice_from_audio_frame(&mut raw_frame, sample_len); + self.render_frame_to_slice(requested_samples, project, out)? + }; + raw_frame.set_samples(samples); + Some(raw_frame) } pub fn render_frame_raw( @@ -151,24 +206,51 @@ impl AudioRenderer { samples: usize, project: &ProjectConfiguration, ) -> Option<(usize, Vec)> { + let mut ret = vec![0.0; samples * Self::CHANNELS as usize]; + let rendered = self.render_frame_into(samples, project, &mut ret)?; + ret.truncate(rendered * Self::CHANNELS as usize); + Some((rendered, ret)) + } + + fn render_frame_into( + &mut self, + samples: usize, + project: &ProjectConfiguration, + out: &mut Vec, + ) -> Option { if let Some(timeline) = &project.timeline { - return self.render_timeline_frame_raw(samples, project, timeline); + return self.render_timeline_frame_into(samples, project, timeline, out); } - self.render_linear_frame_raw(samples, project) + self.render_linear_frame_into(samples, project, out) } - fn render_timeline_frame_raw( + fn render_frame_to_slice( + &mut self, + samples: usize, + project: &ProjectConfiguration, + out: &mut [f32], + ) -> Option { + if let Some(timeline) = &project.timeline { + return self.render_timeline_frame_to_slice(samples, project, timeline, out); + } + + self.render_linear_frame_to_slice(samples, project, out) + } + + fn render_timeline_frame_into( &mut self, samples: usize, project: &ProjectConfiguration, timeline: &TimelineConfiguration, - ) -> Option<(usize, Vec)> { + out: &mut Vec, + ) -> Option { if samples == 0 { return None; } - let mut ret = vec![0.0; samples * 2]; + out.resize(samples * Self::CHANNELS as usize, 0.0); + out.fill(0.0); let mut written = 0usize; while written < samples { @@ -189,7 +271,7 @@ impl AudioRenderer { }; if cursor.segment.timescale == 1.0 { - self.render_current_chunk(project, chunk_samples, written * 2, &mut ret); + self.render_current_chunk(project, chunk_samples, written * 2, out); self.cursor.samples += chunk_samples; } @@ -197,19 +279,58 @@ impl AudioRenderer { written += chunk_samples; } - if written == 0 { - None - } else { - ret.truncate(written * 2); - Some((written, ret)) + if written == 0 { None } else { Some(written) } + } + + fn render_timeline_frame_to_slice( + &mut self, + samples: usize, + project: &ProjectConfiguration, + timeline: &TimelineConfiguration, + out: &mut [f32], + ) -> Option { + if samples == 0 { + return None; } + + out[..samples * Self::CHANNELS as usize].fill(0.0); + let mut written = 0usize; + + while written < samples { + let Some(cursor) = self.timeline_cursor(timeline) else { + break; + }; + + let chunk_samples = + (cursor.segment_end_samples - self.elapsed_samples).min(samples - written); + if chunk_samples == 0 { + break; + } + + self.cursor = AudioRendererCursor { + clip_index: cursor.segment.recording_clip, + timescale: cursor.segment.timescale, + samples: 
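// clip-local playhead: cursor.samples indexes into the source clip's
// sample stream rather than the stitched timeline.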
self.playhead_to_samples(cursor.segment_time), + }; + + if cursor.segment.timescale == 1.0 { + self.render_current_chunk(project, chunk_samples, written * 2, out); + self.cursor.samples += chunk_samples; + } + + self.elapsed_samples += chunk_samples; + written += chunk_samples; + } + + if written == 0 { None } else { Some(written) } } - fn render_linear_frame_raw( + fn render_linear_frame_into( &mut self, samples: usize, project: &ProjectConfiguration, - ) -> Option<(usize, Vec)> { + out: &mut Vec, + ) -> Option { if samples == 0 { return None; } @@ -219,8 +340,8 @@ impl AudioRenderer { return None; } - let mut ret = vec![0.0; samples * 2]; - let rendered = self.render_current_chunk(project, samples, 0, &mut ret); + out.resize(samples * Self::CHANNELS as usize, 0.0); + let rendered = self.render_current_chunk(project, samples, 0, out); if rendered == 0 { self.elapsed_samples += samples; @@ -229,24 +350,61 @@ impl AudioRenderer { self.elapsed_samples += rendered; self.cursor.samples += rendered; - ret.truncate(rendered * 2); - Some((rendered, ret)) + Some(rendered) + } + + fn render_linear_frame_to_slice( + &mut self, + samples: usize, + project: &ProjectConfiguration, + out: &mut [f32], + ) -> Option { + if samples == 0 { + return None; + } + + if self.cursor.timescale != 1.0 { + self.elapsed_samples += samples; + return None; + } + + let rendered = self.render_current_chunk(project, samples, 0, out); + + if rendered == 0 { + self.elapsed_samples += samples; + return None; + } + + self.elapsed_samples += rendered; + self.cursor.samples += rendered; + + Some(rendered) } fn timeline_cursor<'a>( - &self, + &mut self, timeline: &'a TimelineConfiguration, ) -> Option> { - let mut segment_start_samples = 0usize; - let mut accumulated_duration = 0.0; + if timeline.segments.is_empty() { + return None; + } + + if self.timeline_segment_index >= timeline.segments.len() + || self.elapsed_samples < self.timeline_segment_start_samples + { + self.timeline_segment_index = 0; + self.timeline_segment_start_samples = 0; + self.timeline_segment_start_secs = 0.0; + } - for segment in &timeline.segments { - accumulated_duration += segment.duration(); - let segment_end_samples = self.playhead_to_samples(accumulated_duration); + while self.timeline_segment_index < timeline.segments.len() { + let segment = &timeline.segments[self.timeline_segment_index]; + let segment_end_secs = self.timeline_segment_start_secs + segment.duration(); + let segment_end_samples = self.playhead_to_samples(segment_end_secs); if self.elapsed_samples < segment_end_samples { - let local_samples = self.elapsed_samples - segment_start_samples; + let local_samples = self.elapsed_samples - self.timeline_segment_start_samples; let local_time = local_samples as f64 / Self::SAMPLE_RATE as f64; return Some(TimelineCursor { segment_end_samples, @@ -255,7 +413,9 @@ impl AudioRenderer { }); } - segment_start_samples = segment_end_samples; + self.timeline_segment_index += 1; + self.timeline_segment_start_samples = segment_end_samples; + self.timeline_segment_start_secs = segment_end_secs; } None @@ -277,12 +437,7 @@ impl AudioRenderer { return 0; } - let offsets = project - .clips - .iter() - .find(|c| c.index == self.cursor.clip_index) - .map(|c| c.offsets) - .unwrap_or_default(); + let offsets = self.clip_offsets_for_project(project); let max_samples = tracks .iter() @@ -300,22 +455,24 @@ impl AudioRenderer { let samples = samples.min(max_samples - self.cursor.samples); - let track_datas = tracks - .iter() - .map(|t| AudioRendererTrack { - data: 
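// Gain is resolved to linear once per chunk (linear_gain_for_db below),
// keeping dB math out of the mixer's per-sample loops.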
t.data().as_ref(), - gain: if project.audio.mute { - f32::NEG_INFINITY - } else { - let g = t.gain(&project.audio); - if g < -30.0 { f32::NEG_INFINITY } else { g } - }, - stereo_mode: t.stereo_mode(&project.audio), - offset: (t.offset(&offsets) * Self::SAMPLE_RATE as f32) as isize, - }) - .collect::>(); + let track_datas = tracks.iter().map(|t| AudioRendererTrack { + data: t.data().as_ref(), + linear_gain: if project.audio.mute { + 0.0 + } else { + linear_gain_for_db(t.gain(&project.audio)) + }, + stereo_mode: t.stereo_mode(&project.audio), + offset: (t.offset(&offsets) * Self::SAMPLE_RATE as f32) as isize, + }); - cap_audio::render_audio(&track_datas, self.cursor.samples, samples, out_offset, out) + cap_audio::render_audio_from_tracks( + track_datas, + self.cursor.samples, + samples, + out_offset, + out, + ) } } @@ -666,6 +823,10 @@ mod tests { 0.0 } + fn mic_offset(offsets: &ClipOffsets) -> f32 { + offsets.mic + } + fn write_step_wav(path: &Path, section_values: &[i16]) { let sample_rate = AudioData::SAMPLE_RATE; let channels = 2u16; @@ -811,6 +972,43 @@ mod tests { assert_eq!(buffer.current_audible_playhead(1.0), 0.0); } + #[test] + fn cached_clip_offsets_match_uncached_render() { + let _ = ffmpeg::init(); + + let dir = tempfile::tempdir().unwrap(); + let clip_path = dir.path().join("clip.wav"); + write_step_wav(&clip_path, &[1000, 2000, 3000]); + let audio = Arc::new(AudioData::from_file(&clip_path).unwrap()); + let segments = vec![AudioSegment { + tracks: vec![AudioSegmentTrack::new(audio, gain, stereo, mic_offset)], + }]; + let project = ProjectConfiguration { + clips: vec![ClipConfiguration { + index: 0, + offsets: ClipOffsets { + mic: 1.0, + ..Default::default() + }, + }], + ..Default::default() + }; + let mut uncached = AudioRenderer::new(segments.clone()); + let mut cached = AudioRenderer::new_with_project(segments, &project); + + uncached.set_playhead(0.0, &project); + cached.set_playhead(0.0, &project); + + let uncached_samples = uncached + .render_frame_raw(AudioData::SAMPLE_RATE as usize, &project) + .unwrap(); + let cached_samples = cached + .render_frame_raw(AudioData::SAMPLE_RATE as usize, &project) + .unwrap(); + + assert_eq!(uncached_samples, cached_samples); + } + #[test] fn speed_segment_start_cuts_audio_inside_a_single_request() { let (_dir, mut renderer, project) = build_renderer_fixture(); From f8a3094356fb723c573a2a2fe958dcb5fbc554a1 Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Fri, 8 May 2026 10:34:04 +0100 Subject: [PATCH 10/17] perf(editor): load segment audio concurrently with decoder setup --- crates/editor/src/editor_instance.rs | 134 +++++++++++++++------------ 1 file changed, 73 insertions(+), 61 deletions(-) diff --git a/crates/editor/src/editor_instance.rs b/crates/editor/src/editor_instance.rs index 3bb3e55837..bc878acb4f 100644 --- a/crates/editor/src/editor_instance.rs +++ b/crates/editor/src/editor_instance.rs @@ -11,6 +11,7 @@ use cap_rendering::{ SegmentVideoPaths, SharedWgpuDevice, Video, ZoomFocusInterpolator, get_duration, spring_mass_damper::SpringMassDamperSimulationConfig, }; +use futures::future::try_join_all; use std::{ path::{Path, PathBuf}, sync::{ @@ -627,6 +628,26 @@ pub struct SegmentMedia { pub decoders: RecordingSegmentDecoders, } +async fn load_audio_segment( + path: Option, + label: String, +) -> Result>, String> { + let Some(path) = path else { + return Ok(None); + }; + + let join_label = label.clone(); + let audio = tokio::task::spawn_blocking(move || { + 
AudioData::from_file(path) + .map(Arc::new) + .map_err(|e| format!("{label} / {e}")) + }) + .await + .map_err(|e| format!("{join_label} / {e}"))??; + + Ok(Some(audio)) +} + pub async fn create_segments( recording_meta: &RecordingMeta, meta: &StudioRecordingMeta, @@ -634,16 +655,6 @@ pub async fn create_segments( ) -> Result, String> { match &meta { cap_project::StudioRecordingMeta::SingleSegment { segment: s } => { - let audio = s - .audio - .as_ref() - .map(|audio_meta| { - AudioData::from_file(recording_meta.path(&audio_meta.path)) - .map_err(|e| format!("SingleSegment Audio / {e}")) - }) - .transpose()? - .map(Arc::new); - let cursor = Arc::new( s.cursor .as_ref() @@ -664,18 +675,27 @@ pub async fn create_segments( .unwrap_or_default(), ); - let decoders = RecordingSegmentDecoders::new( - recording_meta, - meta, - SegmentVideoPaths { - display: recording_meta.path(&s.display.path), - camera: s.camera.as_ref().map(|c| recording_meta.path(&c.path)), - }, - 0, - force_ffmpeg, - ) - .await - .map_err(|e| format!("SingleSegment / {e}"))?; + let audio_path = s + .audio + .as_ref() + .map(|audio_meta| recording_meta.path(&audio_meta.path)); + let audio_task = load_audio_segment(audio_path, "SingleSegment Audio".to_string()); + let decoders_task = async { + RecordingSegmentDecoders::new( + recording_meta, + meta, + SegmentVideoPaths { + display: recording_meta.path(&s.display.path), + camera: s.camera.as_ref().map(|c| recording_meta.path(&c.path)), + }, + 0, + force_ffmpeg, + ) + .await + .map_err(|e| format!("SingleSegment / {e}")) + }; + + let (audio, decoders) = tokio::try_join!(audio_task, decoders_task)?; Ok(vec![SegmentMedia { audio, @@ -686,56 +706,48 @@ pub async fn create_segments( }]) } cap_project::StudioRecordingMeta::MultipleSegments { inner, .. } => { - let mut segments = vec![]; - - for (i, s) in inner.segments.iter().enumerate() { - let audio = s - .mic - .as_ref() - .map(|audio| { - AudioData::from_file(recording_meta.path(&audio.path)) - .map_err(|e| format!("MultipleSegments {i} Audio / {e}")) - }) - .transpose()? - .map(Arc::new); - - let system_audio = s + try_join_all(inner.segments.iter().enumerate().map(|(i, s)| async move { + let audio_path = s.mic.as_ref().map(|audio| recording_meta.path(&audio.path)); + let system_audio_path = s .system_audio .as_ref() - .map(|audio| { - AudioData::from_file(recording_meta.path(&audio.path)) - .map_err(|e| format!("MultipleSegments {i} System Audio / {e}")) - }) - .transpose()? 
- .map(Arc::new); + .map(|audio| recording_meta.path(&audio.path)); + + let audio_task = + load_audio_segment(audio_path, format!("MultipleSegments {i} Audio")); + let system_audio_task = load_audio_segment( + system_audio_path, + format!("MultipleSegments {i} System Audio"), + ); + let decoders_task = async move { + RecordingSegmentDecoders::new( + recording_meta, + meta, + SegmentVideoPaths { + display: recording_meta.path(&s.display.path), + camera: s.camera.as_ref().map(|c| recording_meta.path(&c.path)), + }, + i, + force_ffmpeg, + ) + .await + .map_err(|e| format!("MultipleSegments {i} / {e}")) + }; let cursor = Arc::new(s.cursor_events(recording_meta)); - - let decoders = RecordingSegmentDecoders::new( - recording_meta, - meta, - SegmentVideoPaths { - display: recording_meta.path(&s.display.path), - camera: s.camera.as_ref().map(|c| recording_meta.path(&c.path)), - }, - i, - force_ffmpeg, - ) - .await - .map_err(|e| format!("MultipleSegments {i} / {e}"))?; - let keyboard = Arc::new(s.keyboard_events(recording_meta)); + let (audio, system_audio, decoders) = + tokio::try_join!(audio_task, system_audio_task, decoders_task)?; - segments.push(SegmentMedia { + Ok(SegmentMedia { audio, system_audio, cursor, keyboard, decoders, - }); - } - - Ok(segments) + }) + })) + .await } } } From fc335417b68a8930006e6316744907bc5ec49f4b Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Fri, 8 May 2026 10:34:05 +0100 Subject: [PATCH 11/17] refactor(editor): use first_camera_duration for recording length bound --- crates/editor/src/editor.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/crates/editor/src/editor.rs b/crates/editor/src/editor.rs index f69ea714ae..2f1e966db3 100644 --- a/crates/editor/src/editor.rs +++ b/crates/editor/src/editor.rs @@ -74,10 +74,7 @@ impl Renderer { )?); let mut max_duration = recordings.duration(); - if let Some(camera_path) = meta.camera_path() - && let Ok(camera_duration) = - recordings.get_source_duration(&recording_meta.path(&camera_path)) - { + if let Some(camera_duration) = recordings.first_camera_duration() { max_duration = max_duration.max(camera_duration); } From e9b712aba7232843cb0a9dd1244257efdaf7e98f Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Fri, 8 May 2026 10:34:05 +0100 Subject: [PATCH 12/17] chore(editor): re-export AudioSegment from crate root --- crates/editor/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/editor/src/lib.rs b/crates/editor/src/lib.rs index 0d37d6e87d..a8208d6c8f 100644 --- a/crates/editor/src/lib.rs +++ b/crates/editor/src/lib.rs @@ -4,7 +4,7 @@ mod editor_instance; mod playback; mod segments; -pub use audio::AudioRenderer; +pub use audio::{AudioRenderer, AudioSegment}; pub use editor::EditorFrameOutput; pub use editor_instance::{EditorInstance, EditorState, SegmentMedia, create_segments}; pub use segments::get_audio_segments; From 2e780d0a05d6cb4cacaaf2da731c073f83d69ca0 Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Fri, 8 May 2026 10:34:07 +0100 Subject: [PATCH 13/17] build(export): add cap-audio dev dependency for benchmarks --- crates/export/Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/export/Cargo.toml b/crates/export/Cargo.toml index c46d511f2c..a0bfaf4315 100644 --- a/crates/export/Cargo.toml +++ b/crates/export/Cargo.toml @@ -31,6 +31,7 @@ rgb = "0.8" workspace-hack = { version = 
"0.1", path = "../workspace-hack" } [dev-dependencies] +cap-audio = { path = "../audio" } clap = { version = "4.5.41", features = ["derive"] } chrono = "0.4" directories = "6.0.0" From 5db763790f9e69a6be80df0481ce476cc852c2a2 Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Fri, 8 May 2026 10:34:08 +0100 Subject: [PATCH 14/17] perf(export): parallelize renderer setup with segment loading --- crates/export/src/lib.rs | 46 ++++++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/crates/export/src/lib.rs b/crates/export/src/lib.rs index 7879e765cb..e694d80bd4 100644 --- a/crates/export/src/lib.rs +++ b/crates/export/src/lib.rs @@ -34,9 +34,9 @@ pub enum ExportError { #[derive(thiserror::Error, Debug)] pub enum ExporterBuildError { #[error("Failed to load config: {0}")] - ConfigLoad(#[source] Box), + ConfigLoad(String), #[error("Failed to load meta: {0}")] - MetaLoad(#[source] Box), + MetaLoad(String), #[error("Recording is not a studio recording")] NotStudioRecording, #[error("Failed to load recordings meta: {0}")] @@ -79,11 +79,11 @@ impl ExporterBuilder { config } else { ProjectConfiguration::load(&self.project_path) - .map_err(|v| Error::ConfigLoad(v.into()))? + .map_err(|v| Error::ConfigLoad(v.to_string()))? }; - let recording_meta = - RecordingMeta::load_for_project(&self.project_path).map_err(Error::MetaLoad)?; + let recording_meta = RecordingMeta::load_for_project(&self.project_path) + .map_err(|v| Error::MetaLoad(v.to_string()))?; let studio_meta = recording_meta .studio_meta() .ok_or(Error::NotStudioRecording)?; @@ -93,20 +93,30 @@ impl ExporterBuilder { .map_err(Error::RecordingsMeta)?, ); - let render_constants = Arc::new( - RenderVideoConstants::new( - &recordings.segments, - recording_meta.clone(), - studio_meta.clone(), - ) - .await - .map_err(Error::RendererSetup)?, - ); + let render_constants_task = { + let recordings = Arc::clone(&recordings); + let recording_meta = recording_meta.clone(); + let studio_meta = studio_meta.clone(); + async move { + RenderVideoConstants::new(&recordings.segments, recording_meta, studio_meta) + .await + .map(Arc::new) + .map_err(Error::RendererSetup) + } + }; + + let segments_task = { + let recording_meta = recording_meta.clone(); + let studio_meta = studio_meta.clone(); + let force_ffmpeg_decoder = self.force_ffmpeg_decoder; + async move { + cap_editor::create_segments(&recording_meta, &studio_meta, force_ffmpeg_decoder) + .await + .map_err(Error::MediaLoad) + } + }; - let segments = - cap_editor::create_segments(&recording_meta, studio_meta, self.force_ffmpeg_decoder) - .await - .map_err(Error::MediaLoad)?; + let (render_constants, segments) = tokio::try_join!(render_constants_task, segments_task)?; let output_path = self .output_path From 76e2a15b7fbb8303063a0ea7dfcc995b076b0293 Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Fri, 8 May 2026 10:34:08 +0100 Subject: [PATCH 15/17] perf(export): speed NV12 pipeline and widen export frame channel --- crates/export/src/mp4.rs | 371 ++++++++++++++++++++++++++++----------- 1 file changed, 266 insertions(+), 105 deletions(-) diff --git a/crates/export/src/mp4.rs b/crates/export/src/mp4.rs index d4e6f851db..28e581cd7b 100644 --- a/crates/export/src/mp4.rs +++ b/crates/export/src/mp4.rs @@ -172,7 +172,8 @@ impl Mp4ExportSettings { let output_path = base.output_path.clone(); let meta = &base.studio_meta; - let (frame_tx, frame_rx) = 
std::sync::mpsc::sync_channel::(4); + let (frame_tx, frame_rx) = + std::sync::mpsc::sync_channel::(EXPORT_FRAME_CHANNEL_CAPACITY); let mut video_info = VideoInfo::from_raw(RawVideoFormat::Nv12, output_size.0, output_size.1, fps); @@ -223,7 +224,10 @@ impl Mp4ExportSettings { info!("Created MP4File encoder (NV12, external conversion, export settings)"); let mut audio_renderer = if has_audio { - Some(AudioRenderer::new(audio_segments)) + Some(AudioRenderer::new_with_project( + audio_segments, + &project_for_audio, + )) } else { None }; @@ -247,23 +251,6 @@ impl Mp4ExportSettings { audio.set_playhead(0.0, &project_for_audio); } - let audio_frame = audio_renderer.as_mut().and_then(|audio| { - let n = u64::from(input.frame_number); - let end = ((n + 1) * sample_rate) / fps_u64; - if end <= audio_sample_cursor { - return None; - } - let pts = audio_sample_cursor as i64; - let samples = (end - audio_sample_cursor) as usize; - audio_sample_cursor = end; - audio - .render_frame(samples, &project_for_audio) - .map(|mut frame| { - frame.set_pts(Some(pts)); - frame - }) - }); - fill_nv12_frame_direct( &mut reusable_frame, &input.nv12_data, @@ -279,9 +266,6 @@ impl Mp4ExportSettings { Duration::MAX, ) .map_err(|err| err.to_string())?; - if let Some(audio) = audio_frame { - encoder.queue_audio_frame(audio); - } encoded_frames += 1; if encoded_frames == 1 && let Some(atom) = record_first_queued_ms.as_ref() @@ -290,6 +274,27 @@ impl Mp4ExportSettings { let _ = atom.compare_exchange(u64::MAX, ms, Ordering::Relaxed, Ordering::Relaxed); } + + let audio_frame = audio_renderer.as_mut().and_then(|audio| { + let n = u64::from(input.frame_number); + let end = ((n + 1) * sample_rate) / fps_u64; + if end <= audio_sample_cursor { + return None; + } + let pts = audio_sample_cursor as i64; + let samples = (end - audio_sample_cursor) as usize; + audio_sample_cursor = end; + audio + .render_frame(samples, &project_for_audio) + .map(|mut frame| { + frame.set_pts(Some(pts)); + frame + }) + }); + + if let Some(audio) = audio_frame { + encoder.queue_audio_frame(audio); + } } let encode_elapsed = encode_start.elapsed(); @@ -384,72 +389,33 @@ fn nv12_from_rendered_frame(frame: Nv12RenderedFrame) -> ExportFrame { let width = frame.width; let height = frame.height; - let mut rgba_frame = ffmpeg::frame::Video::new(ffmpeg::format::Pixel::RGBA, width, height); - let stride = rgba_frame.stride(0); - let src_stride = frame.y_stride as usize; - for row in 0..height as usize { - let src_start = row * src_stride; - let dst_start = row * stride; - let copy_width = (width as usize * 4).min(stride).min(src_stride); - if src_start + copy_width <= frame.data.len() - && dst_start + copy_width <= rgba_frame.data_mut(0).len() - { - rgba_frame.data_mut(0)[dst_start..dst_start + copy_width] - .copy_from_slice(&frame.data[src_start..src_start + copy_width]); - } - } - - if let Ok(mut converter) = ffmpeg::software::scaling::Context::get( - ffmpeg::format::Pixel::RGBA, - width, - height, - ffmpeg::format::Pixel::NV12, - width, - height, - ffmpeg::software::scaling::flag::Flags::FAST_BILINEAR, + let mut result = vec![0u8; width as usize * height as usize * 3 / 2]; + if cap_rendering::cpu_yuv::rgba_to_nv12_fast( + &frame.data, + &mut result, + cap_rendering::cpu_yuv::RgbaToNv12Config { + width, + height, + rgba_stride: frame.y_stride, + y_stride: width, + uv_stride: width, + }, ) { - let mut nv12_frame = ffmpeg::frame::Video::new(ffmpeg::format::Pixel::NV12, width, height); - if converter.run(&rgba_frame, &mut nv12_frame).is_ok() { - let y_size = 
nv12_frame.stride(0) * height as usize; - let uv_size = nv12_frame.stride(1) * (height as usize / 2); - let y_data = &nv12_frame.data(0)[..y_size]; - let uv_data = &nv12_frame.data(1)[..uv_size]; - let mut result = Vec::with_capacity(width as usize * height as usize * 3 / 2); - - if nv12_frame.stride(0) == width as usize { - result.extend_from_slice(y_data); - } else { - for row in 0..height as usize { - let start = row * nv12_frame.stride(0); - result.extend_from_slice(&y_data[start..start + width as usize]); - } - } - - if nv12_frame.stride(1) == width as usize { - result.extend_from_slice(uv_data); - } else { - for row in 0..(height as usize / 2) { - let start = row * nv12_frame.stride(1); - result.extend_from_slice(&uv_data[start..start + width as usize]); - } - } - - return ExportFrame { - nv12_data: SharedNv12Buffer::from_vec(result), - width, - height, - y_stride: width, - frame_number: frame.frame_number, - }; - } + return ExportFrame { + nv12_data: SharedNv12Buffer::from_vec(result), + width, + height, + y_stride: width, + frame_number: frame.frame_number, + }; } tracing::error!( frame_number = frame.frame_number, - "swscale RGBA to NV12 conversion failed, using zeroed NV12" + "Fast RGBA to NV12 conversion failed, using zeroed NV12" ); ExportFrame { - nv12_data: SharedNv12Buffer::from_vec(vec![0u8; width as usize * height as usize * 3 / 2]), + nv12_data: SharedNv12Buffer::from_vec(result), width, height, y_stride: width, @@ -480,38 +446,41 @@ fn fill_nv12_frame_direct( }; let dst_y_stride = frame.stride(0); - if dst_y_stride == y_stride { - let copy_len = y_src.len().min(frame.data_mut(0).len()); - frame.data_mut(0)[..copy_len].copy_from_slice(&y_src[..copy_len]); - } else { - for row in 0..height { - let src_start = row * y_stride; - let dst_start = row * dst_y_stride; + { + let dst_y = frame.data_mut(0); + if dst_y_stride == y_stride { + let copy_len = y_src.len().min(dst_y.len()); + dst_y[..copy_len].copy_from_slice(&y_src[..copy_len]); + } else { let copy_width = width.min(y_stride).min(dst_y_stride); - if src_start + copy_width <= y_src.len() - && dst_start + copy_width <= frame.data_mut(0).len() - { - frame.data_mut(0)[dst_start..dst_start + copy_width] - .copy_from_slice(&y_src[src_start..src_start + copy_width]); + for row in 0..height { + let src_start = row * y_stride; + let dst_start = row * dst_y_stride; + if src_start + copy_width <= y_src.len() && dst_start + copy_width <= dst_y.len() { + dst_y[dst_start..dst_start + copy_width] + .copy_from_slice(&y_src[src_start..src_start + copy_width]); + } } } } let uv_height = height / 2; let dst_uv_stride = frame.stride(1); - if dst_uv_stride == width { - let copy_len = uv_src.len().min(frame.data_mut(1).len()); - frame.data_mut(1)[..copy_len].copy_from_slice(&uv_src[..copy_len]); - } else { - for row in 0..uv_height { - let src_start = row * width; - let dst_start = row * dst_uv_stride; + { + let dst_uv = frame.data_mut(1); + if dst_uv_stride == width { + let copy_len = uv_src.len().min(dst_uv.len()); + dst_uv[..copy_len].copy_from_slice(&uv_src[..copy_len]); + } else { let copy_width = width.min(dst_uv_stride); - if src_start + copy_width <= uv_src.len() - && dst_start + copy_width <= frame.data_mut(1).len() - { - frame.data_mut(1)[dst_start..dst_start + copy_width] - .copy_from_slice(&uv_src[src_start..src_start + copy_width]); + for row in 0..uv_height { + let src_start = row * width; + let dst_start = row * dst_uv_stride; + if src_start + copy_width <= uv_src.len() && dst_start + copy_width <= dst_uv.len() + { + 
dst_uv[dst_start..dst_start + copy_width] + .copy_from_slice(&uv_src[src_start..src_start + copy_width]); + } } } } @@ -585,6 +554,7 @@ use cap_rendering::{ProjectRecordingsMeta, RenderVideoConstants}; const FRAME_RECEIVE_INITIAL_TIMEOUT_SECS: u64 = 120; const FRAME_RECEIVE_STEADY_TIMEOUT_SECS: u64 = 90; const MAX_CONSECUTIVE_FRAME_TIMEOUTS: u32 = 3; +const EXPORT_FRAME_CHANNEL_CAPACITY: usize = 8; #[allow(clippy::too_many_arguments)] async fn export_render_to_channel( @@ -718,6 +688,7 @@ async fn export_render_to_channel( #[cfg(test)] mod tests { use super::*; + use std::{hint::black_box, time::Instant}; fn sum_samples(sample_rate: u64, fps: u64, frames: u64) -> u64 { (0..frames) @@ -806,6 +777,49 @@ mod tests { assert_eq!(*result.nv12_data, data); } + #[test] + fn nv12_from_rendered_frame_converts_rgba_format() { + use cap_rendering::{GpuOutputFormat, Nv12RenderedFrame}; + + let width = 4u32; + let height = 2u32; + let rgba_stride = 20u32; + let rgba = (0..rgba_stride * height) + .map(|i| ((i * 31 + 17) % 251) as u8) + .collect::>(); + let mut expected = vec![0u8; width as usize * height as usize * 3 / 2]; + + assert!(cap_rendering::cpu_yuv::rgba_to_nv12_fast( + &rgba, + &mut expected, + cap_rendering::cpu_yuv::RgbaToNv12Config { + width, + height, + rgba_stride, + y_stride: width, + uv_stride: width, + }, + )); + + let frame = Nv12RenderedFrame { + data: SharedNv12Buffer::from_vec(rgba), + width, + height, + y_stride: rgba_stride, + frame_number: 7, + target_time_ns: 0, + format: GpuOutputFormat::Rgba, + }; + + let result = nv12_from_rendered_frame(frame); + + assert_eq!(result.width, width); + assert_eq!(result.height, height); + assert_eq!(result.y_stride, width); + assert_eq!(result.frame_number, 7); + assert_eq!(*result.nv12_data, expected); + } + #[test] fn nv12_export_frame_dimensions_match() { let width = 1920u32; @@ -830,4 +844,151 @@ mod tests { "NV12 should save ~62.5% vs RGBA, got {savings_pct:.1}%" ); } + + fn fill_nv12_frame_direct_baseline( + frame: &mut ffmpeg::frame::Video, + nv12_data: &[u8], + width: u32, + height: u32, + y_stride: u32, + pts: i64, + ) { + frame.set_pts(Some(pts)); + + let width = width as usize; + let height = height as usize; + let y_stride = y_stride as usize; + + let y_plane_size = y_stride * height; + let y_src = &nv12_data[..y_plane_size.min(nv12_data.len())]; + let uv_src = if y_plane_size < nv12_data.len() { + &nv12_data[y_plane_size..] 
+ } else { + &[] + }; + + let dst_y_stride = frame.stride(0); + if dst_y_stride == y_stride { + let copy_len = y_src.len().min(frame.data_mut(0).len()); + frame.data_mut(0)[..copy_len].copy_from_slice(&y_src[..copy_len]); + } else { + for row in 0..height { + let src_start = row * y_stride; + let dst_start = row * dst_y_stride; + let copy_width = width.min(y_stride).min(dst_y_stride); + if src_start + copy_width <= y_src.len() + && dst_start + copy_width <= frame.data_mut(0).len() + { + frame.data_mut(0)[dst_start..dst_start + copy_width] + .copy_from_slice(&y_src[src_start..src_start + copy_width]); + } + } + } + + let uv_height = height / 2; + let dst_uv_stride = frame.stride(1); + if dst_uv_stride == width { + let copy_len = uv_src.len().min(frame.data_mut(1).len()); + frame.data_mut(1)[..copy_len].copy_from_slice(&uv_src[..copy_len]); + } else { + for row in 0..uv_height { + let src_start = row * width; + let dst_start = row * dst_uv_stride; + let copy_width = width.min(dst_uv_stride); + if src_start + copy_width <= uv_src.len() + && dst_start + copy_width <= frame.data_mut(1).len() + { + frame.data_mut(1)[dst_start..dst_start + copy_width] + .copy_from_slice(&uv_src[src_start..src_start + copy_width]); + } + } + } + } + + fn assert_plane_matches( + frame: &ffmpeg::frame::Video, + plane: usize, + src: &[u8], + rows: usize, + width: usize, + ) { + let stride = frame.stride(plane); + let data = frame.data(plane); + for row in 0..rows { + let src_start = row * width; + let dst_start = row * stride; + assert_eq!( + &src[src_start..src_start + width], + &data[dst_start..dst_start + width] + ); + } + } + + #[test] + #[ignore] + fn benchmark_fill_nv12_frame_direct_4k() { + ffmpeg::init().unwrap(); + + let width = 3840u32; + let height = 2160u32; + let y_size = (width * height) as usize; + let uv_size = (width * height / 2) as usize; + let nv12_data = (0..y_size + uv_size) + .map(|i| ((i * 31 + 17) % 251) as u8) + .collect::>(); + let iterations = 120usize; + let mut baseline_frame = + ffmpeg::frame::Video::new(ffmpeg::format::Pixel::NV12, width, height); + let mut optimized_frame = + ffmpeg::frame::Video::new(ffmpeg::format::Pixel::NV12, width, height); + + let baseline_start = Instant::now(); + for frame_number in 0..iterations { + fill_nv12_frame_direct_baseline( + black_box(&mut baseline_frame), + black_box(&nv12_data), + width, + height, + width, + frame_number as i64, + ); + } + let baseline_elapsed = baseline_start.elapsed(); + + let optimized_start = Instant::now(); + for frame_number in 0..iterations { + fill_nv12_frame_direct( + black_box(&mut optimized_frame), + black_box(&nv12_data), + width, + height, + width, + frame_number as i64, + ); + } + let optimized_elapsed = optimized_start.elapsed(); + + assert_eq!(baseline_frame.pts(), optimized_frame.pts()); + assert_plane_matches( + &optimized_frame, + 0, + &nv12_data[..y_size], + height as usize, + width as usize, + ); + assert_plane_matches( + &optimized_frame, + 1, + &nv12_data[y_size..], + (height / 2) as usize, + width as usize, + ); + + println!( + "{{\"baseline_ms\":{},\"optimized_ms\":{},\"speedup\":{:.3}}}", + baseline_elapsed.as_millis(), + optimized_elapsed.as_millis(), + baseline_elapsed.as_secs_f64() / optimized_elapsed.as_secs_f64() + ); + } } From a424daaefbd23cd36241a3e31bba6127573a5577 Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Fri, 8 May 2026 10:34:09 +0100 Subject: [PATCH 16/17] chore(export): add CPU startup profiling example binary --- 
.../examples/export-cpu-startup-profile.rs | 367 ++++++++++++++++++ 1 file changed, 367 insertions(+) create mode 100644 crates/export/examples/export-cpu-startup-profile.rs diff --git a/crates/export/examples/export-cpu-startup-profile.rs b/crates/export/examples/export-cpu-startup-profile.rs new file mode 100644 index 0000000000..d439433c9e --- /dev/null +++ b/crates/export/examples/export-cpu-startup-profile.rs @@ -0,0 +1,367 @@ +use cap_audio::{AudioData, AudioRendererTrack, StereoMode, linear_gain_for_db, render_audio}; +use cap_editor::{AudioSegment, create_segments, get_audio_segments}; +use cap_project::RecordingMeta; +use cap_rendering::{ + PrecomputedCursorTimeline, ProjectRecordingsMeta, ZoomFocusInterpolator, get_duration, + spring_mass_damper::SpringMassDamperSimulationConfig, +}; +use clap::Parser; +use serde::Serialize; +use std::{hint::black_box, path::PathBuf, sync::Arc, time::Instant}; + +#[derive(Parser, Debug)] +struct Cli { + path: PathBuf, + #[arg(long, default_value_t = 60)] + fps: u32, + #[arg(long, default_value_t = false)] + force_ffmpeg_decoder: bool, +} + +#[derive(Serialize)] +struct ExportCpuStartupProfile { + project: String, + fps: u32, + duration_secs: f64, + total_frames: u32, + recording_meta_load_ms: u128, + project_config_load_ms: u128, + recordings_meta_load_ms: u128, + segment_media_load_ms: u128, + precomputed_cursor_timelines_ms: u128, + zoom_interpolator_construct_ms: u128, + zoom_full_precompute_ms: u128, + zoom_lazy_first_frame_ms: u128, + zoom_lazy_all_frames_ms: u128, + audio_mix_profile: Option, +} + +#[derive(Serialize)] +struct AudioMixProfile { + tracks: usize, + frame_samples: usize, + iterations: usize, + baseline_ms: u128, + optimized_ms: u128, + speedup: f64, + outputs_equal: bool, +} + +struct BaselineAudioRendererTrack<'a> { + data: &'a AudioData, + gain_db: f32, + stereo_mode: StereoMode, + offset: isize, +} + +fn elapsed_ms(start: Instant) -> u128 { + start.elapsed().as_millis() +} + +fn cursor_smoothing( + project_config: &cap_project::ProjectConfiguration, +) -> Option { + (!project_config.cursor.raw).then_some(SpringMassDamperSimulationConfig { + tension: project_config.cursor.tension, + mass: project_config.cursor.mass, + friction: project_config.cursor.friction, + }) +} + +fn build_zoom_interpolators( + segments: &[cap_editor::SegmentMedia], + cursor_timelines: &[Arc], + project_config: &cap_project::ProjectConfiguration, + duration_secs: f64, +) -> Vec { + let smoothing = cursor_smoothing(project_config); + let click_spring = project_config.cursor.click_spring_config(); + let zoom_segments = project_config + .timeline + .as_ref() + .map(|timeline| timeline.zoom_segments.as_slice()) + .unwrap_or(&[]); + + segments + .iter() + .zip(cursor_timelines.iter()) + .map(|(segment, precomputed_cursor)| { + ZoomFocusInterpolator::new_with_precomputed_cursor( + &segment.cursor, + smoothing, + click_spring, + project_config.screen_movement_spring, + duration_secs, + zoom_segments, + Some(precomputed_cursor.clone()), + ) + }) + .collect() +} + +fn render_audio_baseline( + tracks: &[BaselineAudioRendererTrack], + offset: usize, + samples: usize, + out_offset: usize, + out: &mut [f32], +) -> usize { + let samples = samples.min( + tracks + .iter() + .filter_map(|t| { + let track_samples = t.data.samples().len() / t.data.channels() as usize; + let available = track_samples as isize - offset as isize - t.offset; + if available > 0 { + Some(available as usize) + } else { + None + } + }) + .max() + .unwrap_or(0), + ); + + for i in 0..samples { + 
let mut left: f32 = 0.0; + let mut right: f32 = 0.0; + + for track in tracks { + let i = i.wrapping_add_signed(track.offset); + + let data = track.data; + let gain = linear_gain_for_db(track.gain_db); + + if gain == 0.0 { + continue; + } + + if data.channels() == 1 { + if let Some(sample) = data.samples().get(offset + i) { + left += sample * 0.707 * gain; + right += sample * 0.707 * gain; + } + } else if data.channels() == 2 { + let base_idx = offset * 2 + i * 2; + let Some(l_sample) = data.samples().get(base_idx) else { + continue; + }; + let Some(r_sample) = data.samples().get(base_idx + 1) else { + continue; + }; + + match track.stereo_mode { + StereoMode::Stereo => { + left += l_sample * gain; + right += r_sample * gain; + } + StereoMode::MonoL => { + left += l_sample * gain; + right += l_sample * gain; + } + StereoMode::MonoR => { + left += r_sample * gain; + right += r_sample * gain; + } + } + } + } + + let l = left.clamp(-1.0, 1.0); + let r = right.clamp(-1.0, 1.0); + out[out_offset + i * 2] = l; + out[out_offset + i * 2 + 1] = r; + } + + samples +} + +fn benchmark_audio_mix( + audio_segments: &[AudioSegment], + project_config: &cap_project::ProjectConfiguration, +) -> Option { + let (clip_index, segment) = audio_segments + .iter() + .enumerate() + .find(|(_, segment)| !segment.tracks.is_empty())?; + + let offsets = project_config + .clips + .iter() + .find(|clip| clip.index == clip_index as u32) + .map(|clip| clip.offsets) + .unwrap_or_default(); + + let baseline_tracks = segment + .tracks + .iter() + .map(|track| { + let gain_db = if project_config.audio.mute { + -30.0 + } else { + track.gain(&project_config.audio) + }; + BaselineAudioRendererTrack { + data: track.data().as_ref(), + gain_db, + stereo_mode: track.stereo_mode(&project_config.audio), + offset: (track.offset(&offsets) * AudioData::SAMPLE_RATE as f32) as isize, + } + }) + .collect::>(); + + let tracks = segment + .tracks + .iter() + .map(|track| AudioRendererTrack { + data: track.data().as_ref(), + linear_gain: if project_config.audio.mute { + 0.0 + } else { + linear_gain_for_db(track.gain(&project_config.audio)) + }, + stereo_mode: track.stereo_mode(&project_config.audio), + offset: (track.offset(&offsets) * AudioData::SAMPLE_RATE as f32) as isize, + }) + .collect::>(); + + if tracks.is_empty() { + return None; + } + + let frame_samples = 1600usize; + let iterations = 1000usize; + let mut baseline_out = vec![0.0; frame_samples * 2]; + let mut optimized_out = vec![0.0; frame_samples * 2]; + + let started = Instant::now(); + for _ in 0..iterations { + render_audio_baseline( + &baseline_tracks, + 0, + frame_samples, + 0, + black_box(&mut baseline_out), + ); + } + let baseline_elapsed = started.elapsed(); + + let started = Instant::now(); + for _ in 0..iterations { + render_audio(&tracks, 0, frame_samples, 0, black_box(&mut optimized_out)); + } + let optimized_elapsed = started.elapsed(); + + Some(AudioMixProfile { + tracks: tracks.len(), + frame_samples, + iterations, + baseline_ms: baseline_elapsed.as_millis(), + optimized_ms: optimized_elapsed.as_millis(), + speedup: baseline_elapsed.as_secs_f64() / optimized_elapsed.as_secs_f64(), + outputs_equal: baseline_out == optimized_out, + }) +} + +#[tokio::main] +async fn main() -> Result<(), Box> { + let cli = Cli::parse(); + + let started = Instant::now(); + let recording_meta = RecordingMeta::load_for_project(&cli.path)?; + let recording_meta_load_ms = elapsed_ms(started); + + let studio_meta = recording_meta + .studio_meta() + .ok_or_else(|| 
std::io::Error::other("recording is not a studio recording"))? + .clone(); + + let started = Instant::now(); + let project_config = recording_meta.project_config(); + let project_config_load_ms = elapsed_ms(started); + + let started = Instant::now(); + let recordings = Arc::new( + ProjectRecordingsMeta::new(&recording_meta.project_path, &studio_meta) + .map_err(std::io::Error::other)?, + ); + let recordings_meta_load_ms = elapsed_ms(started); + + let duration_secs = get_duration(&recordings, &recording_meta, &studio_meta, &project_config); + let total_frames = (duration_secs * f64::from(cli.fps)).ceil() as u32; + + let started = Instant::now(); + let segments = create_segments(&recording_meta, &studio_meta, cli.force_ffmpeg_decoder) + .await + .map_err(std::io::Error::other)?; + let segment_media_load_ms = elapsed_ms(started); + let audio_segments = get_audio_segments(&segments); + let audio_mix_profile = benchmark_audio_mix(&audio_segments, &project_config); + + let smoothing = cursor_smoothing(&project_config); + let click_spring = project_config.cursor.click_spring_config(); + + let started = Instant::now(); + let cursor_timelines: Vec> = segments + .iter() + .map(|segment| { + Arc::new(PrecomputedCursorTimeline::new( + &segment.cursor, + smoothing, + Some(click_spring), + )) + }) + .collect(); + let precomputed_cursor_timelines_ms = elapsed_ms(started); + + let started = Instant::now(); + let mut full_zoom = + build_zoom_interpolators(&segments, &cursor_timelines, &project_config, duration_secs); + let zoom_interpolator_construct_ms = elapsed_ms(started); + + let started = Instant::now(); + for interpolator in &mut full_zoom { + interpolator.ensure_precomputed_until(duration_secs as f32 + 1.0); + } + let zoom_full_precompute_ms = elapsed_ms(started); + + let mut lazy_first = + build_zoom_interpolators(&segments, &cursor_timelines, &project_config, duration_secs); + let started = Instant::now(); + for interpolator in &mut lazy_first { + interpolator.ensure_precomputed_until(1.0 / cli.fps as f32); + } + let zoom_lazy_first_frame_ms = elapsed_ms(started); + + let mut lazy_all = + build_zoom_interpolators(&segments, &cursor_timelines, &project_config, duration_secs); + let started = Instant::now(); + for frame_number in 0..total_frames { + let until = (frame_number as f32 + 1.0) / cli.fps as f32; + for interpolator in &mut lazy_all { + interpolator.ensure_precomputed_until(until); + } + } + let zoom_lazy_all_frames_ms = elapsed_ms(started); + + let profile = ExportCpuStartupProfile { + project: cli.path.display().to_string(), + fps: cli.fps, + duration_secs, + total_frames, + recording_meta_load_ms, + project_config_load_ms, + recordings_meta_load_ms, + segment_media_load_ms, + precomputed_cursor_timelines_ms, + zoom_interpolator_construct_ms, + zoom_full_precompute_ms, + zoom_lazy_first_frame_ms, + zoom_lazy_all_frames_ms, + audio_mix_profile, + }; + + println!("{}", serde_json::to_string_pretty(&profile)?); + + Ok(()) +} From 0598c118bf4466e7486248f65f6833bb405ee175 Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Fri, 8 May 2026 10:34:09 +0100 Subject: [PATCH 17/17] chore(export): add encoder benchmark example binary --- .../examples/export-encoder-benchmark.rs | 380 ++++++++++++++++++ 1 file changed, 380 insertions(+) create mode 100644 crates/export/examples/export-encoder-benchmark.rs diff --git a/crates/export/examples/export-encoder-benchmark.rs b/crates/export/examples/export-encoder-benchmark.rs new file mode 100644 index 
0000000000..9d36f1e667
--- /dev/null
+++ b/crates/export/examples/export-encoder-benchmark.rs
@@ -0,0 +1,380 @@
+use cap_editor::{AudioRenderer, AudioSegment, create_segments, get_audio_segments};
+use cap_enc_ffmpeg::{AudioEncoder, aac::AACEncoder, h264::H264Encoder, mp4::MP4File};
+use cap_export::mp4::ExportCompression;
+use cap_media_info::{RawVideoFormat, VideoInfo};
+use cap_project::{ProjectConfiguration, RecordingMeta};
+use clap::Parser;
+use serde::Serialize;
+use std::{
+    path::{Path, PathBuf},
+    time::{Duration, Instant},
+};
+
+#[derive(Parser, Debug)]
+struct Cli {
+    #[arg(long, default_value_t = 3)]
+    duration: u32,
+    #[arg(long, default_value_t = 4)]
+    pattern_frames: usize,
+    #[arg(long)]
+    output_dir: Option<PathBuf>,
+    #[arg(long)]
+    recording_path: Option<PathBuf>,
+    #[arg(long, default_value_t = false)]
+    force_ffmpeg_decoder: bool,
+}
+
+#[derive(Clone, Copy)]
+struct Preset {
+    label: &'static str,
+    width: u32,
+    height: u32,
+    fps: u32,
+    compression: ExportCompression,
+}
+
+#[derive(Serialize)]
+struct BenchmarkResult {
+    label: String,
+    width: u32,
+    height: u32,
+    fps: u32,
+    compression: String,
+    frames: u32,
+    encode_loop_ms: u128,
+    finish_ms: u128,
+    total_ms: u128,
+    effective_fps: f64,
+    output_mb: f64,
+    has_audio: bool,
+    audio_frames: u32,
+    audio_samples: u64,
+    audio_render_us: u128,
+}
+
+struct AudioFixture {
+    project_config: ProjectConfiguration,
+    audio_segments: Vec<AudioSegment>,
+}
+
+fn presets() -> Vec<Preset> {
+    vec![
+        Preset {
+            label: "MP4 1080p/30fps/Maximum",
+            width: 1920,
+            height: 1080,
+            fps: 30,
+            compression: ExportCompression::Maximum,
+        },
+        Preset {
+            label: "MP4 1080p/30fps/Social",
+            width: 1920,
+            height: 1080,
+            fps: 30,
+            compression: ExportCompression::Social,
+        },
+        Preset {
+            label: "MP4 1080p/60fps/Maximum",
+            width: 1920,
+            height: 1080,
+            fps: 60,
+            compression: ExportCompression::Maximum,
+        },
+        Preset {
+            label: "MP4 4K/30fps/Maximum",
+            width: 3840,
+            height: 2160,
+            fps: 30,
+            compression: ExportCompression::Maximum,
+        },
+        Preset {
+            label: "MP4 4K/30fps/Social",
+            width: 3840,
+            height: 2160,
+            fps: 30,
+            compression: ExportCompression::Social,
+        },
+    ]
+}
+
+fn compression_label(compression: ExportCompression) -> &'static str {
+    match compression {
+        ExportCompression::Maximum => "Maximum",
+        ExportCompression::Social => "Social",
+        ExportCompression::Web => "Web",
+        ExportCompression::Potato => "Potato",
+    }
+}
+
+fn make_nv12_pattern(width: u32, height: u32, frame_index: usize) -> Vec<u8> {
+    let width = width as usize;
+    let height = height as usize;
+    let y_size = width * height;
+    let uv_size = width * height / 2;
+    let mut data = vec![0u8; y_size + uv_size];
+
+    for row in 0..height {
+        let row_start = row * width;
+        for col in 0..width {
+            data[row_start + col] = ((row * 3 + col * 5 + frame_index * 11) % 220 + 16) as u8;
+        }
+    }
+
+    for row in 0..height / 2 {
+        let row_start = y_size + row * width;
+        for col in (0..width).step_by(2) {
+            data[row_start + col] = ((row * 7 + col * 3 + frame_index * 13) % 128 + 64) as u8;
+            data[row_start + col + 1] = ((row * 5 + col * 11 + frame_index * 17) % 128 + 64) as u8;
+        }
+    }
+
+    data
+}
+
+fn copy_nv12_to_frame(frame: &mut ffmpeg::frame::Video, nv12_data: &[u8], width: u32, height: u32) {
+    let width = width as usize;
+    let height = height as usize;
+    let y_size = width * height;
+    let y_src = &nv12_data[..y_size];
+    let uv_src = &nv12_data[y_size..];
+
+    let y_stride = frame.stride(0);
+    {
+        let y_dst = frame.data_mut(0);
+        if y_stride == width {
+            y_dst[..y_size].copy_from_slice(y_src);
+        } else {
for row in 0..height { + let src_start = row * width; + let dst_start = row * y_stride; + y_dst[dst_start..dst_start + width] + .copy_from_slice(&y_src[src_start..src_start + width]); + } + } + } + + let uv_stride = frame.stride(1); + let uv_height = height / 2; + { + let uv_dst = frame.data_mut(1); + if uv_stride == width { + uv_dst[..uv_src.len()].copy_from_slice(uv_src); + } else { + for row in 0..uv_height { + let src_start = row * width; + let dst_start = row * uv_stride; + uv_dst[dst_start..dst_start + width] + .copy_from_slice(&uv_src[src_start..src_start + width]); + } + } + } +} + +fn run_preset( + preset: Preset, + duration: u32, + pattern_frames: usize, + output_dir: &Path, + audio_fixture: Option<&AudioFixture>, +) -> Result { + let frames = duration * preset.fps; + let mut video_info = VideoInfo::from_raw( + RawVideoFormat::Nv12, + preset.width, + preset.height, + preset.fps, + ); + video_info.time_base = ffmpeg::Rational::new(1, preset.fps as i32); + let output_path = output_dir.join( + preset + .label + .replace('/', "-") + .replace(' ', "_") + .to_lowercase(), + ); + let output_file = output_path.with_extension("mp4"); + + let patterns = (0..pattern_frames.max(1)) + .map(|i| make_nv12_pattern(preset.width, preset.height, i)) + .collect::>(); + let has_audio = audio_fixture + .filter(|fixture| !fixture.project_config.audio.mute) + .is_some_and(|fixture| { + fixture + .audio_segments + .first() + .is_some_and(|segment| !segment.tracks.is_empty()) + }); + + let mut encoder = MP4File::init( + "encoder-benchmark", + output_path, + false, + |o| { + H264Encoder::builder(video_info) + .with_bpp(preset.compression.bits_per_pixel()) + .with_export_priority() + .with_export_settings() + .with_external_conversion() + .build(o) + }, + |o| { + has_audio.then(|| { + AACEncoder::init(AudioRenderer::info(), o) + .map(|v| v.boxed()) + .map_err(Into::into) + }) + }, + ) + .map_err(|err| err.to_string())?; + + let audio_project_config = audio_fixture.map(|fixture| fixture.project_config.clone()); + let mut audio_renderer = if has_audio { + audio_fixture.map(|fixture| { + let mut renderer = AudioRenderer::new_with_project( + fixture.audio_segments.clone(), + &fixture.project_config, + ); + renderer.set_playhead(0.0, &fixture.project_config); + renderer + }) + } else { + None + }; + let mut audio_sample_cursor = 0u64; + let mut audio_frames = 0u32; + let mut audio_samples = 0u64; + let mut audio_render_elapsed = Duration::ZERO; + let sample_rate = u64::from(AudioRenderer::SAMPLE_RATE); + let fps_u64 = u64::from(preset.fps); + + let mut frame = + ffmpeg::frame::Video::new(ffmpeg::format::Pixel::NV12, preset.width, preset.height); + let mut converted_frame = None; + let encode_start = Instant::now(); + + for frame_number in 0..frames { + let pattern = &patterns[frame_number as usize % patterns.len()]; + copy_nv12_to_frame(&mut frame, pattern, preset.width, preset.height); + frame.set_pts(Some(frame_number as i64)); + encoder + .queue_video_frame_reusable(&mut frame, &mut converted_frame, Duration::MAX) + .map_err(|err| err.to_string())?; + + if let (Some(audio), Some(project_config)) = + (&mut audio_renderer, audio_project_config.as_ref()) + { + let frame_number = u64::from(frame_number); + let end = ((frame_number + 1) * sample_rate) / fps_u64; + if end > audio_sample_cursor { + let pts = audio_sample_cursor as i64; + let samples = (end - audio_sample_cursor) as usize; + audio_sample_cursor = end; + let audio_started = Instant::now(); + let audio_frame = audio + .render_frame(samples, 
project_config) + .map(|mut frame| { + frame.set_pts(Some(pts)); + frame + }); + audio_render_elapsed += audio_started.elapsed(); + if let Some(audio_frame) = audio_frame { + encoder.queue_audio_frame(audio_frame); + audio_frames += 1; + audio_samples += samples as u64; + } + } + } + } + + let encode_loop_elapsed = encode_start.elapsed(); + let finish_start = Instant::now(); + let finish = encoder.finish().map_err(|err| err.to_string())?; + finish + .video_finish + .map_err(|err| format!("Video encoding failed: {err}"))?; + finish + .audio_finish + .map_err(|err| format!("Audio encoding failed: {err}"))?; + let finish_elapsed = finish_start.elapsed(); + let total_elapsed = encode_start.elapsed(); + let output_mb = std::fs::metadata(output_file) + .map(|m| m.len() as f64 / 1024.0 / 1024.0) + .unwrap_or(0.0); + + Ok(BenchmarkResult { + label: preset.label.to_string(), + width: preset.width, + height: preset.height, + fps: preset.fps, + compression: compression_label(preset.compression).to_string(), + frames, + encode_loop_ms: encode_loop_elapsed.as_millis(), + finish_ms: finish_elapsed.as_millis(), + total_ms: total_elapsed.as_millis(), + effective_fps: frames as f64 / total_elapsed.as_secs_f64().max(0.001), + output_mb, + has_audio, + audio_frames, + audio_samples, + audio_render_us: audio_render_elapsed.as_micros(), + }) +} + +async fn load_audio_fixture( + path: &Path, + force_ffmpeg_decoder: bool, +) -> Result { + let recording_meta = RecordingMeta::load_for_project(path).map_err(|err| err.to_string())?; + let studio_meta = recording_meta + .studio_meta() + .ok_or_else(|| "recording is not a studio recording".to_string())? + .clone(); + let project_config = recording_meta.project_config(); + let segments = create_segments(&recording_meta, &studio_meta, force_ffmpeg_decoder) + .await + .map_err(|err| err.to_string())?; + let audio_segments = get_audio_segments(&segments); + + Ok(AudioFixture { + project_config, + audio_segments, + }) +} + +#[tokio::main] +async fn main() -> Result<(), String> { + ffmpeg::init().map_err(|err| err.to_string())?; + let cli = Cli::parse(); + let temp_dir = tempfile::TempDir::new().map_err(|err| err.to_string())?; + let output_dir = cli.output_dir.as_deref().unwrap_or(temp_dir.path()); + std::fs::create_dir_all(output_dir).map_err(|err| err.to_string())?; + let audio_fixture = if let Some(recording_path) = cli.recording_path.as_deref() { + Some(load_audio_fixture(recording_path, cli.force_ffmpeg_decoder).await?) + } else { + None + }; + + let mut results = Vec::new(); + for preset in presets() { + let result = run_preset( + preset, + cli.duration, + cli.pattern_frames, + output_dir, + audio_fixture.as_ref(), + )?; + eprintln!( + "{}: {:.1} fps, {:.2} MB", + result.label, result.effective_fps, result.output_mb + ); + results.push(result); + } + + println!( + "{}", + serde_json::to_string_pretty(&results).map_err(|err| err.to_string())? + ); + + Ok(()) +}
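
Note on the concurrency pattern in patches 10 and 14: both replace a sequential chain of awaits with tokio::try_join!, so the independent setup tasks run concurrently and the first error aborts the remaining awaits; patch 10's multi-segment path fans the same shape out across segments with futures::future::try_join_all. A minimal standalone sketch of the pattern follows; the task names, bodies, and timings are illustrative stand-ins, not code from the patches:

use std::time::Duration;

// Hypothetical stand-in for audio loading (patch 10 uses load_audio_segment).
async fn load_audio() -> Result<Vec<f32>, String> {
    tokio::time::sleep(Duration::from_millis(50)).await;
    Ok(vec![0.0; 48_000])
}

// Hypothetical stand-in for decoder setup (patch 10 uses RecordingSegmentDecoders::new).
async fn build_decoders() -> Result<&'static str, String> {
    tokio::time::sleep(Duration::from_millis(80)).await;
    Ok("decoders ready")
}

#[tokio::main]
async fn main() -> Result<(), String> {
    // Both futures are polled concurrently on one task; wall time is roughly
    // max(50 ms, 80 ms) instead of the sum a sequential `await` chain pays.
    let (audio, decoders) = tokio::try_join!(load_audio(), build_decoders())?;
    println!("{} samples, {}", audio.len(), decoders);
    Ok(())
}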