From 8044a42f9809299e2aa79c7eb3386c18579649e8 Mon Sep 17 00:00:00 2001 From: Mollusk Date: Sun, 24 May 2026 15:25:05 -0400 Subject: [PATCH] fix(quality): scale after videoconvert at exact even WxH MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Live medium-quality stream errored with "negotiation problem" on the host and rendered a squashed, garbled picture in the viewer. Two causes, both from inserting videoscale before videoconvert with PAR+range caps: - videoscale was scaling pipewiresrc's raw output directly. The portal source's format/memory (e.g. DMABuf) isn't something software videoscale negotiates — the original pipeline always fed pipewiresrc through videoconvert first. Move videoscale *after* videoconvert so it operates on system-memory NV12/I420. - `pixel-aspect-ratio=1/1` + a width range over-constrained negotiation and risked a non-square-PAR / distorted result. Instead compute an exact even WxH from the known source dimensions (Wayland: portal size; X11: root/window geometry), preserving aspect, and pin it fully in the caps. This is also downscale-only now — a source already at/below the target height is left native instead of upscaled. Unknown dims (rare X11 geometry failure) fall back to the height-only + square-pixel + even width-range negotiation. source_dims threaded through pipeline::spawn from both backends. Smoke test updated to mirror the new ordering (1920x1080 -> 852x480, videoscale after videoconvert) and still asserts an even sub-source width. Co-Authored-By: Claude Opus 4.7 --- scripts/smoke-pipeline.sh | 17 +++++----- src/host/pipeline.rs | 68 +++++++++++++++++++++++++-------------- src/host/wayland.rs | 2 +- src/host/x11.rs | 24 +++++++++----- 4 files changed, 69 insertions(+), 42 deletions(-) diff --git a/scripts/smoke-pipeline.sh b/scripts/smoke-pipeline.sh index ce64522..ee9e033 100755 --- a/scripts/smoke-pipeline.sh +++ b/scripts/smoke-pipeline.sh @@ -53,21 +53,22 @@ else fi # ── quality-preset downscale check ──────────────────────────────────────── -# Mirrors the videoscale element host/pipeline.rs inserts for a non-Source -# preset. A 16:9 source @ 480p wants width 853.3 — the danger case for H.264, -# which needs even dimensions. Asserts videoscale negotiates an even width -# (the pixel-aspect-ratio=1/1 + stepped-even-range caps) and the encoder -# accepts it. Guards the "even-width caveat" the design plan flagged. +# Mirrors the videoscale step host/pipeline.rs inserts for a non-Source preset: +# AFTER videoconvert (scaling system-memory NV12, not the raw source format) and +# pinned to an exact even WxH computed from the source size. A 16:9 source @ +# 480p wants width 853.3 -> 852 even. Asserts the negotiated size matches and +# the encoder accepts it. Guards both the "even-width caveat" and the +# negotiation/placement regression that squashed the picture. SCALED="${TMPDIR:-/tmp}/pixelpass-smoke-scaled-$$.ts" trap 'rm -f "$OUT" "$SCALED"' EXIT -echo "[smoke] downscale check: 1920x1080 -> height 480 (videoscale, even-width)" +echo "[smoke] downscale check: 1920x1080 -> 852x480 (videoscale after videoconvert)" gst-launch-1.0 -q \ mpegtsmux name=mux ! queue ! filesink location="$SCALED" \ videotestsrc num-buffers=30 is-live=false \ ! video/x-raw,width=1920,height=1080,framerate=30/1 \ ! videorate ! video/x-raw,framerate=30/1 \ - ! videoscale ! "video/x-raw,height=480,pixel-aspect-ratio=1/1,width=[2,8192,2]" \ - ! videoconvert ! video/x-raw,format=NV12 \ + ! queue ! videoconvert ! video/x-raw,format=NV12 \ + ! videoscale ! video/x-raw,format=NV12,width=852,height=480 \ ! vah264enc rate-control=cbr bitrate=1000 key-int-max=60 \ ! h264parse config-interval=-1 \ ! video/x-h264,stream-format=byte-stream,alignment=au ! mux. \ diff --git a/src/host/pipeline.rs b/src/host/pipeline.rs index 894223c..829ab0c 100644 --- a/src/host/pipeline.rs +++ b/src/host/pipeline.rs @@ -66,17 +66,22 @@ impl Drop for CaptureHandle { /// Spawn the shared gst pipeline for a backend that supplies `source_args` /// (the video-source element + its properties, e.g. `["pipewiresrc", "fd=7", -/// …]` or `["ximagesrc", "use-damage=false", …]`). `after_spawn` runs once, -/// immediately after the gst child is launched — Wayland uses it to `close` -/// the pipewire fd it leaked into the child; X11 passes a no-op. +/// …]` or `["ximagesrc", "use-damage=false", …]`). `source_dims` is the source +/// pixel size when the backend knows it (Wayland from the portal, X11 from +/// root/window geometry); it lets a downscale preset compute an exact even +/// target resolution and skip scaling when the source is already small enough. +/// `after_spawn` runs once, immediately after the gst child is launched — +/// Wayland uses it to `close` the pipewire fd it leaked into the child; X11 +/// passes a no-op. pub async fn spawn( opts: &HostOpts, quality: &EffectiveQuality, + source_dims: Option<(u32, u32)>, source_args: Vec, after_spawn: impl FnOnce(), ) -> Result { let (audio_routing, audio_device) = setup_audio(opts).await?; - let args = build_args(&source_args, &audio_device, opts, quality); + let args = build_args(&source_args, &audio_device, opts, quality, source_dims); let mut gst_cmd = Command::new("gst-launch-1.0"); gst_cmd @@ -148,6 +153,7 @@ fn build_args( audio_device: &str, opts: &HostOpts, quality: &EffectiveQuality, + source_dims: Option<(u32, u32)>, ) -> Vec { let key_interval = (quality.framerate * 2).to_string(); let bitrate = quality.bitrate.to_string(); @@ -187,9 +193,38 @@ fn build_args( "fd=1".into(), ]; + // Downscale step for the quality presets. `None` = encode at native size + // (the "Source" preset, or a source already at/below the target height — we + // never upscale). When the source dimensions are known we pin an exact even + // WxH preserving the source aspect; H.264 4:2:0 needs even dims, so width is + // rounded to even and height is forced even (preset heights already are; a + // raw --max-height override is rounded down). When dims are unknown (a rare + // X11 geometry-read failure) we fall back to height-only + square pixels + + // an even-stepped width range and let videoscale negotiate. + let scale_caps: Option = match quality.max_height { + None => None, + Some(max_h) => { + let h = (max_h & !1).max(2); + match source_dims { + Some((sw, sh)) if sh > h => { + let w = ((sw as u64 * h as u64 + sh as u64 / 2) / sh as u64) as u32; + let w = (w & !1).max(2); + Some(format!("{raw_format},width={w},height={h}")) + } + Some(_) => None, // source already <= target height + None => Some(format!( + "{raw_format},height={h},pixel-aspect-ratio=1/1,width=[2,8192,2]" + )), + } + } + }; + // video branch — videorate caps to the target fps so we don't ship at the // monitor's refresh rate (e.g. 180Hz) and pile up frames in the demuxer - // queue faster than realtime. + // queue faster than realtime. videoscale (when scaling) runs *after* + // videoconvert so it operates on system-memory NV12/I420: scaling + // pipewiresrc's raw output directly can hit a format/memory (e.g. DMABuf) + // that software videoscale won't negotiate. args.extend(source.iter().cloned()); args.extend([ "!".into(), @@ -197,26 +232,6 @@ fn build_args( "!".into(), framerate_caps, "!".into(), - ]); - // Optional downscale (quality presets). Omitted entirely for the native - // "Source" preset. `pixel-aspect-ratio=1/1` forces a *proportional* scale: - // screen capture always has square pixels, and without pinning PAR videoscale - // keeps the full source width and just squashes PAR to preserve display - // aspect (e.g. 1920x480 @ PAR 4/9 — no bandwidth win at all). With square - // pixels fixed, width follows the source DAR. H.264 4:2:0 needs even - // dimensions, so we pin height to an even value (preset heights already are; - // a raw --max-height override is rounded down) and constrain width to a - // stepped even range — verified 1920x1080→852x480 and 1366x768→1280x720. - if let Some(h) = quality.max_height { - let h = (h & !1).max(2); - args.extend([ - "videoscale".into(), - "!".into(), - format!("video/x-raw,height={h},pixel-aspect-ratio=1/1,width=[2,8192,2]"), - "!".into(), - ]); - } - args.extend([ "queue".into(), "!".into(), "videoconvert".into(), @@ -224,6 +239,9 @@ fn build_args( raw_format.into(), "!".into(), ]); + if let Some(caps) = scale_caps { + args.extend(["videoscale".into(), "!".into(), caps, "!".into()]); + } args.extend(encoder_args); args.extend([ "!".into(), diff --git a/src/host/wayland.rs b/src/host/wayland.rs index b51ea5c..606160d 100644 --- a/src/host/wayland.rs +++ b/src/host/wayland.rs @@ -70,7 +70,7 @@ pub async fn start(opts: &HostOpts, quality: &EffectiveQuality) -> Result Result tracing::info!(width = w, height = h, xid = ?xid, "X11 capture geometry"), - Err(e) => tracing::warn!("could not read X11 geometry (capture will still try): {e:#}"), - } + // Geometry mirrors Wayland's portal-handshake log line and feeds the + // downscale presets (so they can compute an exact target size). A failure + // here shouldn't abort capture — ximagesrc will surface a real error if the + // X connection is genuinely unusable, and the scaler falls back to a + // height-only negotiation when dims are unknown. + let source_dims = match read_geometry(xid) { + Ok((w, h)) => { + tracing::info!(width = w, height = h, xid = ?xid, "X11 capture geometry"); + Some((w as u32, h as u32)) + } + Err(e) => { + tracing::warn!("could not read X11 geometry (capture will still try): {e:#}"); + None + } + }; let mut source_args = vec![ "ximagesrc".to_string(), @@ -42,7 +50,7 @@ pub async fn start(opts: &HostOpts, quality: &EffectiveQuality) -> Result