From cd127a970478359ac0f4ad8d3bdb5369ff3585ee Mon Sep 17 00:00:00 2001 From: Mollusk Date: Sat, 23 May 2026 20:39:16 -0400 Subject: [PATCH] feat(host): X11 capture backend + shared pipeline extraction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract the display-agnostic encode/mux tail out of wayland.rs into a new host/pipeline.rs: CaptureHandle + lifecycle, audio routing setup, the gst arg builder, the spawn, and Serve::bind now live there. Backends supply only their video-source element args plus a post-spawn hook (Wayland uses it to close its leaked pipewire fd; X11 passes a no-op). capture.rs collapses to a thin dispatcher; its CaptureHandle enum is gone. Add host/x11.rs: ximagesrc (use-damage=false show-pointer=true), whole root window by default or a single window via --window (xwininfo click-picker → xid). x11rb reads geometry for an info log, justifying the previously-vestigial dep. No portal, no fd dance — capture starts silently when the first viewer connects (the ticket is the access control). Viewer is display-agnostic and unchanged. Wire --no-hwencode for real (was a no-op): the shared tail now selects x264enc(tune=zerolatency,ultrafast)/I420 vs vah264enc/NV12 and switches the videoconvert target format to match. Applies to both backends. deps.rs: check_host_binaries now takes &HostOpts and checks shared elements for both backends, encoder by --no-hwencode, source per backend (pipewiresrc/ximagesrc), and xwininfo only when X11 + --window. Install hints added for x264enc, ximagesrc, xwininfo. Verified: warning-free build; smoke test still passes (tail unchanged); ximagesrc + both encoder tails produce mpv-decodable H.264 against an Xwayland root. Interactive cross-machine end-to-end pending. Co-Authored-By: Claude Opus 4.7 --- README.md | 43 ++++++-- src/common/deps.rs | 69 ++++++++++-- src/host/capture.rs | 40 ++----- src/host/mod.rs | 8 +- src/host/pipeline.rs | 255 +++++++++++++++++++++++++++++++++++++++++++ src/host/wayland.rs | 212 ++++------------------------------- src/host/x11.rs | 94 ++++++++++++++++ 7 files changed, 474 insertions(+), 247 deletions(-) create mode 100644 src/host/pipeline.rs create mode 100644 src/host/x11.rs diff --git a/README.md b/README.md index 229e7c9..5f5a63c 100644 --- a/README.md +++ b/README.md @@ -16,8 +16,10 @@ without spinning up a Discord call or fighting with NAT. Working: - Wayland capture via the screencast portal (KDE Plasma 6 confirmed; other Wayland compositors with the portal should work but are untested) +- X11 capture via `ximagesrc` (whole screen, or a single window with + `--window`); selected automatically, or forced with `--display-server x11` - VAAPI H.264 encode in GStreamer (RDNA3 confirmed; other VAAPI-capable - GPUs should work) + GPUs should work), with a software x264 fallback via `--no-hwencode` - Audio capture of the default sink's monitor, with optional per-app routing (`--app `) and microphone mixing (`--mic`) - `--repair` cleanup of orphaned PipeWire state left by a crashed host @@ -30,8 +32,10 @@ Working: `~/.config/pixelpass/config.toml` and used to auto-size the default viewer cap -Not yet working: -- X11 capture (stubbed, returns an error — Phase 2 follow-up) +Not yet built (deferred, not blocking): +- Per-monitor selection on a multi-monitor X11 host — `ximagesrc` grabs the + whole root canvas; single-monitor cropping needs xrandr region coords +- `use-damage=true` CPU optimization for the X11 capture path ## Quick start @@ -68,7 +72,7 @@ pixelpass ## Requirements -- Linux (Wayland session for now; X11 stubbed) +- Linux (Wayland or X11; the backend is autodetected) - A VAAPI-capable GPU and the right driver: - AMD: `libva-mesa-driver` - Intel: `intel-media-driver` (modern iGPUs) or `intel-vaapi-driver` (older) @@ -117,10 +121,10 @@ cargo build --release ``` Host Viewer ──── ────── - Wayland portal (ashpd) ──> PipeWire fd - │ - ▼ - gst-launch: pipewiresrc -> videorate -> vah264enc -> + Wayland portal (ashpd) ──> PipeWire fd ─┐ (X11: ximagesrc, no portal) + │ + ▼ + gst-launch: -> videorate -> vah264enc/x264enc -> h264parse -> mpegtsmux (audio: pulsesrc .monitor -> avenc_aac -> aacparse ─┘) @@ -142,7 +146,7 @@ cargo build --release The viewer's player connects to a localhost HTTP server, which is just one end of the iroh tunnel. The host's HTTP server sits on the other end and streams GStreamer's stdout (an MPEG-TS containing -hardware-encoded H.264 + AAC) through with no demux or remux. +H.264 + AAC) through with no demux or remux. iroh handles NAT traversal: direct UDP if hole-punching succeeds, relay path otherwise. Both have been verified end-to-end. @@ -201,6 +205,27 @@ If a host crashes mid-session it can leave orphaned `pixelpass_capture_*` null-sinks and their paired loopbacks loaded in PipeWire. Run `pixelpass --repair` to unload them and exit. +## Display server + +The capture backend is autodetected from the environment +(`WAYLAND_DISPLAY` → Wayland, else `DISPLAY` → X11, else +`XDG_SESSION_TYPE`). Override it with `--display-server wayland|x11` — for +example to force the X11 path while running inside a Wayland session (an +Xwayland or Xephyr `DISPLAY`). + +- **Wayland** goes through the screencast portal: a "Share Screen?" dialog + appears when the first viewer connects, and you pick the monitor (or + window, with `--window`) there. +- **X11** uses `ximagesrc` and starts silently when the first viewer + connects — the ticket is the access control, there's no portal gate. + `--window` runs an `xwininfo` picker (click the window you want to + share); without it the whole root window is captured. + +Encoding is hardware VAAPI (`vah264enc`) by default. `--no-hwencode` +switches to software x264 (`x264enc tune=zerolatency`) for hosts without a +working VAAPI H.264 entrypoint — higher CPU, no GPU needed. This applies +to both backends. + ## Multi-viewer One gst capture pipeline fans out to N concurrent viewers via a diff --git a/src/common/deps.rs b/src/common/deps.rs index f36920f..98e6f2d 100644 --- a/src/common/deps.rs +++ b/src/common/deps.rs @@ -2,21 +2,45 @@ use anyhow::{Result, bail}; use std::path::PathBuf; use std::process::Command; +use crate::cli::HostOpts; use crate::common::display::DisplayServer; -pub fn check_host_binaries(display: DisplayServer) -> Result<()> { - if display == DisplayServer::Wayland { - require("gst-launch-1.0")?; - require("gst-inspect-1.0")?; - require("pactl")?; - require_gst_element("pipewiresrc")?; - require_gst_element("vah264enc")?; - require_gst_element("h264parse")?; - require_gst_element("mpegtsmux")?; - require_gst_element("pulsesrc")?; - require_gst_element("avenc_aac")?; - require_gst_element("aacparse")?; +pub fn check_host_binaries(display: DisplayServer, opts: &HostOpts) -> Result<()> { + // Unknown is handled (and rejected) by the caller; nothing to check here. + if display == DisplayServer::Unknown { + return Ok(()); } + + // Shared across both backends: the gst tools, audio routing, and the + // encode/mux tail elements. + require("gst-launch-1.0")?; + require("gst-inspect-1.0")?; + require("pactl")?; + require_gst_element("h264parse")?; + require_gst_element("mpegtsmux")?; + require_gst_element("pulsesrc")?; + require_gst_element("avenc_aac")?; + require_gst_element("aacparse")?; + + // Encoder depends on --no-hwencode (software x264 vs hardware VAAPI). + if opts.no_hwencode { + require_gst_element("x264enc")?; + } else { + require_gst_element("vah264enc")?; + } + + // Per-backend video source, plus the X11 window-picker when --window is set. + match display { + DisplayServer::Wayland => require_gst_element("pipewiresrc")?, + DisplayServer::X11 => { + require_gst_element("ximagesrc")?; + if opts.window { + require("xwininfo")?; + } + } + DisplayServer::Unknown => unreachable!("early-returned above"), + } + Ok(()) } @@ -69,6 +93,13 @@ fn install_hint_for_bin(bin: &str) -> String { Some("opensuse" | "opensuse-tumbleweed" | "opensuse-leap") => "pulseaudio-utils", _ => "pulseaudio-utils (provides `pactl`)", }, + "xwininfo" => match distro.as_deref() { + Some("arch" | "cachyos" | "manjaro" | "endeavouros") => "xorg-xwininfo", + Some("debian" | "ubuntu" | "pop" | "linuxmint") => "x11-utils", + Some("fedora" | "nobara") => "xorg-x11-utils", + Some("opensuse" | "opensuse-tumbleweed" | "opensuse-leap") => "xwininfo", + _ => "xwininfo (X11 window-info utility)", + }, _ => bin, }; install_command(&distro, pkg) @@ -91,6 +122,20 @@ fn install_hint_for_gst_element(name: &str) -> String { Some("opensuse" | "opensuse-tumbleweed" | "opensuse-leap") => "gstreamer-plugins-bad", _ => "the GStreamer VA-API plugin (requires an H.264-capable GPU; almost all modern GPUs)", }, + "x264enc" => match distro.as_deref() { + Some("arch" | "cachyos" | "manjaro" | "endeavouros") => "gst-plugins-ugly", + Some("debian" | "ubuntu" | "pop" | "linuxmint") => "gstreamer1.0-plugins-ugly", + Some("fedora" | "nobara") => "gstreamer1-plugins-ugly", + Some("opensuse" | "opensuse-tumbleweed" | "opensuse-leap") => "gstreamer-plugins-ugly", + _ => "the GStreamer x264 plugin (plugins-ugly)", + }, + "ximagesrc" => match distro.as_deref() { + Some("arch" | "cachyos" | "manjaro" | "endeavouros") => "gst-plugins-good", + Some("debian" | "ubuntu" | "pop" | "linuxmint") => "gstreamer1.0-plugins-good", + Some("fedora" | "nobara") => "gstreamer1-plugins-good", + Some("opensuse" | "opensuse-tumbleweed" | "opensuse-leap") => "gstreamer-plugins-good", + _ => "the GStreamer X11 plugin (plugins-good)", + }, "h264parse" | "mpegtsmux" | "aacparse" => match distro.as_deref() { Some("arch" | "cachyos" | "manjaro" | "endeavouros") => "gst-plugins-bad", Some("debian" | "ubuntu" | "pop" | "linuxmint") => "gstreamer1.0-plugins-bad", diff --git a/src/host/capture.rs b/src/host/capture.rs index d1be31b..1cbc30d 100644 --- a/src/host/capture.rs +++ b/src/host/capture.rs @@ -1,40 +1,20 @@ -//! Capture dispatcher. Selects the per-display-server pipeline and returns its -//! [`CaptureHandle`]. Each backend owns its own children and tears them down -//! via its own Drop / shutdown. +//! Capture dispatcher. Picks the per-display-server source backend and returns +//! the shared [`pipeline::CaptureHandle`]. The handle itself, its teardown, and +//! the whole encode/serve tail are display-agnostic and live in +//! [`super::pipeline`]; the backends differ only in how they obtain a video +//! source element. -use anyhow::{Result, bail}; +use anyhow::Result; use crate::cli::HostOpts; use crate::common::display::DisplayServer; -use crate::host::wayland; - -pub enum CaptureHandle { - Wayland(wayland::CaptureHandle), -} - -impl CaptureHandle { - pub fn local_port(&self) -> u16 { - match self { - CaptureHandle::Wayland(h) => h.local_port(), - } - } - - pub async fn shutdown(self) { - match self { - CaptureHandle::Wayland(h) => h.shutdown().await, - } - } -} +use crate::host::pipeline::CaptureHandle; +use crate::host::{wayland, x11}; pub async fn spawn(display: DisplayServer, opts: &HostOpts) -> Result { match display { - DisplayServer::Wayland => { - let h = wayland::start(opts).await?; - Ok(CaptureHandle::Wayland(h)) - } - DisplayServer::X11 => { - bail!("X11 capture pipeline not yet implemented (Phase 2 follow-up)"); - } + DisplayServer::Wayland => wayland::start(opts).await, + DisplayServer::X11 => x11::start(opts).await, DisplayServer::Unknown => unreachable!("caller guarantees display != Unknown"), } } diff --git a/src/host/mod.rs b/src/host/mod.rs index 5139562..4a33956 100644 --- a/src/host/mod.rs +++ b/src/host/mod.rs @@ -1,7 +1,9 @@ pub mod audio; mod capture; +mod pipeline; mod serve; mod wayland; +mod x11; use anyhow::{Result, bail}; use iroh::endpoint::{Connection, presets}; @@ -17,7 +19,7 @@ use crate::common::{ tunnel, }; -use self::capture::CaptureHandle; +use self::pipeline::CaptureHandle; /// Messages from per-viewer tasks to the capture supervisor. enum SupervisorMsg { @@ -32,7 +34,7 @@ enum SupervisorMsg { pub async fn run(opts: HostOpts) -> Result<()> { let display = DisplayServer::resolve(opts.display_server); - deps::check_host_binaries(display)?; + deps::check_host_binaries(display, &opts)?; if display == DisplayServer::Unknown { bail!( @@ -249,7 +251,7 @@ fn print_host_banner( eprintln!("│ display server : {display:?}"); eprintln!("│ capture : {}", capture_summary(opts)); eprintln!("│ bitrate / fps : {} kbps @ {} fps", opts.bitrate, opts.framerate); - eprintln!("│ hw encode : {}", if opts.no_hwencode { "off" } else { "auto (VAAPI if available)" }); + eprintln!("│ hw encode : {}", if opts.no_hwencode { "off (software x264)" } else { "on (VAAPI H.264)" }); eprintln!("│ max viewers : {} ({})", resolution.value, resolution.source.label()); eprintln!("│"); if clipboard_ok { diff --git a/src/host/pipeline.rs b/src/host/pipeline.rs new file mode 100644 index 0000000..b3ca789 --- /dev/null +++ b/src/host/pipeline.rs @@ -0,0 +1,255 @@ +//! Display-server-agnostic capture pipeline. The video *source* element is the +//! only part that differs between Wayland (`pipewiresrc`, after a portal +//! handshake) and X11 (`ximagesrc`); everything downstream — the videorate cap, +//! the encoder, `h264parse`, `mpegtsmux`, the whole audio branch, the gst spawn, +//! the [`Serve`] fanout binding, and the [`CaptureHandle`] lifecycle — is shared +//! and lives here. Backends call [`spawn`] with just their source-element args. + +use anyhow::{Context, Result, bail}; +use nix::sys::signal::{Signal, kill}; +use nix::unistd::Pid; +use std::process::Stdio; +use std::time::Duration; +use tokio::process::{Child, Command}; +use tokio::time::timeout; + +use super::audio::Routing; +use super::serve::Serve; +use crate::cli::HostOpts; + +pub struct CaptureHandle { + gst: Option, + audio: Option, + serve: Option, +} + +impl CaptureHandle { + pub fn local_port(&self) -> u16 { + self.serve + .as_ref() + .expect("serve is always Some until shutdown") + .local_port() + } + + /// Graceful teardown: SIGTERM gst, give it ~1s to exit, then SIGKILL, + /// unload audio routing (if any), then tear down the serve layer. + /// The serve reader will see EOF on gst stdout and exit on its own; + /// serve.shutdown() is the backstop. + pub async fn shutdown(mut self) { + if let Some(child) = self.gst.as_mut() + && let Some(pid) = child.id() + { + let _ = kill(Pid::from_raw(pid as i32), Signal::SIGTERM); + } + if let Some(child) = self.gst.as_mut() { + let _ = timeout(Duration::from_millis(1000), child.wait()).await; + let _ = child.start_kill(); + } + if let Some(audio) = self.audio.take() { + audio.shutdown(); + } + if let Some(serve) = self.serve.take() { + serve.shutdown().await; + } + } +} + +impl Drop for CaptureHandle { + fn drop(&mut self) { + if let Some(child) = self.gst.as_mut() { + let _ = child.start_kill(); + } + // Routing's and Serve's own Drop impls handle the rest. + } +} + +/// Spawn the shared gst pipeline for a backend that supplies `source_args` +/// (the video-source element + its properties, e.g. `["pipewiresrc", "fd=7", +/// …]` or `["ximagesrc", "use-damage=false", …]`). `after_spawn` runs once, +/// immediately after the gst child is launched — Wayland uses it to `close` +/// the pipewire fd it leaked into the child; X11 passes a no-op. +pub async fn spawn( + opts: &HostOpts, + source_args: Vec, + after_spawn: impl FnOnce(), +) -> Result { + let (audio_routing, audio_device) = setup_audio(opts).await?; + let args = build_args(&source_args, &audio_device, opts); + + let mut gst_cmd = Command::new("gst-launch-1.0"); + gst_cmd + .args(&args) + .stdin(Stdio::null()) + .stdout(Stdio::piped()) + .stderr(Stdio::inherit()); + if std::env::var_os("PIXELPASS_GST_DEBUG").is_some() { + gst_cmd.env("GST_DEBUG", "3"); + } + let mut gst = gst_cmd.spawn().context("failed to spawn gst-launch-1.0")?; + + // Backend-specific post-spawn cleanup (Wayland closes its leaked pw fd here, + // once gst has inherited its own copy). + after_spawn(); + + let gst_stdout = gst + .stdout + .take() + .context("gst-launch-1.0 stdout pipe unavailable")?; + + // Hand stdout to the serve layer, which binds the localhost HTTP listener + // and runs the broadcast fanout. No demux/remux, no codec assumptions. + let serve = Serve::bind(gst_stdout).await?; + + Ok(CaptureHandle { + gst: Some(gst), + audio: audio_routing, + serve: Some(serve), + }) +} + +/// Decide whether per-app audio routing is active and produce the `device=…` +/// argument for `pulsesrc`. Routing activates when either `--app` is set +/// (per-stream rerouting to a per-PID null-sink) or `PIXELPASS_AUDIO_VIA_NULL_SINK=1` +/// is set (no app filter — captures everything via the null-sink, used for +/// dogfooding the loopback path). Otherwise we capture the default sink's +/// monitor (system audio out), not the default source (the mic). +async fn setup_audio(opts: &HostOpts) -> Result<(Option, String)> { + let routing_requested = + opts.app.is_some() || std::env::var_os("PIXELPASS_AUDIO_VIA_NULL_SINK").is_some(); + let audio_routing = if routing_requested { + Some( + Routing::start(opts) + .await + .context("audio routing setup failed")?, + ) + } else { + None + }; + let audio_device = if let Some(r) = &audio_routing { + format!("device={}.monitor", r.sink_name()) + } else { + let default = default_audio_monitor().await?; + format!("device={default}") + }; + Ok((audio_routing, audio_device)) +} + +/// Build the full gst-launch argument vector: MPEG-TS mux + fdsink, then the +/// video branch (caller's `source` → videorate cap → encoder → h264parse → +/// mux.), then the audio branch (pulsesrc → AAC → mux.). The encoder and the +/// `videoconvert` target format are selected by `opts.no_hwencode`: +/// hardware VAAPI wants NV12, software x264 wants I420. +fn build_args(source: &[String], audio_device: &str, opts: &HostOpts) -> Vec { + let key_interval = (opts.framerate * 2).to_string(); + let bitrate = opts.bitrate.to_string(); + let framerate_caps = format!("video/x-raw,framerate={}/1", opts.framerate); + + let (raw_format, encoder_args): (&str, Vec) = if opts.no_hwencode { + ( + "video/x-raw,format=I420", + vec![ + "x264enc".into(), + "tune=zerolatency".into(), + "speed-preset=ultrafast".into(), + format!("bitrate={bitrate}"), + format!("key-int-max={key_interval}"), + ], + ) + } else { + ( + "video/x-raw,format=NV12", + vec![ + "vah264enc".into(), + "rate-control=cbr".into(), + format!("bitrate={bitrate}"), + format!("key-int-max={key_interval}"), + ], + ) + }; + + // muxer + sink + let mut args: Vec = vec![ + "mpegtsmux".into(), + "name=mux".into(), + "!".into(), + "queue".into(), + "!".into(), + "fdsink".into(), + "fd=1".into(), + ]; + + // video branch — videorate caps to the target fps so we don't ship at the + // monitor's refresh rate (e.g. 180Hz) and pile up frames in the demuxer + // queue faster than realtime. + args.extend(source.iter().cloned()); + args.extend([ + "!".into(), + "videorate".into(), + "!".into(), + framerate_caps, + "!".into(), + "queue".into(), + "!".into(), + "videoconvert".into(), + "!".into(), + raw_format.into(), + "!".into(), + ]); + args.extend(encoder_args); + args.extend([ + "!".into(), + "h264parse".into(), + "config-interval=-1".into(), + "!".into(), + "video/x-h264,stream-format=byte-stream,alignment=au".into(), + "!".into(), + "mux.".into(), + ]); + + // audio branch — capture the default sink's MONITOR (system audio out), + // not the default source (which is the mic). + args.extend([ + "pulsesrc".into(), + audio_device.to_string(), + "do-timestamp=true".into(), + "!".into(), + "queue".into(), + "!".into(), + "audioconvert".into(), + "!".into(), + "audioresample".into(), + "!".into(), + "audio/x-raw,rate=48000,channels=2".into(), + "!".into(), + "avenc_aac".into(), + "bitrate=128000".into(), + "!".into(), + "aacparse".into(), + "!".into(), + "mux.".into(), + ]); + + args +} + +async fn default_audio_monitor() -> Result { + let output = Command::new("pactl") + .arg("get-default-sink") + .output() + .await + .context("failed to run `pactl get-default-sink` (install pulseaudio-utils or pipewire-pulse)")?; + if !output.status.success() { + bail!( + "pactl get-default-sink failed: {}", + String::from_utf8_lossy(&output.stderr).trim() + ); + } + let sink = String::from_utf8(output.stdout) + .context("default sink name was not UTF-8")? + .trim() + .to_string(); + if sink.is_empty() { + bail!("pactl get-default-sink returned no name (is a sound server running?)"); + } + Ok(format!("{sink}.monitor")) +} diff --git a/src/host/wayland.rs b/src/host/wayland.rs index 916f0a1..8ec15ea 100644 --- a/src/host/wayland.rs +++ b/src/host/wayland.rs @@ -1,8 +1,9 @@ -//! Wayland capture: ashpd ScreenCast portal → PipeWire fd → gst-launch. -//! Builds the gst pipeline that produces MPEG-TS on stdout, then hands -//! that stdout to [`super::serve::Serve`] which handles the HTTP fanout. +//! Wayland capture: ashpd ScreenCast portal → PipeWire fd → `pipewiresrc`. +//! This module owns only the portal handshake and the source-element args; +//! the shared encode/mux tail, gst spawn, and serving live in +//! [`super::pipeline`]. -use anyhow::{Context, Result, bail}; +use anyhow::{Context, Result}; use ashpd::{ WindowIdentifier, desktop::{ @@ -11,64 +12,12 @@ use ashpd::{ }, }; use nix::fcntl::{FcntlArg, FdFlag, fcntl}; -use nix::sys::signal::{Signal, kill}; -use nix::unistd::{Pid, close}; +use nix::unistd::close; use std::os::fd::{AsFd, IntoRawFd, OwnedFd, RawFd}; -use std::process::Stdio; -use std::time::Duration; -use tokio::process::{Child, Command}; -use tokio::time::timeout; -use super::audio::Routing; -use super::serve::Serve; +use super::pipeline::{self, CaptureHandle}; use crate::cli::HostOpts; -pub struct CaptureHandle { - gst: Option, - audio: Option, - serve: Option, -} - -impl CaptureHandle { - pub fn local_port(&self) -> u16 { - self.serve - .as_ref() - .expect("serve is always Some until shutdown") - .local_port() - } - - /// Graceful teardown: SIGTERM gst, give it ~1s to exit, then SIGKILL, - /// unload audio routing (if any), then tear down the serve layer. - /// The serve reader will see EOF on gst stdout and exit on its own; - /// serve.shutdown() is the backstop. - pub async fn shutdown(mut self) { - if let Some(child) = self.gst.as_mut() - && let Some(pid) = child.id() - { - let _ = kill(Pid::from_raw(pid as i32), Signal::SIGTERM); - } - if let Some(child) = self.gst.as_mut() { - let _ = timeout(Duration::from_millis(1000), child.wait()).await; - let _ = child.start_kill(); - } - if let Some(audio) = self.audio.take() { - audio.shutdown(); - } - if let Some(serve) = self.serve.take() { - serve.shutdown().await; - } - } -} - -impl Drop for CaptureHandle { - fn drop(&mut self) { - if let Some(child) = self.gst.as_mut() { - let _ = child.start_kill(); - } - // Routing's and Serve's own Drop impls handle the rest. - } -} - pub async fn start(opts: &HostOpts) -> Result { // 1. Negotiate the screencast session with the portal. let proxy = Screencast::new() @@ -108,124 +57,23 @@ pub async fn start(opts: &HostOpts) -> Result { tracing::info!(node_id, width = w, height = h, "portal handshake complete"); // The fd is CLOEXEC by default; the gst child needs to inherit it across // exec. We then leak it via into_raw_fd so its lifetime spans the spawn, - // and close the parent's copy once gst is running. + // and close the parent's copy once gst is running (the pipeline's + // after_spawn hook below). clear_cloexec(&pw_fd)?; let raw_fd: RawFd = pw_fd.into_raw_fd(); - // 2. Spawn gst-launch with the full pipeline: video AND audio captured, - // encoded, and muxed into MPEG-TS inside gst. Output goes to stdout, - // which the serve layer pipes to its HTTP fanout — no demux/remux, - // no codec assumptions. - let key_interval = (opts.framerate * 2).to_string(); - let bitrate = opts.bitrate.to_string(); + let source_args = vec![ + "pipewiresrc".to_string(), + format!("fd={raw_fd}"), + format!("path={node_id}"), + "do-timestamp=true".to_string(), + ]; - // Audio routing activates when either: - // - `opts.app` is set (per-stream rerouting to a per-PID null-sink), - // - or `PIXELPASS_AUDIO_VIA_NULL_SINK=1` is set (no app filter, just - // captures everything via the null-sink → useful for development - // and dogfooding the loopback path before app filtering is picked). - let routing_requested = - opts.app.is_some() || std::env::var_os("PIXELPASS_AUDIO_VIA_NULL_SINK").is_some(); - let audio_routing = if routing_requested { - Some( - Routing::start(opts) - .await - .context("audio routing setup failed")?, - ) - } else { - None - }; - let audio_device = if let Some(r) = &audio_routing { - format!("device={}.monitor", r.sink_name()) - } else { - let default = default_audio_monitor().await?; - format!("device={default}") - }; - let mut gst_cmd = Command::new("gst-launch-1.0"); - gst_cmd - .args([ - // muxer + sink - "mpegtsmux", - "name=mux", - "!", - "queue", - "!", - "fdsink", - "fd=1", - // video branch — videorate caps to 30fps so we don't ship at the - // monitor's refresh rate (e.g. 180Hz) and pile up frames in mpv's - // demuxer queue faster than realtime. - "pipewiresrc", - &format!("fd={raw_fd}"), - &format!("path={node_id}"), - "do-timestamp=true", - "!", - "videorate", - "!", - &format!("video/x-raw,framerate={}/1", opts.framerate), - "!", - "queue", - "!", - "videoconvert", - "!", - "video/x-raw,format=NV12", - "!", - "vah264enc", - "rate-control=cbr", - &format!("bitrate={bitrate}"), - &format!("key-int-max={key_interval}"), - "!", - "h264parse", - "config-interval=-1", - "!", - "video/x-h264,stream-format=byte-stream,alignment=au", - "!", - "mux.", - // audio branch — capture the default sink's MONITOR (system audio - // out), not the default source (which is the mic). - "pulsesrc", - &audio_device, - "do-timestamp=true", - "!", - "queue", - "!", - "audioconvert", - "!", - "audioresample", - "!", - "audio/x-raw,rate=48000,channels=2", - "!", - "avenc_aac", - "bitrate=128000", - "!", - "aacparse", - "!", - "mux.", - ]) - .stdin(Stdio::null()) - .stdout(Stdio::piped()) - .stderr(Stdio::inherit()); - if std::env::var_os("PIXELPASS_GST_DEBUG").is_some() { - gst_cmd.env("GST_DEBUG", "3"); - } - let mut gst = gst_cmd.spawn().context("failed to spawn gst-launch-1.0")?; - // Parent no longer needs the pipewire fd — gst inherited its own copy. - let _ = close(raw_fd); - - let gst_stdout = gst - .stdout - .take() - .context("gst-launch-1.0 stdout pipe unavailable")?; - - // 3. Hand stdout to the serve layer, which binds the localhost HTTP - // listener and runs the broadcast fanout. - let serve = Serve::bind(gst_stdout).await?; - - Ok(CaptureHandle { - gst: Some(gst), - audio: audio_routing, - serve: Some(serve), + pipeline::spawn(opts, source_args, move || { + // Parent no longer needs the pipewire fd — gst inherited its own copy. + let _ = close(raw_fd); }) + .await } fn clear_cloexec(fd: &impl AsFd) -> Result<()> { @@ -235,25 +83,3 @@ fn clear_cloexec(fd: &impl AsFd) -> Result<()> { fcntl(fd.as_fd(), FcntlArg::F_SETFD(flags)).context("F_SETFD on pipewire fd")?; Ok(()) } - -async fn default_audio_monitor() -> Result { - let output = Command::new("pactl") - .arg("get-default-sink") - .output() - .await - .context("failed to run `pactl get-default-sink` (install pulseaudio-utils or pipewire-pulse)")?; - if !output.status.success() { - bail!( - "pactl get-default-sink failed: {}", - String::from_utf8_lossy(&output.stderr).trim() - ); - } - let sink = String::from_utf8(output.stdout) - .context("default sink name was not UTF-8")? - .trim() - .to_string(); - if sink.is_empty() { - bail!("pactl get-default-sink returned no name (is a sound server running?)"); - } - Ok(format!("{sink}.monitor")) -} diff --git a/src/host/x11.rs b/src/host/x11.rs new file mode 100644 index 0000000..5d68f95 --- /dev/null +++ b/src/host/x11.rs @@ -0,0 +1,94 @@ +//! X11 capture: `ximagesrc` → the shared encode/mux tail in [`super::pipeline`]. +//! Unlike Wayland there's no portal and no fd hand-off — `ximagesrc` opens its +//! own X connection from `$DISPLAY`. The whole root window is captured by +//! default; `--window` resolves a single window's XID via an `xwininfo` +//! click-picker. The ticket is the access control, so capture starts silently +//! when the first viewer connects (no host-side consent prompt). + +use anyhow::{Context, Result, bail}; +use tokio::process::Command; +use x11rb::connection::Connection; +use x11rb::protocol::xproto::ConnectionExt; + +use super::pipeline::{self, CaptureHandle}; +use crate::cli::HostOpts; + +pub async fn start(opts: &HostOpts) -> Result { + let xid = if opts.window { + Some(pick_window().await?) + } else { + None + }; + + // Geometry is informational (mirrors Wayland's portal-handshake log line); + // a failure here shouldn't abort capture — ximagesrc will surface a real + // error if the X connection is genuinely unusable. + match read_geometry(xid) { + Ok((w, h)) => tracing::info!(width = w, height = h, xid = ?xid, "X11 capture geometry"), + Err(e) => tracing::warn!("could not read X11 geometry (capture will still try): {e:#}"), + } + + let mut source_args = vec![ + "ximagesrc".to_string(), + // Full frames (no damage regions) to avoid partial-update artifacts; + // use-damage=true is a later CPU optimization. show-pointer matches + // Wayland's CursorMode::Embedded. + "use-damage=false".to_string(), + "show-pointer=true".to_string(), + ]; + if let Some(xid) = xid { + source_args.push(format!("xid={xid}")); + } + + // X11 has no leaked fd to clean up, so the post-spawn hook is a no-op. + pipeline::spawn(opts, source_args, || {}).await +} + +/// Run `xwininfo` and let the user click the window they want to share, then +/// parse the `Window id: 0x…` line out of its output. Returns the numeric XID. +async fn pick_window() -> Result { + eprintln!("[pixelpass] click the window you want to share…"); + let output = Command::new("xwininfo") + .output() + .await + .context("failed to run `xwininfo` (install xorg-xwininfo)")?; + if !output.status.success() { + bail!( + "xwininfo failed: {}", + String::from_utf8_lossy(&output.stderr).trim() + ); + } + let text = String::from_utf8_lossy(&output.stdout); + for line in text.lines() { + // e.g. "xwininfo: Window id: 0x3a00007 \"xterm\"" + if let Some((_, rest)) = line.split_once("Window id: ") { + let token = rest.split_whitespace().next().unwrap_or(""); + let hex = token.strip_prefix("0x").unwrap_or(token); + if let Ok(xid) = u32::from_str_radix(hex, 16) { + return Ok(xid); + } + } + } + bail!("could not parse a window id from xwininfo output"); +} + +/// Read pixel dimensions: the selected window's geometry when `--window` was +/// used, otherwise the root window of the screen named by `$DISPLAY`. +fn read_geometry(xid: Option) -> Result<(u16, u16)> { + let (conn, screen_num) = + x11rb::connect(None).context("could not connect to the X server (is DISPLAY set?)")?; + match xid { + Some(id) => { + let geo = conn + .get_geometry(id) + .context("GetGeometry request failed")? + .reply() + .context("GetGeometry reply failed")?; + Ok((geo.width, geo.height)) + } + None => { + let screen = &conn.setup().roots[screen_num]; + Ok((screen.width_in_pixels, screen.height_in_pixels)) + } + } +}