pal/unix/
process.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
//! Unix process launching support (Linux and macOS backends).
5
6#![warn(missing_docs)]
7
8#[cfg(target_os = "linux")]
9mod linux;
10#[cfg(target_os = "macos")]
11mod posix_spawn;
12
13use super::SyscallResult;
14use super::while_eintr;
15use std::collections::BTreeMap;
16use std::ffi::CString;
17use std::ffi::OsString;
18use std::io;
19use std::os::unix::prelude::*;
20use std::path::PathBuf;
21use std::process::ExitStatus;
22
23#[cfg(target_os = "linux")]
24use caps::CapsHashSet;
25#[cfg(target_os = "linux")]
26use landlock::RulesetCreated;
27#[cfg(target_os = "linux")]
28use seccompiler::SeccompFilter;
29
/// The different failure modes for sandbox related syscalls.
///
/// The default is [`SandboxFailureMode::Error`].
#[derive(Copy, Clone, Debug, Default, PartialEq, Eq)]
pub enum SandboxFailureMode {
    /// When a sandbox related syscall fails during process start,
    /// report no error and continue.
    Silent,
    /// When a sandbox related syscall fails during process start,
    /// report a trace::warn, and continue.
    Warn,
    /// When a sandbox related syscall fails during process start,
    /// fail the launch.
    // NOTE(review): the earlier wording read "report no trace::error, and fail";
    // confirm against the launcher whether an error trace is emitted here.
    #[default]
    Error,
}
44
/// A container for linux specific builder options.
///
/// Every field is optional or zero-valued by default; `Builder::new`
/// overrides `vfork` to `true`.
#[cfg(target_os = "linux")]
#[derive(Default)]
pub struct LinuxBuilder<'a> {
    // NOTE(review): presumably flags for clone(2); nothing in this file sets a
    // non-zero value, so confirm against the linux launcher module.
    clone_flags: libc::c_int,
    // Whether the new process will vfork or not.
    vfork: bool,
    // Whether to create a new session with the new process as the leader.
    setsid: bool,
    // Policy applied when a sandbox related syscall fails.
    sandbox_failure_mode: SandboxFailureMode,
    // Fd of the controlling terminal requested for the new process, if any.
    controlling_terminal: Option<BorrowedFd<'a>>,
    // Requested capability sets; `None` means the caller never set one.
    permitted_capabilities: Option<CapsHashSet>,
    effective_capabilities: Option<CapsHashSet>,
    ambient_capabilities: Option<CapsHashSet>,
    bounding_capabilities: Option<CapsHashSet>,
    inheritable_capabilities: Option<CapsHashSet>,
    // Landlock ruleset requested for the new process, if any.
    landlock_rules: Option<RulesetCreated>,
    // Seccomp filter requested for the new process, if any.
    seccomp_filter: Option<SeccompFilter>,
}
62
63/// A builder for a child process.
64pub struct Builder<'a> {
65    executable: CString,
66    argv: Vec<CString>,
67    env: BTreeMap<OsString, Option<OsString>>,
68    clear_env: bool,
69    saw_nul: bool,
70    stdin: Stdio<'a>,
71    stdout: Stdio<'a>,
72    stderr: Stdio<'a>,
73    fd_ops: Vec<(i32, FdOp)>,
74    uid: Option<libc::uid_t>,
75    gid: Option<libc::uid_t>,
76    #[cfg(target_os = "linux")]
77    linux_builder: LinuxBuilder<'a>,
78}
79
/// A stdio option.
///
/// Selects what one of the child's standard descriptors (stdin, stdout or
/// stderr) refers to.
#[derive(Debug)]
pub enum Stdio<'a> {
    /// Inherit the current process's stdio fd.
    ///
    /// No descriptor operation is queued for this slot.
    Inherit,
    /// Open /dev/null for the child process.
    ///
    /// The file is opened for both reading and writing.
    Null,
    /// Use the provided fd.
    ///
    /// The fd is duplicated onto the stdio slot in the child.
    Fd(BorrowedFd<'a>),
}
90
91impl Stdio<'_> {
92    fn op(&self, null: &mut Option<std::fs::File>) -> io::Result<Option<FdOp>> {
93        Ok(match self {
94            Stdio::Inherit => None,
95            Stdio::Null => {
96                let null = if let Some(null) = null.as_ref() {
97                    null
98                } else {
99                    let f = std::fs::OpenOptions::new()
100                        .read(true)
101                        .write(true)
102                        .open("/dev/null")?;
103                    null.get_or_insert(f)
104                };
105                Some(FdOp::Dup(null.as_raw_fd()))
106            }
107            Stdio::Fd(oldfd) => Some(FdOp::Dup(oldfd.as_raw_fd())),
108        })
109    }
110}
111
// An operation to apply to a file descriptor in the new process. Stored in
// `Builder::fd_ops` paired with the descriptor number it applies to.
#[derive(Debug, Copy, Clone)]
enum FdOp {
    // Close the paired descriptor.
    Close,
    // Duplicate the contained source descriptor onto the paired descriptor.
    Dup(i32),
}
117
/// Converts `s` into a `CString`.
///
/// If `s` contains an interior NUL byte the conversion cannot succeed; in
/// that case `*saw_nul` is set to `true` and a placeholder string is
/// returned so the caller can keep building and report the error later.
/// `*saw_nul` is never reset to `false` here.
fn os2c(s: OsString, saw_nul: &mut bool) -> CString {
    match CString::new(s.into_vec()) {
        Ok(converted) => converted,
        Err(_) => {
            *saw_nul = true;
            CString::new("xxx").unwrap()
        }
    }
}
124
125fn c_slice_to_pointers(s: &[CString]) -> Vec<*const libc::c_char> {
126    s.iter()
127        .map(|x| x.as_ptr())
128        .chain(std::iter::once(std::ptr::null()))
129        .collect()
130}
131
132impl<'a> Builder<'a> {
133    /// Creates a new process builder for `program`.
134    pub fn new(program: impl Into<PathBuf>) -> Self {
135        let mut saw_nul = false;
136        let executable = os2c(program.into().into_os_string(), &mut saw_nul);
137        let argv = vec![executable.clone()];
138        Self {
139            executable,
140            argv,
141            saw_nul,
142            env: BTreeMap::new(),
143            clear_env: false,
144            stdin: Stdio::Inherit,
145            stdout: Stdio::Inherit,
146            stderr: Stdio::Inherit,
147            fd_ops: vec![],
148            uid: None,
149            gid: None,
150            #[cfg(target_os = "linux")]
151            linux_builder: LinuxBuilder {
152                clone_flags: 0,
153                vfork: true,
154                ..Default::default()
155            },
156        }
157    }
158
159    /// Sets argv\[0\].
160    pub fn arg0(&mut self, arg: impl Into<OsString>) -> &mut Self {
161        self.argv[0] = os2c(arg.into(), &mut self.saw_nul);
162        self
163    }
164
165    /// Appends a command-line argument.
166    pub fn arg(&mut self, arg: impl Into<OsString>) -> &mut Self {
167        self.argv.push(os2c(arg.into(), &mut self.saw_nul));
168        self
169    }
170
171    /// Appends a list of command-line arguments.
172    pub fn args<I, S>(&mut self, args: I) -> &mut Self
173    where
174        I: IntoIterator<Item = S>,
175        S: Into<OsString>,
176    {
177        for arg in args {
178            self.arg(arg);
179        }
180        self
181    }
182
183    /// Sets the environment variable `key` to `val`.
184    pub fn env<K, V>(&mut self, key: K, val: V) -> &mut Self
185    where
186        K: Into<OsString>,
187        V: Into<OsString>,
188    {
189        self.env.insert(key.into(), Some(val.into()));
190        self
191    }
192
193    /// Removes the environment variable `key`.
194    pub fn env_remove<K: Into<OsString>>(&mut self, key: K) -> &mut Self {
195        self.env.insert(key.into(), None);
196        self
197    }
198
199    /// Clears all environment variables.
200    pub fn env_clear(&mut self) -> &mut Self {
201        self.env.clear();
202        self.clear_env = true;
203        self
204    }
205
206    /// Sets the policy for stdin.
207    pub fn stdin(&mut self, stdin: Stdio<'a>) -> &mut Self {
208        self.stdin = stdin;
209        self
210    }
211
212    /// Sets the policy for stdout.
213    pub fn stdout(&mut self, stdout: Stdio<'a>) -> &mut Self {
214        self.stdout = stdout;
215        self
216    }
217
218    /// Sets the policy for stderr.
219    pub fn stderr(&mut self, stderr: Stdio<'a>) -> &mut Self {
220        self.stderr = stderr;
221        self
222    }
223
224    /// Closes `fd` in the new process.
225    pub fn close_fd(&mut self, fd: i32) -> &mut Self {
226        self.fd_ops.push((fd, FdOp::Close));
227        self
228    }
229
230    /// Duplicates `oldfd` to `newfd` in the new process.
231    pub fn dup_fd(&mut self, oldfd: BorrowedFd<'a>, newfd: i32) -> &mut Self {
232        self.fd_ops.push((newfd, FdOp::Dup(oldfd.as_raw_fd())));
233        self
234    }
235
236    /// Sets the real and effective user id of the new process.
237    pub fn setuid(&mut self, uid: u32) -> &mut Self {
238        self.uid = Some(uid);
239        self
240    }
241
242    /// Gets the real and effective user id of the new process.
243    pub fn uid(&self) -> Option<u32> {
244        self.uid
245    }
246
247    /// Sets the real and effective group id of the new process.
248    pub fn setgid(&mut self, gid: u32) -> &mut Self {
249        self.gid = Some(gid);
250        self
251    }
252
253    /// Gets the real and effective user id of the new process.
254    pub fn gid(&self) -> Option<u32> {
255        self.gid
256    }
257
258    /// Sets whether the new process will vfork or not.
259    #[cfg(target_os = "linux")]
260    pub fn set_vfork(&mut self, vfork: bool) -> &mut Self {
261        self.linux_builder.vfork = vfork;
262        self
263    }
264
265    /// Gets whether the new process will vfork or not.
266    #[cfg(target_os = "linux")]
267    pub fn vfork(&mut self) -> bool {
268        self.linux_builder.vfork
269    }
270
271    /// Sets the sandbox failure mode.
272    #[cfg(target_os = "linux")]
273    pub fn set_sandbox_failure_mode(&mut self, mode: SandboxFailureMode) -> &mut Self {
274        self.linux_builder.sandbox_failure_mode = mode;
275        self
276    }
277
278    /// Gets the sandbox failure mode.
279    #[cfg(target_os = "linux")]
280    pub fn sandbox_failure_mode(&mut self) -> SandboxFailureMode {
281        self.linux_builder.sandbox_failure_mode
282    }
283
284    /// Sets the permitted and inheritable capabilities of the new process.
285    #[cfg(target_os = "linux")]
286    pub fn set_permitted_caps(&mut self, caps: CapsHashSet) -> &mut Self {
287        self.linux_builder.permitted_capabilities = Some(caps);
288        self
289    }
290
291    /// Gets the permitted and inheritable capabilities of the new process.
292    #[cfg(target_os = "linux")]
293    pub fn permitted_caps(&mut self) -> Option<CapsHashSet> {
294        self.linux_builder.permitted_capabilities.clone()
295    }
296
297    /// Sets the effective capabilities of the new process.
298    #[cfg(target_os = "linux")]
299    pub fn set_effective_caps(&mut self, caps: CapsHashSet) -> &mut Self {
300        self.linux_builder.effective_capabilities = Some(caps);
301        self
302    }
303
304    /// Gets the effective capabilities of the new process.
305    #[cfg(target_os = "linux")]
306    pub fn effective_caps(&mut self) -> Option<CapsHashSet> {
307        self.linux_builder.effective_capabilities.clone()
308    }
309
310    /// Sets the ambient capabilities of the new process.
311    #[cfg(target_os = "linux")]
312    pub fn set_ambient_caps(&mut self, caps: CapsHashSet) -> &mut Self {
313        self.linux_builder.ambient_capabilities = Some(caps);
314        self
315    }
316
317    /// Gets the ambient capabilities of the new process.
318    #[cfg(target_os = "linux")]
319    pub fn ambient_caps(&mut self) -> Option<CapsHashSet> {
320        self.linux_builder.ambient_capabilities.clone()
321    }
322
323    /// Sets the inheritable capabilities of the new process.
324    #[cfg(target_os = "linux")]
325    pub fn set_inheritable_caps(&mut self, caps: CapsHashSet) -> &mut Self {
326        self.linux_builder.inheritable_capabilities = Some(caps);
327        self
328    }
329
330    /// Gets the inheritable capabilities of the new process.
331    #[cfg(target_os = "linux")]
332    pub fn inheritable_caps(&mut self) -> Option<CapsHashSet> {
333        self.linux_builder.inheritable_capabilities.clone()
334    }
335
336    /// Sets the bounding capabilities of the new process.
337    #[cfg(target_os = "linux")]
338    pub fn set_bounding_caps(&mut self, caps: CapsHashSet) -> &mut Self {
339        self.linux_builder.bounding_capabilities = Some(caps);
340        self
341    }
342
343    /// Gets the bounding capabilities of the new process.
344    #[cfg(target_os = "linux")]
345    pub fn bounding_caps(&mut self) -> Option<CapsHashSet> {
346        self.linux_builder.bounding_capabilities.clone()
347    }
348
349    /// Sets the landlock ruleset of the new process.
350    #[cfg(target_os = "linux")]
351    pub fn set_landlock_rules(&mut self, landlock_rules: RulesetCreated) -> &mut Self {
352        self.linux_builder.landlock_rules = Some(landlock_rules);
353        self
354    }
355
356    /// Gets the landlock ruleset of the new process.
357    #[cfg(target_os = "linux")]
358    pub fn landlock_rules(&mut self) -> Option<RulesetCreated> {
359        self.linux_builder
360            .landlock_rules
361            .as_ref()
362            .map(|ruleset_created| ruleset_created.try_clone().unwrap())
363    }
364
365    /// Sets the seccomp filter for the new process.
366    #[cfg(target_os = "linux")]
367    pub fn set_seccomp_filter(&mut self, seccomp_filter: SeccompFilter) -> &mut Self {
368        self.linux_builder.seccomp_filter = Some(seccomp_filter);
369        self
370    }
371
372    /// Gets the seccomp filter for the new process.
373    #[cfg(target_os = "linux")]
374    pub fn seccomp_filter(&mut self) -> Option<SeccompFilter> {
375        self.linux_builder.seccomp_filter.clone()
376    }
377
378    /// Creates a new session with the new process as the leader.
379    #[cfg(target_os = "linux")]
380    pub fn setsid(&mut self, setsid: bool) -> &mut Self {
381        self.linux_builder.setsid = setsid;
382        self
383    }
384
385    /// Sets the controlling terminal for the new process.
386    #[cfg(target_os = "linux")]
387    pub fn controlling_terminal(&mut self, controlling_terminal: BorrowedFd<'a>) -> &mut Self {
388        self.linux_builder.controlling_terminal = Some(controlling_terminal);
389        self
390    }
391
392    /// Spawns the process.
393    pub fn spawn(&self) -> io::Result<Child> {
394        let mut env = if self.clear_env {
395            BTreeMap::new()
396        } else {
397            std::env::vars_os().collect()
398        };
399        for (key, value) in &self.env {
400            if let Some(value) = value {
401                env.insert(key.to_owned(), value.to_owned());
402            } else {
403                env.remove(key);
404            }
405        }
406
407        let mut saw_nul = self.saw_nul;
408
409        let envp: Vec<_> = env
410            .into_iter()
411            .map(|(mut key, value)| {
412                key.push("=");
413                key.push(&value);
414                os2c(key, &mut saw_nul)
415            })
416            .collect();
417
418        if saw_nul {
419            return Err(io::Error::new(
420                io::ErrorKind::InvalidInput,
421                "null character in input string",
422            ));
423        }
424
425        let mut null_file = None;
426        let mut fd_ops = self.fd_ops.clone();
427
428        if let Some(op) = self.stdin.op(&mut null_file)? {
429            fd_ops.push((0, op));
430        }
431        if let Some(op) = self.stdout.op(&mut null_file)? {
432            fd_ops.push((1, op));
433        }
434        if let Some(op) = self.stderr.op(&mut null_file)? {
435            fd_ops.push((2, op));
436        }
437
438        self.spawn_internal(&envp, &mut fd_ops)
439    }
440}
441
/// A child process.
#[derive(Debug)]
pub struct Child {
    // Process id, used as the target of `waitpid`.
    pid: i32,
    // NOTE(review): presumably a pidfd referring to the same process; it is
    // populated outside this file's view, so confirm against the linux launcher.
    #[cfg(target_os = "linux")]
    pidfd: OwnedFd,
    // Exit status cached once the process has been reaped, so later waits
    // return it without calling `waitpid` again.
    status: Option<ExitStatus>,
}
450
451impl Child {
452    /// Synchronously waits for the child process to exit.
453    pub fn wait(&mut self) -> io::Result<ExitStatus> {
454        self.wait_internal(0).transpose().unwrap()
455    }
456
457    /// Tries to reap the child process it has exited. Otherwise returns `Ok(None)`.
458    pub fn try_wait(&mut self) -> io::Result<Option<ExitStatus>> {
459        self.wait_internal(libc::WNOHANG)
460    }
461
462    fn wait_internal(&mut self, options: i32) -> io::Result<Option<ExitStatus>> {
463        if self.status.is_some() {
464            return Ok(self.status);
465        }
466
467        let mut status = 0;
468        // SAFETY: calling as documented.
469        let n = unsafe {
470            while_eintr(|| libc::waitpid(self.pid, &mut status, options).syscall_result())?
471        };
472        if n != 0 {
473            self.status = Some(ExitStatus::from_raw(status));
474        }
475        Ok(self.status)
476    }
477
478    /// Returns the child process ID.
479    pub fn id(&self) -> i32 {
480        self.pid
481    }
482}
483
484/// Terminates the process immediately.
485pub(crate) fn terminate(exit_code: i32) -> ! {
486    // SAFETY: there are no safety requirements for calling this function.
487    unsafe {
488        libc::_exit(exit_code);
489    }
490}
491
#[cfg(test)]
mod tests {
    use super::Builder;

    /// Polls the child's fd with no timeout and asserts that exactly
    /// `POLLIN` is reported, which these tests use as the exit notification.
    #[cfg(target_os = "linux")]
    fn wait_until_readable(child: &super::Child) {
        use crate::sys::SyscallResult;
        use crate::sys::while_eintr;
        use std::os::unix::prelude::*;

        let mut pollfd = libc::pollfd {
            fd: child.as_fd().as_raw_fd(),
            events: libc::POLLIN,
            revents: 0,
        };
        // SAFETY: pollfd holds a valid and open file descriptor.
        unsafe { while_eintr(|| libc::poll(&mut pollfd, 1, -1).syscall_result()).unwrap() };
        assert_eq!(pollfd.revents, libc::POLLIN);
    }

    /// Spawns `/usr/bin/true` with default settings and checks it exits with 0.
    #[test]
    fn test_command() {
        let cmd = Builder::new("/usr/bin/true");
        let mut child = cmd.spawn().unwrap();

        #[cfg(target_os = "linux")]
        {
            wait_until_readable(&child);
            // The fd signalled, so a non-blocking reap must already succeed.
            assert_eq!(child.try_wait().unwrap().unwrap().code().unwrap(), 0);
        }

        #[cfg(not(target_os = "linux"))]
        {
            assert_eq!(child.wait().unwrap().code().unwrap(), 0);
        }
    }

    /// Spawns `/usr/bin/true` under a landlock ruleset that handles only
    /// execute access and allows it beneath `/`.
    #[test]
    #[cfg(target_os = "linux")]
    fn test_landlock_sandbox() {
        use landlock::AccessFs;
        use landlock::PathBeneath;
        use landlock::PathFd;
        use landlock::Ruleset;
        use landlock::RulesetAttr;
        use landlock::RulesetCreatedAttr;

        let landlock_rules = Ruleset::default()
            .handle_access(AccessFs::Execute)
            .unwrap()
            .create()
            .unwrap()
            .add_rule(PathBeneath::new(
                PathFd::new("/").unwrap(),
                AccessFs::Execute,
            ))
            .unwrap();

        let mut cmd = Builder::new("/usr/bin/true");
        cmd.set_vfork(false);
        cmd.set_landlock_rules(landlock_rules);

        let mut child = cmd.spawn().unwrap();

        wait_until_readable(&child);
        assert_eq!(child.wait().unwrap().code().unwrap(), 0);
    }

    /// Spawns `/usr/bin/true` under a seccomp allow-list whose mismatch
    /// action is `Log`.
    #[test]
    #[cfg(target_os = "linux")]
    #[cfg(target_arch = "x86_64")] // xtask-fmt allow-target-arch sys-crate
    fn test_seccomp_sandbox() {
        use seccompiler::SeccompAction;
        use seccompiler::SeccompFilter;
        use seccompiler::TargetArch;

        // This isn't defined in libc MUSL yet.
        const SYS_RSEQ: libc::c_long = 334;

        // This filter should work for both a dynamically linked `true`
        // or for a busybox statically linked `true`.
        let seccomp_filter = SeccompFilter::new(
            vec![
                (libc::SYS_execve, vec![]),
                (libc::SYS_brk, vec![]),
                (libc::SYS_arch_prctl, vec![]),
                (libc::SYS_mmap, vec![]),
                (libc::SYS_access, vec![]),
                (libc::SYS_openat, vec![]),
                (libc::SYS_newfstatat, vec![]),
                (libc::SYS_fstat, vec![]),
                (libc::SYS_close, vec![]),
                (libc::SYS_read, vec![]),
                (libc::SYS_pread64, vec![]),
                (libc::SYS_set_tid_address, vec![]),
                (libc::SYS_set_robust_list, vec![]),
                (SYS_RSEQ, vec![]),
                (libc::SYS_mprotect, vec![]),
                (libc::SYS_prlimit64, vec![]),
                (libc::SYS_munmap, vec![]),
                (libc::SYS_getrandom, vec![]),
                (libc::SYS_futex, vec![]),
                (libc::SYS_write, vec![]),
                (libc::SYS_exit_group, vec![]),
                (libc::SYS_readlink, vec![]),
                (libc::SYS_uname, vec![]),
                (libc::SYS_getgid, vec![]),
                (libc::SYS_getuid, vec![]),
                (libc::SYS_setgid, vec![]),
                (libc::SYS_setuid, vec![]),
                (libc::SYS_prctl, vec![]),
            ]
            .into_iter()
            .collect(),
            // mismatch_action
            SeccompAction::Log,
            // match_action
            SeccompAction::Allow,
            // target architecture of filter
            TargetArch::x86_64,
        )
        .unwrap();

        let mut cmd = Builder::new("/usr/bin/true");
        cmd.set_vfork(false);
        cmd.set_seccomp_filter(seccomp_filter);

        let mut child = cmd.spawn().unwrap();

        wait_until_readable(&child);
        // Accept either a clean exit or termination without an exit code.
        assert!([None, Some(0)].contains(&child.wait().unwrap().code()));
    }
}