1use super::Builder;
7use super::Child;
8use super::FdOp;
9use super::SandboxFailureMode;
10use crate::unix::SyscallResult;
11use crate::unix::errno;
12use caps::CapsHashSet;
13use landlock::RulesetCreated;
14use seccompiler::SeccompFilter;
15use std::ffi::CStr;
16use std::ffi::CString;
17use std::io;
18use std::os::unix::prelude::*;
19
20struct CloneContext<'a> {
21 executable: &'a CStr,
22 argv: &'a [*const libc::c_char],
23 envp: &'a [*const libc::c_char],
24 result: Option<i32>,
25 fd_ops: &'a mut [(i32, FdOp)],
27 sandbox_failure_mode: SandboxFailureMode,
28 setsid: bool,
29 controlling_terminal: Option<BorrowedFd<'a>>,
30 uid: Option<libc::uid_t>,
31 gid: Option<libc::uid_t>,
32 permitted_capabilities: Option<CapsHashSet>,
33 effective_capabilities: Option<CapsHashSet>,
34 ambient_capabilities: Option<CapsHashSet>,
35 inheritable_capabilities: Option<CapsHashSet>,
36 bounding_capabilities: Option<CapsHashSet>,
37 landlock_rules: Option<RulesetCreated>,
38 seccomp_filter: Option<SeccompFilter>,
39}
40
41impl Builder<'_> {
42 pub(super) fn spawn_internal(
43 &self,
44 envp: &[CString],
45 fd_ops: &mut [(i32, FdOp)],
46 ) -> io::Result<Child> {
47 let mut landlock_rules = None;
48 if let Some(lr) = &self.linux_builder.landlock_rules {
49 landlock_rules = Some(lr.try_clone()?);
50 }
51
52 let argv = super::c_slice_to_pointers(&self.argv);
54 let envp = super::c_slice_to_pointers(envp);
55
56 let mut context = CloneContext {
57 executable: &self.executable,
58 argv: &argv,
59 envp: &envp,
60 result: None,
61 fd_ops: &mut *fd_ops,
62 sandbox_failure_mode: self.linux_builder.sandbox_failure_mode,
63 setsid: self.linux_builder.setsid,
64 controlling_terminal: self.linux_builder.controlling_terminal,
65 uid: self.uid,
66 gid: self.gid,
67 permitted_capabilities: self.linux_builder.permitted_capabilities.clone(),
68 effective_capabilities: self.linux_builder.effective_capabilities.clone(),
69 inheritable_capabilities: self.linux_builder.inheritable_capabilities.clone(),
70 ambient_capabilities: self.linux_builder.ambient_capabilities.clone(),
71 bounding_capabilities: self.linux_builder.bounding_capabilities.clone(),
72 landlock_rules,
73 seccomp_filter: self.linux_builder.seccomp_filter.clone(),
74 };
75
76 let mut flags = self.linux_builder.clone_flags | libc::CLONE_PIDFD | libc::SIGCHLD;
82
83 if self.linux_builder.vfork {
84 flags |= libc::CLONE_VM | libc::CLONE_VFORK;
85 }
86
87 let page_size = unsafe { libc::sysconf(libc::_SC_PAGESIZE) } as usize;
89
90 let stack_len: usize = std::cmp::max(16 * 1024, page_size);
93 assert!(stack_len % page_size == 0);
94
95 let stack_len = stack_len + page_size;
97 let stack = unsafe {
99 libc::mmap(
100 std::ptr::null_mut(),
101 stack_len,
102 libc::PROT_READ | libc::PROT_WRITE,
103 libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
104 -1,
105 0,
106 )
107 };
108 if stack == libc::MAP_FAILED {
109 return Err(errno().into());
110 }
111 let mmap_guard = ChildStackGuard(stack, stack_len);
112 unsafe { libc::mprotect(stack, page_size, libc::PROT_NONE) }.syscall_result()?;
114 let mut pidfd: libc::pid_t = -1;
115
116 let pid = unsafe {
121 libc::clone(
122 clone_cb,
123 stack.add(stack_len),
124 flags,
125 std::ptr::from_mut(&mut context).cast(),
126 &mut pidfd,
127 )
128 }
129 .syscall_result()?;
130 drop(mmap_guard);
131
132 let pidfd = unsafe { OwnedFd::from_raw_fd(pidfd) };
134 let mut child = Child {
135 pid,
136 pidfd,
137 status: None,
138 };
139
140 if self.linux_builder.vfork && context.result != Some(0) {
144 let status = child.wait().unwrap();
148 let ec = context.result.unwrap_or_else(|| {
149 status
150 .code()
151 .expect("child should not have failed with a signal")
152 });
153 return Err(io::Error::from_raw_os_error(ec));
154 }
155
156 Ok(child)
157 }
158}
159
160struct ChildStackGuard(*mut libc::c_void, usize);
161
162impl Drop for ChildStackGuard {
163 fn drop(&mut self) {
164 unsafe { libc::munmap(self.0, self.1) }
168 .syscall_result()
169 .unwrap();
170 }
171}
172
173extern "C" fn clone_cb(context: *mut libc::c_void) -> libc::c_int {
187 let context = unsafe { &mut *(context.cast::<CloneContext<'_>>()) };
190
191 if context.setsid {
192 if unsafe { libc::setsid() } < 0 {
194 return errno().0;
195 }
196 }
197
198 if let Some(fd) = context.controlling_terminal {
199 if unsafe { libc::ioctl(fd.as_raw_fd(), libc::TIOCSCTTY, 0) } < 0 {
201 return errno().0;
202 }
203 }
204
205 let maxfd = context.fd_ops.iter().map(|(fd, _)| *fd).max();
207
208 if let Some(maxfd) = maxfd {
209 for (newfd, op) in &mut *context.fd_ops {
210 match op {
211 FdOp::Close => {}
212 FdOp::Dup(oldfd) => {
213 if oldfd != newfd && *oldfd < maxfd {
217 let new_oldfd =
219 unsafe { libc::fcntl(*oldfd, libc::F_DUPFD_CLOEXEC, maxfd) };
220 if new_oldfd < 0 {
221 return errno().0;
222 }
223 *oldfd = new_oldfd;
224 }
225 }
226 }
227 }
228
229 for (newfd, op) in &*context.fd_ops {
230 match op {
231 FdOp::Close => {
232 if unsafe { libc::close(*newfd) } < 0 {
234 return errno().0;
235 }
236 }
237 FdOp::Dup(oldfd) => {
238 if *newfd == *oldfd {
239 if unsafe {
241 libc::fcntl(
242 *oldfd,
243 libc::F_SETFD,
244 libc::fcntl(*oldfd, libc::F_GETFD) & !libc::FD_CLOEXEC,
245 )
246 } < 0
247 {
248 return errno().0;
249 }
250 } else {
251 if unsafe { libc::dup2(*oldfd, *newfd) } < 0 {
253 return errno().0;
254 }
255 }
256 }
257 }
258 }
259 }
260
261 macro_rules! handle_sandbox_failure {
262 ($m:expr, $r:expr) => {
263 match context.sandbox_failure_mode {
264 SandboxFailureMode::Silent => {}
265 SandboxFailureMode::Warn => {
266 tracing::warn!($m);
267 }
268 SandboxFailureMode::Error => {
269 tracing::error!($m);
270 return $r;
271 }
272 }
273 };
274 }
275
276 if let Some(landlock_rules) = context.landlock_rules.take() {
277 if landlock_rules.restrict_self().is_err() {
278 handle_sandbox_failure!("failed to apply landlock ruleset", libc::ENOTSUP);
279 }
280 }
281
282 if let Some(gid) = context.gid {
283 if unsafe { libc::setresgid(gid, gid, gid) } < 0 {
285 handle_sandbox_failure!("failed to change group id", libc::ENOTSUP);
286 }
287 }
288
289 if let Some(uid) = context.uid {
290 if unsafe { libc::setresuid(uid, uid, uid) } < 0 {
292 handle_sandbox_failure!("failed to change user id", libc::ENOTSUP);
293 }
294 }
295
296 macro_rules! set_capabilities {
297 ($t:expr, $v:ident) => {
298 if let Some($v) = &context.$v {
299 if caps::set(None, $t, &$v).is_err() {
300 handle_sandbox_failure!(
301 std::concat!("failed to apply ", stringify!($t), " capabilities"),
302 libc::ENOTSUP
303 );
304 }
305 }
306 };
307 }
308
309 set_capabilities!(caps::CapSet::Bounding, bounding_capabilities);
310 set_capabilities!(caps::CapSet::Permitted, permitted_capabilities);
311 set_capabilities!(caps::CapSet::Ambient, ambient_capabilities);
312 set_capabilities!(caps::CapSet::Inheritable, inheritable_capabilities);
313 set_capabilities!(caps::CapSet::Effective, effective_capabilities);
314
315 if let Some(seccomp_filter) = context.seccomp_filter.take() {
316 if let Ok(bpf_program) = TryInto::<seccompiler::BpfProgram>::try_into(seccomp_filter) {
317 if seccompiler::apply_filter(&bpf_program).is_err() {
318 handle_sandbox_failure!("failed to apply seccomp profile", libc::ENOTSUP);
319 }
320 }
321 }
322
323 context.result = Some(0);
325 unsafe {
329 libc::execvpe(
330 context.executable.as_ptr(),
331 context.argv.as_ptr(),
332 context.envp.as_ptr(),
333 )
334 };
335 context.result = Some(errno().0);
337 255
338}
339
340impl AsFd for Child {
341 fn as_fd(&self) -> BorrowedFd<'_> {
342 self.pidfd.as_fd()
343 }
344}