Skip to main content

xtask/tasks/fmt/
lints.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! A harness for running custom text-based lints over repository files.
5
6mod cfg_target_arch;
7mod copyright;
8mod crate_name_nodash;
9mod package_info;
10mod repr_packed;
11mod trailing_newline;
12mod unsafe_code_comment;
13mod unused_deps;
14mod workspaced;
15
16use crate::fs_helpers::git_diffed;
17use crate::tasks::fmt::FmtCtx;
18use crate::tasks::fmt::FmtPass;
19use std::fmt::Display;
20use std::ops::Deref;
21use std::path::Path;
22use std::path::PathBuf;
23use std::sync::atomic::AtomicBool;
24use toml_edit::DocumentMut;
25
/// Context passed to each lint, containing configuration options.
///
/// One value is built per workspace and handed to every lint's `new`.
pub struct LintCtx {
    /// When true we are linting a subset of repo files, so some lints may want
    /// to skip checks that require whole-repo analysis.
    ///
    /// Copied from the `only_diffed` flag of the formatting context.
    only_diffed: bool,
}
32
/// A trait representing a single lint check.
///
/// A fresh instance of each lint is created per workspace. For that workspace
/// the hooks are called in this order: `enter_workspace`, then for each crate
/// `enter_crate`, the per-file visitors, and `exit_crate`; then
/// `visit_nonrust_file` for files outside any crate; and finally
/// `exit_workspace`.
pub trait Lint {
    /// Create a new instance of this lint for a workspace.
    fn new(ctx: &LintCtx) -> Self
    where
        Self: Sized;

    /// Begin processing a workspace, given the parsed Cargo.toml of the workspace root.
    ///
    /// The manifest is read-only here; fixes to it belong in `exit_workspace`.
    fn enter_workspace(&mut self, content: &Lintable<DocumentMut>);

    /// Begin processing a crate, given the parsed Cargo.toml of the crate root.
    ///
    /// The manifest is read-only here; fixes to it belong in `exit_crate`.
    fn enter_crate(&mut self, content: &Lintable<DocumentMut>);

    /// Process a Rust source file (extension `rs`) in the current crate.
    fn visit_file(&mut self, content: &mut Lintable<String>);

    /// Finish processing a crate, given the parsed Cargo.toml of the crate root.
    ///
    /// The manifest is mutable so the lint can report errors or apply fixes to it.
    fn exit_crate(&mut self, content: &mut Lintable<DocumentMut>);

    /// Finish processing a workspace, given the parsed Cargo.toml of the workspace root.
    ///
    /// The manifest is mutable so the lint can report errors or apply fixes to it.
    fn exit_workspace(&mut self, content: &mut Lintable<DocumentMut>);

    /// Process a non-Rust file in the current crate or workspace.
    ///
    /// For files within the directory of a crate this is called during crate processing.
    /// For files outside of any crate this is called during workspace processing after
    /// all crates have been processed.
    ///
    /// `extension` is the file's extension, or an empty string when the file
    /// has no extension or the extension is not valid UTF-8.
    ///
    /// The default implementation ignores the file.
    fn visit_nonrust_file(&mut self, extension: &str, content: &mut Lintable<String>) {
        // Intentionally a no-op; the binding only marks the parameters as used.
        let _ = (extension, content);
    }
}
64
/// A wrapper around file content for linting.
///
/// Most lints will want to use the `Deref` impl to access the content directly,
/// but this also provides utilities for reporting errors and making fixes.
pub struct Lintable<T> {
    /// The (possibly parsed) file content; this is the `Deref` target.
    content: T,
    /// The unparsed file text, kept only when `content` is a parsed form.
    /// It is `None` when the content is already a `String`.
    raw: Option<String>,
    /// Whether fixes should be applied rather than reported as errors.
    fix: bool,
    /// Path of the file relative to `workspace_dir`.
    path: PathBuf,
    /// Directory of the workspace this file belongs to; joined with `path`
    /// to locate the file when writing fixes back.
    workspace_dir: PathBuf,
    /// Set once a fix has been applied, so the content is written back on finalize.
    modified: bool,
    // This doesn't really need to be atomic, but it lets `unfixable` only take
    // `&self` which is more convenient.
    /// Set once any error has been reported against this file.
    failed: AtomicBool,
}
80
81impl<T> Deref for Lintable<T> {
82    type Target = T;
83
84    fn deref(&self) -> &Self::Target {
85        &self.content
86    }
87}
88
89impl Lintable<String> {
90    /// Read a text file into a `Lintable<String>`.
91    ///
92    /// Returns `None` for binary (non-UTF-8) files.
93    fn from_file(path: &Path, ctx: &FmtCtx, workspace_dir: &Path) -> anyhow::Result<Option<Self>> {
94        let bytes = fs_err::read(path)?;
95        let content = match String::from_utf8(bytes) {
96            Ok(s) => s,
97            Err(_) => return Ok(None),
98        };
99        Ok(Some(Self {
100            content,
101            raw: None,
102            fix: ctx.fix,
103            path: path.strip_prefix(workspace_dir).unwrap().to_owned(),
104            workspace_dir: workspace_dir.to_owned(),
105            modified: false,
106            failed: AtomicBool::new(false),
107        }))
108    }
109}
110
111impl Lintable<DocumentMut> {
112    /// Read a Cargo.toml file into a `Lintable<DocumentMut>`.
113    ///
114    /// This can be from a crate or a workspace.
115    fn from_file(path: &Path, ctx: &FmtCtx, workspace_dir: &Path) -> anyhow::Result<Self> {
116        let raw = fs_err::read_to_string(path)?;
117        Ok(Self {
118            content: raw.parse()?,
119            raw: Some(raw),
120            fix: ctx.fix,
121            path: path.strip_prefix(workspace_dir).unwrap().to_owned(),
122            workspace_dir: workspace_dir.to_owned(),
123            modified: false,
124            failed: AtomicBool::new(false),
125        })
126    }
127}
128
129impl<T> Lintable<T> {
130    /// Get the path of this file relative to the workspace root, for use in error messages.
131    pub fn path(&self) -> &Path {
132        &self.path
133    }
134
135    /// Get the original raw file content as a string, for lints that need to do their own parsing.
136    ///
137    /// If the file content is already a string this will be None.
138    /// This field is not modified when fixes are made.
139    pub fn raw(&self) -> Option<&str> {
140        self.raw.as_deref()
141    }
142
143    /// If fix is enabled, apply the given fix operation to the content and mark it modified.
144    /// If fix is not enabled, report an error with the given description.
145    pub fn fix(&mut self, description: &str, op: impl FnOnce(&mut T)) {
146        if self.fix {
147            op(&mut self.content);
148            self.modified = true;
149        } else {
150            log::error!("{}: {}", self.path.display(), description);
151            self.failed
152                .store(true, std::sync::atomic::Ordering::Relaxed);
153        }
154    }
155
156    /// Report an error with the given description that cannot be automatically fixed.
157    pub fn unfixable(&self, description: &str) {
158        log::error!("{}: {}", self.path.display(), description);
159        self.failed
160            .store(true, std::sync::atomic::Ordering::Relaxed);
161    }
162
163    /// If modified, write the content back to the file. Return whether any errors were reported.
164    fn finalize(self) -> anyhow::Result<bool>
165    where
166        T: Display,
167    {
168        if self.modified {
169            let full_path = self.workspace_dir.join(&self.path);
170            fs_err::write(full_path, self.content.to_string())?;
171        }
172        Ok(self.failed.into_inner())
173    }
174}
175
176pub struct Lints;
177
178impl FmtPass for Lints {
179    fn run(self, ctx: FmtCtx) -> anyhow::Result<()> {
180        // Walk tree once to discover all Cargo.toml files and all other files
181        // (including .rs). This avoids a second walk per-crate later.
182        let mut workspace_dirs = Vec::new();
183        let mut all_crate_dirs = Vec::new();
184        let mut all_files = Vec::new();
185        for entry in ignore::Walk::new(&ctx.ctx.root) {
186            let entry = entry?;
187            if entry.file_name() == "Cargo.toml" {
188                // Identify workspace roots (Cargo.toml files with a [workspace] key).
189                let raw = fs_err::read_to_string(entry.path())?;
190                let doc: DocumentMut = raw.parse()?;
191                if doc.contains_key("workspace") {
192                    workspace_dirs.push(entry.path().parent().unwrap().to_owned());
193                } else {
194                    // Build the set of all crate directories (every Cargo.toml parent
195                    // that is not itself a workspace root).
196                    all_crate_dirs.push(entry.path().parent().unwrap().to_owned());
197                }
198            } else if entry.file_type().is_some_and(|ft| ft.is_file()) {
199                all_files.push(entry.into_path());
200            }
201        }
202
203        let mut any_failed = false;
204
205        // Run a fresh set of lints over each workspace.
206        for workspace_dir in &workspace_dirs {
207            // Nested workspace dirs that are children of this workspace.
208            let nested_workspace_dirs: Vec<_> = workspace_dirs
209                .iter()
210                .filter(|other| *other != workspace_dir && other.starts_with(workspace_dir))
211                .collect();
212
213            // Crate dirs belonging to this workspace: under workspace_dir
214            // but not under any deeper nested workspace.
215            let mut crate_dirs: Vec<_> = all_crate_dirs
216                .iter()
217                .filter(|crate_dir| {
218                    crate_dir.starts_with(workspace_dir)
219                        && !nested_workspace_dirs
220                            .iter()
221                            .any(|nested| crate_dir.starts_with(*nested))
222                })
223                .collect();
224
225            // All files belonging to this workspace (under workspace_dir,
226            // not under any nested workspace).
227            let workspace_files: Vec<_> = all_files
228                .iter()
229                .filter(|f| {
230                    f.starts_with(workspace_dir)
231                        && !nested_workspace_dirs
232                            .iter()
233                            .any(|nested| f.starts_with(*nested))
234                })
235                .collect();
236
237            // Non-crate files: files not under any crate dir, excluding .rs files.
238            let mut non_crate_files: Vec<_> = workspace_files
239                .iter()
240                .filter(|f| {
241                    f.extension().and_then(|e| e.to_str()) != Some("rs")
242                        && !crate_dirs.iter().any(|crate_dir| f.starts_with(crate_dir))
243                })
244                .copied()
245                .collect();
246
247            // If only_diffed, filter crate dirs and non-crate files.
248            if ctx.only_diffed {
249                let diffed = git_diffed(ctx.ctx.in_git_hook)?;
250                // git diff outputs paths relative to the repo root, so strip
251                // the root from our other full paths before checking for a match
252                crate_dirs.retain(|crate_dir| {
253                    let crate_dir = crate_dir.strip_prefix(&ctx.ctx.root).unwrap();
254                    diffed.iter().any(|f| f.starts_with(crate_dir))
255                });
256                non_crate_files.retain(|f| {
257                    let f = f.strip_prefix(&ctx.ctx.root).unwrap().to_owned();
258                    diffed.contains(&f)
259                });
260            }
261
262            any_failed |= lint_workspace(
263                workspace_dir,
264                &crate_dirs,
265                &non_crate_files,
266                &workspace_files,
267                &ctx,
268            )?;
269        }
270
271        if any_failed {
272            anyhow::bail!("one or more lint checks failed");
273        }
274
275        Ok(())
276    }
277}
278
279/// Run a fresh set of lints over a single workspace and its member crates..
280fn lint_workspace(
281    workspace_dir: &Path,
282    crate_dirs: &[&PathBuf],
283    non_crate_files: &[&PathBuf],
284    all_files: &[&PathBuf],
285    ctx: &FmtCtx,
286) -> anyhow::Result<bool> {
287    let lint_ctx = LintCtx {
288        only_diffed: ctx.only_diffed,
289    };
290
291    let mut lints: Vec<Box<dyn Lint>> = vec![
292        Box::new(cfg_target_arch::CfgTargetArch::new(&lint_ctx)),
293        Box::new(copyright::Copyright::new(&lint_ctx)),
294        Box::new(crate_name_nodash::CrateNameNoDash::new(&lint_ctx)),
295        Box::new(package_info::PackageInfo::new(&lint_ctx)),
296        Box::new(repr_packed::ReprPacked::new(&lint_ctx)),
297        Box::new(trailing_newline::TrailingNewline::new(&lint_ctx)),
298        Box::new(unsafe_code_comment::UnsafeCodeComment::new(&lint_ctx)),
299        Box::new(unused_deps::UnusedDeps::new(&lint_ctx)),
300        Box::new(workspaced::WorkspacedManifest::new(&lint_ctx)),
301    ];
302
303    let workspace_manifest_path = workspace_dir.join("Cargo.toml");
304    let mut workspace_manifest =
305        Lintable::<DocumentMut>::from_file(&workspace_manifest_path, ctx, workspace_dir)?;
306
307    log::debug!(
308        "Linting workspace {} with {} crates and {} non-crate files",
309        workspace_dir.display(),
310        crate_dirs.len(),
311        non_crate_files.len()
312    );
313    for lint in lints.iter_mut() {
314        lint.enter_workspace(&workspace_manifest);
315    }
316
317    let mut any_failed = false;
318
319    for crate_dir in crate_dirs {
320        let manifest_path = crate_dir.join("Cargo.toml");
321        let mut crate_manifest =
322            Lintable::<DocumentMut>::from_file(&manifest_path, ctx, workspace_dir)?;
323
324        log::debug!("Linting crate {}", crate_dir.display());
325        for lint in lints.iter_mut() {
326            lint.enter_crate(&crate_manifest);
327        }
328
329        // Collect nested crate dirs within this crate to avoid
330        // processing files that belong to a child crate.
331        let nested_crate_dirs: Vec<_> = crate_dirs
332            .iter()
333            .filter(|other| *other != crate_dir && other.starts_with(crate_dir))
334            .collect();
335
336        // Use pre-collected file paths instead of walking the crate
337        // directory again, avoiding redundant filesystem traversals.
338        for path in all_files.iter().filter(|f| {
339            f.starts_with(crate_dir)
340                && !nested_crate_dirs.iter().any(|nested| f.starts_with(nested))
341        }) {
342            let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
343            let Some(mut file) = Lintable::<String>::from_file(path, ctx, workspace_dir)? else {
344                // Skip binary files
345                continue;
346            };
347
348            for lint in lints.iter_mut() {
349                if ext == "rs" {
350                    lint.visit_file(&mut file);
351                } else {
352                    lint.visit_nonrust_file(ext, &mut file);
353                }
354            }
355            any_failed |= file.finalize()?;
356        }
357
358        for lint in lints.iter_mut() {
359            lint.exit_crate(&mut crate_manifest);
360        }
361        any_failed |= crate_manifest.finalize()?;
362    }
363
364    // Process non-crate files (e.g. scripts, Guide).
365    for path in non_crate_files {
366        log::debug!("Linting non-crate file {}", path.display());
367        let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
368        let Some(mut file) = Lintable::<String>::from_file(path, ctx, workspace_dir)? else {
369            // Skip binary files
370            log::debug!("Skipping binary file {}", path.display());
371            continue;
372        };
373        for lint in lints.iter_mut() {
374            lint.visit_nonrust_file(ext, &mut file);
375        }
376        any_failed |= file.finalize()?;
377    }
378
379    for lint in lints.iter_mut() {
380        lint.exit_workspace(&mut workspace_manifest);
381    }
382    any_failed |= workspace_manifest.finalize()?;
383
384    Ok(any_failed)
385}