Skip to main content

xtask/tasks/fmt/lints/
copyright.rs

// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
3
//! Checks that source files have the correct copyright and license header.
//!
//! The expected header for most file types is:
//!
//! ```text
//! // Copyright (c) Microsoft Corporation.
//! // Licensed under the MIT License.
//!
//! ```
//!
//! Files may start with a shebang (`#!`) or `<!DOCTYPE html>` before the
//! header. A blank line is expected between that first line and the header.
//!
//! Files with a non-Microsoft copyright are left alone.
18
19use super::Lint;
20use super::LintCtx;
21use super::Lintable;
22use toml_edit::DocumentMut;
23
/// First line of the MIT header, without any comment delimiters.
const HEADER_MIT_FIRST: &str = "Copyright (c) Microsoft Corporation.";
/// Second line of the MIT header, without any comment delimiters.
const HEADER_MIT_SECOND: &str = "Licensed under the MIT License.";

/// Extensions of non-Rust files whose header is checked; files with any other
/// extension are skipped by `visit_nonrust_file`.
///
/// Every entry must have a matching arm in `comment_delimiters`.
const CHECKED_EXTENSIONS: &[&str] = &[
    "c", "css", "html", "js", "proto", "ps1", "py", "rs", "toml", "ts", "tsx",
];
30
/// Looks up the comment opener and closer used for files with extension `ext`.
///
/// The closer is empty for line-comment languages; for block-comment
/// languages it includes the leading space (e.g. `" */"`).
///
/// # Panics
///
/// Panics (via `unreachable!`) if `ext` is not one of the extensions listed in
/// the table below.
fn comment_delimiters(ext: &str) -> (&'static str, &'static str) {
    // (extensions, opener, closer)
    const DELIMITERS: &[(&[&str], &str, &str)] = &[
        (&["rs", "c", "proto", "ts", "tsx", "js"], "//", ""),
        (&["toml", "py", "ps1"], "#", ""),
        (&["css"], "/*", " */"),
        (&["html"], "<!--", " -->"),
    ];

    DELIMITERS
        .iter()
        .find(|(extensions, _, _)| extensions.contains(&ext))
        .map(|&(_, opener, closer)| (opener, closer))
        .unwrap_or_else(|| unreachable!())
}
41
/// Lint that checks source files and `Cargo.toml` manifests for the expected
/// copyright header and queues a fix when it is missing or malformed.
pub struct Copyright {
    /// When `true`, an existing Microsoft copyright banner is accepted even if
    /// it is not the MIT one. Set from the
    /// `XTASK_FMT_COPYRIGHT_ALLOW_MISSING_MIT` environment variable.
    is_msft_internal: bool,
}
45
46impl Lint for Copyright {
47    fn new(_ctx: &LintCtx) -> Self {
48        Copyright {
49            is_msft_internal: std::env::var("XTASK_FMT_COPYRIGHT_ALLOW_MISSING_MIT").is_ok(),
50        }
51    }
52
53    fn enter_workspace(&mut self, _content: &Lintable<DocumentMut>) {}
54    fn enter_crate(&mut self, _content: &Lintable<DocumentMut>) {}
55
56    fn visit_file(&mut self, content: &mut Lintable<String>) {
57        self.check(content, "rs");
58    }
59
60    fn exit_crate(&mut self, content: &mut Lintable<DocumentMut>) {
61        self.check_toml(content, "package");
62    }
63    fn exit_workspace(&mut self, content: &mut Lintable<DocumentMut>) {
64        self.check_toml(content, "workspace");
65    }
66
67    fn visit_nonrust_file(&mut self, extension: &str, content: &mut Lintable<String>) {
68        // TODO: should we check everything regardless of extension?
69        if CHECKED_EXTENSIONS.contains(&extension) {
70            self.check(content, extension);
71        }
72    }
73}
74
75impl Copyright {
76    fn check_toml(&self, content: &mut Lintable<DocumentMut>, section_name: &str) {
77        let table = content[section_name].as_table().unwrap();
78        let prefix = table
79            .decor()
80            .prefix()
81            .and_then(|x| x.as_str())
82            .unwrap_or("");
83
84        // TEMP: until we have more robust infrastructure for distinct
85        // microsoft-internal checks, include this "escape hatch" for preserving
86        // non-MIT licensed files when running `xtask fmt` in the msft internal
87        // repo. This uses a job-specific env var, instead of being properly plumbed
88        // through via `clap`, to make it easier to remove in the future.
89        if self.is_msft_internal {
90            // Support both new and existing copyright banner styles
91            if !(prefix.contains("Copyright") && prefix.contains("Microsoft")) {
92                let prefix = prefix.trim().to_owned();
93                content.fix("missing or incorrect internal copyright header", |content| {
94                    let table = content[section_name].as_table_mut().unwrap();
95                    let new_prefix = format!(
96                        "# Copyright (C) Microsoft Corporation. All rights reserved.\n\n{prefix}",
97                    );
98                    table.decor_mut().set_prefix(new_prefix);
99                });
100            }
101        } else if !(prefix.starts_with("# ")
102            && prefix[2..].starts_with(HEADER_MIT_FIRST)
103            && prefix[3 + HEADER_MIT_FIRST.len()..].starts_with("# ")
104            && prefix[5 + HEADER_MIT_FIRST.len()..].contains(HEADER_MIT_SECOND))
105        {
106            let prefix = prefix.trim().to_owned();
107            content.fix("missing or incorrect copyright header", |content| {
108                let table = content[section_name].as_table_mut().unwrap();
109                let new_prefix =
110                    format!("# {HEADER_MIT_FIRST}\n# {HEADER_MIT_SECOND}\n\n{prefix}",);
111                table.decor_mut().set_prefix(new_prefix);
112            });
113        }
114    }
115
116    fn check(&self, content: &mut Lintable<String>, ext: &str) {
117        // Skip a leading UTF-8 BOM if present.
118        let has_bom = content.starts_with('\u{feff}');
119        let mut lines = content.strip_prefix('\u{feff}').unwrap_or(content).lines();
120        let first_line = lines.next().unwrap_or("").to_owned();
121
122        // Someone may decide to put a script interpreter line (aka "shebang")
123        // in a .config or a .toml file, and mark the file as executable. While
124        // that's not common, we choose not to constrain creativity.
125        //
126        // The shebang (`#!`) is part of the valid grammar of Rust, and does not
127        // indicate that the file should be interpreted as a script. So we don't
128        // allow that line in Rust files.
129        //
130        // Some HTML files may start with a `<!DOCTYPE html>` line, so let that line pass as well
131        let (has_special, blank_after_special, header_first) = if (first_line.starts_with("#!")
132            && ext != "rs")
133            || (first_line.starts_with("<!DOCTYPE html>") && ext == "html")
134        {
135            let second = lines.next().unwrap_or("").to_owned();
136            let blank = second.is_empty();
137            let header_start = if blank {
138                lines.next().unwrap_or("").to_owned()
139            } else {
140                second
141            };
142            (true, blank, header_start)
143        } else {
144            (false, false, first_line.clone())
145        };
146
147        let header_second = lines.next().unwrap_or("").to_owned();
148        let after_header_line = lines.next().unwrap_or("").to_owned();
149
150        // Preserve any files which are copyright, but not by Microsoft.
151        if header_first.contains("Copyright") && !header_first.contains("Microsoft") {
152            return;
153        }
154
155        let (prefix, suffix) = comment_delimiters(ext);
156
157        let expected_first = format!("{prefix} {HEADER_MIT_FIRST}{suffix}");
158        let expected_second = format!("{prefix} {HEADER_MIT_SECOND}{suffix}");
159
160        let has_first = header_first.contains(HEADER_MIT_FIRST);
161        let has_second = header_second.contains(HEADER_MIT_SECOND);
162        let mut missing_banner = !has_first || !has_second;
163        let mut expected_header_lines = 2;
164
165        // TEMP: until we have more robust infrastructure for distinct
166        // microsoft-internal checks, include this "escape hatch" for preserving
167        // non-MIT licensed files when running `xtask fmt` in the msft internal
168        // repo. This uses a job-specific env var, instead of being properly plumbed
169        // through via `clap`, to make it easier to remove in the future.
170        if self.is_msft_internal && missing_banner {
171            // Support both new and existing copyright banner styles
172            missing_banner =
173                !(header_first.contains("Copyright") && header_first.contains("Microsoft"));
174            expected_header_lines = 1;
175        }
176
177        let missing_blank_after_header = if missing_banner {
178            // Will be fixed as part of inserting the banner.
179            false
180        } else if expected_header_lines == 1 {
181            !header_second.is_empty()
182        } else {
183            !after_header_line.is_empty()
184        };
185
186        let missing_blank_after_special = has_special && !blank_after_special;
187
188        if !missing_banner && !missing_blank_after_header && !missing_blank_after_special {
189            return;
190        }
191
192        content.fix("missing or incorrect copyright header", |content| {
193            // Build the replacement header.
194            let mut hdr = String::new();
195            if has_bom {
196                hdr.push('\u{feff}');
197            }
198            if has_special {
199                hdr.push_str(&first_line);
200                hdr.push_str("\n\n");
201            }
202            hdr.push_str(&expected_first);
203            hdr.push('\n');
204            if expected_header_lines == 2 {
205                hdr.push_str(&expected_second);
206                hdr.push('\n');
207            }
208            hdr.push('\n');
209
210            // Count leading lines to replace and find their byte length.
211            let skip = (has_special as usize)
212                + (has_special && blank_after_special) as usize
213                + if !missing_banner {
214                    expected_header_lines + (!missing_blank_after_header) as usize
215                } else {
216                    0
217                };
218            let skip_bytes = if skip > 0 {
219                content
220                    .match_indices('\n')
221                    .nth(skip - 1)
222                    .map_or(content.len(), |(i, _)| i + 1)
223            } else if has_bom {
224                '\u{feff}'.len_utf8()
225            } else {
226                0
227            };
228
229            content.replace_range(..skip_bytes, &hdr);
230        });
231    }
232}