flowey_lib_hvlite/
download_openvmm_vmm_tests_artifacts.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! Download OpenVMM VMM test artifacts from Azure Blob Storage.
5//!
6//! If persistent storage is available, caches downloaded artifacts locally.
7
8use flowey::node::prelude::*;
9use std::collections::BTreeSet;
10use vmm_test_images::KnownTestArtifacts;
11
12const STORAGE_ACCOUNT: &str = "hvlitetestvhds";
13const CONTAINER: &str = "vhds";
14
15#[derive(Serialize, Deserialize, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
16pub enum CustomDiskPolicy {
17    /// Allow swapping in non-standard disk image variants
18    Loose,
19    /// Deny swapping in non-standard disk image variants, redownloading any
20    /// images that were detected as inconsistent.
21    Strict,
22}
23
24flowey_request! {
25    pub enum Request {
26        /// Local only: if true, skips interactive prompt that warns user about
27        /// downloading many gigabytes of disk images.
28        LocalOnlySkipDownloadPrompt(bool),
29        /// Local only: set policy when detecting a non-standard cached disk image
30        LocalOnlyCustomDiskPolicy(CustomDiskPolicy),
31        /// Specify a custom cache directory. By default, VHDs are cloned
32        /// into a job-local temp directory.
33        CustomCacheDir(PathBuf),
34        /// Download test artifacts into the download folder
35        Download(Vec<KnownTestArtifacts>),
36        /// Get path to folder containing all downloaded artifacts
37        GetDownloadFolder(WriteVar<PathBuf>),
38    }
39}
40
41new_flow_node!(struct Node);
42
43impl FlowNode for Node {
44    type Request = Request;
45
46    fn imports(ctx: &mut ImportCtx<'_>) {
47        ctx.import::<flowey_lib_common::download_azcopy::Node>();
48        ctx.import::<flowey_lib_common::install_azure_cli::Node>();
49    }
50
51    fn emit(requests: Vec<Self::Request>, ctx: &mut NodeCtx<'_>) -> anyhow::Result<()> {
52        let mut skip_prompt = None;
53        let mut custom_disk_policy = None;
54        let mut test_artifacts = BTreeSet::<_>::new();
55        let mut custom_cache_dir = None;
56        let mut get_download_folder = Vec::new();
57
58        for req in requests {
59            match req {
60                Request::LocalOnlySkipDownloadPrompt(v) => {
61                    same_across_all_reqs("LocalOnlySkipDownloadPrompt", &mut skip_prompt, v)?
62                }
63                Request::LocalOnlyCustomDiskPolicy(v) => {
64                    same_across_all_reqs("LocalOnlyCustomDiskPolicy", &mut custom_disk_policy, v)?
65                }
66                Request::CustomCacheDir(v) => {
67                    same_across_all_reqs("CustomCacheDir", &mut custom_cache_dir, v)?
68                }
69                Request::Download(v) => v.into_iter().for_each(|v| {
70                    test_artifacts.insert(v);
71                }),
72                Request::GetDownloadFolder(path) => get_download_folder.push(path),
73            }
74        }
75
76        let skip_prompt = if matches!(ctx.backend(), FlowBackend::Local) {
77            skip_prompt.unwrap_or(false)
78        } else {
79            if skip_prompt.is_some() {
80                anyhow::bail!("set `LocalOnlySkipDownloadPrompt` on non-local backend")
81            }
82            true
83        };
84
85        let persistent_dir = ctx.persistent_dir();
86
87        let azcopy_bin = ctx.reqv(flowey_lib_common::download_azcopy::Request::GetAzCopy);
88
89        let (files_to_download, write_files_to_download) = ctx.new_var::<Vec<(String, u64)>>();
90        let (output_folder, write_output_folder) = ctx.new_var();
91
92        ctx.emit_rust_step("calculating required VMM tests disk images", |ctx| {
93            let persistent_dir = persistent_dir.clone().claim(ctx);
94            let test_artifacts = test_artifacts.into_iter().collect::<Vec<_>>();
95            let write_files_to_download = write_files_to_download.claim(ctx);
96            let write_output_folder = write_output_folder.claim(ctx);
97            move |rt| {
98                let output_folder = if let Some(dir) = custom_cache_dir {
99                    dir
100                } else if let Some(dir) = persistent_dir {
101                    rt.read(dir)
102                } else {
103                    std::env::current_dir()?
104                };
105
106                rt.write(write_output_folder, &output_folder.absolute()?);
107
108                //
109                // Check for VHDs that have already been downloaded, to see if
110                // we can skip invoking azure-cli and `azcopy` entirely.
111                //
112                let mut skip_artifacts = BTreeSet::new();
113                let mut unexpected_artifacts = BTreeSet::new();
114
115                for e in fs_err::read_dir(&output_folder)? {
116                    let e = e?;
117                    if e.file_type()?.is_dir() {
118                        continue;
119                    }
120                    let filename = e.file_name();
121                    let Some(filename) = filename.to_str() else {
122                        continue;
123                    };
124
125                    if let Some(vhd) = KnownTestArtifacts::from_filename(filename) {
126                        let size = e.metadata()?.len();
127                        let expected_size = vhd.file_size();
128                        if size != expected_size {
129                            log::warn!(
130                                "unexpected size for {}: expected {}, found {}",
131                                filename,
132                                expected_size,
133                                size
134                            );
135                            unexpected_artifacts.insert(vhd);
136                        } else {
137                            skip_artifacts.insert(vhd);
138                        }
139                    } else {
140                        continue;
141                    }
142                }
143
144                if !unexpected_artifacts.is_empty() {
145                    if custom_disk_policy.is_none() && matches!(rt.backend(), FlowBackend::Local) {
146                        log::warn!(
147                            r#"
148================================================================================
149Detected inconsistencies between expected and cached VMM test images.
150
151  If you are trying to use the same disks used in CI, then this is not expected,
152  and your cached disks are corrupt / out-of-date and need to be re-downloaded.
153  Please tweak your CLI invocation / pipeline such that
154  `LocalOnlyCustomDiskPolicy` is set to `CustomDiskPolicy::Strict`.
155
156  If you manually modified or replaced disks and you would like to keep them,
157  please tweak your CLI invocation / pipeline such that
158  `LocalOnlyCustomDiskPolicy` is set to `CustomDiskPolicy::Loose`.
159================================================================================
160"#
161                        );
162                    }
163
164                    match custom_disk_policy {
165                        Some(CustomDiskPolicy::Loose) => {
166                            skip_artifacts.extend(unexpected_artifacts.iter().copied());
167                            unexpected_artifacts.clear();
168                        }
169                        Some(CustomDiskPolicy::Strict) => {
170                            log::warn!("detected inconsistent disks. will re-download them");
171                        }
172                        None => {
173                            anyhow::bail!("detected inconsistent disks in disk cache")
174                        }
175                    }
176                }
177
178                let files_to_download = {
179                    let mut files = Vec::new();
180
181                    for artifact in test_artifacts {
182                        if !skip_artifacts.contains(&artifact)
183                            || unexpected_artifacts.contains(&artifact)
184                        {
185                            files.push((artifact.filename().to_string(), artifact.file_size()));
186                        }
187                    }
188
189                    // for aesthetic reasons
190                    files.sort();
191                    files
192                };
193
194                if !files_to_download.is_empty() {
195                    //
196                    // If running locally, warn the user they're about to download a
197                    // _lot_ of data
198                    //
199                    if matches!(rt.backend(), FlowBackend::Local) {
200                        let output_folder = output_folder.display();
201                        let disk_image_list = files_to_download
202                            .iter()
203                            .map(|(name, size)| format!("  - {name} ({size})"))
204                            .collect::<Vec<_>>()
205                            .join("\n");
206                        let download_size: u64 =
207                            files_to_download.iter().map(|(_, size)| size).sum();
208                        let msg = format!(
209                            r#"
210================================================================================
211In order to run the selected VMM tests, some (possibly large) disk images need
212to be downloaded from Azure blob storage.
213================================================================================
214- The following disk images will be downloaded:
215{disk_image_list}
216
217- Images will be downloaded to: {output_folder}
218- The total download size is: {download_size} bytes
219
220If running locally, you can re-run with `--help` for info on how to:
221- tweak the selected download folder (e.g: download images to an external HDD)
222- skip this warning prompt in the future
223
224If you're OK with starting the download, please press <enter>.
225Otherwise, press `ctrl-c` to cancel the run.
226================================================================================
227"#
228                        );
229                        log::warn!("{}", msg.trim());
230                        if !skip_prompt {
231                            let _ = std::io::stdin().read_line(&mut String::new());
232                        }
233                    }
234                }
235
236                rt.write(write_files_to_download, &files_to_download);
237                Ok(())
238            }
239        });
240
241        let did_download = ctx.emit_rust_step("downloading VMM test disk images", |ctx| {
242            let azcopy_bin = azcopy_bin.claim(ctx);
243            let files_to_download = files_to_download.claim(ctx);
244            let output_folder = output_folder.clone().claim(ctx);
245            |rt| {
246                let files_to_download = rt.read(files_to_download);
247                let output_folder = rt.read(output_folder);
248                let azcopy_bin = rt.read(azcopy_bin);
249
250                if !files_to_download.is_empty() {
251                    download_blobs_from_azure(
252                        rt,
253                        &azcopy_bin,
254                        None,
255                        files_to_download,
256                        &output_folder,
257                    )?;
258                }
259
260                Ok(())
261            }
262        });
263
264        ctx.emit_minor_rust_step("report downloaded VMM test disk images", |ctx| {
265            did_download.claim(ctx);
266            let output_folder = output_folder.claim(ctx);
267            let get_download_folder = get_download_folder.claim(ctx);
268            |rt| {
269                let output_folder = rt.read(output_folder);
270                for path in get_download_folder {
271                    rt.write(path, &output_folder)
272                }
273            }
274        });
275
276        Ok(())
277    }
278}
279
// How `azcopy` should authenticate, if at all. No variant is constructed in
// this file today (the only call site passes `None`), hence the `dead_code`
// expectation.
#[expect(dead_code)]
enum AzCopyAuthMethod {
    /// Reuse the credentials of the Azure CLI instance running the command.
    AzureCli,
    /// Print a link to stdout which the user must open to authenticate.
    Device,
}
287
288fn download_blobs_from_azure(
289    // pass dummy _rt to ensure no-one accidentally calls this at graph
290    // resolution time
291    _rt: &mut RustRuntimeServices<'_>,
292    azcopy_bin: &PathBuf,
293    azcopy_auth_method: Option<AzCopyAuthMethod>,
294    files_to_download: Vec<(String, u64)>,
295    output_folder: &Path,
296) -> anyhow::Result<()> {
297    let sh = xshell::Shell::new()?;
298
299    //
300    // Use azcopy to download the files
301    //
302    let url = format!("https://{STORAGE_ACCOUNT}.blob.core.windows.net/{CONTAINER}/*");
303
304    let include_path = files_to_download
305        .into_iter()
306        .map(|(name, _)| name)
307        .collect::<Vec<_>>()
308        .join(";");
309
310    // Translate the authentication method we're using.
311    let auth_method = azcopy_auth_method.map(|x| match x {
312        AzCopyAuthMethod::AzureCli => "AZCLI",
313        AzCopyAuthMethod::Device => "DEVICE",
314    });
315
316    if let Some(auth_method) = auth_method {
317        sh.set_var("AZCOPY_AUTO_LOGIN_TYPE", auth_method);
318    }
319    // instead of using return codes to signal success/failure,
320    // azcopy forces you to parse execution logs in order to find
321    // specific strings to detect if/how a copy has failed
322    //
323    // thanks azcopy. very cool.
324    //
325    // <https://learn.microsoft.com/en-us/azure/storage/common/storage-use-azcopy-configure#review-the-logs-for-errors>
326    sh.set_var("AZCOPY_JOB_PLAN_LOCATION", sh.current_dir());
327    sh.set_var("AZCOPY_LOG_LOCATION", sh.current_dir());
328
329    // setting `--overwrite true` since we do our own pre-download
330    // filtering
331    let result = xshell::cmd!(
332        sh,
333        "{azcopy_bin} copy
334            {url}
335            {output_folder}
336            --include-path {include_path}
337            --overwrite true
338            --skip-version-check
339        "
340    )
341    .run();
342
343    if result.is_err() {
344        xshell::cmd!(
345            sh,
346            "df -h --output=source,fstype,size,used,avail,pcent,target -x tmpfs -x devtmpfs"
347        )
348        .run()?;
349        let dir_contents = sh.read_dir(sh.current_dir())?;
350        for log in dir_contents
351            .iter()
352            .filter(|p| p.extension() == Some("log".as_ref()))
353        {
354            println!("{}:\n{}\n", log.display(), sh.read_file(log)?);
355        }
356        return result.context("failed to download VMM test disk images");
357    }
358
359    Ok(())
360}