Skip to main content

flowey_lib_hvlite/
download_openvmm_vmm_tests_artifacts.rs

// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

//! Download OpenVMM VMM test artifacts from Azure Blob Storage.
//!
//! When persistent storage is available, artifacts are downloaded into it, and
//! files already present there with the expected size are not downloaded again.

8use flowey::node::prelude::*;
9use std::collections::BTreeSet;
10use std::io::IsTerminal;
11use vmm_test_images::KnownTestArtifacts;
12
/// Name of the Azure storage account; used as the host prefix of the blob URL
/// (`https://{STORAGE_ACCOUNT}.blob.core.windows.net/...`).
const STORAGE_ACCOUNT: &str = "hvlitetestvhds";
/// Name of the blob container within [`STORAGE_ACCOUNT`] that is copied from.
const CONTAINER: &str = "vhds";
15
16#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
17pub enum CustomDiskPolicy {
18    /// Allow swapping in non-standard disk image variants
19    Loose,
20    /// Deny swapping in non-standard disk image variants, redownloading any
21    /// images that were detected as inconsistent.
22    Strict,
23}
24
flowey_config! {
    /// Config for the download_openvmm_vmm_tests_artifacts node.
    pub struct Config {
        /// Local only: if true, skips interactive prompt that warns user about
        /// downloading many gigabytes of disk images.
        ///
        /// Defaults to `false` on the local backend. Setting this on any other
        /// backend is an error; those backends never prompt.
        pub skip_prompt: Option<bool>,
        /// Local only: set policy when detecting a non-standard cached disk image
        ///
        /// When unset and a cached image has an unexpected size, the step
        /// fails instead of guessing which behavior is wanted.
        pub custom_disk_policy: Option<CustomDiskPolicy>,
        /// Specify a custom cache directory. When unset, the node's persistent
        /// directory is used if one is available; otherwise images are placed
        /// in the current working directory at the time the step runs.
        pub custom_cache_dir: Option<PathBuf>,
    }
}
38
flowey_request! {
    pub enum Request {
        /// Download test artifacts into the download folder
        ///
        /// Artifacts from all `Download` requests are merged into one
        /// de-duplicated set. Files already present in the download folder
        /// with the expected size are not downloaded again.
        Download(Vec<KnownTestArtifacts>),
        /// Get path to folder containing all downloaded artifacts
        ///
        /// The path is written by a step that depends on the download step.
        GetDownloadFolder(WriteVar<PathBuf>),
    }
}
47
// Declares the `Node` type; its `FlowNodeWithConfig` implementation follows.
new_flow_node_with_config!(struct Node);
49
50impl FlowNodeWithConfig for Node {
51    type Request = Request;
52    type Config = Config;
53
54    fn imports(ctx: &mut ImportCtx<'_>) {
55        ctx.import::<flowey_lib_common::download_azcopy::Node>();
56        ctx.import::<flowey_lib_common::install_azure_cli::Node>();
57    }
58
59    fn emit(
60        config: Config,
61        requests: Vec<Self::Request>,
62        ctx: &mut NodeCtx<'_>,
63    ) -> anyhow::Result<()> {
64        let mut test_artifacts = BTreeSet::<_>::new();
65        let mut get_download_folder = Vec::new();
66
67        for req in requests {
68            match req {
69                Request::Download(v) => v.into_iter().for_each(|v| {
70                    test_artifacts.insert(v);
71                }),
72                Request::GetDownloadFolder(path) => get_download_folder.push(path),
73            }
74        }
75
76        let skip_prompt = if matches!(ctx.backend(), FlowBackend::Local) {
77            config.skip_prompt.unwrap_or(false)
78        } else {
79            if config.skip_prompt.is_some() {
80                anyhow::bail!("set `skip_prompt` config on non-local backend")
81            }
82            true
83        };
84        let custom_disk_policy = config.custom_disk_policy;
85        let custom_cache_dir = config.custom_cache_dir;
86
87        let persistent_dir = ctx.persistent_dir();
88
89        let azcopy_bin = ctx.reqv(flowey_lib_common::download_azcopy::Request::GetAzCopy);
90
91        let (files_to_download, write_files_to_download) = ctx.new_var::<Vec<(String, u64)>>();
92        let (output_folder, write_output_folder) = ctx.new_var();
93
94        ctx.emit_rust_step("calculating required VMM tests disk images", |ctx| {
95            let persistent_dir = persistent_dir.clone().claim(ctx);
96            let test_artifacts = test_artifacts.into_iter().collect::<Vec<_>>();
97            let write_files_to_download = write_files_to_download.claim(ctx);
98            let write_output_folder = write_output_folder.claim(ctx);
99            move |rt| {
100                let output_folder = if let Some(dir) = custom_cache_dir {
101                    dir
102                } else if let Some(dir) = persistent_dir {
103                    rt.read(dir)
104                } else {
105                    std::env::current_dir()?
106                };
107
108                rt.write(write_output_folder, &output_folder.absolute()?);
109
110                //
111                // Check for VHDs that have already been downloaded, to see if
112                // we can skip invoking azure-cli and `azcopy` entirely.
113                //
114                let mut skip_artifacts = BTreeSet::new();
115                let mut unexpected_artifacts = BTreeSet::new();
116
117                for e in fs_err::read_dir(&output_folder)? {
118                    let e = e?;
119                    if e.file_type()?.is_dir() {
120                        continue;
121                    }
122                    let filename = e.file_name();
123                    let Some(filename) = filename.to_str() else {
124                        continue;
125                    };
126
127                    if let Some(vhd) = KnownTestArtifacts::from_filename(filename) {
128                        let size = e.metadata()?.len();
129                        let expected_size = vhd.file_size();
130                        if size != expected_size {
131                            log::warn!(
132                                "unexpected size for {}: expected {}, found {}",
133                                filename,
134                                expected_size,
135                                size
136                            );
137                            unexpected_artifacts.insert(vhd);
138                        } else {
139                            skip_artifacts.insert(vhd);
140                        }
141                    } else {
142                        continue;
143                    }
144                }
145
146                if !unexpected_artifacts.is_empty() {
147                    if custom_disk_policy.is_none() && matches!(rt.backend(), FlowBackend::Local) {
148                        log::warn!(
149                            r#"
150================================================================================
151Detected inconsistencies between expected and cached VMM test images.
152
153  If you are trying to use the same disks used in CI, then this is not expected,
154  and your cached disks are corrupt / out-of-date and need to be re-downloaded.
155  Please set the `custom_disk_policy` config to `CustomDiskPolicy::Strict`.
156
157  If you manually modified or replaced disks and you would like to keep them,
158  please set the `custom_disk_policy` config to `CustomDiskPolicy::Loose`.
159================================================================================
160"#
161                        );
162                    }
163
164                    match custom_disk_policy {
165                        Some(CustomDiskPolicy::Loose) => {
166                            skip_artifacts.extend(unexpected_artifacts.iter().copied());
167                            unexpected_artifacts.clear();
168                        }
169                        Some(CustomDiskPolicy::Strict) => {
170                            log::warn!("detected inconsistent disks. will re-download them");
171                        }
172                        None => {
173                            anyhow::bail!("detected inconsistent disks in disk cache")
174                        }
175                    }
176                }
177
178                let files_to_download = {
179                    let mut files = Vec::new();
180
181                    for artifact in test_artifacts {
182                        if !skip_artifacts.contains(&artifact)
183                            || unexpected_artifacts.contains(&artifact)
184                        {
185                            files.push((artifact.filename().to_string(), artifact.file_size()));
186                        }
187                    }
188
189                    // for aesthetic reasons
190                    files.sort();
191                    files
192                };
193
194                if !files_to_download.is_empty() {
195                    //
196                    // If running locally, warn the user they're about to download a
197                    // _lot_ of data
198                    //
199                    if matches!(rt.backend(), FlowBackend::Local) {
200                        let output_folder = output_folder.display();
201                        let disk_image_list = files_to_download
202                            .iter()
203                            .map(|(name, size)| format!("  - {name} ({size})"))
204                            .collect::<Vec<_>>()
205                            .join("\n");
206                        let download_size: u64 =
207                            files_to_download.iter().map(|(_, size)| size).sum();
208                        let msg = format!(
209                            r#"
210================================================================================
211In order to run the selected VMM tests, some (possibly large) disk images need
212to be downloaded from Azure blob storage.
213================================================================================
214- The following disk images will be downloaded:
215{disk_image_list}
216
217- Images will be downloaded to: {output_folder}
218- The total download size is: {download_size} bytes
219
220If running locally, you can re-run with `--help` for info on how to:
221- tweak the selected download folder (e.g: download images to an external HDD)
222- skip this warning prompt in the future
223
224If you're OK with starting the download, please press just <enter>.
225Otherwise, press anything else with <enter> to cancel the run.
226================================================================================
227"#
228                        );
229                        log::warn!("{}", msg.trim());
230
231                        // If this is not an interactive terminal, just allow the download to proceed
232                        let is_terminal = std::io::stdin().is_terminal();
233
234                        if !skip_prompt && is_terminal {
235                            // Only display the prompt for 30s before timing out
236                            let result = crossterm::event::poll(std::time::Duration::from_secs(30));
237                            match result {
238                                Ok(true) => {
239                                    if let crossterm::event::Event::Key(key_event) =
240                                        crossterm::event::read().unwrap()
241                                    {
242                                        if key_event.code == crossterm::event::KeyCode::Enter {
243                                            // proceed with download
244                                        } else {
245                                            anyhow::bail!("user cancelled the run");
246                                        }
247                                    } else {
248                                        anyhow::bail!(
249                                            "unexpected event while waiting for user input"
250                                        );
251                                    }
252                                }
253                                Ok(false) => {
254                                    anyhow::bail!("timed out waiting for user input");
255                                }
256                                Err(e) => {
257                                    anyhow::bail!("error while waiting for user input: {e}");
258                                }
259                            }
260                        }
261                    }
262                }
263
264                rt.write(write_files_to_download, &files_to_download);
265                Ok(())
266            }
267        });
268
269        let did_download = ctx.emit_rust_step("downloading VMM test disk images", |ctx| {
270            let azcopy_bin = azcopy_bin.claim(ctx);
271            let files_to_download = files_to_download.claim(ctx);
272            let output_folder = output_folder.clone().claim(ctx);
273            |rt| {
274                let files_to_download = rt.read(files_to_download);
275                let output_folder = rt.read(output_folder);
276                let azcopy_bin = rt.read(azcopy_bin);
277
278                if !files_to_download.is_empty() {
279                    download_blobs_from_azure(
280                        rt,
281                        &azcopy_bin,
282                        None,
283                        files_to_download,
284                        &output_folder,
285                    )?;
286                }
287
288                Ok(())
289            }
290        });
291
292        ctx.emit_minor_rust_step("report downloaded VMM test disk images", |ctx| {
293            did_download.claim(ctx);
294            let output_folder = output_folder.claim(ctx);
295            let get_download_folder = get_download_folder.claim(ctx);
296            |rt| {
297                let output_folder = rt.read(output_folder);
298                for path in get_download_folder {
299                    rt.write(path, &output_folder)
300                }
301            }
302        });
303
304        Ok(())
305    }
306}
307
/// Authentication mode passed to `azcopy` through the
/// `AZCOPY_AUTO_LOGIN_TYPE` environment variable.
// No variant is constructed in the code visible here (the only call to
// `download_blobs_from_azure` passes `None`), hence the `dead_code`
// expectation.
#[expect(dead_code)]
enum AzCopyAuthMethod {
    /// Pull credentials from the Azure CLI instance running the command.
    /// Sent to azcopy as `AZCLI`.
    AzureCli,
    /// Print a link to stdout and require the user to click it to authenticate.
    /// Sent to azcopy as `DEVICE`.
    Device,
}
315
316fn download_blobs_from_azure(
317    // pass dummy _rt to ensure no-one accidentally calls this at graph
318    // resolution time
319    rt: &mut RustRuntimeServices<'_>,
320    azcopy_bin: &PathBuf,
321    azcopy_auth_method: Option<AzCopyAuthMethod>,
322    files_to_download: Vec<(String, u64)>,
323    output_folder: &Path,
324) -> anyhow::Result<()> {
325    //
326    // Use azcopy to download the files
327    //
328    let url = format!("https://{STORAGE_ACCOUNT}.blob.core.windows.net/{CONTAINER}/*");
329
330    let include_path = files_to_download
331        .into_iter()
332        .map(|(name, _)| name)
333        .collect::<Vec<_>>()
334        .join(";");
335
336    // Translate the authentication method we're using.
337    let auth_method = azcopy_auth_method.map(|x| match x {
338        AzCopyAuthMethod::AzureCli => "AZCLI",
339        AzCopyAuthMethod::Device => "DEVICE",
340    });
341
342    if let Some(auth_method) = auth_method {
343        rt.sh.set_var("AZCOPY_AUTO_LOGIN_TYPE", auth_method);
344    }
345    // instead of using return codes to signal success/failure,
346    // azcopy forces you to parse execution logs in order to find
347    // specific strings to detect if/how a copy has failed
348    //
349    // thanks azcopy. very cool.
350    //
351    // <https://learn.microsoft.com/en-us/azure/storage/common/storage-use-azcopy-configure#review-the-logs-for-errors>
352    let current_dir = rt.sh.current_dir();
353    rt.sh
354        .set_var("AZCOPY_JOB_PLAN_LOCATION", current_dir.clone());
355    rt.sh.set_var("AZCOPY_LOG_LOCATION", current_dir.clone());
356
357    // setting `--overwrite true` since we do our own pre-download
358    // filtering
359    let result = flowey::shell_cmd!(
360        rt,
361        "{azcopy_bin} copy
362            {url}
363            {output_folder}
364            --include-path {include_path}
365            --overwrite true
366            --skip-version-check
367        "
368    )
369    .run();
370
371    if result.is_err() {
372        flowey::shell_cmd!(
373            rt,
374            "df -h --output=source,fstype,size,used,avail,pcent,target -x tmpfs -x devtmpfs"
375        )
376        .run()?;
377        let dir_contents = rt.sh.read_dir(current_dir)?;
378        for log in dir_contents
379            .iter()
380            .filter(|p| p.extension() == Some("log".as_ref()))
381        {
382            println!("{}:\n{}\n", log.display(), rt.sh.read_file(log)?);
383        }
384        return result.context("failed to download VMM test disk images");
385    }
386
387    Ok(())
388}